diff options
author | Sidath Senanayake <sidaths@google.com> | 2021-06-15 13:39:30 +0100 |
---|---|---|
committer | Sidath Senanayake <sidaths@google.com> | 2021-06-15 14:11:16 +0100 |
commit | fca8613cfcf585bf9113dca96a05daea9fd89794 (patch) | |
tree | f2baa14910f83edf00450bc30d3703eb255a0bba /mali_kbase | |
parent | 8037b534570814775d79aeddd06b76e5ee941f59 (diff) | |
download | gpu-fca8613cfcf585bf9113dca96a05daea9fd89794.tar.gz |
Mali Valhall DDK r31p0 KMD
Provenance: 2ea0ef9bd (collaborate/EAC/v_r31p0)
VX504X08X-BU-00000-r31p0-01eac0 - Valhall Android DDK
VX504X08X-BU-60000-r31p0-01eac0 - Valhall Android Document Bundle
VX504X08X-DC-11001-r31p0-01eac0 - Valhall Android DDK Software Errata
VX504X08X-SW-99006-r31p0-01eac0 - Valhall Android Renderscript AOSP parts
Signed-off-by: Sidath Senanayake <sidaths@google.com>
Change-Id: Ide9d5fdc6d9c95fa66a3546b01f619b43c09496d
Diffstat (limited to 'mali_kbase')
128 files changed, 3404 insertions, 9006 deletions
diff --git a/mali_kbase/Kbuild b/mali_kbase/Kbuild index 1c9e109..5463a24 100644 --- a/mali_kbase/Kbuild +++ b/mali_kbase/Kbuild @@ -20,11 +20,11 @@ # Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= "r30p0-01eac0" +MALI_RELEASE_NAME ?= '"r31p0-01eac0"' # Paths required for build -# make $(src) as absolute path if it isn't already, by prefixing $(srctree) +# make $(src) as absolute path if it is not already, by prefixing $(srctree) src:=$(if $(patsubst /%,,$(src)),$(srctree)/$(src),$(src)) KBASE_PATH = $(src) KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy @@ -64,7 +64,7 @@ DEFINES = \ -DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \ -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \ -DMALI_COVERAGE=$(MALI_COVERAGE) \ - -DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \ + -DMALI_RELEASE_NAME=$(MALI_RELEASE_NAME) \ -DMALI_JIT_PRESSURE_LIMIT_BASE=$(MALI_JIT_PRESSURE_LIMIT_BASE) \ -DMALI_INCREMENTAL_RENDERING=$(MALI_INCREMENTAL_RENDERING) @@ -114,7 +114,6 @@ SRC := \ mali_kbase_mem_profile_debugfs.c \ mmu/mali_kbase_mmu.c \ mmu/mali_kbase_mmu_hw_direct.c \ - mmu/mali_kbase_mmu_mode_lpae.c \ mmu/mali_kbase_mmu_mode_aarch64.c \ mali_kbase_disjoint_events.c \ mali_kbase_debug_mem_view.c \ @@ -170,9 +169,6 @@ ifeq ($(CONFIG_MALI_CINSTR_GWT),y) SRC += mali_kbase_gwt.c endif -ifeq ($(MALI_UNIT_TEST),1) - SRC += tl/mali_kbase_timeline_test.c -endif ifeq ($(MALI_CUSTOMER_RELEASE),0) SRC += mali_kbase_regs_dump_debugfs.c diff --git a/mali_kbase/Makefile b/mali_kbase/Makefile index 2ba2d77..84103af 100644 --- a/mali_kbase/Makefile +++ b/mali_kbase/Makefile @@ -27,7 +27,7 @@ ifeq ($(KBUILD_EXTMOD),) export CONFIG_MALI_MIDGARD?=m ifneq ($(CONFIG_MALI_MIDGARD),n) -export CONFIF_MALI_CSF_SUPPORT?=n +export CONFIG_MALI_CSF_SUPPORT?=n export CONFIG_MALI_KUTF?=m export CONFIG_MALI_REAL_HW?=y @@ -39,7 +39,7 @@ export CONFIG_MALI_DEVFREQ?=y endif DEFINES += -DCONFIG_MALI_MIDGARD=$(CONFIG_MALI_MIDGARD) \ - -DCONFIF_MALI_CSF_SUPPORT=$(CONFIF_MALI_CSF_SUPPORT) \ + 
-DCONFIG_MALI_CSF_SUPPORT=$(CONFIG_MALI_CSF_SUPPORT) \ -DCONFIG_MALI_KUTF=$(CONFIG_MALI_KUTF) \ -DCONFIG_MALI_REAL_HW=$(CONFIG_MALI_REAL_HW) \ -DCONFIG_MALI_GATOR_SUPPORT=$(CONFIG_MALI_GATOR_SUPPORT) \ @@ -50,13 +50,8 @@ export DEFINES endif endif -BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../.. KBASE_PATH_RELATIVE = $(CURDIR) -ifeq ($(CONFIG_MALI_BUSLOG),y) -#Add bus logger symbols -EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers -endif # we get the symbols from modules using KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions all: diff --git a/mali_kbase/arbiter/mali_kbase_arbif.c b/mali_kbase/arbiter/mali_kbase_arbif.c index 5ed5f80..7d6ab0c 100644 --- a/mali_kbase/arbiter/mali_kbase_arbif.c +++ b/mali_kbase/arbiter/mali_kbase_arbif.c @@ -30,6 +30,66 @@ #include <linux/of_platform.h> #include "mali_kbase_arbiter_interface.h" +/* Arbiter interface version against which was implemented this module */ +#define MALI_REQUIRED_KBASE_ARBITER_INTERFACE_VERSION 5 +#if MALI_REQUIRED_KBASE_ARBITER_INTERFACE_VERSION != \ + MALI_KBASE_ARBITER_INTERFACE_VERSION +#error "Unsupported Mali Arbiter interface version." 
+#endif + +static void on_max_config(struct device *dev, uint32_t max_l2_slices, + uint32_t max_core_mask) +{ + struct kbase_device *kbdev; + + if (!dev) { + pr_err("%s(): dev is NULL", __func__); + return; + } + + kbdev = dev_get_drvdata(dev); + if (!kbdev) { + dev_err(dev, "%s(): kbdev is NULL", __func__); + return; + } + + if (!max_l2_slices || !max_core_mask) { + dev_dbg(dev, + "%s(): max_config ignored as one of the fields is zero", + __func__); + return; + } + + /* set the max config info in the kbase device */ + kbase_arbiter_set_max_config(kbdev, max_l2_slices, max_core_mask); +} + +/** + * on_update_freq() - Updates GPU clock frequency + * @dev: arbiter interface device handle + * @freq: GPU clock frequency value reported from arbiter + * + * call back function to update GPU clock frequency with + * new value from arbiter + */ +static void on_update_freq(struct device *dev, uint32_t freq) +{ + struct kbase_device *kbdev; + + if (!dev) { + pr_err("%s(): dev is NULL", __func__); + return; + } + + kbdev = dev_get_drvdata(dev); + if (!kbdev) { + dev_err(dev, "%s(): kbdev is NULL", __func__); + return; + } + + kbase_arbiter_pm_update_gpu_freq(&kbdev->arb.arb_freq, freq); +} + /** * on_gpu_stop() - sends KBASE_VM_GPU_STOP_EVT event on VM stop * @dev: arbiter interface device handle @@ -38,7 +98,18 @@ */ static void on_gpu_stop(struct device *dev) { - struct kbase_device *kbdev = dev_get_drvdata(dev); + struct kbase_device *kbdev; + + if (!dev) { + pr_err("%s(): dev is NULL", __func__); + return; + } + + kbdev = dev_get_drvdata(dev); + if (!kbdev) { + dev_err(dev, "%s(): kbdev is NULL", __func__); + return; + } KBASE_TLSTREAM_TL_ARBITER_STOP_REQUESTED(kbdev, kbdev); kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_STOP_EVT); @@ -52,7 +123,18 @@ static void on_gpu_stop(struct device *dev) */ static void on_gpu_granted(struct device *dev) { - struct kbase_device *kbdev = dev_get_drvdata(dev); + struct kbase_device *kbdev; + + if (!dev) { + pr_err("%s(): dev is NULL", 
__func__); + return; + } + + kbdev = dev_get_drvdata(dev); + if (!kbdev) { + dev_err(dev, "%s(): kbdev is NULL", __func__); + return; + } KBASE_TLSTREAM_TL_ARBITER_GRANTED(kbdev, kbdev); kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_GRANTED_EVT); @@ -66,7 +148,18 @@ static void on_gpu_granted(struct device *dev) */ static void on_gpu_lost(struct device *dev) { - struct kbase_device *kbdev = dev_get_drvdata(dev); + struct kbase_device *kbdev; + + if (!dev) { + pr_err("%s(): dev is NULL", __func__); + return; + } + + kbdev = dev_get_drvdata(dev); + if (!kbdev) { + dev_err(dev, "%s(): kbdev is NULL", __func__); + return; + } kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_LOST_EVT); } @@ -122,6 +215,12 @@ int kbase_arbif_init(struct kbase_device *kbdev) ops.arb_vm_gpu_stop = on_gpu_stop; ops.arb_vm_gpu_granted = on_gpu_granted; ops.arb_vm_gpu_lost = on_gpu_lost; + ops.arb_vm_max_config = on_max_config; + ops.arb_vm_update_freq = on_update_freq; + + + kbdev->arb.arb_freq.arb_freq = 0; + mutex_init(&kbdev->arb.arb_freq.arb_freq_lock); /* register kbase arbiter_if callbacks */ if (arb_if->vm_ops.vm_arb_register_dev) { @@ -133,6 +232,7 @@ int kbase_arbif_init(struct kbase_device *kbdev) return err; } } + #else /* CONFIG_OF */ dev_dbg(kbdev->dev, "No arbiter without Device Tree support\n"); kbdev->arb.arb_dev = NULL; @@ -162,6 +262,22 @@ void kbase_arbif_destroy(struct kbase_device *kbdev) } /** + * kbase_arbif_get_max_config() - Request max config info + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * call back function from arb interface to arbiter requesting max config info + */ +void kbase_arbif_get_max_config(struct kbase_device *kbdev) +{ + struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; + + if (arb_if && arb_if->vm_ops.vm_arb_get_max_config) { + dev_dbg(kbdev->dev, "%s\n", __func__); + arb_if->vm_ops.vm_arb_get_max_config(arb_if); + } +} + +/** * kbase_arbif_gpu_request() - Request GPU from * @kbdev: The kbase device structure 
for the device (must be a valid pointer) * @@ -173,6 +289,7 @@ void kbase_arbif_gpu_request(struct kbase_device *kbdev) if (arb_if && arb_if->vm_ops.vm_arb_gpu_request) { dev_dbg(kbdev->dev, "%s\n", __func__); + KBASE_TLSTREAM_TL_ARBITER_REQUESTED(kbdev, kbdev); arb_if->vm_ops.vm_arb_gpu_request(arb_if); } } diff --git a/mali_kbase/arbiter/mali_kbase_arbif.h b/mali_kbase/arbiter/mali_kbase_arbif.h index c6a2031..710559c 100644 --- a/mali_kbase/arbiter/mali_kbase_arbif.h +++ b/mali_kbase/arbiter/mali_kbase_arbif.h @@ -72,6 +72,14 @@ int kbase_arbif_init(struct kbase_device *kbdev); void kbase_arbif_destroy(struct kbase_device *kbdev); /** + * kbase_arbif_get_max_config() - Request max config info + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * call back function from arb interface to arbiter requesting max config info + */ +void kbase_arbif_get_max_config(struct kbase_device *kbdev); + +/** * kbase_arbif_gpu_request() - Send GPU request message to the arbiter * @kbdev: The kbase device structure for the device (must be a valid pointer) * diff --git a/mali_kbase/arbiter/mali_kbase_arbiter_defs.h b/mali_kbase/arbiter/mali_kbase_arbiter_defs.h index c754b6e..586c5d4 100644 --- a/mali_kbase/arbiter/mali_kbase_arbiter_defs.h +++ b/mali_kbase/arbiter/mali_kbase_arbiter_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -44,6 +44,8 @@ * @vm_resume_work: Work item for vm_arb_wq to resume current work on GPU * @vm_arb_starting: Work queue resume in progress * @vm_arb_stopping: Work queue suspend in progress + * @interrupts_installed: Flag set when interrupts are installed + * @vm_request_timer: Timer to monitor GPU request */ struct kbase_arbiter_vm_state { struct kbase_device *kbdev; @@ -55,6 +57,8 @@ struct kbase_arbiter_vm_state { struct work_struct vm_resume_work; bool vm_arb_starting; bool vm_arb_stopping; + bool interrupts_installed; + struct hrtimer vm_request_timer; }; /** @@ -62,10 +66,12 @@ struct kbase_arbiter_vm_state { * allocated from the probe method of Mali driver * @arb_if: Pointer to the arbiter interface device * @arb_dev: Pointer to the arbiter device + * @arb_freq: GPU clock frequency retrieved from arbiter. */ struct kbase_arbiter_device { struct arbiter_if_dev *arb_if; struct device *arb_dev; + struct kbase_arbiter_freq arb_freq; }; #endif /* _MALI_KBASE_ARBITER_DEFS_H_ */ diff --git a/mali_kbase/arbiter/mali_kbase_arbiter_interface.h b/mali_kbase/arbiter/mali_kbase_arbiter_interface.h index 958b0a1..84389e8 100644 --- a/mali_kbase/arbiter/mali_kbase_arbiter_interface.h +++ b/mali_kbase/arbiter/mali_kbase_arbiter_interface.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,7 +28,7 @@ #define _MALI_KBASE_ARBITER_INTERFACE_H_ /** - * @brief Mali arbiter interface version + * Mali arbiter interface version * * This specifies the current version of the configuration interface. 
Whenever * the arbiter interface changes, so that integration effort is required, the @@ -39,8 +39,15 @@ * 1 - Added the Mali arbiter configuration interface. * 2 - Strip out reference code from header * 3 - Removed DVFS utilization interface (DVFS moved to arbiter side) + * 4 - Added max_config support + * 5 - Added GPU clock frequency reporting support from arbiter */ -#define MALI_KBASE_ARBITER_INTERFACE_VERSION 3 +#define MALI_KBASE_ARBITER_INTERFACE_VERSION 5 + +/** + * NO_FREQ is used in case platform doesn't support reporting frequency + */ +#define NO_FREQ 0 struct arbiter_if_dev; @@ -86,6 +93,27 @@ struct arbiter_if_arb_vm_ops { * If successful, will respond with a vm_arb_gpu_stopped message. */ void (*arb_vm_gpu_lost)(struct device *dev); + + /** + * arb_vm_max_config() - Send max config info to the VM + * @dev: The arbif kernel module device. + * @max_l2_slices: The maximum number of L2 slices. + * @max_core_mask: The largest core mask. + * + * Informs KBase the maximum resources that can be allocated to the + * partition in use. + */ + void (*arb_vm_max_config)(struct device *dev, uint32_t max_l2_slices, + uint32_t max_core_mask); + + /** + * arb_vm_update_freq() - GPU clock frequency has been updated + * @dev: The arbif kernel module device. + * @freq: GPU clock frequency value reported from arbiter + * + * Informs KBase that the GPU clock frequency has been updated. + */ + void (*arb_vm_update_freq)(struct device *dev, uint32_t freq); }; /** @@ -115,6 +143,13 @@ struct arbiter_if_vm_arb_ops { void (*vm_arb_unregister_dev)(struct arbiter_if_dev *arbif_dev); /** + * vm_arb_gpu_get_max_config() - Request the max config from the + * Arbiter. + * @arbif_dev: The arbiter interface we want to issue the request. + */ + void (*vm_arb_get_max_config)(struct arbiter_if_dev *arbif_dev); + + /** * vm_arb_gpu_request() - Ask the arbiter interface for GPU access. * @arbif_dev: The arbiter interface we want to issue the request. 
*/ diff --git a/mali_kbase/arbiter/mali_kbase_arbiter_pm.c b/mali_kbase/arbiter/mali_kbase_arbiter_pm.c index 08a6872..456cc70 100644 --- a/mali_kbase/arbiter/mali_kbase_arbiter_pm.c +++ b/mali_kbase/arbiter/mali_kbase_arbiter_pm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,14 +20,33 @@ */ /** - * @file mali_kbase_arbiter_pm.c + * @file * Mali arbiter power manager state machine and APIs */ #include <mali_kbase.h> #include <mali_kbase_pm.h> +#include <mali_kbase_hwaccess_jm.h> #include <mali_kbase_irq_internal.h> +#include <mali_kbase_hwcnt_context.h> +#include <mali_kbase_pm_internal.h> #include <tl/mali_kbase_tracepoints.h> +#include <mali_kbase_gpuprops.h> + +/* A dmesg warning will occur if the GPU is not granted + * after the following time (in milliseconds) has ellapsed. + */ +#define GPU_REQUEST_TIMEOUT 1000 + +#define MAX_L2_SLICES_MASK 0xFF + +/* Maximum time in ms, before deferring probe incase + * GPU_GRANTED message is not received + */ +static int gpu_req_timeout = 1; +module_param(gpu_req_timeout, int, 0644); +MODULE_PARM_DESC(gpu_req_timeout, + "On a virtualized platform, if the GPU is not granted within this time(ms) kbase will defer the probe"); static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev); static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld( @@ -195,6 +214,60 @@ static void kbase_arbiter_pm_resume_wq(struct work_struct *data) } /** + * request_timer_callback() - Issue warning on request timer expiration + * @timer: Request hr timer data + * + * Called when the Arbiter takes too long to grant the GPU after a + * request has been made. Issues a warning in dmesg. 
+ * + * Return: Always returns HRTIMER_NORESTART + */ +static enum hrtimer_restart request_timer_callback(struct hrtimer *timer) +{ + struct kbase_arbiter_vm_state *arb_vm_state = container_of(timer, + struct kbase_arbiter_vm_state, vm_request_timer); + + KBASE_DEBUG_ASSERT(arb_vm_state); + KBASE_DEBUG_ASSERT(arb_vm_state->kbdev); + + dev_warn(arb_vm_state->kbdev->dev, + "Still waiting for GPU to be granted from Arbiter after %d ms\n", + GPU_REQUEST_TIMEOUT); + return HRTIMER_NORESTART; +} + +/** + * start_request_timer() - Start a timer after requesting GPU + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Start a timer to track when kbase is waiting for the GPU from the + * Arbiter. If the timer expires before GPU is granted, a warning in + * dmesg will be issued. + */ +static void start_request_timer(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + hrtimer_start(&arb_vm_state->vm_request_timer, + HR_TIMER_DELAY_MSEC(GPU_REQUEST_TIMEOUT), + HRTIMER_MODE_REL); +} + +/** + * cancel_request_timer() - Stop the request timer + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Stops the request timer once GPU has been granted. Safe to call + * even if timer is no longer running. + */ +static void cancel_request_timer(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + hrtimer_cancel(&arb_vm_state->vm_request_timer); +} + +/** * kbase_arbiter_pm_early_init() - Initialize arbiter for VM * Paravirtualized use. 
* @kbdev: The kbase device structure for the device (must be a valid pointer) @@ -230,6 +303,10 @@ int kbase_arbiter_pm_early_init(struct kbase_device *kbdev) INIT_WORK(&arb_vm_state->vm_resume_work, kbase_arbiter_pm_resume_wq); arb_vm_state->vm_arb_starting = false; atomic_set(&kbdev->pm.gpu_users_waiting, 0); + hrtimer_init(&arb_vm_state->vm_request_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + arb_vm_state->vm_request_timer.function = + request_timer_callback; kbdev->pm.arb_vm_state = arb_vm_state; err = kbase_arbif_init(kbdev); @@ -237,17 +314,31 @@ int kbase_arbiter_pm_early_init(struct kbase_device *kbdev) dev_err(kbdev->dev, "Failed to initialise arbif module\n"); goto arbif_init_fail; } + if (kbdev->arb.arb_if) { kbase_arbif_gpu_request(kbdev); dev_dbg(kbdev->dev, "Waiting for initial GPU assignment...\n"); - wait_event(arb_vm_state->vm_state_wait, + err = wait_event_timeout(arb_vm_state->vm_state_wait, arb_vm_state->vm_state == - KBASE_VM_STATE_INITIALIZING_WITH_GPU); + KBASE_VM_STATE_INITIALIZING_WITH_GPU, + msecs_to_jiffies(gpu_req_timeout)); + + if (!err) { + dev_dbg(kbdev->dev, + "Kbase probe Deferred after waiting %d ms to receive GPU_GRANT\n", + gpu_req_timeout); + err = -EPROBE_DEFER; + goto arbif_eprobe_defer; + } + dev_dbg(kbdev->dev, "Waiting for initial GPU assignment - done\n"); } return 0; +arbif_eprobe_defer: + kbase_arbiter_pm_early_term(kbdev); + return err; arbif_init_fail: destroy_workqueue(arb_vm_state->vm_arb_wq); kfree(arb_vm_state); @@ -265,14 +356,15 @@ void kbase_arbiter_pm_early_term(struct kbase_device *kbdev) { struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + cancel_request_timer(kbdev); mutex_lock(&arb_vm_state->vm_state_lock); if (arb_vm_state->vm_state > KBASE_VM_STATE_STOPPED_GPU_REQUESTED) { kbase_pm_set_gpu_lost(kbdev, false); kbase_arbif_gpu_stopped(kbdev, false); } mutex_unlock(&arb_vm_state->vm_state_lock); - kbase_arbif_destroy(kbdev); destroy_workqueue(arb_vm_state->vm_arb_wq); + 
kbase_arbif_destroy(kbdev); arb_vm_state->vm_arb_wq = NULL; kfree(kbdev->pm.arb_vm_state); kbdev->pm.arb_vm_state = NULL; @@ -282,19 +374,36 @@ void kbase_arbiter_pm_early_term(struct kbase_device *kbdev) * kbase_arbiter_pm_release_interrupts() - Release the GPU interrupts * @kbdev: The kbase device structure for the device (must be a valid pointer) * - * Releases interrupts if needed (GPU is available) otherwise does nothing + * Releases interrupts and set the interrupt flag to false */ void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev) { struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; mutex_lock(&arb_vm_state->vm_state_lock); - if (!kbdev->arb.arb_if || - arb_vm_state->vm_state > - KBASE_VM_STATE_STOPPED_GPU_REQUESTED) + if (arb_vm_state->interrupts_installed == true) { + arb_vm_state->interrupts_installed = false; kbase_release_interrupts(kbdev); + } + mutex_unlock(&arb_vm_state->vm_state_lock); +} +/** + * kbase_arbiter_pm_install_interrupts() - Install the GPU interrupts + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Install interrupts and set the interrupt_install flag to true. 
+ */ +int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + int err; + + mutex_lock(&arb_vm_state->vm_state_lock); + arb_vm_state->interrupts_installed = true; + err = kbase_install_interrupts(kbdev); mutex_unlock(&arb_vm_state->vm_state_lock); + return err; } /** @@ -317,7 +426,12 @@ void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev) dev_dbg(kbdev->dev, "%s %s\n", __func__, kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); - kbase_release_interrupts(kbdev); + + if (arb_vm_state->interrupts_installed) { + arb_vm_state->interrupts_installed = false; + kbase_release_interrupts(kbdev); + } + switch (arb_vm_state->vm_state) { case KBASE_VM_STATE_STOPPING_ACTIVE: request_gpu = true; @@ -338,6 +452,71 @@ void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev) kbase_pm_set_gpu_lost(kbdev, false); kbase_arbif_gpu_stopped(kbdev, request_gpu); + if (request_gpu) + start_request_timer(kbdev); +} + +void kbase_arbiter_set_max_config(struct kbase_device *kbdev, + uint32_t max_l2_slices, + uint32_t max_core_mask) +{ + struct kbase_arbiter_vm_state *arb_vm_state; + struct max_config_props max_config; + + if (!kbdev) + return; + + /* Mask the max_l2_slices as it is stored as 8 bits into kbase */ + max_config.l2_slices = max_l2_slices & MAX_L2_SLICES_MASK; + max_config.core_mask = max_core_mask; + arb_vm_state = kbdev->pm.arb_vm_state; + + mutex_lock(&arb_vm_state->vm_state_lock); + /* Just set the max_props in kbase during initialization. 
*/ + if (arb_vm_state->vm_state == KBASE_VM_STATE_INITIALIZING) + kbase_gpuprops_set_max_config(kbdev, &max_config); + else + dev_dbg(kbdev->dev, "Unexpected max_config on VM state %s", + kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); + + mutex_unlock(&arb_vm_state->vm_state_lock); +} + +int kbase_arbiter_pm_gpu_assigned(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state; + int result = -EINVAL; + + if (!kbdev) + return result; + + /* First check the GPU_LOST state */ + kbase_pm_lock(kbdev); + if (kbase_pm_is_gpu_lost(kbdev)) { + kbase_pm_unlock(kbdev); + return 0; + } + kbase_pm_unlock(kbdev); + + /* Then the arbitration state machine */ + arb_vm_state = kbdev->pm.arb_vm_state; + + mutex_lock(&arb_vm_state->vm_state_lock); + switch (arb_vm_state->vm_state) { + case KBASE_VM_STATE_INITIALIZING: + case KBASE_VM_STATE_SUSPENDED: + case KBASE_VM_STATE_STOPPED: + case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: + case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: + result = 0; + break; + default: + result = 1; + break; + } + mutex_unlock(&arb_vm_state->vm_state_lock); + + return result; } /** @@ -351,6 +530,7 @@ static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev) struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; lockdep_assert_held(&arb_vm_state->vm_state_lock); + cancel_request_timer(kbdev); switch (arb_vm_state->vm_state) { case KBASE_VM_STATE_INITIALIZING: kbase_arbiter_pm_vm_set_state(kbdev, @@ -358,7 +538,14 @@ static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev) break; case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STARTING); + arb_vm_state->interrupts_installed = true; kbase_install_interrupts(kbdev); + /* + * GPU GRANTED received while in stop can be a result of a + * repartitioning. + */ + kbase_gpuprops_req_curr_config_update(kbdev); + /* curr_config will be updated while resuming the PM. 
*/ queue_work(arb_vm_state->vm_arb_wq, &arb_vm_state->vm_resume_work); break; @@ -591,6 +778,7 @@ static void kbase_arbiter_pm_vm_os_resume(struct kbase_device *kbdev) kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPED_GPU_REQUESTED); kbase_arbif_gpu_request(kbdev); + start_request_timer(kbdev); /* Release lock and block resume OS function until we have * asynchronously received the GRANT message from the Arbiter and @@ -764,6 +952,7 @@ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev, kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPED_GPU_REQUESTED); kbase_arbif_gpu_request(kbdev); + start_request_timer(kbdev); } else if (arb_vm_state->vm_state == KBASE_VM_STATE_INITIALIZING_WITH_GPU) break; @@ -811,3 +1000,60 @@ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev, } return res; } + +/** + * kbase_arbiter_pm_update_gpu_freq() - Updates GPU clock frequency received + * from arbiter. + * @arb_freq - Pointer to structure holding GPU clock frequency data + * @freq - New frequency value + */ +void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq, + uint32_t freq) +{ + mutex_lock(&arb_freq->arb_freq_lock); + arb_freq->arb_freq = freq; + mutex_unlock(&arb_freq->arb_freq_lock); +} + +/** + * enumerate_arb_gpu_clk() - Enumerate a GPU clock on the given index + * @kbdev - kbase_device pointer + * @index - GPU clock index + * + * Returns pointer to structure holding GPU clock frequency data reported from + * arbiter, only index 0 is valid. 
+ */ +static void *enumerate_arb_gpu_clk(struct kbase_device *kbdev, + unsigned int index) +{ + if (index == 0) + return &kbdev->arb.arb_freq; + return NULL; +} + +/** + * get_arb_gpu_clk_rate() - Get the current rate of GPU clock frequency value + * @kbdev - kbase_device pointer + * @gpu_clk_handle - Pointer to the structure holding GPU clock frequency data + * + * Returns the GPU clock frequency value saved when gpu is granted from arbiter + */ +static unsigned long get_arb_gpu_clk_rate(struct kbase_device *kbdev, + void *gpu_clk_handle) +{ + uint32_t freq; + struct kbase_arbiter_freq *arb_dev_freq = + (struct kbase_arbiter_freq *) gpu_clk_handle; + + mutex_lock(&arb_dev_freq->arb_freq_lock); + freq = arb_dev_freq->arb_freq; + mutex_unlock(&arb_dev_freq->arb_freq_lock); + return freq; +} + +struct kbase_clk_rate_trace_op_conf arb_clk_rate_trace_ops = { + .get_gpu_clk_rate = get_arb_gpu_clk_rate, + .enumerate_gpu_clk = enumerate_arb_gpu_clk, + .gpu_clk_notifier_register = NULL, + .gpu_clk_notifier_unregister = NULL +}; diff --git a/mali_kbase/arbiter/mali_kbase_arbiter_pm.h b/mali_kbase/arbiter/mali_kbase_arbiter_pm.h index ef82271..0f74b63 100644 --- a/mali_kbase/arbiter/mali_kbase_arbiter_pm.h +++ b/mali_kbase/arbiter/mali_kbase_arbiter_pm.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -93,11 +93,19 @@ void kbase_arbiter_pm_early_term(struct kbase_device *kbdev); * kbase_arbiter_pm_release_interrupts() - Release the GPU interrupts * @kbdev: The kbase device structure for the device (must be a valid pointer) * - * Releases interrupts if needed (GPU is available) otherwise does nothing + * Releases interrupts and set the interrupt flag to false */ void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev); /** + * kbase_arbiter_pm_install_interrupts() - Install the GPU interrupts + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Install interrupts and set the interrupt_install flag to true. + */ +int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev); + +/** * kbase_arbiter_pm_vm_event() - Dispatch VM event to the state machine * @kbdev: The kbase device structure for the device (must be a valid pointer) * @@ -133,4 +141,42 @@ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev, */ void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev); +/** + * kbase_arbiter_set_max_config() - Set the max config data in kbase device. + * @kbdev: The kbase device structure for the device (must be a valid pointer). + * @max_l2_slices: The maximum number of L2 slices. + * @max_core_mask: The largest core mask. + * + * This function stores the max config data reported by the arbiter. + * It is only applied while the VM is in the initializing state. 
+ */ +void kbase_arbiter_set_max_config(struct kbase_device *kbdev, + uint32_t max_l2_slices, + uint32_t max_core_mask); + +/** + * kbase_arbiter_pm_gpu_assigned() - Determine if this VM has access to the GPU + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Return: 0 if the VM does not have access, 1 if it does, and a negative number + * if an error occurred + */ +int kbase_arbiter_pm_gpu_assigned(struct kbase_device *kbdev); + +extern struct kbase_clk_rate_trace_op_conf arb_clk_rate_trace_ops; + +/** + * struct kbase_arbiter_freq - Holding the GPU clock frequency data retrieved + * from arbiter + * @arb_freq: GPU clock frequency value + * @arb_freq_lock: Mutex protecting access to arbfreq value + */ +struct kbase_arbiter_freq { + uint32_t arb_freq; + struct mutex arb_freq_lock; +}; + +void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq, + uint32_t freq); + #endif /*_MALI_KBASE_ARBITER_PM_H_ */ diff --git a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h index 84fb1fc..fcf4e5b 100644 --- a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h +++ b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2014-2016, 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016, 2020-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,7 +23,7 @@ #define _KBASE_CACHE_POLICY_BACKEND_H_ #include "mali_kbase.h" -#include "mali_base_kernel.h" +#include <uapi/gpu/arm/midgard/mali_base_kernel.h> /** * kbase_cache_set_coherency_mode() - Sets the system coherency mode diff --git a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c index dcd1b02..7076ab4 100644 --- a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c +++ b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -39,6 +39,38 @@ #define CLK_RATE_TRACE_OPS (NULL) #endif +/** + * get_clk_rate_trace_callbacks() - Returns pointer to clk trace ops. + * @kbdev: Pointer to kbase device, used to check if arbitration is enabled + * when compiled with arbiter support. + * Return: Pointer to clk trace ops if supported or NULL. 
+ */ +static struct kbase_clk_rate_trace_op_conf * +get_clk_rate_trace_callbacks(struct kbase_device *kbdev __maybe_unused) +{ + /* base case */ + struct kbase_clk_rate_trace_op_conf *callbacks = + (struct kbase_clk_rate_trace_op_conf *)CLK_RATE_TRACE_OPS; +#if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF) + const void *arbiter_if_node; + + if (WARN_ON(!kbdev) || WARN_ON(!kbdev->dev)) + return callbacks; + + arbiter_if_node = + of_get_property(kbdev->dev->of_node, "arbiter_if", NULL); + /* Arbitration enabled, override the callback pointer.*/ + if (arbiter_if_node) + callbacks = &arb_clk_rate_trace_ops; + else + dev_dbg(kbdev->dev, + "Arbitration supported but disabled by platform. Leaving clk rate callbacks as default.\n"); + +#endif + + return callbacks; +} + static int gpu_clk_rate_change_notifier(struct notifier_block *nb, unsigned long event, void *data) { @@ -69,12 +101,13 @@ static int gpu_clk_rate_change_notifier(struct notifier_block *nb, static int gpu_clk_data_init(struct kbase_device *kbdev, void *gpu_clk_handle, unsigned int index) { - struct kbase_clk_rate_trace_op_conf *callbacks = - (struct kbase_clk_rate_trace_op_conf *)CLK_RATE_TRACE_OPS; + struct kbase_clk_rate_trace_op_conf *callbacks; struct kbase_clk_data *clk_data; struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; int ret = 0; + callbacks = get_clk_rate_trace_callbacks(kbdev); + if (WARN_ON(!callbacks) || WARN_ON(!gpu_clk_handle) || WARN_ON(index >= BASE_MAX_NR_CLOCKS_REGULATORS)) @@ -108,8 +141,9 @@ static int gpu_clk_data_init(struct kbase_device *kbdev, clk_data->clk_rate_change_nb.notifier_call = gpu_clk_rate_change_notifier; - ret = callbacks->gpu_clk_notifier_register(kbdev, gpu_clk_handle, - &clk_data->clk_rate_change_nb); + if (callbacks->gpu_clk_notifier_register) + ret = callbacks->gpu_clk_notifier_register(kbdev, + gpu_clk_handle, &clk_data->clk_rate_change_nb); if (ret) { dev_err(kbdev->dev, "Failed to register notifier for clock enumerated at index 
%u", index); kfree(clk_data); @@ -120,12 +154,13 @@ static int gpu_clk_data_init(struct kbase_device *kbdev, int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev) { - struct kbase_clk_rate_trace_op_conf *callbacks = - (struct kbase_clk_rate_trace_op_conf *)CLK_RATE_TRACE_OPS; + struct kbase_clk_rate_trace_op_conf *callbacks; struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; unsigned int i; int ret = 0; + callbacks = get_clk_rate_trace_callbacks(kbdev); + spin_lock_init(&clk_rtm->lock); INIT_LIST_HEAD(&clk_rtm->listeners); @@ -186,9 +221,10 @@ void kbase_clk_rate_trace_manager_term(struct kbase_device *kbdev) if (!clk_rtm->clks[i]) break; - clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister( - kbdev, clk_rtm->clks[i]->gpu_clk_handle, - &clk_rtm->clks[i]->clk_rate_change_nb); + if (clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister) + clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister + (kbdev, clk_rtm->clks[i]->gpu_clk_handle, + &clk_rtm->clks[i]->clk_rate_change_nb); kfree(clk_rtm->clks[i]); } diff --git a/mali_kbase/backend/gpu/mali_kbase_devfreq.c b/mali_kbase/backend/gpu/mali_kbase_devfreq.c index 07767c2..9b82184 100644 --- a/mali_kbase/backend/gpu/mali_kbase_devfreq.c +++ b/mali_kbase/backend/gpu/mali_kbase_devfreq.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -643,7 +643,7 @@ int kbase_devfreq_init(struct kbase_device *kbdev) /* Record the maximum frequency possible */ kbdev->gpu_props.props.core_props.gpu_freq_khz_max = dp->freq_table[0] / 1000; - }; + } err = kbase_devfreq_init_core_mask_table(kbdev); if (err) { diff --git a/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c b/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c index 4254a64..7542209 100644 --- a/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -121,6 +121,32 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev, return -EIO; } +int kbase_backend_gpuprops_get_curr_config(struct kbase_device *kbdev, + struct kbase_current_config_regdump *curr_config_regdump) +{ + if (WARN_ON(!kbdev) || WARN_ON(!curr_config_regdump)) + return -EINVAL; + + curr_config_regdump->mem_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(MEM_FEATURES)); + + curr_config_regdump->shader_present_lo = kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_PRESENT_LO)); + curr_config_regdump->shader_present_hi = kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_PRESENT_HI)); + + curr_config_regdump->l2_present_lo = kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_PRESENT_LO)); + curr_config_regdump->l2_present_hi = kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_PRESENT_HI)); + + if (WARN_ON(kbase_is_gpu_removed(kbdev))) + return -EIO; + + return 0; + +} + int kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, struct 
kbase_gpuprops_regdump *regdump) { @@ -156,11 +182,15 @@ int kbase_backend_gpuprops_get_l2_features(struct kbase_device *kbdev, if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) { u32 l2_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_FEATURES)); + u32 l2_config = + kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG)); + if (kbase_is_gpu_removed(kbdev)) return -EIO; regdump->l2_features = l2_features; + regdump->l2_config = l2_config; } return 0; diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c index 9cc425e..6868dc3 100644 --- a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c @@ -107,7 +107,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, err = 0; - dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx); + dev_dbg(kbdev->dev, "HW counters dumping set-up for context %pK", kctx); return err; out_err: return err; @@ -167,7 +167,7 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); - dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", + dev_dbg(kbdev->dev, "HW counters dumping disabled for context %pK", kctx); err = 0; @@ -214,7 +214,7 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_PRFCNT_SAMPLE); - dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx); + dev_dbg(kbdev->dev, "HW counters dumping done for context %pK", kctx); err = 0; @@ -325,7 +325,7 @@ KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear); int kbase_instr_backend_init(struct kbase_device *kbdev) { - int ret = 0; + spin_lock_init(&kbdev->hwcnt.lock); kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; @@ -344,12 +344,12 @@ int kbase_instr_backend_init(struct kbase_device *kbdev) 
kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_SET_PRIMARY; #endif #endif - return ret; + return 0; } void kbase_instr_backend_term(struct kbase_device *kbdev) { - (void)kbdev; + CSTD_UNUSED(kbdev); } #ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_defs.h b/mali_kbase/backend/gpu/mali_kbase_instr_defs.h index 39b009d..05d5193 100644 --- a/mali_kbase/backend/gpu/mali_kbase_instr_defs.h +++ b/mali_kbase/backend/gpu/mali_kbase_instr_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2014, 2016, 2018, 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2016, 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c index 7cfca97..e84f3a9 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c @@ -40,10 +40,12 @@ #include <mali_kbase_regs_history_debugfs.h> static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev); +static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev, + const u64 affinity, const u64 limited_core_mask); static u64 kbase_job_write_affinity(struct kbase_device *kbdev, base_jd_core_req core_req, - int js) + int js, const u64 limited_core_mask) { u64 affinity; @@ -72,14 +74,21 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev, */ if (js == 2 && num_core_groups > 1) affinity &= coherency_info->group[1].core_mask; - else + else if (num_core_groups > 1) affinity &= coherency_info->group[0].core_mask; + else + affinity &= kbdev->gpu_props.curr_config.shader_present; } else { /* Use all cores */ affinity = kbdev->pm.backend.shaders_avail & kbdev->pm.debug_core_mask[js]; } + if (core_req & 
BASE_JD_REQ_LIMITED_CORE_MASK) { + /* Limiting affinity due to BASE_JD_REQ_LIMITED_CORE_MASK by applying the limited core mask. */ + affinity = kbasep_apply_limited_core_mask(kbdev, affinity, limited_core_mask); + } + if (unlikely(!affinity)) { #ifdef CONFIG_MALI_DEBUG u64 shaders_ready = @@ -89,6 +98,16 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev, #endif affinity = kbdev->pm.backend.shaders_avail; + + if (core_req & BASE_JD_REQ_LIMITED_CORE_MASK) { + /* Limiting affinity again to make sure it only enables shader cores with backed TLS memory. */ + affinity = kbasep_apply_limited_core_mask(kbdev, affinity, limited_core_mask); + +#ifdef CONFIG_MALI_DEBUG + /* affinity should never be 0 */ + WARN_ON(!affinity); +#endif + } } kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO), @@ -169,7 +188,7 @@ static u64 select_job_chain(struct kbase_jd_atom *katom) } dev_dbg(kctx->kbdev->dev, - "Selected job chain 0x%llx for end atom %p in state %d\n", + "Selected job chain 0x%llx for end atom %pK in state %d\n", jc, (void *)katom, (int)rp->state); katom->jc = jc; @@ -193,7 +212,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, /* Command register must be available */ KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx)); - dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %p\n", + dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %pK\n", jc_head, (void *)katom); kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), @@ -201,7 +220,8 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), jc_head >> 32); - affinity = kbase_job_write_affinity(kbdev, katom->core_req, js); + affinity = kbase_job_write_affinity(kbdev, katom->core_req, js, + kctx->limited_core_mask); /* start MMU, medium priority, cache clean/flush on end, clean/flush on * start @@ -257,7 +277,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, katom->start_timestamp = ktime_get(); /* GO 
! */ - dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx", + dev_dbg(kbdev->dev, "JS: Submitting atom %pK from ctx %pK to js[%d] with head=0x%llx", katom, kctx, js, jc_head); KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js, @@ -431,7 +451,9 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) */ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_3076)) { if (completion_code == BASE_JD_EVENT_JOB_BUS_FAULT) { - if (kbase_prepare_to_reset_gpu_locked(kbdev)) + if (kbase_prepare_to_reset_gpu_locked( + kbdev, + RESET_FLAGS_NONE)) kbase_reset_gpu_locked(kbdev); } } @@ -789,7 +811,7 @@ static int softstop_start_rp_nolock( if (!(katom->core_req & BASE_JD_REQ_START_RENDERPASS)) { dev_dbg(kctx->kbdev->dev, - "Atom %p on job slot is not start RP\n", (void *)katom); + "Atom %pK on job slot is not start RP\n", (void *)katom); return -EPERM; } @@ -802,13 +824,13 @@ static int softstop_start_rp_nolock( rp->state != KBASE_JD_RP_RETRY)) return -EINVAL; - dev_dbg(kctx->kbdev->dev, "OOM in state %d with region %p\n", + dev_dbg(kctx->kbdev->dev, "OOM in state %d with region %pK\n", (int)rp->state, (void *)reg); if (WARN_ON(katom != rp->start_katom)) return -EINVAL; - dev_dbg(kctx->kbdev->dev, "Adding region %p to list %p\n", + dev_dbg(kctx->kbdev->dev, "Adding region %pK to list %pK\n", (void *)reg, (void *)&rp->oom_reg_list); list_move_tail(&reg->link, &rp->oom_reg_list); dev_dbg(kctx->kbdev->dev, "Added region to list\n"); @@ -853,7 +875,7 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx) if (timeout != 0) goto exit; - if (kbase_prepare_to_reset_gpu(kbdev)) { + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) { dev_err(kbdev->dev, "Issuing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. 
process exit)\n", ZAP_TIMEOUT); @@ -863,7 +885,7 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx) /* Wait for the reset to complete */ kbase_reset_gpu_wait(kbdev); exit: - dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx); + dev_dbg(kbdev->dev, "Zap: Finished Context %pK", kctx); /* Ensure that the signallers of the waitqs have finished */ mutex_lock(&kctx->jctx.lock); @@ -924,7 +946,7 @@ KBASE_EXPORT_TEST_API(kbase_job_slot_term); void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, struct kbase_jd_atom *target_katom, u32 sw_flags) { - dev_dbg(kbdev->dev, "Soft-stop atom %p with flags 0x%x (s:%d)\n", + dev_dbg(kbdev->dev, "Soft-stop atom %pK with flags 0x%x (s:%d)\n", target_katom, sw_flags, js); KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK)); @@ -1337,6 +1359,7 @@ static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev) /** * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU * @kbdev: kbase device + * @flags: Bitfield indicating impact of reset (see flag defines) * * This function just soft-stops all the slots to ensure that as many jobs as * possible are saved. @@ -1347,10 +1370,12 @@ static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev) * false - Another thread is performing a reset, kbase_reset_gpu should * not be called. 
*/ -bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev) +bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, + unsigned int flags) { int i; + CSTD_UNUSED(flags); KBASE_DEBUG_ASSERT(kbdev); #ifdef CONFIG_MALI_ARBITER_SUPPORT @@ -1378,14 +1403,14 @@ bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev) return true; } -bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev) +bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags) { - unsigned long flags; + unsigned long lock_flags; bool ret; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - ret = kbase_prepare_to_reset_gpu_locked(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, lock_flags); + ret = kbase_prepare_to_reset_gpu_locked(kbdev, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, lock_flags); return ret; } @@ -1506,3 +1531,21 @@ void kbase_reset_gpu_term(struct kbase_device *kbdev) { destroy_workqueue(kbdev->hwaccess.backend.reset_workq); } + +static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev, + const u64 affinity, const u64 limited_core_mask) +{ + const u64 result = affinity & limited_core_mask; + +#ifdef CONFIG_MALI_DEBUG + dev_dbg(kbdev->dev, + "Limiting affinity due to BASE_JD_REQ_LIMITED_CORE_MASK from 0x%lx to 0x%lx (mask is 0x%lx)\n", + (unsigned long int)affinity, + (unsigned long int)result, + (unsigned long int)limited_core_mask); +#else + CSTD_UNUSED(kbdev); +#endif + + return result; +} diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c index 7104658..5fdf9b6 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -1024,7 +1024,7 @@ void kbase_backend_run_atom(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { lockdep_assert_held(&kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, "Backend running atom %p\n", (void *)katom); + dev_dbg(kbdev->dev, "Backend running atom %pK\n", (void *)katom); kbase_gpu_enqueue_atom(kbdev, katom); kbase_backend_slot_update(kbdev); @@ -1085,7 +1085,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, struct kbase_context *kctx = katom->kctx; dev_dbg(kbdev->dev, - "Atom %p completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n", + "Atom %pK completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n", (void *)katom, completion_code, job_tail, js); lockdep_assert_held(&kbdev->hwaccess_lock); @@ -1205,7 +1205,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, if (job_tail != 0 && job_tail != katom->jc) { /* Some of the job has been executed */ dev_dbg(kbdev->dev, - "Update job chain address of atom %p to resume from 0x%llx\n", + "Update job chain address of atom %pK to resume from 0x%llx\n", (void *)katom, job_tail); katom->jc = job_tail; @@ -1266,7 +1266,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, if (katom) { dev_dbg(kbdev->dev, - "Cross-slot dependency %p has become runnable.\n", + "Cross-slot dependency %pK has become runnable.\n", (void *)katom); /* Check if there are lower priority jobs to soft stop */ @@ -1666,7 +1666,7 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev) if (katom) dev_info(kbdev->dev, - " js%d idx%d : katom=%p gpu_rb_state=%d\n", + " js%d idx%d : katom=%pK gpu_rb_state=%d\n", js, idx, katom, katom->gpu_rb_state); else dev_info(kbdev->dev, " js%d idx%d : empty\n", diff --git a/mali_kbase/backend/gpu/mali_kbase_js_backend.c b/mali_kbase/backend/gpu/mali_kbase_js_backend.c index d28e7b0..cab222d 100644 --- 
a/mali_kbase/backend/gpu/mali_kbase_js_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_js_backend.c @@ -257,7 +257,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) if (reset_needed) { dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issuing GPU soft-reset to resolve."); - if (kbase_prepare_to_reset_gpu_locked(kbdev)) + if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu_locked(kbdev); } /* the timer is re-issued if there is contexts in the run-pool */ diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c index 921849b..0cfa93c 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -498,7 +498,15 @@ static void kbase_pm_hwcnt_disable_worker(struct work_struct *data) /* PM state was updated while we were doing the disable, * so we need to undo the disable we just performed. 
*/ +#if MALI_USE_CSF + unsigned long lock_flags; + + kbase_csf_scheduler_spin_lock(kbdev, &lock_flags); +#endif kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); +#if MALI_USE_CSF + kbase_csf_scheduler_spin_unlock(kbdev, lock_flags); +#endif } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -664,10 +672,15 @@ void kbase_hwaccess_pm_term(struct kbase_device *kbdev) if (kbdev->pm.backend.hwcnt_disabled) { unsigned long flags; - +#if MALI_USE_CSF + kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + kbase_csf_scheduler_spin_unlock(kbdev, flags); +#else spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +#endif } /* Free any resources the policy allocated */ diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c index c546766..3cf7608 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c @@ -102,10 +102,18 @@ u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->hwaccess_lock); #ifdef CONFIG_MALI_DEVFREQ - return kbdev->pm.backend.ca_cores_enabled & debug_core_mask; + /* + * Although in the init we let the pm_backend->ca_cores_enabled to be + * the max config (it uses the base_gpu_props), at this function we need + * to limit it to be a subgroup of the curr config, otherwise the + * shaders state machine on the PM does not evolve. 
+ */ + return kbdev->gpu_props.curr_config.shader_present & + kbdev->pm.backend.ca_cores_enabled & + debug_core_mask; #else - return kbdev->gpu_props.props.raw_props.shader_present & - debug_core_mask; + return kbdev->gpu_props.curr_config.shader_present & + debug_core_mask; #endif } diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h index 1b4e141..0687a43 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h @@ -61,24 +61,9 @@ enum kbase_pm_core_type { KBASE_PM_CORE_STACK = STACK_PRESENT_LO }; -/** +/* * enum kbase_l2_core_state - The states used for the L2 cache & tiler power * state machine. - * - * @KBASE_L2_OFF: The L2 cache and tiler are off - * @KBASE_L2_PEND_ON: The L2 cache and tiler are powering on - * @KBASE_L2_RESTORE_CLOCKS: The GPU clock is restored. Conditionally used. - * @KBASE_L2_ON_HWCNT_ENABLE: The L2 cache and tiler are on, and hwcnt is being - * enabled - * @KBASE_L2_ON: The L2 cache and tiler are on, and hwcnt is enabled - * @KBASE_L2_ON_HWCNT_DISABLE: The L2 cache and tiler are on, and hwcnt is being - * disabled - * @KBASE_L2_SLOW_DOWN_CLOCKS: The GPU clock is set to appropriate or lowest - * clock. Conditionally used. - * @KBASE_L2_POWER_DOWN: The L2 cache and tiler are about to be powered off - * @KBASE_L2_PEND_OFF: The L2 cache and tiler are powering off - * @KBASE_L2_RESET_WAIT: The GPU is resetting, L2 cache and tiler power state - * are unknown */ enum kbase_l2_core_state { #define KBASEP_L2_STATE(n) KBASE_L2_ ## n, @@ -87,26 +72,8 @@ enum kbase_l2_core_state { }; #if MALI_USE_CSF -/** +/* * enum kbase_mcu_state - The states used for the MCU state machine. - * - * @KBASE_MCU_OFF: The MCU is powered off. - * @KBASE_MCU_PEND_ON_RELOAD: The warm boot of MCU or cold boot of MCU (with - * firmware reloading) is in progress. 
- * @KBASE_MCU_ON_GLB_REINIT_PEND: The MCU is enabled and Global configuration - * requests have been sent to the firmware. - * @KBASE_MCU_ON_HWCNT_ENABLE: The Global requests have completed and MCU is - * now ready for use and hwcnt is being enabled. - * @KBASE_MCU_ON: The MCU is active and hwcnt has been enabled. - * @KBASE_MCU_ON_CORE_MASK_UPDATE_PEND: The MCU is active and mask of enabled - * shader cores is being updated. - * @KBASE_MCU_ON_HWCNT_DISABLE: The MCU is on and hwcnt is being disabled. - * @KBASE_MCU_ON_HALT: The MCU is on and hwcnt has been disabled, - * MCU halt would be triggered. - * @KBASE_MCU_ON_PEND_HALT: MCU halt in progress, confirmation pending. - * @KBASE_MCU_POWER_DOWN: MCU halted operations, pending being disabled. - * @KBASE_MCU_PEND_OFF: MCU is being disabled, pending on powering off. - * @KBASE_MCU_RESET_WAIT: The GPU is resetting, MCU state is unknown. */ enum kbase_mcu_state { #define KBASEP_MCU_STATE(n) KBASE_MCU_ ## n, @@ -115,45 +82,8 @@ enum kbase_mcu_state { }; #endif -/** +/* * enum kbase_shader_core_state - The states used for the shaders' state machine. - * - * @KBASE_SHADERS_OFF_CORESTACK_OFF: The shaders and core stacks are off - * @KBASE_SHADERS_OFF_CORESTACK_PEND_ON: The shaders are off, core stacks have - * been requested to power on and hwcnt - * is being disabled - * @KBASE_SHADERS_PEND_ON_CORESTACK_ON: Core stacks are on, shaders have been - * requested to power on. Or after doing - * partial shader on/off, checking whether - * it's the desired state. - * @KBASE_SHADERS_ON_CORESTACK_ON: The shaders and core stacks are on, and hwcnt - * already enabled. - * @KBASE_SHADERS_ON_CORESTACK_ON_RECHECK: The shaders and core stacks - * are on, hwcnt disabled, and checks - * to powering down or re-enabling - * hwcnt. 
- * @KBASE_SHADERS_WAIT_OFF_CORESTACK_ON: The shaders have been requested to - * power off, but they remain on for the - * duration of the hysteresis timer - * @KBASE_SHADERS_WAIT_GPU_IDLE: The shaders partial poweroff needs to reach - * a state where jobs on the GPU are finished - * including jobs currently running and in the - * GPU queue because of GPU2017-861 - * @KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON: The hysteresis timer has expired - * @KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON: The core stacks are on and the - * level 2 cache is being flushed. - * @KBASE_SHADERS_READY_OFF_CORESTACK_ON: The core stacks are on and the shaders - * are ready to be powered off. - * @KBASE_SHADERS_PEND_OFF_CORESTACK_ON: The core stacks are on, and the shaders - * have been requested to power off - * @KBASE_SHADERS_OFF_CORESTACK_PEND_OFF: The shaders are off, and the core stacks - * have been requested to power off - * @KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF: Shaders and corestacks are - * off, but the tick timer - * cancellation is still - * pending. 
- * @KBASE_SHADERS_RESET_WAIT: The GPU is resetting, shader and core stack power - * states are unknown */ enum kbase_shader_core_state { #define KBASEP_SHADER_STATE(n) KBASE_SHADERS_ ## n, diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c index da32510..a2f96b5 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c @@ -407,9 +407,9 @@ u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, switch (type) { case KBASE_PM_CORE_L2: - return kbdev->gpu_props.props.raw_props.l2_present; + return kbdev->gpu_props.curr_config.l2_present; case KBASE_PM_CORE_SHADER: - return kbdev->gpu_props.props.raw_props.shader_present; + return kbdev->gpu_props.curr_config.shader_present; case KBASE_PM_CORE_TILER: return kbdev->gpu_props.props.raw_props.tiler_present; case KBASE_PM_CORE_STACK: @@ -695,8 +695,12 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) case KBASE_MCU_ON_HWCNT_ENABLE: backend->hwcnt_desired = true; if (backend->hwcnt_disabled) { + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); kbase_hwcnt_context_enable( kbdev->hwcnt_gpu_ctx); + kbase_csf_scheduler_spin_unlock(kbdev, flags); backend->hwcnt_disabled = false; } backend->mcu_state = KBASE_MCU_ON; @@ -851,7 +855,7 @@ static const char *kbase_l2_core_state_to_string(enum kbase_l2_core_state state) static int kbase_pm_l2_update_state(struct kbase_device *kbdev) { struct kbase_pm_backend_data *backend = &kbdev->pm.backend; - u64 l2_present = kbdev->gpu_props.props.raw_props.l2_present; + u64 l2_present = kbdev->gpu_props.curr_config.l2_present; #if !MALI_USE_CSF u64 tiler_present = kbdev->gpu_props.props.raw_props.tiler_present; #endif @@ -1255,7 +1259,6 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) &kbdev->pm.backend.shader_tick_timer; enum kbase_shader_core_state prev_state; u64 stacks_avail = 0; - int err = 0; 
lockdep_assert_held(&kbdev->hwaccess_lock); @@ -1350,8 +1353,18 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) backend->pm_shaders_core_mask = shaders_ready; backend->hwcnt_desired = true; if (backend->hwcnt_disabled) { +#if MALI_USE_CSF + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, + &flags); +#endif kbase_hwcnt_context_enable( kbdev->hwcnt_gpu_ctx); +#if MALI_USE_CSF + kbase_csf_scheduler_spin_unlock(kbdev, + flags); +#endif backend->hwcnt_disabled = false; } @@ -1531,8 +1544,18 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) backend->pm_shaders_core_mask = 0; backend->hwcnt_desired = true; if (backend->hwcnt_disabled) { +#if MALI_USE_CSF + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, + &flags); +#endif kbase_hwcnt_context_enable( kbdev->hwcnt_gpu_ctx); +#if MALI_USE_CSF + kbase_csf_scheduler_spin_unlock(kbdev, + flags); +#endif backend->hwcnt_disabled = false; } backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF; @@ -1559,7 +1582,7 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) } while (backend->shaders_state != prev_state); - return err; + return 0; } #endif @@ -1883,17 +1906,9 @@ static void kbase_pm_timed_out(struct kbase_device *kbdev) kbase_reg_read(kbdev, GPU_CONTROL_REG( L2_PWRTRANS_LO))); -#if MALI_USE_CSF - /* PM timeout probably means hardware counters will stop working. - * Put the backend into the unrecoverable error state to cause - * current and subsequent counter operations to immediately - * fail, avoiding the risk of a hang. 
- */ - kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface); -#endif - dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n"); - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu(kbdev, + RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); } @@ -2105,6 +2120,13 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) PM_NO_RESET); } } + /* + * This point means that the GPU trasitioned to ON. So there is a chance + * that a repartitioning occurred. In this case the current config + * should be read again. + */ + kbase_gpuprops_get_curr_config_props(kbdev, + &kbdev->gpu_props.curr_config); #endif /* CONFIG_MALI_ARBITER_SUPPORT */ mutex_lock(&kbdev->mmu_hw_mutex); @@ -2253,7 +2275,7 @@ static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer) struct kbasep_reset_timeout_data *rtdata = container_of(timer, struct kbasep_reset_timeout_data, timer); - rtdata->timed_out = 1; + rtdata->timed_out = true; /* Set the wait queue to wake up kbase_pm_init_hw even though the reset * hasn't completed @@ -2263,14 +2285,13 @@ static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer) return HRTIMER_NORESTART; } -static int kbase_set_jm_quirks(struct kbase_device *kbdev, const u32 prod_id) +static int kbase_set_gpu_quirks(struct kbase_device *kbdev, const u32 prod_id) { #if MALI_USE_CSF - kbdev->hw_quirks_jm = kbase_reg_read(kbdev, - GPU_CONTROL_REG(CSF_CONFIG)); + kbdev->hw_quirks_gpu = + kbase_reg_read(kbdev, GPU_CONTROL_REG(CSF_CONFIG)); #else - u32 hw_quirks_jm = kbase_reg_read(kbdev, - GPU_CONTROL_REG(JM_CONFIG)); + u32 hw_quirks_gpu = kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG)); if (GPU_ID2_MODEL_MATCH_VALUE(prod_id) == GPU_ID2_PRODUCT_TMIX) { /* Only for tMIx */ @@ -2284,39 +2305,38 @@ static int kbase_set_jm_quirks(struct kbase_device *kbdev, const u32 prod_id) */ if (coherency_features == COHERENCY_FEATURE_BIT(COHERENCY_ACE)) { - hw_quirks_jm |= 
(COHERENCY_ACE_LITE | - COHERENCY_ACE) << - JM_FORCE_COHERENCY_FEATURES_SHIFT; + hw_quirks_gpu |= (COHERENCY_ACE_LITE | COHERENCY_ACE) + << JM_FORCE_COHERENCY_FEATURES_SHIFT; } } if (kbase_is_gpu_removed(kbdev)) return -EIO; - kbdev->hw_quirks_jm = hw_quirks_jm; + kbdev->hw_quirks_gpu = hw_quirks_gpu; #endif /* !MALI_USE_CSF */ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_IDVS_GROUP_SIZE)) { int default_idvs_group_size = 0xF; - u32 tmp; + u32 group_size = 0; - if (of_property_read_u32(kbdev->dev->of_node, - "idvs-group-size", &tmp)) - tmp = default_idvs_group_size; + if (of_property_read_u32(kbdev->dev->of_node, "idvs-group-size", + &group_size)) + group_size = default_idvs_group_size; - if (tmp > IDVS_GROUP_MAX_SIZE) { + if (group_size > IDVS_GROUP_MAX_SIZE) { dev_err(kbdev->dev, "idvs-group-size of %d is too large. Maximum value is %d", - tmp, IDVS_GROUP_MAX_SIZE); - tmp = default_idvs_group_size; + group_size, IDVS_GROUP_MAX_SIZE); + group_size = default_idvs_group_size; } - kbdev->hw_quirks_jm |= tmp << IDVS_GROUP_SIZE_SHIFT; + kbdev->hw_quirks_gpu |= group_size << IDVS_GROUP_SIZE_SHIFT; } #define MANUAL_POWER_CONTROL ((u32)(1 << 8)) if (corestack_driver_control) - kbdev->hw_quirks_jm |= MANUAL_POWER_CONTROL; + kbdev->hw_quirks_gpu |= MANUAL_POWER_CONTROL; return 0; } @@ -2370,18 +2390,17 @@ static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev) GPU_ID_VERSION_PRODUCT_ID_SHIFT; int error = 0; - kbdev->hw_quirks_jm = 0; + kbdev->hw_quirks_gpu = 0; kbdev->hw_quirks_sc = 0; kbdev->hw_quirks_tiler = 0; kbdev->hw_quirks_mmu = 0; - if (!of_property_read_u32(np, "quirks_jm", - &kbdev->hw_quirks_jm)) { + if (!of_property_read_u32(np, "quirks_gpu", &kbdev->hw_quirks_gpu)) { dev_info(kbdev->dev, - "Found quirks_jm = [0x%x] in Devicetree\n", - kbdev->hw_quirks_jm); + "Found quirks_gpu = [0x%x] in Devicetree\n", + kbdev->hw_quirks_gpu); } else { - error = kbase_set_jm_quirks(kbdev, prod_id); + error = kbase_set_gpu_quirks(kbdev, prod_id); if (error) return 
error; } @@ -2432,10 +2451,10 @@ static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev) kbdev->hw_quirks_mmu); #if MALI_USE_CSF kbase_reg_write(kbdev, GPU_CONTROL_REG(CSF_CONFIG), - kbdev->hw_quirks_jm); + kbdev->hw_quirks_gpu); #else kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG), - kbdev->hw_quirks_jm); + kbdev->hw_quirks_gpu); #endif } @@ -2466,6 +2485,7 @@ void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev) } } +#if !MALI_USE_CSF static void reenable_protected_mode_hwcnt(struct kbase_device *kbdev) { unsigned long irq_flags; @@ -2478,6 +2498,7 @@ static void reenable_protected_mode_hwcnt(struct kbase_device *kbdev) } spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); } +#endif static int kbase_pm_do_reset(struct kbase_device *kbdev) { @@ -2504,7 +2525,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) /* Initialize a structure for tracking the status of the reset */ rtdata.kbdev = kbdev; - rtdata.timed_out = 0; + rtdata.timed_out = false; /* Create a timer to use as a timeout on the reset */ hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); @@ -2516,7 +2537,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) /* Wait for the RESET_COMPLETED interrupt to be raised */ kbase_pm_wait_for_reset(kbdev); - if (rtdata.timed_out == 0) { + if (!rtdata.timed_out) { /* GPU has been reset */ hrtimer_cancel(&rtdata.timer); destroy_hrtimer_on_stack(&rtdata.timer); @@ -2556,7 +2577,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) GPU_COMMAND_HARD_RESET); /* Restart the timer to wait for the hard reset to complete */ - rtdata.timed_out = 0; + rtdata.timed_out = false; hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), HRTIMER_MODE_REL); @@ -2564,7 +2585,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) /* Wait for the RESET_COMPLETED interrupt to be raised */ kbase_pm_wait_for_reset(kbdev); - if (rtdata.timed_out == 0) { + if (!rtdata.timed_out) { /* GPU has been 
reset */ hrtimer_cancel(&rtdata.timer); destroy_hrtimer_on_stack(&rtdata.timer); @@ -2637,8 +2658,13 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); #if MALI_USE_CSF if (kbdev->protected_mode) { + unsigned long flags; + kbase_ipa_control_protm_exited(kbdev); + + kbase_csf_scheduler_spin_lock(kbdev, &flags); kbase_hwcnt_backend_csf_protm_exited(&kbdev->hwcnt_gpu_iface); + kbase_csf_scheduler_spin_unlock(kbdev, flags); } #endif kbdev->protected_mode = false; @@ -2685,12 +2711,14 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) kbase_pm_enable_interrupts(kbdev); exit: +#if !MALI_USE_CSF if (!kbdev->pm.backend.protected_entry_transition_override) { /* Re-enable GPU hardware counters if we're resetting from * protected mode. */ reenable_protected_mode_hwcnt(kbdev); } +#endif return err; } @@ -2726,8 +2754,9 @@ kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev) /* This might happen after GPU reset. * Then counter needs to be kicked. */ - if (!(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & - GPU_STATUS_CYCLE_COUNT_ACTIVE)) { + if (!IS_ENABLED(CONFIG_MALI_NO_MALI) && + (!(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & + GPU_STATUS_CYCLE_COUNT_ACTIVE))) { kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_CYCLE_COUNT_START); } diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h index f6b8485..500578f 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -224,6 +224,7 @@ void kbase_pm_reset_done(struct kbase_device *kbdev); * * Return: 0 on success, error code on error */ +int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); #else /** * kbase_pm_wait_for_desired_state - Wait for the desired power state to be @@ -247,8 +248,8 @@ void kbase_pm_reset_done(struct kbase_device *kbdev); * * Return: 0 on success, error code on error */ -#endif int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); +#endif /** * kbase_pm_wait_for_l2_powered - Wait for the L2 cache to be powered on @@ -534,8 +535,22 @@ void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, #ifdef CONFIG_MALI_MIDGARD_DVFS +#if MALI_USE_CSF +/** + * kbase_platform_dvfs_event - Report utilisation to DVFS code for CSF GPU + * + * Function provided by platform specific code when DVFS is enabled to allow + * the power management metrics system to report utilisation. + * + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) + * @utilisation: The current calculated utilisation by the metrics system. + * Return: Returns 0 on failure and non zero on success. + */ +int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation); +#else /** - * kbase_platform_dvfs_event - Report utilisation to DVFS code + * kbase_platform_dvfs_event - Report utilisation to DVFS code for JM GPU * * Function provided by platform specific code when DVFS is enabled to allow * the power management metrics system to report utilisation. @@ -548,10 +563,6 @@ void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, * group. * Return: Returns 0 on failure and non zero on success. 
*/ - -#if MALI_USE_CSF -int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation); -#else int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, u32 util_cl_share[2]); #endif diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_l2_states.h b/mali_kbase/backend/gpu/mali_kbase_pm_l2_states.h index b9bd364..d66b928 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_l2_states.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_l2_states.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,6 +24,19 @@ * The function-like macro KBASEP_L2_STATE() must be defined before including * this header file. This header file can be included multiple times in the * same compilation unit with different definitions of KBASEP_L2_STATE(). + * + * @OFF: The L2 cache and tiler are off + * @PEND_ON: The L2 cache and tiler are powering on + * @RESTORE_CLOCKS: The GPU clock is restored. Conditionally used. + * @ON_HWCNT_ENABLE: The L2 cache and tiler are on, and hwcnt is being enabled + * @ON: The L2 cache and tiler are on, and hwcnt is enabled + * @ON_HWCNT_DISABLE: The L2 cache and tiler are on, and hwcnt is being disabled + * @SLOW_DOWN_CLOCKS: The GPU clock is set to appropriate or lowest clock. + * Conditionally used. 
+ * @POWER_DOWN: The L2 cache and tiler are about to be powered off + * @PEND_OFF: The L2 cache and tiler are powering off + * @RESET_WAIT: The GPU is resetting, L2 cache and tiler power state are + * unknown */ KBASEP_L2_STATE(OFF) KBASEP_L2_STATE(PEND_ON) diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h b/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h index c03adf3..eab30eb 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,6 +24,24 @@ * The function-like macro KBASEP_MCU_STATE() must be defined before including * this header file. This header file can be included multiple times in the * same compilation unit with different definitions of KBASEP_MCU_STATE(). + * + * @OFF: The MCU is powered off. + * @PEND_ON_RELOAD: The warm boot of MCU or cold boot of MCU (with + * firmware reloading) is in progress. + * @ON_GLB_REINIT_PEND: The MCU is enabled and Global configuration + * requests have been sent to the firmware. + * @ON_HWCNT_ENABLE: The Global requests have completed and MCU is now + * ready for use and hwcnt is being enabled. + * @ON: The MCU is active and hwcnt has been enabled. + * @ON_CORE_ATTR_UPDATE_PEND: The MCU is active and mask of enabled shader cores + * is being updated. + * @ON_HWCNT_DISABLE: The MCU is on and hwcnt is being disabled. + * @ON_HALT: The MCU is on and hwcnt has been disabled, MCU + * halt would be triggered. + * @ON_PEND_HALT: MCU halt in progress, confirmation pending. + * @POWER_DOWN: MCU halted operations, pending being disabled. + * @PEND_OFF: MCU is being disabled, pending on powering off. 
+ * @RESET_WAIT: The GPU is resetting, MCU state is unknown. */ KBASEP_MCU_STATE(OFF) KBASEP_MCU_STATE(PEND_ON_RELOAD) diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c index e5c7c71..769888f 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -360,9 +360,9 @@ KBASE_EXPORT_TEST_API(kbase_pm_get_dvfs_metrics); void kbase_pm_get_dvfs_action(struct kbase_device *kbdev) { int utilisation; - int busy; struct kbasep_pm_metrics *diff; #if !MALI_USE_CSF + int busy; int util_gl_share; int util_cl_share[2]; #endif @@ -377,9 +377,9 @@ void kbase_pm_get_dvfs_action(struct kbase_device *kbdev) utilisation = (100 * diff->time_busy) / max(diff->time_busy + diff->time_idle, 1u); +#if !MALI_USE_CSF busy = max(diff->busy_gl + diff->busy_cl[0] + diff->busy_cl[1], 1u); -#if !MALI_USE_CSF util_gl_share = (100 * diff->busy_gl) / busy; util_cl_share[0] = (100 * diff->busy_cl[0]) / busy; util_cl_share[1] = (100 * diff->busy_cl[1]) / busy; diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c index 97bcb44..5c2aa0c 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c @@ -405,7 +405,7 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, /* Reverse the suspension done */ if (reset_gpu) { dev_warn(kbdev->dev, "Resorting to GPU reset for policy change\n"); - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(kbdev); kbase_reset_gpu_wait(kbdev); } else if (sched_suspend) 
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_shader_states.h b/mali_kbase/backend/gpu/mali_kbase_pm_shader_states.h index 766bf1d..2276713 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_shader_states.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_shader_states.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,6 +25,41 @@ * including this header file. This header file can be included multiple * times in the same compilation unit with different definitions of * KBASEP_SHADER_STATE(). + * + * @OFF_CORESTACK_OFF: The shaders and core stacks are off + * @OFF_CORESTACK_PEND_ON: The shaders are off, core stacks have been + * requested to power on and hwcnt is being + * disabled + * @PEND_ON_CORESTACK_ON: Core stacks are on, shaders have been + * requested to power on. Or after doing + * partial shader on/off, checking whether + * it's the desired state. + * @ON_CORESTACK_ON: The shaders and core stacks are on, and + * hwcnt already enabled. + * @ON_CORESTACK_ON_RECHECK: The shaders and core stacks are on, hwcnt + * disabled, and checks to powering down or + * re-enabling hwcnt. + * @WAIT_OFF_CORESTACK_ON: The shaders have been requested to power + * off, but they remain on for the duration + * of the hysteresis timer + * @WAIT_GPU_IDLE: The shaders partial poweroff needs to + * reach a state where jobs on the GPU are + * finished including jobs currently running + * and in the GPU queue because of + * GPU2017-861 + * @WAIT_FINISHED_CORESTACK_ON: The hysteresis timer has expired + * @L2_FLUSHING_CORESTACK_ON: The core stacks are on and the level 2 + * cache is being flushed. + * @READY_OFF_CORESTACK_ON: The core stacks are on and the shaders are + * ready to be powered off. 
+ * @PEND_OFF_CORESTACK_ON: The core stacks are on, and the shaders + * have been requested to power off + * @OFF_CORESTACK_PEND_OFF: The shaders are off, and the core stacks + * have been requested to power off + * @OFF_CORESTACK_OFF_TIMER_PEND_OFF: Shaders and corestacks are off, but the + * tick timer cancellation is still pending. + * @RESET_WAIT: The GPU is resetting, shader and core + * stack power states are unknown */ KBASEP_SHADER_STATE(OFF_CORESTACK_OFF) KBASEP_SHADER_STATE(OFF_CORESTACK_PEND_ON) diff --git a/mali_kbase/backend/gpu/mali_kbase_time.c b/mali_kbase/backend/gpu/mali_kbase_time.c index f964af0..ea7b21a 100644 --- a/mali_kbase/backend/gpu/mali_kbase_time.c +++ b/mali_kbase/backend/gpu/mali_kbase_time.c @@ -76,6 +76,9 @@ void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, */ static bool timedwait_cycle_count_active(struct kbase_device *kbdev) { +#ifdef CONFIG_MALI_NO_MALI + return true; +#else bool success = false; const unsigned int timeout = 100; const unsigned long remaining = jiffies + msecs_to_jiffies(timeout); @@ -87,8 +90,8 @@ static bool timedwait_cycle_count_active(struct kbase_device *kbdev) break; } } - return success; +#endif } #endif diff --git a/mali_kbase/csf/mali_base_csf_kernel.h b/mali_kbase/csf/mali_base_csf_kernel.h deleted file mode 100644 index 9a13760..0000000 --- a/mali_kbase/csf/mali_base_csf_kernel.h +++ /dev/null @@ -1,637 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -#ifndef _BASE_CSF_KERNEL_H_ -#define _BASE_CSF_KERNEL_H_ - -/* Memory allocation, access/hint flags. - * - * See base_mem_alloc_flags. - */ - -/* IN */ -/* Read access CPU side - */ -#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0) - -/* Write access CPU side - */ -#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1) - -/* Read access GPU side - */ -#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2) - -/* Write access GPU side - */ -#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3) - -/* Execute allowed on the GPU side - */ -#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4) - -/* Will be permanently mapped in kernel space. - * Flag is only allowed on allocations originating from kbase. - */ -#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5) - -/* The allocation will completely reside within the same 4GB chunk in the GPU - * virtual space. - * Since this flag is primarily required only for the TLS memory which will - * not be used to contain executable code and also not used for Tiler heap, - * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags. - */ -#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6) - -/* Userspace is not allowed to free this memory. - * Flag is only allowed on allocations originating from kbase. 
- */ -#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7) - -#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8) - -/* Grow backing store on GPU Page Fault - */ -#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9) - -/* Page coherence Outer shareable, if available - */ -#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10) - -/* Page coherence Inner shareable - */ -#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11) - -/* IN/OUT */ -/* Should be cached on the CPU, returned if actually cached - */ -#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12) - -/* IN/OUT */ -/* Must have same VA on both the GPU and the CPU - */ -#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13) - -/* OUT */ -/* Must call mmap to acquire a GPU address for the alloc - */ -#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14) - -/* IN */ -/* Page coherence Outer shareable, required. - */ -#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15) - -/* Protected memory - */ -#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16) - -/* Not needed physical memory - */ -#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17) - -/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the - * addresses to be the same - */ -#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18) - -/* CSF event memory - * - * If Outer shareable coherence is not specified or not available, then on - * allocation kbase will automatically use the uncached GPU mapping. - * There is no need for the client to specify BASE_MEM_UNCACHED_GPU - * themselves when allocating memory with the BASE_MEM_CSF_EVENT flag. 
- * - * This memory requires a permanent mapping - * - * See also kbase_reg_needs_kernel_mapping() - */ -#define BASE_MEM_CSF_EVENT ((base_mem_alloc_flags)1 << 19) - -#define BASE_MEM_RESERVED_BIT_20 ((base_mem_alloc_flags)1 << 20) - -/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu - * mode. Some components within the GPU might only be able to access memory - * that is GPU cacheable. Refer to the specific GPU implementation for more - * details. The 3 shareability flags will be ignored for GPU uncached memory. - * If used while importing USER_BUFFER type memory, then the import will fail - * if the memory is not aligned to GPU and CPU cache line width. - */ -#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21) - -/* - * Bits [22:25] for group_id (0~15). - * - * base_mem_group_id_set() should be used to pack a memory group ID into a - * base_mem_alloc_flags value instead of accessing the bits directly. - * base_mem_group_id_get() should be used to extract the memory group ID from - * a base_mem_alloc_flags value. - */ -#define BASEP_MEM_GROUP_ID_SHIFT 22 -#define BASE_MEM_GROUP_ID_MASK \ - ((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT) - -/* Must do CPU cache maintenance when imported memory is mapped/unmapped - * on GPU. Currently applicable to dma-buf type only. - */ -#define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26) - -/* OUT */ -/* Kernel side cache sync ops required */ -#define BASE_MEM_KERNEL_SYNC ((base_mem_alloc_flags)1 << 28) - -/* Number of bits used as flags for base memory management - * - * Must be kept in sync with the base_mem_alloc_flags flags - */ -#define BASE_MEM_FLAGS_NR_BITS 29 - -/* A mask of all the flags which are only valid for allocations within kbase, - * and may not be passed from user space. - */ -#define BASEP_MEM_FLAGS_KERNEL_ONLY \ - (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE) - -/* A mask for all output bits, excluding IN/OUT bits. 
- */ -#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP - -/* A mask for all input bits, including IN/OUT bits. - */ -#define BASE_MEM_FLAGS_INPUT_MASK \ - (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK) - -/* A mask of all currently reserved flags - */ -#define BASE_MEM_FLAGS_RESERVED \ - BASE_MEM_RESERVED_BIT_8 | BASE_MEM_RESERVED_BIT_20 - -#define BASEP_MEM_INVALID_HANDLE (0ull << 12) -#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12) -#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12) -#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12) -#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12) -/* reserved handles ..-47<<PAGE_SHIFT> for future special handles */ -#define BASEP_MEM_CSF_USER_REG_PAGE_HANDLE (47ul << 12) -#define BASEP_MEM_CSF_USER_IO_PAGES_HANDLE (48ul << 12) -#define BASE_MEM_COOKIE_BASE (64ul << 12) -#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \ - BASE_MEM_COOKIE_BASE) - -#define KBASE_CSF_NUM_USER_IO_PAGES_HANDLE \ - ((BASE_MEM_COOKIE_BASE - BASEP_MEM_CSF_USER_IO_PAGES_HANDLE) >> \ - LOCAL_PAGE_SHIFT) - -/** - * Valid set of just-in-time memory allocation flags - */ -#define BASE_JIT_ALLOC_VALID_FLAGS ((u8)0) - -/* Flags to pass to ::base_context_init. - * Flags can be ORed together to enable multiple things. - * - * These share the same space as BASEP_CONTEXT_FLAG_*, and so must - * not collide with them. - */ -typedef u32 base_context_create_flags; - -/* No flags set */ -#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0) - -/* Base context is embedded in a cctx object (flag used for CINSTR - * software counter macros) - */ -#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0) - -/* Base context is a 'System Monitor' context for Hardware counters. - * - * One important side effect of this is that job submission is disabled. - */ -#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \ - ((base_context_create_flags)1 << 1) - -/* Create CSF event thread. 
- * - * The creation of a CSF event thread is conditional and only allowed in - * unit tests for the moment, in order to avoid clashes with the existing - * Base unit tests. - */ -#define BASE_CONTEXT_CSF_EVENT_THREAD ((base_context_create_flags)1 << 2) - -/* Bit-shift used to encode a memory group ID in base_context_create_flags - */ -#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3) - -/* Bitmask used to encode a memory group ID in base_context_create_flags - */ -#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \ - ((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT) - -/* Bitpattern describing the base_context_create_flags that can be - * passed to the kernel - */ -#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \ - (BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | \ - BASEP_CONTEXT_MMU_GROUP_ID_MASK) - -/* Bitpattern describing the ::base_context_create_flags that can be - * passed to base_context_init() - */ -#define BASEP_CONTEXT_CREATE_ALLOWED_FLAGS \ - (BASE_CONTEXT_CCTX_EMBEDDED | \ - BASE_CONTEXT_CSF_EVENT_THREAD | \ - BASEP_CONTEXT_CREATE_KERNEL_FLAGS) - -/* Enable additional tracepoints for latency measurements (TL_ATOM_READY, - * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) - */ -#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0) - -/* Indicate that job dumping is enabled. This could affect certain timers - * to account for the performance impact. - */ -#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1) - -/* Enable KBase tracepoints for CSF builds */ -#define BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS (1 << 2) - -/* Enable additional CSF Firmware side tracepoints */ -#define BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS (1 << 3) - -#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \ - BASE_TLSTREAM_JOB_DUMPING_ENABLED | \ - BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS | \ - BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) - -/* Number of pages mapped into the process address space for a bound GPU - * command queue. 
A pair of input/output pages and a Hw doorbell page - * are mapped to enable direct submission of commands to Hw. - */ -#define BASEP_QUEUE_NR_MMAP_USER_PAGES ((size_t)3) - -#define BASE_QUEUE_MAX_PRIORITY (15U) - -/* CQS Sync object is an array of u32 event_mem[2], error field index is 1 */ -#define BASEP_EVENT_VAL_INDEX (0U) -#define BASEP_EVENT_ERR_INDEX (1U) - -/* The upper limit for number of objects that could be waited/set per command. - * This limit is now enforced as internally the error inherit inputs are - * converted to 32-bit flags in a u32 variable occupying a previously padding - * field. - */ -#define BASEP_KCPU_CQS_MAX_NUM_OBJS ((size_t)32) - -/** - * enum base_kcpu_command_type - Kernel CPU queue command type. - * @BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: fence_signal, - * @BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: fence_wait, - * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT: cqs_wait, - * @BASE_KCPU_COMMAND_TYPE_CQS_SET: cqs_set, - * @BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: map_import, - * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: unmap_import, - * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: unmap_import_force, - * @BASE_KCPU_COMMAND_TYPE_JIT_ALLOC: jit_alloc, - * @BASE_KCPU_COMMAND_TYPE_JIT_FREE: jit_free, - * @BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: group_suspend, - * @BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: error_barrier, - * @BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME: sample_time, - */ -enum base_kcpu_command_type { - BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL, - BASE_KCPU_COMMAND_TYPE_FENCE_WAIT, - BASE_KCPU_COMMAND_TYPE_CQS_WAIT, - BASE_KCPU_COMMAND_TYPE_CQS_SET, - BASE_KCPU_COMMAND_TYPE_MAP_IMPORT, - BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT, - BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE, - BASE_KCPU_COMMAND_TYPE_JIT_ALLOC, - BASE_KCPU_COMMAND_TYPE_JIT_FREE, - BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND, - BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER, -#if MALI_UNIT_TEST - BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME, -#endif /* MALI_UNIT_TEST */ -}; - -/** - * enum base_queue_group_priority - Priority of a GPU Command 
Queue Group. - * @BASE_QUEUE_GROUP_PRIORITY_HIGH: GPU Command Queue Group is of high - * priority. - * @BASE_QUEUE_GROUP_PRIORITY_MEDIUM: GPU Command Queue Group is of medium - * priority. - * @BASE_QUEUE_GROUP_PRIORITY_LOW: GPU Command Queue Group is of low - * priority. - * @BASE_QUEUE_GROUP_PRIORITY_REALTIME: GPU Command Queue Group is of real-time - * priority. - * @BASE_QUEUE_GROUP_PRIORITY_COUNT: Number of GPU Command Queue Group - * priority levels. - * - * Currently this is in order of highest to lowest, but if new levels are added - * then those new levels may be out of order to preserve the ABI compatibility - * with previous releases. At that point, ensure assignment to - * the 'priority' member in &kbase_queue_group is updated to ensure it remains - * a linear ordering. - * - * There should be no gaps in the enum, otherwise use of - * BASE_QUEUE_GROUP_PRIORITY_COUNT in kbase must be updated. - */ -enum base_queue_group_priority { - BASE_QUEUE_GROUP_PRIORITY_HIGH = 0, - BASE_QUEUE_GROUP_PRIORITY_MEDIUM, - BASE_QUEUE_GROUP_PRIORITY_LOW, - BASE_QUEUE_GROUP_PRIORITY_REALTIME, - BASE_QUEUE_GROUP_PRIORITY_COUNT -}; - -struct base_kcpu_command_fence_info { - u64 fence; -}; - -struct base_cqs_wait_info { - u64 addr; - u32 val; - u32 padding; -}; - -struct base_kcpu_command_cqs_wait_info { - u64 objs; - u32 nr_objs; - u32 inherit_err_flags; -}; - -struct base_cqs_set { - u64 addr; -}; - -struct base_kcpu_command_cqs_set_info { - u64 objs; - u32 nr_objs; - u32 propagate_flags; -}; - -/** - * struct base_kcpu_command_import_info - structure which contains information - * about the imported buffer. - * - * @handle: Address of imported user buffer. - */ -struct base_kcpu_command_import_info { - u64 handle; -}; - -/** - * struct base_kcpu_command_jit_alloc_info - structure which contains - * information about jit memory allocation. - * - * @info: An array of elements of the - * struct base_jit_alloc_info type. - * @count: The number of elements in the info array. 
- * @padding: Padding to a multiple of 64 bits. - */ -struct base_kcpu_command_jit_alloc_info { - u64 info; - u8 count; - u8 padding[7]; -}; - -/** - * struct base_kcpu_command_jit_free_info - structure which contains - * information about jit memory which is to be freed. - * - * @ids: An array containing the JIT IDs to free. - * @count: The number of elements in the ids array. - * @padding: Padding to a multiple of 64 bits. - */ -struct base_kcpu_command_jit_free_info { - u64 ids; - u8 count; - u8 padding[7]; -}; - -/** - * struct base_kcpu_command_group_suspend_info - structure which contains - * suspend buffer data captured for a suspended queue group. - * - * @buffer: Pointer to an array of elements of the type char. - * @size: Number of elements in the @buffer array. - * @group_handle: Handle to the mapping of CSG. - * @padding: padding to a multiple of 64 bits. - */ -struct base_kcpu_command_group_suspend_info { - u64 buffer; - u32 size; - u8 group_handle; - u8 padding[3]; -}; - -#if MALI_UNIT_TEST -struct base_kcpu_command_sample_time_info { - u64 time; -}; -#endif /* MALI_UNIT_TEST */ - -/** - * struct base_kcpu_command - kcpu command. 
- * @type: type of the kcpu command, one enum base_kcpu_command_type - * @padding: padding to a multiple of 64 bits - * @info: structure which contains information about the kcpu command; - * actual type is determined by @p type - * @info.fence: Fence - * @info.cqs_wait: CQS wait - * @info.cqs_set: CQS set - * @info.import: import - * @info.jit_alloc: jit allocation - * @info.jit_free: jit deallocation - * @info.suspend_buf_copy: suspend buffer copy - * @info.sample_time: sample time - * @info.padding: padding - */ -struct base_kcpu_command { - u8 type; - u8 padding[sizeof(u64) - sizeof(u8)]; - union { - struct base_kcpu_command_fence_info fence; - struct base_kcpu_command_cqs_wait_info cqs_wait; - struct base_kcpu_command_cqs_set_info cqs_set; - struct base_kcpu_command_import_info import; - struct base_kcpu_command_jit_alloc_info jit_alloc; - struct base_kcpu_command_jit_free_info jit_free; - struct base_kcpu_command_group_suspend_info suspend_buf_copy; -#if MALI_UNIT_TEST - struct base_kcpu_command_sample_time_info sample_time; -#endif /* MALI_UNIT_TEST */ - u64 padding[2]; /* No sub-struct should be larger */ - } info; -}; - -/** - * struct basep_cs_stream_control - CSI capabilities. - * - * @features: Features of this stream - * @padding: Padding to a multiple of 64 bits. - */ -struct basep_cs_stream_control { - u32 features; - u32 padding; -}; - -/** - * struct basep_cs_group_control - CSG interface capabilities. - * - * @features: Features of this group - * @stream_num: Number of streams in this group - * @suspend_size: Size in bytes of the suspend buffer for this group - * @padding: Padding to a multiple of 64 bits. - */ -struct basep_cs_group_control { - u32 features; - u32 stream_num; - u32 suspend_size; - u32 padding; -}; - -/** - * struct base_gpu_queue_group_error_fatal_payload - Unrecoverable fault - * error information associated with GPU command queue group. - * - * @sideband: Additional information of the unrecoverable fault. 
- * @status: Unrecoverable fault information. - * This consists of exception type (least significant byte) and - * data (remaining bytes). One example of exception type is - * CS_INVALID_INSTRUCTION (0x49). - * @padding: Padding to make multiple of 64bits - */ -struct base_gpu_queue_group_error_fatal_payload { - u64 sideband; - u32 status; - u32 padding; -}; - -/** - * struct base_gpu_queue_error_fatal_payload - Unrecoverable fault - * error information related to GPU command queue. - * - * @sideband: Additional information about this unrecoverable fault. - * @status: Unrecoverable fault information. - * This consists of exception type (least significant byte) and - * data (remaining bytes). One example of exception type is - * CS_INVALID_INSTRUCTION (0x49). - * @csi_index: Index of the CSF interface the queue is bound to. - * @padding: Padding to make multiple of 64bits - */ -struct base_gpu_queue_error_fatal_payload { - u64 sideband; - u32 status; - u8 csi_index; - u8 padding[3]; -}; - -/** - * enum base_gpu_queue_group_error_type - GPU Fatal error type. - * - * @BASE_GPU_QUEUE_GROUP_ERROR_FATAL: Fatal error associated with GPU - * command queue group. - * @BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL: Fatal error associated with GPU - * command queue. - * @BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT: Fatal error associated with - * progress timeout. - * @BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM: Fatal error due to running out - * of tiler heap memory. - * @BASE_GPU_QUEUE_GROUP_ERROR_FATAL_COUNT: The number of fatal error types - * - * This type is used for &struct_base_gpu_queue_group_error.error_type. 
- */ -enum base_gpu_queue_group_error_type { - BASE_GPU_QUEUE_GROUP_ERROR_FATAL = 0, - BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL, - BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT, - BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM, - BASE_GPU_QUEUE_GROUP_ERROR_FATAL_COUNT -}; - -/** - * struct base_gpu_queue_group_error - Unrecoverable fault information - * @error_type: Error type of @base_gpu_queue_group_error_type - * indicating which field in union payload is filled - * @padding: Unused bytes for 64bit boundary - * @payload: Input Payload - * @payload.fatal_group: Unrecoverable fault error associated with - * GPU command queue group - * @payload.fatal_queue: Unrecoverable fault error associated with command queue - */ -struct base_gpu_queue_group_error { - u8 error_type; - u8 padding[7]; - union { - struct base_gpu_queue_group_error_fatal_payload fatal_group; - struct base_gpu_queue_error_fatal_payload fatal_queue; - } payload; -}; - -/** - * enum base_csf_notification_type - Notification type - * - * @BASE_CSF_NOTIFICATION_EVENT: Notification with kernel event - * @BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR: Notification with GPU fatal - * error - * @BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP: Notification with dumping cpu - * queue - * @BASE_CSF_NOTIFICATION_COUNT: The number of notification type - * - * This type is used for &struct_base_csf_notification.type. 
- */ -enum base_csf_notification_type { - BASE_CSF_NOTIFICATION_EVENT = 0, - BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, - BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP, - BASE_CSF_NOTIFICATION_COUNT -}; - -/** - * struct base_csf_notification - Event or error notification - * - * @type: Notification type of @base_csf_notification_type - * @padding: Padding for 64bit boundary - * @payload: Input Payload - * @payload.align: To fit the struct into a 64-byte cache line - * @payload.csg_error: CSG error - * @payload.csg_error.handle: Handle of GPU command queue group associated with - * fatal error - * @payload.csg_error.padding: Padding - * @payload.csg_error.error: Unrecoverable fault error - * - */ -struct base_csf_notification { - u8 type; - u8 padding[7]; - union { - struct { - u8 handle; - u8 padding[7]; - struct base_gpu_queue_group_error error; - } csg_error; - - u8 align[56]; - } payload; -}; - -#endif /* _BASE_CSF_KERNEL_H_ */ diff --git a/mali_kbase/csf/mali_gpu_csf_control_registers.h b/mali_kbase/csf/mali_gpu_csf_control_registers.h deleted file mode 100644 index 8c4fc82..0000000 --- a/mali_kbase/csf/mali_gpu_csf_control_registers.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - */ - -/* - * This header was autogenerated, it should not be edited. - */ - -#ifndef _GPU_CSF_CONTROL_REGISTERS_H_ -#define _GPU_CSF_CONTROL_REGISTERS_H_ - -/* GPU_REGISTERS register offsets */ -#define GPU_CONTROL_MCU 0x3000 /* () MCU control registers */ - -#endif /* _GPU_CSF_CONTROL_REGISTERS_H_ */ diff --git a/mali_kbase/csf/mali_gpu_csf_registers.h b/mali_kbase/csf/mali_gpu_csf_registers.h deleted file mode 100644 index d37b9cc..0000000 --- a/mali_kbase/csf/mali_gpu_csf_registers.h +++ /dev/null @@ -1,1401 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -/* - * This header was autogenerated, it should not be edited. 
- */ - -#ifndef _GPU_CSF_REGISTERS_H_ -#define _GPU_CSF_REGISTERS_H_ - -/* - * Begin register sets - */ - -/* DOORBELLS base address */ -#define DOORBELLS_BASE 0x0080000 -#define DOORBELLS_REG(r) (DOORBELLS_BASE + (r)) - -/* CS_KERNEL_INPUT_BLOCK base address */ -#define CS_KERNEL_INPUT_BLOCK_BASE 0x0000 -#define CS_KERNEL_INPUT_BLOCK_REG(r) (CS_KERNEL_INPUT_BLOCK_BASE + (r)) - -/* CS_KERNEL_OUTPUT_BLOCK base address */ -#define CS_KERNEL_OUTPUT_BLOCK_BASE 0x0000 -#define CS_KERNEL_OUTPUT_BLOCK_REG(r) (CS_KERNEL_OUTPUT_BLOCK_BASE + (r)) - -/* CS_USER_INPUT_BLOCK base address */ -#define CS_USER_INPUT_BLOCK_BASE 0x0000 -#define CS_USER_INPUT_BLOCK_REG(r) (CS_USER_INPUT_BLOCK_BASE + (r)) - -/* CS_USER_OUTPUT_BLOCK base address */ -#define CS_USER_OUTPUT_BLOCK_BASE 0x0000 -#define CS_USER_OUTPUT_BLOCK_REG(r) (CS_USER_OUTPUT_BLOCK_BASE + (r)) - -/* CSG_INPUT_BLOCK base address */ -#define CSG_INPUT_BLOCK_BASE 0x0000 -#define CSG_INPUT_BLOCK_REG(r) (CSG_INPUT_BLOCK_BASE + (r)) - -/* CSG_OUTPUT_BLOCK base address */ -#define CSG_OUTPUT_BLOCK_BASE 0x0000 -#define CSG_OUTPUT_BLOCK_REG(r) (CSG_OUTPUT_BLOCK_BASE + (r)) - -/* GLB_CONTROL_BLOCK base address */ -#define GLB_CONTROL_BLOCK_BASE 0x04000000 -#define GLB_CONTROL_BLOCK_REG(r) (GLB_CONTROL_BLOCK_BASE + (r)) - -/* GLB_INPUT_BLOCK base address */ -#define GLB_INPUT_BLOCK_BASE 0x0000 -#define GLB_INPUT_BLOCK_REG(r) (GLB_INPUT_BLOCK_BASE + (r)) - -/* GLB_OUTPUT_BLOCK base address */ -#define GLB_OUTPUT_BLOCK_BASE 0x0000 -#define GLB_OUTPUT_BLOCK_REG(r) (GLB_OUTPUT_BLOCK_BASE + (r)) - -/* USER base address */ -#define USER_BASE 0x0010000 -#define USER_REG(r) (USER_BASE + (r)) - -/* End register sets */ - -/* - * Begin register offsets - */ - -/* DOORBELLS register offsets */ -#define DOORBELL_0 0x0000 /* () Doorbell 0 register */ -#define DOORBELL(n) (DOORBELL_0 + (n)*65536) -#define DOORBELL_REG(n, r) (DOORBELL(n) + DOORBELL_BLOCK_REG(r)) -#define DOORBELL_COUNT 1024 - -/* DOORBELL_BLOCK register offsets */ -#define 
DB_BLK_DOORBELL 0x0000 /* (WO) Doorbell request */ - -/* CS_KERNEL_INPUT_BLOCK register offsets */ -#define CS_REQ 0x0000 /* () CS request flags */ -#define CS_CONFIG 0x0004 /* () CS configuration */ -#define CS_ACK_IRQ_MASK 0x000C /* () Command steam interrupt mask */ -#define CS_BASE_LO 0x0010 /* () Base pointer for the ring buffer, low word */ -#define CS_BASE_HI 0x0014 /* () Base pointer for the ring buffer, high word */ -#define CS_SIZE 0x0018 /* () Size of the ring buffer */ -#define CS_TILER_HEAP_START_LO 0x0020 /* () Pointer to heap start, low word */ -#define CS_TILER_HEAP_START_HI 0x0024 /* () Pointer to heap start, high word */ -#define CS_TILER_HEAP_END_LO 0x0028 /* () Tiler heap descriptor address, low word */ -#define CS_TILER_HEAP_END_HI 0x002C /* () Tiler heap descriptor address, high word */ -#define CS_USER_INPUT_LO 0x0030 /* () CS user mode input page address, low word */ -#define CS_USER_INPUT_HI 0x0034 /* () CS user mode input page address, high word */ -#define CS_USER_OUTPUT_LO 0x0038 /* () CS user mode input page address, low word */ -#define CS_USER_OUTPUT_HI 0x003C /* () CS user mode input page address, high word */ - -/* CS_KERNEL_OUTPUT_BLOCK register offsets */ -#define CS_ACK 0x0000 /* () CS acknowledge flags */ -#define CS_STATUS_CMD_PTR_LO 0x0040 /* () Program pointer current value, low word */ -#define CS_STATUS_CMD_PTR_HI 0x0044 /* () Program pointer current value, high word */ -#define CS_STATUS_WAIT 0x0048 /* () Wait condition status register */ -#define CS_STATUS_REQ_RESOURCE 0x004C /* () Indicates the resources requested by the CS */ -#define CS_STATUS_WAIT_SYNC_POINTER_LO 0x0050 /* () Sync object pointer, low word */ -#define CS_STATUS_WAIT_SYNC_POINTER_HI 0x0054 /* () Sync object pointer, high word */ -#define CS_STATUS_WAIT_SYNC_VALUE 0x0058 /* () Sync object test value */ -#define CS_STATUS_SCOREBOARDS 0x005C /* () Scoreboard status */ -#define CS_STATUS_BLOCKED_REASON 0x0060 /* () Blocked reason */ -#define CS_FAULT 0x0080 
/* () Recoverable fault information */ -#define CS_FATAL 0x0084 /* () Unrecoverable fault information */ -#define CS_FAULT_INFO_LO 0x0088 /* () Additional information about a recoverable fault, low word */ -#define CS_FAULT_INFO_HI 0x008C /* () Additional information about a recoverable fault, high word */ -#define CS_FATAL_INFO_LO 0x0090 /* () Additional information about a non-recoverable fault, low word */ -#define CS_FATAL_INFO_HI 0x0094 /* () Additional information about a non-recoverable fault, high word */ -#define CS_HEAP_VT_START 0x00C0 /* () Number of vertex/tiling operations started */ -#define CS_HEAP_VT_END 0x00C4 /* () Number of vertex/tiling operations completed */ -#define CS_HEAP_FRAG_END 0x00CC /* () Number of fragment completed */ -#define CS_HEAP_ADDRESS_LO 0x00D0 /* () Heap address, low word */ -#define CS_HEAP_ADDRESS_HI 0x00D4 /* () Heap address, high word */ - -/* CS_USER_INPUT_BLOCK register offsets */ -#define CS_INSERT_LO 0x0000 /* () Current insert offset for ring buffer, low word */ -#define CS_INSERT_HI 0x0004 /* () Current insert offset for ring buffer, high word */ -#define CS_EXTRACT_INIT_LO 0x0008 /* () Initial extract offset for ring buffer, low word */ -#define CS_EXTRACT_INIT_HI 0x000C /* () Initial extract offset for ring buffer, high word */ - -/* CS_USER_OUTPUT_BLOCK register offsets */ -#define CS_EXTRACT_LO 0x0000 /* () Current extract offset for ring buffer, low word */ -#define CS_EXTRACT_HI 0x0004 /* () Current extract offset for ring buffer, high word */ -#define CS_ACTIVE 0x0008 /* () Initial extract offset when the CS is started */ - -/* CSG_INPUT_BLOCK register offsets */ -#define CSG_REQ 0x0000 /* () CSG request */ -#define CSG_ACK_IRQ_MASK 0x0004 /* () Global acknowledge interrupt mask */ -#define CSG_DB_REQ 0x0008 /* () Global doorbell request */ -#define CSG_IRQ_ACK 0x000C /* () CS IRQ acknowledge */ -#define CSG_ALLOW_COMPUTE_LO 0x0020 /* () Allowed compute endpoints, low word */ -#define CSG_ALLOW_COMPUTE_HI 
0x0024 /* () Allowed compute endpoints, high word */ -#define CSG_ALLOW_FRAGMENT_LO 0x0028 /* () Allowed fragment endpoints, low word */ -#define CSG_ALLOW_FRAGMENT_HI 0x002C /* () Allowed fragment endpoints, high word */ -#define CSG_ALLOW_OTHER 0x0030 /* () Allowed other endpoints */ -#define CSG_EP_REQ 0x0034 /* () Maximum number of endpoints allowed */ -#define CSG_SUSPEND_BUF_LO 0x0040 /* () Normal mode suspend buffer, low word */ -#define CSG_SUSPEND_BUF_HI 0x0044 /* () Normal mode suspend buffer, high word */ -#define CSG_PROTM_SUSPEND_BUF_LO 0x0048 /* () Protected mode suspend buffer, low word */ -#define CSG_PROTM_SUSPEND_BUF_HI 0x004C /* () Protected mode suspend buffer, high word */ -#define CSG_CONFIG 0x0050 /* () CSG configuration options */ - -/* CSG_OUTPUT_BLOCK register offsets */ -#define CSG_ACK 0x0000 /* () CSG acknowledge flags */ -#define CSG_DB_ACK 0x0008 /* () CS kernel doorbell acknowledge flags */ -#define CSG_IRQ_REQ 0x000C /* () CS interrupt request flags */ -#define CSG_STATUS_EP_CURRENT 0x0010 /* () Endpoint allocation status register */ -#define CSG_STATUS_EP_REQ 0x0014 /* () Endpoint request status register */ -#define CSG_RESOURCE_DEP 0x001C /* () Current resource dependencies */ - -/* GLB_CONTROL_BLOCK register offsets */ -#define GLB_VERSION 0x0000 /* () Global interface version */ -#define GLB_FEATURES 0x0004 /* () Global interface features */ -#define GLB_INPUT_VA 0x0008 /* () Address of GLB_INPUT_BLOCK */ -#define GLB_OUTPUT_VA 0x000C /* () Address of GLB_OUTPUT_BLOCK */ -#define GLB_GROUP_NUM 0x0010 /* () Number of CSG interfaces */ -#define GLB_GROUP_STRIDE 0x0014 /* () Stride between CSG interfaces */ -#define GLB_PRFCNT_SIZE 0x0018 /* () Size of CSF performance counters */ -#define GROUP_CONTROL_0 0x1000 /* () CSG control and capabilities */ -#define GROUP_CONTROL(n) (GROUP_CONTROL_0 + (n)*256) -#define GROUP_CONTROL_REG(n, r) (GROUP_CONTROL(n) + GROUP_CONTROL_BLOCK_REG(r)) -#define GROUP_CONTROL_COUNT 16 - -/* 
STREAM_CONTROL_BLOCK register offsets */ -#define STREAM_FEATURES 0x0000 /* () CSI features */ -#define STREAM_INPUT_VA 0x0004 /* () Address of CS_KERNEL_INPUT_BLOCK */ -#define STREAM_OUTPUT_VA 0x0008 /* () Address of CS_KERNEL_OUTPUT_BLOCK */ - -/* GROUP_CONTROL_BLOCK register offsets */ -#define GROUP_FEATURES 0x0000 /* () CSG interface features */ -#define GROUP_INPUT_VA 0x0004 /* () Address of CSG_INPUT_BLOCK */ -#define GROUP_OUTPUT_VA 0x0008 /* () Address of CSG_OUTPUT_BLOCK */ -#define GROUP_SUSPEND_SIZE 0x000C /* () Size of CSG suspend buffer */ -#define GROUP_PROTM_SUSPEND_SIZE 0x0010 /* () Size of CSG protected-mode suspend buffer */ -#define GROUP_STREAM_NUM 0x0014 /* () Number of CS interfaces */ -#define GROUP_STREAM_STRIDE 0x0018 /* () Stride between CS interfaces */ -#define STREAM_CONTROL_0 0x0040 /* () CS control and capabilities */ -#define STREAM_CONTROL(n) (STREAM_CONTROL_0 + (n)*12) -#define STREAM_CONTROL_REG(n, r) (STREAM_CONTROL(n) + STREAM_CONTROL_BLOCK_REG(r)) -#define STREAM_CONTROL_COUNT 16 - -/* GLB_INPUT_BLOCK register offsets */ -#define GLB_REQ 0x0000 /* () Global request */ -#define GLB_ACK_IRQ_MASK 0x0004 /* () Global acknowledge interrupt mask */ -#define GLB_DB_REQ 0x0008 /* () Global doorbell request */ -#define GLB_PROGRESS_TIMER 0x0010 /* () Global progress timeout */ -#define GLB_PWROFF_TIMER 0x0014 /* () Global shader core power off timer */ -#define GLB_ALLOC_EN_LO 0x0018 /* () Global shader core allocation enable mask, low word */ -#define GLB_ALLOC_EN_HI 0x001C /* () Global shader core allocation enable mask, high word */ -#define GLB_PROTM_COHERENCY 0x0020 /* () Configure COHERENCY_ENABLE register value to use in protected mode execution */ - -#define GLB_PRFCNT_JASID 0x0024 /* () Performance counter address space */ -#define GLB_PRFCNT_BASE_LO 0x0028 /* () Performance counter buffer address, low word */ -#define GLB_PRFCNT_BASE_HI 0x002C /* () Performance counter buffer address, high word */ -#define GLB_PRFCNT_EXTRACT 
0x0030 /* () Performance counter buffer extract index */ -#define GLB_PRFCNT_CONFIG 0x0040 /* () Performance counter configuration */ -#define GLB_PRFCNT_CSG_SELECT 0x0044 /* () CSG performance counting enable */ -#define GLB_PRFCNT_FW_EN 0x0048 /* () Performance counter enable for firmware */ -#define GLB_PRFCNT_CSG_EN 0x004C /* () Performance counter enable for CSG */ -#define GLB_PRFCNT_CSF_EN 0x0050 /* () Performance counter enable for CSF */ -#define GLB_PRFCNT_SHADER_EN 0x0054 /* () Performance counter enable for shader cores */ -#define GLB_PRFCNT_TILER_EN 0x0058 /* () Performance counter enable for tiler */ -#define GLB_PRFCNT_MMU_L2_EN 0x005C /* () Performance counter enable for MMU/L2 cache */ - -#define GLB_DEBUG_FWUTF_DESTROY 0x0FE0 /* () Test fixture destroy function address */ -#define GLB_DEBUG_FWUTF_TEST 0x0FE4 /* () Test index */ -#define GLB_DEBUG_FWUTF_FIXTURE 0x0FE8 /* () Test fixture index */ -#define GLB_DEBUG_FWUTF_CREATE 0x0FEC /* () Test fixture create function address */ -#define GLB_DEBUG_ACK_IRQ_MASK 0x0FF8 /* () Global debug acknowledge interrupt mask */ -#define GLB_DEBUG_REQ 0x0FFC /* () Global debug request */ - -/* GLB_OUTPUT_BLOCK register offsets */ -#define GLB_ACK 0x0000 /* () Global acknowledge */ -#define GLB_DB_ACK 0x0008 /* () Global doorbell acknowledge */ -#define GLB_HALT_STATUS 0x0010 /* () Global halt status */ -#define GLB_PRFCNT_STATUS 0x0014 /* () Performance counter status */ -#define GLB_PRFCNT_INSERT 0x0018 /* () Performance counter buffer insert index */ -#define GLB_DEBUG_FWUTF_RESULT 0x0FE0 /* () Firmware debug test result */ -#define GLB_DEBUG_ACK 0x0FFC /* () Global debug acknowledge */ - -/* USER register offsets */ -#define LATEST_FLUSH 0x0000 /* () Flush ID of latest clean-and-invalidate operation */ - -/* End register offsets */ - -/* CS_KERNEL_INPUT_BLOCK register set definitions */ -/* GLB_VERSION register */ -#define GLB_VERSION_PATCH_SHIFT (0) -#define GLB_VERSION_MINOR_SHIFT (16) -#define 
GLB_VERSION_MAJOR_SHIFT (24) - -/* CS_REQ register */ -#define CS_REQ_STATE_SHIFT 0 -#define CS_REQ_STATE_MASK (0x7 << CS_REQ_STATE_SHIFT) -#define CS_REQ_STATE_GET(reg_val) (((reg_val)&CS_REQ_STATE_MASK) >> CS_REQ_STATE_SHIFT) -#define CS_REQ_STATE_SET(reg_val, value) \ - (((reg_val) & ~CS_REQ_STATE_MASK) | (((value) << CS_REQ_STATE_SHIFT) & CS_REQ_STATE_MASK)) -/* CS_REQ_STATE values */ -#define CS_REQ_STATE_STOP 0x0 -#define CS_REQ_STATE_START 0x1 -/* End of CS_REQ_STATE values */ -#define CS_REQ_EXTRACT_EVENT_SHIFT 4 -#define CS_REQ_EXTRACT_EVENT_MASK (0x1 << CS_REQ_EXTRACT_EVENT_SHIFT) -#define CS_REQ_EXTRACT_EVENT_GET(reg_val) (((reg_val)&CS_REQ_EXTRACT_EVENT_MASK) >> CS_REQ_EXTRACT_EVENT_SHIFT) -#define CS_REQ_EXTRACT_EVENT_SET(reg_val, value) \ - (((reg_val) & ~CS_REQ_EXTRACT_EVENT_MASK) | (((value) << CS_REQ_EXTRACT_EVENT_SHIFT) & CS_REQ_EXTRACT_EVENT_MASK)) - -#define CS_REQ_IDLE_SYNC_WAIT_SHIFT 8 -#define CS_REQ_IDLE_SYNC_WAIT_MASK (0x1 << CS_REQ_IDLE_SYNC_WAIT_SHIFT) -#define CS_REQ_IDLE_SYNC_WAIT_GET(reg_val) (((reg_val)&CS_REQ_IDLE_SYNC_WAIT_MASK) >> CS_REQ_IDLE_SYNC_WAIT_SHIFT) -#define CS_REQ_IDLE_SYNC_WAIT_SET(reg_val, value) \ - (((reg_val) & ~CS_REQ_IDLE_SYNC_WAIT_MASK) | \ - (((value) << CS_REQ_IDLE_SYNC_WAIT_SHIFT) & CS_REQ_IDLE_SYNC_WAIT_MASK)) -#define CS_REQ_IDLE_PROTM_PEND_SHIFT 9 -#define CS_REQ_IDLE_PROTM_PEND_MASK (0x1 << CS_REQ_IDLE_PROTM_PEND_SHIFT) -#define CS_REQ_IDLE_PROTM_PEND_GET(reg_val) (((reg_val)&CS_REQ_IDLE_PROTM_PEND_MASK) >> CS_REQ_IDLE_PROTM_PEND_SHIFT) -#define CS_REQ_IDLE_PROTM_PEND_SET(reg_val, value) \ - (((reg_val) & ~CS_REQ_IDLE_PROTM_PEND_MASK) | \ - (((value) << CS_REQ_IDLE_PROTM_PEND_SHIFT) & CS_REQ_IDLE_PROTM_PEND_MASK)) -#define CS_REQ_IDLE_EMPTY_SHIFT 10 -#define CS_REQ_IDLE_EMPTY_MASK (0x1 << CS_REQ_IDLE_EMPTY_SHIFT) -#define CS_REQ_IDLE_EMPTY_GET(reg_val) (((reg_val)&CS_REQ_IDLE_EMPTY_MASK) >> CS_REQ_IDLE_EMPTY_SHIFT) -#define CS_REQ_IDLE_EMPTY_SET(reg_val, value) \ - (((reg_val) & ~CS_REQ_IDLE_EMPTY_MASK) | 
(((value) << CS_REQ_IDLE_EMPTY_SHIFT) & CS_REQ_IDLE_EMPTY_MASK)) -#define CS_REQ_IDLE_RESOURCE_REQ_SHIFT 11 -#define CS_REQ_IDLE_RESOURCE_REQ_MASK (0x1 << CS_REQ_IDLE_RESOURCE_REQ_SHIFT) -#define CS_REQ_IDLE_RESOURCE_REQ_GET(reg_val) \ - (((reg_val)&CS_REQ_IDLE_RESOURCE_REQ_MASK) >> CS_REQ_IDLE_RESOURCE_REQ_SHIFT) -#define CS_REQ_IDLE_RESOURCE_REQ_SET(reg_val, value) \ - (((reg_val) & ~CS_REQ_IDLE_RESOURCE_REQ_MASK) | \ - (((value) << CS_REQ_IDLE_RESOURCE_REQ_SHIFT) & CS_REQ_IDLE_RESOURCE_REQ_MASK)) -#define CS_REQ_TILER_OOM_SHIFT 26 -#define CS_REQ_TILER_OOM_MASK (0x1 << CS_REQ_TILER_OOM_SHIFT) -#define CS_REQ_TILER_OOM_GET(reg_val) (((reg_val)&CS_REQ_TILER_OOM_MASK) >> CS_REQ_TILER_OOM_SHIFT) -#define CS_REQ_TILER_OOM_SET(reg_val, value) \ - (((reg_val) & ~CS_REQ_TILER_OOM_MASK) | (((value) << CS_REQ_TILER_OOM_SHIFT) & CS_REQ_TILER_OOM_MASK)) -#define CS_REQ_PROTM_PEND_SHIFT 27 -#define CS_REQ_PROTM_PEND_MASK (0x1 << CS_REQ_PROTM_PEND_SHIFT) -#define CS_REQ_PROTM_PEND_GET(reg_val) (((reg_val)&CS_REQ_PROTM_PEND_MASK) >> CS_REQ_PROTM_PEND_SHIFT) -#define CS_REQ_PROTM_PEND_SET(reg_val, value) \ - (((reg_val) & ~CS_REQ_PROTM_PEND_MASK) | (((value) << CS_REQ_PROTM_PEND_SHIFT) & CS_REQ_PROTM_PEND_MASK)) -#define CS_REQ_FATAL_SHIFT 30 -#define CS_REQ_FATAL_MASK (0x1 << CS_REQ_FATAL_SHIFT) -#define CS_REQ_FATAL_GET(reg_val) (((reg_val)&CS_REQ_FATAL_MASK) >> CS_REQ_FATAL_SHIFT) -#define CS_REQ_FATAL_SET(reg_val, value) \ - (((reg_val) & ~CS_REQ_FATAL_MASK) | (((value) << CS_REQ_FATAL_SHIFT) & CS_REQ_FATAL_MASK)) -#define CS_REQ_FAULT_SHIFT 31 -#define CS_REQ_FAULT_MASK (0x1 << CS_REQ_FAULT_SHIFT) -#define CS_REQ_FAULT_GET(reg_val) (((reg_val)&CS_REQ_FAULT_MASK) >> CS_REQ_FAULT_SHIFT) -#define CS_REQ_FAULT_SET(reg_val, value) \ - (((reg_val) & ~CS_REQ_FAULT_MASK) | (((value) << CS_REQ_FAULT_SHIFT) & CS_REQ_FAULT_MASK)) - -/* CS_CONFIG register */ -#define CS_CONFIG_PRIORITY_SHIFT 0 -#define CS_CONFIG_PRIORITY_MASK (0xF << CS_CONFIG_PRIORITY_SHIFT) -#define 
CS_CONFIG_PRIORITY_GET(reg_val) (((reg_val)&CS_CONFIG_PRIORITY_MASK) >> CS_CONFIG_PRIORITY_SHIFT) -#define CS_CONFIG_PRIORITY_SET(reg_val, value) \ - (((reg_val) & ~CS_CONFIG_PRIORITY_MASK) | (((value) << CS_CONFIG_PRIORITY_SHIFT) & CS_CONFIG_PRIORITY_MASK)) -#define CS_CONFIG_USER_DOORBELL_SHIFT 8 -#define CS_CONFIG_USER_DOORBELL_MASK (0xFF << CS_CONFIG_USER_DOORBELL_SHIFT) -#define CS_CONFIG_USER_DOORBELL_GET(reg_val) (((reg_val)&CS_CONFIG_USER_DOORBELL_MASK) >> CS_CONFIG_USER_DOORBELL_SHIFT) -#define CS_CONFIG_USER_DOORBELL_SET(reg_val, value) \ - (((reg_val) & ~CS_CONFIG_USER_DOORBELL_MASK) | \ - (((value) << CS_CONFIG_USER_DOORBELL_SHIFT) & CS_CONFIG_USER_DOORBELL_MASK)) - -/* CS_ACK_IRQ_MASK register */ -#define CS_ACK_IRQ_MASK_STATE_SHIFT 0 -#define CS_ACK_IRQ_MASK_STATE_MASK (0x7 << CS_ACK_IRQ_MASK_STATE_SHIFT) -#define CS_ACK_IRQ_MASK_STATE_GET(reg_val) (((reg_val)&CS_ACK_IRQ_MASK_STATE_MASK) >> CS_ACK_IRQ_MASK_STATE_SHIFT) -#define CS_ACK_IRQ_MASK_STATE_SET(reg_val, value) \ - (((reg_val) & ~CS_ACK_IRQ_MASK_STATE_MASK) | \ - (((value) << CS_ACK_IRQ_MASK_STATE_SHIFT) & CS_ACK_IRQ_MASK_STATE_MASK)) -/* CS_ACK_IRQ_MASK_STATE values */ -#define CS_ACK_IRQ_MASK_STATE_DISABLED 0x0 -#define CS_ACK_IRQ_MASK_STATE_ENABLED 0x7 -/* End of CS_ACK_IRQ_MASK_STATE values */ -#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT 4 -#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK (0x1 << CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT) -#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_GET(reg_val) \ - (((reg_val)&CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK) >> CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT) -#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_SET(reg_val, value) \ - (((reg_val) & ~CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK) | \ - (((value) << CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT) & CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK)) -#define CS_ACK_IRQ_MASK_TILER_OOM_SHIFT 26 -#define CS_ACK_IRQ_MASK_TILER_OOM_MASK (0x1 << CS_ACK_IRQ_MASK_TILER_OOM_SHIFT) -#define CS_ACK_IRQ_MASK_TILER_OOM_GET(reg_val) \ - (((reg_val)&CS_ACK_IRQ_MASK_TILER_OOM_MASK) 
>> CS_ACK_IRQ_MASK_TILER_OOM_SHIFT) -#define CS_ACK_IRQ_MASK_TILER_OOM_SET(reg_val, value) \ - (((reg_val) & ~CS_ACK_IRQ_MASK_TILER_OOM_MASK) | \ - (((value) << CS_ACK_IRQ_MASK_TILER_OOM_SHIFT) & CS_ACK_IRQ_MASK_TILER_OOM_MASK)) -#define CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT 27 -#define CS_ACK_IRQ_MASK_PROTM_PEND_MASK (0x1 << CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT) -#define CS_ACK_IRQ_MASK_PROTM_PEND_GET(reg_val) \ - (((reg_val)&CS_ACK_IRQ_MASK_PROTM_PEND_MASK) >> CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT) -#define CS_ACK_IRQ_MASK_PROTM_PEND_SET(reg_val, value) \ - (((reg_val) & ~CS_ACK_IRQ_MASK_PROTM_PEND_MASK) | \ - (((value) << CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT) & CS_ACK_IRQ_MASK_PROTM_PEND_MASK)) -#define CS_ACK_IRQ_MASK_FATAL_SHIFT 30 -#define CS_ACK_IRQ_MASK_FATAL_MASK (0x1 << CS_ACK_IRQ_MASK_FATAL_SHIFT) -#define CS_ACK_IRQ_MASK_FATAL_GET(reg_val) (((reg_val)&CS_ACK_IRQ_MASK_FATAL_MASK) >> CS_ACK_IRQ_MASK_FATAL_SHIFT) -#define CS_ACK_IRQ_MASK_FATAL_SET(reg_val, value) \ - (((reg_val) & ~CS_ACK_IRQ_MASK_FATAL_MASK) | \ - (((value) << CS_ACK_IRQ_MASK_FATAL_SHIFT) & CS_ACK_IRQ_MASK_FATAL_MASK)) -#define CS_ACK_IRQ_MASK_FAULT_SHIFT 31 -#define CS_ACK_IRQ_MASK_FAULT_MASK (0x1 << CS_ACK_IRQ_MASK_FAULT_SHIFT) -#define CS_ACK_IRQ_MASK_FAULT_GET(reg_val) (((reg_val)&CS_ACK_IRQ_MASK_FAULT_MASK) >> CS_ACK_IRQ_MASK_FAULT_SHIFT) -#define CS_ACK_IRQ_MASK_FAULT_SET(reg_val, value) \ - (((reg_val) & ~CS_ACK_IRQ_MASK_FAULT_MASK) | \ - (((value) << CS_ACK_IRQ_MASK_FAULT_SHIFT) & CS_ACK_IRQ_MASK_FAULT_MASK)) - -/* CS_BASE register */ -#define CS_BASE_POINTER_SHIFT 0 -#define CS_BASE_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_BASE_POINTER_SHIFT) -#define CS_BASE_POINTER_GET(reg_val) (((reg_val)&CS_BASE_POINTER_MASK) >> CS_BASE_POINTER_SHIFT) -#define CS_BASE_POINTER_SET(reg_val, value) \ - (((reg_val) & ~CS_BASE_POINTER_MASK) | (((value) << CS_BASE_POINTER_SHIFT) & CS_BASE_POINTER_MASK)) - -/* CS_SIZE register */ -#define CS_SIZE_SIZE_SHIFT 0 -#define CS_SIZE_SIZE_MASK (0xFFFFFFFF << 
CS_SIZE_SIZE_SHIFT) -#define CS_SIZE_SIZE_GET(reg_val) (((reg_val)&CS_SIZE_SIZE_MASK) >> CS_SIZE_SIZE_SHIFT) -#define CS_SIZE_SIZE_SET(reg_val, value) \ - (((reg_val) & ~CS_SIZE_SIZE_MASK) | (((value) << CS_SIZE_SIZE_SHIFT) & CS_SIZE_SIZE_MASK)) - -/* CS_TILER_HEAP_START register */ -#define CS_TILER_HEAP_START_POINTER_SHIFT 0 -#define CS_TILER_HEAP_START_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_TILER_HEAP_START_POINTER_SHIFT) -#define CS_TILER_HEAP_START_POINTER_GET(reg_val) \ - (((reg_val)&CS_TILER_HEAP_START_POINTER_MASK) >> CS_TILER_HEAP_START_POINTER_SHIFT) -#define CS_TILER_HEAP_START_POINTER_SET(reg_val, value) \ - (((reg_val) & ~CS_TILER_HEAP_START_POINTER_MASK) | \ - (((value) << CS_TILER_HEAP_START_POINTER_SHIFT) & CS_TILER_HEAP_START_POINTER_MASK)) -/* HeapChunkPointer nested in CS_TILER_HEAP_START_POINTER */ -/* End of HeapChunkPointer nested in CS_TILER_HEAP_START_POINTER */ - -/* CS_TILER_HEAP_END register */ -#define CS_TILER_HEAP_END_POINTER_SHIFT 0 -#define CS_TILER_HEAP_END_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_TILER_HEAP_END_POINTER_SHIFT) -#define CS_TILER_HEAP_END_POINTER_GET(reg_val) \ - (((reg_val)&CS_TILER_HEAP_END_POINTER_MASK) >> CS_TILER_HEAP_END_POINTER_SHIFT) -#define CS_TILER_HEAP_END_POINTER_SET(reg_val, value) \ - (((reg_val) & ~CS_TILER_HEAP_END_POINTER_MASK) | \ - (((value) << CS_TILER_HEAP_END_POINTER_SHIFT) & CS_TILER_HEAP_END_POINTER_MASK)) -/* HeapChunkPointer nested in CS_TILER_HEAP_END_POINTER */ -/* End of HeapChunkPointer nested in CS_TILER_HEAP_END_POINTER */ - -/* CS_USER_INPUT register */ -#define CS_USER_INPUT_POINTER_SHIFT 0 -#define CS_USER_INPUT_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_USER_INPUT_POINTER_SHIFT) -#define CS_USER_INPUT_POINTER_GET(reg_val) (((reg_val)&CS_USER_INPUT_POINTER_MASK) >> CS_USER_INPUT_POINTER_SHIFT) -#define CS_USER_INPUT_POINTER_SET(reg_val, value) \ - (((reg_val) & ~CS_USER_INPUT_POINTER_MASK) | \ - (((value) << CS_USER_INPUT_POINTER_SHIFT) & CS_USER_INPUT_POINTER_MASK)) - -/* CS_USER_OUTPUT 
register */ -#define CS_USER_OUTPUT_POINTER_SHIFT 0 -#define CS_USER_OUTPUT_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_USER_OUTPUT_POINTER_SHIFT) -#define CS_USER_OUTPUT_POINTER_GET(reg_val) (((reg_val)&CS_USER_OUTPUT_POINTER_MASK) >> CS_USER_OUTPUT_POINTER_SHIFT) -#define CS_USER_OUTPUT_POINTER_SET(reg_val, value) \ - (((reg_val) & ~CS_USER_OUTPUT_POINTER_MASK) | \ - (((value) << CS_USER_OUTPUT_POINTER_SHIFT) & CS_USER_OUTPUT_POINTER_MASK)) -/* End of CS_KERNEL_INPUT_BLOCK register set definitions */ - -/* CS_KERNEL_OUTPUT_BLOCK register set definitions */ - -/* CS_ACK register */ -#define CS_ACK_STATE_SHIFT 0 -#define CS_ACK_STATE_MASK (0x7 << CS_ACK_STATE_SHIFT) -#define CS_ACK_STATE_GET(reg_val) (((reg_val)&CS_ACK_STATE_MASK) >> CS_ACK_STATE_SHIFT) -#define CS_ACK_STATE_SET(reg_val, value) \ - (((reg_val) & ~CS_ACK_STATE_MASK) | (((value) << CS_ACK_STATE_SHIFT) & CS_ACK_STATE_MASK)) -/* CS_ACK_STATE values */ -#define CS_ACK_STATE_STOP 0x0 -#define CS_ACK_STATE_START 0x1 -/* End of CS_ACK_STATE values */ -#define CS_ACK_EXTRACT_EVENT_SHIFT 4 -#define CS_ACK_EXTRACT_EVENT_MASK (0x1 << CS_ACK_EXTRACT_EVENT_SHIFT) -#define CS_ACK_EXTRACT_EVENT_GET(reg_val) (((reg_val)&CS_ACK_EXTRACT_EVENT_MASK) >> CS_ACK_EXTRACT_EVENT_SHIFT) -#define CS_ACK_EXTRACT_EVENT_SET(reg_val, value) \ - (((reg_val) & ~CS_ACK_EXTRACT_EVENT_MASK) | (((value) << CS_ACK_EXTRACT_EVENT_SHIFT) & CS_ACK_EXTRACT_EVENT_MASK)) -#define CS_ACK_TILER_OOM_SHIFT 26 -#define CS_ACK_TILER_OOM_MASK (0x1 << CS_ACK_TILER_OOM_SHIFT) -#define CS_ACK_TILER_OOM_GET(reg_val) (((reg_val)&CS_ACK_TILER_OOM_MASK) >> CS_ACK_TILER_OOM_SHIFT) -#define CS_ACK_TILER_OOM_SET(reg_val, value) \ - (((reg_val) & ~CS_ACK_TILER_OOM_MASK) | (((value) << CS_ACK_TILER_OOM_SHIFT) & CS_ACK_TILER_OOM_MASK)) -#define CS_ACK_PROTM_PEND_SHIFT 27 -#define CS_ACK_PROTM_PEND_MASK (0x1 << CS_ACK_PROTM_PEND_SHIFT) -#define CS_ACK_PROTM_PEND_GET(reg_val) (((reg_val)&CS_ACK_PROTM_PEND_MASK) >> CS_ACK_PROTM_PEND_SHIFT) -#define 
CS_ACK_PROTM_PEND_SET(reg_val, value) \ - (((reg_val) & ~CS_ACK_PROTM_PEND_MASK) | (((value) << CS_ACK_PROTM_PEND_SHIFT) & CS_ACK_PROTM_PEND_MASK)) -#define CS_ACK_FATAL_SHIFT 30 -#define CS_ACK_FATAL_MASK (0x1 << CS_ACK_FATAL_SHIFT) -#define CS_ACK_FATAL_GET(reg_val) (((reg_val)&CS_ACK_FATAL_MASK) >> CS_ACK_FATAL_SHIFT) -#define CS_ACK_FATAL_SET(reg_val, value) \ - (((reg_val) & ~CS_ACK_FATAL_MASK) | (((value) << CS_ACK_FATAL_SHIFT) & CS_ACK_FATAL_MASK)) -#define CS_ACK_FAULT_SHIFT 31 -#define CS_ACK_FAULT_MASK (0x1 << CS_ACK_FAULT_SHIFT) -#define CS_ACK_FAULT_GET(reg_val) (((reg_val)&CS_ACK_FAULT_MASK) >> CS_ACK_FAULT_SHIFT) -#define CS_ACK_FAULT_SET(reg_val, value) \ - (((reg_val) & ~CS_ACK_FAULT_MASK) | (((value) << CS_ACK_FAULT_SHIFT) & CS_ACK_FAULT_MASK)) - -/* CS_STATUS_CMD_PTR register */ -#define CS_STATUS_CMD_PTR_POINTER_SHIFT 0 -#define CS_STATUS_CMD_PTR_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_STATUS_CMD_PTR_POINTER_SHIFT) -#define CS_STATUS_CMD_PTR_POINTER_GET(reg_val) \ - (((reg_val)&CS_STATUS_CMD_PTR_POINTER_MASK) >> CS_STATUS_CMD_PTR_POINTER_SHIFT) -#define CS_STATUS_CMD_PTR_POINTER_SET(reg_val, value) \ - (((reg_val) & ~CS_STATUS_CMD_PTR_POINTER_MASK) | \ - (((value) << CS_STATUS_CMD_PTR_POINTER_SHIFT) & CS_STATUS_CMD_PTR_POINTER_MASK)) - -/* CS_STATUS_WAIT register */ -#define CS_STATUS_WAIT_SB_MASK_SHIFT 0 -#define CS_STATUS_WAIT_SB_MASK_MASK (0xFFFF << CS_STATUS_WAIT_SB_MASK_SHIFT) -#define CS_STATUS_WAIT_SB_MASK_GET(reg_val) (((reg_val)&CS_STATUS_WAIT_SB_MASK_MASK) >> CS_STATUS_WAIT_SB_MASK_SHIFT) -#define CS_STATUS_WAIT_SB_MASK_SET(reg_val, value) \ - (((reg_val) & ~CS_STATUS_WAIT_SB_MASK_MASK) | \ - (((value) << CS_STATUS_WAIT_SB_MASK_SHIFT) & CS_STATUS_WAIT_SB_MASK_MASK)) -#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT 24 -#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK (0xF << CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT) -#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(reg_val) \ - (((reg_val)&CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK) >> 
CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT) -#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SET(reg_val, value) \ - (((reg_val) & ~CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK) | \ - (((value) << CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT) & CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK)) -/* CS_STATUS_WAIT_SYNC_WAIT_CONDITION values */ -#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE 0x0 -#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT 0x1 -/* End of CS_STATUS_WAIT_SYNC_WAIT_CONDITION values */ -#define CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT 28 -#define CS_STATUS_WAIT_PROGRESS_WAIT_MASK (0x1 << CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT) -#define CS_STATUS_WAIT_PROGRESS_WAIT_GET(reg_val) \ - (((reg_val)&CS_STATUS_WAIT_PROGRESS_WAIT_MASK) >> CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT) -#define CS_STATUS_WAIT_PROGRESS_WAIT_SET(reg_val, value) \ - (((reg_val) & ~CS_STATUS_WAIT_PROGRESS_WAIT_MASK) | \ - (((value) << CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT) & CS_STATUS_WAIT_PROGRESS_WAIT_MASK)) -#define CS_STATUS_WAIT_PROTM_PEND_SHIFT 29 -#define CS_STATUS_WAIT_PROTM_PEND_MASK (0x1 << CS_STATUS_WAIT_PROTM_PEND_SHIFT) -#define CS_STATUS_WAIT_PROTM_PEND_GET(reg_val) \ - (((reg_val)&CS_STATUS_WAIT_PROTM_PEND_MASK) >> CS_STATUS_WAIT_PROTM_PEND_SHIFT) -#define CS_STATUS_WAIT_PROTM_PEND_SET(reg_val, value) \ - (((reg_val) & ~CS_STATUS_WAIT_PROTM_PEND_MASK) | \ - (((value) << CS_STATUS_WAIT_PROTM_PEND_SHIFT) & CS_STATUS_WAIT_PROTM_PEND_MASK)) -#define CS_STATUS_WAIT_SYNC_WAIT_SHIFT 31 -#define CS_STATUS_WAIT_SYNC_WAIT_MASK (0x1 << CS_STATUS_WAIT_SYNC_WAIT_SHIFT) -#define CS_STATUS_WAIT_SYNC_WAIT_GET(reg_val) \ - (((reg_val)&CS_STATUS_WAIT_SYNC_WAIT_MASK) >> CS_STATUS_WAIT_SYNC_WAIT_SHIFT) -#define CS_STATUS_WAIT_SYNC_WAIT_SET(reg_val, value) \ - (((reg_val) & ~CS_STATUS_WAIT_SYNC_WAIT_MASK) | \ - (((value) << CS_STATUS_WAIT_SYNC_WAIT_SHIFT) & CS_STATUS_WAIT_SYNC_WAIT_MASK)) - -/* CS_STATUS_REQ_RESOURCE register */ -#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT 0 -#define 
CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT) -#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_GET(reg_val) \ - (((reg_val)&CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT) -#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SET(reg_val, value) \ - (((reg_val) & ~CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK) | \ - (((value) << CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK)) -#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT 1 -#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT) -#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_GET(reg_val) \ - (((reg_val)&CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT) -#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SET(reg_val, value) \ - (((reg_val) & ~CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK) | \ - (((value) << CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK)) -#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT 2 -#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT) -#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_GET(reg_val) \ - (((reg_val)&CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT) -#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SET(reg_val, value) \ - (((reg_val) & ~CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK) | \ - (((value) << CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK)) -#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT 3 -#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT) -#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_GET(reg_val) \ - (((reg_val)&CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK) 
>> CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT) -#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SET(reg_val, value) \ - (((reg_val) & ~CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK) | \ - (((value) << CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK)) - -/* CS_STATUS_WAIT_SYNC_POINTER register */ -#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT 0 -#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT) -#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_GET(reg_val) \ - (((reg_val)&CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK) >> CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT) -#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_SET(reg_val, value) \ - (((reg_val) & ~CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK) | \ - (((value) << CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT) & CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK)) - -/* CS_STATUS_WAIT_SYNC_VALUE register */ -#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT 0 -#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK (0xFFFFFFFF << CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT) -#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_GET(reg_val) \ - (((reg_val)&CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK) >> CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT) -#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_SET(reg_val, value) \ - (((reg_val) & ~CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK) | \ - (((value) << CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT) & CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK)) - -/* CS_STATUS_SCOREBOARDS register */ -#define CS_STATUS_SCOREBOARDS_NONZERO_SHIFT (0) -#define CS_STATUS_SCOREBOARDS_NONZERO_MASK \ - ((0xFFFF) << CS_STATUS_SCOREBOARDS_NONZERO_SHIFT) -#define CS_STATUS_SCOREBOARDS_NONZERO_GET(reg_val) \ - (((reg_val)&CS_STATUS_SCOREBOARDS_NONZERO_MASK) >> \ - CS_STATUS_SCOREBOARDS_NONZERO_SHIFT) -#define CS_STATUS_SCOREBOARDS_NONZERO_SET(reg_val, value) \ - (((reg_val) & ~CS_STATUS_SCOREBOARDS_NONZERO_MASK) | \ - (((value) << CS_STATUS_SCOREBOARDS_NONZERO_SHIFT) & \ - 
CS_STATUS_SCOREBOARDS_NONZERO_MASK)) - -/* CS_STATUS_BLOCKED_REASON register */ -#define CS_STATUS_BLOCKED_REASON_REASON_SHIFT (0) -#define CS_STATUS_BLOCKED_REASON_REASON_MASK \ - ((0xF) << CS_STATUS_BLOCKED_REASON_REASON_SHIFT) -#define CS_STATUS_BLOCKED_REASON_REASON_GET(reg_val) \ - (((reg_val)&CS_STATUS_BLOCKED_REASON_REASON_MASK) >> \ - CS_STATUS_BLOCKED_REASON_REASON_SHIFT) -#define CS_STATUS_BLOCKED_REASON_REASON_SET(reg_val, value) \ - (((reg_val) & ~CS_STATUS_BLOCKED_REASON_REASON_MASK) | \ - (((value) << CS_STATUS_BLOCKED_REASON_REASON_SHIFT) & \ - CS_STATUS_BLOCKED_REASON_REASON_MASK)) -/* CS_STATUS_BLOCKED_REASON_reason values */ -#define CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED 0x0 -#define CS_STATUS_BLOCKED_REASON_REASON_WAIT 0x1 -#define CS_STATUS_BLOCKED_REASON_REASON_PROGRESS_WAIT 0x2 -#define CS_STATUS_BLOCKED_REASON_REASON_SYNC_WAIT 0x3 -#define CS_STATUS_BLOCKED_REASON_REASON_DEFERRED 0x4 -#define CS_STATUS_BLOCKED_REASON_REASON_RESOURCE 0x5 -#define CS_STATUS_BLOCKED_REASON_REASON_FLUSH 0x6 -/* End of CS_STATUS_BLOCKED_REASON_reason values */ - -/* CS_FAULT register */ -#define CS_FAULT_EXCEPTION_TYPE_SHIFT 0 -#define CS_FAULT_EXCEPTION_TYPE_MASK (0xFF << CS_FAULT_EXCEPTION_TYPE_SHIFT) -#define CS_FAULT_EXCEPTION_TYPE_GET(reg_val) (((reg_val)&CS_FAULT_EXCEPTION_TYPE_MASK) >> CS_FAULT_EXCEPTION_TYPE_SHIFT) -#define CS_FAULT_EXCEPTION_TYPE_SET(reg_val, value) \ - (((reg_val) & ~CS_FAULT_EXCEPTION_TYPE_MASK) | \ - (((value) << CS_FAULT_EXCEPTION_TYPE_SHIFT) & CS_FAULT_EXCEPTION_TYPE_MASK)) -/* CS_FAULT_EXCEPTION_TYPE values */ -#define CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED 0x0F -#define CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT 0x4B -#define CS_FAULT_EXCEPTION_TYPE_INSTR_INVALID_PC 0x50 -#define CS_FAULT_EXCEPTION_TYPE_INSTR_INVALID_ENC 0x51 -#define CS_FAULT_EXCEPTION_TYPE_INSTR_BARRIER_FAULT 0x55 -#define CS_FAULT_EXCEPTION_TYPE_DATA_INVALID_FAULT 0x58 -#define CS_FAULT_EXCEPTION_TYPE_TILE_RANGE_FAULT 0x59 -#define 
CS_FAULT_EXCEPTION_TYPE_ADDR_RANGE_FAULT 0x5A -#define CS_FAULT_EXCEPTION_TYPE_IMPRECISE_FAULT 0x5B -#define CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT 0x69 -/* End of CS_FAULT_EXCEPTION_TYPE values */ -#define CS_FAULT_EXCEPTION_DATA_SHIFT 8 -#define CS_FAULT_EXCEPTION_DATA_MASK (0xFFFFFF << CS_FAULT_EXCEPTION_DATA_SHIFT) -#define CS_FAULT_EXCEPTION_DATA_GET(reg_val) (((reg_val)&CS_FAULT_EXCEPTION_DATA_MASK) >> CS_FAULT_EXCEPTION_DATA_SHIFT) -#define CS_FAULT_EXCEPTION_DATA_SET(reg_val, value) \ - (((reg_val) & ~CS_FAULT_EXCEPTION_DATA_MASK) | \ - (((value) << CS_FAULT_EXCEPTION_DATA_SHIFT) & CS_FAULT_EXCEPTION_DATA_MASK)) - -/* CS_FATAL register */ -#define CS_FATAL_EXCEPTION_TYPE_SHIFT 0 -#define CS_FATAL_EXCEPTION_TYPE_MASK (0xFF << CS_FATAL_EXCEPTION_TYPE_SHIFT) -#define CS_FATAL_EXCEPTION_TYPE_GET(reg_val) (((reg_val)&CS_FATAL_EXCEPTION_TYPE_MASK) >> CS_FATAL_EXCEPTION_TYPE_SHIFT) -#define CS_FATAL_EXCEPTION_TYPE_SET(reg_val, value) \ - (((reg_val) & ~CS_FATAL_EXCEPTION_TYPE_MASK) | \ - (((value) << CS_FATAL_EXCEPTION_TYPE_SHIFT) & CS_FATAL_EXCEPTION_TYPE_MASK)) -/* CS_FATAL_EXCEPTION_TYPE values */ -#define CS_FATAL_EXCEPTION_TYPE_CS_CONFIG_FAULT 0x40 -#define CS_FATAL_EXCEPTION_TYPE_CS_ENDPOINT_FAULT 0x44 -#define CS_FATAL_EXCEPTION_TYPE_CS_BUS_FAULT 0x48 -#define CS_FATAL_EXCEPTION_TYPE_CS_INVALID_INSTRUCTION 0x49 -#define CS_FATAL_EXCEPTION_TYPE_CS_CALL_STACK_OVERFLOW 0x4A -#define CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR 0x68 -/* End of CS_FATAL_EXCEPTION_TYPE values */ -#define CS_FATAL_EXCEPTION_DATA_SHIFT 8 -#define CS_FATAL_EXCEPTION_DATA_MASK (0xFFFFFF << CS_FATAL_EXCEPTION_DATA_SHIFT) -#define CS_FATAL_EXCEPTION_DATA_GET(reg_val) (((reg_val)&CS_FATAL_EXCEPTION_DATA_MASK) >> CS_FATAL_EXCEPTION_DATA_SHIFT) -#define CS_FATAL_EXCEPTION_DATA_SET(reg_val, value) \ - (((reg_val) & ~CS_FATAL_EXCEPTION_DATA_MASK) | \ - (((value) << CS_FATAL_EXCEPTION_DATA_SHIFT) & CS_FATAL_EXCEPTION_DATA_MASK)) - -/* CS_FAULT_INFO register */ -#define 
CS_FAULT_INFO_EXCEPTION_DATA_SHIFT 0 -#define CS_FAULT_INFO_EXCEPTION_DATA_MASK (0xFFFFFFFFFFFFFFFF << CS_FAULT_INFO_EXCEPTION_DATA_SHIFT) -#define CS_FAULT_INFO_EXCEPTION_DATA_GET(reg_val) \ - (((reg_val)&CS_FAULT_INFO_EXCEPTION_DATA_MASK) >> CS_FAULT_INFO_EXCEPTION_DATA_SHIFT) -#define CS_FAULT_INFO_EXCEPTION_DATA_SET(reg_val, value) \ - (((reg_val) & ~CS_FAULT_INFO_EXCEPTION_DATA_MASK) | \ - (((value) << CS_FAULT_INFO_EXCEPTION_DATA_SHIFT) & CS_FAULT_INFO_EXCEPTION_DATA_MASK)) - -/* CS_FATAL_INFO register */ -#define CS_FATAL_INFO_EXCEPTION_DATA_SHIFT 0 -#define CS_FATAL_INFO_EXCEPTION_DATA_MASK (0xFFFFFFFFFFFFFFFF << CS_FATAL_INFO_EXCEPTION_DATA_SHIFT) -#define CS_FATAL_INFO_EXCEPTION_DATA_GET(reg_val) \ - (((reg_val)&CS_FATAL_INFO_EXCEPTION_DATA_MASK) >> CS_FATAL_INFO_EXCEPTION_DATA_SHIFT) -#define CS_FATAL_INFO_EXCEPTION_DATA_SET(reg_val, value) \ - (((reg_val) & ~CS_FATAL_INFO_EXCEPTION_DATA_MASK) | \ - (((value) << CS_FATAL_INFO_EXCEPTION_DATA_SHIFT) & CS_FATAL_INFO_EXCEPTION_DATA_MASK)) - -/* CS_HEAP_VT_START register */ -#define CS_HEAP_VT_START_VALUE_SHIFT 0 -#define CS_HEAP_VT_START_VALUE_MASK (0xFFFFFFFF << CS_HEAP_VT_START_VALUE_SHIFT) -#define CS_HEAP_VT_START_VALUE_GET(reg_val) (((reg_val)&CS_HEAP_VT_START_VALUE_MASK) >> CS_HEAP_VT_START_VALUE_SHIFT) -#define CS_HEAP_VT_START_VALUE_SET(reg_val, value) \ - (((reg_val) & ~CS_HEAP_VT_START_VALUE_MASK) | \ - (((value) << CS_HEAP_VT_START_VALUE_SHIFT) & CS_HEAP_VT_START_VALUE_MASK)) - -/* CS_HEAP_VT_END register */ -#define CS_HEAP_VT_END_VALUE_SHIFT 0 -#define CS_HEAP_VT_END_VALUE_MASK (0xFFFFFFFF << CS_HEAP_VT_END_VALUE_SHIFT) -#define CS_HEAP_VT_END_VALUE_GET(reg_val) (((reg_val)&CS_HEAP_VT_END_VALUE_MASK) >> CS_HEAP_VT_END_VALUE_SHIFT) -#define CS_HEAP_VT_END_VALUE_SET(reg_val, value) \ - (((reg_val) & ~CS_HEAP_VT_END_VALUE_MASK) | (((value) << CS_HEAP_VT_END_VALUE_SHIFT) & CS_HEAP_VT_END_VALUE_MASK)) - -/* CS_HEAP_FRAG_END register */ -#define CS_HEAP_FRAG_END_VALUE_SHIFT 0 -#define 
CS_HEAP_FRAG_END_VALUE_MASK (0xFFFFFFFF << CS_HEAP_FRAG_END_VALUE_SHIFT) -#define CS_HEAP_FRAG_END_VALUE_GET(reg_val) (((reg_val)&CS_HEAP_FRAG_END_VALUE_MASK) >> CS_HEAP_FRAG_END_VALUE_SHIFT) -#define CS_HEAP_FRAG_END_VALUE_SET(reg_val, value) \ - (((reg_val) & ~CS_HEAP_FRAG_END_VALUE_MASK) | \ - (((value) << CS_HEAP_FRAG_END_VALUE_SHIFT) & CS_HEAP_FRAG_END_VALUE_MASK)) - -/* CS_HEAP_ADDRESS register */ -#define CS_HEAP_ADDRESS_POINTER_SHIFT 0 -#define CS_HEAP_ADDRESS_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_HEAP_ADDRESS_POINTER_SHIFT) -#define CS_HEAP_ADDRESS_POINTER_GET(reg_val) (((reg_val)&CS_HEAP_ADDRESS_POINTER_MASK) >> CS_HEAP_ADDRESS_POINTER_SHIFT) -#define CS_HEAP_ADDRESS_POINTER_SET(reg_val, value) \ - (((reg_val) & ~CS_HEAP_ADDRESS_POINTER_MASK) | \ - (((value) << CS_HEAP_ADDRESS_POINTER_SHIFT) & CS_HEAP_ADDRESS_POINTER_MASK)) -/* End of CS_KERNEL_OUTPUT_BLOCK register set definitions */ - -/* CS_USER_INPUT_BLOCK register set definitions */ - -/* CS_INSERT register */ -#define CS_INSERT_VALUE_SHIFT 0 -#define CS_INSERT_VALUE_MASK (0xFFFFFFFFFFFFFFFF << CS_INSERT_VALUE_SHIFT) -#define CS_INSERT_VALUE_GET(reg_val) (((reg_val)&CS_INSERT_VALUE_MASK) >> CS_INSERT_VALUE_SHIFT) -#define CS_INSERT_VALUE_SET(reg_val, value) \ - (((reg_val) & ~CS_INSERT_VALUE_MASK) | (((value) << CS_INSERT_VALUE_SHIFT) & CS_INSERT_VALUE_MASK)) - -/* CS_EXTRACT_INIT register */ -#define CS_EXTRACT_INIT_VALUE_SHIFT 0 -#define CS_EXTRACT_INIT_VALUE_MASK (0xFFFFFFFFFFFFFFFF << CS_EXTRACT_INIT_VALUE_SHIFT) -#define CS_EXTRACT_INIT_VALUE_GET(reg_val) (((reg_val)&CS_EXTRACT_INIT_VALUE_MASK) >> CS_EXTRACT_INIT_VALUE_SHIFT) -#define CS_EXTRACT_INIT_VALUE_SET(reg_val, value) \ - (((reg_val) & ~CS_EXTRACT_INIT_VALUE_MASK) | \ - (((value) << CS_EXTRACT_INIT_VALUE_SHIFT) & CS_EXTRACT_INIT_VALUE_MASK)) -/* End of CS_USER_INPUT_BLOCK register set definitions */ - -/* CS_USER_OUTPUT_BLOCK register set definitions */ - -/* CS_EXTRACT register */ -#define CS_EXTRACT_VALUE_SHIFT 0 -#define 
CS_EXTRACT_VALUE_MASK (0xFFFFFFFFFFFFFFFF << CS_EXTRACT_VALUE_SHIFT) -#define CS_EXTRACT_VALUE_GET(reg_val) (((reg_val)&CS_EXTRACT_VALUE_MASK) >> CS_EXTRACT_VALUE_SHIFT) -#define CS_EXTRACT_VALUE_SET(reg_val, value) \ - (((reg_val) & ~CS_EXTRACT_VALUE_MASK) | (((value) << CS_EXTRACT_VALUE_SHIFT) & CS_EXTRACT_VALUE_MASK)) - -/* CS_ACTIVE register */ -#define CS_ACTIVE_HW_ACTIVE_SHIFT 0 -#define CS_ACTIVE_HW_ACTIVE_MASK (0x1 << CS_ACTIVE_HW_ACTIVE_SHIFT) -#define CS_ACTIVE_HW_ACTIVE_GET(reg_val) (((reg_val)&CS_ACTIVE_HW_ACTIVE_MASK) >> CS_ACTIVE_HW_ACTIVE_SHIFT) -#define CS_ACTIVE_HW_ACTIVE_SET(reg_val, value) \ - (((reg_val) & ~CS_ACTIVE_HW_ACTIVE_MASK) | (((value) << CS_ACTIVE_HW_ACTIVE_SHIFT) & CS_ACTIVE_HW_ACTIVE_MASK)) -/* End of CS_USER_OUTPUT_BLOCK register set definitions */ - -/* CSG_INPUT_BLOCK register set definitions */ - -/* CSG_REQ register */ -#define CSG_REQ_STATE_SHIFT 0 -#define CSG_REQ_STATE_MASK (0x7 << CSG_REQ_STATE_SHIFT) -#define CSG_REQ_STATE_GET(reg_val) (((reg_val)&CSG_REQ_STATE_MASK) >> CSG_REQ_STATE_SHIFT) -#define CSG_REQ_STATE_SET(reg_val, value) \ - (((reg_val) & ~CSG_REQ_STATE_MASK) | (((value) << CSG_REQ_STATE_SHIFT) & CSG_REQ_STATE_MASK)) -/* CSG_REQ_STATE values */ -#define CSG_REQ_STATE_TERMINATE 0x0 -#define CSG_REQ_STATE_START 0x1 -#define CSG_REQ_STATE_SUSPEND 0x2 -#define CSG_REQ_STATE_RESUME 0x3 -/* End of CSG_REQ_STATE values */ -#define CSG_REQ_EP_CFG_SHIFT 4 -#define CSG_REQ_EP_CFG_MASK (0x1 << CSG_REQ_EP_CFG_SHIFT) -#define CSG_REQ_EP_CFG_GET(reg_val) (((reg_val)&CSG_REQ_EP_CFG_MASK) >> CSG_REQ_EP_CFG_SHIFT) -#define CSG_REQ_EP_CFG_SET(reg_val, value) \ - (((reg_val) & ~CSG_REQ_EP_CFG_MASK) | (((value) << CSG_REQ_EP_CFG_SHIFT) & CSG_REQ_EP_CFG_MASK)) -#define CSG_REQ_STATUS_UPDATE_SHIFT 5 -#define CSG_REQ_STATUS_UPDATE_MASK (0x1 << CSG_REQ_STATUS_UPDATE_SHIFT) -#define CSG_REQ_STATUS_UPDATE_GET(reg_val) (((reg_val)&CSG_REQ_STATUS_UPDATE_MASK) >> CSG_REQ_STATUS_UPDATE_SHIFT) -#define CSG_REQ_STATUS_UPDATE_SET(reg_val, 
value) \ - (((reg_val) & ~CSG_REQ_STATUS_UPDATE_MASK) | \ - (((value) << CSG_REQ_STATUS_UPDATE_SHIFT) & CSG_REQ_STATUS_UPDATE_MASK)) -#define CSG_REQ_SYNC_UPDATE_SHIFT 28 -#define CSG_REQ_SYNC_UPDATE_MASK (0x1 << CSG_REQ_SYNC_UPDATE_SHIFT) -#define CSG_REQ_SYNC_UPDATE_GET(reg_val) (((reg_val)&CSG_REQ_SYNC_UPDATE_MASK) >> CSG_REQ_SYNC_UPDATE_SHIFT) -#define CSG_REQ_SYNC_UPDATE_SET(reg_val, value) \ - (((reg_val) & ~CSG_REQ_SYNC_UPDATE_MASK) | (((value) << CSG_REQ_SYNC_UPDATE_SHIFT) & CSG_REQ_SYNC_UPDATE_MASK)) -#define CSG_REQ_IDLE_SHIFT 29 -#define CSG_REQ_IDLE_MASK (0x1 << CSG_REQ_IDLE_SHIFT) -#define CSG_REQ_IDLE_GET(reg_val) (((reg_val)&CSG_REQ_IDLE_MASK) >> CSG_REQ_IDLE_SHIFT) -#define CSG_REQ_IDLE_SET(reg_val, value) \ - (((reg_val) & ~CSG_REQ_IDLE_MASK) | (((value) << CSG_REQ_IDLE_SHIFT) & CSG_REQ_IDLE_MASK)) -#define CSG_REQ_DOORBELL_SHIFT 30 -#define CSG_REQ_DOORBELL_MASK (0x1 << CSG_REQ_DOORBELL_SHIFT) -#define CSG_REQ_DOORBELL_GET(reg_val) (((reg_val)&CSG_REQ_DOORBELL_MASK) >> CSG_REQ_DOORBELL_SHIFT) -#define CSG_REQ_DOORBELL_SET(reg_val, value) \ - (((reg_val) & ~CSG_REQ_DOORBELL_MASK) | (((value) << CSG_REQ_DOORBELL_SHIFT) & CSG_REQ_DOORBELL_MASK)) -#define CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT 31 -#define CSG_REQ_PROGRESS_TIMER_EVENT_MASK (0x1 << CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT) -#define CSG_REQ_PROGRESS_TIMER_EVENT_GET(reg_val) \ - (((reg_val)&CSG_REQ_PROGRESS_TIMER_EVENT_MASK) >> CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT) -#define CSG_REQ_PROGRESS_TIMER_EVENT_SET(reg_val, value) \ - (((reg_val) & ~CSG_REQ_PROGRESS_TIMER_EVENT_MASK) | \ - (((value) << CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT) & CSG_REQ_PROGRESS_TIMER_EVENT_MASK)) - -/* CSG_ACK_IRQ_MASK register */ -#define CSG_ACK_IRQ_MASK_STATE_SHIFT 0 -#define CSG_ACK_IRQ_MASK_STATE_MASK (0x7 << CSG_ACK_IRQ_MASK_STATE_SHIFT) -#define CSG_ACK_IRQ_MASK_STATE_GET(reg_val) (((reg_val)&CSG_ACK_IRQ_MASK_STATE_MASK) >> CSG_ACK_IRQ_MASK_STATE_SHIFT) -#define CSG_ACK_IRQ_MASK_STATE_SET(reg_val, value) \ - (((reg_val) & 
~CSG_ACK_IRQ_MASK_STATE_MASK) | \ - (((value) << CSG_ACK_IRQ_MASK_STATE_SHIFT) & CSG_ACK_IRQ_MASK_STATE_MASK)) -/* CSG_ACK_IRQ_MASK_STATE values */ -#define CSG_ACK_IRQ_MASK_STATE_DISABLED 0x0 -#define CSG_ACK_IRQ_MASK_STATE_ENABLED 0x7 -/* End of CSG_ACK_IRQ_MASK_STATE values */ -#define CSG_ACK_IRQ_MASK_EP_CFG_SHIFT 4 -#define CSG_ACK_IRQ_MASK_EP_CFG_MASK (0x1 << CSG_ACK_IRQ_MASK_EP_CFG_SHIFT) -#define CSG_ACK_IRQ_MASK_EP_CFG_GET(reg_val) (((reg_val)&CSG_ACK_IRQ_MASK_EP_CFG_MASK) >> CSG_ACK_IRQ_MASK_EP_CFG_SHIFT) -#define CSG_ACK_IRQ_MASK_EP_CFG_SET(reg_val, value) \ - (((reg_val) & ~CSG_ACK_IRQ_MASK_EP_CFG_MASK) | \ - (((value) << CSG_ACK_IRQ_MASK_EP_CFG_SHIFT) & CSG_ACK_IRQ_MASK_EP_CFG_MASK)) -#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT 5 -#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK (0x1 << CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT) -#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_GET(reg_val) \ - (((reg_val)&CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK) >> CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT) -#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_SET(reg_val, value) \ - (((reg_val) & ~CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK) | \ - (((value) << CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT) & CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK)) -#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT 28 -#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK (0x1 << CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT) -#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_GET(reg_val) \ - (((reg_val)&CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK) >> CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT) -#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_SET(reg_val, value) \ - (((reg_val) & ~CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK) | \ - (((value) << CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT) & CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK)) -#define CSG_ACK_IRQ_MASK_IDLE_SHIFT 29 -#define CSG_ACK_IRQ_MASK_IDLE_MASK (0x1 << CSG_ACK_IRQ_MASK_IDLE_SHIFT) -#define CSG_ACK_IRQ_MASK_IDLE_GET(reg_val) (((reg_val)&CSG_ACK_IRQ_MASK_IDLE_MASK) >> CSG_ACK_IRQ_MASK_IDLE_SHIFT) -#define CSG_ACK_IRQ_MASK_IDLE_SET(reg_val, value) \ - (((reg_val) & 
~CSG_ACK_IRQ_MASK_IDLE_MASK) | \ - (((value) << CSG_ACK_IRQ_MASK_IDLE_SHIFT) & CSG_ACK_IRQ_MASK_IDLE_MASK)) -#define CSG_ACK_IRQ_MASK_DOORBELL_SHIFT 30 -#define CSG_ACK_IRQ_MASK_DOORBELL_MASK (0x1 << CSG_ACK_IRQ_MASK_DOORBELL_SHIFT) -#define CSG_ACK_IRQ_MASK_DOORBELL_GET(reg_val) \ - (((reg_val)&CSG_ACK_IRQ_MASK_DOORBELL_MASK) >> CSG_ACK_IRQ_MASK_DOORBELL_SHIFT) -#define CSG_ACK_IRQ_MASK_DOORBELL_SET(reg_val, value) \ - (((reg_val) & ~CSG_ACK_IRQ_MASK_DOORBELL_MASK) | \ - (((value) << CSG_ACK_IRQ_MASK_DOORBELL_SHIFT) & CSG_ACK_IRQ_MASK_DOORBELL_MASK)) -#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT 31 -#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK (0x1 << CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT) -#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_GET(reg_val) \ - (((reg_val)&CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK) >> CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT) -#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SET(reg_val, value) \ - (((reg_val) & ~CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK) | \ - (((value) << CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT) & CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK)) - -/* CSG_EP_REQ register */ -#define CSG_EP_REQ_COMPUTE_EP_SHIFT 0 -#define CSG_EP_REQ_COMPUTE_EP_MASK (0xFF << CSG_EP_REQ_COMPUTE_EP_SHIFT) -#define CSG_EP_REQ_COMPUTE_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_COMPUTE_EP_MASK) >> CSG_EP_REQ_COMPUTE_EP_SHIFT) -#define CSG_EP_REQ_COMPUTE_EP_SET(reg_val, value) \ - (((reg_val) & ~CSG_EP_REQ_COMPUTE_EP_MASK) | \ - (((value) << CSG_EP_REQ_COMPUTE_EP_SHIFT) & CSG_EP_REQ_COMPUTE_EP_MASK)) -#define CSG_EP_REQ_FRAGMENT_EP_SHIFT 8 -#define CSG_EP_REQ_FRAGMENT_EP_MASK (0xFF << CSG_EP_REQ_FRAGMENT_EP_SHIFT) -#define CSG_EP_REQ_FRAGMENT_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_FRAGMENT_EP_MASK) >> CSG_EP_REQ_FRAGMENT_EP_SHIFT) -#define CSG_EP_REQ_FRAGMENT_EP_SET(reg_val, value) \ - (((reg_val) & ~CSG_EP_REQ_FRAGMENT_EP_MASK) | \ - (((value) << CSG_EP_REQ_FRAGMENT_EP_SHIFT) & CSG_EP_REQ_FRAGMENT_EP_MASK)) -#define 
CSG_EP_REQ_TILER_EP_SHIFT 16 -#define CSG_EP_REQ_TILER_EP_MASK (0xF << CSG_EP_REQ_TILER_EP_SHIFT) -#define CSG_EP_REQ_TILER_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_TILER_EP_MASK) >> CSG_EP_REQ_TILER_EP_SHIFT) -#define CSG_EP_REQ_TILER_EP_SET(reg_val, value) \ - (((reg_val) & ~CSG_EP_REQ_TILER_EP_MASK) | (((value) << CSG_EP_REQ_TILER_EP_SHIFT) & CSG_EP_REQ_TILER_EP_MASK)) -#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT 20 -#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK (0x1 << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) -#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_GET(reg_val) \ - (((reg_val)&CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK) >> CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) -#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_SET(reg_val, value) \ - (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK) | \ - (((value) << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) & CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK)) -#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT 21 -#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK (0x1 << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) -#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_GET(reg_val) \ - (((reg_val)&CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) >> CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) -#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SET(reg_val, value) \ - (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) | \ - (((value) << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) & CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK)) -#define CSG_EP_REQ_PRIORITY_SHIFT 28 -#define CSG_EP_REQ_PRIORITY_MASK (0xF << CSG_EP_REQ_PRIORITY_SHIFT) -#define CSG_EP_REQ_PRIORITY_GET(reg_val) (((reg_val)&CSG_EP_REQ_PRIORITY_MASK) >> CSG_EP_REQ_PRIORITY_SHIFT) -#define CSG_EP_REQ_PRIORITY_SET(reg_val, value) \ - (((reg_val) & ~CSG_EP_REQ_PRIORITY_MASK) | (((value) << CSG_EP_REQ_PRIORITY_SHIFT) & CSG_EP_REQ_PRIORITY_MASK)) - -/* CSG_SUSPEND_BUF register */ -#define CSG_SUSPEND_BUF_POINTER_SHIFT 0 -#define CSG_SUSPEND_BUF_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CSG_SUSPEND_BUF_POINTER_SHIFT) -#define CSG_SUSPEND_BUF_POINTER_GET(reg_val) (((reg_val)&CSG_SUSPEND_BUF_POINTER_MASK) >> 
CSG_SUSPEND_BUF_POINTER_SHIFT) -#define CSG_SUSPEND_BUF_POINTER_SET(reg_val, value) \ - (((reg_val) & ~CSG_SUSPEND_BUF_POINTER_MASK) | \ - (((value) << CSG_SUSPEND_BUF_POINTER_SHIFT) & CSG_SUSPEND_BUF_POINTER_MASK)) - -/* CSG_PROTM_SUSPEND_BUF register */ -#define CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT 0 -#define CSG_PROTM_SUSPEND_BUF_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) -#define CSG_PROTM_SUSPEND_BUF_POINTER_GET(reg_val) \ - (((reg_val)&CSG_PROTM_SUSPEND_BUF_POINTER_MASK) >> CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) -#define CSG_PROTM_SUSPEND_BUF_POINTER_SET(reg_val, value) \ - (((reg_val) & ~CSG_PROTM_SUSPEND_BUF_POINTER_MASK) | \ - (((value) << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) & CSG_PROTM_SUSPEND_BUF_POINTER_MASK)) - -/* End of CSG_INPUT_BLOCK register set definitions */ - -/* CSG_OUTPUT_BLOCK register set definitions */ - -/* CSG_ACK register */ -#define CSG_ACK_STATE_SHIFT 0 -#define CSG_ACK_STATE_MASK (0x7 << CSG_ACK_STATE_SHIFT) -#define CSG_ACK_STATE_GET(reg_val) (((reg_val)&CSG_ACK_STATE_MASK) >> CSG_ACK_STATE_SHIFT) -#define CSG_ACK_STATE_SET(reg_val, value) \ - (((reg_val) & ~CSG_ACK_STATE_MASK) | (((value) << CSG_ACK_STATE_SHIFT) & CSG_ACK_STATE_MASK)) -/* CSG_ACK_STATE values */ -#define CSG_ACK_STATE_TERMINATE 0x0 -#define CSG_ACK_STATE_START 0x1 -#define CSG_ACK_STATE_SUSPEND 0x2 -#define CSG_ACK_STATE_RESUME 0x3 -/* End of CSG_ACK_STATE values */ -#define CSG_ACK_EP_CFG_SHIFT 4 -#define CSG_ACK_EP_CFG_MASK (0x1 << CSG_ACK_EP_CFG_SHIFT) -#define CSG_ACK_EP_CFG_GET(reg_val) (((reg_val)&CSG_ACK_EP_CFG_MASK) >> CSG_ACK_EP_CFG_SHIFT) -#define CSG_ACK_EP_CFG_SET(reg_val, value) \ - (((reg_val) & ~CSG_ACK_EP_CFG_MASK) | (((value) << CSG_ACK_EP_CFG_SHIFT) & CSG_ACK_EP_CFG_MASK)) -#define CSG_ACK_STATUS_UPDATE_SHIFT 5 -#define CSG_ACK_STATUS_UPDATE_MASK (0x1 << CSG_ACK_STATUS_UPDATE_SHIFT) -#define CSG_ACK_STATUS_UPDATE_GET(reg_val) (((reg_val)&CSG_ACK_STATUS_UPDATE_MASK) >> CSG_ACK_STATUS_UPDATE_SHIFT) -#define 
CSG_ACK_STATUS_UPDATE_SET(reg_val, value) \ - (((reg_val) & ~CSG_ACK_STATUS_UPDATE_MASK) | \ - (((value) << CSG_ACK_STATUS_UPDATE_SHIFT) & CSG_ACK_STATUS_UPDATE_MASK)) -#define CSG_ACK_SYNC_UPDATE_SHIFT 28 -#define CSG_ACK_SYNC_UPDATE_MASK (0x1 << CSG_ACK_SYNC_UPDATE_SHIFT) -#define CSG_ACK_SYNC_UPDATE_GET(reg_val) (((reg_val)&CSG_ACK_SYNC_UPDATE_MASK) >> CSG_ACK_SYNC_UPDATE_SHIFT) -#define CSG_ACK_SYNC_UPDATE_SET(reg_val, value) \ - (((reg_val) & ~CSG_ACK_SYNC_UPDATE_MASK) | (((value) << CSG_ACK_SYNC_UPDATE_SHIFT) & CSG_ACK_SYNC_UPDATE_MASK)) -#define CSG_ACK_IDLE_SHIFT 29 -#define CSG_ACK_IDLE_MASK (0x1 << CSG_ACK_IDLE_SHIFT) -#define CSG_ACK_IDLE_GET(reg_val) (((reg_val)&CSG_ACK_IDLE_MASK) >> CSG_ACK_IDLE_SHIFT) -#define CSG_ACK_IDLE_SET(reg_val, value) \ - (((reg_val) & ~CSG_ACK_IDLE_MASK) | (((value) << CSG_ACK_IDLE_SHIFT) & CSG_ACK_IDLE_MASK)) -#define CSG_ACK_DOORBELL_SHIFT 30 -#define CSG_ACK_DOORBELL_MASK (0x1 << CSG_ACK_DOORBELL_SHIFT) -#define CSG_ACK_DOORBELL_GET(reg_val) (((reg_val)&CSG_ACK_DOORBELL_MASK) >> CSG_ACK_DOORBELL_SHIFT) -#define CSG_ACK_DOORBELL_SET(reg_val, value) \ - (((reg_val) & ~CSG_ACK_DOORBELL_MASK) | (((value) << CSG_ACK_DOORBELL_SHIFT) & CSG_ACK_DOORBELL_MASK)) -#define CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT 31 -#define CSG_ACK_PROGRESS_TIMER_EVENT_MASK (0x1 << CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT) -#define CSG_ACK_PROGRESS_TIMER_EVENT_GET(reg_val) \ - (((reg_val)&CSG_ACK_PROGRESS_TIMER_EVENT_MASK) >> CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT) -#define CSG_ACK_PROGRESS_TIMER_EVENT_SET(reg_val, value) \ - (((reg_val) & ~CSG_ACK_PROGRESS_TIMER_EVENT_MASK) | \ - (((value) << CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT) & CSG_ACK_PROGRESS_TIMER_EVENT_MASK)) - -/* CSG_STATUS_EP_CURRENT register */ -#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT 0 -#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK (0xFF << CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT) -#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_GET(reg_val) \ - (((reg_val)&CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK) >> 
CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT) -#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_SET(reg_val, value) \ - (((reg_val) & ~CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK) | \ - (((value) << CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT) & CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK)) -#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT 8 -#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK (0xFF << CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT) -#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_GET(reg_val) \ - (((reg_val)&CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK) >> CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT) -#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SET(reg_val, value) \ - (((reg_val) & ~CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK) | \ - (((value) << CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT) & CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK)) -#define CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT 16 -#define CSG_STATUS_EP_CURRENT_TILER_EP_MASK (0xF << CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT) -#define CSG_STATUS_EP_CURRENT_TILER_EP_GET(reg_val) \ - (((reg_val)&CSG_STATUS_EP_CURRENT_TILER_EP_MASK) >> CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT) -#define CSG_STATUS_EP_CURRENT_TILER_EP_SET(reg_val, value) \ - (((reg_val) & ~CSG_STATUS_EP_CURRENT_TILER_EP_MASK) | \ - (((value) << CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT) & CSG_STATUS_EP_CURRENT_TILER_EP_MASK)) - -/* CSG_STATUS_EP_REQ register */ -#define CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT 0 -#define CSG_STATUS_EP_REQ_COMPUTE_EP_MASK (0xFF << CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT) -#define CSG_STATUS_EP_REQ_COMPUTE_EP_GET(reg_val) \ - (((reg_val)&CSG_STATUS_EP_REQ_COMPUTE_EP_MASK) >> CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT) -#define CSG_STATUS_EP_REQ_COMPUTE_EP_SET(reg_val, value) \ - (((reg_val) & ~CSG_STATUS_EP_REQ_COMPUTE_EP_MASK) | \ - (((value) << CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT) & CSG_STATUS_EP_REQ_COMPUTE_EP_MASK)) -#define CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT 8 -#define CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK (0xFF << CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT) -#define CSG_STATUS_EP_REQ_FRAGMENT_EP_GET(reg_val) \ - 
(((reg_val)&CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK) >> CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT) -#define CSG_STATUS_EP_REQ_FRAGMENT_EP_SET(reg_val, value) \ - (((reg_val) & ~CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK) | \ - (((value) << CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT) & CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK)) -#define CSG_STATUS_EP_REQ_TILER_EP_SHIFT 16 -#define CSG_STATUS_EP_REQ_TILER_EP_MASK (0xF << CSG_STATUS_EP_REQ_TILER_EP_SHIFT) -#define CSG_STATUS_EP_REQ_TILER_EP_GET(reg_val) \ - (((reg_val)&CSG_STATUS_EP_REQ_TILER_EP_MASK) >> CSG_STATUS_EP_REQ_TILER_EP_SHIFT) -#define CSG_STATUS_EP_REQ_TILER_EP_SET(reg_val, value) \ - (((reg_val) & ~CSG_STATUS_EP_REQ_TILER_EP_MASK) | \ - (((value) << CSG_STATUS_EP_REQ_TILER_EP_SHIFT) & CSG_STATUS_EP_REQ_TILER_EP_MASK)) -#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT 20 -#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK (0x1 << CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) -#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_GET(reg_val) \ - (((reg_val)&CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK) >> CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) -#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SET(reg_val, value) \ - (((reg_val) & ~CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK) | \ - (((value) << CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) & CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK)) -#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT 21 -#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK (0x1 << CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) -#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_GET(reg_val) \ - (((reg_val)&CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) >> CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) -#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SET(reg_val, value) \ - (((reg_val) & ~CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) | \ - (((value) << CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) & CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK)) - -/* End of CSG_OUTPUT_BLOCK register set definitions */ - -/* STREAM_CONTROL_BLOCK register set definitions */ - -/* 
STREAM_FEATURES register */ -#define STREAM_FEATURES_WORK_REGISTERS_SHIFT 0 -#define STREAM_FEATURES_WORK_REGISTERS_MASK (0xFF << STREAM_FEATURES_WORK_REGISTERS_SHIFT) -#define STREAM_FEATURES_WORK_REGISTERS_GET(reg_val) \ - (((reg_val)&STREAM_FEATURES_WORK_REGISTERS_MASK) >> STREAM_FEATURES_WORK_REGISTERS_SHIFT) -#define STREAM_FEATURES_WORK_REGISTERS_SET(reg_val, value) \ - (((reg_val) & ~STREAM_FEATURES_WORK_REGISTERS_MASK) | \ - (((value) << STREAM_FEATURES_WORK_REGISTERS_SHIFT) & STREAM_FEATURES_WORK_REGISTERS_MASK)) -#define STREAM_FEATURES_SCOREBOARDS_SHIFT 8 -#define STREAM_FEATURES_SCOREBOARDS_MASK (0xFF << STREAM_FEATURES_SCOREBOARDS_SHIFT) -#define STREAM_FEATURES_SCOREBOARDS_GET(reg_val) \ - (((reg_val)&STREAM_FEATURES_SCOREBOARDS_MASK) >> STREAM_FEATURES_SCOREBOARDS_SHIFT) -#define STREAM_FEATURES_SCOREBOARDS_SET(reg_val, value) \ - (((reg_val) & ~STREAM_FEATURES_SCOREBOARDS_MASK) | \ - (((value) << STREAM_FEATURES_SCOREBOARDS_SHIFT) & STREAM_FEATURES_SCOREBOARDS_MASK)) -#define STREAM_FEATURES_COMPUTE_SHIFT 16 -#define STREAM_FEATURES_COMPUTE_MASK (0x1 << STREAM_FEATURES_COMPUTE_SHIFT) -#define STREAM_FEATURES_COMPUTE_GET(reg_val) (((reg_val)&STREAM_FEATURES_COMPUTE_MASK) >> STREAM_FEATURES_COMPUTE_SHIFT) -#define STREAM_FEATURES_COMPUTE_SET(reg_val, value) \ - (((reg_val) & ~STREAM_FEATURES_COMPUTE_MASK) | \ - (((value) << STREAM_FEATURES_COMPUTE_SHIFT) & STREAM_FEATURES_COMPUTE_MASK)) -#define STREAM_FEATURES_FRAGMENT_SHIFT 17 -#define STREAM_FEATURES_FRAGMENT_MASK (0x1 << STREAM_FEATURES_FRAGMENT_SHIFT) -#define STREAM_FEATURES_FRAGMENT_GET(reg_val) \ - (((reg_val)&STREAM_FEATURES_FRAGMENT_MASK) >> STREAM_FEATURES_FRAGMENT_SHIFT) -#define STREAM_FEATURES_FRAGMENT_SET(reg_val, value) \ - (((reg_val) & ~STREAM_FEATURES_FRAGMENT_MASK) | \ - (((value) << STREAM_FEATURES_FRAGMENT_SHIFT) & STREAM_FEATURES_FRAGMENT_MASK)) -#define STREAM_FEATURES_TILER_SHIFT 18 -#define STREAM_FEATURES_TILER_MASK (0x1 << STREAM_FEATURES_TILER_SHIFT) -#define 
STREAM_FEATURES_TILER_GET(reg_val) (((reg_val)&STREAM_FEATURES_TILER_MASK) >> STREAM_FEATURES_TILER_SHIFT) -#define STREAM_FEATURES_TILER_SET(reg_val, value) \ - (((reg_val) & ~STREAM_FEATURES_TILER_MASK) | \ - (((value) << STREAM_FEATURES_TILER_SHIFT) & STREAM_FEATURES_TILER_MASK)) - -/* STREAM_INPUT_VA register */ -#define STREAM_INPUT_VA_VALUE_SHIFT 0 -#define STREAM_INPUT_VA_VALUE_MASK (0xFFFFFFFF << STREAM_INPUT_VA_VALUE_SHIFT) -#define STREAM_INPUT_VA_VALUE_GET(reg_val) (((reg_val)&STREAM_INPUT_VA_VALUE_MASK) >> STREAM_INPUT_VA_VALUE_SHIFT) -#define STREAM_INPUT_VA_VALUE_SET(reg_val, value) \ - (((reg_val) & ~STREAM_INPUT_VA_VALUE_MASK) | \ - (((value) << STREAM_INPUT_VA_VALUE_SHIFT) & STREAM_INPUT_VA_VALUE_MASK)) - -/* STREAM_OUTPUT_VA register */ -#define STREAM_OUTPUT_VA_VALUE_SHIFT 0 -#define STREAM_OUTPUT_VA_VALUE_MASK (0xFFFFFFFF << STREAM_OUTPUT_VA_VALUE_SHIFT) -#define STREAM_OUTPUT_VA_VALUE_GET(reg_val) (((reg_val)&STREAM_OUTPUT_VA_VALUE_MASK) >> STREAM_OUTPUT_VA_VALUE_SHIFT) -#define STREAM_OUTPUT_VA_VALUE_SET(reg_val, value) \ - (((reg_val) & ~STREAM_OUTPUT_VA_VALUE_MASK) | \ - (((value) << STREAM_OUTPUT_VA_VALUE_SHIFT) & STREAM_OUTPUT_VA_VALUE_MASK)) -/* End of STREAM_CONTROL_BLOCK register set definitions */ - -/* GLB_INPUT_BLOCK register set definitions */ - -/* GLB_REQ register */ -#define GLB_REQ_HALT_SHIFT 0 -#define GLB_REQ_HALT_MASK (0x1 << GLB_REQ_HALT_SHIFT) -#define GLB_REQ_HALT_GET(reg_val) (((reg_val)&GLB_REQ_HALT_MASK) >> GLB_REQ_HALT_SHIFT) -#define GLB_REQ_HALT_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_HALT_MASK) | (((value) << GLB_REQ_HALT_SHIFT) & GLB_REQ_HALT_MASK)) -#define GLB_REQ_CFG_PROGRESS_TIMER_SHIFT 1 -#define GLB_REQ_CFG_PROGRESS_TIMER_MASK (0x1 << GLB_REQ_CFG_PROGRESS_TIMER_SHIFT) -#define GLB_REQ_CFG_PROGRESS_TIMER_GET(reg_val) \ - (((reg_val)&GLB_REQ_CFG_PROGRESS_TIMER_MASK) >> GLB_REQ_CFG_PROGRESS_TIMER_SHIFT) -#define GLB_REQ_CFG_PROGRESS_TIMER_SET(reg_val, value) \ - (((reg_val) & 
~GLB_REQ_CFG_PROGRESS_TIMER_MASK) | \ - (((value) << GLB_REQ_CFG_PROGRESS_TIMER_SHIFT) & GLB_REQ_CFG_PROGRESS_TIMER_MASK)) -#define GLB_REQ_CFG_ALLOC_EN_SHIFT 2 -#define GLB_REQ_CFG_ALLOC_EN_MASK (0x1 << GLB_REQ_CFG_ALLOC_EN_SHIFT) -#define GLB_REQ_CFG_ALLOC_EN_GET(reg_val) (((reg_val)&GLB_REQ_CFG_ALLOC_EN_MASK) >> GLB_REQ_CFG_ALLOC_EN_SHIFT) -#define GLB_REQ_CFG_ALLOC_EN_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_CFG_ALLOC_EN_MASK) | (((value) << GLB_REQ_CFG_ALLOC_EN_SHIFT) & GLB_REQ_CFG_ALLOC_EN_MASK)) -#define GLB_REQ_CFG_PWROFF_TIMER_SHIFT 3 -#define GLB_REQ_CFG_PWROFF_TIMER_MASK (0x1 << GLB_REQ_CFG_PWROFF_TIMER_SHIFT) -#define GLB_REQ_CFG_PWROFF_TIMER_GET(reg_val) \ - (((reg_val)&GLB_REQ_CFG_PWROFF_TIMER_MASK) >> GLB_REQ_CFG_PWROFF_TIMER_SHIFT) -#define GLB_REQ_CFG_PWROFF_TIMER_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_CFG_PWROFF_TIMER_MASK) | \ - (((value) << GLB_REQ_CFG_PWROFF_TIMER_SHIFT) & GLB_REQ_CFG_PWROFF_TIMER_MASK)) -#define GLB_REQ_PROTM_ENTER_SHIFT 4 -#define GLB_REQ_PROTM_ENTER_MASK (0x1 << GLB_REQ_PROTM_ENTER_SHIFT) -#define GLB_REQ_PROTM_ENTER_GET(reg_val) (((reg_val)&GLB_REQ_PROTM_ENTER_MASK) >> GLB_REQ_PROTM_ENTER_SHIFT) -#define GLB_REQ_PROTM_ENTER_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_PROTM_ENTER_MASK) | (((value) << GLB_REQ_PROTM_ENTER_SHIFT) & GLB_REQ_PROTM_ENTER_MASK)) -#define GLB_REQ_PRFCNT_ENABLE_SHIFT 5 -#define GLB_REQ_PRFCNT_ENABLE_MASK (0x1 << GLB_REQ_PRFCNT_ENABLE_SHIFT) -#define GLB_REQ_PRFCNT_ENABLE_GET(reg_val) (((reg_val)&GLB_REQ_PRFCNT_ENABLE_MASK) >> GLB_REQ_PRFCNT_ENABLE_SHIFT) -#define GLB_REQ_PRFCNT_ENABLE_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_PRFCNT_ENABLE_MASK) | \ - (((value) << GLB_REQ_PRFCNT_ENABLE_SHIFT) & GLB_REQ_PRFCNT_ENABLE_MASK)) -#define GLB_REQ_PRFCNT_SAMPLE_SHIFT 6 -#define GLB_REQ_PRFCNT_SAMPLE_MASK (0x1 << GLB_REQ_PRFCNT_SAMPLE_SHIFT) -#define GLB_REQ_PRFCNT_SAMPLE_GET(reg_val) (((reg_val)&GLB_REQ_PRFCNT_SAMPLE_MASK) >> GLB_REQ_PRFCNT_SAMPLE_SHIFT) -#define 
GLB_REQ_PRFCNT_SAMPLE_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_PRFCNT_SAMPLE_MASK) | \ - (((value) << GLB_REQ_PRFCNT_SAMPLE_SHIFT) & GLB_REQ_PRFCNT_SAMPLE_MASK)) -#define GLB_REQ_COUNTER_ENABLE_SHIFT 7 -#define GLB_REQ_COUNTER_ENABLE_MASK (0x1 << GLB_REQ_COUNTER_ENABLE_SHIFT) -#define GLB_REQ_COUNTER_ENABLE_GET(reg_val) (((reg_val)&GLB_REQ_COUNTER_ENABLE_MASK) >> GLB_REQ_COUNTER_ENABLE_SHIFT) -#define GLB_REQ_COUNTER_ENABLE_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_COUNTER_ENABLE_MASK) | \ - (((value) << GLB_REQ_COUNTER_ENABLE_SHIFT) & GLB_REQ_COUNTER_ENABLE_MASK)) -#define GLB_REQ_PING_SHIFT 8 -#define GLB_REQ_PING_MASK (0x1 << GLB_REQ_PING_SHIFT) -#define GLB_REQ_PING_GET(reg_val) (((reg_val)&GLB_REQ_PING_MASK) >> GLB_REQ_PING_SHIFT) -#define GLB_REQ_PING_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_PING_MASK) | (((value) << GLB_REQ_PING_SHIFT) & GLB_REQ_PING_MASK)) -#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT 9 -#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK \ - (0x1 << GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT) -#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_GET(reg_val) \ - (((reg_val)&GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK) >> \ - GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT) -#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK) | \ - (((value) << GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT) & \ - GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK)) -#define GLB_REQ_INACTIVE_COMPUTE_SHIFT 20 -#define GLB_REQ_INACTIVE_COMPUTE_MASK (0x1 << GLB_REQ_INACTIVE_COMPUTE_SHIFT) -#define GLB_REQ_INACTIVE_COMPUTE_GET(reg_val) \ - (((reg_val)&GLB_REQ_INACTIVE_COMPUTE_MASK) >> GLB_REQ_INACTIVE_COMPUTE_SHIFT) -#define GLB_REQ_INACTIVE_COMPUTE_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_INACTIVE_COMPUTE_MASK) | \ - (((value) << GLB_REQ_INACTIVE_COMPUTE_SHIFT) & GLB_REQ_INACTIVE_COMPUTE_MASK)) -#define GLB_REQ_INACTIVE_FRAGMENT_SHIFT 21 -#define GLB_REQ_INACTIVE_FRAGMENT_MASK (0x1 << GLB_REQ_INACTIVE_FRAGMENT_SHIFT) -#define 
GLB_REQ_INACTIVE_FRAGMENT_GET(reg_val) \ - (((reg_val)&GLB_REQ_INACTIVE_FRAGMENT_MASK) >> GLB_REQ_INACTIVE_FRAGMENT_SHIFT) -#define GLB_REQ_INACTIVE_FRAGMENT_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_INACTIVE_FRAGMENT_MASK) | \ - (((value) << GLB_REQ_INACTIVE_FRAGMENT_SHIFT) & GLB_REQ_INACTIVE_FRAGMENT_MASK)) -#define GLB_REQ_INACTIVE_TILER_SHIFT 22 -#define GLB_REQ_INACTIVE_TILER_MASK (0x1 << GLB_REQ_INACTIVE_TILER_SHIFT) -#define GLB_REQ_INACTIVE_TILER_GET(reg_val) (((reg_val)&GLB_REQ_INACTIVE_TILER_MASK) >> GLB_REQ_INACTIVE_TILER_SHIFT) -#define GLB_REQ_INACTIVE_TILER_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_INACTIVE_TILER_MASK) | \ - (((value) << GLB_REQ_INACTIVE_TILER_SHIFT) & GLB_REQ_INACTIVE_TILER_MASK)) -#define GLB_REQ_PROTM_EXIT_SHIFT 23 -#define GLB_REQ_PROTM_EXIT_MASK (0x1 << GLB_REQ_PROTM_EXIT_SHIFT) -#define GLB_REQ_PROTM_EXIT_GET(reg_val) (((reg_val)&GLB_REQ_PROTM_EXIT_MASK) >> GLB_REQ_PROTM_EXIT_SHIFT) -#define GLB_REQ_PROTM_EXIT_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_PROTM_EXIT_MASK) | (((value) << GLB_REQ_PROTM_EXIT_SHIFT) & GLB_REQ_PROTM_EXIT_MASK)) -#define GLB_REQ_PRFCNT_THRESHOLD_SHIFT 24 -#define GLB_REQ_PRFCNT_THRESHOLD_MASK (0x1 << GLB_REQ_PRFCNT_THRESHOLD_SHIFT) -#define GLB_REQ_PRFCNT_THRESHOLD_GET(reg_val) \ - (((reg_val)&GLB_REQ_PRFCNT_THRESHOLD_MASK) >> GLB_REQ_PRFCNT_THRESHOLD_SHIFT) -#define GLB_REQ_PRFCNT_THRESHOLD_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_PRFCNT_THRESHOLD_MASK) | \ - (((value) << GLB_REQ_PRFCNT_THRESHOLD_SHIFT) & GLB_REQ_PRFCNT_THRESHOLD_MASK)) -#define GLB_REQ_PRFCNT_OVERFLOW_SHIFT 25 -#define GLB_REQ_PRFCNT_OVERFLOW_MASK (0x1 << GLB_REQ_PRFCNT_OVERFLOW_SHIFT) -#define GLB_REQ_PRFCNT_OVERFLOW_GET(reg_val) (((reg_val)&GLB_REQ_PRFCNT_OVERFLOW_MASK) >> GLB_REQ_PRFCNT_OVERFLOW_SHIFT) -#define GLB_REQ_PRFCNT_OVERFLOW_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_PRFCNT_OVERFLOW_MASK) | \ - (((value) << GLB_REQ_PRFCNT_OVERFLOW_SHIFT) & GLB_REQ_PRFCNT_OVERFLOW_MASK)) -#define 
GLB_REQ_DEBUG_CSF_REQ_SHIFT 30 -#define GLB_REQ_DEBUG_CSF_REQ_MASK (0x1 << GLB_REQ_DEBUG_CSF_REQ_SHIFT) -#define GLB_REQ_DEBUG_CSF_REQ_GET(reg_val) (((reg_val)&GLB_REQ_DEBUG_CSF_REQ_MASK) >> GLB_REQ_DEBUG_CSF_REQ_SHIFT) -#define GLB_REQ_DEBUG_CSF_REQ_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_DEBUG_CSF_REQ_MASK) | \ - (((value) << GLB_REQ_DEBUG_CSF_REQ_SHIFT) & GLB_REQ_DEBUG_CSF_REQ_MASK)) -#define GLB_REQ_DEBUG_HOST_REQ_SHIFT 31 -#define GLB_REQ_DEBUG_HOST_REQ_MASK (0x1 << GLB_REQ_DEBUG_HOST_REQ_SHIFT) -#define GLB_REQ_DEBUG_HOST_REQ_GET(reg_val) (((reg_val)&GLB_REQ_DEBUG_HOST_REQ_MASK) >> GLB_REQ_DEBUG_HOST_REQ_SHIFT) -#define GLB_REQ_DEBUG_HOST_REQ_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_DEBUG_HOST_REQ_MASK) | \ - (((value) << GLB_REQ_DEBUG_HOST_REQ_SHIFT) & GLB_REQ_DEBUG_HOST_REQ_MASK)) - -/* GLB_ACK_IRQ_MASK register */ -#define GLB_ACK_IRQ_MASK_HALT_SHIFT 0 -#define GLB_ACK_IRQ_MASK_HALT_MASK (0x1 << GLB_ACK_IRQ_MASK_HALT_SHIFT) -#define GLB_ACK_IRQ_MASK_HALT_GET(reg_val) (((reg_val)&GLB_ACK_IRQ_MASK_HALT_MASK) >> GLB_ACK_IRQ_MASK_HALT_SHIFT) -#define GLB_ACK_IRQ_MASK_HALT_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_HALT_MASK) | \ - (((value) << GLB_ACK_IRQ_MASK_HALT_SHIFT) & GLB_ACK_IRQ_MASK_HALT_MASK)) -#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT 1 -#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT) -#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_GET(reg_val) \ - (((reg_val)&GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK) >> GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT) -#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK) | \ - (((value) << GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT) & GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK)) -#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT 2 -#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT) -#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_GET(reg_val) \ - 
(((reg_val)&GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK) >> GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT) -#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK) | \ - (((value) << GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT) & GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK)) -#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT 3 -#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT) -#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_GET(reg_val) \ - (((reg_val)&GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK) >> GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT) -#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK) | \ - (((value) << GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT) & GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK)) -#define GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT 4 -#define GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK (0x1 << GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT) -#define GLB_ACK_IRQ_MASK_PROTM_ENTER_GET(reg_val) \ - (((reg_val)&GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK) >> GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT) -#define GLB_ACK_IRQ_MASK_PROTM_ENTER_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK) | \ - (((value) << GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT) & GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK)) -#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT 5 -#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT) -#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_GET(reg_val) \ - (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK) >> GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT) -#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK) | \ - (((value) << GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT) & GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK)) -#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT 6 -#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT) -#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_GET(reg_val) \ - 
(((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK) >> GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT) -#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK) | \ - (((value) << GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT) & GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK)) -#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT 7 -#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK (0x1 << GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT) -#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_GET(reg_val) \ - (((reg_val)&GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK) >> GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT) -#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK) | \ - (((value) << GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT) & GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK)) -#define GLB_ACK_IRQ_MASK_PING_SHIFT 8 -#define GLB_ACK_IRQ_MASK_PING_MASK (0x1 << GLB_ACK_IRQ_MASK_PING_SHIFT) -#define GLB_ACK_IRQ_MASK_PING_GET(reg_val) (((reg_val)&GLB_ACK_IRQ_MASK_PING_MASK) >> GLB_ACK_IRQ_MASK_PING_SHIFT) -#define GLB_ACK_IRQ_MASK_PING_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_PING_MASK) | \ - (((value) << GLB_ACK_IRQ_MASK_PING_SHIFT) & GLB_ACK_IRQ_MASK_PING_MASK)) -#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT 9 -#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK \ - (0x1 << GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT) -#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_GET(reg_val) \ - (((reg_val)&GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK) >> \ - GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT) -#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK) | \ - (((value) << GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT) & \ - GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK)) -#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT 20 -#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT) -#define 
GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_GET(reg_val) \ - (((reg_val)&GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK) >> GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT) -#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK) | \ - (((value) << GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT) & GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK)) -#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT 21 -#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT) -#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_GET(reg_val) \ - (((reg_val)&GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK) >> GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT) -#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK) | \ - (((value) << GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT) & GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK)) -#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT 22 -#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT) -#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_GET(reg_val) \ - (((reg_val)&GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK) >> GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT) -#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK) | \ - (((value) << GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT) & GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK)) -#define GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT 23 -#define GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK (0x1 << GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT) -#define GLB_ACK_IRQ_MASK_PROTM_EXIT_GET(reg_val) \ - (((reg_val)&GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK) >> GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT) -#define GLB_ACK_IRQ_MASK_PROTM_EXIT_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK) | \ - (((value) << GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT) & GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK)) -#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT 24 -#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK (0x1 << 
GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT) -#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_GET(reg_val) \ - (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK) >> GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT) -#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK) | \ - (((value) << GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT) & GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK)) -#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT 25 -#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT) -#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_GET(reg_val) \ - (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK) >> GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT) -#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK) | \ - (((value) << GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT) & GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK)) -#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT 30 -#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK (0x1 << GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT) -#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_GET(reg_val) \ - (((reg_val)&GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK) >> GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT) -#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK) | \ - (((value) << GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT) & GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK)) -#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT 31 -#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK (0x1 << GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT) -#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_GET(reg_val) \ - (((reg_val)&GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK) >> GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT) -#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK) | \ - (((value) << GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT) & GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK)) - -/* GLB_PROGRESS_TIMER register */ -#define 
GLB_PROGRESS_TIMER_TIMEOUT_SHIFT 0 -#define GLB_PROGRESS_TIMER_TIMEOUT_MASK (0xFFFFFFFF << GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) -#define GLB_PROGRESS_TIMER_TIMEOUT_GET(reg_val) \ - (((reg_val)&GLB_PROGRESS_TIMER_TIMEOUT_MASK) >> GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) -#define GLB_PROGRESS_TIMER_TIMEOUT_SET(reg_val, value) \ - (((reg_val) & ~GLB_PROGRESS_TIMER_TIMEOUT_MASK) | \ - (((value) << GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) & GLB_PROGRESS_TIMER_TIMEOUT_MASK)) - -/* GLB_PWROFF_TIMER register */ -#define GLB_PWROFF_TIMER_TIMEOUT_SHIFT 0 -#define GLB_PWROFF_TIMER_TIMEOUT_MASK (0x7FFFFFFF << GLB_PWROFF_TIMER_TIMEOUT_SHIFT) -#define GLB_PWROFF_TIMER_TIMEOUT_GET(reg_val) \ - (((reg_val)&GLB_PWROFF_TIMER_TIMEOUT_MASK) >> GLB_PWROFF_TIMER_TIMEOUT_SHIFT) -#define GLB_PWROFF_TIMER_TIMEOUT_SET(reg_val, value) \ - (((reg_val) & ~GLB_PWROFF_TIMER_TIMEOUT_MASK) | \ - (((value) << GLB_PWROFF_TIMER_TIMEOUT_SHIFT) & GLB_PWROFF_TIMER_TIMEOUT_MASK)) -#define GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT 31 -#define GLB_PWROFF_TIMER_TIMER_SOURCE_MASK (0x1 << GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT) -#define GLB_PWROFF_TIMER_TIMER_SOURCE_GET(reg_val) \ - (((reg_val)&GLB_PWROFF_TIMER_TIMER_SOURCE_MASK) >> GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT) -#define GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val, value) \ - (((reg_val) & ~GLB_PWROFF_TIMER_TIMER_SOURCE_MASK) | \ - (((value) << GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT) & GLB_PWROFF_TIMER_TIMER_SOURCE_MASK)) -/* GLB_PWROFF_TIMER_TIMER_SOURCE values */ -#define GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP 0x0 -#define GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1 -/* End of GLB_PWROFF_TIMER_TIMER_SOURCE values */ - -/* GLB_ALLOC_EN register */ -#define GLB_ALLOC_EN_MASK_SHIFT 0 -#define GLB_ALLOC_EN_MASK_MASK (0xFFFFFFFFFFFFFFFF << GLB_ALLOC_EN_MASK_SHIFT) -#define GLB_ALLOC_EN_MASK_GET(reg_val) (((reg_val)&GLB_ALLOC_EN_MASK_MASK) >> GLB_ALLOC_EN_MASK_SHIFT) -#define GLB_ALLOC_EN_MASK_SET(reg_val, value) \ - (((reg_val) & ~GLB_ALLOC_EN_MASK_MASK) | (((value) << 
GLB_ALLOC_EN_MASK_SHIFT) & GLB_ALLOC_EN_MASK_MASK)) - -/* GLB_PROTM_COHERENCY register */ -#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT 0 -#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK \ - (0xFFFFFFFF << GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT) -#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_GET(reg_val) \ - (((reg_val)&GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK) >> \ - GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT) -#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SET(reg_val, value) \ - (((reg_val) & ~GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK) | \ - (((value) << GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT) & \ - GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK)) -/* End of GLB_INPUT_BLOCK register set definitions */ - -/* GLB_OUTPUT_BLOCK register set definitions */ - -/* GLB_ACK register */ -#define GLB_ACK_CFG_PROGRESS_TIMER_SHIFT 1 -#define GLB_ACK_CFG_PROGRESS_TIMER_MASK (0x1 << GLB_ACK_CFG_PROGRESS_TIMER_SHIFT) -#define GLB_ACK_CFG_PROGRESS_TIMER_GET(reg_val) \ - (((reg_val)&GLB_ACK_CFG_PROGRESS_TIMER_MASK) >> GLB_ACK_CFG_PROGRESS_TIMER_SHIFT) -#define GLB_ACK_CFG_PROGRESS_TIMER_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_CFG_PROGRESS_TIMER_MASK) | \ - (((value) << GLB_ACK_CFG_PROGRESS_TIMER_SHIFT) & GLB_ACK_CFG_PROGRESS_TIMER_MASK)) -#define GLB_ACK_CFG_ALLOC_EN_SHIFT 2 -#define GLB_ACK_CFG_ALLOC_EN_MASK (0x1 << GLB_ACK_CFG_ALLOC_EN_SHIFT) -#define GLB_ACK_CFG_ALLOC_EN_GET(reg_val) (((reg_val)&GLB_ACK_CFG_ALLOC_EN_MASK) >> GLB_ACK_CFG_ALLOC_EN_SHIFT) -#define GLB_ACK_CFG_ALLOC_EN_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_CFG_ALLOC_EN_MASK) | (((value) << GLB_ACK_CFG_ALLOC_EN_SHIFT) & GLB_ACK_CFG_ALLOC_EN_MASK)) -/* End of GLB_OUTPUT_BLOCK register set definitions */ - -/* The following register and fields are for headers before 10.x.7/11.x.4 */ -#define GLB_REQ_IDLE_ENABLE_SHIFT (10) -#define GLB_REQ_REQ_IDLE_ENABLE (1 << GLB_REQ_IDLE_ENABLE_SHIFT) -#define 
GLB_REQ_REQ_IDLE_DISABLE (0 << GLB_REQ_IDLE_ENABLE_SHIFT) -#define GLB_REQ_IDLE_ENABLE_MASK (0x1 << GLB_REQ_IDLE_ENABLE_SHIFT) -#define GLB_REQ_IDLE_DISABLE_MASK (0x1 << GLB_REQ_IDLE_ENABLE_SHIFT) -#define GLB_REQ_IDLE_EVENT_SHIFT (26) -#define GLB_REQ_IDLE_EVENT_MASK (0x1 << GLB_REQ_IDLE_EVENT_SHIFT) -#define GLB_ACK_IDLE_ENABLE_SHIFT (10) -#define GLB_ACK_ACK_IDLE_ENABLE (1 << GLB_ACK_IDLE_ENABLE_SHIFT) -#define GLB_ACK_ACK_IDLE_DISABLE (0 << GLB_ACK_IDLE_ENABLE_SHIFT) -#define GLB_ACK_IDLE_ENABLE_MASK (0x1 << GLB_ACK_IDLE_ENABLE_SHIFT) -#define GLB_ACK_IDLE_EVENT_SHIFT (26) -#define GLB_ACK_IDLE_EVENT_MASK (0x1 << GLB_REQ_IDLE_EVENT_SHIFT) - -#define GLB_ACK_IRQ_MASK_IDLE_EVENT_SHIFT (26) -#define GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK (0x1 << GLB_ACK_IRQ_MASK_IDLE_EVENT_SHIFT) - -#define GLB_IDLE_TIMER (0x0080) -/* GLB_IDLE_TIMER register */ -#define GLB_IDLE_TIMER_TIMEOUT_SHIFT (0) -#define GLB_IDLE_TIMER_TIMEOUT_MASK ((0x7FFFFFFF) << GLB_IDLE_TIMER_TIMEOUT_SHIFT) -#define GLB_IDLE_TIMER_TIMEOUT_GET(reg_val) (((reg_val)&GLB_IDLE_TIMER_TIMEOUT_MASK) >> GLB_IDLE_TIMER_TIMEOUT_SHIFT) -#define GLB_IDLE_TIMER_TIMEOUT_SET(reg_val, value) \ - (((reg_val) & ~GLB_IDLE_TIMER_TIMEOUT_MASK) | \ - (((value) << GLB_IDLE_TIMER_TIMEOUT_SHIFT) & GLB_IDLE_TIMER_TIMEOUT_MASK)) -#define GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT (31) -#define GLB_IDLE_TIMER_TIMER_SOURCE_MASK ((0x1) << GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT) -#define GLB_IDLE_TIMER_TIMER_SOURCE_GET(reg_val) \ - (((reg_val)&GLB_IDLE_TIMER_TIMER_SOURCE_MASK) >> GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT) -#define GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val, value) \ - (((reg_val) & ~GLB_IDLE_TIMER_TIMER_SOURCE_MASK) | \ - (((value) << GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT) & GLB_IDLE_TIMER_TIMER_SOURCE_MASK)) -/* GLB_IDLE_TIMER_TIMER_SOURCE values */ -#define GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP 0x0 -#define GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1 -/* End of GLB_IDLE_TIMER_TIMER_SOURCE values */ - -#define CSG_STATUS_STATE (0x0018) /* CSG 
state status register */ -/* CSG_STATUS_STATE register */ -#define CSG_STATUS_STATE_IDLE_SHIFT (0) -#define CSG_STATUS_STATE_IDLE_MASK ((0x1) << CSG_STATUS_STATE_IDLE_SHIFT) -#define CSG_STATUS_STATE_IDLE_GET(reg_val) \ - (((reg_val)&CSG_STATUS_STATE_IDLE_MASK) >> CSG_STATUS_STATE_IDLE_SHIFT) -#define CSG_STATUS_STATE_IDLE_SET(reg_val, value) \ - (((reg_val) & ~CSG_STATUS_STATE_IDLE_MASK) | \ - (((value) << CSG_STATUS_STATE_IDLE_SHIFT) & CSG_STATUS_STATE_IDLE_MASK)) - -#endif /* _GPU_CSF_REGISTERS_H_ */ diff --git a/mali_kbase/csf/mali_kbase_csf.c b/mali_kbase/csf/mali_kbase_csf.c index e35c570..e3e046c 100644 --- a/mali_kbase/csf/mali_kbase_csf.c +++ b/mali_kbase/csf/mali_kbase_csf.c @@ -27,7 +27,7 @@ #include <linux/export.h> #include <linux/priority_control_manager.h> #include <linux/shmem_fs.h> -#include "mali_gpu_csf_registers.h" +#include <uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h> #include "mali_kbase_csf_tiler_heap.h" #include <mmu/mali_kbase_mmu.h> #include "mali_kbase_csf_timeout.h" @@ -588,7 +588,7 @@ void kbase_csf_queue_terminate(struct kbase_context *kctx, spin_lock_irqsave(&kctx->csf.event_lock, flags); dev_dbg(kctx->kbdev->dev, - "Remove any pending command queue fatal from context %p\n", + "Remove any pending command queue fatal from context %pK\n", (void *)kctx); list_del_init(&queue->error.link); spin_unlock_irqrestore(&kctx->csf.event_lock, flags); @@ -1132,6 +1132,26 @@ static int create_suspend_buffers(struct kbase_context *const kctx, } /** + * generate_group_uid() - Makes an ID unique to all kernel base devices + * and contexts, for a queue group and CSG. 
+ * + * Return: A unique ID in the form of an unsigned 32-bit integer + */ +static u32 generate_group_uid(void) +{ + /* use first KBase device to store max UID */ + struct kbase_device *kbdev = kbase_find_device(-1); + u32 uid = 1; + + if (kbdev) + uid = (u32) atomic_inc_return(&kbdev->group_max_uid_in_devices); + else + WARN(1, "NULL kbase device pointer in group UID generation"); + + return uid; +} + +/** * create_queue_group() - Create a queue group * * @kctx: Address of the kbase context within which the queue group @@ -1142,7 +1162,7 @@ static int create_suspend_buffers(struct kbase_context *const kctx, * Return: a queue group handle on success, or a negative error code on failure. */ static int create_queue_group(struct kbase_context *const kctx, - const union kbase_ioctl_cs_queue_group_create *const create) + union kbase_ioctl_cs_queue_group_create *const create) { int group_handle = find_free_group_handle(kctx); @@ -1178,6 +1198,9 @@ static int create_queue_group(struct kbase_context *const kctx, group->doorbell_nr = KBASEP_USER_DB_NR_INVALID; group->faulted = false; + group->group_uid = generate_group_uid(); + create->out.group_uid = group->group_uid; + INIT_LIST_HEAD(&group->link); INIT_LIST_HEAD(&group->link_to_schedule); INIT_LIST_HEAD(&group->error_fatal.link); @@ -1409,7 +1432,7 @@ void kbase_csf_queue_group_terminate(struct kbase_context *kctx, spin_lock_irqsave(&kctx->csf.event_lock, flags); dev_dbg(kbdev->dev, - "Remove any pending group fatal error from context %p\n", + "Remove any pending group fatal error from context %pK\n", (void *)group->kctx); list_del_init(&group->error_tiler_oom.link); @@ -1503,7 +1526,7 @@ static void add_error(struct kbase_context *const kctx, error->data = *data; list_add_tail(&error->link, &kctx->csf.error_list); dev_dbg(kctx->kbdev->dev, - "Added error %p of type %d in context %p\n", + "Added error %pK of type %d in context %pK\n", (void *)error, data->type, (void *)kctx); } @@ -1796,7 +1819,7 @@ int 
kbase_csf_event_wait_add(struct kbase_context *kctx, spin_lock_irqsave(&kctx->csf.event_lock, flags); list_add_tail(&event->link, &kctx->csf.event_callback_list); dev_dbg(kctx->kbdev->dev, - "Added event handler %p with param %p\n", event, + "Added event handler %pK with param %pK\n", event, event->param); spin_unlock_irqrestore(&kctx->csf.event_lock, flags); @@ -1818,7 +1841,7 @@ void kbase_csf_event_wait_remove(struct kbase_context *kctx, if ((event->callback == callback) && (event->param == param)) { list_del(&event->link); dev_dbg(kctx->kbdev->dev, - "Removed event handler %p with param %p\n", + "Removed event handler %pK with param %pK\n", event, event->param); kfree(event); break; @@ -1841,7 +1864,7 @@ bool kbase_csf_read_error(struct kbase_context *kctx, struct kbase_csf_notification, link); list_del_init(&error_data->link); *event_data = error_data->data; - dev_dbg(kctx->kbdev->dev, "Dequeued error %p in context %p\n", + dev_dbg(kctx->kbdev->dev, "Dequeued error %pK in context %pK\n", (void *)error_data, (void *)kctx); } else { got_event = false; @@ -1859,7 +1882,7 @@ bool kbase_csf_error_pending(struct kbase_context *kctx) spin_lock_irqsave(&kctx->csf.event_lock, flags); event_pended = !list_empty(&kctx->csf.error_list); - dev_dbg(kctx->kbdev->dev, "%s error is pending in context %p\n", + dev_dbg(kctx->kbdev->dev, "%s error is pending in context %pK\n", event_pended ? "An" : "No", (void *)kctx); spin_unlock_irqrestore(&kctx->csf.event_lock, flags); @@ -1872,7 +1895,7 @@ void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu) unsigned long flags; dev_dbg(kctx->kbdev->dev, - "Signal event (%s GPU notify) for context %p\n", + "Signal event (%s GPU notify) for context %pK\n", notify_gpu ? "with" : "without", (void *)kctx); /* First increment the signal count and wake up event thread. 
@@ -1903,7 +1926,7 @@ void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu) enum kbase_csf_event_callback_action action; dev_dbg(kctx->kbdev->dev, - "Calling event handler %p with param %p\n", + "Calling event handler %pK with param %pK\n", (void *)event, event->param); action = event->callback(event->param); if (action == KBASE_CSF_EVENT_CALLBACK_REMOVE) { @@ -1926,7 +1949,7 @@ void kbase_csf_event_wait_remove_all(struct kbase_context *kctx) event, next_event, &kctx->csf.event_callback_list, link) { list_del(&event->link); dev_dbg(kctx->kbdev->dev, - "Removed event handler %p with param %p\n", + "Removed event handler %pK with param %pK\n", (void *)event, event->param); kfree(event); } @@ -2231,6 +2254,31 @@ static void protm_event_worker(struct work_struct *data) kbase_csf_scheduler_group_protm_enter(group); } +static void report_queue_fatal_error(struct kbase_queue *const queue, + u32 cs_fatal, u64 cs_fatal_info, + u8 group_handle) +{ + struct base_csf_notification error = + { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, + .payload = { + .csg_error = { + .handle = group_handle, + .error = { + .error_type = + BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL, + .payload = { + .fatal_queue = { + .sideband = + cs_fatal_info, + .status = cs_fatal, + .csi_index = + queue->csi_index, + } } } } } }; + + add_error(queue->kctx, &queue->error, &error); + kbase_event_wakeup(queue->kctx); +} + /** * handle_fault_event - Handler for CS fault. * @@ -2268,51 +2316,10 @@ handle_fault_event(struct kbase_queue *const queue, kbase_gpu_exception_name(cs_fault_exception_type), cs_fault_exception_data, cs_fault_info_exception_data); - /* TODO GPUCORE-26291: We've'identified an issue with faulted CSIs not - * making progress in some cases. Until the issue is resolved, - * RESOURCE_EVICTION_TIMEOUT error shall be treated as a fatal error - * to give userspace a chance to terminate the group. This is intended - * to be a temporary workaround. 
- */ if (cs_fault_exception_type == CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT) - kbase_csf_add_queue_fatal_error( - queue, GPU_EXCEPTION_TYPE_SW_FAULT_2, 0); -} - -static void report_queue_fatal_error(struct kbase_queue *const queue, - u32 cs_fatal, u64 cs_fatal_info, - u8 group_handle) -{ - struct base_csf_notification error = { - .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, - .payload = { - .csg_error = { - .handle = group_handle, - .error = { - .error_type = - BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL, - .payload = { - .fatal_queue = { - .sideband = cs_fatal_info, - .status = cs_fatal, - .csi_index = queue->csi_index, - } - } - } - } - } - }; - - add_error(queue->kctx, &queue->error, &error); - kbase_event_wakeup(queue->kctx); -} - -void kbase_csf_add_queue_fatal_error(struct kbase_queue *const queue, - u32 cs_fatal, u64 cs_fatal_info) -{ - report_queue_fatal_error(queue, cs_fatal, cs_fatal_info, - queue->group->handle); + report_queue_fatal_error(queue, GPU_EXCEPTION_TYPE_SW_FAULT_2, + 0, queue->group->handle); } /** @@ -2643,8 +2650,20 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, process_cs_interrupts(group, ginfo, irqreq, irqack); } +/** + * process_prfcnt_interrupts - Process performance counter interrupts. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @glb_req: Global request register value. + * @glb_ack: Global acknowledge register value. + * + * Handles interrupts issued by the firmware that relate to the performance + * counters. For example, on completion of a performance counter sample. It is + * expected that the scheduler spinlock is already held on calling this + * function. 
+ */ static void process_prfcnt_interrupts(struct kbase_device *kbdev, u32 glb_req, - u32 glb_ack, unsigned long *flags) + u32 glb_ack) { const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; @@ -2656,14 +2675,11 @@ static void process_prfcnt_interrupts(struct kbase_device *kbdev, u32 glb_req, ((glb_req & GLB_REQ_PRFCNT_SAMPLE_MASK) == (glb_ack & GLB_REQ_PRFCNT_SAMPLE_MASK))) { kbdev->csf.hwcnt.request_pending = false; - kbase_csf_scheduler_spin_unlock(kbdev, *flags); dev_dbg(kbdev->dev, "PRFCNT_SAMPLE done interrupt received."); kbase_hwcnt_backend_csf_on_prfcnt_sample( &kbdev->hwcnt_gpu_iface); - - kbase_csf_scheduler_spin_lock(kbdev, flags); } /* Process PRFCNT_ENABLE interrupt. */ @@ -2671,32 +2687,25 @@ static void process_prfcnt_interrupts(struct kbase_device *kbdev, u32 glb_req, ((glb_req & GLB_REQ_PRFCNT_ENABLE_MASK) == (glb_ack & GLB_REQ_PRFCNT_ENABLE_MASK))) { kbdev->csf.hwcnt.enable_pending = false; - kbase_csf_scheduler_spin_unlock(kbdev, *flags); dev_dbg(kbdev->dev, "PRFCNT_ENABLE status changed interrupt received."); - if (glb_ack & GLB_REQ_PRFCNT_ENABLE_MASK) { + if (glb_ack & GLB_REQ_PRFCNT_ENABLE_MASK) kbase_hwcnt_backend_csf_on_prfcnt_enable( &kbdev->hwcnt_gpu_iface); - } else { + else kbase_hwcnt_backend_csf_on_prfcnt_disable( &kbdev->hwcnt_gpu_iface); - } - - kbase_csf_scheduler_spin_lock(kbdev, flags); } /* Process PRFCNT_THRESHOLD interrupt. */ if ((glb_req ^ glb_ack) & GLB_REQ_PRFCNT_THRESHOLD_MASK) { - kbase_csf_scheduler_spin_unlock(kbdev, *flags); dev_dbg(kbdev->dev, "PRFCNT_THRESHOLD interrupt received."); kbase_hwcnt_backend_csf_on_prfcnt_threshold( &kbdev->hwcnt_gpu_iface); - kbase_csf_scheduler_spin_lock(kbdev, flags); - /* Set the GLB_REQ.PRFCNT_THRESHOLD flag back to * the same value as GLB_ACK.PRFCNT_THRESHOLD * flag in order to enable reporting of another @@ -2709,13 +2718,11 @@ static void process_prfcnt_interrupts(struct kbase_device *kbdev, u32 glb_req, /* Process PRFCNT_OVERFLOW interrupt. 
*/ if ((glb_req ^ glb_ack) & GLB_REQ_PRFCNT_OVERFLOW_MASK) { - kbase_csf_scheduler_spin_unlock(kbdev, *flags); dev_dbg(kbdev->dev, "PRFCNT_OVERFLOW interrupt received."); kbase_hwcnt_backend_csf_on_prfcnt_overflow( &kbdev->hwcnt_gpu_iface); - kbase_csf_scheduler_spin_lock(kbdev, flags); /* Set the GLB_REQ.PRFCNT_OVERFLOW flag back to * the same value as GLB_ACK.PRFCNT_OVERFLOW * flag in order to enable reporting of another @@ -2790,8 +2797,7 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val) } } - process_prfcnt_interrupts(kbdev, glb_req, glb_ack, - &flags); + process_prfcnt_interrupts(kbdev, glb_req, glb_ack); kbase_csf_scheduler_spin_unlock(kbdev, flags); diff --git a/mali_kbase/csf/mali_kbase_csf.h b/mali_kbase/csf/mali_kbase_csf.h index 6252515..effd468 100644 --- a/mali_kbase/csf/mali_kbase_csf.h +++ b/mali_kbase/csf/mali_kbase_csf.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -366,19 +366,6 @@ void kbase_csf_add_group_fatal_error( struct base_gpu_queue_group_error const *const err_payload); /** - * kbase_csf_add_queue_fatal_error - Report a fatal queue error to userspace - * - * @queue: Pointer to queue for which fatal event was received. - * @cs_fatal: Fault information - * @cs_fatal_info: Additional fault information - * - * If a queue has already been in fatal error status, - * subsequent fatal error on the queue should never take place. - */ -void kbase_csf_add_queue_fatal_error(struct kbase_queue *const queue, - u32 cs_fatal, u64 cs_fatal_info); - -/** * kbase_csf_interrupt - Handle interrupts issued by CSF firmware. 
* * @kbdev: The kbase device to handle an IRQ for diff --git a/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c b/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c index fb3a718..b54b2fc 100644 --- a/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c +++ b/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -154,8 +154,7 @@ int kbase_csf_cpu_queue_dump(struct kbase_context *kctx, mutex_lock(&kctx->csf.lock); - if (kctx->csf.cpu_queue.buffer) - kfree(kctx->csf.cpu_queue.buffer); + kfree(kctx->csf.cpu_queue.buffer); if (atomic_read(&kctx->csf.cpu_queue.dump_req_status) == BASE_CSF_CPU_QUEUE_DUMP_PENDING) { diff --git a/mali_kbase/csf/mali_kbase_csf_defs.h b/mali_kbase/csf/mali_kbase_csf_defs.h index a6f1958..0517399 100644 --- a/mali_kbase/csf/mali_kbase_csf_defs.h +++ b/mali_kbase/csf/mali_kbase_csf_defs.h @@ -401,6 +401,8 @@ struct kbase_protected_suspend_buffer { * @tiler_mask: Mask of tiler endpoints the group is allowed to use. * @fragment_mask: Mask of fragment endpoints the group is allowed to use. * @compute_mask: Mask of compute endpoints the group is allowed to use. + * @group_uid: 32-bit wide unsigned identifier for the group, unique + * across all kbase devices and contexts. * @link: Link to this queue group in the 'runnable_groups' list of * the corresponding kctx. * @link_to_schedule: Link to this queue group in the list of prepared groups @@ -449,6 +451,8 @@ struct kbase_queue_group { u64 fragment_mask; u64 compute_mask; + u32 group_uid; + struct list_head link; struct list_head link_to_schedule; enum kbase_csf_group_state run_state; @@ -801,9 +805,6 @@ struct kbase_csf_csg_slot { * other phases. 
* @non_idle_scanout_grps: Count on the non-idle groups in the scan-out * list at the scheduling prepare stage. - * @apply_async_protm: Signalling the internal scheduling apply stage to - * act with some special handling for entering the - * protected mode asynchronously. * @pm_active_count: Count indicating if the scheduler is owning a power * management reference count. Reference is taken when * the count becomes 1 and is dropped when the count @@ -853,7 +854,6 @@ struct kbase_csf_scheduler { struct work_struct gpu_idle_work; atomic_t non_idle_offslot_grps; u32 non_idle_scanout_grps; - bool apply_async_protm; u32 pm_active_count; unsigned int csg_scheduling_period_ms; bool tick_timer_active; @@ -1055,7 +1055,7 @@ struct kbase_csf_firmware_interface { struct protected_memory_allocation **pma; }; -/** +/* * struct kbase_csf_hwcnt - Object containing members for handling the dump of * HW counters. * diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.c b/mali_kbase/csf/mali_kbase_csf_firmware.c index ae039aa..73b8e03 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware.c @@ -48,10 +48,17 @@ #define MALI_MAX_FIRMWARE_NAME_LEN ((size_t)20) + static char fw_name[MALI_MAX_FIRMWARE_NAME_LEN] = "mali_csffw.bin"; module_param_string(fw_name, fw_name, sizeof(fw_name), 0644); MODULE_PARM_DESC(fw_name, "firmware image"); +/* The waiting time for firmware to boot */ +static unsigned int csf_firmware_boot_timeout_ms = 500; +module_param(csf_firmware_boot_timeout_ms, uint, 0444); +MODULE_PARM_DESC(csf_firmware_boot_timeout_ms, + "Maximum time to wait for firmware to boot."); + #ifdef CONFIG_MALI_DEBUG /* Makes Driver wait indefinitely for an acknowledgment for the different * requests it sends to firmware. 
Otherwise the timeouts interfere with the @@ -93,7 +100,6 @@ MODULE_PARM_DESC(fw_debug, #define TL_METADATA_ENTRY_NAME_OFFSET (0x8) -#define CSF_FIRMWARE_BOOT_TIMEOUT_MS (500) #define CSF_MAX_FW_STOP_LOOPS (100000) #define CSF_GLB_REQ_CFG_MASK \ @@ -232,7 +238,7 @@ static void stop_csf_firmware(struct kbase_device *kbdev) static void wait_for_firmware_boot(struct kbase_device *kbdev) { const long wait_timeout = - kbase_csf_timeout_in_jiffies(CSF_FIRMWARE_BOOT_TIMEOUT_MS); + kbase_csf_timeout_in_jiffies(csf_firmware_boot_timeout_ms); long remaining; /* Firmware will generate a global interface interrupt once booting @@ -987,6 +993,7 @@ static int parse_capabilities(struct kbase_device *kbdev) iface->group_stride = shared_info[GLB_GROUP_STRIDE/4]; iface->prfcnt_size = shared_info[GLB_PRFCNT_SIZE/4]; + iface->instr_features = shared_info[GLB_INSTR_FEATURES / 4]; if ((GROUP_CONTROL_0 + (unsigned long)iface->group_num * iface->group_stride) > @@ -1239,14 +1246,8 @@ static void handle_internal_firmware_fatal(struct kbase_device *const kbdev) kbase_ctx_sched_release_ctx_lock(kctx); } - /* Internal FW error could mean hardware counters will stop working. - * Put the backend into the unrecoverable error state to cause - * current and subsequent counter operations to immediately - * fail, avoiding the risk of a hang. 
- */ - kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface); - - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu(kbdev, + RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); } @@ -1669,6 +1670,7 @@ u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 return pwroff; } + int kbase_csf_firmware_init(struct kbase_device *kbdev) { const struct firmware *firmware; @@ -1836,6 +1838,7 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev) if (ret != 0) goto error; + /* Firmware loaded successfully */ release_firmware(firmware); KBASE_KTRACE_ADD(kbdev, FIRMWARE_BOOT, NULL, @@ -1987,7 +1990,7 @@ void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev) kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); } -int kbase_csf_firmware_ping(struct kbase_device *const kbdev) +void kbase_csf_firmware_ping(struct kbase_device *const kbdev) { const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; @@ -1997,7 +2000,11 @@ int kbase_csf_firmware_ping(struct kbase_device *const kbdev) set_global_request(global_iface, GLB_REQ_PING_MASK); kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); kbase_csf_scheduler_spin_unlock(kbdev, flags); +} +int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev) +{ + kbase_csf_firmware_ping(kbdev); return wait_for_global_request(kbdev, GLB_REQ_PING_MASK); } @@ -2040,11 +2047,17 @@ void kbase_csf_enter_protected_mode(struct kbase_device *kbdev) err = wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK); if (!err) { + unsigned long irq_flags; + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbdev->protected_mode = true; kbase_ipa_protection_mode_switch_event(kbdev); kbase_ipa_control_protm_entered(kbdev); + + kbase_csf_scheduler_spin_lock(kbdev, &irq_flags); kbase_hwcnt_backend_csf_protm_entered(&kbdev->hwcnt_gpu_iface); + kbase_csf_scheduler_spin_unlock(kbdev, irq_flags); + 
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } } @@ -2139,26 +2152,28 @@ static u32 copy_grp_and_stm( return total_stream_num; } -u32 kbase_csf_firmware_get_glb_iface(struct kbase_device *kbdev, +u32 kbase_csf_firmware_get_glb_iface( + struct kbase_device *kbdev, struct basep_cs_group_control *const group_data, u32 const max_group_num, struct basep_cs_stream_control *const stream_data, u32 const max_total_stream_num, u32 *const glb_version, - u32 *const features, u32 *const group_num, u32 *const prfcnt_size) + u32 *const features, u32 *const group_num, u32 *const prfcnt_size, + u32 *instr_features) { const struct kbase_csf_global_iface * const iface = &kbdev->csf.global_iface; - if (WARN_ON(!glb_version) || - WARN_ON(!features) || - WARN_ON(!group_num) || - WARN_ON(!prfcnt_size)) + if (WARN_ON(!glb_version) || WARN_ON(!features) || + WARN_ON(!group_num) || WARN_ON(!prfcnt_size) || + WARN_ON(!instr_features)) return 0; *glb_version = iface->version; *features = iface->features; *group_num = iface->group_num; *prfcnt_size = iface->prfcnt_size; + *instr_features = iface->instr_features; return copy_grp_and_stm(iface, group_data, max_group_num, stream_data, max_total_stream_num); @@ -2237,9 +2252,9 @@ int kbase_csf_firmware_mcu_shared_mapping_init( mutex_lock(&kbdev->csf.reg_lock); ret = kbase_add_va_region_rbtree(kbdev, va_reg, 0, num_pages, 1); va_reg->flags &= ~KBASE_REG_FREE; - mutex_unlock(&kbdev->csf.reg_lock); if (ret) goto va_region_add_error; + mutex_unlock(&kbdev->csf.reg_lock); gpu_map_properties &= (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR); gpu_map_properties |= gpu_map_prot; @@ -2261,9 +2276,9 @@ int kbase_csf_firmware_mcu_shared_mapping_init( mmu_insert_pages_error: mutex_lock(&kbdev->csf.reg_lock); kbase_remove_va_region(va_reg); - mutex_unlock(&kbdev->csf.reg_lock); va_region_add_error: kbase_free_alloced_region(va_reg); + mutex_unlock(&kbdev->csf.reg_lock); va_region_alloc_error: vunmap(cpu_addr); vmap_error: @@ -2293,8 +2308,8 @@ void 
kbase_csf_firmware_mcu_shared_mapping_term( if (csf_mapping->va_reg) { mutex_lock(&kbdev->csf.reg_lock); kbase_remove_va_region(csf_mapping->va_reg); - mutex_unlock(&kbdev->csf.reg_lock); kbase_free_alloced_region(csf_mapping->va_reg); + mutex_unlock(&kbdev->csf.reg_lock); } if (csf_mapping->phys) { diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.h b/mali_kbase/csf/mali_kbase_csf_firmware.h index a2dc4fd..13ff701 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware.h +++ b/mali_kbase/csf/mali_kbase_csf_firmware.h @@ -23,7 +23,7 @@ #define _KBASE_CSF_FIRMWARE_H_ #include "device/mali_kbase_device.h" -#include "mali_gpu_csf_registers.h" +#include <uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h> /* * PAGE_KERNEL_RO was only defined on 32bit ARM in 4.19 in: @@ -266,6 +266,7 @@ u32 kbase_csf_firmware_csg_output( * @group_stride: Stride in bytes in JASID0 virtual address between * CSG capability structures. * @prfcnt_size: Performance counters size. + * @instr_features: Instrumentation features. * @groups: Address of an array of CSG capability structures. */ struct kbase_csf_global_iface { @@ -277,6 +278,7 @@ struct kbase_csf_global_iface { u32 group_num; u32 group_stride; u32 prfcnt_size; + u32 instr_features; struct kbase_csf_cmd_stream_group_info *groups; }; @@ -397,13 +399,23 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev); /** * kbase_csf_firmware_ping - Send the ping request to firmware. * - * The function sends the ping request to firmware to confirm it is alive. + * The function sends the ping request to firmware. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + */ +void kbase_csf_firmware_ping(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_ping_wait - Send the ping request to firmware and waits. + * + * The function sends the ping request to firmware and waits to confirm it is + * alive. * * @kbdev: Instance of a GPU platform device that implements a CSF interface. 
* * Return: 0 on success, or negative on failure. */ -int kbase_csf_firmware_ping(struct kbase_device *kbdev); +int kbase_csf_firmware_ping_wait(struct kbase_device *kbdev); /** * kbase_csf_firmware_set_timeout - Set a hardware endpoint progress timeout. @@ -570,12 +582,14 @@ bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev); * in bytes. Bits 31:16 hold the size of firmware * performance counter data and 15:0 hold the size of * hardware performance counter data. - */ -u32 kbase_csf_firmware_get_glb_iface(struct kbase_device *kbdev, - struct basep_cs_group_control *group_data, u32 max_group_num, - struct basep_cs_stream_control *stream_data, u32 max_total_stream_num, - u32 *glb_version, u32 *features, u32 *group_num, u32 *prfcnt_size); - + * @instr_features: Instrumentation features. Bits 7:4 hold the max size + * of events. Bits 3:0 hold the offset update rate. + */ +u32 kbase_csf_firmware_get_glb_iface( + struct kbase_device *kbdev, struct basep_cs_group_control *group_data, + u32 max_group_num, struct basep_cs_stream_control *stream_data, + u32 max_total_stream_num, u32 *glb_version, u32 *features, + u32 *group_num, u32 *prfcnt_size, u32 *instr_features); /** * Get CSF firmware header timeline metadata content diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c index 6349917..a3901cd 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c @@ -237,6 +237,9 @@ static int invent_capabilities(struct kbase_device *kbdev) iface->kbdev = kbdev; iface->features = 0; iface->prfcnt_size = 64; + iface->instr_features = + 0x81; /* update rate=1, max event size = 1<<8 = 256 */ + iface->group_num = ARRAY_SIZE(interface->csg); iface->group_stride = 0; @@ -463,14 +466,8 @@ static void handle_internal_firmware_fatal(struct kbase_device *const kbdev) kbase_ctx_sched_release_ctx_lock(kctx); } - /* Internal FW error could mean hardware 
counters will stop working. - * Put the backend into the unrecoverable error state to cause - * current and subsequent counter operations to immediately - * fail, avoiding the risk of a hang. - */ - kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface); - - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu(kbdev, + RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); } @@ -1032,7 +1029,7 @@ void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev) kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); } -int kbase_csf_firmware_ping(struct kbase_device *const kbdev) +void kbase_csf_firmware_ping(struct kbase_device *const kbdev) { const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; @@ -1042,7 +1039,11 @@ int kbase_csf_firmware_ping(struct kbase_device *const kbdev) set_global_request(global_iface, GLB_REQ_PING_MASK); kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); kbase_csf_scheduler_spin_unlock(kbdev, flags); +} +int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev) +{ + kbase_csf_firmware_ping(kbdev); return wait_for_global_request(kbdev, GLB_REQ_PING_MASK); } @@ -1170,26 +1171,28 @@ static u32 copy_grp_and_stm( return total_stream_num; } -u32 kbase_csf_firmware_get_glb_iface(struct kbase_device *kbdev, +u32 kbase_csf_firmware_get_glb_iface( + struct kbase_device *kbdev, struct basep_cs_group_control *const group_data, u32 const max_group_num, struct basep_cs_stream_control *const stream_data, u32 const max_total_stream_num, u32 *const glb_version, - u32 *const features, u32 *const group_num, u32 *const prfcnt_size) + u32 *const features, u32 *const group_num, u32 *const prfcnt_size, + u32 *const instr_features) { const struct kbase_csf_global_iface * const iface = &kbdev->csf.global_iface; - if (WARN_ON(!glb_version) || - WARN_ON(!features) || - WARN_ON(!group_num) || - WARN_ON(!prfcnt_size)) + if (WARN_ON(!glb_version) || WARN_ON(!features) || 
+ WARN_ON(!group_num) || WARN_ON(!prfcnt_size) || + WARN_ON(!instr_features)) return 0; *glb_version = iface->version; *features = iface->features; *group_num = iface->group_num; *prfcnt_size = iface->prfcnt_size; + *instr_features = iface->instr_features; return copy_grp_and_stm(iface, group_data, max_group_num, stream_data, max_total_stream_num); @@ -1269,9 +1272,9 @@ int kbase_csf_firmware_mcu_shared_mapping_init( mutex_lock(&kbdev->csf.reg_lock); ret = kbase_add_va_region_rbtree(kbdev, va_reg, 0, num_pages, 1); va_reg->flags &= ~KBASE_REG_FREE; - mutex_unlock(&kbdev->csf.reg_lock); if (ret) goto va_region_add_error; + mutex_unlock(&kbdev->csf.reg_lock); gpu_map_properties &= (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR); gpu_map_properties |= gpu_map_prot; @@ -1293,9 +1296,9 @@ int kbase_csf_firmware_mcu_shared_mapping_init( mmu_insert_pages_error: mutex_lock(&kbdev->csf.reg_lock); kbase_remove_va_region(va_reg); - mutex_unlock(&kbdev->csf.reg_lock); va_region_add_error: kbase_free_alloced_region(va_reg); + mutex_unlock(&kbdev->csf.reg_lock); va_region_alloc_error: vunmap(cpu_addr); vmap_error: @@ -1325,8 +1328,8 @@ void kbase_csf_firmware_mcu_shared_mapping_term( if (csf_mapping->va_reg) { mutex_lock(&kbdev->csf.reg_lock); kbase_remove_va_region(csf_mapping->va_reg); - mutex_unlock(&kbdev->csf.reg_lock); kbase_free_alloced_region(csf_mapping->va_reg); + mutex_unlock(&kbdev->csf.reg_lock); } if (csf_mapping->phys) { diff --git a/mali_kbase/csf/mali_kbase_csf_ioctl.h b/mali_kbase/csf/mali_kbase_csf_ioctl.h deleted file mode 100644 index 8c63e1c..0000000 --- a/mali_kbase/csf/mali_kbase_csf_ioctl.h +++ /dev/null @@ -1,382 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. 
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -#ifndef _KBASE_CSF_IOCTL_H_ -#define _KBASE_CSF_IOCTL_H_ - -#include <asm-generic/ioctl.h> -#include <linux/types.h> - -/* - * 1.0: - * - CSF IOCTL header separated from JM - * 1.1: - * - Add a new priority level BASE_QUEUE_GROUP_PRIORITY_REALTIME - * - Add ioctl 54: This controls the priority setting. 
- * 1.2: - * - Add new CSF GPU_FEATURES register into the property structure - * returned by KBASE_IOCTL_GET_GPUPROPS - */ - -#define BASE_UK_VERSION_MAJOR 1 -#define BASE_UK_VERSION_MINOR 2 - -/** - * struct kbase_ioctl_version_check - Check version compatibility between - * kernel and userspace - * - * @major: Major version number - * @minor: Minor version number - */ -struct kbase_ioctl_version_check { - __u16 major; - __u16 minor; -}; - -#define KBASE_IOCTL_VERSION_CHECK_RESERVED \ - _IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check) - - -/** - * struct kbase_ioctl_cs_queue_register - Register a GPU command queue with the - * base back-end - * - * @buffer_gpu_addr: GPU address of the buffer backing the queue - * @buffer_size: Size of the buffer in bytes - * @priority: Priority of the queue within a group when run within a process - * @padding: Currently unused, must be zero - */ -struct kbase_ioctl_cs_queue_register { - __u64 buffer_gpu_addr; - __u32 buffer_size; - __u8 priority; - __u8 padding[3]; -}; - -#define KBASE_IOCTL_CS_QUEUE_REGISTER \ - _IOW(KBASE_IOCTL_TYPE, 36, struct kbase_ioctl_cs_queue_register) - -/** - * struct kbase_ioctl_cs_queue_kick - Kick the GPU command queue group scheduler - * to notify that a queue has been updated - * - * @buffer_gpu_addr: GPU address of the buffer backing the queue - */ -struct kbase_ioctl_cs_queue_kick { - __u64 buffer_gpu_addr; -}; - -#define KBASE_IOCTL_CS_QUEUE_KICK \ - _IOW(KBASE_IOCTL_TYPE, 37, struct kbase_ioctl_cs_queue_kick) - -/** - * union kbase_ioctl_cs_queue_bind - Bind a GPU command queue to a group - * - * @in: Input parameters - * @in.buffer_gpu_addr: GPU address of the buffer backing the queue - * @in.group_handle: Handle of the group to which the queue should be bound - * @in.csi_index: Index of the CSF interface the queue should be bound to - * @in.padding: Currently unused, must be zero - * @out: Output parameters - * @out.mmap_handle: Handle to be used for creating the mapping of CS - * 
input/output pages - */ -union kbase_ioctl_cs_queue_bind { - struct { - __u64 buffer_gpu_addr; - __u8 group_handle; - __u8 csi_index; - __u8 padding[6]; - } in; - struct { - __u64 mmap_handle; - } out; -}; - -#define KBASE_IOCTL_CS_QUEUE_BIND \ - _IOWR(KBASE_IOCTL_TYPE, 39, union kbase_ioctl_cs_queue_bind) - -/* ioctl 40 is free to use */ - -/** - * struct kbase_ioctl_cs_queue_terminate - Terminate a GPU command queue - * - * @buffer_gpu_addr: GPU address of the buffer backing the queue - */ -struct kbase_ioctl_cs_queue_terminate { - __u64 buffer_gpu_addr; -}; - -#define KBASE_IOCTL_CS_QUEUE_TERMINATE \ - _IOW(KBASE_IOCTL_TYPE, 41, struct kbase_ioctl_cs_queue_terminate) - -/** - * union kbase_ioctl_cs_queue_group_create - Create a GPU command queue group - * @in: Input parameters - * @in.tiler_mask: Mask of tiler endpoints the group is allowed to use. - * @in.fragment_mask: Mask of fragment endpoints the group is allowed to use. - * @in.compute_mask: Mask of compute endpoints the group is allowed to use. - * @in.cs_min: Minimum number of CSs required. - * @in.priority: Queue group's priority within a process. - * @in.tiler_max: Maximum number of tiler endpoints the group is allowed - * to use. - * @in.fragment_max: Maximum number of fragment endpoints the group is - * allowed to use. - * @in.compute_max: Maximum number of compute endpoints the group is allowed - * to use. - * @in.padding: Currently unused, must be zero - * @out: Output parameters - * @out.group_handle: Handle of a newly created queue group. 
- * @out.padding: Currently unused, must be zero - */ -union kbase_ioctl_cs_queue_group_create { - struct { - __u64 tiler_mask; - __u64 fragment_mask; - __u64 compute_mask; - __u8 cs_min; - __u8 priority; - __u8 tiler_max; - __u8 fragment_max; - __u8 compute_max; - __u8 padding[3]; - - } in; - struct { - __u8 group_handle; - __u8 padding[7]; - } out; -}; - -#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE \ - _IOWR(KBASE_IOCTL_TYPE, 42, union kbase_ioctl_cs_queue_group_create) - -/** - * struct kbase_ioctl_cs_queue_group_term - Terminate a GPU command queue group - * - * @group_handle: Handle of the queue group to be terminated - * @padding: Padding to round up to a multiple of 8 bytes, must be zero - */ -struct kbase_ioctl_cs_queue_group_term { - __u8 group_handle; - __u8 padding[7]; -}; - -#define KBASE_IOCTL_CS_QUEUE_GROUP_TERMINATE \ - _IOW(KBASE_IOCTL_TYPE, 43, struct kbase_ioctl_cs_queue_group_term) - -#define KBASE_IOCTL_CS_EVENT_SIGNAL \ - _IO(KBASE_IOCTL_TYPE, 44) - -typedef __u8 base_kcpu_queue_id; /* We support up to 256 active KCPU queues */ - -/** - * struct kbase_ioctl_kcpu_queue_new - Create a KCPU command queue - * - * @id: ID of the new command queue returned by the kernel - * @padding: Padding to round up to a multiple of 8 bytes, must be zero - */ -struct kbase_ioctl_kcpu_queue_new { - base_kcpu_queue_id id; - __u8 padding[7]; -}; - -#define KBASE_IOCTL_KCPU_QUEUE_CREATE \ - _IOR(KBASE_IOCTL_TYPE, 45, struct kbase_ioctl_kcpu_queue_new) - -/** - * struct kbase_ioctl_kcpu_queue_delete - Destroy a KCPU command queue - * - * @id: ID of the command queue to be destroyed - * @padding: Padding to round up to a multiple of 8 bytes, must be zero - */ -struct kbase_ioctl_kcpu_queue_delete { - base_kcpu_queue_id id; - __u8 padding[7]; -}; - -#define KBASE_IOCTL_KCPU_QUEUE_DELETE \ - _IOW(KBASE_IOCTL_TYPE, 46, struct kbase_ioctl_kcpu_queue_delete) - -/** - * struct kbase_ioctl_kcpu_queue_enqueue - Enqueue commands into the KCPU queue - * - * @addr: Memory address 
of an array of struct base_kcpu_queue_command - * @nr_commands: Number of commands in the array - * @id: kcpu queue identifier, returned by KBASE_IOCTL_KCPU_QUEUE_CREATE ioctl - * @padding: Padding to round up to a multiple of 8 bytes, must be zero - */ -struct kbase_ioctl_kcpu_queue_enqueue { - __u64 addr; - __u32 nr_commands; - base_kcpu_queue_id id; - __u8 padding[3]; -}; - -#define KBASE_IOCTL_KCPU_QUEUE_ENQUEUE \ - _IOW(KBASE_IOCTL_TYPE, 47, struct kbase_ioctl_kcpu_queue_enqueue) - -/** - * union kbase_ioctl_cs_tiler_heap_init - Initialize chunked tiler memory heap - * @in: Input parameters - * @in.chunk_size: Size of each chunk. - * @in.initial_chunks: Initial number of chunks that heap will be created with. - * @in.max_chunks: Maximum number of chunks that the heap is allowed to use. - * @in.target_in_flight: Number of render-passes that the driver should attempt to - * keep in flight for which allocation of new chunks is - * allowed. - * @in.group_id: Group ID to be used for physical allocations. - * @in.padding: Padding - * @out: Output parameters - * @out.gpu_heap_va: GPU VA (virtual address) of Heap context that was set up - * for the heap. - * @out.first_chunk_va: GPU VA of the first chunk allocated for the heap, - * actually points to the header of heap chunk and not to - * the low address of free memory in the chunk. - */ -union kbase_ioctl_cs_tiler_heap_init { - struct { - __u32 chunk_size; - __u32 initial_chunks; - __u32 max_chunks; - __u16 target_in_flight; - __u8 group_id; - __u8 padding; - } in; - struct { - __u64 gpu_heap_va; - __u64 first_chunk_va; - } out; -}; - -#define KBASE_IOCTL_CS_TILER_HEAP_INIT \ - _IOWR(KBASE_IOCTL_TYPE, 48, union kbase_ioctl_cs_tiler_heap_init) - -/** - * struct kbase_ioctl_cs_tiler_heap_term - Terminate a chunked tiler heap - * instance - * - * @gpu_heap_va: GPU VA of Heap context that was set up for the heap. 
- */ -struct kbase_ioctl_cs_tiler_heap_term { - __u64 gpu_heap_va; -}; - -#define KBASE_IOCTL_CS_TILER_HEAP_TERM \ - _IOW(KBASE_IOCTL_TYPE, 49, struct kbase_ioctl_cs_tiler_heap_term) - -/** - * union kbase_ioctl_cs_get_glb_iface - Request the global control block - * of CSF interface capabilities - * - * @in: Input parameters - * @in.max_group_num: The maximum number of groups to be read. Can be 0, in - * which case groups_ptr is unused. - * @in.max_total_stream_num: The maximum number of CSs to be read. Can be 0, in - * which case streams_ptr is unused. - * @in.groups_ptr: Pointer where to store all the group data (sequentially). - * @in.streams_ptr: Pointer where to store all the CS data (sequentially). - * @out: Output parameters - * @out.glb_version: Global interface version. - * @out.features: Bit mask of features (e.g. whether certain types of job - * can be suspended). - * @out.group_num: Number of CSGs supported. - * @out.prfcnt_size: Size of CSF performance counters, in bytes. Bits 31:16 - * hold the size of firmware performance counter data - * and 15:0 hold the size of hardware performance counter - * data. - * @out.total_stream_num: Total number of CSs, summed across all groups. - * @out.padding: Will be zeroed. 
- * - * - */ -union kbase_ioctl_cs_get_glb_iface { - struct { - __u32 max_group_num; - __u32 max_total_stream_num; - __u64 groups_ptr; - __u64 streams_ptr; - } in; - struct { - __u32 glb_version; - __u32 features; - __u32 group_num; - __u32 prfcnt_size; - __u32 total_stream_num; - __u32 padding; - } out; -}; - -#define KBASE_IOCTL_CS_GET_GLB_IFACE \ - _IOWR(KBASE_IOCTL_TYPE, 51, union kbase_ioctl_cs_get_glb_iface) - -struct kbase_ioctl_cs_cpu_queue_info { - __u64 buffer; - __u64 size; -}; - -#define KBASE_IOCTL_VERSION_CHECK \ - _IOWR(KBASE_IOCTL_TYPE, 52, struct kbase_ioctl_version_check) - -#define KBASE_IOCTL_CS_CPU_QUEUE_DUMP \ - _IOW(KBASE_IOCTL_TYPE, 53, struct kbase_ioctl_cs_cpu_queue_info) - -/*************** - * test ioctls * - ***************/ -#if MALI_UNIT_TEST -/* These ioctls are purely for test purposes and are not used in the production - * driver, they therefore may change without notice - */ - -/** - * struct kbase_ioctl_cs_event_memory_write - Write an event memory address - * @cpu_addr: Memory address to write - * @value: Value to write - * @padding: Currently unused, must be zero - */ -struct kbase_ioctl_cs_event_memory_write { - __u64 cpu_addr; - __u8 value; - __u8 padding[7]; -}; - -/** - * union kbase_ioctl_cs_event_memory_read - Read an event memory address - * @in: Input parameters - * @in.cpu_addr: Memory address to read - * @out: Output parameters - * @out.value: Value read - * @out.padding: Currently unused, must be zero - */ -union kbase_ioctl_cs_event_memory_read { - struct { - __u64 cpu_addr; - } in; - struct { - __u8 value; - __u8 padding[7]; - } out; -}; - -#endif /* MALI_UNIT_TEST */ - -#endif /* _KBASE_CSF_IOCTL_H_ */ diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.c b/mali_kbase/csf/mali_kbase_csf_kcpu.c index e5aee61..1203d2c 100644 --- a/mali_kbase/csf/mali_kbase_csf_kcpu.c +++ b/mali_kbase/csf/mali_kbase_csf_kcpu.c @@ -257,7 +257,7 @@ static int kbase_kcpu_jit_allocate_process( * No prior JIT_FREE command is active. 
Roll * back previous allocations and fail. */ - dev_warn_ratelimited(kctx->kbdev->dev, "JIT alloc command failed: %p\n", cmd); + dev_warn_ratelimited(kctx->kbdev->dev, "JIT alloc command failed: %pK\n", cmd); ret = -ENOMEM; goto fail; } @@ -858,10 +858,7 @@ static void kbase_kcpu_cqs_set_process(struct kbase_device *kbdev, "Sync memory %llx already freed", cqs_set->objs[i].addr); queue->has_error = true; } else { - if (cqs_set->propagate_flags & (1 << i)) - evt[BASEP_EVENT_ERR_INDEX] = queue->has_error; - else - evt[BASEP_EVENT_ERR_INDEX] = false; + evt[BASEP_EVENT_ERR_INDEX] = queue->has_error; /* Set to signaled */ evt[BASEP_EVENT_VAL_INDEX]++; kbase_phy_alloc_mapping_put(queue->kctx, mapping); @@ -908,8 +905,267 @@ static int kbase_kcpu_cqs_set_prepare( current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_SET; current_command->info.cqs_set.nr_objs = nr_objs; current_command->info.cqs_set.objs = objs; - current_command->info.cqs_set.propagate_flags = - cqs_set_info->propagate_flags; + + return 0; +} + +static void cleanup_cqs_wait_operation(struct kbase_kcpu_command_queue *queue, + struct kbase_kcpu_command_cqs_wait_operation_info *cqs_wait_operation) +{ + WARN_ON(!cqs_wait_operation->nr_objs); + WARN_ON(!cqs_wait_operation->objs); + WARN_ON(!cqs_wait_operation->signaled); + WARN_ON(!queue->cqs_wait_count); + + if (--queue->cqs_wait_count == 0) { + kbase_csf_event_wait_remove(queue->kctx, + event_cqs_callback, queue); + } + + kfree(cqs_wait_operation->signaled); + kfree(cqs_wait_operation->objs); + cqs_wait_operation->signaled = NULL; + cqs_wait_operation->objs = NULL; +} + +static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev, + struct kbase_kcpu_command_queue *queue, + struct kbase_kcpu_command_cqs_wait_operation_info *cqs_wait_operation) +{ + u32 i; + + lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + + if (WARN_ON(!cqs_wait_operation->objs)) + return -EINVAL; + + /* Skip the CQS waits that have already been signaled when 
processing */ + for (i = find_first_zero_bit(cqs_wait_operation->signaled, cqs_wait_operation->nr_objs); i < cqs_wait_operation->nr_objs; i++) { + if (!test_bit(i, cqs_wait_operation->signaled)) { + struct kbase_vmap_struct *mapping; + bool sig_set; + u64 *evt = (u64 *)kbase_phy_alloc_mapping_get(queue->kctx, + cqs_wait_operation->objs[i].addr, &mapping); + + /* GPUCORE-28172 RDT to review */ + if (!queue->command_started) + queue->command_started = true; + + if (!evt) { + dev_warn(kbdev->dev, + "Sync memory %llx already freed", cqs_wait_operation->objs[i].addr); + queue->has_error = true; + return -EINVAL; + } + + switch (cqs_wait_operation->objs[i].operation) { + case BASEP_CQS_WAIT_OPERATION_LE: + sig_set = *evt <= cqs_wait_operation->objs[i].val; + break; + case BASEP_CQS_WAIT_OPERATION_GT: + sig_set = *evt > cqs_wait_operation->objs[i].val; + break; + default: + dev_warn(kbdev->dev, + "Unsupported CQS wait operation %d", cqs_wait_operation->objs[i].operation); + + kbase_phy_alloc_mapping_put(queue->kctx, mapping); + queue->has_error = true; + + return -EINVAL; + } + + /* Increment evt up to the error_state value depending on the CQS data type */ + switch (cqs_wait_operation->objs[i].data_type) { + default: + dev_warn(kbdev->dev, "Unreachable data_type=%d", cqs_wait_operation->objs[i].data_type); + /* Fallthrough - hint to compiler that there's really only 2 options at present */ + case BASEP_CQS_DATA_TYPE_U32: + evt = (u64 *)((u8 *)evt + sizeof(u32)); + break; + case BASEP_CQS_DATA_TYPE_U64: + evt = (u64 *)((u8 *)evt + sizeof(u64)); + break; + } + + if (sig_set) { + bitmap_set(cqs_wait_operation->signaled, i, 1); + if ((cqs_wait_operation->inherit_err_flags & (1U << i)) && + *evt > 0) { + queue->has_error = true; + } + + /* GPUCORE-28172 RDT to review */ + + queue->command_started = false; + } + + kbase_phy_alloc_mapping_put(queue->kctx, mapping); + + if (!sig_set) + break; + } + } + + /* For the queue to progress further, all cqs objects should get + * 
signaled. + */ + return bitmap_full(cqs_wait_operation->signaled, cqs_wait_operation->nr_objs); +} + +static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue *queue, + struct base_kcpu_command_cqs_wait_operation_info *cqs_wait_operation_info, + struct kbase_kcpu_command *current_command) +{ + struct base_cqs_wait_operation_info *objs; + unsigned int nr_objs = cqs_wait_operation_info->nr_objs; + + lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + + if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS) + return -EINVAL; + + if (!nr_objs) + return -EINVAL; + + objs = kcalloc(nr_objs, sizeof(*objs), GFP_KERNEL); + if (!objs) + return -ENOMEM; + + if (copy_from_user(objs, u64_to_user_ptr(cqs_wait_operation_info->objs), + nr_objs * sizeof(*objs))) { + kfree(objs); + return -ENOMEM; + } + + if (++queue->cqs_wait_count == 1) { + if (kbase_csf_event_wait_add(queue->kctx, + event_cqs_callback, queue)) { + kfree(objs); + queue->cqs_wait_count--; + return -ENOMEM; + } + } + + current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION; + current_command->info.cqs_wait_operation.nr_objs = nr_objs; + current_command->info.cqs_wait_operation.objs = objs; + current_command->info.cqs_wait_operation.inherit_err_flags = + cqs_wait_operation_info->inherit_err_flags; + + current_command->info.cqs_wait_operation.signaled = kcalloc(BITS_TO_LONGS(nr_objs), + sizeof(*current_command->info.cqs_wait_operation.signaled), GFP_KERNEL); + if (!current_command->info.cqs_wait_operation.signaled) { + if (--queue->cqs_wait_count == 0) { + kbase_csf_event_wait_remove(queue->kctx, + event_cqs_callback, queue); + } + + kfree(objs); + return -ENOMEM; + } + + return 0; +} + +static void kbase_kcpu_cqs_set_operation_process( + struct kbase_device *kbdev, + struct kbase_kcpu_command_queue *queue, + struct kbase_kcpu_command_cqs_set_operation_info *cqs_set_operation) +{ + unsigned int i; + + lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + + if 
(WARN_ON(!cqs_set_operation->objs)) + return; + + for (i = 0; i < cqs_set_operation->nr_objs; i++) { + struct kbase_vmap_struct *mapping; + u64 *evt; + + evt = (u64 *)kbase_phy_alloc_mapping_get( + queue->kctx, cqs_set_operation->objs[i].addr, &mapping); + + /* GPUCORE-28172 RDT to review */ + + if (!evt) { + dev_warn(kbdev->dev, + "Sync memory %llx already freed", cqs_set_operation->objs[i].addr); + queue->has_error = true; + } else { + switch (cqs_set_operation->objs[i].operation) { + case BASEP_CQS_SET_OPERATION_ADD: + *evt += cqs_set_operation->objs[i].val; + break; + case BASEP_CQS_SET_OPERATION_SET: + *evt = cqs_set_operation->objs[i].val; + break; + default: + dev_warn(kbdev->dev, + "Unsupported CQS set operation %d", cqs_set_operation->objs[i].operation); + queue->has_error = true; + break; + } + + /* Increment evt up to the error_state value depending on the CQS data type */ + switch (cqs_set_operation->objs[i].data_type) { + default: + dev_warn(kbdev->dev, "Unreachable data_type=%d", cqs_set_operation->objs[i].data_type); + /* Fallthrough - hint to compiler that there's really only 2 options at present */ + case BASEP_CQS_DATA_TYPE_U32: + evt = (u64 *)((u8 *)evt + sizeof(u32)); + break; + case BASEP_CQS_DATA_TYPE_U64: + evt = (u64 *)((u8 *)evt + sizeof(u64)); + break; + } + + /* GPUCORE-28172 RDT to review */ + + /* Always propagate errors */ + *evt = queue->has_error; + + kbase_phy_alloc_mapping_put(queue->kctx, mapping); + } + } + + kbase_csf_event_signal_notify_gpu(queue->kctx); + + kfree(cqs_set_operation->objs); + cqs_set_operation->objs = NULL; +} + +static int kbase_kcpu_cqs_set_operation_prepare( + struct kbase_kcpu_command_queue *kcpu_queue, + struct base_kcpu_command_cqs_set_operation_info *cqs_set_operation_info, + struct kbase_kcpu_command *current_command) +{ + struct kbase_context *const kctx = kcpu_queue->kctx; + struct base_cqs_set_operation_info *objs; + unsigned int nr_objs = cqs_set_operation_info->nr_objs; + + 
lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + + if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS) + return -EINVAL; + + if (!nr_objs) + return -EINVAL; + + objs = kcalloc(nr_objs, sizeof(*objs), GFP_KERNEL); + if (!objs) + return -ENOMEM; + + if (copy_from_user(objs, u64_to_user_ptr(cqs_set_operation_info->objs), + nr_objs * sizeof(*objs))) { + kfree(objs); + return -ENOMEM; + } + + current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION; + current_command->info.cqs_set_operation.nr_objs = nr_objs; + current_command->info.cqs_set_operation.objs = objs; return 0; } @@ -1365,6 +1621,28 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, &cmd->info.cqs_set); break; + case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: + status = kbase_kcpu_cqs_wait_operation_process(kbdev, queue, + &cmd->info.cqs_wait_operation); + + if (!status && !ignore_waits) { + process_next = false; + } else { + /* Either all CQS objects were signaled or + * there was an error or the queue itself is + * being deleted. + * In all cases can move to the next command. + * TBD: handle the error + */ + cleanup_cqs_wait_operation(queue, &cmd->info.cqs_wait_operation); + } + + break; + case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: + kbase_kcpu_cqs_set_operation_process(kbdev, queue, + &cmd->info.cqs_set_operation); + + break; case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: /* Clear the queue's error state */ queue->has_error = false; @@ -1404,7 +1682,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, queue->kctx, NULL, cmd->info.import.gpu_va); kbase_gpu_vm_unlock(queue->kctx); - if (ret == false) { + if (!ret) { queue->has_error = true; dev_warn(kbdev->dev, "failed to release the reference. 
resource not found\n"); @@ -1425,7 +1703,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, queue->kctx, NULL, cmd->info.import.gpu_va); kbase_gpu_vm_unlock(queue->kctx); - if (ret == false) { + if (!ret) { queue->has_error = true; dev_warn(kbdev->dev, "failed to release the reference. resource not found\n"); @@ -1591,6 +1869,16 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND( } break; } + case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: + { + /* GPUCORE-28172 RDT to review */ + break; + } + case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: + { + /* GPUCORE-28172 RDT to review */ + break; + } case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER(kbdev, queue); @@ -1758,6 +2046,14 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, ret = kbase_kcpu_cqs_set_prepare(queue, &command.info.cqs_set, kcpu_cmd); break; + case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: + ret = kbase_kcpu_cqs_wait_operation_prepare(queue, + &command.info.cqs_wait_operation, kcpu_cmd); + break; + case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: + ret = kbase_kcpu_cqs_set_operation_prepare(queue, + &command.info.cqs_set_operation, kcpu_cmd); + break; case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: kcpu_cmd->type = BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER; ret = 0; diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.h b/mali_kbase/csf/mali_kbase_csf_kcpu.h index a528572..86aa7dc 100644 --- a/mali_kbase/csf/mali_kbase_csf_kcpu.h +++ b/mali_kbase/csf/mali_kbase_csf_kcpu.h @@ -69,13 +69,10 @@ struct kbase_kcpu_command_fence_info { * @objs: Array of structures which define CQS objects to be used by * the kcpu command. * @nr_objs: Number of CQS objects in the array. - * @propagate_flags: Bit-pattern for the CQSs in the array that are set - * to propagate queue error-state to the flagged CQSs. 
*/ struct kbase_kcpu_command_cqs_set_info { struct base_cqs_set *objs; unsigned int nr_objs; - u32 propagate_flags; }; /** @@ -99,6 +96,36 @@ struct kbase_kcpu_command_cqs_wait_info { }; /** + * struct kbase_kcpu_command_cqs_set_operation_info - Structure which holds information + * about CQS objects for the kcpu CQS timeline set command + * + * @objs: Array of structures which define CQS timeline objects to be used by + * the kcpu command. + * @nr_objs: Number of CQS objects in the array. + */ +struct kbase_kcpu_command_cqs_set_operation_info { + struct base_cqs_set_operation_info *objs; + unsigned int nr_objs; +}; + +/** + * struct kbase_kcpu_command_cqs_wait_operation_info - Structure which holds information + * about CQS objects for the kcpu CQS timeline wait command + * + * @objs: Array of structures which define CQS timeline objects to be used by + * the kcpu command. + * @signaled: Bit array used to report the status of the CQS wait objects. + * 1 is signaled, 0 otherwise. + * @nr_objs: Number of CQS objects in the array. 
+ */ +struct kbase_kcpu_command_cqs_wait_operation_info { + struct base_cqs_wait_operation_info *objs; + unsigned long *signaled; + unsigned int nr_objs; + u32 inherit_err_flags; +}; + +/** * struct kbase_kcpu_command_jit_alloc_info - Structure which holds information * needed for the kcpu command for jit allocations * @@ -200,6 +227,8 @@ struct kbase_kcpu_command { struct kbase_kcpu_command_fence_info fence; struct kbase_kcpu_command_cqs_wait_info cqs_wait; struct kbase_kcpu_command_cqs_set_info cqs_set; + struct kbase_kcpu_command_cqs_wait_operation_info cqs_wait_operation; + struct kbase_kcpu_command_cqs_set_operation_info cqs_set_operation; struct kbase_kcpu_command_import_info import; struct kbase_kcpu_command_jit_alloc_info jit_alloc; struct kbase_kcpu_command_jit_free_info jit_free; diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu_debugfs.c b/mali_kbase/csf/mali_kbase_csf_kcpu_debugfs.c index 5c2e8e3..d59e77c 100644 --- a/mali_kbase/csf/mali_kbase_csf_kcpu_debugfs.c +++ b/mali_kbase/csf/mali_kbase_csf_kcpu_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -95,7 +95,7 @@ static void kbasep_csf_kcpu_debugfs_print_queue(struct seq_file *file, struct kbase_sync_fence_info info; kbase_sync_fence_info_get(cmd->info.fence.fence, &info); - seq_printf(file, ", Fence %p %s %s", + seq_printf(file, ", Fence %pK %s %s", info.fence, info.name, kbase_sync_status_string(info.status)); break; diff --git a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c index b59ffd4..e8da0f3 100644 --- a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c +++ b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c @@ -307,6 +307,31 @@ static void kbase_csf_dump_firmware_trace_buffer(struct kbase_device *kbdev) kfree(buf); } +/** + * kbase_csf_hwcnt_on_reset_error() - Sets HWCNT to appropriate state in the + * event of an error during GPU reset. + * @kbdev: Pointer to KBase device + */ +static void kbase_csf_hwcnt_on_reset_error(struct kbase_device *kbdev) +{ + unsigned long flags; + + /* Treat this as an unrecoverable error for HWCNT */ + kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface); + + /* Re-enable counters to ensure matching enable/disable pair. + * This might reduce the hwcnt disable count to 0, and therefore + * trigger actual re-enabling of hwcnt. + * However, as the backend is now in the unrecoverable error state, + * re-enabling will immediately fail and put the context into the error + * state, preventing the hardware from being touched (which could have + * risked a hang). 
+ */ + kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + kbase_csf_scheduler_spin_unlock(kbdev, flags); +} + static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev, bool firmware_inited, bool silent) { @@ -396,8 +421,10 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev, mutex_unlock(&kbdev->pm.lock); - if (WARN_ON(err)) - goto error; + if (WARN_ON(err)) { + kbase_csf_hwcnt_on_reset_error(kbdev); + return err; + } mutex_lock(&kbdev->mmu_hw_mutex); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -414,40 +441,20 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev, err = kbase_pm_wait_for_desired_state(kbdev); mutex_unlock(&kbdev->pm.lock); - if (err) - goto error; + if (WARN_ON(err)) { + kbase_csf_hwcnt_on_reset_error(kbdev); + return err; + } /* Re-enable GPU hardware counters */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_csf_scheduler_spin_lock(kbdev, &flags); kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + kbase_csf_scheduler_spin_unlock(kbdev, flags); if (!silent) dev_err(kbdev->dev, "Reset complete"); return 0; -error: - WARN_ON(!err); - - /* If hardware init failed, we assume hardware counters will - * not work and put the backend into the unrecoverable error - * state. - */ - kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface); - - /* Re-enable counters to ensure matching enable/disable pair. - * This might reduce the hwcnt disable count to 0, and therefore - * trigger actual re-enabling of hwcnt. - * However, as the backend is now in the unrecoverable error state, - * re-enabling will immediately fail and put the context into the error - * state, preventing the hardware from being touched (which could have - * risked a hang). 
- */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - return err; } static void kbase_csf_reset_gpu_worker(struct work_struct *data) @@ -484,25 +491,29 @@ static void kbase_csf_reset_gpu_worker(struct work_struct *data) kbase_csf_reset_end_hw_access(kbdev, err, firmware_inited); } -bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev) +bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags) { + if (flags & RESET_FLAGS_HWC_UNRECOVERABLE_ERROR) + kbase_hwcnt_backend_csf_on_unrecoverable_error( + &kbdev->hwcnt_gpu_iface); + if (atomic_cmpxchg(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_NOT_PENDING, KBASE_CSF_RESET_GPU_PREPARED) != - KBASE_CSF_RESET_GPU_NOT_PENDING) { + KBASE_CSF_RESET_GPU_NOT_PENDING) /* Some other thread is already resetting the GPU */ return false; - } return true; } KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu); -bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev) +bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, + unsigned int flags) { lockdep_assert_held(&kbdev->hwaccess_lock); - return kbase_prepare_to_reset_gpu(kbdev); + return kbase_prepare_to_reset_gpu(kbdev, flags); } void kbase_reset_gpu(struct kbase_device *kbdev) diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.c b/mali_kbase/csf/mali_kbase_csf_scheduler.c index b9dc59c..84d6f81 100644 --- a/mali_kbase/csf/mali_kbase_csf_scheduler.c +++ b/mali_kbase/csf/mali_kbase_csf_scheduler.c @@ -28,8 +28,8 @@ #include "../tl/mali_kbase_tracepoints.h" #include "backend/gpu/mali_kbase_pm_internal.h" #include <linux/export.h> -#include "mali_gpu_csf_registers.h" -#include <mali_base_kernel.h> +#include <uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h> +#include <uapi/gpu/arm/midgard/mali_base_kernel.h> /* Value to indicate that a queue group is not groups_to_schedule list */ #define 
KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX) @@ -373,6 +373,45 @@ static bool queue_group_scheduled_locked(struct kbase_queue_group *group) } /** + * scheduler_wait_protm_quit() - Wait for GPU to exit protected mode. + * + * @kbdev: Pointer to the GPU device + * + * This function waits for the GPU to exit protected mode which is confirmed + * when active_protm_grp is set to NULL. + */ +static void scheduler_wait_protm_quit(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + long wt = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); + long remaining; + + lockdep_assert_held(&scheduler->lock); + + remaining = wait_event_timeout(kbdev->csf.event_wait, + !kbase_csf_scheduler_protected_mode_in_use(kbdev), wt); + + if (!remaining) + dev_warn(kbdev->dev, "Timeout, protm_quit wait skipped"); +} + +/** + * scheduler_force_protm_exit() - Force GPU to exit protected mode. + * + * @kbdev: Pointer to the GPU device + * + * This function sends a ping request to the firmware and waits for the GPU + * to exit protected mode. + */ +static void scheduler_force_protm_exit(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + kbase_csf_firmware_ping(kbdev); + scheduler_wait_protm_quit(kbdev); +} + +/** * scheduler_timer_is_enabled_nolock() - Check if the scheduler wakes up * automatically for periodic tasks. 
* @@ -607,7 +646,7 @@ static int halt_stream_sync(struct kbase_queue *queue) if (!remaining) { dev_warn(kbdev->dev, "Timed out waiting for queue to start on csi %d bound to group %d on slot %d", csi_index, group->handle, group->csg_nr); - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(kbdev); return -ETIMEDOUT; @@ -629,26 +668,14 @@ static int halt_stream_sync(struct kbase_queue *queue) (CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK)) == CS_ACK_STATE_STOP), remaining); - /* Queues that have failed to stop in time shall raise a fatal error - * as their group would fail to suspend which could no longer be safely - * resumed. - */ if (!remaining) { - unsigned long flags; - dev_warn(kbdev->dev, "Timed out waiting for queue to stop on csi %d bound to group %d on slot %d", queue->csi_index, group->handle, group->csg_nr); - spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); - kbase_csf_add_queue_fatal_error( - queue, GPU_EXCEPTION_TYPE_SW_FAULT_2, 0); - spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, - flags); - /* TODO GPUCORE-25328: The CSG can't be terminated, the GPU * will be reset as a work-around. */ - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(kbdev); } return (remaining) ? 0 : -ETIMEDOUT; @@ -722,27 +749,6 @@ static int sched_halt_stream(struct kbase_queue *queue) } } retry: - /* First wait for the group to reach a stable state. IDLE state is - * an intermediate state that is only set by Scheduler at the start - * of a tick (prior to scanout) for groups that received idle - * notification, then later the idle group is moved to one of the - * suspended states or the runnable state. 
- */ - while (group->run_state == KBASE_CSF_GROUP_IDLE) { - mutex_unlock(&scheduler->lock); - remaining = wait_event_timeout(kbdev->csf.event_wait, - group->run_state != - KBASE_CSF_GROUP_IDLE, - kbdev->csf.fw_timeout_ms); - mutex_lock(&scheduler->lock); - if (!remaining) { - dev_warn(kbdev->dev, - "Timed out waiting for state change of Group-%d when stopping a queue on csi %d", - group->handle, queue->csi_index); - } - } - - WARN_ON(group->run_state == KBASE_CSF_GROUP_IDLE); /* Update the group state so that it can get scheduled soon */ update_idle_suspended_group_state(group); @@ -1559,7 +1565,7 @@ static void update_offslot_non_idle_cnt_on_grp_suspend( lockdep_assert_held(&scheduler->lock); - if (scheduler->state == SCHED_BUSY || scheduler->apply_async_protm) { + if (scheduler->state == SCHED_BUSY) { /* active phase or, async entering the protected mode */ if (group->prepared_seq_num >= scheduler->non_idle_scanout_grps) { @@ -1731,7 +1737,6 @@ static bool cleanup_csg_slot(struct kbase_queue_group *group) /* The csg does not need cleanup other than drop its AS */ spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); as_fault = kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT); - WARN_ON(kctx->mmu_flush_pend_state != KCTX_MMU_FLUSH_NOT_PEND); kbase_ctx_sched_release_ctx(kctx); if (unlikely(group->faulted)) as_fault = true; @@ -1779,11 +1784,12 @@ static void update_csg_slot_priority(struct kbase_queue_group *group, u8 prio) csg_slot = &kbdev->csf.scheduler.csg_slots[slot]; ginfo = &kbdev->csf.global_iface.groups[slot]; + /* CSGs remaining on-slot can be either idle or runnable. + * This also applies in protected mode. 
+ */ WARN_ON(!((group->run_state == KBASE_CSF_GROUP_RUNNABLE) || (group->run_state == KBASE_CSF_GROUP_IDLE))); - group->run_state = KBASE_CSF_GROUP_RUNNABLE; - /* Update consumes a group from scanout */ update_offslot_non_idle_cnt_for_onslot_grp(group); @@ -1858,12 +1864,11 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_ctx_sched_retain_ctx(kctx); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - kbase_mmu_deferred_flush_invalidate(kctx); mutex_unlock(&kbdev->mmu_hw_mutex); if (kctx->as_nr == KBASEP_AS_NR_INVALID) { - dev_dbg(kbdev->dev, "Could not get a valid AS for group %d of context %d_%d on slot %d\n", - group->handle, kctx->tgid, kctx->id, slot); + dev_warn(kbdev->dev, "Could not get a valid AS for group %d of context %d_%d on slot %d\n", + group->handle, kctx->tgid, kctx->id, slot); return; } @@ -1896,6 +1901,7 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_OTHER, tiler_mask & U32_MAX); + ep_cfg = CSG_EP_REQ_COMPUTE_EP_SET(ep_cfg, compute_max); ep_cfg = CSG_EP_REQ_FRAGMENT_EP_SET(ep_cfg, fragment_max); ep_cfg = CSG_EP_REQ_TILER_EP_SET(ep_cfg, tiler_max); @@ -2043,7 +2049,7 @@ static int term_group_sync(struct kbase_queue_group *group) if (!remaining) { dev_warn(kbdev->dev, "term request timed out for group %d on slot %d", group->handle, group->csg_nr); - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(kbdev); err = -ETIMEDOUT; } @@ -2112,9 +2118,10 @@ static int scheduler_group_schedule(struct kbase_queue_group *group) { struct kbase_context *kctx = group->kctx; struct kbase_device *kbdev = kctx->kbdev; + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; lockdep_assert_held(&kctx->csf.lock); - lockdep_assert_held(&kbdev->csf.scheduler.lock); + lockdep_assert_held(&scheduler->lock); KBASE_KTRACE_ADD_CSF_GRP(kbdev, 
GROUP_SCHEDULE, group, group->run_state); if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) @@ -2125,8 +2132,39 @@ static int scheduler_group_schedule(struct kbase_queue_group *group) if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE) update_idle_suspended_group_state(group); - else + else { + struct kbase_queue_group *protm_grp; + unsigned long flags; + + WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked( + group)); + group->run_state = KBASE_CSF_GROUP_RUNNABLE; + + /* A normal mode CSG could be idle onslot during + * protected mode. In this case clear the + * appropriate bit in csg_slots_idle_mask. + */ + spin_lock_irqsave(&scheduler->interrupt_lock, flags); + protm_grp = scheduler->active_protm_grp; + if (protm_grp && protm_grp != group) + clear_bit((unsigned int)group->csg_nr, + scheduler->csg_slots_idle_mask); + spin_unlock_irqrestore(&scheduler->interrupt_lock, + flags); + + /* If GPU is in protected mode then any doorbells rang + * would have no effect. Check if GPU is in protected + * mode and if this group has higher priority than the + * active protected mode group. If so prompt the FW + * to exit protected mode. 
+ */ + if (protm_grp && + group->scan_seq_num < protm_grp->scan_seq_num) { + /* Prompt the FW to exit protected mode */ + scheduler_force_protm_exit(kbdev); + } + } } else if (!queue_group_scheduled_locked(group)) { insert_group_to_runnable(&kbdev->csf.scheduler, group, KBASE_CSF_GROUP_RUNNABLE); @@ -2511,7 +2549,7 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev) */ dev_warn( kbdev->dev, - "Group %p on slot %u failed to suspend\n", + "Group %pK on slot %u failed to suspend\n", (void *)group, i); /* The group has failed suspension, stop @@ -2541,11 +2579,13 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev) if (WARN_ON(i == num_groups)) break; program_vacant_csg_slot(kbdev, (s8)i); - if (WARN_ON(!csg_slot_in_use(kbdev, (int)i))) + if (!csg_slot_in_use(kbdev, (int)i)) { + dev_warn(kbdev->dev, "Couldn't use CSG slot %d despite being vacant", i); break; + } } } else { - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(kbdev); } } @@ -2611,7 +2651,7 @@ static void wait_csg_slots_start(struct kbase_device *kbdev) dev_warn(kbdev->dev, "Timed out waiting for CSG slots to start, slots: 0x%*pb\n", num_groups, slot_mask); - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(kbdev); break; } @@ -3287,7 +3327,8 @@ static void scheduler_handle_idle_slots(struct kbase_device *kbdev) continue; if (WARN_ON(!group)) continue; - if (WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE)) + if (WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE && + group->run_state != KBASE_CSF_GROUP_IDLE)) continue; if (WARN_ON(group->priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT)) continue; @@ -3295,7 +3336,8 @@ static void scheduler_handle_idle_slots(struct kbase_device *kbdev) if (group_on_slot_is_idle(kbdev, i)) { group->run_state = KBASE_CSF_GROUP_IDLE; set_bit(i, scheduler->csg_slots_idle_mask); - } + } else + 
group->run_state = KBASE_CSF_GROUP_RUNNABLE; } bitmap_or(scheduler->csg_slots_idle_mask, @@ -3381,7 +3423,7 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, dev_warn(kbdev->dev, "Timed out waiting for CSG slots to suspend on power down, slot_mask: 0x%*pb\n", kbdev->csf.global_iface.group_num, slot_mask); - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(kbdev); if (is_suspend) { @@ -3526,21 +3568,6 @@ static int scheduler_prepare(struct kbase_device *kbdev) return 0; } -static void scheduler_wait_protm_quit(struct kbase_device *kbdev) -{ - struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; - long wt = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); - long remaining; - - lockdep_assert_held(&scheduler->lock); - - remaining = wait_event_timeout(kbdev->csf.event_wait, - !kbase_csf_scheduler_protected_mode_in_use(kbdev), wt); - - if (!remaining) - dev_warn(kbdev->dev, "Timeout, protm_quit wait skipped"); -} - static void scheduler_handle_idle_timer_onoff(struct kbase_device *kbdev) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; @@ -3572,6 +3599,8 @@ static void schedule_actions(struct kbase_device *kbdev) unsigned long flags; struct kbase_queue_group *protm_grp; int ret; + bool skip_idle_slots_update; + bool new_protm_top_grp = false; kbase_reset_gpu_assert_prevented(kbdev); lockdep_assert_held(&scheduler->lock); @@ -3582,7 +3611,14 @@ static void schedule_actions(struct kbase_device *kbdev) return; } - scheduler_handle_idle_slots(kbdev); + spin_lock_irqsave(&scheduler->interrupt_lock, flags); + skip_idle_slots_update = kbase_csf_scheduler_protected_mode_in_use(kbdev); + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + + /* Skip updating on-slot idle CSGs if GPU is in protected mode. 
*/ + if (!skip_idle_slots_update) + scheduler_handle_idle_slots(kbdev); + scheduler_prepare(kbdev); spin_lock_irqsave(&scheduler->interrupt_lock, flags); protm_grp = scheduler->active_protm_grp; @@ -3613,12 +3649,12 @@ static void schedule_actions(struct kbase_device *kbdev) scheduler->top_grp->kctx->tgid, scheduler->top_grp->kctx->id); - /* Due to GPUCORE-24491 only the top-group is allowed - * to be on slot and all other on slot groups have to - * be suspended before entering protected mode. - * This would change in GPUCORE-24492. + /* When entering protected mode all CSG slots can be occupied + * but only the protected mode CSG will be running. Any event + * that would trigger the execution of an on-slot idle CSG will + * need to be handled by the host during protected mode. */ - scheduler->num_csg_slots_for_tick = 1; + new_protm_top_grp = true; } spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); @@ -3635,12 +3671,12 @@ static void schedule_actions(struct kbase_device *kbdev) * locked in the secure mode. 
*/ if (protm_grp) - scheduler_wait_protm_quit(kbdev); + scheduler_force_protm_exit(kbdev); wait_csg_slots_start(kbdev); wait_csg_slots_finish_prio_update(kbdev); - if (scheduler->num_csg_slots_for_tick == 1) { + if (new_protm_top_grp) { scheduler_group_check_protm_enter(kbdev, scheduler->top_grp); } @@ -3913,8 +3949,7 @@ void kbase_csf_scheduler_reset(struct kbase_device *kbdev) WARN_ON(!kbase_reset_gpu_is_active(kbdev)); KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET, NULL, 0u); - if (!kbase_csf_scheduler_protected_mode_in_use(kbdev) && - !suspend_active_queue_groups_on_reset(kbdev)) { + if (!suspend_active_queue_groups_on_reset(kbdev)) { /* As all groups have been successfully evicted from the CSG * slots, clear out thee scheduler data fields and return */ @@ -4002,21 +4037,14 @@ static void firmware_aliveness_monitor(struct work_struct *work) kbase_pm_wait_for_desired_state(kbdev); - err = kbase_csf_firmware_ping(kbdev); + err = kbase_csf_firmware_ping_wait(kbdev); if (err) { - /* FW not responding means hardware counters will stop working. - * Put the backend into the unrecoverable error state to cause - * current and subsequent counter operations to immediately - * fail, avoiding the risk of a hang. - */ - kbase_hwcnt_backend_csf_on_unrecoverable_error( - &kbdev->hwcnt_gpu_iface); - /* It is acceptable to enqueue a reset whilst we've prevented * them, it will happen after we've allowed them again */ - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu( + kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); } else if (get_nr_active_csgs(kbdev) == 1) { queue_delayed_work(system_long_wq, @@ -4132,7 +4160,9 @@ static bool group_sync_updated(struct kbase_queue_group *group) bool updated = false; int stream; - WARN_ON(group->run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC); + /* Groups can also be blocked on-slot during protected mode. 
*/ + WARN_ON(group->run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC && + group->run_state != KBASE_CSF_GROUP_IDLE); for (stream = 0; stream < MAX_SUPPORTED_STREAMS_PER_GROUP; ++stream) { struct kbase_queue *const queue = group->bound_queues[stream]; @@ -4233,40 +4263,159 @@ void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group) mutex_lock(&scheduler->lock); - /* Check if the group is now eligible for execution in protected mode - * and accordingly undertake full scheduling actions as due to - * GPUCORE-24491 the on slot groups other than the top group have to - * be suspended first before entering protected mode. - */ - if (scheduler_get_protm_enter_async_group(kbdev, group)) { - scheduler->apply_async_protm = true; - schedule_actions(kbdev); - scheduler->apply_async_protm = false; - } + /* Check if the group is now eligible for execution in protected mode. */ + if (scheduler_get_protm_enter_async_group(kbdev, group)) + scheduler_group_check_protm_enter(kbdev, group); mutex_unlock(&scheduler->lock); kbase_reset_gpu_allow(kbdev); } /** + * check_sync_update_for_idle_group_protm() - Check the sync wait condition + * for all the queues bound to + * the given group. + * + * @group: Pointer to the group that requires evaluation. + * + * This function is called if the GPU is in protected mode and there are on + * slot idle groups with higher priority than the active protected mode group. + * This function will evaluate the sync condition, if any, of all the queues + * bound to the given group. + * + * Return true if the sync condition of at least one queue has been satisfied. 
+ */ +static bool check_sync_update_for_idle_group_protm( + struct kbase_queue_group *group) +{ + struct kbase_device *const kbdev = group->kctx->kbdev; + struct kbase_csf_scheduler *const scheduler = + &kbdev->csf.scheduler; + bool sync_update_done = false; + int i; + + lockdep_assert_held(&scheduler->lock); + + for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) { + struct kbase_queue *queue = group->bound_queues[i]; + + if (queue && queue->enabled && !sync_update_done) { + struct kbase_csf_cmd_stream_group_info *const ginfo = + &kbdev->csf.global_iface.groups[group->csg_nr]; + struct kbase_csf_cmd_stream_info *const stream = + &ginfo->streams[queue->csi_index]; + u32 status = kbase_csf_firmware_cs_output( + stream, CS_STATUS_WAIT); + unsigned long flags; + + if (!CS_STATUS_WAIT_SYNC_WAIT_GET(status)) + continue; + + /* Save the information of sync object of the command + * queue so the callback function, 'group_sync_updated' + * can evaluate the sync object when it gets updated + * later. + */ + queue->status_wait = status; + queue->sync_ptr = kbase_csf_firmware_cs_output( + stream, CS_STATUS_WAIT_SYNC_POINTER_LO); + queue->sync_ptr |= (u64)kbase_csf_firmware_cs_output( + stream, CS_STATUS_WAIT_SYNC_POINTER_HI) << 32; + queue->sync_value = kbase_csf_firmware_cs_output( + stream, CS_STATUS_WAIT_SYNC_VALUE); + + if (!evaluate_sync_update(queue)) + continue; + + /* Update csg_slots_idle_mask and group's run_state */ + spin_lock_irqsave(&scheduler->interrupt_lock, flags); + clear_bit((unsigned int)group->csg_nr, + scheduler->csg_slots_idle_mask); + spin_unlock_irqrestore(&scheduler->interrupt_lock, + flags); + group->run_state = KBASE_CSF_GROUP_RUNNABLE; + + KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u); + sync_update_done = true; + } + } + + return sync_update_done; +} + +/** + * check_sync_update_for_idle_groups_protm() - Check the sync wait condition + * for the idle groups on slot + * during protected mode. 
+ * + * @kbdev: Pointer to the GPU device + * + * This function checks the gpu queues of all the idle groups on slot during + * protected mode that has a higher priority than the active protected mode + * group. + * + * Return true if the sync condition of at least one queue in a group has been + * satisfied. + */ +static bool check_sync_update_for_idle_groups_protm(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + struct kbase_queue_group *protm_grp; + bool exit_protm = false; + unsigned long flags; + u32 num_groups; + u32 i; + + lockdep_assert_held(&scheduler->lock); + + spin_lock_irqsave(&scheduler->interrupt_lock, flags); + protm_grp = scheduler->active_protm_grp; + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + + if (!protm_grp) + return exit_protm; + + num_groups = kbdev->csf.global_iface.group_num; + + for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) { + struct kbase_csf_csg_slot *csg_slot = + &scheduler->csg_slots[i]; + struct kbase_queue_group *group = csg_slot->resident_group; + + if (group->scan_seq_num < protm_grp->scan_seq_num) { + /* If sync update has been performed for the group that + * has a higher priority than the protm group, then we + * need to exit protected mode. + */ + if (check_sync_update_for_idle_group_protm(group)) + exit_protm = true; + } + } + + return exit_protm; +} + +/** * check_group_sync_update_worker() - Check the sync wait condition for all the * blocked queue groups * * @work: Pointer to the context-specific work item for evaluating the wait * condition for all the queue groups in idle_wait_groups list. * - * This function checks the gpu queues of all the groups present in - * idle_wait_groups list of a context. If the sync wait condition - * for at least one queue bound to the group has been satisfied then - * the group is moved to the per context list of runnable groups so - * that Scheduler can consider scheduling the group in next tick. 
+ * This function checks the gpu queues of all the groups present in both + * idle_wait_groups list of a context and all on slot idle groups (if GPU + * is in protected mode). + * If the sync wait condition for at least one queue bound to the group has + * been satisfied then the group is moved to the per context list of + * runnable groups so that Scheduler can consider scheduling the group + * in next tick or exit protected mode. */ static void check_group_sync_update_worker(struct work_struct *work) { struct kbase_context *const kctx = container_of(work, struct kbase_context, csf.sched.sync_update_work); - struct kbase_csf_scheduler *const scheduler = - &kctx->kbdev->csf.scheduler; + struct kbase_device *const kbdev = kctx->kbdev; + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; mutex_lock(&scheduler->lock); @@ -4280,13 +4429,16 @@ static void check_group_sync_update_worker(struct work_struct *work) * groups list of the context. */ update_idle_suspended_group_state(group); - KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u); } } } else { WARN_ON(!list_empty(&kctx->csf.sched.idle_wait_groups)); } + if (check_sync_update_for_idle_groups_protm(kbdev)) + scheduler_force_protm_exit(kbdev); + mutex_unlock(&scheduler->lock); } @@ -4402,7 +4554,6 @@ int kbase_csf_scheduler_init(struct kbase_device *kbdev) scheduler->tock_pending_request = false; scheduler->active_protm_grp = NULL; scheduler->gpu_idle_fw_timer_enabled = false; - scheduler->apply_async_protm = false; scheduler->csg_scheduling_period_ms = CSF_SCHEDULER_TIME_TICK_MS; scheduler_doorbell_init(kbdev); diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.h b/mali_kbase/csf/mali_kbase_csf_scheduler.h index 20d1bc9..1607ff6 100644 --- a/mali_kbase/csf/mali_kbase_csf_scheduler.h +++ b/mali_kbase/csf/mali_kbase_csf_scheduler.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) 
COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -125,7 +125,7 @@ struct kbase_queue_group *kbase_csf_scheduler_get_group_on_slot( * kbase_csf_scheduler_group_deschedule() - Deschedule a GPU command queue * group from the firmware. * - * @group: Pointer to the queue group to be scheduled. + * @group: Pointer to the queue group to be descheduled. * * This function would disable the scheduling of GPU command queue group on * firmware. @@ -174,7 +174,7 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx); int kbase_csf_scheduler_init(struct kbase_device *kbdev); /** - * kbase_csf_scheduler_context_init() - Terminate the context-specific part + * kbase_csf_scheduler_context_term() - Terminate the context-specific part * for CSF scheduler. * * @kctx: Pointer to kbase context that is being terminated. diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c index 0b4fb5a..9e4ed17 100644 --- a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c +++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -596,14 +596,14 @@ int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx, if (likely(heap)) { err = alloc_new_chunk(heap, nr_in_flight, pending_frag_count, new_chunk_ptr); - } - KBASE_TLSTREAM_AUX_TILER_HEAP_STATS( - kctx->kbdev, kctx->id, heap->heap_id, - PFN_UP(heap->chunk_size * heap->max_chunks), - PFN_UP(heap->chunk_size * heap->chunk_count), heap->max_chunks, - heap->chunk_size, heap->chunk_count, heap->target_in_flight, - nr_in_flight); + KBASE_TLSTREAM_AUX_TILER_HEAP_STATS( + kctx->kbdev, kctx->id, heap->heap_id, + PFN_UP(heap->chunk_size * heap->max_chunks), + PFN_UP(heap->chunk_size * heap->chunk_count), + heap->max_chunks, heap->chunk_size, heap->chunk_count, + heap->target_in_flight, nr_in_flight); + } mutex_unlock(&kctx->csf.tiler_heaps.lock); diff --git a/mali_kbase/csf/mali_kbase_csf_trace_buffer.c b/mali_kbase/csf/mali_kbase_csf_trace_buffer.c index 7e9eb75..afcc90b 100644 --- a/mali_kbase/csf/mali_kbase_csf_trace_buffer.c +++ b/mali_kbase/csf/mali_kbase_csf_trace_buffer.c @@ -289,10 +289,6 @@ int kbase_csf_firmware_parse_trace_buffer_entry(struct kbase_device *kbdev, trace_buffer->trace_enable_entry_count = entry[6]; trace_buffer->num_pages = trace_buffer_data[i].size; - /* Temporary workaround until handled by GPUCORE-27330 */ - if (!strcmp(trace_buffer_data[i].name, "timeline")) - trace_buffer->updatable = 0; - for (j = 0; j < CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX; j++) { trace_buffer->trace_enable_init_mask[j] = trace_buffer_data[i].trace_enable_init_mask[j]; @@ -456,6 +452,7 @@ int kbase_csf_firmware_trace_buffer_update_trace_enable_bit( dev_warn( kbdev->dev, "GPU reset already in progress when enabling firmware timeline."); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return -EAGAIN; } } diff --git a/mali_kbase/device/backend/mali_kbase_device_csf.c 
b/mali_kbase/device/backend/mali_kbase_device_csf.c index f657bcb..cb2c2e2 100644 --- a/mali_kbase/device/backend/mali_kbase_device_csf.c +++ b/mali_kbase/device/backend/mali_kbase_device_csf.c @@ -23,6 +23,7 @@ #include "../mali_kbase_device.h" #include <mali_kbase_hwaccess_backend.h> +#include <mali_kbase_hwcnt_backend_csf_if_fw.h> #include <mali_kbase_ctx_sched.h> #include <mali_kbase_reset_gpu.h> #include <csf/mali_kbase_csf.h> @@ -170,6 +171,77 @@ static void kbase_backend_late_term(struct kbase_device *kbdev) kbase_hwaccess_pm_term(kbdev); } +/** + * kbase_device_hwcnt_backend_csf_if_init - Create hardware counter backend + * firmware interface. + * @kbdev: Device pointer + */ +static int kbase_device_hwcnt_backend_csf_if_init(struct kbase_device *kbdev) +{ + return kbase_hwcnt_backend_csf_if_fw_create( + kbdev, &kbdev->hwcnt_backend_csf_if_fw); +} + +/** + * kbase_device_hwcnt_backend_csf_if_term - Terminate hardware counter backend + * firmware interface. + * @kbdev: Device pointer + */ +static void kbase_device_hwcnt_backend_csf_if_term(struct kbase_device *kbdev) +{ + kbase_hwcnt_backend_csf_if_fw_destroy(&kbdev->hwcnt_backend_csf_if_fw); +} + +/** + * kbase_device_hwcnt_backend_csf_init - Create hardware counter backend. + * @kbdev: Device pointer + */ + +static int kbase_device_hwcnt_backend_csf_init(struct kbase_device *kbdev) +{ + return kbase_hwcnt_backend_csf_create( + &kbdev->hwcnt_backend_csf_if_fw, + KBASE_HWCNT_BACKEND_CSF_RING_BUFFER_COUNT, + &kbdev->hwcnt_gpu_iface); +} + +/** + * kbase_device_hwcnt_backend_csf_term - Terminate hardware counter backend. + * @kbdev: Device pointer + */ +static void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev) +{ + kbase_hwcnt_backend_csf_destroy(&kbdev->hwcnt_gpu_iface); +} + +/** + * kbase_device_hwcnt_backend_csf_metadata_init - Initialize hardware counter + * metadata. 
+ * @kbdev: Device pointer + */ +static int +kbase_device_hwcnt_backend_csf_metadata_init(struct kbase_device *kbdev) +{ + /* For CSF GPUs, HWC metadata needs to query information from CSF + * firmware, so the initialization of HWC metadata only can be called + * after firmware initialized, but firmware initialization depends on + * HWC backend initialization, so we need to separate HWC backend + * metadata initialization from HWC backend initialization. + */ + return kbase_hwcnt_backend_csf_metadata_init(&kbdev->hwcnt_gpu_iface); +} + +/** + * kbase_device_hwcnt_backend_csf_metadata_term - Terminate hardware counter + * metadata. + * @kbdev: Device pointer + */ +static void +kbase_device_hwcnt_backend_csf_metadata_term(struct kbase_device *kbdev) +{ + kbase_hwcnt_backend_csf_metadata_term(&kbdev->hwcnt_gpu_iface); +} + static const struct kbase_device_init dev_init[] = { #ifdef CONFIG_MALI_NO_MALI {kbase_gpu_device_create, kbase_gpu_device_destroy, @@ -244,12 +316,10 @@ static const struct kbase_device_init dev_init[] = { * paragraph that starts with "Word of warning", currently the * second-last paragraph. 
*/ - {kbase_sysfs_init, kbase_sysfs_term, "SysFS group creation failed"}, + {kbase_sysfs_init, kbase_sysfs_term, + "SysFS group creation failed"}, {kbase_device_misc_register, kbase_device_misc_deregister, "Misc device registration failed"}, -#ifdef CONFIG_MALI_BUSLOG - {buslog_init, buslog_term, "Bus log client registration failed"}, -#endif {kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer, "GPU property population failed"}, #endif diff --git a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c index 4d11a82..259e42a 100644 --- a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c +++ b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c @@ -69,17 +69,9 @@ static bool kbase_gpu_fault_interrupt(struct kbase_device *kbdev) if (!as_valid || (as_nr == MCU_AS_NR)) { kbase_report_gpu_fault(kbdev, status, as_nr, as_valid); - /* MCU bus fault could mean hardware counters will stop - * working. - * Put the backend into the unrecoverable error state to - * cause current and subsequent counter operations to - * immediately fail, avoiding the risk of a hang. - */ - kbase_hwcnt_backend_csf_on_unrecoverable_error( - &kbdev->hwcnt_gpu_iface); - dev_err(kbdev->dev, "GPU bus fault triggering gpu-reset ...\n"); - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu( + kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); } else { /* Handle Bus fault */ @@ -133,16 +125,8 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) } kbase_csf_scheduler_spin_unlock(kbdev, flags); - /* Protected fault means we're unlikely to have the counter - * operations we might do during reset acknowledged. - * Put the backend into the unrecoverable error state to cause - * current and subsequent counter operations to immediately - * fail, avoiding the risk of a hang. 
- */ - kbase_hwcnt_backend_csf_on_unrecoverable_error( - &kbdev->hwcnt_gpu_iface); - - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu( + kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); } diff --git a/mali_kbase/device/backend/mali_kbase_device_jm.c b/mali_kbase/device/backend/mali_kbase_device_jm.c index 8052fba..9301310 100644 --- a/mali_kbase/device/backend/mali_kbase_device_jm.c +++ b/mali_kbase/device/backend/mali_kbase_device_jm.c @@ -21,6 +21,7 @@ #include "../mali_kbase_device_internal.h" #include "../mali_kbase_device.h" +#include "../mali_kbase_hwaccess_instr.h" #include <mali_kbase_config_defaults.h> #include <mali_kbase_hwaccess_backend.h> @@ -107,6 +108,7 @@ static int kbase_backend_late_init(struct kbase_device *kbdev) return 0; fail_update_l2_features: + kbase_backend_devfreq_term(kbdev); fail_devfreq_init: kbase_job_slot_term(kbdev); fail_job_slot: @@ -144,6 +146,16 @@ static void kbase_backend_late_term(struct kbase_device *kbdev) kbase_hwaccess_pm_term(kbdev); } +static int kbase_device_hwcnt_backend_jm_init(struct kbase_device *kbdev) +{ + return kbase_hwcnt_backend_jm_create(kbdev, &kbdev->hwcnt_gpu_iface); +} + +static void kbase_device_hwcnt_backend_jm_term(struct kbase_device *kbdev) +{ + kbase_hwcnt_backend_jm_destroy(&kbdev->hwcnt_gpu_iface); +} + static const struct kbase_device_init dev_init[] = { #ifdef CONFIG_MALI_NO_MALI {kbase_gpu_device_create, kbase_gpu_device_destroy, @@ -183,6 +195,8 @@ static const struct kbase_device_init dev_init[] = { {kbase_clk_rate_trace_manager_init, kbase_clk_rate_trace_manager_term, "Clock rate trace manager initialization failed"}, + {kbase_instr_backend_init, kbase_instr_backend_term, + "Instrumentation backend initialization failed"}, {kbase_device_hwcnt_backend_jm_init, kbase_device_hwcnt_backend_jm_term, "GPU hwcnt backend creation failed"}, @@ -215,9 +229,6 @@ static const struct kbase_device_init dev_init[] = { {kbase_sysfs_init, kbase_sysfs_term, "SysFS 
group creation failed"}, {kbase_device_misc_register, kbase_device_misc_deregister, "Misc device registration failed"}, -#ifdef CONFIG_MALI_BUSLOG - {buslog_init, buslog_term, "Bus log client registration failed"}, -#endif {kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer, "GPU property population failed"}, #endif @@ -254,7 +265,8 @@ int kbase_device_init(struct kbase_device *kbdev) for (i = 0; i < ARRAY_SIZE(dev_init); i++) { err = dev_init[i].init(kbdev); if (err) { - dev_err(kbdev->dev, "%s error = %d\n", + if (err != -EPROBE_DEFER) + dev_err(kbdev->dev, "%s error = %d\n", dev_init[i].err_mes, err); kbase_device_term_partial(kbdev, i); break; diff --git a/mali_kbase/device/mali_kbase_device.c b/mali_kbase/device/mali_kbase_device.c index a90c8cd..5e900d0 100644 --- a/mali_kbase/device/mali_kbase_device.c +++ b/mali_kbase/device/mali_kbase_device.c @@ -40,9 +40,6 @@ #include <tl/mali_kbase_timeline.h> #include "mali_kbase_vinstr.h" -#if MALI_USE_CSF -#include <mali_kbase_hwcnt_backend_csf_if_fw.h> -#endif #include "mali_kbase_hwcnt_context.h" #include "mali_kbase_hwcnt_virtualizer.h" @@ -227,10 +224,6 @@ int kbase_device_misc_init(struct kbase_device * const kbdev) if (err) goto dma_set_mask_failed; -#if !MALI_USE_CSF - spin_lock_init(&kbdev->hwcnt.lock); -#endif - err = kbase_ktrace_init(kbdev); if (err) goto term_as; @@ -241,20 +234,11 @@ int kbase_device_misc_init(struct kbase_device * const kbdev) atomic_set(&kbdev->ctx_num, 0); -#if !MALI_USE_CSF - err = kbase_instr_backend_init(kbdev); - if (err) - goto term_trace; -#endif - kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD; kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS; - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) - kbdev->mmu_mode = kbase_mmu_mode_get_aarch64(); - else - kbdev->mmu_mode = kbase_mmu_mode_get_lpae(); + kbdev->mmu_mode = kbase_mmu_mode_get_aarch64(); mutex_init(&kbdev->kctx_list_lock); INIT_LIST_HEAD(&kbdev->kctx_list); @@ -263,11 +247,6 @@ int 
kbase_device_misc_init(struct kbase_device * const kbdev) return 0; -#if !MALI_USE_CSF -term_trace: - kbase_ktrace_term(kbdev); -#endif - term_as: kbase_device_all_as_term(kbdev); dma_set_mask_failed: @@ -285,10 +264,6 @@ void kbase_device_misc_term(struct kbase_device *kbdev) kbase_debug_assert_register_hook(NULL, NULL); #endif -#if !MALI_USE_CSF - kbase_instr_backend_term(kbdev); -#endif - kbase_ktrace_term(kbdev); kbase_device_all_as_term(kbdev); @@ -311,60 +286,6 @@ void kbase_increment_device_id(void) kbase_dev_nr++; } -#if MALI_USE_CSF - -int kbase_device_hwcnt_backend_csf_if_init(struct kbase_device *kbdev) -{ - return kbase_hwcnt_backend_csf_if_fw_create( - kbdev, &kbdev->hwcnt_backend_csf_if_fw); -} - -void kbase_device_hwcnt_backend_csf_if_term(struct kbase_device *kbdev) -{ - kbase_hwcnt_backend_csf_if_fw_destroy(&kbdev->hwcnt_backend_csf_if_fw); -} - -int kbase_device_hwcnt_backend_csf_init(struct kbase_device *kbdev) -{ - return kbase_hwcnt_backend_csf_create( - &kbdev->hwcnt_backend_csf_if_fw, - KBASE_HWCNT_BACKEND_CSF_RING_BUFFER_COUNT, - &kbdev->hwcnt_gpu_iface); -} - -void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev) -{ - kbase_hwcnt_backend_csf_destroy(&kbdev->hwcnt_gpu_iface); -} - -int kbase_device_hwcnt_backend_csf_metadata_init(struct kbase_device *kbdev) -{ - /* For CSF GPUs, HWC metadata needs to query informatoin from CSF - * firmware, so the initialization of HWC metadata only can be called - * after firmware initialised, but firmware initialization depends on - * HWC backend initialization, so we need to separate HWC backend - * metadata initialization from HWC backend initialization. 
- */ - return kbase_hwcnt_backend_csf_metadata_init(&kbdev->hwcnt_gpu_iface); -} - -void kbase_device_hwcnt_backend_csf_metadata_term(struct kbase_device *kbdev) -{ - kbase_hwcnt_backend_csf_metadata_term(&kbdev->hwcnt_gpu_iface); -} -#else - -int kbase_device_hwcnt_backend_jm_init(struct kbase_device *kbdev) -{ - return kbase_hwcnt_backend_jm_create(kbdev, &kbdev->hwcnt_gpu_iface); -} - -void kbase_device_hwcnt_backend_jm_term(struct kbase_device *kbdev) -{ - kbase_hwcnt_backend_jm_destroy(&kbdev->hwcnt_gpu_iface); -} -#endif /* MALI_USE_CSF */ - int kbase_device_hwcnt_context_init(struct kbase_device *kbdev) { return kbase_hwcnt_context_init(&kbdev->hwcnt_gpu_iface, @@ -484,7 +405,14 @@ int kbase_device_early_init(struct kbase_device *kbdev) /* We're done accessing the GPU registers for now. */ kbase_pm_register_access_disable(kbdev); +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbdev->arb.arb_if) + err = kbase_arbiter_pm_install_interrupts(kbdev); + else + err = kbase_install_interrupts(kbdev); +#else err = kbase_install_interrupts(kbdev); +#endif if (err) goto fail_interrupts; diff --git a/mali_kbase/device/mali_kbase_device_internal.h b/mali_kbase/device/mali_kbase_device_internal.h index 2705e67..067f33c 100644 --- a/mali_kbase/device/mali_kbase_device_internal.h +++ b/mali_kbase/device/mali_kbase_device_internal.h @@ -42,18 +42,6 @@ void kbase_device_vinstr_term(struct kbase_device *kbdev); int kbase_device_timeline_init(struct kbase_device *kbdev); void kbase_device_timeline_term(struct kbase_device *kbdev); -#if MALI_USE_CSF -int kbase_device_hwcnt_backend_csf_init(struct kbase_device *kbdev); -void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev); -int kbase_device_hwcnt_backend_csf_if_init(struct kbase_device *kbdev); -void kbase_device_hwcnt_backend_csf_if_term(struct kbase_device *kbdev); -int kbase_device_hwcnt_backend_csf_metadata_init(struct kbase_device *kbdev); -void kbase_device_hwcnt_backend_csf_metadata_term(struct kbase_device 
*kbdev); -#else -int kbase_device_hwcnt_backend_jm_init(struct kbase_device *kbdev); -void kbase_device_hwcnt_backend_jm_term(struct kbase_device *kbdev); -#endif - int kbase_device_hwcnt_context_init(struct kbase_device *kbdev); void kbase_device_hwcnt_context_term(struct kbase_device *kbdev); diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c b/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c index fa70afc..16eae0a 100644 --- a/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c +++ b/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,7 +20,7 @@ */ #include <mali_kbase.h> -#include "csf/mali_gpu_csf_registers.h" +#include <uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h> #include "../mali_kbase_gpu_fault.h" const char *kbase_gpu_exception_name(u32 const exception_code) diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h deleted file mode 100644 index 65a06d2..0000000 --- a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h +++ /dev/null @@ -1,334 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -#ifndef _KBASE_GPU_REGMAP_CSF_H_ -#define _KBASE_GPU_REGMAP_CSF_H_ - -#if !MALI_USE_CSF -#error "Cannot be compiled with JM" -#endif - -/* IPA control registers */ - -#define IPA_CONTROL_BASE 0x40000 -#define IPA_CONTROL_REG(r) (IPA_CONTROL_BASE+(r)) -#define COMMAND 0x000 /* (WO) Command register */ -#define STATUS 0x004 /* (RO) Status register */ -#define TIMER 0x008 /* (RW) Timer control register */ - -#define SELECT_CSHW_LO 0x010 /* (RW) Counter select for CS hardware, low word */ -#define SELECT_CSHW_HI 0x014 /* (RW) Counter select for CS hardware, high word */ -#define SELECT_MEMSYS_LO 0x018 /* (RW) Counter select for Memory system, low word */ -#define SELECT_MEMSYS_HI 0x01C /* (RW) Counter select for Memory system, high word */ -#define SELECT_TILER_LO 0x020 /* (RW) Counter select for Tiler cores, low word */ -#define SELECT_TILER_HI 0x024 /* (RW) Counter select for Tiler cores, high word */ -#define SELECT_SHADER_LO 0x028 /* (RW) Counter select for Shader cores, low word */ -#define SELECT_SHADER_HI 0x02C /* (RW) Counter select for Shader cores, high word */ - -/* Accumulated counter values for CS hardware */ -#define VALUE_CSHW_BASE 0x100 -#define VALUE_CSHW_REG_LO(n) (VALUE_CSHW_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ -#define VALUE_CSHW_REG_HI(n) (VALUE_CSHW_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ - -/* Accumulated counter values for memory system */ -#define VALUE_MEMSYS_BASE 0x140 -#define VALUE_MEMSYS_REG_LO(n) (VALUE_MEMSYS_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ -#define VALUE_MEMSYS_REG_HI(n) (VALUE_MEMSYS_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ - -#define VALUE_TILER_BASE 0x180 -#define VALUE_TILER_REG_LO(n) 
(VALUE_TILER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ -#define VALUE_TILER_REG_HI(n) (VALUE_TILER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ - -#define VALUE_SHADER_BASE 0x1C0 -#define VALUE_SHADER_REG_LO(n) (VALUE_SHADER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ -#define VALUE_SHADER_REG_HI(n) (VALUE_SHADER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ - - -#include "csf/mali_gpu_csf_control_registers.h" - -/* Set to implementation defined, outer caching */ -#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull -/* Set to write back memory, outer caching */ -#define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull -/* Set to inner non-cacheable, outer-non-cacheable - * Setting defined by the alloc bits is ignored, but set to a valid encoding: - * - no-alloc on read - * - no alloc on write - */ -#define AS_MEMATTR_AARCH64_NON_CACHEABLE 0x4Cull -/* Set to shared memory, that is inner cacheable on ACE and inner or outer - * shared, otherwise inner non-cacheable. - * Outer cacheable if inner or outer shared, otherwise outer non-cacheable. - */ -#define AS_MEMATTR_AARCH64_SHARED 0x8ull - -/* Symbols for default MEMATTR to use - * Default is - HW implementation defined caching - */ -#define AS_MEMATTR_INDEX_DEFAULT 0 -#define AS_MEMATTR_INDEX_DEFAULT_ACE 3 - -/* HW implementation defined caching */ -#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0 -/* Force cache on */ -#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1 -/* Write-alloc */ -#define AS_MEMATTR_INDEX_WRITE_ALLOC 2 -/* Outer coherent, inner implementation defined policy */ -#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3 -/* Outer coherent, write alloc inner */ -#define AS_MEMATTR_INDEX_OUTER_WA 4 -/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */ -#define AS_MEMATTR_INDEX_NON_CACHEABLE 5 -/* Normal memory, shared between MCU and Host */ -#define AS_MEMATTR_INDEX_SHARED 6 - -/* Configuration bits for the CSF. 
*/ -#define CSF_CONFIG 0xF00 - -/* CSF_CONFIG register */ -#define CSF_CONFIG_FORCE_COHERENCY_FEATURES_SHIFT 2 - -/* GPU control registers */ -#define CORE_FEATURES 0x008 /* () Shader Core Features */ -#define MCU_CONTROL 0x700 -#define MCU_STATUS 0x704 - -#define MCU_CNTRL_ENABLE (1 << 0) -#define MCU_CNTRL_AUTO (1 << 1) -#define MCU_CNTRL_DISABLE (0) - -#define MCU_STATUS_HALTED (1 << 1) - -#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory - * region base address, low word - */ -#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory - * region base address, high word - */ -#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter - * configuration - */ - -#define PRFCNT_CSHW_EN 0x06C /* (RW) Performance counter - * enable for CS Hardware - */ - -#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable - * flags for shader cores - */ -#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable - * flags for tiler - */ -#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable - * flags for MMU/L2 cache - */ - -/* JOB IRQ flags */ -#define JOB_IRQ_GLOBAL_IF (1 << 31) /* Global interface interrupt received */ - -/* GPU_COMMAND codes */ -#define GPU_COMMAND_CODE_NOP 0x00 /* No operation, nothing happens */ -#define GPU_COMMAND_CODE_RESET 0x01 /* Reset the GPU */ -#define GPU_COMMAND_CODE_PRFCNT 0x02 /* Clear or sample performance counters */ -#define GPU_COMMAND_CODE_TIME 0x03 /* Configure time sources */ -#define GPU_COMMAND_CODE_FLUSH_CACHES 0x04 /* Flush caches */ -#define GPU_COMMAND_CODE_SET_PROTECTED_MODE 0x05 /* Places the GPU in protected mode */ -#define GPU_COMMAND_CODE_FINISH_HALT 0x06 /* Halt CSF */ -#define GPU_COMMAND_CODE_CLEAR_FAULT 0x07 /* Clear GPU_FAULTSTATUS and GPU_FAULTADDRESS, TODX */ - -/* GPU_COMMAND_RESET payloads */ - -/* This will leave the state of active jobs UNDEFINED, but will leave the external bus in a defined and idle state. - * Power domains will remain powered on. 
- */ -#define GPU_COMMAND_RESET_PAYLOAD_FAST_RESET 0x00 - -/* This will leave the state of active CSs UNDEFINED, but will leave the external bus in a defined and - * idle state. - */ -#define GPU_COMMAND_RESET_PAYLOAD_SOFT_RESET 0x01 - -/* This reset will leave the state of currently active streams UNDEFINED, will likely lose data, and may leave - * the system bus in an inconsistent state. Use only as a last resort when nothing else works. - */ -#define GPU_COMMAND_RESET_PAYLOAD_HARD_RESET 0x02 - -/* GPU_COMMAND_PRFCNT payloads */ -#define GPU_COMMAND_PRFCNT_PAYLOAD_SAMPLE 0x01 /* Sample performance counters */ -#define GPU_COMMAND_PRFCNT_PAYLOAD_CLEAR 0x02 /* Clear performance counters */ - -/* GPU_COMMAND_TIME payloads */ -#define GPU_COMMAND_TIME_DISABLE 0x00 /* Disable cycle counter */ -#define GPU_COMMAND_TIME_ENABLE 0x01 /* Enable cycle counter */ - -/* GPU_COMMAND_FLUSH_CACHES payloads */ -#define GPU_COMMAND_FLUSH_PAYLOAD_NONE 0x00 /* No flush */ -#define GPU_COMMAND_FLUSH_PAYLOAD_CLEAN 0x01 /* Clean the caches */ -#define GPU_COMMAND_FLUSH_PAYLOAD_INVALIDATE 0x02 /* Invalidate the caches */ -#define GPU_COMMAND_FLUSH_PAYLOAD_CLEAN_INVALIDATE 0x03 /* Clean and invalidate the caches */ - -/* GPU_COMMAND command + payload */ -#define GPU_COMMAND_CODE_PAYLOAD(opcode, payload) \ - ((u32)opcode | ((u32)payload << 8)) - -/* Final GPU_COMMAND form */ -/* No operation, nothing happens */ -#define GPU_COMMAND_NOP \ - GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_NOP, 0) - -/* Stop all external bus interfaces, and then reset the entire GPU. */ -#define GPU_COMMAND_SOFT_RESET \ - GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_RESET, GPU_COMMAND_RESET_PAYLOAD_SOFT_RESET) - -/* Immediately reset the entire GPU. */ -#define GPU_COMMAND_HARD_RESET \ - GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_RESET, GPU_COMMAND_RESET_PAYLOAD_HARD_RESET) - -/* Clear all performance counters, setting them all to zero. 
*/ -#define GPU_COMMAND_PRFCNT_CLEAR \ - GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_PRFCNT, GPU_COMMAND_PRFCNT_PAYLOAD_CLEAR) - -/* Sample all performance counters, writing them out to memory */ -#define GPU_COMMAND_PRFCNT_SAMPLE \ - GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_PRFCNT, GPU_COMMAND_PRFCNT_PAYLOAD_SAMPLE) - -/* Starts the cycle counter, and system timestamp propagation */ -#define GPU_COMMAND_CYCLE_COUNT_START \ - GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_TIME, GPU_COMMAND_TIME_ENABLE) - -/* Stops the cycle counter, and system timestamp propagation */ -#define GPU_COMMAND_CYCLE_COUNT_STOP \ - GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_TIME, GPU_COMMAND_TIME_DISABLE) - -/* Clean all caches */ -#define GPU_COMMAND_CLEAN_CACHES \ - GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, GPU_COMMAND_FLUSH_PAYLOAD_CLEAN) - -/* Clean and invalidate all caches */ -#define GPU_COMMAND_CLEAN_INV_CACHES \ - GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, GPU_COMMAND_FLUSH_PAYLOAD_CLEAN_INVALIDATE) - -/* Places the GPU in protected mode */ -#define GPU_COMMAND_SET_PROTECTED_MODE \ - GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_SET_PROTECTED_MODE, 0) - -/* Halt CSF */ -#define GPU_COMMAND_FINISH_HALT \ - GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FINISH_HALT, 0) - -/* Clear GPU faults */ -#define GPU_COMMAND_CLEAR_FAULT \ - GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_CLEAR_FAULT, 0) - -/* End Command Values */ - -/* GPU_FAULTSTATUS register */ -#define GPU_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0 -#define GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFFul) -#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \ - (((reg_val)&GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK) \ - >> GPU_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) -#define GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT 8 -#define GPU_FAULTSTATUS_ACCESS_TYPE_MASK \ - (0x3ul << GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT) - -#define GPU_FAULTSTATUS_ADDR_VALID_SHIFT 10 -#define GPU_FAULTSTATUS_ADDR_VALID_FLAG \ - (1ul << GPU_FAULTSTATUS_ADDR_VALID_SHIFT) - 
-#define GPU_FAULTSTATUS_JASID_VALID_SHIFT 11 -#define GPU_FAULTSTATUS_JASID_VALID_FLAG \ - (1ul << GPU_FAULTSTATUS_JASID_VALID_SHIFT) - -#define GPU_FAULTSTATUS_JASID_SHIFT 12 -#define GPU_FAULTSTATUS_JASID_MASK (0xF << GPU_FAULTSTATUS_JASID_SHIFT) -#define GPU_FAULTSTATUS_JASID_GET(reg_val) \ - (((reg_val)&GPU_FAULTSTATUS_JASID_MASK) >> GPU_FAULTSTATUS_JASID_SHIFT) -#define GPU_FAULTSTATUS_JASID_SET(reg_val, value) \ - (((reg_val) & ~GPU_FAULTSTATUS_JASID_MASK) | \ - (((value) << GPU_FAULTSTATUS_JASID_SHIFT) & GPU_FAULTSTATUS_JASID_MASK)) - -#define GPU_FAULTSTATUS_SOURCE_ID_SHIFT 16 -#define GPU_FAULTSTATUS_SOURCE_ID_MASK \ - (0xFFFFul << GPU_FAULTSTATUS_SOURCE_ID_SHIFT) -/* End GPU_FAULTSTATUS register */ - -/* GPU_FAULTSTATUS_ACCESS_TYPE values */ -#define GPU_FAULTSTATUS_ACCESS_TYPE_ATOMIC 0x0 -#define GPU_FAULTSTATUS_ACCESS_TYPE_EXECUTE 0x1 -#define GPU_FAULTSTATUS_ACCESS_TYPE_READ 0x2 -#define GPU_FAULTSTATUS_ACCESS_TYPE_WRITE 0x3 -/* End of GPU_FAULTSTATUS_ACCESS_TYPE values */ - -/* Implementation-dependent exception codes used to indicate CSG - * and CS errors that are not specified in the specs. 
- */ -#define GPU_EXCEPTION_TYPE_SW_FAULT_0 ((u8)0x70) -#define GPU_EXCEPTION_TYPE_SW_FAULT_1 ((u8)0x71) -#define GPU_EXCEPTION_TYPE_SW_FAULT_2 ((u8)0x72) - -/* GPU_FAULTSTATUS_EXCEPTION_TYPE values */ -#define GPU_FAULTSTATUS_EXCEPTION_TYPE_OK 0x00 -#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_BUS_FAULT 0x80 -#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_SHAREABILITY_FAULT 0x88 -#define GPU_FAULTSTATUS_EXCEPTION_TYPE_SYSTEM_SHAREABILITY_FAULT 0x89 -#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_CACHEABILITY_FAULT 0x8A -/* End of GPU_FAULTSTATUS_EXCEPTION_TYPE values */ - -#define GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT GPU_U(10) -#define GPU_FAULTSTATUS_ADDRESS_VALID_MASK (GPU_U(0x1) << GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT) -#define GPU_FAULTSTATUS_ADDRESS_VALID_GET(reg_val) \ - (((reg_val)&GPU_FAULTSTATUS_ADDRESS_VALID_MASK) >> GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT) -#define GPU_FAULTSTATUS_ADDRESS_VALID_SET(reg_val, value) \ - (((reg_val) & ~GPU_FAULTSTATUS_ADDRESS_VALID_MASK) | \ - (((value) << GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT) & GPU_FAULTSTATUS_ADDRESS_VALID_MASK)) - -/* IRQ flags */ -#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ -#define GPU_PROTECTED_FAULT (1 << 1) /* A GPU fault has occurred in protected mode */ -#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */ -#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */ -#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */ -#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */ -#define DOORBELL_MIRROR (1 << 18) /* Mirrors the doorbell interrupt line to the CPU */ -#define MCU_STATUS_GPU_IRQ (1 << 19) /* MCU requires attention */ - -/* - * In Debug build, - * GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE is used to clear and unmask interupts sources of GPU_IRQ - * by writing it onto GPU_IRQ_CLEAR/MASK registers. 
- * - * In Release build, - * GPU_IRQ_REG_COMMON is used. - * - * Note: - * CLEAN_CACHES_COMPLETED - Used separately for cache operation. - * DOORBELL_MIRROR - Do not have it included for GPU_IRQ_REG_COMMON - * as it can't be cleared by GPU_IRQ_CLEAR, thus interrupt storm might happen - */ -#define GPU_IRQ_REG_COMMON (GPU_FAULT | GPU_PROTECTED_FAULT | RESET_COMPLETED \ - | POWER_CHANGED_ALL | MCU_STATUS_GPU_IRQ) - -/* GPU_CONTROL_MCU.GPU_IRQ_RAWSTAT */ -#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when performance count sample has completed */ - -#endif /* _KBASE_GPU_REGMAP_CSF_H_ */ diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h deleted file mode 100644 index 1669d5a..0000000 --- a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h +++ /dev/null @@ -1,287 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - */ - -#ifndef _KBASE_GPU_REGMAP_JM_H_ -#define _KBASE_GPU_REGMAP_JM_H_ - -#if MALI_USE_CSF -#error "Cannot be compiled with CSF" -#endif - -/* Set to implementation defined, outer caching */ -#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull -/* Set to write back memory, outer caching */ -#define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull -/* Set to inner non-cacheable, outer-non-cacheable - * Setting defined by the alloc bits is ignored, but set to a valid encoding: - * - no-alloc on read - * - no alloc on write - */ -#define AS_MEMATTR_AARCH64_NON_CACHEABLE 0x4Cull - -/* Symbols for default MEMATTR to use - * Default is - HW implementation defined caching - */ -#define AS_MEMATTR_INDEX_DEFAULT 0 -#define AS_MEMATTR_INDEX_DEFAULT_ACE 3 - -/* HW implementation defined caching */ -#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0 -/* Force cache on */ -#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1 -/* Write-alloc */ -#define AS_MEMATTR_INDEX_WRITE_ALLOC 2 -/* Outer coherent, inner implementation defined policy */ -#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3 -/* Outer coherent, write alloc inner */ -#define AS_MEMATTR_INDEX_OUTER_WA 4 -/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */ -#define AS_MEMATTR_INDEX_NON_CACHEABLE 5 - -/* GPU control registers */ - -#define CORE_FEATURES 0x008 /* (RO) Shader Core Features */ -#define JS_PRESENT 0x01C /* (RO) Job slots present */ -#define LATEST_FLUSH 0x038 /* (RO) Flush ID of latest - * clean-and-invalidate operation - */ - -#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory - * region base address, low word - */ -#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory - * region base address, high word - */ -#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter - * configuration - */ -#define PRFCNT_JM_EN 0x06C /* (RW) Performance counter enable - * flags for Job Manager - */ -#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable - * flags for shader cores - */ 
-#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable - * flags for tiler - */ -#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable - * flags for MMU/L2 cache - */ - -#define JS0_FEATURES 0x0C0 /* (RO) Features of job slot 0 */ -#define JS1_FEATURES 0x0C4 /* (RO) Features of job slot 1 */ -#define JS2_FEATURES 0x0C8 /* (RO) Features of job slot 2 */ -#define JS3_FEATURES 0x0CC /* (RO) Features of job slot 3 */ -#define JS4_FEATURES 0x0D0 /* (RO) Features of job slot 4 */ -#define JS5_FEATURES 0x0D4 /* (RO) Features of job slot 5 */ -#define JS6_FEATURES 0x0D8 /* (RO) Features of job slot 6 */ -#define JS7_FEATURES 0x0DC /* (RO) Features of job slot 7 */ -#define JS8_FEATURES 0x0E0 /* (RO) Features of job slot 8 */ -#define JS9_FEATURES 0x0E4 /* (RO) Features of job slot 9 */ -#define JS10_FEATURES 0x0E8 /* (RO) Features of job slot 10 */ -#define JS11_FEATURES 0x0EC /* (RO) Features of job slot 11 */ -#define JS12_FEATURES 0x0F0 /* (RO) Features of job slot 12 */ -#define JS13_FEATURES 0x0F4 /* (RO) Features of job slot 13 */ -#define JS14_FEATURES 0x0F8 /* (RO) Features of job slot 14 */ -#define JS15_FEATURES 0x0FC /* (RO) Features of job slot 15 */ - -#define JS_FEATURES_REG(n) GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2)) - -#define JM_CONFIG 0xF00 /* (RW) Job manager configuration (implementation-specific) */ - -/* Job control registers */ - -#define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */ -#define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. 
*/ - -#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */ -#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */ -#define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */ -#define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */ -#define JOB_SLOT4 0xA00 /* Configuration registers for job slot 4 */ -#define JOB_SLOT5 0xA80 /* Configuration registers for job slot 5 */ -#define JOB_SLOT6 0xB00 /* Configuration registers for job slot 6 */ -#define JOB_SLOT7 0xB80 /* Configuration registers for job slot 7 */ -#define JOB_SLOT8 0xC00 /* Configuration registers for job slot 8 */ -#define JOB_SLOT9 0xC80 /* Configuration registers for job slot 9 */ -#define JOB_SLOT10 0xD00 /* Configuration registers for job slot 10 */ -#define JOB_SLOT11 0xD80 /* Configuration registers for job slot 11 */ -#define JOB_SLOT12 0xE00 /* Configuration registers for job slot 12 */ -#define JOB_SLOT13 0xE80 /* Configuration registers for job slot 13 */ -#define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */ -#define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */ - -#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r)) - -#define JS_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */ -#define JS_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */ -#define JS_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */ -#define JS_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */ -#define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */ -#define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */ -#define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */ -/* (RO) Extended affinity mask for job slot n*/ -#define JS_XAFFINITY 0x1C - -#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */ -#define JS_STATUS 0x24 /* (RO) Status register for job 
slot n */ - -#define JS_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */ -#define JS_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */ - -#define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */ -#define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */ -#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */ -/* (RW) Next extended affinity mask for job slot n */ -#define JS_XAFFINITY_NEXT 0x5C - -#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */ - -#define JS_FLUSH_ID_NEXT 0x70 /* (RW) Next job slot n cache flush ID */ - -/* No JM-specific MMU control registers */ -/* No JM-specific MMU address space control registers */ - -/* JS_COMMAND register commands */ -#define JS_COMMAND_NOP 0x00 /* NOP Operation. Writing this value is ignored */ -#define JS_COMMAND_START 0x01 /* Start processing a job chain. 
Writing this value is ignored */ -#define JS_COMMAND_SOFT_STOP 0x02 /* Gently stop processing a job chain */ -#define JS_COMMAND_HARD_STOP 0x03 /* Rudely stop processing a job chain */ -#define JS_COMMAND_SOFT_STOP_0 0x04 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */ -#define JS_COMMAND_HARD_STOP_0 0x05 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */ -#define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */ -#define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */ - -#define JS_COMMAND_MASK 0x07 /* Mask of bits currently in use by the HW */ - -/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */ -#define JS_CONFIG_START_FLUSH_NO_ACTION (0u << 0) -#define JS_CONFIG_START_FLUSH_CLEAN (1u << 8) -#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8) -#define JS_CONFIG_START_MMU (1u << 10) -#define JS_CONFIG_JOB_CHAIN_FLAG (1u << 11) -#define JS_CONFIG_END_FLUSH_NO_ACTION JS_CONFIG_START_FLUSH_NO_ACTION -#define JS_CONFIG_END_FLUSH_CLEAN (1u << 12) -#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE (3u << 12) -#define JS_CONFIG_ENABLE_FLUSH_REDUCTION (1u << 14) -#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK (1u << 15) -#define JS_CONFIG_THREAD_PRI(n) ((n) << 16) - -/* JS_XAFFINITY register values */ -#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0) -#define JS_XAFFINITY_TILER_ENABLE (1u << 8) -#define JS_XAFFINITY_CACHE_ENABLE (1u << 16) - -/* JS_STATUS register values */ - -/* NOTE: Please keep this values in sync with enum base_jd_event_code in mali_base_kernel.h. - * The values are separated to avoid dependency of userspace and kernel code. 
- */ - -/* Group of values representing the job status instead of a particular fault */ -#define JS_STATUS_NO_EXCEPTION_BASE 0x00 -#define JS_STATUS_INTERRUPTED (JS_STATUS_NO_EXCEPTION_BASE + 0x02) /* 0x02 means INTERRUPTED */ -#define JS_STATUS_STOPPED (JS_STATUS_NO_EXCEPTION_BASE + 0x03) /* 0x03 means STOPPED */ -#define JS_STATUS_TERMINATED (JS_STATUS_NO_EXCEPTION_BASE + 0x04) /* 0x04 means TERMINATED */ - -/* General fault values */ -#define JS_STATUS_FAULT_BASE 0x40 -#define JS_STATUS_CONFIG_FAULT (JS_STATUS_FAULT_BASE) /* 0x40 means CONFIG FAULT */ -#define JS_STATUS_POWER_FAULT (JS_STATUS_FAULT_BASE + 0x01) /* 0x41 means POWER FAULT */ -#define JS_STATUS_READ_FAULT (JS_STATUS_FAULT_BASE + 0x02) /* 0x42 means READ FAULT */ -#define JS_STATUS_WRITE_FAULT (JS_STATUS_FAULT_BASE + 0x03) /* 0x43 means WRITE FAULT */ -#define JS_STATUS_AFFINITY_FAULT (JS_STATUS_FAULT_BASE + 0x04) /* 0x44 means AFFINITY FAULT */ -#define JS_STATUS_BUS_FAULT (JS_STATUS_FAULT_BASE + 0x08) /* 0x48 means BUS FAULT */ - -/* Instruction or data faults */ -#define JS_STATUS_INSTRUCTION_FAULT_BASE 0x50 -#define JS_STATUS_INSTR_INVALID_PC (JS_STATUS_INSTRUCTION_FAULT_BASE) /* 0x50 means INSTR INVALID PC */ -#define JS_STATUS_INSTR_INVALID_ENC (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01) /* 0x51 means INSTR INVALID ENC */ -#define JS_STATUS_INSTR_TYPE_MISMATCH (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02) /* 0x52 means INSTR TYPE MISMATCH */ -#define JS_STATUS_INSTR_OPERAND_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03) /* 0x53 means INSTR OPERAND FAULT */ -#define JS_STATUS_INSTR_TLS_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04) /* 0x54 means INSTR TLS FAULT */ -#define JS_STATUS_INSTR_BARRIER_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05) /* 0x55 means INSTR BARRIER FAULT */ -#define JS_STATUS_INSTR_ALIGN_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06) /* 0x56 means INSTR ALIGN FAULT */ -/* NOTE: No fault with 0x57 code defined in spec. 
*/ -#define JS_STATUS_DATA_INVALID_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08) /* 0x58 means DATA INVALID FAULT */ -#define JS_STATUS_TILE_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09) /* 0x59 means TILE RANGE FAULT */ -#define JS_STATUS_ADDRESS_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A) /* 0x5A means ADDRESS RANGE FAULT */ - -/* Other faults */ -#define JS_STATUS_MEMORY_FAULT_BASE 0x60 -#define JS_STATUS_OUT_OF_MEMORY (JS_STATUS_MEMORY_FAULT_BASE) /* 0x60 means OUT OF MEMORY */ -#define JS_STATUS_UNKNOWN 0x7F /* 0x7F means UNKNOWN */ - -/* JS<n>_FEATURES register */ -#define JS_FEATURE_NULL_JOB (1u << 1) -#define JS_FEATURE_SET_VALUE_JOB (1u << 2) -#define JS_FEATURE_CACHE_FLUSH_JOB (1u << 3) -#define JS_FEATURE_COMPUTE_JOB (1u << 4) -#define JS_FEATURE_VERTEX_JOB (1u << 5) -#define JS_FEATURE_GEOMETRY_JOB (1u << 6) -#define JS_FEATURE_TILER_JOB (1u << 7) -#define JS_FEATURE_FUSED_JOB (1u << 8) -#define JS_FEATURE_FRAGMENT_JOB (1u << 9) - -/* JM_CONFIG register */ -#define JM_TIMESTAMP_OVERRIDE (1ul << 0) -#define JM_CLOCK_GATE_OVERRIDE (1ul << 1) -#define JM_JOB_THROTTLE_ENABLE (1ul << 2) -#define JM_JOB_THROTTLE_LIMIT_SHIFT (3) -#define JM_MAX_JOB_THROTTLE_LIMIT (0x3F) -#define JM_FORCE_COHERENCY_FEATURES_SHIFT (2) - -/* GPU_COMMAND values */ -#define GPU_COMMAND_NOP 0x00 /* No operation, nothing happens */ -#define GPU_COMMAND_SOFT_RESET 0x01 /* Stop all external bus interfaces, and then reset the entire GPU. */ -#define GPU_COMMAND_HARD_RESET 0x02 /* Immediately reset the entire GPU. */ -#define GPU_COMMAND_PRFCNT_CLEAR 0x03 /* Clear all performance counters, setting them all to zero. 
*/ -#define GPU_COMMAND_PRFCNT_SAMPLE 0x04 /* Sample all performance counters, writing them out to memory */ -#define GPU_COMMAND_CYCLE_COUNT_START 0x05 /* Starts the cycle counter, and system timestamp propagation */ -#define GPU_COMMAND_CYCLE_COUNT_STOP 0x06 /* Stops the cycle counter, and system timestamp propagation */ -#define GPU_COMMAND_CLEAN_CACHES 0x07 /* Clean all caches */ -#define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */ -#define GPU_COMMAND_SET_PROTECTED_MODE 0x09 /* Places the GPU in protected mode */ - -/* IRQ flags */ -#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ -#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */ -#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */ -#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */ -#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */ -#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */ -#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */ - -/* - * In Debug build, - * GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE is used to clear and enable interupts sources of GPU_IRQ - * by writing it onto GPU_IRQ_CLEAR/MASK registers. - * - * In Release build, - * GPU_IRQ_REG_COMMON is used. - * - * Note: - * CLEAN_CACHES_COMPLETED - Used separately for cache operation. - */ -#define GPU_IRQ_REG_COMMON (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \ - | POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED) - -#endif /* _KBASE_GPU_REGMAP_JM_H_ */ diff --git a/mali_kbase/gpu/mali_kbase_gpu.h b/mali_kbase/gpu/mali_kbase_gpu.h deleted file mode 100644 index dba0e28..0000000 --- a/mali_kbase/gpu/mali_kbase_gpu.h +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. 
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -#ifndef _KBASE_GPU_H_ -#define _KBASE_GPU_H_ - -#include "mali_kbase_gpu_regmap.h" -#include "mali_kbase_gpu_fault.h" -#include "mali_kbase_gpu_coherency.h" -#include "mali_kbase_gpu_id.h" - -#endif /* _KBASE_GPU_H_ */ diff --git a/mali_kbase/gpu/mali_kbase_gpu_coherency.h b/mali_kbase/gpu/mali_kbase_gpu_coherency.h deleted file mode 100644 index a075ed0..0000000 --- a/mali_kbase/gpu/mali_kbase_gpu_coherency.h +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - */ - -#ifndef _KBASE_GPU_COHERENCY_H_ -#define _KBASE_GPU_COHERENCY_H_ - -#define COHERENCY_ACE_LITE 0 -#define COHERENCY_ACE 1 -#define COHERENCY_NONE 31 -#define COHERENCY_FEATURE_BIT(x) (1 << (x)) - -#endif /* _KBASE_GPU_COHERENCY_H_ */ diff --git a/mali_kbase/gpu/mali_kbase_gpu_id.h b/mali_kbase/gpu/mali_kbase_gpu_id.h deleted file mode 100644 index 8d687c4..0000000 --- a/mali_kbase/gpu/mali_kbase_gpu_id.h +++ /dev/null @@ -1,118 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - */ - -#ifndef _KBASE_GPU_ID_H_ -#define _KBASE_GPU_ID_H_ - -/* GPU_ID register */ -#define GPU_ID_VERSION_STATUS_SHIFT 0 -#define GPU_ID_VERSION_MINOR_SHIFT 4 -#define GPU_ID_VERSION_MAJOR_SHIFT 12 -#define GPU_ID_VERSION_PRODUCT_ID_SHIFT 16 -#define GPU_ID_VERSION_STATUS (0xFu << GPU_ID_VERSION_STATUS_SHIFT) -#define GPU_ID_VERSION_MINOR (0xFFu << GPU_ID_VERSION_MINOR_SHIFT) -#define GPU_ID_VERSION_MAJOR (0xFu << GPU_ID_VERSION_MAJOR_SHIFT) -#define GPU_ID_VERSION_PRODUCT_ID (0xFFFFu << GPU_ID_VERSION_PRODUCT_ID_SHIFT) - -#define GPU_ID2_VERSION_STATUS_SHIFT 0 -#define GPU_ID2_VERSION_MINOR_SHIFT 4 -#define GPU_ID2_VERSION_MAJOR_SHIFT 12 -#define GPU_ID2_PRODUCT_MAJOR_SHIFT 16 -#define GPU_ID2_ARCH_REV_SHIFT 20 -#define GPU_ID2_ARCH_MINOR_SHIFT 24 -#define GPU_ID2_ARCH_MAJOR_SHIFT 28 -#define GPU_ID2_VERSION_STATUS (0xFu << GPU_ID2_VERSION_STATUS_SHIFT) -#define GPU_ID2_VERSION_MINOR (0xFFu << GPU_ID2_VERSION_MINOR_SHIFT) -#define GPU_ID2_VERSION_MAJOR (0xFu << GPU_ID2_VERSION_MAJOR_SHIFT) -#define GPU_ID2_PRODUCT_MAJOR (0xFu << GPU_ID2_PRODUCT_MAJOR_SHIFT) -#define GPU_ID2_ARCH_REV (0xFu << GPU_ID2_ARCH_REV_SHIFT) -#define GPU_ID2_ARCH_MINOR (0xFu << GPU_ID2_ARCH_MINOR_SHIFT) -#define GPU_ID2_ARCH_MAJOR (0xFu << GPU_ID2_ARCH_MAJOR_SHIFT) -#define GPU_ID2_PRODUCT_MODEL (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR) -#define GPU_ID2_VERSION (GPU_ID2_VERSION_MAJOR | \ - GPU_ID2_VERSION_MINOR | \ - GPU_ID2_VERSION_STATUS) - -/* Helper macro to create a partial GPU_ID (new format) that defines - * a product ignoring its version. 
- */ -#define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \ - ((((u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ - (((u32)arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT) | \ - (((u32)arch_rev) << GPU_ID2_ARCH_REV_SHIFT) | \ - (((u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) - -/* Helper macro to create a partial GPU_ID (new format) that specifies the - * revision (major, minor, status) of a product - */ -#define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \ - ((((u32)version_major) << GPU_ID2_VERSION_MAJOR_SHIFT) | \ - (((u32)version_minor) << GPU_ID2_VERSION_MINOR_SHIFT) | \ - (((u32)version_status) << GPU_ID2_VERSION_STATUS_SHIFT)) - -/* Helper macro to create a complete GPU_ID (new format) */ -#define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \ - version_major, version_minor, version_status) \ - (GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, \ - product_major) | \ - GPU_ID2_VERSION_MAKE(version_major, version_minor, \ - version_status)) - -/* Helper macro to create a partial GPU_ID (new format) that identifies - * a particular GPU model by its arch_major and product_major. - */ -#define GPU_ID2_MODEL_MAKE(arch_major, product_major) \ - ((((u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ - (((u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) - -/* Strip off the non-relevant bits from a product_id value and make it suitable - * for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU - * model. 
- */ -#define GPU_ID2_MODEL_MATCH_VALUE(product_id) \ - ((((u32)product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \ - GPU_ID2_PRODUCT_MODEL) - -#define GPU_ID2_PRODUCT_TMIX GPU_ID2_MODEL_MAKE(6, 0) -#define GPU_ID2_PRODUCT_THEX GPU_ID2_MODEL_MAKE(6, 1) -#define GPU_ID2_PRODUCT_TSIX GPU_ID2_MODEL_MAKE(7, 0) -#define GPU_ID2_PRODUCT_TDVX GPU_ID2_MODEL_MAKE(7, 3) -#define GPU_ID2_PRODUCT_TNOX GPU_ID2_MODEL_MAKE(7, 1) -#define GPU_ID2_PRODUCT_TGOX GPU_ID2_MODEL_MAKE(7, 2) -#define GPU_ID2_PRODUCT_TTRX GPU_ID2_MODEL_MAKE(9, 0) -#define GPU_ID2_PRODUCT_TNAX GPU_ID2_MODEL_MAKE(9, 1) -#define GPU_ID2_PRODUCT_TBEX GPU_ID2_MODEL_MAKE(9, 2) -#define GPU_ID2_PRODUCT_LBEX GPU_ID2_MODEL_MAKE(9, 4) -#define GPU_ID2_PRODUCT_TBAX GPU_ID2_MODEL_MAKE(9, 5) -#define GPU_ID2_PRODUCT_TDUX GPU_ID2_MODEL_MAKE(10, 1) -#define GPU_ID2_PRODUCT_TODX GPU_ID2_MODEL_MAKE(10, 2) -#define GPU_ID2_PRODUCT_LODX GPU_ID2_MODEL_MAKE(10, 7) - -/* Helper macro to create a GPU_ID assuming valid values for id, major, - * minor, status - */ -#define GPU_ID_MAKE(id, major, minor, status) \ - ((((u32)id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \ - (((u32)major) << GPU_ID_VERSION_MAJOR_SHIFT) | \ - (((u32)minor) << GPU_ID_VERSION_MINOR_SHIFT) | \ - (((u32)status) << GPU_ID_VERSION_STATUS_SHIFT)) - -#endif /* _KBASE_GPU_ID_H_ */ diff --git a/mali_kbase/gpu/mali_kbase_gpu_regmap.h b/mali_kbase/gpu/mali_kbase_gpu_regmap.h index b7a566f..05a229d 100644 --- a/mali_kbase/gpu/mali_kbase_gpu_regmap.h +++ b/mali_kbase/gpu/mali_kbase_gpu_regmap.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,420 +22,12 @@ #ifndef _KBASE_GPU_REGMAP_H_ #define _KBASE_GPU_REGMAP_H_ -#include "mali_kbase_gpu_coherency.h" -#include "mali_kbase_gpu_id.h" -#if MALI_USE_CSF -#include "backend/mali_kbase_gpu_regmap_csf.h" -#else -#include "backend/mali_kbase_gpu_regmap_jm.h" -#endif - -/* Begin Register Offsets */ -/* GPU control registers */ - -#define GPU_CONTROL_BASE 0x0000 -#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r)) -#define GPU_ID 0x000 /* (RO) GPU and revision identifier */ -#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */ -#define TILER_FEATURES 0x00C /* (RO) Tiler Features */ -#define MEM_FEATURES 0x010 /* (RO) Memory system features */ -#define MMU_FEATURES 0x014 /* (RO) MMU features */ -#define AS_PRESENT 0x018 /* (RO) Address space slots present */ -#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */ -#define GPU_IRQ_CLEAR 0x024 /* (WO) */ -#define GPU_IRQ_MASK 0x028 /* (RW) */ -#define GPU_IRQ_STATUS 0x02C /* (RO) */ - -#define GPU_COMMAND 0x030 /* (WO) */ -#define GPU_STATUS 0x034 /* (RO) */ - -#define GPU_DBGEN (1 << 8) /* DBGEN wire status */ - -#define GPU_FAULTSTATUS 0x03C /* (RO) GPU exception type and fault status */ -#define GPU_FAULTADDRESS_LO 0x040 /* (RO) GPU exception fault address, low word */ -#define GPU_FAULTADDRESS_HI 0x044 /* (RO) GPU exception fault address, high word */ - -#define L2_CONFIG 0x048 /* (RW) Level 2 cache configuration */ - -#define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */ -#define SUPER_L2_COHERENT (1 << 1) /* Shader cores within a core - * supergroup are l2 coherent - */ - -#define PWR_KEY 0x050 /* (WO) Power manager key register */ -#define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */ -#define PWR_OVERRIDE1 0x058 /* (RW) Power manager override settings */ -#define GPU_FEATURES_LO 0x060 /* (RO) GPU features, low word */ 
-#define GPU_FEATURES_HI 0x064 /* (RO) GPU features, high word */ -#define CYCLE_COUNT_LO 0x090 /* (RO) Cycle counter, low word */ -#define CYCLE_COUNT_HI 0x094 /* (RO) Cycle counter, high word */ -#define TIMESTAMP_LO 0x098 /* (RO) Global time stamp counter, low word */ -#define TIMESTAMP_HI 0x09C /* (RO) Global time stamp counter, high word */ - -#define THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */ -#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */ -#define THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */ -#define THREAD_FEATURES 0x0AC /* (RO) Thread features */ -#define THREAD_TLS_ALLOC 0x310 /* (RO) Number of threads per core that TLS must be allocated for */ - -#define TEXTURE_FEATURES_0 0x0B0 /* (RO) Support flags for indexed texture formats 0..31 */ -#define TEXTURE_FEATURES_1 0x0B4 /* (RO) Support flags for indexed texture formats 32..63 */ -#define TEXTURE_FEATURES_2 0x0B8 /* (RO) Support flags for indexed texture formats 64..95 */ -#define TEXTURE_FEATURES_3 0x0BC /* (RO) Support flags for texture order */ - -#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2)) - -#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */ -#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */ - -#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */ -#define TILER_PRESENT_HI 0x114 /* (RO) Tiler core present bitmap, high word */ - -#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */ -#define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */ - -#define STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */ -#define STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */ - -#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */ -#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */ 
- -#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */ -#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */ - -#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */ -#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */ - -#define STACK_READY_LO 0xE10 /* (RO) Core stack ready bitmap, low word */ -#define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */ - -#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */ -#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */ - -#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */ -#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */ - -#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */ -#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */ - -#define STACK_PWRON_LO 0xE20 /* (RO) Core stack power on bitmap, low word */ -#define STACK_PWRON_HI 0xE24 /* (RO) Core stack power on bitmap, high word */ - -#define SHADER_PWROFF_LO 0x1C0 /* (WO) Shader core power off bitmap, low word */ -#define SHADER_PWROFF_HI 0x1C4 /* (WO) Shader core power off bitmap, high word */ - -#define TILER_PWROFF_LO 0x1D0 /* (WO) Tiler core power off bitmap, low word */ -#define TILER_PWROFF_HI 0x1D4 /* (WO) Tiler core power off bitmap, high word */ - -#define L2_PWROFF_LO 0x1E0 /* (WO) Level 2 cache power off bitmap, low word */ -#define L2_PWROFF_HI 0x1E4 /* (WO) Level 2 cache power off bitmap, high word */ - -#define STACK_PWROFF_LO 0xE30 /* (RO) Core stack power off bitmap, low word */ -#define STACK_PWROFF_HI 0xE34 /* (RO) Core stack power off bitmap, high word */ - -#define SHADER_PWRTRANS_LO 0x200 /* (RO) Shader core power transition bitmap, low word */ -#define SHADER_PWRTRANS_HI 0x204 /* (RO) Shader core power transition bitmap, high word */ - -#define TILER_PWRTRANS_LO 0x210 /* (RO) Tiler core power 
transition bitmap, low word */ -#define TILER_PWRTRANS_HI 0x214 /* (RO) Tiler core power transition bitmap, high word */ - -#define L2_PWRTRANS_LO 0x220 /* (RO) Level 2 cache power transition bitmap, low word */ -#define L2_PWRTRANS_HI 0x224 /* (RO) Level 2 cache power transition bitmap, high word */ - -#define ASN_HASH_0 0x02C0 /* (RW) ASN hash function argument 0 */ -#define ASN_HASH(n) (ASN_HASH_0 + (n)*4) -#define ASN_HASH_COUNT 3 - -#define STACK_PWRTRANS_LO 0xE40 /* (RO) Core stack power transition bitmap, low word */ -#define STACK_PWRTRANS_HI 0xE44 /* (RO) Core stack power transition bitmap, high word */ - -#define SHADER_PWRACTIVE_LO 0x240 /* (RO) Shader core active bitmap, low word */ -#define SHADER_PWRACTIVE_HI 0x244 /* (RO) Shader core active bitmap, high word */ - -#define TILER_PWRACTIVE_LO 0x250 /* (RO) Tiler core active bitmap, low word */ -#define TILER_PWRACTIVE_HI 0x254 /* (RO) Tiler core active bitmap, high word */ - -#define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */ -#define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */ - -#define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */ -#define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */ - -#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration (implementation-specific) */ -#define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration (implementation-specific) */ -#define L2_MMU_CONFIG 0xF0C /* (RW) L2 cache and MMU configuration (implementation-specific) */ - -/* Job control registers */ - -#define JOB_CONTROL_BASE 0x1000 - -#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r)) - -#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */ -#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */ -#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */ -#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */ - -/* MMU control registers */ - -#define MEMORY_MANAGEMENT_BASE 0x2000 -#define MMU_REG(r) 
(MEMORY_MANAGEMENT_BASE + (r)) - -#define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */ -#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */ -#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */ -#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */ - -#define MMU_AS0 0x400 /* Configuration registers for address space 0 */ -#define MMU_AS1 0x440 /* Configuration registers for address space 1 */ -#define MMU_AS2 0x480 /* Configuration registers for address space 2 */ -#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */ -#define MMU_AS4 0x500 /* Configuration registers for address space 4 */ -#define MMU_AS5 0x540 /* Configuration registers for address space 5 */ -#define MMU_AS6 0x580 /* Configuration registers for address space 6 */ -#define MMU_AS7 0x5C0 /* Configuration registers for address space 7 */ -#define MMU_AS8 0x600 /* Configuration registers for address space 8 */ -#define MMU_AS9 0x640 /* Configuration registers for address space 9 */ -#define MMU_AS10 0x680 /* Configuration registers for address space 10 */ -#define MMU_AS11 0x6C0 /* Configuration registers for address space 11 */ -#define MMU_AS12 0x700 /* Configuration registers for address space 12 */ -#define MMU_AS13 0x740 /* Configuration registers for address space 13 */ -#define MMU_AS14 0x780 /* Configuration registers for address space 14 */ -#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */ - -/* MMU address space control registers */ - -#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r)) - -#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */ -#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */ -#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */ -#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. 
*/ -#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */ -#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */ -#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */ -#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */ -#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */ -#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */ -#define AS_STATUS 0x28 /* (RO) Status flags for address space n */ - -/* (RW) Translation table configuration for address space n, low word */ -#define AS_TRANSCFG_LO 0x30 -/* (RW) Translation table configuration for address space n, high word */ -#define AS_TRANSCFG_HI 0x34 -/* (RO) Secondary fault address for address space n, low word */ -#define AS_FAULTEXTRA_LO 0x38 -/* (RO) Secondary fault address for address space n, high word */ -#define AS_FAULTEXTRA_HI 0x3C - -/* End Register Offsets */ +#include <uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h> /* Include POWER_CHANGED_SINGLE in debug builds for use in irq latency test. */ #ifdef CONFIG_MALI_DEBUG +#undef GPU_IRQ_REG_ALL #define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE) -#else /* CONFIG_MALI_DEBUG */ -#define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON) #endif /* CONFIG_MALI_DEBUG */ -/* - * MMU_IRQ_RAWSTAT register values. Values are valid also for - * MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers. 
- */ - -#define MMU_PAGE_FAULT_FLAGS 16 - -/* Macros returning a bitmask to retrieve page fault or bus error flags from - * MMU registers - */ -#define MMU_PAGE_FAULT(n) (1UL << (n)) -#define MMU_BUS_ERROR(n) (1UL << ((n) + MMU_PAGE_FAULT_FLAGS)) - -/* - * Begin LPAE MMU TRANSTAB register values - */ -#define AS_TRANSTAB_LPAE_ADDR_SPACE_MASK 0xfffff000 -#define AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED (0u << 0) -#define AS_TRANSTAB_LPAE_ADRMODE_IDENTITY (1u << 1) -#define AS_TRANSTAB_LPAE_ADRMODE_TABLE (3u << 0) -#define AS_TRANSTAB_LPAE_READ_INNER (1u << 2) -#define AS_TRANSTAB_LPAE_SHARE_OUTER (1u << 4) - -#define AS_TRANSTAB_LPAE_ADRMODE_MASK 0x00000003 - -/* - * Begin AARCH64 MMU TRANSTAB register values - */ -#define MMU_HW_OUTA_BITS 40 -#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4)) - -/* - * Begin MMU STATUS register values - */ -#define AS_STATUS_AS_ACTIVE 0x01 - -#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK (0x7<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT (0x0<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT (0x1<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT (0x2<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG (0x3<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT (0x4<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3) - -#define AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0 -#define AS_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFF << AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) -#define AS_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \ - (((reg_val)&AS_FAULTSTATUS_EXCEPTION_TYPE_MASK) >> AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) -#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_0 0xC0 - -#define AS_FAULTSTATUS_ACCESS_TYPE_SHIFT 8 -#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3 << AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) -#define AS_FAULTSTATUS_ACCESS_TYPE_GET(reg_val) \ - (((reg_val)&AS_FAULTSTATUS_ACCESS_TYPE_MASK) >> AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) - -#define 
AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0) -#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1) -#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2) -#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3) - -#define AS_FAULTSTATUS_SOURCE_ID_SHIFT 16 -#define AS_FAULTSTATUS_SOURCE_ID_MASK (0xFFFF << AS_FAULTSTATUS_SOURCE_ID_SHIFT) -#define AS_FAULTSTATUS_SOURCE_ID_GET(reg_val) \ - (((reg_val)&AS_FAULTSTATUS_SOURCE_ID_MASK) >> AS_FAULTSTATUS_SOURCE_ID_SHIFT) - -/* - * Begin MMU TRANSCFG register values - */ -#define AS_TRANSCFG_ADRMODE_LEGACY 0 -#define AS_TRANSCFG_ADRMODE_UNMAPPED 1 -#define AS_TRANSCFG_ADRMODE_IDENTITY 2 -#define AS_TRANSCFG_ADRMODE_AARCH64_4K 6 -#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8 - -#define AS_TRANSCFG_ADRMODE_MASK 0xF - -/* - * Begin TRANSCFG register values - */ -#define AS_TRANSCFG_PTW_MEMATTR_MASK (3ull << 24) -#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1ull << 24) -#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2ull << 24) - -#define AS_TRANSCFG_PTW_SH_MASK ((3ull << 28)) -#define AS_TRANSCFG_PTW_SH_OS (2ull << 28) -#define AS_TRANSCFG_PTW_SH_IS (3ull << 28) -#define AS_TRANSCFG_R_ALLOCATE (1ull << 30) - -/* - * Begin Command Values - */ - -/* AS_COMMAND register commands */ -#define AS_COMMAND_NOP 0x00 /* NOP Operation */ -#define AS_COMMAND_UPDATE 0x01 /* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */ -#define AS_COMMAND_LOCK 0x02 /* Issue a lock region command to all MMUs */ -#define AS_COMMAND_UNLOCK 0x03 /* Issue a flush region command to all MMUs */ -/* Flush all L2 caches then issue a flush region command to all MMUs - * (deprecated - only for use with T60x) - */ -#define AS_COMMAND_FLUSH 0x04 -/* Flush all L2 caches then issue a flush region command to all MMUs */ -#define AS_COMMAND_FLUSH_PT 0x04 -/* Wait for memory accesses to complete, flush all the L1s cache then flush all - * L2 caches then issue a flush region command to all MMUs - */ -#define AS_COMMAND_FLUSH_MEM 0x05 - -/* GPU_STATUS values */ -#define 
GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */ -#define GPU_STATUS_CYCLE_COUNT_ACTIVE (1 << 6) /* Set if the cycle counter is active. */ -#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */ - -/* PRFCNT_CONFIG register values */ -#define PRFCNT_CONFIG_MODE_SHIFT 0 /* Counter mode position. */ -#define PRFCNT_CONFIG_AS_SHIFT 4 /* Address space bitmap position. */ -#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */ - -/* The performance counters are disabled. */ -#define PRFCNT_CONFIG_MODE_OFF 0 -/* The performance counters are enabled, but are only written out when a - * PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. - */ -#define PRFCNT_CONFIG_MODE_MANUAL 1 -/* The performance counters are enabled, and are written out each time a tile - * finishes rendering. - */ -#define PRFCNT_CONFIG_MODE_TILE 2 - -/* AS<n>_MEMATTR values from MMU_MEMATTR_STAGE1: */ -/* Use GPU implementation-defined caching policy. */ -#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull -/* The attribute set to force all resources to be cached. */ -#define AS_MEMATTR_FORCE_TO_CACHE_ALL 0x8Full -/* Inner write-alloc cache setup, no outer caching */ -#define AS_MEMATTR_WRITE_ALLOC 0x8Dull - -/* Use GPU implementation-defined caching policy. */ -#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull -/* The attribute set to force all resources to be cached. */ -#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL 0x4Full -/* Inner write-alloc cache setup, no outer caching */ -#define AS_MEMATTR_LPAE_WRITE_ALLOC 0x4Dull -/* Set to implementation defined, outer caching */ -#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF 0x88ull -/* Set to write back memory, outer caching */ -#define AS_MEMATTR_LPAE_OUTER_WA 0x8Dull -/* There is no LPAE support for non-cacheable, since the memory type is always - * write-back. 
- * Marking this setting as reserved for LPAE - */ -#define AS_MEMATTR_LPAE_NON_CACHEABLE_RESERVED - -/* L2_MMU_CONFIG register */ -#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT (23) -#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT) - -/* End L2_MMU_CONFIG register */ - -/* THREAD_* registers */ - -/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */ -#define IMPLEMENTATION_UNSPECIFIED 0 -#define IMPLEMENTATION_SILICON 1 -#define IMPLEMENTATION_FPGA 2 -#define IMPLEMENTATION_MODEL 3 - -/* Default values when registers are not supported by the implemented hardware */ -#define THREAD_MT_DEFAULT 256 -#define THREAD_MWS_DEFAULT 256 -#define THREAD_MBS_DEFAULT 256 -#define THREAD_MR_DEFAULT 1024 -#define THREAD_MTQ_DEFAULT 4 -#define THREAD_MTGS_DEFAULT 10 - -/* End THREAD_* registers */ - -/* SHADER_CONFIG register */ -#define SC_LS_ALLOW_ATTR_TYPES (1ul << 16) -#define SC_TLS_HASH_ENABLE (1ul << 17) -#define SC_LS_ATTR_CHECK_DISABLE (1ul << 18) -#define SC_VAR_ALGORITHM (1ul << 29) -/* End SHADER_CONFIG register */ - -/* TILER_CONFIG register */ -#define TC_CLOCK_GATE_OVERRIDE (1ul << 0) -/* End TILER_CONFIG register */ - -/* L2_CONFIG register */ -#define L2_CONFIG_SIZE_SHIFT 16 -#define L2_CONFIG_SIZE_MASK (0xFFul << L2_CONFIG_SIZE_SHIFT) -#define L2_CONFIG_HASH_SHIFT 24 -#define L2_CONFIG_HASH_MASK (0xFFul << L2_CONFIG_HASH_SHIFT) -#define L2_CONFIG_ASN_HASH_ENABLE_SHIFT 24 -#define L2_CONFIG_ASN_HASH_ENABLE_MASK (1ul << L2_CONFIG_ASN_HASH_ENABLE_SHIFT) -/* End L2_CONFIG register */ - -/* IDVS_GROUP register */ -#define IDVS_GROUP_SIZE_SHIFT (16) -#define IDVS_GROUP_MAX_SIZE (0x3F) - #endif /* _KBASE_GPU_REGMAP_H_ */ diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c index d7648cd..00c0f60 100644 --- a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c +++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c @@ -23,7 +23,9 @@ #include 
"mali_kbase_ipa_counter_common_jm.h" #include "mali_kbase.h" - +#ifdef CONFIG_MALI_NO_MALI +#include <backend/gpu/mali_kbase_model_dummy.h> +#endif /* Performance counter blocks base offsets */ #define JM_BASE (0 * KBASE_IPA_NR_BYTES_PER_BLOCK) @@ -94,10 +96,15 @@ static u32 kbase_g7x_power_model_get_memsys_counter(struct kbase_ipa_model_vinst static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_data *model_data, u32 counter_block_offset) { +#ifdef CONFIG_MALI_NO_MALI + const u32 sc_base = MEMSYS_BASE + + (KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS * + KBASE_IPA_NR_BYTES_PER_BLOCK); +#else const u32 sc_base = MEMSYS_BASE + (model_data->kbdev->gpu_props.props.l2_props.num_l2_slices * KBASE_IPA_NR_BYTES_PER_BLOCK); - +#endif return sc_base + counter_block_offset; } diff --git a/mali_kbase/jm/mali_base_jm_kernel.h b/mali_kbase/jm/mali_base_jm_kernel.h deleted file mode 100644 index a72819e..0000000 --- a/mali_kbase/jm/mali_base_jm_kernel.h +++ /dev/null @@ -1,1191 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -#ifndef _BASE_JM_KERNEL_H_ -#define _BASE_JM_KERNEL_H_ - -/* Memory allocation, access/hint flags. - * - * See base_mem_alloc_flags. 
- */ - -/* IN */ -/* Read access CPU side - */ -#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0) - -/* Write access CPU side - */ -#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1) - -/* Read access GPU side - */ -#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2) - -/* Write access GPU side - */ -#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3) - -/* Execute allowed on the GPU side - */ -#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4) - -/* Will be permanently mapped in kernel space. - * Flag is only allowed on allocations originating from kbase. - */ -#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5) - -/* The allocation will completely reside within the same 4GB chunk in the GPU - * virtual space. - * Since this flag is primarily required only for the TLS memory which will - * not be used to contain executable code and also not used for Tiler heap, - * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags. - */ -#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6) - -/* Userspace is not allowed to free this memory. - * Flag is only allowed on allocations originating from kbase. 
- */ -#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7) - -#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8) - -/* Grow backing store on GPU Page Fault - */ -#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9) - -/* Page coherence Outer shareable, if available - */ -#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10) - -/* Page coherence Inner shareable - */ -#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11) - -/* IN/OUT */ -/* Should be cached on the CPU, returned if actually cached - */ -#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12) - -/* IN/OUT */ -/* Must have same VA on both the GPU and the CPU - */ -#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13) - -/* OUT */ -/* Must call mmap to acquire a GPU address for the allocation - */ -#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14) - -/* IN */ -/* Page coherence Outer shareable, required. - */ -#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15) - -/* Protected memory - */ -#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16) - -/* Not needed physical memory - */ -#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17) - -/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the - * addresses to be the same - */ -#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18) - -/** - * Bit 19 is reserved. - * - * Do not remove, use the next unreserved bit for new flags - */ -#define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19) - -/** - * Memory starting from the end of the initial commit is aligned to 'extension' - * pages, where 'extension' must be a power of 2 and no more than - * BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES - */ -#define BASE_MEM_TILER_ALIGN_TOP ((base_mem_alloc_flags)1 << 20) - -/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu - * mode. 
Some components within the GPU might only be able to access memory - * that is GPU cacheable. Refer to the specific GPU implementation for more - * details. The 3 shareability flags will be ignored for GPU uncached memory. - * If used while importing USER_BUFFER type memory, then the import will fail - * if the memory is not aligned to GPU and CPU cache line width. - */ -#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21) - -/* - * Bits [22:25] for group_id (0~15). - * - * base_mem_group_id_set() should be used to pack a memory group ID into a - * base_mem_alloc_flags value instead of accessing the bits directly. - * base_mem_group_id_get() should be used to extract the memory group ID from - * a base_mem_alloc_flags value. - */ -#define BASEP_MEM_GROUP_ID_SHIFT 22 -#define BASE_MEM_GROUP_ID_MASK \ - ((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT) - -/* Must do CPU cache maintenance when imported memory is mapped/unmapped - * on GPU. Currently applicable to dma-buf type only. - */ -#define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26) - -/* Use the GPU VA chosen by the kernel client */ -#define BASE_MEM_FLAG_MAP_FIXED ((base_mem_alloc_flags)1 << 27) - -/* OUT */ -/* Kernel side cache sync ops required */ -#define BASE_MEM_KERNEL_SYNC ((base_mem_alloc_flags)1 << 28) - -/* Force trimming of JIT allocations when creating a new allocation */ -#define BASEP_MEM_PERFORM_JIT_TRIM ((base_mem_alloc_flags)1 << 29) - -/* Number of bits used as flags for base memory management - * - * Must be kept in sync with the base_mem_alloc_flags flags - */ -#define BASE_MEM_FLAGS_NR_BITS 30 - -/* A mask of all the flags which are only valid for allocations within kbase, - * and may not be passed from user space. - */ -#define BASEP_MEM_FLAGS_KERNEL_ONLY \ - (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE | \ - BASE_MEM_FLAG_MAP_FIXED | BASEP_MEM_PERFORM_JIT_TRIM) - -/* A mask for all output bits, excluding IN/OUT bits. 
- */ -#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP - -/* A mask for all input bits, including IN/OUT bits. - */ -#define BASE_MEM_FLAGS_INPUT_MASK \ - (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK) - -/* A mask of all currently reserved flags - */ -#define BASE_MEM_FLAGS_RESERVED \ - (BASE_MEM_RESERVED_BIT_8 | BASE_MEM_RESERVED_BIT_19) - -#define BASEP_MEM_INVALID_HANDLE (0ull << 12) -#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12) -#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12) -#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12) -#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12) -/* reserved handles ..-47<<PAGE_SHIFT> for future special handles */ -#define BASE_MEM_COOKIE_BASE (64ul << 12) -#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \ - BASE_MEM_COOKIE_BASE) - -/* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the - * initial commit is aligned to 'extension' pages, where 'extension' must be a power - * of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES - */ -#define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP (1 << 0) - -/** - * If set, the heap info address points to a u32 holding the used size in bytes; - * otherwise it points to a u64 holding the lowest address of unused memory. - */ -#define BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE (1 << 1) - -/** - * Valid set of just-in-time memory allocation flags - * - * Note: BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE cannot be set if heap_info_gpu_addr - * in %base_jit_alloc_info is 0 (atom with BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE set - * and heap_info_gpu_addr being 0 will be rejected). - */ -#define BASE_JIT_ALLOC_VALID_FLAGS \ - (BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP | BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE) - -/** - * typedef base_context_create_flags - Flags to pass to ::base_context_init. - * - * Flags can be ORed together to enable multiple things. - * - * These share the same space as BASEP_CONTEXT_FLAG_*, and so must - * not collide with them. 
- */ -typedef u32 base_context_create_flags; - -/* No flags set */ -#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0) - -/* Base context is embedded in a cctx object (flag used for CINSTR - * software counter macros) - */ -#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0) - -/* Base context is a 'System Monitor' context for Hardware counters. - * - * One important side effect of this is that job submission is disabled. - */ -#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \ - ((base_context_create_flags)1 << 1) - -/* Bit-shift used to encode a memory group ID in base_context_create_flags - */ -#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3) - -/* Bitmask used to encode a memory group ID in base_context_create_flags - */ -#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \ - ((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT) - -/* Bitpattern describing the base_context_create_flags that can be - * passed to the kernel - */ -#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \ - (BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | \ - BASEP_CONTEXT_MMU_GROUP_ID_MASK) - -/* Bitpattern describing the ::base_context_create_flags that can be - * passed to base_context_init() - */ -#define BASEP_CONTEXT_CREATE_ALLOWED_FLAGS \ - (BASE_CONTEXT_CCTX_EMBEDDED | BASEP_CONTEXT_CREATE_KERNEL_FLAGS) - -/* - * Private flags used on the base context - * - * These start at bit 31, and run down to zero. - * - * They share the same space as base_context_create_flags, and so must - * not collide with them. - */ - -/* Private flag tracking whether job descriptor dumping is disabled */ -#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED \ - ((base_context_create_flags)(1 << 31)) - -/* Enable additional tracepoints for latency measurements (TL_ATOM_READY, - * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) - */ -#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0) - -/* Indicate that job dumping is enabled. 
This could affect certain timers - * to account for the performance impact. - */ -#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1) - -#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \ - BASE_TLSTREAM_JOB_DUMPING_ENABLED) -/* - * Dependency stuff, keep it private for now. May want to expose it if - * we decide to make the number of semaphores a configurable - * option. - */ -#define BASE_JD_ATOM_COUNT 256 - -/* Maximum number of concurrent render passes. - */ -#define BASE_JD_RP_COUNT (256) - -/* Set/reset values for a software event */ -#define BASE_JD_SOFT_EVENT_SET ((unsigned char)1) -#define BASE_JD_SOFT_EVENT_RESET ((unsigned char)0) - -/** - * struct base_jd_udata - Per-job data - * - * This structure is used to store per-job data, and is completely unused - * by the Base driver. It can be used to store things such as callback - * function pointer, data to handle job completion. It is guaranteed to be - * untouched by the Base driver. - * - * @blob: per-job data array - */ -struct base_jd_udata { - u64 blob[2]; -}; - -/** - * typedef base_jd_dep_type - Job dependency type. - * - * A flags field will be inserted into the atom structure to specify whether a - * dependency is a data or ordering dependency (by putting it before/after - * 'core_req' in the structure it should be possible to add without changing - * the structure size). - * When the flag is set for a particular dependency to signal that it is an - * ordering only dependency then errors will not be propagated. - */ -typedef u8 base_jd_dep_type; - -#define BASE_JD_DEP_TYPE_INVALID (0) /**< Invalid dependency */ -#define BASE_JD_DEP_TYPE_DATA (1U << 0) /**< Data dependency */ -#define BASE_JD_DEP_TYPE_ORDER (1U << 1) /**< Order dependency */ - -/** - * typedef base_jd_core_req - Job chain hardware requirements. - * - * A job chain must specify what GPU features it needs to allow the - * driver to schedule the job correctly. 
By not specifying the - * correct settings can/will cause an early job termination. Multiple - * values can be ORed together to specify multiple requirements. - * Special case is ::BASE_JD_REQ_DEP, which is used to express complex - * dependencies, and that doesn't execute anything on the hardware. - */ -typedef u32 base_jd_core_req; - -/* Requirements that come from the HW */ - -/* No requirement, dependency only - */ -#define BASE_JD_REQ_DEP ((base_jd_core_req)0) - -/* Requires fragment shaders - */ -#define BASE_JD_REQ_FS ((base_jd_core_req)1 << 0) - -/* Requires compute shaders - * - * This covers any of the following GPU job types: - * - Vertex Shader Job - * - Geometry Shader Job - * - An actual Compute Shader Job - * - * Compare this with BASE_JD_REQ_ONLY_COMPUTE, which specifies that the - * job is specifically just the "Compute Shader" job type, and not the "Vertex - * Shader" nor the "Geometry Shader" job type. - */ -#define BASE_JD_REQ_CS ((base_jd_core_req)1 << 1) - -/* Requires tiling */ -#define BASE_JD_REQ_T ((base_jd_core_req)1 << 2) - -/* Requires cache flushes */ -#define BASE_JD_REQ_CF ((base_jd_core_req)1 << 3) - -/* Requires value writeback */ -#define BASE_JD_REQ_V ((base_jd_core_req)1 << 4) - -/* SW-only requirements - the HW does not expose these as part of the job slot - * capabilities - */ - -/* Requires fragment job with AFBC encoding */ -#define BASE_JD_REQ_FS_AFBC ((base_jd_core_req)1 << 13) - -/* SW-only requirement: coalesce completion events. - * If this bit is set then completion of this atom will not cause an event to - * be sent to userspace, whether successful or not; completion events will be - * deferred until an atom completes which does not have this bit set. - * - * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES. - */ -#define BASE_JD_REQ_EVENT_COALESCE ((base_jd_core_req)1 << 5) - -/* SW Only requirement: the job chain requires a coherent core group. 
We don't - * mind which coherent core group is used. - */ -#define BASE_JD_REQ_COHERENT_GROUP ((base_jd_core_req)1 << 6) - -/* SW Only requirement: The performance counters should be enabled only when - * they are needed, to reduce power consumption. - */ -#define BASE_JD_REQ_PERMON ((base_jd_core_req)1 << 7) - -/* SW Only requirement: External resources are referenced by this atom. - * - * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE and - * BASE_JD_REQ_SOFT_EVENT_WAIT. - */ -#define BASE_JD_REQ_EXTERNAL_RESOURCES ((base_jd_core_req)1 << 8) - -/* SW Only requirement: Software defined job. Jobs with this bit set will not be - * submitted to the hardware but will cause some action to happen within the - * driver - */ -#define BASE_JD_REQ_SOFT_JOB ((base_jd_core_req)1 << 9) - -#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME (BASE_JD_REQ_SOFT_JOB | 0x1) -#define BASE_JD_REQ_SOFT_FENCE_TRIGGER (BASE_JD_REQ_SOFT_JOB | 0x2) -#define BASE_JD_REQ_SOFT_FENCE_WAIT (BASE_JD_REQ_SOFT_JOB | 0x3) - -/* 0x4 RESERVED for now */ - -/* SW only requirement: event wait/trigger job. - * - * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set. - * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the - * other waiting jobs. It completes immediately. - * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it - * possible for other jobs to wait upon. It completes immediately. - */ -#define BASE_JD_REQ_SOFT_EVENT_WAIT (BASE_JD_REQ_SOFT_JOB | 0x5) -#define BASE_JD_REQ_SOFT_EVENT_SET (BASE_JD_REQ_SOFT_JOB | 0x6) -#define BASE_JD_REQ_SOFT_EVENT_RESET (BASE_JD_REQ_SOFT_JOB | 0x7) - -#define BASE_JD_REQ_SOFT_DEBUG_COPY (BASE_JD_REQ_SOFT_JOB | 0x8) - -/* SW only requirement: Just In Time allocation - * - * This job requests a single or multiple just-in-time allocations through a - * list of base_jit_alloc_info structure which is passed via the jc element of - * the atom. 
The number of base_jit_alloc_info structures present in the - * list is passed via the nr_extres element of the atom - * - * It should be noted that the id entry in base_jit_alloc_info must not - * be reused until it has been released via BASE_JD_REQ_SOFT_JIT_FREE. - * - * Should this soft job fail it is expected that a BASE_JD_REQ_SOFT_JIT_FREE - * soft job to free the JIT allocation is still made. - * - * The job will complete immediately. - */ -#define BASE_JD_REQ_SOFT_JIT_ALLOC (BASE_JD_REQ_SOFT_JOB | 0x9) - -/* SW only requirement: Just In Time free - * - * This job requests a single or multiple just-in-time allocations created by - * BASE_JD_REQ_SOFT_JIT_ALLOC to be freed. The ID list of the just-in-time - * allocations is passed via the jc element of the atom. - * - * The job will complete immediately. - */ -#define BASE_JD_REQ_SOFT_JIT_FREE (BASE_JD_REQ_SOFT_JOB | 0xa) - -/* SW only requirement: Map external resource - * - * This job requests external resource(s) are mapped once the dependencies - * of the job have been satisfied. The list of external resources are - * passed via the jc element of the atom which is a pointer to a - * base_external_resource_list. - */ -#define BASE_JD_REQ_SOFT_EXT_RES_MAP (BASE_JD_REQ_SOFT_JOB | 0xb) - -/* SW only requirement: Unmap external resource - * - * This job requests external resource(s) are unmapped once the dependencies - * of the job has been satisfied. The list of external resources are - * passed via the jc element of the atom which is a pointer to a - * base_external_resource_list. - */ -#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP (BASE_JD_REQ_SOFT_JOB | 0xc) - -/* HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders) - * - * This indicates that the Job Chain contains GPU jobs of the 'Compute - * Shaders' type. - * - * In contrast to BASE_JD_REQ_CS, this does not indicate that the Job - * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs. 
- */ -#define BASE_JD_REQ_ONLY_COMPUTE ((base_jd_core_req)1 << 10) - -/* HW Requirement: Use the base_jd_atom::device_nr field to specify a - * particular core group - * - * If both BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag - * takes priority - * - * This is only guaranteed to work for BASE_JD_REQ_ONLY_COMPUTE atoms. - * - * If the core availability policy is keeping the required core group turned - * off, then the job will fail with a BASE_JD_EVENT_PM_EVENT error code. - */ -#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11) - -/* SW Flag: If this bit is set then the successful completion of this atom - * will not cause an event to be sent to userspace - */ -#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE ((base_jd_core_req)1 << 12) - -/* SW Flag: If this bit is set then completion of this atom will not cause an - * event to be sent to userspace, whether successful or not. - */ -#define BASEP_JD_REQ_EVENT_NEVER ((base_jd_core_req)1 << 14) - -/* SW Flag: Skip GPU cache clean and invalidation before starting a GPU job. - * - * If this bit is set then the GPU's cache will not be cleaned and invalidated - * until a GPU job starts which does not have this bit set or a job completes - * which does not have the BASE_JD_REQ_SKIP_CACHE_END bit set. Do not use - * if the CPU may have written to memory addressed by the job since the last job - * without this bit set was submitted. - */ -#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15) - -/* SW Flag: Skip GPU cache clean and invalidation after a GPU job completes. - * - * If this bit is set then the GPU's cache will not be cleaned and invalidated - * until a GPU job completes which does not have this bit set or a job starts - * which does not have the BASE_JD_REQ_SKIP_CACHE_START bit set. Do not use - * if the CPU may read from or partially overwrite memory addressed by the job - * before the next job without this bit set completes. 
- */ -#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16) - -/* Request the atom be executed on a specific job slot. - * - * When this flag is specified, it takes precedence over any existing job slot - * selection logic. - */ -#define BASE_JD_REQ_JOB_SLOT ((base_jd_core_req)1 << 17) - -/* SW-only requirement: The atom is the start of a renderpass. - * - * If this bit is set then the job chain will be soft-stopped if it causes the - * GPU to write beyond the end of the physical pages backing the tiler heap, and - * committing more memory to the heap would exceed an internal threshold. It may - * be resumed after running one of the job chains attached to an atom with - * BASE_JD_REQ_END_RENDERPASS set and the same renderpass ID. It may be - * resumed multiple times until it completes without memory usage exceeding the - * threshold. - * - * Usually used with BASE_JD_REQ_T. - */ -#define BASE_JD_REQ_START_RENDERPASS ((base_jd_core_req)1 << 18) - -/* SW-only requirement: The atom is the end of a renderpass. - * - * If this bit is set then the atom incorporates the CPU address of a - * base_jd_fragment object instead of the GPU address of a job chain. - * - * Which job chain is run depends upon whether the atom with the same renderpass - * ID and the BASE_JD_REQ_START_RENDERPASS bit set completed normally or - * was soft-stopped when it exceeded an upper threshold for tiler heap memory - * usage. - * - * It also depends upon whether one of the job chains attached to the atom has - * already been run as part of the same renderpass (in which case it would have - * written unresolved multisampled and otherwise-discarded output to temporary - * buffers that need to be read back). The job chain for doing a forced read and - * forced write (from/to temporary buffers) is run as many times as necessary. - * - * Usually used with BASE_JD_REQ_FS. 
- */ -#define BASE_JD_REQ_END_RENDERPASS ((base_jd_core_req)1 << 19) - -/* These requirement bits are currently unused in base_jd_core_req - */ -#define BASEP_JD_REQ_RESERVED \ - (~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \ - BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \ - BASE_JD_REQ_EVENT_COALESCE | \ - BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \ - BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \ - BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END | \ - BASE_JD_REQ_JOB_SLOT | BASE_JD_REQ_START_RENDERPASS | \ - BASE_JD_REQ_END_RENDERPASS)) - -/* Mask of all bits in base_jd_core_req that control the type of the atom. - * - * This allows dependency only atoms to have flags set - */ -#define BASE_JD_REQ_ATOM_TYPE \ - (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | \ - BASE_JD_REQ_V | BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE) - -/** - * Mask of all bits in base_jd_core_req that control the type of a soft job. - */ -#define BASE_JD_REQ_SOFT_JOB_TYPE (BASE_JD_REQ_SOFT_JOB | 0x1f) - -/* Returns non-zero value if core requirements passed define a soft job or - * a dependency only job. - */ -#define BASE_JD_REQ_SOFT_JOB_OR_DEP(core_req) \ - (((core_req) & BASE_JD_REQ_SOFT_JOB) || \ - ((core_req) & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) - -/** - * enum kbase_jd_atom_state - * - * @KBASE_JD_ATOM_STATE_UNUSED: Atom is not used. - * @KBASE_JD_ATOM_STATE_QUEUED: Atom is queued in JD. - * @KBASE_JD_ATOM_STATE_IN_JS: Atom has been given to JS (is runnable/running). - * @KBASE_JD_ATOM_STATE_HW_COMPLETED: Atom has been completed, but not yet - * handed back to job dispatcher for - * dependency resolution. - * @KBASE_JD_ATOM_STATE_COMPLETED: Atom has been completed, but not yet handed - * back to userspace. 
- */ -enum kbase_jd_atom_state { - KBASE_JD_ATOM_STATE_UNUSED, - KBASE_JD_ATOM_STATE_QUEUED, - KBASE_JD_ATOM_STATE_IN_JS, - KBASE_JD_ATOM_STATE_HW_COMPLETED, - KBASE_JD_ATOM_STATE_COMPLETED -}; - -/** - * typedef base_atom_id - Type big enough to store an atom number in. - */ -typedef u8 base_atom_id; - -/** - * struct base_dependency - - * - * @atom_id: An atom number - * @dependency_type: Dependency type - */ -struct base_dependency { - base_atom_id atom_id; - base_jd_dep_type dependency_type; -}; - -/** - * struct base_jd_fragment - Set of GPU fragment job chains used for rendering. - * - * @norm_read_norm_write: Job chain for full rendering. - * GPU address of a fragment job chain to render in the - * circumstance where the tiler job chain did not exceed - * its memory usage threshold and no fragment job chain - * was previously run for the same renderpass. - * It is used no more than once per renderpass. - * @norm_read_forced_write: Job chain for starting incremental - * rendering. - * GPU address of a fragment job chain to render in - * the circumstance where the tiler job chain exceeded - * its memory usage threshold for the first time and - * no fragment job chain was previously run for the - * same renderpass. - * Writes unresolved multisampled and normally- - * discarded output to temporary buffers that must be - * read back by a subsequent forced_read job chain - * before the renderpass is complete. - * It is used no more than once per renderpass. - * @forced_read_forced_write: Job chain for continuing incremental - * rendering. - * GPU address of a fragment job chain to render in - * the circumstance where the tiler job chain - * exceeded its memory usage threshold again - * and a fragment job chain was previously run for - * the same renderpass. - * Reads unresolved multisampled and - * normally-discarded output from temporary buffers - * written by a previous forced_write job chain and - * writes the same to temporary buffers again. 
- * It is used as many times as required until - * rendering completes. - * @forced_read_norm_write: Job chain for ending incremental rendering. - * GPU address of a fragment job chain to render in the - * circumstance where the tiler job chain did not - * exceed its memory usage threshold this time and a - * fragment job chain was previously run for the same - * renderpass. - * Reads unresolved multisampled and normally-discarded - * output from temporary buffers written by a previous - * forced_write job chain in order to complete a - * renderpass. - * It is used no more than once per renderpass. - * - * This structure is referenced by the main atom structure if - * BASE_JD_REQ_END_RENDERPASS is set in the base_jd_core_req. - */ -struct base_jd_fragment { - u64 norm_read_norm_write; - u64 norm_read_forced_write; - u64 forced_read_forced_write; - u64 forced_read_norm_write; -}; - -/** - * typedef base_jd_prio - Base Atom priority. - * - * Only certain priority levels are actually implemented, as specified by the - * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority - * level that is not one of those defined below. - * - * Priority levels only affect scheduling after the atoms have had dependencies - * resolved. For example, a low priority atom that has had its dependencies - * resolved might run before a higher priority atom that has not had its - * dependencies resolved. - * - * In general, fragment atoms do not affect non-fragment atoms with - * lower priorities, and vice versa. One exception is that there is only one - * priority value for each context. So a high-priority (e.g.) fragment atom - * could increase its context priority, causing its non-fragment atoms to also - * be scheduled sooner. 
- * - * The atoms are scheduled as follows with respect to their priorities: - * * Let atoms 'X' and 'Y' be for the same job slot who have dependencies - * resolved, and atom 'X' has a higher priority than atom 'Y' - * * If atom 'Y' is currently running on the HW, then it is interrupted to - * allow atom 'X' to run soon after - * * If instead neither atom 'Y' nor atom 'X' are running, then when choosing - * the next atom to run, atom 'X' will always be chosen instead of atom 'Y' - * * Any two atoms that have the same priority could run in any order with - * respect to each other. That is, there is no ordering constraint between - * atoms of the same priority. - * - * The sysfs file 'js_ctx_scheduling_mode' is used to control how atoms are - * scheduled between contexts. The default value, 0, will cause higher-priority - * atoms to be scheduled first, regardless of their context. The value 1 will - * use a round-robin algorithm when deciding which context's atoms to schedule - * next, so higher-priority atoms can only preempt lower priority atoms within - * the same context. See KBASE_JS_SYSTEM_PRIORITY_MODE and - * KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE for more details. - */ -typedef u8 base_jd_prio; - -/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */ -#define BASE_JD_PRIO_MEDIUM ((base_jd_prio)0) -/* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and - * BASE_JD_PRIO_LOW - */ -#define BASE_JD_PRIO_HIGH ((base_jd_prio)1) -/* Low atom priority. */ -#define BASE_JD_PRIO_LOW ((base_jd_prio)2) -/* Real-Time atom priority. This is a priority higher than BASE_JD_PRIO_HIGH, - * BASE_JD_PRIO_MEDIUM, and BASE_JD_PRIO_LOW - */ -#define BASE_JD_PRIO_REALTIME ((base_jd_prio)3) - -/* Count of the number of priority levels. 
This itself is not a valid - * base_jd_prio setting - */ -#define BASE_JD_NR_PRIO_LEVELS 4 - -/** - * struct base_jd_atom_v2 - Node of a dependency graph used to submit a - * GPU job chain or soft-job to the kernel driver. - * - * @jc: GPU address of a job chain or (if BASE_JD_REQ_END_RENDERPASS - * is set in the base_jd_core_req) the CPU address of a - * base_jd_fragment object. - * @udata: User data. - * @extres_list: List of external resources. - * @nr_extres: Number of external resources or JIT allocations. - * @jit_id: Zero-terminated array of IDs of just-in-time memory - * allocations written to by the atom. When the atom - * completes, the value stored at the - * &struct_base_jit_alloc_info.heap_info_gpu_addr of - * each allocation is read in order to enforce an - * overall physical memory usage limit. - * @pre_dep: Pre-dependencies. One need to use SETTER function to assign - * this field; this is done in order to reduce possibility of - * improper assignment of a dependency field. - * @atom_number: Unique number to identify the atom. - * @prio: Atom priority. Refer to base_jd_prio for more details. - * @device_nr: Core group when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP - * specified. - * @jobslot: Job slot to use when BASE_JD_REQ_JOB_SLOT is specified. - * @core_req: Core requirements. - * @renderpass_id: Renderpass identifier used to associate an atom that has - * BASE_JD_REQ_START_RENDERPASS set in its core requirements - * with an atom that has BASE_JD_REQ_END_RENDERPASS set. - * @padding: Unused. Must be zero. - * - * This structure has changed since UK 10.2 for which base_jd_core_req was a - * u16 value. - * - * In UK 10.3 a core_req field of a u32 type was added to the end of the - * structure, and the place in the structure previously occupied by u16 - * core_req was kept but renamed to compat_core_req. - * - * From UK 11.20 - compat_core_req is now occupied by u8 jit_id[2]. 
- * Compatibility with UK 10.x from UK 11.y is not handled because - * the major version increase prevents this. - * - * For UK 11.20 jit_id[2] must be initialized to zero. - */ -struct base_jd_atom_v2 { - u64 jc; - struct base_jd_udata udata; - u64 extres_list; - u16 nr_extres; - u8 jit_id[2]; - struct base_dependency pre_dep[2]; - base_atom_id atom_number; - base_jd_prio prio; - u8 device_nr; - u8 jobslot; - base_jd_core_req core_req; - u8 renderpass_id; - u8 padding[7]; -}; - -/** - * struct base_jd_atom - Same as base_jd_atom_v2, but has an extra seq_nr - * at the beginning. - * - * @seq_nr: Sequence number of logical grouping of atoms. - * @jc: GPU address of a job chain or (if BASE_JD_REQ_END_RENDERPASS - * is set in the base_jd_core_req) the CPU address of a - * base_jd_fragment object. - * @udata: User data. - * @extres_list: List of external resources. - * @nr_extres: Number of external resources or JIT allocations. - * @jit_id: Zero-terminated array of IDs of just-in-time memory - * allocations written to by the atom. When the atom - * completes, the value stored at the - * &struct_base_jit_alloc_info.heap_info_gpu_addr of - * each allocation is read in order to enforce an - * overall physical memory usage limit. - * @pre_dep: Pre-dependencies. One need to use SETTER function to assign - * this field; this is done in order to reduce possibility of - * improper assignment of a dependency field. - * @atom_number: Unique number to identify the atom. - * @prio: Atom priority. Refer to base_jd_prio for more details. - * @device_nr: Core group when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP - * specified. - * @jobslot: Job slot to use when BASE_JD_REQ_JOB_SLOT is specified. - * @core_req: Core requirements. - * @renderpass_id: Renderpass identifier used to associate an atom that has - * BASE_JD_REQ_START_RENDERPASS set in its core requirements - * with an atom that has BASE_JD_REQ_END_RENDERPASS set. - * @padding: Unused. Must be zero. 
- */ -typedef struct base_jd_atom { - u64 seq_nr; - u64 jc; - struct base_jd_udata udata; - u64 extres_list; - u16 nr_extres; - u8 jit_id[2]; - struct base_dependency pre_dep[2]; - base_atom_id atom_number; - base_jd_prio prio; - u8 device_nr; - u8 jobslot; - base_jd_core_req core_req; - u8 renderpass_id; - u8 padding[7]; -} base_jd_atom; - -/* Job chain event code bits - * Defines the bits used to create ::base_jd_event_code - */ -enum { - BASE_JD_SW_EVENT_KERNEL = (1u << 15), /* Kernel side event */ - BASE_JD_SW_EVENT = (1u << 14), /* SW defined event */ - /* Event indicates success (SW events only) */ - BASE_JD_SW_EVENT_SUCCESS = (1u << 13), - BASE_JD_SW_EVENT_JOB = (0u << 11), /* Job related event */ - BASE_JD_SW_EVENT_BAG = (1u << 11), /* Bag related event */ - BASE_JD_SW_EVENT_INFO = (2u << 11), /* Misc/info event */ - BASE_JD_SW_EVENT_RESERVED = (3u << 11), /* Reserved event type */ - /* Mask to extract the type from an event code */ - BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11) -}; - -/** - * enum base_jd_event_code - Job chain event codes - * - * @BASE_JD_EVENT_RANGE_HW_NONFAULT_START: Start of hardware non-fault status - * codes. - * Obscurely, BASE_JD_EVENT_TERMINATED - * indicates a real fault, because the - * job was hard-stopped. - * @BASE_JD_EVENT_NOT_STARTED: Can't be seen by userspace, treated as - * 'previous job done'. - * @BASE_JD_EVENT_STOPPED: Can't be seen by userspace, becomes - * TERMINATED, DONE or JOB_CANCELLED. - * @BASE_JD_EVENT_TERMINATED: This is actually a fault status code - the job - * was hard stopped. - * @BASE_JD_EVENT_ACTIVE: Can't be seen by userspace, jobs only returned on - * complete/fail/cancel. - * @BASE_JD_EVENT_RANGE_HW_NONFAULT_END: End of hardware non-fault status codes. - * Obscurely, BASE_JD_EVENT_TERMINATED - * indicates a real fault, - * because the job was hard-stopped. - * @BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START: Start of hardware fault and - * software error status codes. 
- * @BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END: End of hardware fault and - * software error status codes. - * @BASE_JD_EVENT_RANGE_SW_SUCCESS_START: Start of software success status - * codes. - * @BASE_JD_EVENT_RANGE_SW_SUCCESS_END: End of software success status codes. - * @BASE_JD_EVENT_RANGE_KERNEL_ONLY_START: Start of kernel-only status codes. - * Such codes are never returned to - * user-space. - * @BASE_JD_EVENT_RANGE_KERNEL_ONLY_END: End of kernel-only status codes. - * @BASE_JD_EVENT_DONE: atom has completed successfull - * @BASE_JD_EVENT_JOB_CONFIG_FAULT: Atom dependencies configuration error which - * shall result in a failed atom - * @BASE_JD_EVENT_JOB_POWER_FAULT: The job could not be executed because the - * part of the memory system required to access - * job descriptors was not powered on - * @BASE_JD_EVENT_JOB_READ_FAULT: Reading a job descriptor into the Job - * manager failed - * @BASE_JD_EVENT_JOB_WRITE_FAULT: Writing a job descriptor from the Job - * manager failed - * @BASE_JD_EVENT_JOB_AFFINITY_FAULT: The job could not be executed because the - * specified affinity mask does not intersect - * any available cores - * @BASE_JD_EVENT_JOB_BUS_FAULT: A bus access failed while executing a job - * @BASE_JD_EVENT_INSTR_INVALID_PC: A shader instruction with an illegal program - * counter was executed. - * @BASE_JD_EVENT_INSTR_INVALID_ENC: A shader instruction with an illegal - * encoding was executed. - * @BASE_JD_EVENT_INSTR_TYPE_MISMATCH: A shader instruction was executed where - * the instruction encoding did not match the - * instruction type encoded in the program - * counter. - * @BASE_JD_EVENT_INSTR_OPERAND_FAULT: A shader instruction was executed that - * contained invalid combinations of operands. - * @BASE_JD_EVENT_INSTR_TLS_FAULT: A shader instruction was executed that tried - * to access the thread local storage section - * of another thread. 
- * @BASE_JD_EVENT_INSTR_ALIGN_FAULT: A shader instruction was executed that - * tried to do an unsupported unaligned memory - * access. - * @BASE_JD_EVENT_INSTR_BARRIER_FAULT: A shader instruction was executed that - * failed to complete an instruction barrier. - * @BASE_JD_EVENT_DATA_INVALID_FAULT: Any data structure read as part of the job - * contains invalid combinations of data. - * @BASE_JD_EVENT_TILE_RANGE_FAULT: Tile or fragment shading was asked to - * process a tile that is entirely outside the - * bounding box of the frame. - * @BASE_JD_EVENT_STATE_FAULT: Matches ADDR_RANGE_FAULT. A virtual address - * has been found that exceeds the virtual - * address range. - * @BASE_JD_EVENT_OUT_OF_MEMORY: The tiler ran out of memory when executing a job. - * @BASE_JD_EVENT_UNKNOWN: If multiple jobs in a job chain fail, only - * the first one the reports an error will set - * and return full error information. - * Subsequent failing jobs will not update the - * error status registers, and may write an - * error status of UNKNOWN. - * @BASE_JD_EVENT_DELAYED_BUS_FAULT: The GPU received a bus fault for access to - * physical memory where the original virtual - * address is no longer available. - * @BASE_JD_EVENT_SHAREABILITY_FAULT: Matches GPU_SHAREABILITY_FAULT. A cache - * has detected that the same line has been - * accessed as both shareable and non-shareable - * memory from inside the GPU. - * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1: A memory access hit an invalid table - * entry at level 1 of the translation table. - * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2: A memory access hit an invalid table - * entry at level 2 of the translation table. - * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3: A memory access hit an invalid table - * entry at level 3 of the translation table. - * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4: A memory access hit an invalid table - * entry at level 4 of the translation table. 
- * @BASE_JD_EVENT_PERMISSION_FAULT: A memory access could not be allowed due to - * the permission flags set in translation - * table - * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1: A bus fault occurred while reading - * level 0 of the translation tables. - * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2: A bus fault occurred while reading - * level 1 of the translation tables. - * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3: A bus fault occurred while reading - * level 2 of the translation tables. - * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4: A bus fault occurred while reading - * level 3 of the translation tables. - * @BASE_JD_EVENT_ACCESS_FLAG: Matches ACCESS_FLAG_0. A memory access hit a - * translation table entry with the ACCESS_FLAG - * bit set to zero in level 0 of the - * page table, and the DISABLE_AF_FAULT flag - * was not set. - * @BASE_JD_EVENT_MEM_GROWTH_FAILED: raised for JIT_ALLOC atoms that failed to - * grow memory on demand - * @BASE_JD_EVENT_JOB_CANCELLED: raised when this atom was hard-stopped or its - * dependencies failed - * @BASE_JD_EVENT_JOB_INVALID: raised for many reasons, including invalid data - * in the atom which overlaps with - * BASE_JD_EVENT_JOB_CONFIG_FAULT, or if the - * platform doesn't support the feature specified in - * the atom. 
- * @BASE_JD_EVENT_PM_EVENT: TODO: remove as it's not used - * @BASE_JD_EVENT_TIMED_OUT: TODO: remove as it's not used - * @BASE_JD_EVENT_BAG_INVALID: TODO: remove as it's not used - * @BASE_JD_EVENT_PROGRESS_REPORT: TODO: remove as it's not used - * @BASE_JD_EVENT_BAG_DONE: TODO: remove as it's not used - * @BASE_JD_EVENT_DRV_TERMINATED: this is a special event generated to indicate - * to userspace that the KBase context has been - * destroyed and Base should stop listening for - * further events - * @BASE_JD_EVENT_REMOVED_FROM_NEXT: raised when an atom that was configured in - * the GPU has to be retried (but it has not - * started) due to e.g., GPU reset - * @BASE_JD_EVENT_END_RP_DONE: this is used for incremental rendering to signal - * the completion of a renderpass. This value - * shouldn't be returned to userspace but I haven't - * seen where it is reset back to JD_EVENT_DONE. - * - * HW and low-level SW events are represented by event codes. - * The status of jobs which succeeded are also represented by - * an event code (see @BASE_JD_EVENT_DONE). - * Events are usually reported as part of a &struct base_jd_event. - * - * The event codes are encoded in the following way: - * * 10:0 - subtype - * * 12:11 - type - * * 13 - SW success (only valid if the SW bit is set) - * * 14 - SW event (HW event if not set) - * * 15 - Kernel event (should never be seen in userspace) - * - * Events are split up into ranges as follows: - * * BASE_JD_EVENT_RANGE_<description>_START - * * BASE_JD_EVENT_RANGE_<description>_END - * - * code is in <description>'s range when: - * BASE_JD_EVENT_RANGE_<description>_START <= code < - * BASE_JD_EVENT_RANGE_<description>_END - * - * Ranges can be asserted for adjacency by testing that the END of the previous - * is equal to the START of the next. This is useful for optimizing some tests - * for range. 
- * - * A limitation is that the last member of this enum must explicitly be handled - * (with an assert-unreachable statement) in switch statements that use - * variables of this type. Otherwise, the compiler warns that we have not - * handled that enum value. - */ -enum base_jd_event_code { - /* HW defined exceptions */ - BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0, - - /* non-fatal exceptions */ - BASE_JD_EVENT_NOT_STARTED = 0x00, - BASE_JD_EVENT_DONE = 0x01, - BASE_JD_EVENT_STOPPED = 0x03, - BASE_JD_EVENT_TERMINATED = 0x04, - BASE_JD_EVENT_ACTIVE = 0x08, - - BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40, - BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40, - - /* job exceptions */ - BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40, - BASE_JD_EVENT_JOB_POWER_FAULT = 0x41, - BASE_JD_EVENT_JOB_READ_FAULT = 0x42, - BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43, - BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44, - BASE_JD_EVENT_JOB_BUS_FAULT = 0x48, - BASE_JD_EVENT_INSTR_INVALID_PC = 0x50, - BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51, - BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52, - BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53, - BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54, - BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55, - BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56, - BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58, - BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59, - BASE_JD_EVENT_STATE_FAULT = 0x5A, - BASE_JD_EVENT_OUT_OF_MEMORY = 0x60, - BASE_JD_EVENT_UNKNOWN = 0x7F, - - /* GPU exceptions */ - BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80, - BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88, - - /* MMU exceptions */ - BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1, - BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2, - BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3, - BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4, - BASE_JD_EVENT_PERMISSION_FAULT = 0xC8, - BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1, - BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2, - BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3, - BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4, - 
BASE_JD_EVENT_ACCESS_FLAG = 0xD8, - - /* SW defined exceptions */ - BASE_JD_EVENT_MEM_GROWTH_FAILED = - BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000, - BASE_JD_EVENT_TIMED_OUT = - BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001, - BASE_JD_EVENT_JOB_CANCELLED = - BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002, - BASE_JD_EVENT_JOB_INVALID = - BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003, - BASE_JD_EVENT_PM_EVENT = - BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004, - - BASE_JD_EVENT_BAG_INVALID = - BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003, - - BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT | - BASE_JD_SW_EVENT_RESERVED | 0x3FF, - - BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | - BASE_JD_SW_EVENT_SUCCESS | 0x000, - - BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT | - BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000, - BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | - BASE_JD_SW_EVENT_BAG | 0x000, - BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | - BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000, - - BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT | - BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF, - - BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | - BASE_JD_SW_EVENT_KERNEL | 0x000, - BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | - BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000, - BASE_JD_EVENT_END_RP_DONE = BASE_JD_SW_EVENT | - BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x001, - - BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | - BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF -}; - -/** - * struct base_jd_event_v2 - Event reporting structure - * - * @event_code: event code. - * @atom_number: the atom number that has completed. - * @udata: user data. - * - * This structure is used by the kernel driver to report information - * about GPU events. 
They can either be HW-specific events or low-level - * SW events, such as job-chain completion. - * - * The event code contains an event type field which can be extracted - * by ANDing with BASE_JD_SW_EVENT_TYPE_MASK. - */ -struct base_jd_event_v2 { - enum base_jd_event_code event_code; - base_atom_id atom_number; - struct base_jd_udata udata; -}; - -/** - * struct base_dump_cpu_gpu_counters - Structure for - * BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS - * jobs. - * @system_time: gpu timestamp - * @cycle_counter: gpu cycle count - * @sec: cpu time(sec) - * @usec: cpu time(usec) - * @padding: padding - * - * This structure is stored into the memory pointed to by the @jc field - * of &struct base_jd_atom. - * - * It must not occupy the same CPU cache line(s) as any neighboring data. - * This is to avoid cases where access to pages containing the structure - * is shared between cached and un-cached memory regions, which would - * cause memory corruption. - */ - -struct base_dump_cpu_gpu_counters { - u64 system_time; - u64 cycle_counter; - u64 sec; - u32 usec; - u8 padding[36]; -}; - -#endif /* _BASE_JM_KERNEL_H_ */ diff --git a/mali_kbase/jm/mali_kbase_jm_ioctl.h b/mali_kbase/jm/mali_kbase_jm_ioctl.h deleted file mode 100644 index 93c9c44..0000000 --- a/mali_kbase/jm/mali_kbase_jm_ioctl.h +++ /dev/null @@ -1,220 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -#ifndef _KBASE_JM_IOCTL_H_ -#define _KBASE_JM_IOCTL_H_ - -#include <asm-generic/ioctl.h> -#include <linux/types.h> - -/* - * 11.1: - * - Add BASE_MEM_TILER_ALIGN_TOP under base_mem_alloc_flags - * 11.2: - * - KBASE_MEM_QUERY_FLAGS can return KBASE_REG_PF_GROW and KBASE_REG_PROTECTED, - * which some user-side clients prior to 11.2 might fault if they received - * them - * 11.3: - * - New ioctls KBASE_IOCTL_STICKY_RESOURCE_MAP and - * KBASE_IOCTL_STICKY_RESOURCE_UNMAP - * 11.4: - * - New ioctl KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET - * 11.5: - * - New ioctl: KBASE_IOCTL_MEM_JIT_INIT (old ioctl renamed to _OLD) - * 11.6: - * - Added flags field to base_jit_alloc_info structure, which can be used to - * specify pseudo chunked tiler alignment for JIT allocations. - * 11.7: - * - Removed UMP support - * 11.8: - * - Added BASE_MEM_UNCACHED_GPU under base_mem_alloc_flags - * 11.9: - * - Added BASE_MEM_PERMANENT_KERNEL_MAPPING and BASE_MEM_FLAGS_KERNEL_ONLY - * under base_mem_alloc_flags - * 11.10: - * - Enabled the use of nr_extres field of base_jd_atom_v2 structure for - * JIT_ALLOC and JIT_FREE type softjobs to enable multiple JIT allocations - * with one softjob. - * 11.11: - * - Added BASE_MEM_GPU_VA_SAME_4GB_PAGE under base_mem_alloc_flags - * 11.12: - * - Removed ioctl: KBASE_IOCTL_GET_PROFILING_CONTROLS - * 11.13: - * - New ioctl: KBASE_IOCTL_MEM_EXEC_INIT - * 11.14: - * - Add BASE_MEM_GROUP_ID_MASK, base_mem_group_id_get, base_mem_group_id_set - * under base_mem_alloc_flags - * 11.15: - * - Added BASEP_CONTEXT_MMU_GROUP_ID_MASK under base_context_create_flags. - * - Require KBASE_IOCTL_SET_FLAGS before BASE_MEM_MAP_TRACKING_HANDLE can be - * passed to mmap(). - * 11.16: - * - Extended ioctl KBASE_IOCTL_MEM_SYNC to accept imported dma-buf. 
- * - Modified (backwards compatible) ioctl KBASE_IOCTL_MEM_IMPORT behavior for - * dma-buf. Now, buffers are mapped on GPU when first imported, no longer - * requiring external resource or sticky resource tracking. UNLESS, - * CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND is enabled. - * 11.17: - * - Added BASE_JD_REQ_JOB_SLOT. - * - Reused padding field in base_jd_atom_v2 to pass job slot number. - * - New ioctl: KBASE_IOCTL_GET_CPU_GPU_TIMEINFO - * 11.18: - * - Added BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP under base_mem_alloc_flags - * 11.19: - * - Extended base_jd_atom_v2 to allow a renderpass ID to be specified. - * 11.20: - * - Added new phys_pages member to kbase_ioctl_mem_jit_init for - * KBASE_IOCTL_MEM_JIT_INIT, previous variants of this renamed to use _10_2 - * (replacing '_OLD') and _11_5 suffixes - * - Replaced compat_core_req (deprecated in 10.3) with jit_id[2] in - * base_jd_atom_v2. It must currently be initialized to zero. - * - Added heap_info_gpu_addr to base_jit_alloc_info, and - * BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE allowable in base_jit_alloc_info's - * flags member. Previous variants of this structure are kept and given _10_2 - * and _11_5 suffixes. - * - The above changes are checked for safe values in usual builds - * 11.21: - * - v2.0 of mali_trace debugfs file, which now versions the file separately - * 11.22: - * - Added base_jd_atom (v3), which is seq_nr + base_jd_atom_v2. - * KBASE_IOCTL_JOB_SUBMIT supports both in parallel. - * 11.23: - * - Modified KBASE_IOCTL_MEM_COMMIT behavior to reject requests to modify - * the physical memory backing of JIT allocations. This was not supposed - * to be a valid use case, but it was allowed by the previous implementation. - * 11.24: - * - Added a sysfs file 'serialize_jobs' inside a new sub-directory - * 'scheduling'. - * 11.25: - * - Enabled JIT pressure limit in base/kbase by default - * 11.26 - * - Added kinstr_jm API - * 11.27 - * - Backwards compatible extension to HWC ioctl. 
- * 11.28: - * - Added kernel side cache ops needed hint - * 11.29: - * - Reserve ioctl 52 - * 11.30: - * - Add a new priority level BASE_JD_PRIO_REALTIME - * - Add ioctl 54: This controls the priority setting. - */ -#define BASE_UK_VERSION_MAJOR 11 -#define BASE_UK_VERSION_MINOR 30 - -/** - * struct kbase_ioctl_version_check - Check version compatibility between - * kernel and userspace - * - * @major: Major version number - * @minor: Minor version number - */ -struct kbase_ioctl_version_check { - __u16 major; - __u16 minor; -}; - -#define KBASE_IOCTL_VERSION_CHECK \ - _IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check) - - -/** - * struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel - * - * @addr: Memory address of an array of struct base_jd_atom_v2 or v3 - * @nr_atoms: Number of entries in the array - * @stride: sizeof(struct base_jd_atom_v2) or sizeof(struct base_jd_atom) - */ -struct kbase_ioctl_job_submit { - __u64 addr; - __u32 nr_atoms; - __u32 stride; -}; - -#define KBASE_IOCTL_JOB_SUBMIT \ - _IOW(KBASE_IOCTL_TYPE, 2, struct kbase_ioctl_job_submit) - -#define KBASE_IOCTL_POST_TERM \ - _IO(KBASE_IOCTL_TYPE, 4) - -/** - * struct kbase_ioctl_soft_event_update - Update the status of a soft-event - * @event: GPU address of the event which has been updated - * @new_status: The new status to set - * @flags: Flags for future expansion - */ -struct kbase_ioctl_soft_event_update { - __u64 event; - __u32 new_status; - __u32 flags; -}; - -#define KBASE_IOCTL_SOFT_EVENT_UPDATE \ - _IOW(KBASE_IOCTL_TYPE, 28, struct kbase_ioctl_soft_event_update) - -/** - * struct kbase_kinstr_jm_fd_out - Explains the compatibility information for - * the `struct kbase_kinstr_jm_atom_state_change` structure returned from the - * kernel - * - * @size: The size of the `struct kbase_kinstr_jm_atom_state_change` - * @version: Represents a breaking change in the - * `struct kbase_kinstr_jm_atom_state_change` - * @padding: Explicit padding to get the structure up to 
64bits. See - * https://www.kernel.org/doc/Documentation/ioctl/botching-up-ioctls.rst - * - * The `struct kbase_kinstr_jm_atom_state_change` may have extra members at the - * end of the structure that older user space might not understand. If the - * `version` is the same, the structure is still compatible with newer kernels. - * The `size` can be used to cast the opaque memory returned from the kernel. - */ -struct kbase_kinstr_jm_fd_out { - __u16 size; - __u8 version; - __u8 padding[5]; -}; - -/** - * struct kbase_kinstr_jm_fd_in - Options when creating the file descriptor - * - * @count: Number of atom states that can be stored in the kernel circular - * buffer. Must be a power of two - * @padding: Explicit padding to get the structure up to 64bits. See - * https://www.kernel.org/doc/Documentation/ioctl/botching-up-ioctls.rst - */ -struct kbase_kinstr_jm_fd_in { - __u16 count; - __u8 padding[6]; -}; - -union kbase_kinstr_jm_fd { - struct kbase_kinstr_jm_fd_in in; - struct kbase_kinstr_jm_fd_out out; -}; - -#define KBASE_IOCTL_KINSTR_JM_FD \ - _IOWR(KBASE_IOCTL_TYPE, 51, union kbase_kinstr_jm_fd) - - -#define KBASE_IOCTL_VERSION_CHECK_RESERVED \ - _IOWR(KBASE_IOCTL_TYPE, 52, struct kbase_ioctl_version_check) - -#endif /* _KBASE_JM_IOCTL_H_ */ diff --git a/mali_kbase/jm/mali_kbase_jm_js.h b/mali_kbase/jm/mali_kbase_jm_js.h index 06adb36..e327536 100644 --- a/mali_kbase/jm/mali_kbase_jm_js.h +++ b/mali_kbase/jm/mali_kbase_jm_js.h @@ -657,7 +657,7 @@ static inline bool kbasep_js_is_submit_allowed( test_bit = (u16) (1u << kctx->as_nr); is_allowed = (bool) (js_devdata->runpool_irq.submit_allowed & test_bit); - dev_dbg(kctx->kbdev->dev, "JS: submit %s allowed on %p (as=%d)", + dev_dbg(kctx->kbdev->dev, "JS: submit %s allowed on %pK (as=%d)", is_allowed ? 
"is" : "isn't", (void *)kctx, kctx->as_nr); return is_allowed; } @@ -684,7 +684,7 @@ static inline void kbasep_js_set_submit_allowed( set_bit = (u16) (1u << kctx->as_nr); - dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)", + dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %pK (as=%d)", kctx, kctx->as_nr); js_devdata->runpool_irq.submit_allowed |= set_bit; @@ -715,7 +715,7 @@ static inline void kbasep_js_clear_submit_allowed( clear_bit = (u16) (1u << kctx->as_nr); clear_mask = ~clear_bit; - dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)", + dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %pK (as=%d)", kctx, kctx->as_nr); js_devdata->runpool_irq.submit_allowed &= clear_mask; diff --git a/mali_kbase/jm/mali_kbase_js_defs.h b/mali_kbase/jm/mali_kbase_js_defs.h index 997cd49..183f0b0 100644 --- a/mali_kbase/jm/mali_kbase_js_defs.h +++ b/mali_kbase/jm/mali_kbase_js_defs.h @@ -171,7 +171,8 @@ enum { * Internal atom priority defines for kbase_jd_atom::sched_prio */ enum { - KBASE_JS_ATOM_SCHED_PRIO_REALTIME = 0, + KBASE_JS_ATOM_SCHED_PRIO_FIRST = 0, + KBASE_JS_ATOM_SCHED_PRIO_REALTIME = KBASE_JS_ATOM_SCHED_PRIO_FIRST, KBASE_JS_ATOM_SCHED_PRIO_HIGH, KBASE_JS_ATOM_SCHED_PRIO_MED, KBASE_JS_ATOM_SCHED_PRIO_LOW, diff --git a/mali_kbase/mali_base_hwconfig_features.h b/mali_kbase/mali_base_hwconfig_features.h index d6f31cf..bdc769f 100644 --- a/mali_kbase/mali_base_hwconfig_features.h +++ b/mali_kbase/mali_base_hwconfig_features.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -49,7 +49,6 @@ enum base_hw_feature { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_TLS_HASHING, BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, @@ -85,7 +84,6 @@ static const enum base_hw_feature base_hw_features_tMIx[] = { BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_END }; @@ -112,7 +110,6 @@ static const enum base_hw_feature base_hw_features_tHEx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_END }; @@ -139,7 +136,6 @@ static const enum base_hw_feature base_hw_features_tSIx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_END }; @@ -166,7 +162,6 @@ static const enum base_hw_feature base_hw_features_tDVx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_END }; @@ -193,7 +188,6 @@ static const enum base_hw_feature base_hw_features_tNOx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_TLS_HASHING, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_END @@ -222,7 +216,6 @@ static const enum base_hw_feature base_hw_features_tGOx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_TLS_HASHING, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_END @@ -250,7 +243,6 @@ static const 
enum base_hw_feature base_hw_features_tTRx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, BASE_HW_FEATURE_END @@ -278,7 +270,6 @@ static const enum base_hw_feature base_hw_features_tNAx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, BASE_HW_FEATURE_END @@ -306,7 +297,6 @@ static const enum base_hw_feature base_hw_features_tBEx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, @@ -335,7 +325,6 @@ static const enum base_hw_feature base_hw_features_tBAx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, @@ -364,7 +353,6 @@ static const enum base_hw_feature base_hw_features_tDUx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, @@ -393,7 +381,6 @@ static const enum base_hw_feature base_hw_features_tODx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, BASE_HW_FEATURE_END diff --git a/mali_kbase/mali_base_hwconfig_issues.h b/mali_kbase/mali_base_hwconfig_issues.h index 0afabb1..a61eeb2 100644 --- a/mali_kbase/mali_base_hwconfig_issues.h +++ 
b/mali_kbase/mali_base_hwconfig_issues.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/mali_kbase/mali_base_kernel.h b/mali_kbase/mali_base_kernel.h deleted file mode 100644 index 5c173eb..0000000 --- a/mali_kbase/mali_base_kernel.h +++ /dev/null @@ -1,812 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -/* - * Base structures shared with the kernel. - */ - -#ifndef _BASE_KERNEL_H_ -#define _BASE_KERNEL_H_ - -struct base_mem_handle { - struct { - u64 handle; - } basep; -}; - -#include "mali_base_mem_priv.h" -#include "gpu/mali_kbase_gpu_coherency.h" -#include "gpu/mali_kbase_gpu_id.h" - -#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4 - -#define BASE_MAX_COHERENT_GROUPS 16 - -#if defined CDBG_ASSERT -#define LOCAL_ASSERT CDBG_ASSERT -#elif defined KBASE_DEBUG_ASSERT -#define LOCAL_ASSERT KBASE_DEBUG_ASSERT -#else -#error assert macro not defined! 
-#endif - -#if defined(PAGE_MASK) && defined(PAGE_SHIFT) -#define LOCAL_PAGE_SHIFT PAGE_SHIFT -#define LOCAL_PAGE_LSB ~PAGE_MASK -#else -#include <osu/mali_osu.h> - -#if defined OSU_CONFIG_CPU_PAGE_SIZE_LOG2 -#define LOCAL_PAGE_SHIFT OSU_CONFIG_CPU_PAGE_SIZE_LOG2 -#define LOCAL_PAGE_LSB ((1ul << OSU_CONFIG_CPU_PAGE_SIZE_LOG2) - 1) -#else -#error Failed to find page size -#endif -#endif - -/* Physical memory group ID for normal usage. - */ -#define BASE_MEM_GROUP_DEFAULT (0) - -/* Number of physical memory groups. - */ -#define BASE_MEM_GROUP_COUNT (16) - -/** - * typedef base_mem_alloc_flags - Memory allocation, access/hint flags. - * - * A combination of MEM_PROT/MEM_HINT flags must be passed to each allocator - * in order to determine the best cache policy. Some combinations are - * of course invalid (e.g. MEM_PROT_CPU_WR | MEM_HINT_CPU_RD), - * which defines a write-only region on the CPU side, which is - * heavily read by the CPU... - * Other flags are only meaningful to a particular allocator. - * More flags can be added to this list, as long as they don't clash - * (see BASE_MEM_FLAGS_NR_BITS for the number of the first free bit). - */ -typedef u32 base_mem_alloc_flags; - -/* A mask for all the flags which are modifiable via the base_mem_set_flags - * interface. - */ -#define BASE_MEM_FLAGS_MODIFIABLE \ - (BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \ - BASE_MEM_COHERENT_LOCAL) - -/* A mask of all the flags that can be returned via the base_mem_get_flags() - * interface. - */ -#define BASE_MEM_FLAGS_QUERYABLE \ - (BASE_MEM_FLAGS_INPUT_MASK & ~(BASE_MEM_SAME_VA | \ - BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_DONT_NEED | \ - BASE_MEM_IMPORT_SHARED | BASE_MEM_FLAGS_RESERVED | \ - BASEP_MEM_FLAGS_KERNEL_ONLY)) - -/** - * enum base_mem_import_type - Memory types supported by @a base_mem_import - * - * @BASE_MEM_IMPORT_TYPE_INVALID: Invalid type - * @BASE_MEM_IMPORT_TYPE_UMM: UMM import. 
Handle type is a file descriptor (int) - * @BASE_MEM_IMPORT_TYPE_USER_BUFFER: User buffer import. Handle is a - * base_mem_import_user_buffer - * - * Each type defines what the supported handle type is. - * - * If any new type is added here ARM must be contacted - * to allocate a numeric value for it. - * Do not just add a new type without synchronizing with ARM - * as future releases from ARM might include other new types - * which could clash with your custom types. - */ -enum base_mem_import_type { - BASE_MEM_IMPORT_TYPE_INVALID = 0, - /* - * Import type with value 1 is deprecated. - */ - BASE_MEM_IMPORT_TYPE_UMM = 2, - BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3 -}; - -/** - * struct base_mem_import_user_buffer - Handle of an imported user buffer - * - * @ptr: address of imported user buffer - * @length: length of imported user buffer in bytes - * - * This structure is used to represent a handle of an imported user buffer. - */ - -struct base_mem_import_user_buffer { - u64 ptr; - u64 length; -}; - -/* Mask to detect 4GB boundary alignment */ -#define BASE_MEM_MASK_4GB 0xfffff000UL -/* Mask to detect 4GB boundary (in page units) alignment */ -#define BASE_MEM_PFN_MASK_4GB (BASE_MEM_MASK_4GB >> LOCAL_PAGE_SHIFT) - -/* Limit on the 'extension' parameter for an allocation with the - * BASE_MEM_TILER_ALIGN_TOP flag set - * - * This is the same as the maximum limit for a Buffer Descriptor's chunk size - */ -#define BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES_LOG2 \ - (21u - (LOCAL_PAGE_SHIFT)) -#define BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES \ - (1ull << (BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES_LOG2)) - -/* Bit mask of cookies used for for memory allocation setup */ -#define KBASE_COOKIE_MASK ~1UL /* bit 0 is reserved */ - -/* Maximum size allowed in a single KBASE_IOCTL_MEM_ALLOC call */ -#define KBASE_MEM_ALLOC_MAX_SIZE ((8ull << 30) >> PAGE_SHIFT) /* 8 GB */ - -/* - * struct base_fence - Cross-device synchronisation fence. 
- * - * A fence is used to signal when the GPU has finished accessing a resource that - * may be shared with other devices, and also to delay work done asynchronously - * by the GPU until other devices have finished accessing a shared resource. - */ -struct base_fence { - struct { - int fd; - int stream_fd; - } basep; -}; - -/** - * struct base_mem_aliasing_info - Memory aliasing info - * - * Describes a memory handle to be aliased. - * A subset of the handle can be chosen for aliasing, given an offset and a - * length. - * A special handle BASE_MEM_WRITE_ALLOC_PAGES_HANDLE is used to represent a - * region where a special page is mapped with a write-alloc cache setup, - * typically used when the write result of the GPU isn't needed, but the GPU - * must write anyway. - * - * Offset and length are specified in pages. - * Offset must be within the size of the handle. - * Offset+length must not overrun the size of the handle. - * - * @handle: Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE - * @offset: Offset within the handle to start aliasing from, in pages. - * Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE. - * @length: Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE - * specifies the number of times the special page is needed. - */ -struct base_mem_aliasing_info { - struct base_mem_handle handle; - u64 offset; - u64 length; -}; - -/* Maximum percentage of just-in-time memory allocation trimming to perform - * on free. - */ -#define BASE_JIT_MAX_TRIM_LEVEL (100) - -/* Maximum number of concurrent just-in-time memory allocations. 
- */ -#define BASE_JIT_ALLOC_COUNT (255) - -/* base_jit_alloc_info in use for kernel driver versions 10.2 to early 11.5 - * - * jit_version is 1 - * - * Due to the lack of padding specified, user clients between 32 and 64-bit - * may have assumed a different size of the struct - * - * An array of structures was not supported - */ -struct base_jit_alloc_info_10_2 { - u64 gpu_alloc_addr; - u64 va_pages; - u64 commit_pages; - u64 extension; - u8 id; -}; - -/* base_jit_alloc_info introduced by kernel driver version 11.5, and in use up - * to 11.19 - * - * This structure had a number of modifications during and after kernel driver - * version 11.5, but remains size-compatible throughout its version history, and - * with earlier variants compatible with future variants by requiring - * zero-initialization to the unused space in the structure. - * - * jit_version is 2 - * - * Kernel driver version history: - * 11.5: Initial introduction with 'usage_id' and padding[5]. All padding bytes - * must be zero. Kbase minor version was not incremented, so some - * versions of 11.5 do not have this change. - * 11.5: Added 'bin_id' and 'max_allocations', replacing 2 padding bytes (Kbase - * minor version not incremented) - * 11.6: Added 'flags', replacing 1 padding byte - * 11.10: Arrays of this structure are supported - */ -struct base_jit_alloc_info_11_5 { - u64 gpu_alloc_addr; - u64 va_pages; - u64 commit_pages; - u64 extension; - u8 id; - u8 bin_id; - u8 max_allocations; - u8 flags; - u8 padding[2]; - u16 usage_id; -}; - -/** - * struct base_jit_alloc_info - Structure which describes a JIT allocation - * request. - * @gpu_alloc_addr: The GPU virtual address to write the JIT - * allocated GPU virtual address to. - * @va_pages: The minimum number of virtual pages required. - * @commit_pages: The minimum number of physical pages which - * should back the allocation. - * @extension: Granularity of physical pages to grow the - * allocation by during a fault. 
- * @id: Unique ID provided by the caller, this is used - * to pair allocation and free requests. - * Zero is not a valid value. - * @bin_id: The JIT allocation bin, used in conjunction with - * @max_allocations to limit the number of each - * type of JIT allocation. - * @max_allocations: The maximum number of allocations allowed within - * the bin specified by @bin_id. Should be the same - * for all allocations within the same bin. - * @flags: flags specifying the special requirements for - * the JIT allocation, see - * %BASE_JIT_ALLOC_VALID_FLAGS - * @padding: Expansion space - should be initialised to zero - * @usage_id: A hint about which allocation should be reused. - * The kernel should attempt to use a previous - * allocation with the same usage_id - * @heap_info_gpu_addr: Pointer to an object in GPU memory describing - * the actual usage of the region. - * - * jit_version is 3. - * - * When modifications are made to this structure, it is still compatible with - * jit_version 3 when: a) the size is unchanged, and b) new members only - * replace the padding bytes. - * - * Previous jit_version history: - * jit_version == 1, refer to &base_jit_alloc_info_10_2 - * jit_version == 2, refer to &base_jit_alloc_info_11_5 - * - * Kbase version history: - * 11.20: added @heap_info_gpu_addr - */ -struct base_jit_alloc_info { - u64 gpu_alloc_addr; - u64 va_pages; - u64 commit_pages; - u64 extension; - u8 id; - u8 bin_id; - u8 max_allocations; - u8 flags; - u8 padding[2]; - u16 usage_id; - u64 heap_info_gpu_addr; -}; - -enum base_external_resource_access { - BASE_EXT_RES_ACCESS_SHARED, - BASE_EXT_RES_ACCESS_EXCLUSIVE -}; - -struct base_external_resource { - u64 ext_resource; -}; - - -/** - * The maximum number of external resources which can be mapped/unmapped - * in a single request. - */ -#define BASE_EXT_RES_COUNT_MAX 10 - -/** - * struct base_external_resource_list - Structure which describes a list of - * external resources. - * @count: The number of resources. 
- * @ext_res: Array of external resources which is - * sized at allocation time. - */ -struct base_external_resource_list { - u64 count; - struct base_external_resource ext_res[1]; -}; - -struct base_jd_debug_copy_buffer { - u64 address; - u64 size; - struct base_external_resource extres; -}; - -#define GPU_MAX_JOB_SLOTS 16 - -/** - * User-side Base GPU Property Queries - * - * The User-side Base GPU Property Query interface encapsulates two - * sub-modules: - * - * - "Dynamic GPU Properties" - * - "Base Platform Config GPU Properties" - * - * Base only deals with properties that vary between different GPU - * implementations - the Dynamic GPU properties and the Platform Config - * properties. - * - * For properties that are constant for the GPU Architecture, refer to the - * GPU module. However, we will discuss their relevance here just to - * provide background information. - * - * About the GPU Properties in Base and GPU modules - * - * The compile-time properties (Platform Config, GPU Compile-time - * properties) are exposed as pre-processor macros. - * - * Complementing the compile-time properties are the Dynamic GPU - * Properties, which act as a conduit for the GPU Configuration - * Discovery. - * - * In general, the dynamic properties are present to verify that the platform - * has been configured correctly with the right set of Platform Config - * Compile-time Properties. - * - * As a consistent guide across the entire DDK, the choice for dynamic or - * compile-time should consider the following, in order: - * 1. Can the code be written so that it doesn't need to know the - * implementation limits at all? - * 2. If you need the limits, get the information from the Dynamic Property - * lookup. This should be done once as you fetch the context, and then cached - * as part of the context data structure, so it's cheap to access. - * 3. 
If there's a clear and arguable inefficiency in using Dynamic Properties, - * then use a Compile-Time Property (Platform Config, or GPU Compile-time - * property). Examples of where this might be sensible follow: - * - Part of a critical inner-loop - * - Frequent re-use throughout the driver, causing significant extra load - * instructions or control flow that would be worthwhile optimizing out. - * - * We cannot provide an exhaustive set of examples, neither can we provide a - * rule for every possible situation. Use common sense, and think about: what - * the rest of the driver will be doing; how the compiler might represent the - * value if it is a compile-time constant; whether an OEM shipping multiple - * devices would benefit much more from a single DDK binary, instead of - * insignificant micro-optimizations. - * - * Dynamic GPU Properties - * - * Dynamic GPU properties are presented in two sets: - * 1. the commonly used properties in @ref base_gpu_props, which have been - * unpacked from GPU register bitfields. - * 2. The full set of raw, unprocessed properties in gpu_raw_gpu_props - * (also a member of base_gpu_props). All of these are presented in - * the packed form, as presented by the GPU registers themselves. - * - * The raw properties in gpu_raw_gpu_props are necessary to - * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device - * behaving differently?". In this case, all information about the - * configuration is potentially useful, but it does not need to be processed - * by the driver. Instead, the raw registers can be processed by the Mali - * Tools software on the host PC. - * - * The properties returned extend the GPU Configuration Discovery - * registers. For example, GPU clock speed is not specified in the GPU - * Architecture, but is necessary for OpenCL's clGetDeviceInfo() function. - * - * The GPU properties are obtained by a call to - * base_get_gpu_props(). 
This simply returns a pointer to a const - * base_gpu_props structure. It is constant for the life of a base - * context. Multiple calls to base_get_gpu_props() to a base context - * return the same pointer to a constant structure. This avoids cache pollution - * of the common data. - * - * This pointer must not be freed, because it does not point to the start of a - * region allocated by the memory allocator; instead, just close the @ref - * base_context. - * - * - * Kernel Operation - * - * During Base Context Create time, user-side makes a single kernel call: - * - A call to fill user memory with GPU information structures - * - * The kernel-side will fill the provided the entire processed base_gpu_props - * structure, because this information is required in both - * user and kernel side; it does not make sense to decode it twice. - * - * Coherency groups must be derived from the bitmasks, but this can be done - * kernel side, and just once at kernel startup: Coherency groups must already - * be known kernel-side, to support chains that specify a 'Only Coherent Group' - * SW requirement, or 'Only Coherent Group with Tiler' SW requirement. - * - * Coherency Group calculation - * - * Creation of the coherent group data is done at device-driver startup, and so - * is one-time. This will most likely involve a loop with CLZ, shifting, and - * bit clearing on the L2_PRESENT mask, depending on whether the - * system is L2 Coherent. The number of shader cores is done by a - * population count, since faulty cores may be disabled during production, - * producing a non-contiguous mask. - * - * The memory requirements for this algorithm can be determined either by a u64 - * population count on the L2_PRESENT mask (a LUT helper already is - * required for the above), or simple assumption that there can be no more than - * 16 coherent groups, since core groups are typically 4 cores. 
- */ - -#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4 - -#define BASE_MAX_COHERENT_GROUPS 16 -/** - * struct mali_base_gpu_core_props - GPU core props info - * @product_id: Pro specific value. - * @version_status: Status of the GPU release. No defined values, but starts at - * 0 and increases by one for each release status (alpha, beta, EAC, etc.). - * 4 bit values (0-15). - * @minor_revision: Minor release number of the GPU. "P" part of an "RnPn" - * release number. - * 8 bit values (0-255). - * @major_revision: Major release number of the GPU. "R" part of an "RnPn" - * release number. - * 4 bit values (0-15). - * @padding: padding to allign to 8-byte - * @gpu_freq_khz_max: The maximum GPU frequency. Reported to applications by - * clGetDeviceInfo() - * @log2_program_counter_size: Size of the shader program counter, in bits. - * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU. This - * is a bitpattern where a set bit indicates that the format is supported. - * Before using a texture format, it is recommended that the corresponding - * bit be checked. - * @gpu_available_memory_size: Theoretical maximum memory available to the GPU. - * It is unlikely that a client will be able to allocate all of this memory - * for their own purposes, but this at least provides an upper bound on the - * memory available to the GPU. - * This is required for OpenCL's clGetDeviceInfo() call when - * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The - * client will not be expecting to allocate anywhere near this value. - * @num_exec_engines: The number of execution engines. 
- */ -struct mali_base_gpu_core_props { - u32 product_id; - u16 version_status; - u16 minor_revision; - u16 major_revision; - u16 padding; - u32 gpu_freq_khz_max; - u32 log2_program_counter_size; - u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; - u64 gpu_available_memory_size; - u8 num_exec_engines; -}; - -/* - * More information is possible - but associativity and bus width are not - * required by upper-level apis. - */ -struct mali_base_gpu_l2_cache_props { - u8 log2_line_size; - u8 log2_cache_size; - u8 num_l2_slices; /* Number of L2C slices. 1 or higher */ - u8 padding[5]; -}; - -struct mali_base_gpu_tiler_props { - u32 bin_size_bytes; /* Max is 4*2^15 */ - u32 max_active_levels; /* Max is 2^15 */ -}; - -/** - * struct mali_base_gpu_thread_props - GPU threading system details. - * @max_threads: Max. number of threads per core - * @max_workgroup_size: Max. number of threads per workgroup - * @max_barrier_size: Max. number of threads that can synchronize on a - * simple barrier - * @max_registers: Total size [1..65535] of the register file available - * per core. - * @max_task_queue: Max. tasks [1..255] which may be sent to a core - * before it becomes blocked. - * @max_thread_group_split: Max. allowed value [1..15] of the Thread Group Split - * field. 
- * @impl_tech: 0 = Not specified, 1 = Silicon, 2 = FPGA, - * 3 = SW Model/Emulation - * @padding: padding to allign to 8-byte - * @tls_alloc: Number of threads per core that TLS must be - * allocated for - */ -struct mali_base_gpu_thread_props { - u32 max_threads; - u32 max_workgroup_size; - u32 max_barrier_size; - u16 max_registers; - u8 max_task_queue; - u8 max_thread_group_split; - u8 impl_tech; - u8 padding[3]; - u32 tls_alloc; -}; - -/** - * struct mali_base_gpu_coherent_group - descriptor for a coherent group - * @core_mask: Core restriction mask required for the group - * @num_cores: Number of cores in the group - * @padding: padding to allign to 8-byte - * - * \c core_mask exposes all cores in that coherent group, and \c num_cores - * provides a cached population-count for that mask. - * - * @note Whilst all cores are exposed in the mask, not all may be available to - * the application, depending on the Kernel Power policy. - * - * @note if u64s must be 8-byte aligned, then this structure has 32-bits of - * wastage. - */ -struct mali_base_gpu_coherent_group { - u64 core_mask; - u16 num_cores; - u16 padding[3]; -}; - -/** - * struct mali_base_gpu_coherent_group_info - Coherency group information - * @num_groups: Number of coherent groups in the GPU. - * @num_core_groups: Number of core groups (coherent or not) in the GPU. - * Equivalent to the number of L2 Caches. - * The GPU Counter dumping writes 2048 bytes per core group, regardless - * of whether the core groups are coherent or not. Hence this member is - * needed to calculate how much memory is required for dumping. - * @note Do not use it to work out how many valid elements are in the - * group[] member. Use num_groups instead. - * @coherency: Coherency features of the memory, accessed by gpu_mem_features - * methods - * @padding: padding to allign to 8-byte - * @group: Descriptors of coherent groups - * - * Note that the sizes of the members could be reduced. 
However, the \c group - * member might be 8-byte aligned to ensure the u64 core_mask is 8-byte - * aligned, thus leading to wastage if the other members sizes were reduced. - * - * The groups are sorted by core mask. The core masks are non-repeating and do - * not intersect. - */ -struct mali_base_gpu_coherent_group_info { - u32 num_groups; - u32 num_core_groups; - u32 coherency; - u32 padding; - struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS]; -}; - -/** - * struct gpu_raw_gpu_props - A complete description of the GPU's Hardware - * Configuration Discovery registers. - * @shader_present: Shader core present bitmap - * @tiler_present: Tiler core present bitmap - * @l2_present: Level 2 cache present bitmap - * @stack_present: Core stack present bitmap - * @l2_features: L2 features - * @core_features: Core features - * @mem_features: Mem features - * @mmu_features: Mmu features - * @as_present: Bitmap of address spaces present - * @js_present: Job slots present - * @js_features: Array of job slot features. - * @tiler_features: Tiler features - * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU - * @gpu_id: GPU and revision identifier - * @thread_max_threads: Maximum number of threads per core - * @thread_max_workgroup_size: Maximum number of threads per workgroup - * @thread_max_barrier_size: Maximum number of threads per barrier - * @thread_features: Thread features - * @coherency_mode: Note: This is the _selected_ coherency mode rather than the - * available modes as exposed in the coherency_features register - * @thread_tls_alloc: Number of threads per core that TLS must be allocated for - * @gpu_features: GPU features - * - * The information is presented inefficiently for access. For frequent access, - * the values should be better expressed in an unpacked form in the - * base_gpu_props structure. - * - * The raw properties in gpu_raw_gpu_props are necessary to - * allow a user of the Mali Tools (e.g. 
PAT) to determine "Why is this device - * behaving differently?". In this case, all information about the - * configuration is potentially useful, but it does not need to be processed - * by the driver. Instead, the raw registers can be processed by the Mali - * Tools software on the host PC. - * - */ -struct gpu_raw_gpu_props { - u64 shader_present; - u64 tiler_present; - u64 l2_present; - u64 stack_present; - u32 l2_features; - u32 core_features; - u32 mem_features; - u32 mmu_features; - - u32 as_present; - - u32 js_present; - u32 js_features[GPU_MAX_JOB_SLOTS]; - u32 tiler_features; - u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; - - u32 gpu_id; - - u32 thread_max_threads; - u32 thread_max_workgroup_size; - u32 thread_max_barrier_size; - u32 thread_features; - - /* - * Note: This is the _selected_ coherency mode rather than the - * available modes as exposed in the coherency_features register. - */ - u32 coherency_mode; - - u32 thread_tls_alloc; - u64 gpu_features; -}; - -/** - * struct base_gpu_props - Return structure for base_get_gpu_props(). - * @core_props: Core props. - * @l2_props: L2 props. - * @unused_1: Keep for backwards compatibility. - * @tiler_props: Tiler props. - * @thread_props: Thread props. - * @raw_props: This member is large, likely to be 128 bytes. - * @coherency_info: This must be last member of the structure. - * - * NOTE: the raw_props member in this data structure contains the register - * values from which the value of the other members are derived. The derived - * members exist to allow for efficient access and/or shielding the details - * of the layout of the registers. 
- * */ -struct base_gpu_props { - struct mali_base_gpu_core_props core_props; - struct mali_base_gpu_l2_cache_props l2_props; - u64 unused_1; - struct mali_base_gpu_tiler_props tiler_props; - struct mali_base_gpu_thread_props thread_props; - struct gpu_raw_gpu_props raw_props; - struct mali_base_gpu_coherent_group_info coherency_info; -}; - -#if MALI_USE_CSF -#include "csf/mali_base_csf_kernel.h" -#else -#include "jm/mali_base_jm_kernel.h" -#endif - -/** - * base_mem_group_id_get() - Get group ID from flags - * @flags: Flags to pass to base_mem_alloc - * - * This inline function extracts the encoded group ID from flags - * and converts it into numeric value (0~15). - * - * Return: group ID(0~15) extracted from the parameter - */ -static inline int base_mem_group_id_get(base_mem_alloc_flags flags) -{ - LOCAL_ASSERT((flags & ~BASE_MEM_FLAGS_INPUT_MASK) == 0); - return (int)((flags & BASE_MEM_GROUP_ID_MASK) >> - BASEP_MEM_GROUP_ID_SHIFT); -} - -/** - * base_mem_group_id_set() - Set group ID into base_mem_alloc_flags - * @id: group ID(0~15) you want to encode - * - * This inline function encodes specific group ID into base_mem_alloc_flags. - * Parameter 'id' should lie in-between 0 to 15. - * - * Return: base_mem_alloc_flags with the group ID (id) encoded - * - * The return value can be combined with other flags against base_mem_alloc - * to identify a specific memory group. - */ -static inline base_mem_alloc_flags base_mem_group_id_set(int id) -{ - if ((id < 0) || (id >= BASE_MEM_GROUP_COUNT)) { - /* Set to default value when id is out of range. */ - id = BASE_MEM_GROUP_DEFAULT; - } - - return ((base_mem_alloc_flags)id << BASEP_MEM_GROUP_ID_SHIFT) & - BASE_MEM_GROUP_ID_MASK; -} - -/** - * base_context_mmu_group_id_set - Encode a memory group ID in - * base_context_create_flags - * - * Memory allocated for GPU page tables will come from the specified group. - * - * @group_id: Physical memory group ID. Range is 0..(BASE_MEM_GROUP_COUNT-1). 
- * - * Return: Bitmask of flags to pass to base_context_init. - */ -static inline base_context_create_flags base_context_mmu_group_id_set( - int const group_id) -{ - LOCAL_ASSERT(group_id >= 0); - LOCAL_ASSERT(group_id < BASE_MEM_GROUP_COUNT); - return BASEP_CONTEXT_MMU_GROUP_ID_MASK & - ((base_context_create_flags)group_id << - BASEP_CONTEXT_MMU_GROUP_ID_SHIFT); -} - -/** - * base_context_mmu_group_id_get - Decode a memory group ID from - * base_context_create_flags - * - * Memory allocated for GPU page tables will come from the returned group. - * - * @flags: Bitmask of flags to pass to base_context_init. - * - * Return: Physical memory group ID. Valid range is 0..(BASE_MEM_GROUP_COUNT-1). - */ -static inline int base_context_mmu_group_id_get( - base_context_create_flags const flags) -{ - LOCAL_ASSERT(flags == (flags & BASEP_CONTEXT_CREATE_ALLOWED_FLAGS)); - return (int)((flags & BASEP_CONTEXT_MMU_GROUP_ID_MASK) >> - BASEP_CONTEXT_MMU_GROUP_ID_SHIFT); -} - -/* - * A number of bit flags are defined for requesting cpu_gpu_timeinfo. These - * flags are also used, where applicable, for specifying which fields - * are valid following the request operation. 
- */ - -/* For monotonic (counter) timefield */ -#define BASE_TIMEINFO_MONOTONIC_FLAG (1UL << 0) -/* For system wide timestamp */ -#define BASE_TIMEINFO_TIMESTAMP_FLAG (1UL << 1) -/* For GPU cycle counter */ -#define BASE_TIMEINFO_CYCLE_COUNTER_FLAG (1UL << 2) -/* Specify kernel GPU register timestamp */ -#define BASE_TIMEINFO_KERNEL_SOURCE_FLAG (1UL << 30) -/* Specify userspace cntvct_el0 timestamp source */ -#define BASE_TIMEINFO_USER_SOURCE_FLAG (1UL << 31) - -#define BASE_TIMEREQUEST_ALLOWED_FLAGS (\ - BASE_TIMEINFO_MONOTONIC_FLAG | \ - BASE_TIMEINFO_TIMESTAMP_FLAG | \ - BASE_TIMEINFO_CYCLE_COUNTER_FLAG | \ - BASE_TIMEINFO_KERNEL_SOURCE_FLAG | \ - BASE_TIMEINFO_USER_SOURCE_FLAG) - -#endif /* _BASE_KERNEL_H_ */ diff --git a/mali_kbase/mali_base_mem_priv.h b/mali_kbase/mali_base_mem_priv.h deleted file mode 100644 index 9f59a4f..0000000 --- a/mali_kbase/mali_base_mem_priv.h +++ /dev/null @@ -1,54 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * (C) COPYRIGHT 2010-2015, 2020 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -#ifndef _BASE_MEM_PRIV_H_ -#define _BASE_MEM_PRIV_H_ - -#define BASE_SYNCSET_OP_MSYNC (1U << 0) -#define BASE_SYNCSET_OP_CSYNC (1U << 1) - -/* - * This structure describe a basic memory coherency operation. 
- * It can either be: - * @li a sync from CPU to Memory: - * - type = ::BASE_SYNCSET_OP_MSYNC - * - mem_handle = a handle to the memory object on which the operation - * is taking place - * - user_addr = the address of the range to be synced - * - size = the amount of data to be synced, in bytes - * - offset is ignored. - * @li a sync from Memory to CPU: - * - type = ::BASE_SYNCSET_OP_CSYNC - * - mem_handle = a handle to the memory object on which the operation - * is taking place - * - user_addr = the address of the range to be synced - * - size = the amount of data to be synced, in bytes. - * - offset is ignored. - */ -struct basep_syncset { - struct base_mem_handle mem_handle; - u64 user_addr; - u64 size; - u8 type; - u8 padding[7]; -}; - -#endif diff --git a/mali_kbase/mali_kbase.h b/mali_kbase/mali_kbase.h index a78ff43..b6683b9 100644 --- a/mali_kbase/mali_kbase.h +++ b/mali_kbase/mali_kbase.h @@ -45,7 +45,7 @@ #include <linux/workqueue.h> #include <linux/interrupt.h> -#include "mali_base_kernel.h" +#include <uapi/gpu/arm/midgard/mali_base_kernel.h> #include <mali_kbase_linux.h> /* @@ -64,7 +64,7 @@ #include "mali_kbase_gpu_memory_debugfs.h" #include "mali_kbase_mem_profile_debugfs.h" #include "mali_kbase_gpuprops.h" -#include "mali_kbase_ioctl.h" +#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h> #if !MALI_USE_CSF #include "mali_kbase_debug_job_fault.h" #include "mali_kbase_jd_debugfs.h" @@ -213,10 +213,6 @@ void registers_unmap(struct kbase_device *kbdev); int kbase_device_coherency_init(struct kbase_device *kbdev); -#ifdef CONFIG_MALI_BUSLOG -int buslog_init(struct kbase_device *kbdev); -void buslog_term(struct kbase_device *kbdev); -#endif #if !MALI_USE_CSF int kbase_jd_init(struct kbase_context *kctx); diff --git a/mali_kbase/mali_kbase_cache_policy.h b/mali_kbase/mali_kbase_cache_policy.h index 817710a..2cd3079 100644 --- a/mali_kbase/mali_kbase_cache_policy.h +++ b/mali_kbase/mali_kbase_cache_policy.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: 
GPL-2.0 */ /* * - * (C) COPYRIGHT 2012-2013, 2015, 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2013, 2015, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,7 +27,7 @@ #define _KBASE_CACHE_POLICY_H_ #include "mali_kbase.h" -#include "mali_base_kernel.h" +#include <uapi/gpu/arm/midgard/mali_base_kernel.h> /** * kbase_cache_enabled - Choose the cache policy for a specific region diff --git a/mali_kbase/mali_kbase_core_linux.c b/mali_kbase/mali_kbase_core_linux.c index 4e5155a..96fcbcd 100644 --- a/mali_kbase/mali_kbase_core_linux.c +++ b/mali_kbase/mali_kbase_core_linux.c @@ -53,7 +53,7 @@ #include <mali_kbase_hwaccess_instr.h> #endif #include <mali_kbase_reset_gpu.h> -#include "mali_kbase_ioctl.h" +#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h> #if !MALI_USE_CSF #include "mali_kbase_kinstr_jm.h" #endif @@ -1150,10 +1150,7 @@ static int kbase_api_mem_alias(struct kbase_context *kctx, u64 flags; int err; - if (alias->in.nents == 0 || alias->in.nents > 2048) - return -EINVAL; - - if (alias->in.stride > (U64_MAX / 2048)) + if (alias->in.nents == 0 || alias->in.nents > BASE_MEM_ALIAS_MAX_ENTS) return -EINVAL; ai = vmalloc(sizeof(*ai) * alias->in.nents); @@ -1357,18 +1354,6 @@ static int kbase_api_sticky_resource_unmap(struct kbase_context *kctx, } #if MALI_UNIT_TEST -static int kbase_api_tlstream_test(struct kbase_context *kctx, - struct kbase_ioctl_tlstream_test *test) -{ - kbase_timeline_test( - kctx->kbdev, - test->tpw_count, - test->msg_delay, - test->msg_count, - test->aux_msg); - - return 0; -} static int kbase_api_tlstream_stats(struct kbase_context *kctx, struct kbase_ioctl_tlstream_stats *stats) @@ -1508,14 +1493,11 @@ static int kbase_ioctl_cs_get_glb_iface(struct kbase_context *kctx, } if (!err) { - param->out.total_stream_num = - kbase_csf_firmware_get_glb_iface(kctx->kbdev, - 
group_data, max_group_num, - stream_data, max_total_stream_num, - &param->out.glb_version, &param->out.features, - &param->out.group_num, &param->out.prfcnt_size); - - param->out.padding = 0; + param->out.total_stream_num = kbase_csf_firmware_get_glb_iface( + kctx->kbdev, group_data, max_group_num, stream_data, + max_total_stream_num, &param->out.glb_version, + &param->out.features, &param->out.group_num, + &param->out.prfcnt_size, &param->out.instr_features); if (copy_to_user(user_groups, group_data, MIN(max_group_num, param->out.group_num) * @@ -1619,6 +1601,23 @@ static int kbasep_ioctl_context_priority_check(struct kbase_context *kctx, return ret; \ } while (0) +static int kbasep_ioctl_set_limited_core_count(struct kbase_context *kctx, + struct kbase_ioctl_set_limited_core_count *set_limited_core_count) +{ + const u64 shader_core_mask = + kbase_pm_get_present_cores(kctx->kbdev, KBASE_PM_CORE_SHADER); + const u64 limited_core_mask = + ((u64)1 << (set_limited_core_count->max_core_count)) - 1; + + if ((shader_core_mask & limited_core_mask) == 0) { + /* At least one shader core must be available after applying the mask */ + return -EINVAL; + } + + kctx->limited_core_mask = limited_core_mask; + return 0; +} + static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct kbase_file *const kfile = filp->private_data; @@ -1980,12 +1979,6 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) break; #endif /* MALI_USE_CSF */ #if MALI_UNIT_TEST - case KBASE_IOCTL_TLSTREAM_TEST: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_TEST, - kbase_api_tlstream_test, - struct kbase_ioctl_tlstream_test, - kctx); - break; case KBASE_IOCTL_TLSTREAM_STATS: KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_TLSTREAM_STATS, kbase_api_tlstream_stats, @@ -1999,6 +1992,12 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) struct kbase_ioctl_context_priority_check, kctx); break; + case KBASE_IOCTL_SET_LIMITED_CORE_COUNT: + 
KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SET_LIMITED_CORE_COUNT, + kbasep_ioctl_set_limited_core_count, + struct kbase_ioctl_set_limited_core_count, + kctx); + break; } dev_warn(kbdev->dev, "Unknown ioctl 0x%x nr:%d", cmd, _IOC_NR(cmd)); @@ -2115,7 +2114,7 @@ static unsigned int kbase_poll(struct file *filp, poll_table *wait) void kbase_event_wakeup(struct kbase_context *kctx) { KBASE_DEBUG_ASSERT(kctx); - dev_dbg(kctx->kbdev->dev, "Waking event queue for context %p\n", + dev_dbg(kctx->kbdev->dev, "Waking event queue for context %pK\n", (void *)kctx); wake_up_interruptible(&kctx->event_queue); } @@ -3086,7 +3085,7 @@ static ssize_t kbase_show_gpuinfo(struct device *dev, { .id = GPU_ID2_PRODUCT_TBEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, .name = "Mali-G78" }, { .id = GPU_ID2_PRODUCT_TBAX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-TBAX" }, + .name = "Mali-G78AE" }, { .id = GPU_ID2_PRODUCT_LBEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, .name = "Mali-G68" }, { .id = GPU_ID2_PRODUCT_TNAX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, @@ -4094,21 +4093,28 @@ static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data) { struct kbase_device *kbdev = container_of(data, struct kbase_device, protected_mode_hwcnt_disable_work); + spinlock_t *backend_lock; unsigned long flags; bool do_disable; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); +#if MALI_USE_CSF + backend_lock = &kbdev->csf.scheduler.interrupt_lock; +#else + backend_lock = &kbdev->hwaccess_lock; +#endif + + spin_lock_irqsave(backend_lock, flags); do_disable = !kbdev->protected_mode_hwcnt_desired && !kbdev->protected_mode_hwcnt_disabled; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(backend_lock, flags); if (!do_disable) return; kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(backend_lock, flags); do_disable = !kbdev->protected_mode_hwcnt_desired && !kbdev->protected_mode_hwcnt_disabled; @@ -4128,9 
+4134,10 @@ static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data) kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); } - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(backend_lock, flags); } +#ifndef PLATFORM_PROTECTED_CALLBACKS static int kbasep_protected_mode_enable(struct protected_mode_device *pdev) { struct kbase_device *kbdev = pdev->data; @@ -4150,7 +4157,6 @@ static const struct protected_mode_ops kbasep_native_protected_ops = { .protected_mode_disable = kbasep_protected_mode_disable }; -#ifndef PLATFORM_PROTECTED_CALLBACKS #define PLATFORM_PROTECTED_CALLBACKS (&kbasep_native_protected_ops) #endif /* PLATFORM_PROTECTED_CALLBACKS */ @@ -4330,6 +4336,7 @@ int kbase_device_pm_init(struct kbase_device *kbdev) u32 gpu_model_id; if (kbase_is_pv_enabled(kbdev->dev->of_node)) { + dev_info(kbdev->dev, "Arbitration interface enabled\n"); if (kbase_is_pm_enabled(kbdev->dev->of_node)) { /* Arbitration AND power management invalid */ dev_err(kbdev->dev, "Invalid combination of arbitration AND power management\n"); @@ -4353,7 +4360,8 @@ int kbase_device_pm_init(struct kbase_device *kbdev) gpu_model_id = GPU_ID2_MODEL_MATCH_VALUE(product_id); if (gpu_model_id != GPU_ID2_PRODUCT_TGOX - && gpu_model_id != GPU_ID2_PRODUCT_TNOX) { + && gpu_model_id != GPU_ID2_PRODUCT_TNOX + && gpu_model_id != GPU_ID2_PRODUCT_TBAX) { kbase_arbiter_pm_early_term(kbdev); dev_err(kbdev->dev, "GPU platform not suitable for arbitration\n"); return -EPERM; @@ -4542,7 +4550,7 @@ void power_control_term(struct kbase_device *kbdev) static void trigger_reset(struct kbase_device *kbdev) { kbase_pm_context_active(kbdev); - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(kbdev); kbase_pm_context_idle(kbdev); } @@ -4570,7 +4578,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get,\ MAKE_QUIRK_ACCESSORS(sc); MAKE_QUIRK_ACCESSORS(tiler); MAKE_QUIRK_ACCESSORS(mmu); 
-MAKE_QUIRK_ACCESSORS(jm); +MAKE_QUIRK_ACCESSORS(gpu); static ssize_t kbase_device_debugfs_reset_write(struct file *file, const char __user *ubuf, size_t count, loff_t *ppos) @@ -4691,7 +4699,9 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev) kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname, NULL); if (!kbdev->mali_debugfs_directory) { - dev_err(kbdev->dev, "Couldn't create mali debugfs directory\n"); + dev_err(kbdev->dev, + "Couldn't create mali debugfs directory: %s\n", + kbdev->devname); err = -ENOMEM; goto out; } @@ -4746,9 +4756,8 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev) debugfs_create_file("quirks_mmu", 0644, kbdev->mali_debugfs_directory, kbdev, &fops_mmu_quirks); - debugfs_create_file("quirks_jm", 0644, - kbdev->mali_debugfs_directory, kbdev, - &fops_jm_quirks); + debugfs_create_file("quirks_gpu", 0644, kbdev->mali_debugfs_directory, + kbdev, &fops_gpu_quirks); debugfs_create_bool("infinite_cache", mode, debugfs_ctx_defaults_directory, @@ -4878,40 +4887,6 @@ int kbase_device_coherency_init(struct kbase_device *kbdev) return 0; } -#ifdef CONFIG_MALI_BUSLOG - -/* Callback used by the kbase bus logger client, to initiate a GPU reset - * when the bus log is restarted. GPU reset is used as reference point - * in HW bus log analyses. 
- */ -static void kbase_logging_started_cb(void *data) -{ - struct kbase_device *kbdev = (struct kbase_device *)data; - - if (kbase_prepare_to_reset_gpu(kbdev)) - kbase_reset_gpu(kbdev); - dev_info(kbdev->dev, "KBASE - Bus logger restarted\n"); -} - -int buslog_init(struct kbase_device *kbdev) -{ - int err = 0; - - err = bl_core_client_register(kbdev->devname, - kbase_logging_started_cb, - kbdev, &kbdev->buslogger, - THIS_MODULE, NULL); - if (err == 0) - bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024); - - return err; -} - -void buslog_term(struct kbase_device *kbdev) -{ - bl_core_client_unregister(kbdev->buslogger); -} -#endif #if MALI_USE_CSF /** @@ -5222,7 +5197,8 @@ static int kbase_platform_device_probe(struct platform_device *pdev) if (err) { if (err == -EPROBE_DEFER) - dev_err(kbdev->dev, "Device initialization Deferred\n"); + dev_info(kbdev->dev, + "Device initialization Deferred\n"); else dev_err(kbdev->dev, "Device initialization failed\n"); @@ -5448,7 +5424,6 @@ static struct platform_driver kbase_platform_driver = { .remove = kbase_platform_device_remove, .driver = { .name = kbase_drv_name, - .owner = THIS_MODULE, .pm = &kbase_pm_ops, .of_match_table = of_match_ptr(kbase_dt_ids), }, diff --git a/mali_kbase/mali_kbase_ctx_sched.c b/mali_kbase/mali_kbase_ctx_sched.c index f59a2d7..c63bc8d 100644 --- a/mali_kbase/mali_kbase_ctx_sched.c +++ b/mali_kbase/mali_kbase_ctx_sched.c @@ -365,8 +365,7 @@ void kbase_ctx_sched_release_ctx_lock(struct kbase_context *kctx) } #if MALI_USE_CSF -bool kbase_ctx_sched_refcount_mmu_flush(struct kbase_context *kctx, - bool sync) +bool kbase_ctx_sched_inc_refcount_if_as_valid(struct kbase_context *kctx) { struct kbase_device *kbdev; bool added_ref = false; @@ -383,20 +382,16 @@ bool kbase_ctx_sched_refcount_mmu_flush(struct kbase_context *kctx, mutex_lock(&kbdev->mmu_hw_mutex); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - added_ref = kbase_ctx_sched_inc_refcount_nolock(kctx); - - WARN_ON(added_ref && - 
(kctx->mmu_flush_pend_state != KCTX_MMU_FLUSH_NOT_PEND)); - - if (!added_ref && (kctx->as_nr != KBASEP_AS_NR_INVALID)) { - enum kbase_ctx_mmu_flush_pending_state new_state = - sync ? KCTX_MMU_FLUSH_PEND_SYNC : - KCTX_MMU_FLUSH_PEND_NO_SYNC; + if ((kctx->as_nr != KBASEP_AS_NR_INVALID) && + (kctx == kbdev->as_to_kctx[kctx->as_nr])) { + atomic_inc(&kctx->refcount); - WARN_ON(kctx != kbdev->as_to_kctx[kctx->as_nr]); + if (kbdev->as_free & (1u << kctx->as_nr)) + kbdev->as_free &= ~(1u << kctx->as_nr); - if (kctx->mmu_flush_pend_state != KCTX_MMU_FLUSH_PEND_SYNC) - kctx->mmu_flush_pend_state = new_state; + KBASE_KTRACE_ADD(kbdev, SCHED_RETAIN_CTX_NOLOCK, kctx, + kbase_ktrace_get_ctx_refcnt(kctx)); + added_ref = true; } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); diff --git a/mali_kbase/mali_kbase_ctx_sched.h b/mali_kbase/mali_kbase_ctx_sched.h index 1aa3762..cadb735 100644 --- a/mali_kbase/mali_kbase_ctx_sched.h +++ b/mali_kbase/mali_kbase_ctx_sched.h @@ -222,23 +222,20 @@ void kbase_ctx_sched_release_ctx_lock(struct kbase_context *kctx); #if MALI_USE_CSF /** - * kbase_ctx_sched_refcount_mmu_flush - Refcount the context for the MMU flush - * operation. + * kbase_ctx_sched_inc_refcount_if_as_valid - Refcount the context if it has GPU + * address space slot assigned to it. * - * @kctx: Context to be refcounted. - * @sync: Flag passed to the caller function kbase_mmu_flush_invalidate(). + * @kctx: Context to be refcounted * - * This function takes a reference on the context for the MMU flush operation. - * The refcount is taken only if the context is busy/active. - * If the context isn't active but has a GPU address space slot assigned to it - * then a flag is set to indicate that MMU flush operation is pending, which - * will be performed when the context becomes active. + * This function takes a reference on the context if it has a GPU address space + * slot assigned to it. 
The address space slot will not be available for + * re-assignment until the reference is released. * * Return: true if refcount succeeded and the address space slot will not be - * reassigned, false if the refcount failed (because the context was inactive) + * reassigned, false if the refcount failed (because the address space slot + * was not assigned). */ -bool kbase_ctx_sched_refcount_mmu_flush(struct kbase_context *kctx, - bool sync); +bool kbase_ctx_sched_inc_refcount_if_as_valid(struct kbase_context *kctx); #endif #endif /* _KBASE_CTX_SCHED_H_ */ diff --git a/mali_kbase/mali_kbase_debug_job_fault.c b/mali_kbase/mali_kbase_debug_job_fault.c index 6902ded..7dfdff1 100644 --- a/mali_kbase/mali_kbase_debug_job_fault.c +++ b/mali_kbase/mali_kbase_debug_job_fault.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2012-2016, 2018-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016, 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -549,6 +549,14 @@ void kbase_debug_job_fault_kctx_unblock(struct kbase_context *kctx) { WARN_ON(!kbase_ctx_flag(kctx, KCTX_DYING)); + /* Return early if the job fault part of the kbase_device is not + * initialized yet. An error can happen during the device probe after + * the privileged Kbase context was created for the HW counter dumping + * but before the job fault part is initialized. 
+ */ + if (!kctx->kbdev->job_fault_resume_workq) + return; + kbase_ctx_remove_pending_event(kctx); } diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h index d813f2f..5b7591c 100644 --- a/mali_kbase/mali_kbase_defs.h +++ b/mali_kbase/mali_kbase_defs.h @@ -48,9 +48,6 @@ #include <linux/file.h> #include <linux/sizes.h> -#ifdef CONFIG_MALI_BUSLOG -#include <linux/bus_logger.h> -#endif #if defined(CONFIG_SYNC) #include <sync.h> @@ -554,7 +551,6 @@ struct kbase_mmu_mode { unsigned long flags; }; -struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void); struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); #define DEVNAME_SIZE 16 @@ -624,8 +620,8 @@ struct kbase_process { * issues present in the GPU. * @hw_quirks_mmu: Configuration to be used for the MMU as per the HW * issues present in the GPU. - * @hw_quirks_jm: Configuration to be used for the Job Manager as per - * the HW issues present in the GPU. + * @hw_quirks_gpu: Configuration to be used for the Job Manager or CSF/MCU + * subsystems as per the HW issues present in the GPU. * @entry: Links the device instance to the global list of GPU * devices. The list would have as many entries as there * are GPU device instances. @@ -710,6 +706,8 @@ struct kbase_process { * @nr_hw_address_spaces: Number of address spaces actually available in the * GPU, remains constant after driver initialisation. * @nr_user_address_spaces: Number of address spaces available to user contexts + * @hwcnt_backend_csf_if_fw: Firmware interface to access CSF GPU performance + * counters. * @hwcnt: Structure used for instrumentation and HW counters * dumping * @hwcnt.lock: The lock should be used when accessing any of the @@ -754,6 +752,8 @@ struct kbase_process { * including any contexts that might be created for * hardware counters. * @kctx_list_lock: Lock protecting concurrent accesses to @kctx_list. + * @group_max_uid_in_devices: Max value of any queue group UID in any kernel + * context in the kbase device. 
* @devfreq_profile: Describes devfreq profile for the Mali GPU device, passed * to devfreq_add_device() to add devfreq feature to Mali * GPU device. @@ -918,7 +918,7 @@ struct kbase_device { u32 hw_quirks_sc; u32 hw_quirks_tiler; u32 hw_quirks_mmu; - u32 hw_quirks_jm; + u32 hw_quirks_gpu; struct list_head entry; struct device *dev; @@ -1016,6 +1016,7 @@ struct kbase_device { struct list_head kctx_list; struct mutex kctx_list_lock; + atomic_t group_max_uid_in_devices; #ifdef CONFIG_MALI_DEVFREQ struct devfreq_dev_profile devfreq_profile; @@ -1120,9 +1121,6 @@ struct kbase_device { struct work_struct protected_mode_hwcnt_disable_work; -#ifdef CONFIG_MALI_BUSLOG - struct bus_logger_client *buslogger; -#endif bool irq_reset_flush; @@ -1225,7 +1223,7 @@ struct kbase_file { unsigned long api_version; atomic_t setup_state; }; - +#if MALI_JIT_PRESSURE_LIMIT_BASE /** * enum kbase_context_flags - Flags for kbase contexts * @@ -1285,6 +1283,9 @@ struct kbase_file { * refcount for the context drops to 0 or on when the address spaces are * re-enabled on GPU reset or power cycle. * + * @KCTX_JPL_ENABLED: Set when JIT physical page limit is less than JIT virtual + * address page limit, so we must take care to not exceed the physical limit + * * All members need to be separate bits. This enum is intended for use in a * bitmask where multiple values get OR-ed together. */ @@ -1305,38 +1306,90 @@ enum kbase_context_flags { KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13, KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14, KCTX_AS_DISABLED_ON_FAULT = 1U << 15, -#if MALI_JIT_PRESSURE_LIMIT_BASE - /* - * Set when JIT physical page limit is less than JIT virtual address - * page limit, so we must take care to not exceed the physical limit - */ KCTX_JPL_ENABLED = 1U << 16, -#endif /* !MALI_JIT_PRESSURE_LIMIT_BASE */ }; - -#if MALI_USE_CSF +#else /** - * enum kbase_ctx_mmu_flush_pending_state - State for the pending mmu flush - * operation for a kbase context. 
+ * enum kbase_context_flags - Flags for kbase contexts + * + * @KCTX_COMPAT: Set when the context process is a compat process, 32-bit + * process on a 64-bit kernel. + * + * @KCTX_RUNNABLE_REF: Set when context is counted in + * kbdev->js_data.nr_contexts_runnable. Must hold queue_mutex when accessing. + * + * @KCTX_ACTIVE: Set when the context is active. + * + * @KCTX_PULLED: Set when last kick() caused atoms to be pulled from this + * context. + * + * @KCTX_MEM_PROFILE_INITIALIZED: Set when the context's memory profile has been + * initialized. + * + * @KCTX_INFINITE_CACHE: Set when infinite cache is to be enabled for new + * allocations. Existing allocations will not change. + * + * @KCTX_SUBMIT_DISABLED: Set to prevent context from submitting any jobs. + * + * @KCTX_PRIVILEGED:Set if the context uses an address space and should be kept + * scheduled in. + * + * @KCTX_SCHEDULED: Set when the context is scheduled on the Run Pool. + * This is only ever updated whilst the jsctx_mutex is held. + * + * @KCTX_DYING: Set when the context process is in the process of being evicted. + * + * @KCTX_NO_IMPLICIT_SYNC: Set when explicit Android fences are in use on this + * context, to disable use of implicit dma-buf fences. This is used to avoid + * potential synchronization deadlocks. + * + * @KCTX_FORCE_SAME_VA: Set when BASE_MEM_SAME_VA should be forced on memory + * allocations. For 64-bit clients it is enabled by default, and disabled by + * default on 32-bit clients. Being able to clear this flag is only used for + * testing purposes of the custom zone allocation on 64-bit user-space builds, + * where we also require more control than is available through e.g. the JIT + * allocation mechanism. However, the 64-bit user-space client must still + * reserve a JIT region using KBASE_IOCTL_MEM_JIT_INIT + * + * @KCTX_PULLED_SINCE_ACTIVE_JS0: Set when the context has had an atom pulled + * from it for job slot 0. 
This is reset when the context first goes active or + * is re-activated on that slot. + * + * @KCTX_PULLED_SINCE_ACTIVE_JS1: Set when the context has had an atom pulled + * from it for job slot 1. This is reset when the context first goes active or + * is re-activated on that slot. * - * @KCTX_MMU_FLUSH_NOT_PEND: Set when there is no MMU flush operation pending - * for a kbase context or deferred flush operation - * is performed. + * @KCTX_PULLED_SINCE_ACTIVE_JS2: Set when the context has had an atom pulled + * from it for job slot 2. This is reset when the context first goes active or + * is re-activated on that slot. * - * @KCTX_MMU_FLUSH_PEND_NO_SYNC: Set when the MMU flush operation is deferred - * for a kbase context when it is inactive and - * the sync flag passed is 0. + * @KCTX_AS_DISABLED_ON_FAULT: Set when the GPU address space is disabled for + * the context due to unhandled page(or bus) fault. It is cleared when the + * refcount for the context drops to 0 or on when the address spaces are + * re-enabled on GPU reset or power cycle. * - * @KCTX_MMU_FLUSH_PEND_SYNC: Set when the MMU flush operation is deferred - * for a kbase context when it is inactive and - * the sync flag passed is 1. + * All members need to be separate bits. This enum is intended for use in a + * bitmask where multiple values get OR-ed together. 
*/ -enum kbase_ctx_mmu_flush_pending_state { - KCTX_MMU_FLUSH_NOT_PEND, - KCTX_MMU_FLUSH_PEND_NO_SYNC, - KCTX_MMU_FLUSH_PEND_SYNC, +enum kbase_context_flags { + KCTX_COMPAT = 1U << 0, + KCTX_RUNNABLE_REF = 1U << 1, + KCTX_ACTIVE = 1U << 2, + KCTX_PULLED = 1U << 3, + KCTX_MEM_PROFILE_INITIALIZED = 1U << 4, + KCTX_INFINITE_CACHE = 1U << 5, + KCTX_SUBMIT_DISABLED = 1U << 6, + KCTX_PRIVILEGED = 1U << 7, + KCTX_SCHEDULED = 1U << 8, + KCTX_DYING = 1U << 9, + KCTX_NO_IMPLICIT_SYNC = 1U << 10, + KCTX_FORCE_SAME_VA = 1U << 11, + KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12, + KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13, + KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14, + KCTX_AS_DISABLED_ON_FAULT = 1U << 15, }; -#endif +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ struct kbase_sub_alloc { struct list_head link; @@ -1616,12 +1669,8 @@ struct kbase_reg_zone { * @kinstr_jm: Kernel job manager instrumentation context handle * @tl_kctx_list_node: List item into the device timeline's list of * contexts, for timeline summarization. - * @mmu_flush_pend_state: Tracks if the MMU flush operations are pending for the - * context. The flush required due to unmap is also - * tracked. It is supposed to be in - * KCTX_MMU_FLUSH_NOT_PEND state whilst a context is - * active and shall be updated with mmu_hw_mutex lock - * held. + * @limited_core_mask: The mask that is applied to the affinity in case of atoms + * marked with BASE_JD_REQ_LIMITED_CORE_MASK. * * A kernel base context is an entity among which the GPU is scheduled. * Each context has its own GPU address space. 
@@ -1769,9 +1818,7 @@ struct kbase_context { #endif struct list_head tl_kctx_list_node; -#if MALI_USE_CSF - enum kbase_ctx_mmu_flush_pending_state mmu_flush_pend_state; -#endif + u64 limited_core_mask; }; #ifdef CONFIG_MALI_CINSTR_GWT diff --git a/mali_kbase/mali_kbase_event.c b/mali_kbase/mali_kbase_event.c index 04687ee..25a379d 100644 --- a/mali_kbase/mali_kbase_event.c +++ b/mali_kbase/mali_kbase_event.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2010-2016,2018-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016,2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -42,7 +42,7 @@ static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, stru KBASE_TLSTREAM_TL_DEL_ATOM(kbdev, katom); katom->status = KBASE_JD_ATOM_STATE_UNUSED; - dev_dbg(kbdev->dev, "Atom %p status to unused\n", (void *)katom); + dev_dbg(kbdev->dev, "Atom %pK status to unused\n", (void *)katom); wake_up(&katom->completed); return data; @@ -79,7 +79,7 @@ int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *ueve mutex_unlock(&ctx->event_mutex); - dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom); + dev_dbg(ctx->kbdev->dev, "event dequeuing %pK\n", (void *)atom); uevent->event_code = atom->event_code; uevent->atom_number = (atom - ctx->jctx.atoms); @@ -164,11 +164,11 @@ void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) { struct kbase_device *kbdev = ctx->kbdev; - dev_dbg(kbdev->dev, "Posting event for atom %p\n", (void *)atom); + dev_dbg(kbdev->dev, "Posting event for atom %pK\n", (void *)atom); if (WARN_ON(atom->status != KBASE_JD_ATOM_STATE_COMPLETED)) { dev_warn(kbdev->dev, - "%s: Atom %d (%p) not completed (status %d)\n", + "%s: Atom %d (%pK) not completed (status %d)\n", __func__, kbase_jd_atom_id(atom->kctx, atom), 
atom->kctx, diff --git a/mali_kbase/mali_kbase_gpu_memory_debugfs.c b/mali_kbase/mali_kbase_gpu_memory_debugfs.c index 45ce740..a10b2bb 100644 --- a/mali_kbase/mali_kbase_gpu_memory_debugfs.c +++ b/mali_kbase/mali_kbase_gpu_memory_debugfs.c @@ -56,7 +56,7 @@ static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data) /* output the memory usage and cap for each kctx * opened on this device */ - seq_printf(sfile, " %s-0x%p %10u\n", + seq_printf(sfile, " %s-0x%pK %10u\n", "kctx", kctx, atomic_read(&(kctx->used_pages))); diff --git a/mali_kbase/mali_kbase_gpuprops.c b/mali_kbase/mali_kbase_gpuprops.c index 9da0b00..49f96f6 100644 --- a/mali_kbase/mali_kbase_gpuprops.c +++ b/mali_kbase/mali_kbase_gpuprops.c @@ -28,7 +28,7 @@ #include <mali_kbase_gpuprops.h> #include <mali_kbase_hwaccess_gpuprops.h> #include <mali_kbase_config_defaults.h> -#include "mali_kbase_ioctl.h" +#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h> #include <linux/clk.h> #include <mali_kbase_pm_internal.h> #include <linux/of_platform.h> @@ -104,6 +104,71 @@ static void kbase_gpuprops_construct_coherent_groups( } /** + * kbase_gpuprops_get_curr_config_props - Get the current allocated resources + * @kbdev: The &struct kbase_device structure for the device + * @curr_config: The &struct curr_config_props structure to receive the result + * + * Fill the &struct curr_config_props structure with values from the GPU + * configuration registers. + * + * Return: Zero on success, Linux error code on failure + */ +int kbase_gpuprops_get_curr_config_props(struct kbase_device *kbdev, + struct curr_config_props * const curr_config) +{ + struct kbase_current_config_regdump curr_config_regdump; + int err; + + if (WARN_ON(!kbdev) || WARN_ON(!curr_config)) + return -EINVAL; + + /* If update not needed just return. 
*/ + if (!curr_config->update_needed) + return 0; + + /* Dump relevant registers */ + err = kbase_backend_gpuprops_get_curr_config(kbdev, + &curr_config_regdump); + if (err) + return err; + + curr_config->l2_slices = + KBASE_UBFX32(curr_config_regdump.mem_features, 8U, 4) + 1; + + curr_config->l2_present = + ((u64) curr_config_regdump.l2_present_hi << 32) + + curr_config_regdump.l2_present_lo; + + curr_config->shader_present = + ((u64) curr_config_regdump.shader_present_hi << 32) + + curr_config_regdump.shader_present_lo; + + curr_config->num_cores = hweight64(curr_config->shader_present); + + curr_config->update_needed = false; + + return 0; +} + +/** + * kbase_gpuprops_req_curr_config_update - Request Current Config Update + * @kbdev: The &struct kbase_device structure for the device + * + * Requests the current configuration to be updated next time the + * kbase_gpuprops_get_curr_config_props() is called. + * + * Return: Zero on success, Linux error code on failure + */ +int kbase_gpuprops_req_curr_config_update(struct kbase_device *kbdev) +{ + if (WARN_ON(!kbdev)) + return -EINVAL; + + kbdev->gpu_props.curr_config.update_needed = true; + return 0; +} + +/** * kbase_gpuprops_get_props - Get the GPU configuration * @gpu_props: The &struct base_gpu_props structure * @kbdev: The &struct kbase_device structure for the device @@ -183,6 +248,59 @@ void kbase_gpuprops_update_core_props_gpu_id( } /** + * kbase_gpuprops_update_max_config_props - Updates the max config properties in + * the base_gpu_props. + * @base_props: The &struct base_gpu_props structure + * @kbdev: The &struct kbase_device structure for the device + * + * Updates the &struct base_gpu_props structure with the max config properties. 
+ */ +static void kbase_gpuprops_update_max_config_props( + struct base_gpu_props * const base_props, struct kbase_device *kbdev) +{ + int l2_n = 0; + + if (WARN_ON(!kbdev) || WARN_ON(!base_props)) + return; + + /* return if the max_config is not set during arbif initialization */ + if (kbdev->gpu_props.max_config.core_mask == 0) + return; + + /* + * Set the base_props with the maximum config values to ensure that the + * user space will always be based on the maximum resources available. + */ + base_props->l2_props.num_l2_slices = + kbdev->gpu_props.max_config.l2_slices; + base_props->raw_props.shader_present = + kbdev->gpu_props.max_config.core_mask; + /* + * Update l2_present in the raw data to be consistent with the + * max_config.l2_slices number. + */ + base_props->raw_props.l2_present = 0; + for (l2_n = 0; l2_n < base_props->l2_props.num_l2_slices; l2_n++) { + base_props->raw_props.l2_present <<= 1; + base_props->raw_props.l2_present |= 0x1; + } + /* + * Update the coherency_info data using just one core group. For + * architectures where the max_config is provided by the arbiter it is + * not necessary to split the shader core groups in different coherent + * groups. 
+ */ + base_props->coherency_info.coherency = + base_props->raw_props.mem_features; + base_props->coherency_info.num_core_groups = 1; + base_props->coherency_info.num_groups = 1; + base_props->coherency_info.group[0].core_mask = + kbdev->gpu_props.max_config.core_mask; + base_props->coherency_info.group[0].num_cores = + hweight32(kbdev->gpu_props.max_config.core_mask); +} + +/** * kbase_gpuprops_calculate_props - Calculate the derived properties * @gpu_props: The &struct base_gpu_props structure * @kbdev: The &struct kbase_device structure for the device @@ -297,8 +415,30 @@ static void kbase_gpuprops_calculate_props( gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT; gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT; } - /* Initialize the coherent_group structure for each group */ - kbase_gpuprops_construct_coherent_groups(gpu_props); + + /* + * If the maximum resources allocated information is available it is + * necessary to update the base_gpu_props with the max_config info to + * the userspace. This is applicable to systems that receive this + * information from the arbiter. 
+ */ + if (kbdev->gpu_props.max_config.core_mask) + /* Update the max config properties in the base_gpu_props */ + kbase_gpuprops_update_max_config_props(gpu_props, + kbdev); + else + /* Initialize the coherent_group structure for each group */ + kbase_gpuprops_construct_coherent_groups(gpu_props); +} + +void kbase_gpuprops_set_max_config(struct kbase_device *kbdev, + const struct max_config_props *max_config) +{ + if (WARN_ON(!kbdev) || WARN_ON(!max_config)) + return; + + kbdev->gpu_props.max_config.l2_slices = max_config->l2_slices; + kbdev->gpu_props.max_config.core_mask = max_config->core_mask; } void kbase_gpuprops_set(struct kbase_device *kbdev) @@ -306,7 +446,8 @@ void kbase_gpuprops_set(struct kbase_device *kbdev) struct kbase_gpu_props *gpu_props; struct gpu_raw_gpu_props *raw; - KBASE_DEBUG_ASSERT(kbdev != NULL); + if (WARN_ON(!kbdev)) + return; gpu_props = &kbdev->gpu_props; raw = &gpu_props->props.raw_props; @@ -326,9 +467,19 @@ void kbase_gpuprops_set(struct kbase_device *kbdev) gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8); gpu_props->num_cores = hweight64(raw->shader_present); - gpu_props->num_core_groups = hweight64(raw->l2_present); + gpu_props->num_core_groups = + gpu_props->props.coherency_info.num_core_groups; gpu_props->num_address_spaces = hweight32(raw->as_present); gpu_props->num_job_slots = hweight32(raw->js_present); + + /* + * Current configuration is used on HW interactions so that the maximum + * config is just used for user space avoiding interactions with parts + * of the hardware that might not be allocated to the kbase instance at + * that moment. 
+ */ + kbase_gpuprops_req_curr_config_update(kbdev); + kbase_gpuprops_get_curr_config_props(kbdev, &gpu_props->curr_config); } int kbase_gpuprops_set_features(struct kbase_device *kbdev) @@ -494,7 +645,10 @@ int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev) goto exit; dev_info(kbdev->dev, "Reflected L2_FEATURES is 0x%x\n", - regdump.l2_features); + regdump.l2_features); + dev_info(kbdev->dev, "Reflected L2_CONFIG is 0x%08x\n", + regdump.l2_config); + /* Update gpuprops with reflected L2_FEATURES */ gpu_props->raw_props.l2_features = regdump.l2_features; diff --git a/mali_kbase/mali_kbase_gpuprops.h b/mali_kbase/mali_kbase_gpuprops.h index 7c7b123..72f76c3 100644 --- a/mali_kbase/mali_kbase_gpuprops.h +++ b/mali_kbase/mali_kbase_gpuprops.h @@ -115,4 +115,38 @@ int kbase_device_populate_max_freq(struct kbase_device *kbdev); void kbase_gpuprops_update_core_props_gpu_id( struct base_gpu_props * const gpu_props); +/** + * kbase_gpuprops_set_max_config - Set the max config information + * @kbdev: Device pointer + * @max_config: Maximum configuration data to be updated + * + * This function sets max_config in the kbase_gpu_props. + */ +void kbase_gpuprops_set_max_config(struct kbase_device *kbdev, + const struct max_config_props *max_config); + +/** + * kbase_gpuprops_get_curr_config_props - Get the current allocated resources + * @kbdev: The &struct kbase_device structure for the device + * @curr_config: The &struct curr_config_props structure to receive the result + * + * Fill the &struct curr_config_props structure with values from the GPU + * configuration registers. 
+ * + * Return: Zero on success, Linux error code on failure + */ +int kbase_gpuprops_get_curr_config_props(struct kbase_device *kbdev, + struct curr_config_props * const curr_config); + +/** + * kbase_gpuprops_req_curr_config_update - Request Current Config Update + * @kbdev: The &struct kbase_device structure for the device + * + * Requests the current configuration to be updated next time the + * kbase_gpuprops_get_curr_config_props() is called. + * + * Return: Zero on success, Linux error code on failure + */ +int kbase_gpuprops_req_curr_config_update(struct kbase_device *kbdev); + #endif /* _KBASE_GPUPROPS_H_ */ diff --git a/mali_kbase/mali_kbase_gpuprops_types.h b/mali_kbase/mali_kbase_gpuprops_types.h index 8ecb54f..8b37b88 100644 --- a/mali_kbase/mali_kbase_gpuprops_types.h +++ b/mali_kbase/mali_kbase_gpuprops_types.h @@ -26,7 +26,7 @@ #ifndef _KBASE_GPUPROPS_TYPES_H_ #define _KBASE_GPUPROPS_TYPES_H_ -#include "mali_base_kernel.h" +#include <uapi/gpu/arm/midgard/mali_base_kernel.h> #define KBASE_GPU_SPEED_MHZ 123 #define KBASE_GPU_PC_SIZE_LOG2 24U @@ -34,6 +34,7 @@ struct kbase_gpuprops_regdump { u32 gpu_id; u32 l2_features; + u32 l2_config; u32 core_features; u32 tiler_features; u32 mem_features; @@ -60,6 +61,28 @@ struct kbase_gpuprops_regdump { u32 gpu_features_hi; }; +/** + * struct kbase_current_config_regdump - Register dump for current resources + * allocated to the GPU. + * @mem_features: Memory system features. Contains information about the + * features of the memory system. Used here to get the L2 slice + * count. + * @shader_present_lo: Shader core present bitmap. Low word. + * @shader_present_hi: Shader core present bitmap. High word. + * @l2_present_lo: L2 cache present bitmap. Low word. + * @l2_present_hi: L2 cache present bitmap. High word. + * + * Register dump structure used to store the resgisters data realated to the + * current resources allocated to the GPU. 
+ */ +struct kbase_current_config_regdump { + u32 mem_features; + u32 shader_present_lo; + u32 shader_present_hi; + u32 l2_present_lo; + u32 l2_present_hi; +}; + struct kbase_gpu_cache_props { u8 associativity; u8 external_bus_width; @@ -74,6 +97,50 @@ struct kbase_gpu_mmu_props { u8 pa_bits; }; +/** + * struct max_config_props - Properties based on the maximum resources + * available. + * @l2_slices: Maximum number of L2 slices that can be assinged to the GPU + * during runtime. + * @padding: Padding to a multiple of 64 bits. + * @core_mask: Largest core mask bitmap that can be assigned to the GPU during + * runtime. + * + * Properties based on the maximum resources available (not necessarly + * allocated at that moment). Used to provide the maximum configuration to the + * userspace allowing the applications to allocate enough resources in case the + * real allocated resources change. + */ +struct max_config_props { + u8 l2_slices; + u8 padding[3]; + u32 core_mask; +}; + +/** + * struct curr_config_props - Properties based on the current resources + * allocated to the GPU. + * @l2_present: Current L2 present bitmap that is allocated to the GPU. + * @shader_present: Current shader present bitmap that is allocated to the GPU. + * @num_cores: Current number of shader cores allocated to the GPU. + * @l2_slices: Current number of L2 slices allocated to the GPU. + * @update_needed: Defines if it is necessary to re-read the registers to + * update the current allocated resources. + * @padding: Padding to a multiple of 64 bits. + * + * Properties based on the current resource available. Used for operations with + * hardware interactions to avoid using userspace data that can be based on + * the maximum resource available. 
+ */ +struct curr_config_props { + u64 l2_present; + u64 shader_present; + u16 num_cores; + u8 l2_slices; + bool update_needed; + u8 padding[4]; +}; + struct kbase_gpu_props { /* kernel-only properties */ u8 num_cores; @@ -86,6 +153,12 @@ struct kbase_gpu_props { struct kbase_gpu_mem_props mem; struct kbase_gpu_mmu_props mmu; + /* Properties based on the current resource available */ + struct curr_config_props curr_config; + + /* Properties based on the maximum resource available */ + struct max_config_props max_config; + /* Properties shared with userspace */ struct base_gpu_props props; diff --git a/mali_kbase/mali_kbase_gwt.h b/mali_kbase/mali_kbase_gwt.h index f349d8f..32b0f5f 100644 --- a/mali_kbase/mali_kbase_gwt.h +++ b/mali_kbase/mali_kbase_gwt.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2010-2017, 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2017, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,7 +23,7 @@ #define _KBASE_GWT_H #include <mali_kbase.h> -#include <mali_kbase_ioctl.h> +#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h> /** * kbase_gpu_gwt_start - Start the GPU write tracking diff --git a/mali_kbase/mali_kbase_hw.c b/mali_kbase/mali_kbase_hw.c index d2063bb..b1758d7 100644 --- a/mali_kbase/mali_kbase_hw.c +++ b/mali_kbase/mali_kbase_hw.c @@ -126,91 +126,91 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( }; static const struct base_hw_product base_hw_products[] = { - {GPU_ID2_PRODUCT_TMIX, - {{GPU_ID2_VERSION_MAKE(0, 0, 1), - base_hw_issues_tMIx_r0p0_05dev0}, - {GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tMIx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tMIx_r0p1}, - {U32_MAX /* sentinel value */, NULL} } }, - - {GPU_ID2_PRODUCT_THEX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tHEx_r0p0}, - 
{GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tHEx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tHEx_r0p1}, - {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tHEx_r0p1}, - {GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tHEx_r0p2}, - {GPU_ID2_VERSION_MAKE(0, 3, 0), base_hw_issues_tHEx_r0p3}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TSIX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tSIx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tSIx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tSIx_r0p1}, - {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tSIx_r1p0}, - {GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tSIx_r1p1}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TDVX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDVx_r0p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TNOX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNOx_r0p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TGOX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGOx_r0p0}, - {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tGOx_r1p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TTRX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTRx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tTRx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tTRx_r0p1}, - {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tTRx_r0p1}, - {GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tTRx_r0p2}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TNAX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNAx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tNAx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tNAx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 0, 5), base_hw_issues_tNAx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tNAx_r0p1}, - {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tNAx_r0p1}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_LBEX, - {{GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_lBEx_r1p0}, - {GPU_ID2_VERSION_MAKE(1, 1, 0), 
base_hw_issues_lBEx_r1p1}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TBEX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBEx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tBEx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tBEx_r0p1}, - {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tBEx_r1p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TBAX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBAx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tBAx_r0p0}, - {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tBAx_r1p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TDUX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDUx_r0p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TODX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tODx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 0, 5), base_hw_issues_tODx_r0p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_LODX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0}, - {U32_MAX, NULL} } }, + { GPU_ID2_PRODUCT_TMIX, + { { GPU_ID2_VERSION_MAKE(0, 0, 1), + base_hw_issues_tMIx_r0p0_05dev0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tMIx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tMIx_r0p1 }, + { U32_MAX /* sentinel value */, NULL } } }, + + { GPU_ID2_PRODUCT_THEX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tHEx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tHEx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tHEx_r0p1 }, + { GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tHEx_r0p1 }, + { GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tHEx_r0p2 }, + { GPU_ID2_VERSION_MAKE(0, 3, 0), base_hw_issues_tHEx_r0p3 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_TSIX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tSIx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tSIx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tSIx_r0p1 }, + { GPU_ID2_VERSION_MAKE(1, 0, 0), 
base_hw_issues_tSIx_r1p0 }, + { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tSIx_r1p1 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_TDVX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDVx_r0p0 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_TNOX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNOx_r0p0 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_TGOX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGOx_r0p0 }, + { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tGOx_r1p0 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_TTRX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTRx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tTRx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tTRx_r0p1 }, + { GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tTRx_r0p1 }, + { GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tTRx_r0p2 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_TNAX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNAx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tNAx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tNAx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 5), base_hw_issues_tNAx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tNAx_r0p1 }, + { GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tNAx_r0p1 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_LBEX, + { { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_lBEx_r1p0 }, + { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_lBEx_r1p1 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_TBEX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBEx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tBEx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tBEx_r0p1 }, + { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tBEx_r1p0 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_TBAX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBAx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tBAx_r0p0 }, + { 
GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tBAx_r0p0 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_TDUX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDUx_r0p0 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_TODX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tODx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 5), base_hw_issues_tODx_r0p0 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_LODX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0 }, + { U32_MAX, NULL } } }, }; u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; diff --git a/mali_kbase/mali_kbase_hwaccess_gpuprops.h b/mali_kbase/mali_kbase_hwaccess_gpuprops.h index 5e5f9dc..0fca83e 100644 --- a/mali_kbase/mali_kbase_hwaccess_gpuprops.h +++ b/mali_kbase/mali_kbase_hwaccess_gpuprops.h @@ -40,6 +40,23 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev, struct kbase_gpuprops_regdump *regdump); /** + * kbase_backend_gpuprops_get_curr_config() - Fill @curr_config_regdump with + * relevant GPU properties read from + * the GPU registers. + * @kbdev: Device pointer. + * @curr_config_regdump: Pointer to struct kbase_current_config_regdump + * structure. + * + * The caller should ensure that GPU remains powered-on during this function and + * the caller must ensure this function returns success before using the values + * returned in the curr_config_regdump in any part of the kernel. 
+ * + * Return: Zero for succeess or a Linux error code + */ +int kbase_backend_gpuprops_get_curr_config(struct kbase_device *kbdev, + struct kbase_current_config_regdump *curr_config_regdump); + +/** * kbase_backend_gpuprops_get_features - Fill @regdump with GPU properties read * from GPU * @kbdev: Device pointer diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf.c b/mali_kbase/mali_kbase_hwcnt_backend_csf.c index c1bc7fc..4bc62c1 100644 --- a/mali_kbase/mali_kbase_hwcnt_backend_csf.c +++ b/mali_kbase/mali_kbase_hwcnt_backend_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -127,41 +127,31 @@ enum kbase_hwcnt_backend_csf_enable_state { * struct kbase_hwcnt_backend_csf_info - Information used to create an instance * of a CSF hardware counter backend. * @backend: Pointer to access CSF backend. - * @lock: Spinlock protecting backend and its internal - * states. * @fw_in_protected_mode: True if FW is running in protected mode, else * false. * @unrecoverable_error_happened: True if an recoverable error happened, else * false. - * @csf_if: CSF interface object pointer. Functions inside - * this interface MUST never be called while - * holding the spin lock, as that could cause - * deadlocks. + * @csf_if: CSF interface object pointer. * @ring_buf_cnt: Dump buffer count in the ring buffer. * @counter_set: The performance counter set to use. * @metadata: Hardware counter metadata. - * @dump_bytes: Bytes of GPU memory required to perform a - * hardware counter dump. - * @gpu_info: GPU information to initialise HWC dump memory - * layout. + * @prfcnt_info: Performance counter information. 
*/ struct kbase_hwcnt_backend_csf_info { struct kbase_hwcnt_backend_csf *backend; - spinlock_t lock; bool fw_in_protected_mode; bool unrecoverable_error_happened; struct kbase_hwcnt_backend_csf_if *csf_if; u32 ring_buf_cnt; enum kbase_hwcnt_set counter_set; const struct kbase_hwcnt_metadata *metadata; - size_t dump_bytes; - struct kbase_hwcnt_gpu_info gpu_info; + struct kbase_hwcnt_backend_csf_if_prfcnt_info prfcnt_info; }; /** * struct kbase_hwcnt_csf_physical_layout - HWC sample memory physical layout * information. - * @fe_cnt: FroneEnd block count. + * @fe_cnt: Front end block count. * @tiler_cnt: Tiler block count. * @mmu_l2_cnt: Memory system(MMU and L2 cache) block count. * @shader_cnt: Shader Core block count. @@ -207,7 +197,7 @@ struct kbase_hwcnt_csf_physical_layout { * count for sample period. * @phys_layout: Physical memory layout information of HWC * sample buffer. - * @dump_completed: Completion signalled by the dump worker when + * @dump_completed: Completion signaled by the dump worker when * it is completed accumulating up to the * insert_index_to_accumulate. * Should be initialized to the "complete" state. 
@@ -242,7 +232,7 @@ bool kbasep_hwcnt_backend_csf_backend_exists( struct kbase_hwcnt_backend_csf_info *csf_info) { WARN_ON(!csf_info); - lockdep_assert_held(&csf_info->lock); + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); return (csf_info->backend != NULL); } @@ -280,6 +270,9 @@ kbasep_hwcnt_backend_csf_cc_update(struct kbase_hwcnt_backend_csf *backend_csf) u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS]; size_t clk; + backend_csf->info->csf_if->assert_lock_held( + backend_csf->info->csf_if->ctx); + backend_csf->info->csf_if->get_gpu_cycle_count( backend_csf->info->csf_if->ctx, cycle_counts, backend_csf->clk_enable_map); @@ -310,10 +303,9 @@ kbasep_hwcnt_backend_csf_timestamp_ns(struct kbase_hwcnt_backend *backend) } /** kbasep_hwcnt_backend_csf_process_enable_map() - Process the enable_map to - * guarantee the header is - * enabled, the header will be - * used when do the samples - * delta calculation. + * guarantee headers are + * enabled if any counter is + * required. *@phys_enable_map: HWC physical enable map to be processed. 
*/ static void kbasep_hwcnt_backend_csf_process_enable_map( @@ -338,21 +330,21 @@ static void kbasep_hwcnt_backend_csf_process_enable_map( } static void kbasep_hwcnt_backend_csf_init_layout( - const struct kbase_hwcnt_gpu_info *gpu_info, + const struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info, struct kbase_hwcnt_csf_physical_layout *phys_layout) { - WARN_ON(!gpu_info); + WARN_ON(!prfcnt_info); WARN_ON(!phys_layout); phys_layout->fe_cnt = 1; phys_layout->tiler_cnt = 1; - phys_layout->mmu_l2_cnt = gpu_info->l2_count; - phys_layout->shader_cnt = fls64(gpu_info->core_mask); + phys_layout->mmu_l2_cnt = prfcnt_info->l2_count; + phys_layout->shader_cnt = fls64(prfcnt_info->core_mask); phys_layout->block_cnt = phys_layout->fe_cnt + phys_layout->tiler_cnt + phys_layout->mmu_l2_cnt + phys_layout->shader_cnt; - phys_layout->shader_avail_mask = gpu_info->core_mask; + phys_layout->shader_avail_mask = prfcnt_info->core_mask; phys_layout->headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; phys_layout->counters_per_block = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK; @@ -363,9 +355,12 @@ static void kbasep_hwcnt_backend_csf_init_layout( static void kbasep_hwcnt_backend_csf_reset_internal_buffers( struct kbase_hwcnt_backend_csf *backend_csf) { - memset(backend_csf->to_user_buf, 0, backend_csf->info->dump_bytes); - memset(backend_csf->accum_buf, 0, backend_csf->info->dump_bytes); - memset(backend_csf->old_sample_buf, 0, backend_csf->info->dump_bytes); + memset(backend_csf->to_user_buf, 0, + backend_csf->info->prfcnt_info.dump_bytes); + memset(backend_csf->accum_buf, 0, + backend_csf->info->prfcnt_info.dump_bytes); + memset(backend_csf->old_sample_buf, 0, + backend_csf->info->prfcnt_info.dump_bytes); } static void kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header( @@ -389,12 +384,12 @@ static void kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header( u32 idx; u32 *sample; char *cpu_dump_base; + size_t dump_bytes = backend_csf->info->prfcnt_info.dump_bytes; cpu_dump_base = (char 
*)backend_csf->ring_buf_cpu_base; for (idx = 0; idx < backend_csf->info->ring_buf_cnt; idx++) { - sample = (u32 *)&cpu_dump_base[idx * - backend_csf->info->dump_bytes]; + sample = (u32 *)&cpu_dump_base[idx * dump_bytes]; kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header( backend_csf, sample); } @@ -405,19 +400,20 @@ static void kbasep_hwcnt_backend_csf_update_user_sample( { /* Copy the data into the sample and wait for the user to get it. */ memcpy(backend_csf->to_user_buf, backend_csf->accum_buf, - backend_csf->info->dump_bytes); + backend_csf->info->prfcnt_info.dump_bytes); /* After copied data into user sample, clear the accumulator values to * prepare for the next accumulator, such as the next request or * threshold. */ - memset(backend_csf->accum_buf, 0, backend_csf->info->dump_bytes); + memset(backend_csf->accum_buf, 0, + backend_csf->info->prfcnt_info.dump_bytes); } static void kbasep_hwcnt_backend_csf_accumulate_sample( const struct kbase_hwcnt_csf_physical_layout *phys_layout, size_t dump_bytes, u32 *accum_buf, const u32 *old_sample_buf, - const u32 *new_sample_buf) + const u32 *new_sample_buf, bool clearing_samples) { size_t block_idx, ctr_idx; const u32 *old_block = old_sample_buf; @@ -425,6 +421,8 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample( u32 *acc_block = accum_buf; for (block_idx = 0; block_idx < phys_layout->block_cnt; block_idx++) { + const u32 old_enable_mask = + old_block[phys_layout->offset_enable_mask]; const u32 new_enable_mask = new_block[phys_layout->offset_enable_mask]; @@ -442,11 +440,63 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample( phys_layout->headers_per_block * KBASE_HWCNT_VALUE_BYTES); - /* Accumulate the counters. 
*/ - for (ctr_idx = phys_layout->headers_per_block; - ctr_idx < phys_layout->values_per_block; - ctr_idx++) { - acc_block[ctr_idx] += new_block[ctr_idx]; + /* Accumulate counter samples + * + * When accumulating samples we need to take into + * account whether the counter sampling method involves + * clearing counters back to zero after each sample is + * taken. + * + * The intention for CSF was that all HW should use + * counters which wrap to zero when their maximum value + * is reached. This, combined with non-clearing + * sampling, enables multiple concurrent users to + * request samples without interfering with each other. + * + * However some early HW may not support wrapping + * counters, for these GPUs counters must be cleared on + * sample to avoid loss of data due to counters + * saturating at their maximum value. + */ + if (!clearing_samples) { + if (old_enable_mask == 0) { + /* Hardware block was previously + * unavailable. Accumulate the new + * counters only, as we know previous + * values are zeroes. + */ + for (ctr_idx = + phys_layout + ->headers_per_block; + ctr_idx < + phys_layout->values_per_block; + ctr_idx++) { + acc_block[ctr_idx] += + new_block[ctr_idx]; + } + } else { + /* Hardware block was previously + * available. Accumulate the delta + * between old and new counter values. 
+ */ + for (ctr_idx = + phys_layout + ->headers_per_block; + ctr_idx < + phys_layout->values_per_block; + ctr_idx++) { + acc_block[ctr_idx] += + new_block[ctr_idx] - + old_block[ctr_idx]; + } + } + } else { + for (ctr_idx = phys_layout->headers_per_block; + ctr_idx < phys_layout->values_per_block; + ctr_idx++) { + acc_block[ctr_idx] += + new_block[ctr_idx]; + } } } old_block += phys_layout->values_per_block; @@ -467,9 +517,11 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples( u32 insert_index_to_stop) { u32 raw_idx; + unsigned long flags; u8 *cpu_dump_base = (u8 *)backend_csf->ring_buf_cpu_base; const size_t ring_buf_cnt = backend_csf->info->ring_buf_cnt; - const size_t buf_dump_bytes = backend_csf->info->dump_bytes; + const size_t buf_dump_bytes = backend_csf->info->prfcnt_info.dump_bytes; + bool clearing_samples = backend_csf->info->prfcnt_info.clearing_samples; u32 *old_sample_buf = backend_csf->old_sample_buf; u32 *new_sample_buf; @@ -478,9 +530,10 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples( return; /* Sync all the buffers to CPU side before read the data. 
*/ - backend_csf->info->csf_if->ring_buf_sync( - backend_csf->info->csf_if->ctx, backend_csf->ring_buf, - extract_index_to_start, (insert_index_to_stop - 1), true); + backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx, + backend_csf->ring_buf, + extract_index_to_start, + insert_index_to_stop, true); /* Consider u32 wrap case, '!=' is used here instead of '<' operator */ for (raw_idx = extract_index_to_start; raw_idx != insert_index_to_stop; @@ -495,7 +548,8 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples( kbasep_hwcnt_backend_csf_accumulate_sample( &backend_csf->phys_layout, buf_dump_bytes, - backend_csf->accum_buf, old_sample_buf, new_sample_buf); + backend_csf->accum_buf, old_sample_buf, new_sample_buf, + clearing_samples); old_sample_buf = new_sample_buf; } @@ -514,23 +568,28 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples( } /* Sync zeroed buffers to avoid coherency issues on future use. */ - backend_csf->info->csf_if->ring_buf_sync( - backend_csf->info->csf_if->ctx, backend_csf->ring_buf, - extract_index_to_start, (insert_index_to_stop - 1), false); + backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx, + backend_csf->ring_buf, + extract_index_to_start, + insert_index_to_stop, false); /* After consuming all samples between extract_idx and insert_idx, * set the raw extract index to insert_idx so that the sample buffers * can be released back to the ring buffer pool. 
*/ + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); backend_csf->info->csf_if->set_extract_index( backend_csf->info->csf_if->ctx, insert_index_to_stop); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); } static void kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( struct kbase_hwcnt_backend_csf *backend_csf, enum kbase_hwcnt_backend_csf_enable_state new_state) { - lockdep_assert_held(&backend_csf->info->lock); + backend_csf->info->csf_if->assert_lock_held( + backend_csf->info->csf_if->ctx); if (backend_csf->enable_state != new_state) { backend_csf->enable_state = new_state; @@ -558,21 +617,19 @@ static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work) WARN_ON(!work); backend_csf = container_of(work, struct kbase_hwcnt_backend_csf, hwc_dump_work); - - spin_lock_irqsave(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); /* Assert the backend is not destroyed. */ WARN_ON(backend_csf != backend_csf->info->backend); /* The backend was disabled or had an error while the worker was being * launched. 
*/ - if (backend_csf->enable_state != - KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED && - backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { + if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE); WARN_ON(!completion_done(&backend_csf->dump_completed)); - spin_unlock_irqrestore(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->unlock( + backend_csf->info->csf_if->ctx, flags); return; } @@ -581,12 +638,14 @@ static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work) backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING; insert_index_to_acc = backend_csf->insert_index_to_accumulate; - spin_unlock_irqrestore(&backend_csf->info->lock, flags); /* Read the raw extract and insert indexes from the CSF interface. */ backend_csf->info->csf_if->get_indexes(backend_csf->info->csf_if->ctx, &extract_index, &insert_index); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); + /* Accumulate up to the insert we grabbed at the prfcnt request * interrupt. */ @@ -599,19 +658,18 @@ static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work) kbasep_hwcnt_backend_csf_update_user_sample(backend_csf); /* Dump done, set state back to COMPLETED for next request. */ - spin_lock_irqsave(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); /* Assert the backend is not destroyed. */ WARN_ON(backend_csf != backend_csf->info->backend); /* The backend was disabled or had an error while we were accumulating. 
*/ - if (backend_csf->enable_state != - KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED && - backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { + if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE); WARN_ON(!completion_done(&backend_csf->dump_completed)); - spin_unlock_irqrestore(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->unlock( + backend_csf->info->csf_if->ctx, flags); return; } @@ -621,7 +679,8 @@ static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work) /* Our work here is done - set the wait object and unblock waiters. */ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED; complete_all(&backend_csf->dump_completed); - spin_unlock_irqrestore(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); } /** @@ -643,20 +702,21 @@ static void kbasep_hwcnt_backend_csf_threshold_worker(struct work_struct *work) backend_csf = container_of(work, struct kbase_hwcnt_backend_csf, hwc_threshold_work); + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); + + /* Assert the backend is not destroyed. */ + WARN_ON(backend_csf != backend_csf->info->backend); /* Read the raw extract and insert indexes from the CSF interface. */ backend_csf->info->csf_if->get_indexes(backend_csf->info->csf_if->ctx, &extract_index, &insert_index); - spin_lock_irqsave(&backend_csf->info->lock, flags); - /* Assert the backend is not destroyed. */ - WARN_ON(backend_csf != backend_csf->info->backend); - /* The backend was disabled or had an error while the worker was being * launched. 
*/ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { - spin_unlock_irqrestore(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->unlock( + backend_csf->info->csf_if->ctx, flags); return; } @@ -667,14 +727,19 @@ static void kbasep_hwcnt_backend_csf_threshold_worker(struct work_struct *work) if ((backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) && (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED)) { - spin_unlock_irqrestore(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->unlock( + backend_csf->info->csf_if->ctx, flags); return; } - spin_unlock_irqrestore(&backend_csf->info->lock, flags); - - /* Accumulate everything we possibly can. We grabbed offsets before the - * spin lock, so we know it is not possible for a concurrent dump's - * insert_to_accumulate to exceed the insert we grabbed. + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); + + /* Accumulate everything we possibly can. We grabbed the insert index + * immediately after we acquired the lock but before we checked whether + * a concurrent dump was triggered. This ensures that if a concurrent + * dump was triggered between releasing the lock and now, we know for a + * fact that our insert will not exceed the concurrent dump's + * insert_to_accumulate, so we don't risk accumulating too much data. */ kbasep_hwcnt_backend_csf_accumulate_samples(backend_csf, extract_index, insert_index); @@ -685,45 +750,31 @@ static void kbasep_hwcnt_backend_csf_threshold_worker(struct work_struct *work) static void kbase_hwcnt_backend_csf_submit_dump_worker( struct kbase_hwcnt_backend_csf_info *csf_info) { - unsigned long flags; u32 extract_index; - u32 insert_index; WARN_ON(!csf_info); - - csf_info->csf_if->get_indexes(csf_info->csf_if->ctx, &extract_index, - &insert_index); - - spin_lock_irqsave(&csf_info->lock, flags); - - /* Make sure the backend exists and is in the correct state. 
- * A lot of things could have happened to it in the period before we - * acquired the lock. + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); + + WARN_ON(!kbasep_hwcnt_backend_csf_backend_exists(csf_info)); + WARN_ON(csf_info->backend->enable_state != + KBASE_HWCNT_BACKEND_CSF_ENABLED); + WARN_ON(csf_info->backend->dump_state != + KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT); + + /* Save insert index now so that the dump worker only accumulates the + * HWC data associated with this request. Extract index is not stored + * as that needs to be checked when accumulating to prevent re-reading + * buffers that have already been read and returned to the GPU. */ - if (kbasep_hwcnt_backend_csf_backend_exists(csf_info) && - (csf_info->backend->enable_state == - KBASE_HWCNT_BACKEND_CSF_ENABLED || - csf_info->backend->enable_state == - KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) && - csf_info->backend->dump_state == - KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT) { - csf_info->backend->insert_index_to_accumulate = insert_index; - csf_info->backend->dump_state = - KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED; - - /* Submit the accumulator task into the work queue. */ - while (true != queue_work(csf_info->backend->hwc_dump_workq, - &csf_info->backend->hwc_dump_work)) { - /* Spin until we have guaranteed the work has been - * submitted. - * Without this there is a potential race where a prior - * submission of the work may still technically be on - * the queue, even though all of its work is complete. - */ - } - } - - spin_unlock_irqrestore(&csf_info->lock, flags); + csf_info->csf_if->get_indexes( + csf_info->csf_if->ctx, &extract_index, + &csf_info->backend->insert_index_to_accumulate); + csf_info->backend->dump_state = + KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED; + + /* Submit the accumulator task into the work queue. 
*/ + queue_work(csf_info->backend->hwc_dump_workq, + &csf_info->backend->hwc_dump_work); } static void kbasep_hwcnt_backend_csf_get_physical_enable( @@ -753,59 +804,36 @@ static void kbasep_hwcnt_backend_csf_get_physical_enable( enable->clk_enable_map = enable_map->clk_enable_map; } -static int kbasep_hwcnt_backend_csf_dump_enable_impl( +/* CSF backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */ +static int kbasep_hwcnt_backend_csf_dump_enable_nolock( struct kbase_hwcnt_backend *backend, - const struct kbase_hwcnt_enable_map *enable_map, - struct kbase_hwcnt_backend_csf_if_enable *out_enable) + const struct kbase_hwcnt_enable_map *enable_map) { - unsigned long flags; struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; struct kbase_hwcnt_backend_csf_if_enable enable; - WARN_ON(!out_enable); - if (!backend_csf || !enable_map || (enable_map->metadata != backend_csf->info->metadata)) return -EINVAL; + backend_csf->info->csf_if->assert_lock_held( + backend_csf->info->csf_if->ctx); + kbasep_hwcnt_backend_csf_get_physical_enable(backend_csf, enable_map, &enable); - spin_lock_irqsave(&backend_csf->info->lock, flags); /* enable_state should be DISABLED before we transfer it to enabled */ - if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED) { - spin_unlock_irqrestore(&backend_csf->info->lock, flags); + if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED) return -EIO; - } backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; WARN_ON(!completion_done(&backend_csf->dump_completed)); kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( backend_csf, KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED); - spin_unlock_irqrestore(&backend_csf->info->lock, flags); - *out_enable = enable; - return 0; -} - -/* CSF backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */ -static int kbasep_hwcnt_backend_csf_dump_enable_nolock( - struct kbase_hwcnt_backend *backend, - const struct 
kbase_hwcnt_enable_map *enable_map) -{ - int errcode; - struct kbase_hwcnt_backend_csf *backend_csf = - (struct kbase_hwcnt_backend_csf *)backend; - struct kbase_hwcnt_backend_csf_if_enable enable; - - errcode = kbasep_hwcnt_backend_csf_dump_enable_impl(backend, enable_map, - &enable); - if (errcode) - return errcode; - - backend_csf->info->csf_if->dump_enable_nolock( - backend_csf->info->csf_if->ctx, backend_csf->ring_buf, &enable); + backend_csf->info->csf_if->dump_enable(backend_csf->info->csf_if->ctx, + backend_csf->ring_buf, &enable); kbasep_hwcnt_backend_csf_cc_initial_sample(backend_csf, enable_map); @@ -818,33 +846,33 @@ static int kbasep_hwcnt_backend_csf_dump_enable( const struct kbase_hwcnt_enable_map *enable_map) { int errcode; + unsigned long flags; struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; - struct kbase_hwcnt_backend_csf_if_enable enable; - - errcode = kbasep_hwcnt_backend_csf_dump_enable_impl(backend, enable_map, - &enable); - if (errcode) - return errcode; - - backend_csf->info->csf_if->dump_enable(backend_csf->info->csf_if->ctx, - backend_csf->ring_buf, &enable); - kbasep_hwcnt_backend_csf_cc_initial_sample(backend_csf, enable_map); + if (!backend_csf) + return -EINVAL; - return 0; + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); + errcode = kbasep_hwcnt_backend_csf_dump_enable_nolock(backend, + enable_map); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); + return errcode; } static void kbasep_hwcnt_backend_csf_wait_enable_transition_complete( struct kbase_hwcnt_backend_csf *backend_csf, unsigned long *lock_flags) { - lockdep_assert_held(&backend_csf->info->lock); + backend_csf->info->csf_if->assert_lock_held( + backend_csf->info->csf_if->ctx); while ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) || (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED)) { - 
spin_unlock_irqrestore(&backend_csf->info->lock, *lock_flags); + backend_csf->info->csf_if->unlock( + backend_csf->info->csf_if->ctx, *lock_flags); wait_event( backend_csf->enable_state_waitq, @@ -853,7 +881,8 @@ static void kbasep_hwcnt_backend_csf_wait_enable_transition_complete( (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED)); - spin_lock_irqsave(&backend_csf->info->lock, *lock_flags); + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, + lock_flags); } } @@ -868,7 +897,7 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend) WARN_ON(!backend_csf); - spin_lock_irqsave(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); /* Make sure we wait until any previous enable or disable have completed * before doing anything. @@ -882,7 +911,8 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend) /* If we are already disabled or in an unrecoverable error * state, there is nothing for us to do. */ - spin_unlock_irqrestore(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->unlock( + backend_csf->info->csf_if->ctx, flags); return; } @@ -901,7 +931,8 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend) WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE); WARN_ON(!completion_done(&backend_csf->dump_completed)); - spin_unlock_irqrestore(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); /* Block until any async work has completed. 
We have transitioned out of * the ENABLED state so we can guarantee no new work will concurrently @@ -909,23 +940,16 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend) */ flush_workqueue(backend_csf->hwc_dump_workq); + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); + if (do_disable) backend_csf->info->csf_if->dump_disable( backend_csf->info->csf_if->ctx); - spin_lock_irqsave(&backend_csf->info->lock, flags); - kbasep_hwcnt_backend_csf_wait_enable_transition_complete(backend_csf, &flags); switch (backend_csf->enable_state) { - case KBASE_HWCNT_BACKEND_CSF_DISABLED: - case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED: - case KBASE_HWCNT_BACKEND_CSF_ENABLED: - case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED: - case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR: - WARN_ON(true); - break; case KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER: kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED); @@ -935,9 +959,13 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend) backend_csf, KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR); break; + default: + WARN_ON(true); + break; } - spin_unlock_irqrestore(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); /* After disable, zero the header of all buffers in the ring buffer back * to 0 to prepare for the next enable. @@ -947,7 +975,7 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend) /* Sync zeroed buffers to avoid coherency issues on future use. */ backend_csf->info->csf_if->ring_buf_sync( backend_csf->info->csf_if->ctx, backend_csf->ring_buf, 0, - (backend_csf->info->ring_buf_cnt - 1), false); + backend_csf->info->ring_buf_cnt, false); /* Reset accumulator, old_sample_buf and user_sample to all-0 to prepare * for next enable. 
@@ -968,12 +996,27 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend, if (!backend_csf) return -EINVAL; - spin_lock_irqsave(&backend_csf->info->lock, flags); - /* Make sure we are enabled or becoming enabled. */ - if ((backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) && - (backend_csf->enable_state != - KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED)) { - spin_unlock_irqrestore(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); + + /* If we're transitioning to enabled there's nothing to accumulate, and + * the user dump buffer is already zeroed. We can just short circuit to + * the DUMP_COMPLETED state. + */ + if (backend_csf->enable_state == + KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) { + backend_csf->dump_state = + KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED; + *dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend); + kbasep_hwcnt_backend_csf_cc_update(backend_csf); + backend_csf->info->csf_if->unlock( + backend_csf->info->csf_if->ctx, flags); + return 0; + } + + /* Otherwise, make sure we're already enabled. */ + if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { + backend_csf->info->csf_if->unlock( + backend_csf->info->csf_if->ctx, flags); return -EIO; } @@ -983,27 +1026,14 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend, if ((backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) && (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED)) { - spin_unlock_irqrestore(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->unlock( + backend_csf->info->csf_if->ctx, flags); /* HWC is disabled or another dump is ongoing, or we are on * fault. */ return -EIO; } - /* If we are transitioning to enabled there is nothing to accumulate, - * and the user dump buffer is already zeroed. - * We can just short circuit to the DUMP_COMPLETED state. 
- */ - if (backend_csf->enable_state == - KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) { - backend_csf->dump_state = - KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED; - spin_unlock_irqrestore(&backend_csf->info->lock, flags); - *dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend); - kbasep_hwcnt_backend_csf_cc_update(backend_csf); - return 0; - } - /* Reset the completion so dump_wait() has something to wait on. */ reinit_completion(&backend_csf->dump_completed); @@ -1022,7 +1052,6 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend, backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT; } - spin_unlock_irqrestore(&backend_csf->info->lock, flags); /* CSF firmware might enter protected mode now, but still call request. * That is fine, as we changed state while holding the lock, so the @@ -1036,13 +1065,14 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend, *dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend); kbasep_hwcnt_backend_csf_cc_update(backend_csf); - if (do_request) { + if (do_request) backend_csf->info->csf_if->dump_request( backend_csf->info->csf_if->ctx); - } else { + else kbase_hwcnt_backend_csf_submit_dump_worker(backend_csf->info); - } + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); return 0; } @@ -1060,13 +1090,14 @@ kbasep_hwcnt_backend_csf_dump_wait(struct kbase_hwcnt_backend *backend) wait_for_completion(&backend_csf->dump_completed); - spin_lock_irqsave(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); /* Make sure the last dump actually succeeded. */ errcode = (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) ? 
0 : -EIO; - spin_unlock_irqrestore(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); return errcode; } @@ -1144,10 +1175,8 @@ kbasep_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_csf *backend_csf) destroy_workqueue(backend_csf->hwc_dump_workq); - if (backend_csf->info->csf_if->ring_buf_free) { - backend_csf->info->csf_if->ring_buf_free( - backend_csf->info->csf_if->ctx, backend_csf->ring_buf); - } + backend_csf->info->csf_if->ring_buf_free(backend_csf->info->csf_if->ctx, + backend_csf->ring_buf); kfree(backend_csf->accum_buf); backend_csf->accum_buf = NULL; @@ -1183,18 +1212,21 @@ kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info, goto alloc_error; backend_csf->info = csf_info; - kbasep_hwcnt_backend_csf_init_layout(&csf_info->gpu_info, + kbasep_hwcnt_backend_csf_init_layout(&csf_info->prfcnt_info, &backend_csf->phys_layout); - backend_csf->accum_buf = kzalloc(csf_info->dump_bytes, GFP_KERNEL); + backend_csf->accum_buf = + kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL); if (!backend_csf->accum_buf) goto err_alloc_acc_buf; - backend_csf->old_sample_buf = kzalloc(csf_info->dump_bytes, GFP_KERNEL); + backend_csf->old_sample_buf = + kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL); if (!backend_csf->old_sample_buf) goto err_alloc_pre_sample_buf; - backend_csf->to_user_buf = kzalloc(csf_info->dump_bytes, GFP_KERNEL); + backend_csf->to_user_buf = + kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL); if (!backend_csf->to_user_buf) goto err_alloc_user_sample_buf; @@ -1210,7 +1242,7 @@ kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info, /* Sync zeroed buffers to avoid coherency issues on use. 
*/ backend_csf->info->csf_if->ring_buf_sync( backend_csf->info->csf_if->ctx, backend_csf->ring_buf, 0, - (backend_csf->info->ring_buf_cnt - 1), false); + backend_csf->info->ring_buf_cnt, false); init_completion(&backend_csf->dump_completed); @@ -1278,17 +1310,17 @@ kbasep_hwcnt_backend_csf_init(const struct kbase_hwcnt_backend_info *info, /* If it was not created before, attach it to csf_info. * Use spin lock to avoid concurrent initialization. */ - spin_lock_irqsave(&csf_info->lock, flags); + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); if (csf_info->backend == NULL) { csf_info->backend = backend_csf; *out_backend = (struct kbase_hwcnt_backend *)backend_csf; success = true; - if (csf_info->unrecoverable_error_happened) { + if (csf_info->unrecoverable_error_happened) backend_csf->enable_state = KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR; - } } - spin_unlock_irqrestore(&csf_info->lock, flags); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); /* Destroy the new created backend if the backend has already created * before. In normal case, this won't happen if the client call init() @@ -1317,9 +1349,10 @@ static void kbasep_hwcnt_backend_csf_term(struct kbase_hwcnt_backend *backend) /* Set the backend in csf_info to NULL so we won't handle any external * notification anymore since we are terminating. 
*/ - spin_lock_irqsave(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); backend_csf->info->backend = NULL; - spin_unlock_irqrestore(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); kbasep_hwcnt_backend_csf_destroy(backend_csf); } @@ -1370,8 +1403,6 @@ static int kbasep_hwcnt_backend_csf_info_create( if (!info) return -ENOMEM; - spin_lock_init(&info->lock); - #if defined(CONFIG_MALI_PRFCNT_SET_SECONDARY) info->counter_set = KBASE_HWCNT_SET_SECONDARY; #elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY) @@ -1405,11 +1436,12 @@ kbasep_hwcnt_backend_csf_metadata(const struct kbase_hwcnt_backend_info *info) } static void kbasep_hwcnt_backend_csf_handle_unrecoverable_error( - struct kbase_hwcnt_backend_csf *backend_csf, unsigned long *lock_flags) + struct kbase_hwcnt_backend_csf *backend_csf) { bool do_disable = false; - lockdep_assert_held(&backend_csf->info->lock); + backend_csf->info->csf_if->assert_lock_held( + backend_csf->info->csf_if->ctx); /* We are already in or transitioning to the unrecoverable error state. * Early out. @@ -1451,18 +1483,16 @@ static void kbasep_hwcnt_backend_csf_handle_unrecoverable_error( * disabled, - we don't want to disable twice if an unrecoverable error * happens while we are disabling. 
*/ - if (do_disable) { - spin_unlock_irqrestore(&backend_csf->info->lock, *lock_flags); + if (do_disable) backend_csf->info->csf_if->dump_disable( backend_csf->info->csf_if->ctx); - spin_lock_irqsave(&backend_csf->info->lock, *lock_flags); - } } static void kbasep_hwcnt_backend_csf_handle_recoverable_error( - struct kbase_hwcnt_backend_csf *backend_csf, unsigned long *lock_flags) + struct kbase_hwcnt_backend_csf *backend_csf) { - lockdep_assert_held(&backend_csf->info->lock); + backend_csf->info->csf_if->assert_lock_held( + backend_csf->info->csf_if->ctx); switch (backend_csf->enable_state) { case KBASE_HWCNT_BACKEND_CSF_DISABLED: @@ -1478,8 +1508,8 @@ static void kbasep_hwcnt_backend_csf_handle_recoverable_error( /* A seemingly recoverable error that occurs while we are * transitioning to enabled is probably unrecoverable. */ - kbasep_hwcnt_backend_csf_handle_unrecoverable_error(backend_csf, - lock_flags); + kbasep_hwcnt_backend_csf_handle_unrecoverable_error( + backend_csf); return; case KBASE_HWCNT_BACKEND_CSF_ENABLED: /* Start transitioning to the disabled state. We can't wait for @@ -1496,14 +1526,8 @@ static void kbasep_hwcnt_backend_csf_handle_recoverable_error( backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; complete_all(&backend_csf->dump_completed); - /* Unlock spin lock before we call csf_if disable(). */ - spin_unlock_irqrestore(&backend_csf->info->lock, *lock_flags); - backend_csf->info->csf_if->dump_disable( backend_csf->info->csf_if->ctx); - - /* Lock spin lock again to match the spin lock pairs. 
*/ - spin_lock_irqsave(&backend_csf->info->lock, *lock_flags); return; } } @@ -1511,44 +1535,27 @@ static void kbasep_hwcnt_backend_csf_handle_recoverable_error( void kbase_hwcnt_backend_csf_protm_entered( struct kbase_hwcnt_backend_interface *iface) { - unsigned long flags; - struct kbase_hwcnt_backend_csf_info *csf_info; - struct kbase_hwcnt_backend_csf *backend_csf; - - csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + struct kbase_hwcnt_backend_csf_info *csf_info = + (struct kbase_hwcnt_backend_csf_info *)iface->info; - spin_lock_irqsave(&csf_info->lock, flags); + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); csf_info->fw_in_protected_mode = true; - /* Early out if the backend does not exist. */ - if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) { - spin_unlock_irqrestore(&csf_info->lock, flags); - return; - } - - backend_csf = csf_info->backend; - /* If we are not in REQUESTED state, we don't need to do the dumping. */ - if (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED) { - spin_unlock_irqrestore(&csf_info->lock, flags); - return; - } - backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT; - - spin_unlock_irqrestore(&csf_info->lock, flags); - kbase_hwcnt_backend_csf_submit_dump_worker(csf_info); + /* Call on_prfcnt_sample() to trigger collection of the protected mode + * entry auto-sample if there is currently a pending dump request. 
+ */ + kbase_hwcnt_backend_csf_on_prfcnt_sample(iface); } void kbase_hwcnt_backend_csf_protm_exited( struct kbase_hwcnt_backend_interface *iface) { - unsigned long flags; struct kbase_hwcnt_backend_csf_info *csf_info; csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; - spin_lock_irqsave(&csf_info->lock, flags); + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); csf_info->fw_in_protected_mode = false; - spin_unlock_irqrestore(&csf_info->lock, flags); } void kbase_hwcnt_backend_csf_on_unrecoverable_error( @@ -1559,18 +1566,17 @@ void kbase_hwcnt_backend_csf_on_unrecoverable_error( csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; - spin_lock_irqsave(&csf_info->lock, flags); + csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags); csf_info->unrecoverable_error_happened = true; /* Early out if the backend does not exist. */ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) { - spin_unlock_irqrestore(&csf_info->lock, flags); + csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); return; } - kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend, - &flags); + kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend); - spin_unlock_irqrestore(&csf_info->lock, flags); + csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); } void kbase_hwcnt_backend_csf_on_before_reset( @@ -1582,11 +1588,11 @@ void kbase_hwcnt_backend_csf_on_before_reset( csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; - spin_lock_irqsave(&csf_info->lock, flags); + csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags); csf_info->unrecoverable_error_happened = false; /* Early out if the backend does not exist. 
*/ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) { - spin_unlock_irqrestore(&csf_info->lock, flags); + csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); return; } backend_csf = csf_info->backend; @@ -1605,7 +1611,7 @@ void kbase_hwcnt_backend_csf_on_before_reset( * really matter, the power is being pulled. */ kbasep_hwcnt_backend_csf_handle_unrecoverable_error( - csf_info->backend, &flags); + csf_info->backend); } /* A reset is the only way to exit the unrecoverable error state */ @@ -1615,81 +1621,66 @@ void kbase_hwcnt_backend_csf_on_before_reset( backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED); } - spin_unlock_irqrestore(&csf_info->lock, flags); + csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); } void kbase_hwcnt_backend_csf_on_prfcnt_sample( struct kbase_hwcnt_backend_interface *iface) { - unsigned long flags; struct kbase_hwcnt_backend_csf_info *csf_info; struct kbase_hwcnt_backend_csf *backend_csf; csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); - spin_lock_irqsave(&csf_info->lock, flags); /* Early out if the backend does not exist. */ - if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) { - spin_unlock_irqrestore(&csf_info->lock, flags); + if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) return; - } backend_csf = csf_info->backend; /* If the current state is not REQUESTED, this HWC sample will be * skipped and processed in next dump_request. 
*/ - if (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED) { - spin_unlock_irqrestore(&csf_info->lock, flags); + if (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED) return; - } backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT; - spin_unlock_irqrestore(&csf_info->lock, flags); kbase_hwcnt_backend_csf_submit_dump_worker(csf_info); } void kbase_hwcnt_backend_csf_on_prfcnt_threshold( struct kbase_hwcnt_backend_interface *iface) { - unsigned long flags; struct kbase_hwcnt_backend_csf_info *csf_info; struct kbase_hwcnt_backend_csf *backend_csf; csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); - spin_lock_irqsave(&csf_info->lock, flags); /* Early out if the backend does not exist. */ - if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) { - spin_unlock_irqrestore(&csf_info->lock, flags); + if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) return; - } backend_csf = csf_info->backend; - if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) { + if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) /* Submit the threshold work into the work queue to consume the * available samples. */ queue_work(backend_csf->hwc_dump_workq, &backend_csf->hwc_threshold_work); - } - - spin_unlock_irqrestore(&csf_info->lock, flags); } void kbase_hwcnt_backend_csf_on_prfcnt_overflow( struct kbase_hwcnt_backend_interface *iface) { - unsigned long flags; struct kbase_hwcnt_backend_csf_info *csf_info; csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); - spin_lock_irqsave(&csf_info->lock, flags); /* Early out if the backend does not exist. */ - if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) { - spin_unlock_irqrestore(&csf_info->lock, flags); + if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) return; - } /* Called when an overflow occurs. 
We treat this as a recoverable error, * so we start transitioning to the disabled state. @@ -1698,27 +1689,21 @@ void kbase_hwcnt_backend_csf_on_prfcnt_overflow( * complex recovery code when we can just turn ourselves off instead for * a while. */ - kbasep_hwcnt_backend_csf_handle_recoverable_error(csf_info->backend, - &flags); - - spin_unlock_irqrestore(&csf_info->lock, flags); + kbasep_hwcnt_backend_csf_handle_recoverable_error(csf_info->backend); } void kbase_hwcnt_backend_csf_on_prfcnt_enable( struct kbase_hwcnt_backend_interface *iface) { - unsigned long flags; struct kbase_hwcnt_backend_csf_info *csf_info; struct kbase_hwcnt_backend_csf *backend_csf; csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); - spin_lock_irqsave(&csf_info->lock, flags); /* Early out if the backend does not exist. */ - if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) { - spin_unlock_irqrestore(&csf_info->lock, flags); + if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) return; - } backend_csf = csf_info->backend; if (backend_csf->enable_state == @@ -1735,27 +1720,22 @@ void kbase_hwcnt_backend_csf_on_prfcnt_enable( * we reset. */ kbasep_hwcnt_backend_csf_handle_unrecoverable_error( - csf_info->backend, &flags); + csf_info->backend); } - - spin_unlock_irqrestore(&csf_info->lock, flags); } void kbase_hwcnt_backend_csf_on_prfcnt_disable( struct kbase_hwcnt_backend_interface *iface) { - unsigned long flags; struct kbase_hwcnt_backend_csf_info *csf_info; struct kbase_hwcnt_backend_csf *backend_csf; csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); - spin_lock_irqsave(&csf_info->lock, flags); /* Early out if the backend does not exist. 
*/ - if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) { - spin_unlock_irqrestore(&csf_info->lock, flags); + if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) return; - } backend_csf = csf_info->backend; if (backend_csf->enable_state == @@ -1773,10 +1753,8 @@ void kbase_hwcnt_backend_csf_on_prfcnt_disable( * we reset. */ kbasep_hwcnt_backend_csf_handle_unrecoverable_error( - csf_info->backend, &flags); + csf_info->backend); } - - spin_unlock_irqrestore(&csf_info->lock, flags); } int kbase_hwcnt_backend_csf_metadata_init( @@ -1784,28 +1762,29 @@ int kbase_hwcnt_backend_csf_metadata_init( { int errcode; struct kbase_hwcnt_backend_csf_info *csf_info; + struct kbase_hwcnt_gpu_info gpu_info; if (!iface) return -EINVAL; csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; - WARN_ON(!csf_info->csf_if->get_gpu_info); - csf_info->csf_if->get_gpu_info(csf_info->csf_if->ctx, - &csf_info->dump_bytes, - &csf_info->gpu_info.l2_count, - &csf_info->gpu_info.core_mask, - &csf_info->gpu_info.clk_cnt); + WARN_ON(!csf_info->csf_if->get_prfcnt_info); + + csf_info->csf_if->get_prfcnt_info(csf_info->csf_if->ctx, + &csf_info->prfcnt_info); /* The clock domain counts should not exceed the number of maximum * number of clock regulators. 
*/ - if (csf_info->gpu_info.clk_cnt > BASE_MAX_NR_CLOCKS_REGULATORS) + if (csf_info->prfcnt_info.clk_cnt > BASE_MAX_NR_CLOCKS_REGULATORS) return -EIO; - errcode = kbase_hwcnt_csf_metadata_create(&csf_info->gpu_info, - csf_info->counter_set, - &csf_info->metadata); + gpu_info.l2_count = csf_info->prfcnt_info.l2_count; + gpu_info.core_mask = csf_info->prfcnt_info.core_mask; + gpu_info.clk_cnt = csf_info->prfcnt_info.clk_cnt; + errcode = kbase_hwcnt_csf_metadata_create( + &gpu_info, csf_info->counter_set, &csf_info->metadata); if (errcode) return errcode; @@ -1813,7 +1792,8 @@ int kbase_hwcnt_backend_csf_metadata_init( * Dump abstraction size should be exactly the same size and layout as * the physical dump size, for backwards compatibility. */ - WARN_ON(csf_info->dump_bytes != csf_info->metadata->dump_buf_bytes); + WARN_ON(csf_info->prfcnt_info.dump_bytes != + csf_info->metadata->dump_buf_bytes); return 0; } @@ -1868,8 +1848,7 @@ int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if, return 0; } -void kbase_hwcnt_backend_csf_destroy( - struct kbase_hwcnt_backend_interface *iface) +void kbase_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_interface *iface) { if (!iface) return; diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf.h b/mali_kbase/mali_kbase_hwcnt_backend_csf.h index 93938f0..7506274 100644 --- a/mali_kbase/mali_kbase_hwcnt_backend_csf.h +++ b/mali_kbase/mali_kbase_hwcnt_backend_csf.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -40,7 +40,7 @@ * @iface: Non-NULL pointer to backend interface structure that is filled * in on creation success. * - * Calls to iface->dump_enable_nolock() require kbdev->hwaccess_lock held. 
+ * Calls to iface->dump_enable_nolock() require the CSF Scheduler IRQ lock. * * Return: 0 on success, else error code. */ @@ -77,7 +77,7 @@ void kbase_hwcnt_backend_csf_destroy( struct kbase_hwcnt_backend_interface *iface); /** - * kbase_hwcnt_backend_csf_protm_entered() - CSf HWC backend function to receive + * kbase_hwcnt_backend_csf_protm_entered() - CSF HWC backend function to receive * notification that protected mode * has been entered. * @iface: Non-NULL pointer to HWC backend interface. @@ -86,7 +86,7 @@ void kbase_hwcnt_backend_csf_protm_entered( struct kbase_hwcnt_backend_interface *iface); /** - * kbase_hwcnt_backend_csf_protm_exited() - CSf HWC backend function to receive + * kbase_hwcnt_backend_csf_protm_exited() - CSF HWC backend function to receive * notification that protected mode has * been exited. * @iface: Non-NULL pointer to HWC backend interface. @@ -95,22 +95,20 @@ void kbase_hwcnt_backend_csf_protm_exited( struct kbase_hwcnt_backend_interface *iface); /** - * kbase_hwcnt_backend_csf_on_unrecoverable_error() - CSf HWC backend function - * to be called when an - * unrecoverable error - * occurs, such as the - * firmware has died or bus - * error, this puts us into - * the unrecoverable error - * state, which we can only - * get out of by a reset. + * kbase_hwcnt_backend_csf_on_unrecoverable_error() - CSF HWC backend function + * called when unrecoverable + * errors are detected. * @iface: Non-NULL pointer to HWC backend interface. + * + * This should be called on encountering errors that can only be recovered from + * with reset, or that may put HWC logic in state that could result in hang. For + * example, on bus error, or when FW becomes unresponsive. 
*/ void kbase_hwcnt_backend_csf_on_unrecoverable_error( struct kbase_hwcnt_backend_interface *iface); /** - * kbase_hwcnt_backend_csf_on_before_reset() - CSf HWC backend function to be + * kbase_hwcnt_backend_csf_on_before_reset() - CSF HWC backend function to be * called immediately before a * reset. Takes us out of the * unrecoverable error state, if we diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf_if.h b/mali_kbase/mali_kbase_hwcnt_backend_csf_if.h index e86d240..b4ddd31 100644 --- a/mali_kbase/mali_kbase_hwcnt_backend_csf_if.h +++ b/mali_kbase/mali_kbase_hwcnt_backend_csf_if.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -61,19 +61,63 @@ struct kbase_hwcnt_backend_csf_if_enable { }; /** - * typedef kbase_hwcnt_backend_csf_if_get_gpu_info_fn - Get GPU information - * @ctx: Non-NULL pointer to a CSF context. - * @dump_size: Non-NULL pointer to where the dump size of performance counter - * sample is stored. - * @l2_count: Non-NULL pointer to where the MMU L2 cache count is stored. - * @core_mask: Non-NULL pointer to where shader core mask is stored. + * struct kbase_hwcnt_backend_csf_if_prfcnt_info - Performance counter + * information. + * @dump_bytes: Bytes of GPU memory required to perform a performance + * counter dump. + * @l2_count: The MMU L2 cache count. + * @core_mask: Shader core mask. + * @clk_cnt: Clock domain count in the system. + * @clearing_samples: Indicates whether counters are cleared after each sample + * is taken. 
+ */ +struct kbase_hwcnt_backend_csf_if_prfcnt_info { + size_t dump_bytes; + size_t l2_count; + u64 core_mask; + u8 clk_cnt; + bool clearing_samples; +}; + +/** + * typedef kbase_hwcnt_backend_csf_if_assert_lock_held_fn - Assert that the + * backend spinlock is + * held. + * @ctx: Non-NULL pointer to a CSF context. + */ +typedef void (*kbase_hwcnt_backend_csf_if_assert_lock_held_fn)( + struct kbase_hwcnt_backend_csf_if_ctx *ctx); + +/** + * typedef kbase_hwcnt_backend_csf_if_lock_fn - Acquire backend spinlock. + * + * @ctx: Non-NULL pointer to a CSF context. + * @flags: Pointer to the memory location that would store the previous + * interrupt state. + */ +typedef void (*kbase_hwcnt_backend_csf_if_lock_fn)( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, unsigned long *flags); + +/** + * typedef kbase_hwcnt_backend_csf_if_unlock_fn - Release backend spinlock. * - * @clk_cnt: Non-NULL pointer to where clock domain count in the system is - * stored. + * @ctx: Non-NULL pointer to a CSF context. + * @flags: Previously stored interrupt state when Scheduler interrupt + * spinlock was acquired. */ -typedef void (*kbase_hwcnt_backend_csf_if_get_gpu_info_fn)( - struct kbase_hwcnt_backend_csf_if_ctx *ctx, size_t *dump_size, - size_t *l2_count, u64 *core_mask, u8 *clk_cnt); +typedef void (*kbase_hwcnt_backend_csf_if_unlock_fn)( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, unsigned long flags); + +/** + * typedef kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn - Get performance + * counter information. + * @ctx: Non-NULL pointer to a CSF context. + * @prfcnt_info: Non-NULL pointer to struct where performance counter + * information should be stored. 
+ */ +typedef void (*kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn)( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info); /** * typedef kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn - Allocate a ring buffer @@ -105,14 +149,13 @@ typedef int (*kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn)( * inclusive. * @buf_index_last: The last buffer index in the ring buffer to be synced, * exclusive. - * @for_cpu: The direction of sync to be applied. - * It is set to true when CPU cache needs to be invalidated - * before reading the ring buffer contents. And set to false - * when CPU cache needs to be flushed after writing to the - * ring buffer. + * @for_cpu: The direction of sync to be applied, set to true when CPU + * cache needs invalidating before reading the buffer, and set + * to false after CPU writes to flush these before this memory + * is overwritten by the GPU. * - * After HWC sample request is done in GPU side, we need to sync the dump memory - * to CPU side to access the HWC data. + * Flush cached HWC dump buffer data to ensure that all writes from GPU and CPU + * are correctly observed. */ typedef void (*kbase_hwcnt_backend_csf_if_ring_buf_sync_fn)( struct kbase_hwcnt_backend_csf_if_ctx *ctx, @@ -147,25 +190,10 @@ typedef u64 (*kbase_hwcnt_backend_csf_if_timestamp_ns_fn)( * @ctx: Non-NULL pointer to a CSF interface context. * @ring_buf: Non-NULL pointer to the ring buffer which used to setup the HWC. * @enable: Non-NULL pointer to the enable map of HWC. - */ -typedef void (*kbase_hwcnt_backend_csf_if_dump_enable_fn)( - struct kbase_hwcnt_backend_csf_if_ctx *ctx, - struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, - struct kbase_hwcnt_backend_csf_if_enable *enable); - -/** - * typedef kbase_hwcnt_backend_csf_if_dump_enable_nolock_fn - Setup and enable - * hardware counter - * in CSF interface. - * @ctx: Non-NULL pointer to a CSF interface context. 
- * @ring_buf: Non-NULL pointer to the ring buffer which used to setup the HWC. - * @enable: Non-NULL pointer to the enable map of HWC. * - * Exactly the same as kbase_hwcnt_backend_csf_if_dump_enable_fn(), except must - * be called in an atomic context with the spinlock documented by the specific - * backend interface held. + * Requires lock to be taken before calling. */ -typedef void (*kbase_hwcnt_backend_csf_if_dump_enable_nolock_fn)( +typedef void (*kbase_hwcnt_backend_csf_if_dump_enable_fn)( struct kbase_hwcnt_backend_csf_if_ctx *ctx, struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, struct kbase_hwcnt_backend_csf_if_enable *enable); @@ -174,13 +202,18 @@ typedef void (*kbase_hwcnt_backend_csf_if_dump_enable_nolock_fn)( * typedef kbase_hwcnt_backend_csf_if_dump_disable_fn - Disable hardware counter * in CSF interface. * @ctx: Non-NULL pointer to a CSF interface context. + * + * Requires lock to be taken before calling. */ typedef void (*kbase_hwcnt_backend_csf_if_dump_disable_fn)( struct kbase_hwcnt_backend_csf_if_ctx *ctx); /** * typedef kbase_hwcnt_backend_csf_if_dump_request_fn - Request a HWC dump. + * * @ctx: Non-NULL pointer to the interface context. + * + * Requires lock to be taken before calling. */ typedef void (*kbase_hwcnt_backend_csf_if_dump_request_fn)( struct kbase_hwcnt_backend_csf_if_ctx *ctx); @@ -189,9 +222,12 @@ typedef void (*kbase_hwcnt_backend_csf_if_dump_request_fn)( * typedef kbase_hwcnt_backend_csf_if_get_indexes_fn - Get current extract and * insert indexes of the * ring buffer. + * * @ctx: Non-NULL pointer to a CSF interface context. * @extract_index: Non-NULL pointer where current extract index to be saved. * @insert_index: Non-NULL pointer where current insert index to be saved. + * + * Requires lock to be taken before calling. 
*/ typedef void (*kbase_hwcnt_backend_csf_if_get_indexes_fn)( struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 *extract_index, @@ -201,8 +237,11 @@ typedef void (*kbase_hwcnt_backend_csf_if_get_indexes_fn)( * typedef kbase_hwcnt_backend_csf_if_set_extract_index_fn - Update the extract * index of the ring * buffer. + * * @ctx: Non-NULL pointer to a CSF interface context. * @extract_index: New extract index to be set. + * + * Requires lock to be taken before calling. */ typedef void (*kbase_hwcnt_backend_csf_if_set_extract_index_fn)( struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 extract_index); @@ -213,9 +252,11 @@ typedef void (*kbase_hwcnt_backend_csf_if_set_extract_index_fn)( * @ctx: Non-NULL pointer to a CSF interface context. * @cycle_counts: Non-NULL pointer to an array where cycle counts to be saved, * the array size should be at least as big as the number of - * clock domains returned by get_gpu_info interface. + * clock domains returned by get_prfcnt_info interface. * @clk_enable_map: An array of bitfields, each bit specifies an enabled clock * domain. + * + * Requires lock to be taken before calling. */ typedef void (*kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn)( struct kbase_hwcnt_backend_csf_if_ctx *ctx, u64 *cycle_counts, @@ -225,7 +266,11 @@ typedef void (*kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn)( * struct kbase_hwcnt_backend_csf_if - Hardware counter backend CSF virtual * interface. * @ctx: CSF interface context. - * @get_gpu_info: Function ptr to get HWC related information. + * @assert_lock_held: Function ptr to assert backend spinlock is held. + * @lock: Function ptr to acquire backend spinlock. + * @unlock: Function ptr to release backend spinlock. + * @get_prfcnt_info: Function ptr to get performance counter related + * information. * @ring_buf_alloc: Function ptr to allocate ring buffer for CSF HWC. * @ring_buf_sync: Function ptr to sync ring buffer to CPU. * @ring_buf_free: Function ptr to free ring buffer for CSF HWC. 
@@ -243,13 +288,15 @@ typedef void (*kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn)( */ struct kbase_hwcnt_backend_csf_if { struct kbase_hwcnt_backend_csf_if_ctx *ctx; - kbase_hwcnt_backend_csf_if_get_gpu_info_fn get_gpu_info; + kbase_hwcnt_backend_csf_if_assert_lock_held_fn assert_lock_held; + kbase_hwcnt_backend_csf_if_lock_fn lock; + kbase_hwcnt_backend_csf_if_unlock_fn unlock; + kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn get_prfcnt_info; kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn ring_buf_alloc; kbase_hwcnt_backend_csf_if_ring_buf_sync_fn ring_buf_sync; kbase_hwcnt_backend_csf_if_ring_buf_free_fn ring_buf_free; kbase_hwcnt_backend_csf_if_timestamp_ns_fn timestamp_ns; kbase_hwcnt_backend_csf_if_dump_enable_fn dump_enable; - kbase_hwcnt_backend_csf_if_dump_enable_nolock_fn dump_enable_nolock; kbase_hwcnt_backend_csf_if_dump_disable_fn dump_disable; kbase_hwcnt_backend_csf_if_dump_request_fn dump_request; kbase_hwcnt_backend_csf_if_get_indexes_fn get_indexes; diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c b/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c index 7a3b239..67ca4cb 100644 --- a/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c +++ b/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,7 +28,7 @@ #include <device/mali_kbase_device.h> #include "mali_kbase_hwcnt_gpu.h" #include "mali_kbase_hwcnt_types.h" -#include "csf/mali_gpu_csf_registers.h" +#include <uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h> #include "csf/mali_kbase_csf_firmware.h" #include "mali_kbase_hwcnt_backend_csf_if_fw.h" @@ -88,6 +88,50 @@ struct kbase_hwcnt_backend_csf_if_fw_ctx { struct kbase_ccswe ccswe_shader_cores; }; +static void kbasep_hwcnt_backend_csf_if_fw_assert_lock_held( + struct kbase_hwcnt_backend_csf_if_ctx *ctx) +{ + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; + struct kbase_device *kbdev; + + WARN_ON(!ctx); + + fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + kbdev = fw_ctx->kbdev; + + kbase_csf_scheduler_spin_lock_assert_held(kbdev); +} + +static void +kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + unsigned long *flags) +{ + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; + struct kbase_device *kbdev; + + WARN_ON(!ctx); + + fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + kbdev = fw_ctx->kbdev; + + kbase_csf_scheduler_spin_lock(kbdev, flags); +} + +static void kbasep_hwcnt_backend_csf_if_fw_unlock( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, unsigned long flags) +{ + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; + struct kbase_device *kbdev; + + WARN_ON(!ctx); + + fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + kbdev = fw_ctx->kbdev; + + kbase_csf_scheduler_spin_lock_assert_held(kbdev); + kbase_csf_scheduler_spin_unlock(kbdev, flags); +} + /** * kbasep_hwcnt_backend_csf_if_fw_on_freq_change() - On freq change callback * @@ -170,16 +214,18 @@ static void kbasep_hwcnt_backend_csf_if_fw_cc_disable( rtm, &fw_ctx->rate_listener); } -static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_info( - struct 
kbase_hwcnt_backend_csf_if_ctx *ctx, size_t *dump_size, - size_t *l2_count, u64 *core_mask, u8 *clk_cnt) +static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info) { #ifdef CONFIG_MALI_NO_MALI - *l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS; - *core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1; - *dump_size = KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * - KBASE_DUMMY_MODEL_BLOCK_SIZE; - *clk_cnt = 1; + prfcnt_info->l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS; + prfcnt_info->core_mask = + (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1; + prfcnt_info->dump_bytes = KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * + KBASE_DUMMY_MODEL_BLOCK_SIZE; + prfcnt_info->clk_cnt = 1; + prfcnt_info->clearing_samples = false; #else struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; struct kbase_device *kbdev; @@ -188,10 +234,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_info( u32 prfcnt_fw_size = 0; WARN_ON(!ctx); - WARN_ON(!dump_size); - WARN_ON(!l2_count); - WARN_ON(!core_mask); - WARN_ON(!clk_cnt); + WARN_ON(!prfcnt_info); fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; kbdev = fw_ctx->kbdev; @@ -199,12 +242,14 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_info( prfcnt_hw_size = (prfcnt_size & 0xFF) << 8; prfcnt_fw_size = (prfcnt_size >> 16) << 8; fw_ctx->buf_bytes = prfcnt_hw_size + prfcnt_fw_size; - *dump_size = fw_ctx->buf_bytes; + prfcnt_info->dump_bytes = fw_ctx->buf_bytes; - *l2_count = kbdev->gpu_props.props.l2_props.num_l2_slices; - *core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask; + prfcnt_info->l2_count = kbdev->gpu_props.props.l2_props.num_l2_slices; + prfcnt_info->core_mask = + kbdev->gpu_props.props.coherency_info.group[0].core_mask; - *clk_cnt = fw_ctx->clk_cnt; + prfcnt_info->clk_cnt = fw_ctx->clk_cnt; + prfcnt_info->clearing_samples = true; #endif } @@ -331,9 +376,14 @@ static void 
kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync( WARN_ON(!ctx); WARN_ON(!ring_buf); - /* Get the buffer indexes in the ring buffer. */ + /* The index arguments for this function form an inclusive, exclusive + * range. + * However, when masking back to the available buffers we will make this + * inclusive at both ends so full flushes are not 0 -> 0. + */ ring_buf_index_first = buf_index_first & (fw_ring_buf->buf_count - 1); - ring_buf_index_last = buf_index_last & (fw_ring_buf->buf_count - 1); + ring_buf_index_last = + (buf_index_last - 1) & (fw_ring_buf->buf_count - 1); /* The start address is the offset of the first buffer. */ start_address = fw_ctx->buf_bytes * ring_buf_index_first; @@ -348,6 +398,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync( /* sync the first part to the end of ring buffer. */ for (i = pg_first; i < fw_ring_buf->num_pages; i++) { struct page *pg = as_page(fw_ring_buf->phys[i]); + if (for_cpu) { kbase_sync_single_for_cpu(fw_ctx->kbdev, kbase_dma_addr(pg), @@ -367,6 +418,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync( for (i = pg_first; i <= pg_last; i++) { struct page *pg = as_page(fw_ring_buf->phys[i]); + if (for_cpu) { kbase_sync_single_for_cpu(fw_ctx->kbdev, kbase_dma_addr(pg), PAGE_SIZE, @@ -420,12 +472,11 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_free( } } -static void kbasep_hwcnt_backend_csf_if_fw_dump_enable_nolock( +static void kbasep_hwcnt_backend_csf_if_fw_dump_enable( struct kbase_hwcnt_backend_csf_if_ctx *ctx, struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, struct kbase_hwcnt_backend_csf_if_enable *enable) { - unsigned long flags; u32 prfcnt_config; struct kbase_device *kbdev; struct kbase_csf_global_iface *global_iface; @@ -437,18 +488,15 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_enable_nolock( WARN_ON(!ctx); WARN_ON(!ring_buf); WARN_ON(!enable); + kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); kbdev = fw_ctx->kbdev; global_iface = &kbdev->csf.global_iface; - 
lockdep_assert_held(&kbdev->hwaccess_lock); - /* Configure */ prfcnt_config = fw_ring_buf->buf_count; prfcnt_config |= enable->counter_set << PRFCNT_CONFIG_SETSELECT_SHIFT; - kbase_csf_scheduler_spin_lock(kbdev, &flags); - /* Configure the ring buffer base address */ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_JASID, fw_ring_buf->as_nr); @@ -503,52 +551,25 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_enable_nolock( prfcnt_config = kbase_csf_firmware_global_input_read(global_iface, GLB_PRFCNT_CONFIG); - kbase_csf_scheduler_spin_unlock(kbdev, flags); - kbasep_hwcnt_backend_csf_if_fw_cc_enable(fw_ctx, enable->clk_enable_map); } -static void kbasep_hwcnt_backend_csf_if_fw_dump_enable( - struct kbase_hwcnt_backend_csf_if_ctx *ctx, - struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, - struct kbase_hwcnt_backend_csf_if_enable *enable) -{ - unsigned long flags; - struct kbase_device *kbdev; - struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = - (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; - - WARN_ON(!ctx); - WARN_ON(!ring_buf); - WARN_ON(!enable); - - kbdev = fw_ctx->kbdev; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - kbasep_hwcnt_backend_csf_if_fw_dump_enable_nolock(ctx, ring_buf, - enable); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -} - static void kbasep_hwcnt_backend_csf_if_fw_dump_disable( struct kbase_hwcnt_backend_csf_if_ctx *ctx) { - unsigned long flags; struct kbase_device *kbdev; struct kbase_csf_global_iface *global_iface; struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; WARN_ON(!ctx); + kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); kbdev = fw_ctx->kbdev; global_iface = &kbdev->csf.global_iface; /* Disable the HWC */ - kbase_csf_scheduler_spin_lock(kbdev, &flags); kbdev->csf.hwcnt.enable_pending = true; kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, 0, GLB_REQ_PRFCNT_ENABLE_MASK); @@ -569,7 +590,6 @@ static void 
kbasep_hwcnt_backend_csf_if_fw_dump_disable( * happens. */ kbdev->csf.hwcnt.request_pending = false; - kbase_csf_scheduler_spin_unlock(kbdev, flags); kbasep_hwcnt_backend_csf_if_fw_cc_disable(fw_ctx); } @@ -577,7 +597,6 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_disable( static void kbasep_hwcnt_backend_csf_if_fw_dump_request( struct kbase_hwcnt_backend_csf_if_ctx *ctx) { - unsigned long flags; u32 glb_req; struct kbase_device *kbdev; struct kbase_csf_global_iface *global_iface; @@ -585,57 +604,52 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_request( (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; WARN_ON(!ctx); + kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); kbdev = fw_ctx->kbdev; global_iface = &kbdev->csf.global_iface; /* Trigger dumping */ - kbase_csf_scheduler_spin_lock(kbdev, &flags); kbdev->csf.hwcnt.request_pending = true; glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ); glb_req ^= GLB_REQ_PRFCNT_SAMPLE_MASK; kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_req, GLB_REQ_PRFCNT_SAMPLE_MASK); kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); - kbase_csf_scheduler_spin_unlock(kbdev, flags); } static void kbasep_hwcnt_backend_csf_if_fw_get_indexes( struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 *extract_index, u32 *insert_index) { - unsigned long flags; struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; WARN_ON(!ctx); WARN_ON(!extract_index); WARN_ON(!insert_index); + kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); - kbase_csf_scheduler_spin_lock(fw_ctx->kbdev, &flags); *extract_index = kbase_csf_firmware_global_input_read( &fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_EXTRACT); *insert_index = kbase_csf_firmware_global_output( &fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_INSERT); - kbase_csf_scheduler_spin_unlock(fw_ctx->kbdev, flags); } static void kbasep_hwcnt_backend_csf_if_fw_set_extract_index( struct kbase_hwcnt_backend_csf_if_ctx *ctx, 
u32 extract_idx) { - unsigned long flags; struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; WARN_ON(!ctx); + kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); /* Set the raw extract index to release the buffer back to the ring * buffer. */ - kbase_csf_scheduler_spin_lock(fw_ctx->kbdev, &flags); kbase_csf_firmware_global_input(&fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_EXTRACT, extract_idx); - kbase_csf_scheduler_spin_unlock(fw_ctx->kbdev, flags); } static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count( @@ -649,6 +663,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count( WARN_ON(!ctx); WARN_ON(!cycle_counts); + kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); for (clk = 0; clk < fw_ctx->clk_cnt; clk++) { if (!(clk_enable_map & (1ull << clk))) @@ -749,14 +764,16 @@ int kbase_hwcnt_backend_csf_if_fw_create( return errcode; if_fw->ctx = (struct kbase_hwcnt_backend_csf_if_ctx *)ctx; - if_fw->get_gpu_info = kbasep_hwcnt_backend_csf_if_fw_get_gpu_info; + if_fw->assert_lock_held = + kbasep_hwcnt_backend_csf_if_fw_assert_lock_held; + if_fw->lock = kbasep_hwcnt_backend_csf_if_fw_lock; + if_fw->unlock = kbasep_hwcnt_backend_csf_if_fw_unlock; + if_fw->get_prfcnt_info = kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info; if_fw->ring_buf_alloc = kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc; if_fw->ring_buf_sync = kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync; if_fw->ring_buf_free = kbasep_hwcnt_backend_csf_if_fw_ring_buf_free; if_fw->timestamp_ns = kbasep_hwcnt_backend_csf_if_fw_timestamp_ns; if_fw->dump_enable = kbasep_hwcnt_backend_csf_if_fw_dump_enable; - if_fw->dump_enable_nolock = - kbasep_hwcnt_backend_csf_if_fw_dump_enable_nolock; if_fw->dump_disable = kbasep_hwcnt_backend_csf_if_fw_dump_disable; if_fw->dump_request = kbasep_hwcnt_backend_csf_if_fw_dump_request; if_fw->get_gpu_cycle_count = diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.h 
b/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.h index d72851e..f55efb6 100644 --- a/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.h +++ b/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/mali_kbase/mali_kbase_hwcnt_backend_jm.c b/mali_kbase/mali_kbase_hwcnt_backend_jm.c index c6c672c..4168472 100644 --- a/mali_kbase/mali_kbase_hwcnt_backend_jm.c +++ b/mali_kbase/mali_kbase_hwcnt_backend_jm.c @@ -62,6 +62,8 @@ struct kbase_hwcnt_backend_jm_info { * @enabled: True if dumping has been enabled, else false. * @pm_core_mask: PM state sync-ed shaders core mask for the enabled * dumping. + * @curr_config: Current allocated hardware resources to correctly map the src + * raw dump buffer to the dst dump buffer. * @clk_enable_map: The enable map specifying enabled clock domains. * @cycle_count_elapsed: * Cycle count elapsed for a given sample period. @@ -81,6 +83,7 @@ struct kbase_hwcnt_backend_jm { struct kbase_vmap_struct *vmap; bool enabled; u64 pm_core_mask; + struct kbase_hwcnt_curr_config curr_config; u64 clk_enable_map; u64 cycle_count_elapsed[BASE_MAX_NR_CLOCKS_REGULATORS]; u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS]; @@ -89,15 +92,16 @@ struct kbase_hwcnt_backend_jm { }; /** - * kbase_hwcnt_gpu_info_init() - Initialise an info structure used to create the - * hwcnt metadata. + * kbasep_hwcnt_backend_jm_gpu_info_init() - Initialise an info structure used + * to create the hwcnt metadata. * @kbdev: Non-NULL pointer to kbase device. * @info: Non-NULL pointer to data structure to be filled in. * * The initialised info struct will only be valid for use while kbdev is valid. 
*/ -static int kbase_hwcnt_gpu_info_init(struct kbase_device *kbdev, - struct kbase_hwcnt_gpu_info *info) +static int +kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev, + struct kbase_hwcnt_gpu_info *info) { size_t clk; @@ -240,6 +244,37 @@ static void kbasep_hwcnt_backend_jm_cc_disable( } +/** + * kbasep_hwcnt_gpu_update_curr_config() - Update the destination buffer with + * current config information. + * @kbdev: Non-NULL pointer to kbase device. + * @curr_config: Non-NULL pointer to return the current configuration of + * hardware allocated to the GPU. + * + * The current configuration information is used for architectures where the + * max_config interface is available from the Arbiter. In this case the current + * allocated hardware is not always the same, so the current config information + * is used to correctly map the current allocated resources to the memory layout + * that is copied to the user space. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_gpu_update_curr_config( + struct kbase_device *kbdev, + struct kbase_hwcnt_curr_config *curr_config) +{ + if (WARN_ON(!kbdev) || WARN_ON(!curr_config)) + return -EINVAL; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + curr_config->num_l2_slices = + kbdev->gpu_props.curr_config.l2_slices; + curr_config->shader_present = + kbdev->gpu_props.curr_config.shader_present; + return 0; +} + /* JM backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */ static u64 kbasep_hwcnt_backend_jm_timestamp_ns( struct kbase_hwcnt_backend *backend) @@ -287,11 +322,18 @@ static int kbasep_hwcnt_backend_jm_dump_enable_nolock( timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend); + /* Update the current configuration information. 
*/ + errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev, + &backend_jm->curr_config); + if (errcode) + goto error; + errcode = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable); if (errcode) goto error; backend_jm->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev); + backend_jm->enabled = true; kbasep_hwcnt_backend_jm_cc_enable(backend_jm, enable_map, timestamp_ns); @@ -372,7 +414,7 @@ static int kbasep_hwcnt_backend_jm_dump_request( size_t clk; int ret; - if (!backend_jm || !backend_jm->enabled) + if (!backend_jm || !backend_jm->enabled || !dump_time_ns) return -EINVAL; kbdev = backend_jm->kctx->kbdev; @@ -441,6 +483,11 @@ static int kbasep_hwcnt_backend_jm_dump_get( struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; size_t clk; +#ifdef CONFIG_MALI_NO_MALI + struct kbase_device *kbdev; + unsigned long flags; + int errcode; +#endif if (!backend_jm || !dst || !dst_enable_map || (backend_jm->info->metadata != dst->metadata) || @@ -460,9 +507,24 @@ static int kbasep_hwcnt_backend_jm_dump_get( dst->clk_cnt_buf[clk] = backend_jm->cycle_count_elapsed[clk]; } +#ifdef CONFIG_MALI_NO_MALI + kbdev = backend_jm->kctx->kbdev; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + /* Update the current configuration information. 
*/ + errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev, + &backend_jm->curr_config); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (errcode) + return errcode; +#endif + return kbase_hwcnt_jm_dump_get(dst, backend_jm->cpu_dump_va, dst_enable_map, backend_jm->pm_core_mask, - accumulate); + &backend_jm->curr_config, accumulate); } /** @@ -684,7 +746,7 @@ static int kbasep_hwcnt_backend_jm_info_create( WARN_ON(!kbdev); WARN_ON(!out_info); - errcode = kbase_hwcnt_gpu_info_init(kbdev, &hwcnt_gpu_info); + errcode = kbasep_hwcnt_backend_jm_gpu_info_init(kbdev, &hwcnt_gpu_info); if (errcode) return errcode; diff --git a/mali_kbase/mali_kbase_hwcnt_gpu.c b/mali_kbase/mali_kbase_hwcnt_gpu.c index 91d1f8c..4fba6b6 100644 --- a/mali_kbase/mali_kbase_hwcnt_gpu.c +++ b/mali_kbase/mali_kbase_hwcnt_gpu.c @@ -242,6 +242,13 @@ int kbase_hwcnt_jm_metadata_create( if (!gpu_info || !out_metadata || !out_dump_bytes) return -EINVAL; + /* + * For architectures where a max_config interface is available + * from the arbiter, the v5 dump bytes and the metadata v5 are + * based on the maximum possible allocation of the HW in the + * GPU cause it needs to be prepared for the worst case where + * all the available L2 cache and Shader cores are allocated. 
+ */ dump_bytes = kbasep_hwcnt_backend_jm_dump_bytes(gpu_info); errcode = kbasep_hwcnt_backend_gpu_metadata_create( gpu_info, false, counter_set, &metadata); @@ -260,8 +267,7 @@ int kbase_hwcnt_jm_metadata_create( return 0; } -void kbase_hwcnt_jm_metadata_destroy( - const struct kbase_hwcnt_metadata *metadata) +void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata) { if (!metadata) return; @@ -318,15 +324,41 @@ static bool is_block_type_shader( return is_shader; } +static bool is_block_type_l2_cache( + const u64 grp_type, + const u64 blk_type) +{ + bool is_l2_cache = false; + + switch (grp_type) { + case KBASE_HWCNT_GPU_GROUP_TYPE_V5: + if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS || + blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2) + is_l2_cache = true; + break; + default: + /* Warn on unknown group type */ + WARN_ON(true); + } + + return is_l2_cache; +} + int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src, const struct kbase_hwcnt_enable_map *dst_enable_map, - u64 pm_core_mask, bool accumulate) + u64 pm_core_mask, + const struct kbase_hwcnt_curr_config *curr_config, + bool accumulate) { const struct kbase_hwcnt_metadata *metadata; const u32 *dump_src; size_t src_offset, grp, blk, blk_inst; u64 core_mask = pm_core_mask; + /* Variables to deal with the current configuration */ + int l2_count = 0; + bool hw_res_available = true; + if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata)) return -EINVAL; @@ -348,15 +380,43 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src, const bool is_shader_core = is_block_type_shader( kbase_hwcnt_metadata_group_type(metadata, grp), blk_type, blk); + const bool is_l2_cache = is_block_type_l2_cache( + kbase_hwcnt_metadata_group_type(metadata, grp), + blk_type); + + /* + * If l2 blocks is greater than the current allocated number of + * L2 slices, there is no hw allocated to that block. 
+ */ + if (is_l2_cache) { + l2_count++; + if (l2_count > curr_config->num_l2_slices) + hw_res_available = false; + else + hw_res_available = true; + } + /* + * For the shader cores, the current shader_mask allocated is + * always a subgroup of the maximum shader_mask, so after + * jumping any L2 cache not available the available shader cores + * will always have a matching set of blk instances available to + * accumulate them. + */ + else { + hw_res_available = true; + } - /* Early out if no values in the dest block are enabled */ + /* + * Early out if no values in the dest block are enabled or if + * the resource target of the block is not available in the HW. + */ if (kbase_hwcnt_enable_map_block_enabled( dst_enable_map, grp, blk, blk_inst)) { u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( dst, grp, blk, blk_inst); const u32 *src_blk = dump_src + src_offset; - if (!is_shader_core || (core_mask & 1)) { + if ((!is_shader_core || (core_mask & 1)) && hw_res_available) { if (accumulate) { kbase_hwcnt_dump_buffer_block_accumulate( dst_blk, src_blk, hdr_cnt, @@ -372,7 +432,9 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src, } } - src_offset += (hdr_cnt + ctr_cnt); + /* Just increase the src_offset if the HW is available */ + if (hw_res_available) + src_offset += (hdr_cnt + ctr_cnt); if (is_shader_core) core_mask = core_mask >> 1; } @@ -380,10 +442,9 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src, return 0; } -int kbase_hwcnt_csf_dump_get( - struct kbase_hwcnt_dump_buffer *dst, void *src, - const struct kbase_hwcnt_enable_map *dst_enable_map, - bool accumulate) +int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src, + const struct kbase_hwcnt_enable_map *dst_enable_map, + bool accumulate) { const struct kbase_hwcnt_metadata *metadata; const u32 *dump_src; diff --git a/mali_kbase/mali_kbase_hwcnt_gpu.h b/mali_kbase/mali_kbase_hwcnt_gpu.h index 4ebff2d..9b846a9 100644 --- 
a/mali_kbase/mali_kbase_hwcnt_gpu.h +++ b/mali_kbase/mali_kbase_hwcnt_gpu.h @@ -128,6 +128,50 @@ struct kbase_hwcnt_gpu_info { }; /** + * struct kbase_hwcnt_curr_config - Current Configuration of HW allocated to the + * GPU. + * @num_l2_slices: Current number of L2 slices allocated to the GPU. + * @shader_present: Current shader present bitmap that is allocated to the GPU. + * + * For architectures with the max_config interface available from the Arbiter, + * the current resources allocated may change during runtime due to a + * re-partitioning (possible with partition manager). Thus, the HWC needs to be + * prepared to report any possible set of counters. For this reason the memory + * layout in the userspace is based on the maximum possible allocation. On the + * other hand, each partition has just the view of its currently allocated + * resources. Therefore, it is necessary to correctly map the dumped HWC values + * from the registers into this maximum memory layout so that it can be exposed + * to the userspace side correctly. + * + * For L2 cache just the number is enough once the allocated ones will be + * accumulated on the first L2 slots available in the destination buffer. + * + * For the correct mapping of the shader cores it is necessary to jump all the + * L2 cache slots in the destination buffer that are not allocated. But, it is + * not necessary to add any logic to map the shader cores bitmap into the memory + * layout because the shader_present allocated will always be a subset of the + * maximum shader_present. It is possible because: + * 1 - Partitions are made of slices and they are always ordered from the ones + * with more shader cores to the ones with less. + * 2 - The shader cores in a slice are always contiguous. + * 3 - A partition can only have a contiguous set of slices allocated to it. + * So, for example, if 4 slices are available in total, 1 with 4 cores, 2 with + * 3 cores and 1 with 2 cores. 
The maximum possible shader_present would be: + * 0x0011|0111|0111|1111 -> note the order and that the shader cores are + * contiguous in any slice. + * Supposing that a partition takes the two slices in the middle, the current + * config shader_present for this partition would be: + * 0x0111|0111 -> note that this is a subset of the maximum above and the slices + * are contiguous. + * Therefore, by directly copying any subset of the maximum possible + * shader_present the mapping is already achieved. + */ +struct kbase_hwcnt_curr_config { + size_t num_l2_slices; + u64 shader_present; +}; + +/** * kbase_hwcnt_jm_metadata_create() - Create hardware counter metadata for the * JM GPUs. * @info: Non-NULL pointer to info struct. @@ -186,6 +230,8 @@ void kbase_hwcnt_csf_metadata_destroy( * kbase_hwcnt_jm_metadata_create. * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. * @pm_core_mask: PM state synchronized shaders core mask with the dump. + * @curr_config: Current allocated hardware resources to correctly map the + * src raw dump buffer to the dst dump buffer. * @accumulate: True if counters in src should be accumulated into dst, * rather than copied. * @@ -197,7 +243,9 @@ void kbase_hwcnt_csf_metadata_destroy( */ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src, const struct kbase_hwcnt_enable_map *dst_enable_map, - const u64 pm_core_mask, bool accumulate); + const u64 pm_core_mask, + const struct kbase_hwcnt_curr_config *curr_config, + bool accumulate); /** * kbase_hwcnt_csf_dump_get() - Copy or accumulate enabled counters from the raw @@ -217,10 +265,9 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src, * * Return: 0 on success, else error code. 
*/ -int kbase_hwcnt_csf_dump_get( - struct kbase_hwcnt_dump_buffer *dst, void *src, - const struct kbase_hwcnt_enable_map *dst_enable_map, - bool accumulate); +int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src, + const struct kbase_hwcnt_enable_map *dst_enable_map, + bool accumulate); /** * kbase_hwcnt_gpu_enable_map_to_physical() - Convert an enable map abstraction diff --git a/mali_kbase/mali_kbase_hwcnt_legacy.c b/mali_kbase/mali_kbase_hwcnt_legacy.c index bd523dd..e87dbbf 100644 --- a/mali_kbase/mali_kbase_hwcnt_legacy.c +++ b/mali_kbase/mali_kbase_hwcnt_legacy.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,7 +23,7 @@ #include "mali_kbase_hwcnt_virtualizer.h" #include "mali_kbase_hwcnt_types.h" #include "mali_kbase_hwcnt_gpu.h" -#include "mali_kbase_ioctl.h" +#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h> #include <linux/slab.h> #include <linux/uaccess.h> diff --git a/mali_kbase/mali_kbase_hwcnt_reader.h b/mali_kbase/mali_kbase_hwcnt_reader.h deleted file mode 100644 index 9f2172b..0000000 --- a/mali_kbase/mali_kbase_hwcnt_reader.h +++ /dev/null @@ -1,105 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * (C) COPYRIGHT 2015, 2020-2021 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -#ifndef _KBASE_HWCNT_READER_H_ -#define _KBASE_HWCNT_READER_H_ - -#include <stddef.h> - -/* The ids of ioctl commands. */ -#define KBASE_HWCNT_READER 0xBE -#define KBASE_HWCNT_READER_GET_HWVER _IOR(KBASE_HWCNT_READER, 0x00, u32) -#define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, u32) -#define KBASE_HWCNT_READER_DUMP _IOW(KBASE_HWCNT_READER, 0x10, u32) -#define KBASE_HWCNT_READER_CLEAR _IOW(KBASE_HWCNT_READER, 0x11, u32) -#define KBASE_HWCNT_READER_GET_BUFFER _IOC(_IOC_READ, KBASE_HWCNT_READER, 0x20,\ - offsetof(struct kbase_hwcnt_reader_metadata, cycles)) -#define KBASE_HWCNT_READER_GET_BUFFER_WITH_CYCLES _IOR(KBASE_HWCNT_READER, 0x20,\ - struct kbase_hwcnt_reader_metadata) -#define KBASE_HWCNT_READER_PUT_BUFFER _IOC(_IOC_WRITE, KBASE_HWCNT_READER, 0x21,\ - offsetof(struct kbase_hwcnt_reader_metadata, cycles)) -#define KBASE_HWCNT_READER_PUT_BUFFER_WITH_CYCLES _IOW(KBASE_HWCNT_READER, 0x21,\ - struct kbase_hwcnt_reader_metadata) -#define KBASE_HWCNT_READER_SET_INTERVAL _IOW(KBASE_HWCNT_READER, 0x30, u32) -#define KBASE_HWCNT_READER_ENABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x40, u32) -#define KBASE_HWCNT_READER_DISABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x41, u32) -#define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, u32) -#define KBASE_HWCNT_READER_GET_API_VERSION_WITH_FEATURES \ - _IOW(KBASE_HWCNT_READER, 0xFF, \ - struct kbase_hwcnt_reader_api_version) - -/** - * struct kbase_hwcnt_reader_metadata_cycles - GPU clock cycles - * @top: the number of cycles associated with the main clock for the - * GPU - * @shader_cores: the cycles that have elapsed on the GPU shader cores - */ -struct kbase_hwcnt_reader_metadata_cycles { - u64 top; - u64 shader_cores; -}; - -/** - * struct 
kbase_hwcnt_reader_metadata - hwcnt reader sample buffer metadata - * @timestamp: time when sample was collected - * @event_id: id of an event that triggered sample collection - * @buffer_idx: position in sampling area where sample buffer was stored - * @cycles: the GPU cycles that occurred since the last sample - */ -struct kbase_hwcnt_reader_metadata { - u64 timestamp; - u32 event_id; - u32 buffer_idx; - struct kbase_hwcnt_reader_metadata_cycles cycles; -}; - -/** - * enum base_hwcnt_reader_event - hwcnt dumping events - * @BASE_HWCNT_READER_EVENT_MANUAL: manual request for dump - * @BASE_HWCNT_READER_EVENT_PERIODIC: periodic dump - * @BASE_HWCNT_READER_EVENT_PREJOB: prejob dump request - * @BASE_HWCNT_READER_EVENT_POSTJOB: postjob dump request - * @BASE_HWCNT_READER_EVENT_COUNT: number of supported events - */ -enum base_hwcnt_reader_event { - BASE_HWCNT_READER_EVENT_MANUAL, - BASE_HWCNT_READER_EVENT_PERIODIC, - BASE_HWCNT_READER_EVENT_PREJOB, - BASE_HWCNT_READER_EVENT_POSTJOB, - - BASE_HWCNT_READER_EVENT_COUNT -}; - -#define KBASE_HWCNT_READER_API_VERSION_NO_FEATURE (0) -#define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_TOP (1 << 0) -#define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_SHADER_CORES (1 << 1) -/** - * struct kbase_hwcnt_reader_api_version - hwcnt reader API version - * @version: API version - * @features: available features in this API version - */ -struct kbase_hwcnt_reader_api_version { - u32 version; - u32 features; -}; - -#endif /* _KBASE_HWCNT_READER_H_ */ - diff --git a/mali_kbase/mali_kbase_ioctl.h b/mali_kbase/mali_kbase_ioctl.h deleted file mode 100644 index 36dfc34..0000000 --- a/mali_kbase/mali_kbase_ioctl.h +++ /dev/null @@ -1,841 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * (C) COPYRIGHT 2017-2021 ARM Limited. All rights reserved. 
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -#ifndef _KBASE_IOCTL_H_ -#define _KBASE_IOCTL_H_ - -#ifdef __cpluscplus -extern "C" { -#endif - -#include <asm-generic/ioctl.h> -#include <linux/types.h> - -#if MALI_USE_CSF -#include "csf/mali_kbase_csf_ioctl.h" -#else -#include "jm/mali_kbase_jm_ioctl.h" -#endif /* MALI_USE_CSF */ - -#define KBASE_IOCTL_TYPE 0x80 - -/** - * struct kbase_ioctl_set_flags - Set kernel context creation flags - * - * @create_flags: Flags - see base_context_create_flags - */ -struct kbase_ioctl_set_flags { - __u32 create_flags; -}; - -#define KBASE_IOCTL_SET_FLAGS \ - _IOW(KBASE_IOCTL_TYPE, 1, struct kbase_ioctl_set_flags) - -/** - * struct kbase_ioctl_get_gpuprops - Read GPU properties from the kernel - * - * @buffer: Pointer to the buffer to store properties into - * @size: Size of the buffer - * @flags: Flags - must be zero for now - * - * The ioctl will return the number of bytes stored into @buffer or an error - * on failure (e.g. @size is too small). If @size is specified as 0 then no - * data will be written but the return value will be the number of bytes needed - * for all the properties. - * - * @flags may be used in the future to request a different format for the - * buffer. With @flags == 0 the following format is used. 
- * - * The buffer will be filled with pairs of values, a u32 key identifying the - * property followed by the value. The size of the value is identified using - * the bottom bits of the key. The value then immediately followed the key and - * is tightly packed (there is no padding). All keys and values are - * little-endian. - * - * 00 = u8 - * 01 = u16 - * 10 = u32 - * 11 = u64 - */ -struct kbase_ioctl_get_gpuprops { - __u64 buffer; - __u32 size; - __u32 flags; -}; - -#define KBASE_IOCTL_GET_GPUPROPS \ - _IOW(KBASE_IOCTL_TYPE, 3, struct kbase_ioctl_get_gpuprops) - -/** - * union kbase_ioctl_mem_alloc - Allocate memory on the GPU - * @in: Input parameters - * @in.va_pages: The number of pages of virtual address space to reserve - * @in.commit_pages: The number of physical pages to allocate - * @in.extension: The number of extra pages to allocate on each GPU fault which grows the region - * @in.flags: Flags - * @out: Output parameters - * @out.flags: Flags - * @out.gpu_va: The GPU virtual address which is allocated - */ -union kbase_ioctl_mem_alloc { - struct { - __u64 va_pages; - __u64 commit_pages; - __u64 extension; - __u64 flags; - } in; - struct { - __u64 flags; - __u64 gpu_va; - } out; -}; - -#define KBASE_IOCTL_MEM_ALLOC \ - _IOWR(KBASE_IOCTL_TYPE, 5, union kbase_ioctl_mem_alloc) - -/** - * struct kbase_ioctl_mem_query - Query properties of a GPU memory region - * @in: Input parameters - * @in.gpu_addr: A GPU address contained within the region - * @in.query: The type of query - * @out: Output parameters - * @out.value: The result of the query - * - * Use a %KBASE_MEM_QUERY_xxx flag as input for @query. 
- */ -union kbase_ioctl_mem_query { - struct { - __u64 gpu_addr; - __u64 query; - } in; - struct { - __u64 value; - } out; -}; - -#define KBASE_IOCTL_MEM_QUERY \ - _IOWR(KBASE_IOCTL_TYPE, 6, union kbase_ioctl_mem_query) - -#define KBASE_MEM_QUERY_COMMIT_SIZE ((u64)1) -#define KBASE_MEM_QUERY_VA_SIZE ((u64)2) -#define KBASE_MEM_QUERY_FLAGS ((u64)3) - -/** - * struct kbase_ioctl_mem_free - Free a memory region - * @gpu_addr: Handle to the region to free - */ -struct kbase_ioctl_mem_free { - __u64 gpu_addr; -}; - -#define KBASE_IOCTL_MEM_FREE \ - _IOW(KBASE_IOCTL_TYPE, 7, struct kbase_ioctl_mem_free) - -/** - * struct kbase_ioctl_hwcnt_reader_setup - Setup HWC dumper/reader - * @buffer_count: requested number of dumping buffers - * @fe_bm: counters selection bitmask (Front end) - * @shader_bm: counters selection bitmask (Shader) - * @tiler_bm: counters selection bitmask (Tiler) - * @mmu_l2_bm: counters selection bitmask (MMU_L2) - * - * A fd is returned from the ioctl if successful, or a negative value on error - */ -struct kbase_ioctl_hwcnt_reader_setup { - __u32 buffer_count; - __u32 fe_bm; - __u32 shader_bm; - __u32 tiler_bm; - __u32 mmu_l2_bm; -}; - -#define KBASE_IOCTL_HWCNT_READER_SETUP \ - _IOW(KBASE_IOCTL_TYPE, 8, struct kbase_ioctl_hwcnt_reader_setup) - -/** - * struct kbase_ioctl_hwcnt_enable - Enable hardware counter collection - * @dump_buffer: GPU address to write counters to - * @fe_bm: counters selection bitmask (Front end) - * @shader_bm: counters selection bitmask (Shader) - * @tiler_bm: counters selection bitmask (Tiler) - * @mmu_l2_bm: counters selection bitmask (MMU_L2) - */ -struct kbase_ioctl_hwcnt_enable { - __u64 dump_buffer; - __u32 fe_bm; - __u32 shader_bm; - __u32 tiler_bm; - __u32 mmu_l2_bm; -}; - -#define KBASE_IOCTL_HWCNT_ENABLE \ - _IOW(KBASE_IOCTL_TYPE, 9, struct kbase_ioctl_hwcnt_enable) - -#define KBASE_IOCTL_HWCNT_DUMP \ - _IO(KBASE_IOCTL_TYPE, 10) - -#define KBASE_IOCTL_HWCNT_CLEAR \ - _IO(KBASE_IOCTL_TYPE, 11) - -/** - * struct 
kbase_ioctl_hwcnt_values - Values to set dummy the dummy counters to. - * @data: Counter samples for the dummy model. - * @size: Size of the counter sample data. - * @padding: Padding. - */ -struct kbase_ioctl_hwcnt_values { - __u64 data; - __u32 size; - __u32 padding; -}; - -#define KBASE_IOCTL_HWCNT_SET \ - _IOW(KBASE_IOCTL_TYPE, 32, struct kbase_ioctl_hwcnt_values) - -/** - * struct kbase_ioctl_disjoint_query - Query the disjoint counter - * @counter: A counter of disjoint events in the kernel - */ -struct kbase_ioctl_disjoint_query { - __u32 counter; -}; - -#define KBASE_IOCTL_DISJOINT_QUERY \ - _IOR(KBASE_IOCTL_TYPE, 12, struct kbase_ioctl_disjoint_query) - -/** - * struct kbase_ioctl_get_ddk_version - Query the kernel version - * @version_buffer: Buffer to receive the kernel version string - * @size: Size of the buffer - * @padding: Padding - * - * The ioctl will return the number of bytes written into version_buffer - * (which includes a NULL byte) or a negative error code - * - * The ioctl request code has to be _IOW because the data in ioctl struct is - * being copied to the kernel, even though the kernel then writes out the - * version info to the buffer specified in the ioctl. - */ -struct kbase_ioctl_get_ddk_version { - __u64 version_buffer; - __u32 size; - __u32 padding; -}; - -#define KBASE_IOCTL_GET_DDK_VERSION \ - _IOW(KBASE_IOCTL_TYPE, 13, struct kbase_ioctl_get_ddk_version) - -/** - * struct kbase_ioctl_mem_jit_init_10_2 - Initialize the just-in-time memory - * allocator (between kernel driver - * version 10.2--11.4) - * @va_pages: Number of VA pages to reserve for JIT - * - * Note that depending on the VA size of the application and GPU, the value - * specified in @va_pages may be ignored. - * - * New code should use KBASE_IOCTL_MEM_JIT_INIT instead, this is kept for - * backwards compatibility. 
- */ -struct kbase_ioctl_mem_jit_init_10_2 { - __u64 va_pages; -}; - -#define KBASE_IOCTL_MEM_JIT_INIT_10_2 \ - _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_10_2) - -/** - * struct kbase_ioctl_mem_jit_init_11_5 - Initialize the just-in-time memory - * allocator (between kernel driver - * version 11.5--11.19) - * @va_pages: Number of VA pages to reserve for JIT - * @max_allocations: Maximum number of concurrent allocations - * @trim_level: Level of JIT allocation trimming to perform on free (0 - 100%) - * @group_id: Group ID to be used for physical allocations - * @padding: Currently unused, must be zero - * - * Note that depending on the VA size of the application and GPU, the value - * specified in @va_pages may be ignored. - * - * New code should use KBASE_IOCTL_MEM_JIT_INIT instead, this is kept for - * backwards compatibility. - */ -struct kbase_ioctl_mem_jit_init_11_5 { - __u64 va_pages; - __u8 max_allocations; - __u8 trim_level; - __u8 group_id; - __u8 padding[5]; -}; - -#define KBASE_IOCTL_MEM_JIT_INIT_11_5 \ - _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_11_5) - -/** - * struct kbase_ioctl_mem_jit_init - Initialize the just-in-time memory - * allocator - * @va_pages: Number of GPU virtual address pages to reserve for just-in-time - * memory allocations - * @max_allocations: Maximum number of concurrent allocations - * @trim_level: Level of JIT allocation trimming to perform on free (0 - 100%) - * @group_id: Group ID to be used for physical allocations - * @padding: Currently unused, must be zero - * @phys_pages: Maximum number of physical pages to allocate just-in-time - * - * Note that depending on the VA size of the application and GPU, the value - * specified in @va_pages may be ignored. 
- */ -struct kbase_ioctl_mem_jit_init { - __u64 va_pages; - __u8 max_allocations; - __u8 trim_level; - __u8 group_id; - __u8 padding[5]; - __u64 phys_pages; -}; - -#define KBASE_IOCTL_MEM_JIT_INIT \ - _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init) - -/** - * struct kbase_ioctl_mem_sync - Perform cache maintenance on memory - * - * @handle: GPU memory handle (GPU VA) - * @user_addr: The address where it is mapped in user space - * @size: The number of bytes to synchronise - * @type: The direction to synchronise: 0 is sync to memory (clean), - * 1 is sync from memory (invalidate). Use the BASE_SYNCSET_OP_xxx constants. - * @padding: Padding to round up to a multiple of 8 bytes, must be zero - */ -struct kbase_ioctl_mem_sync { - __u64 handle; - __u64 user_addr; - __u64 size; - __u8 type; - __u8 padding[7]; -}; - -#define KBASE_IOCTL_MEM_SYNC \ - _IOW(KBASE_IOCTL_TYPE, 15, struct kbase_ioctl_mem_sync) - -/** - * union kbase_ioctl_mem_find_cpu_offset - Find the offset of a CPU pointer - * - * @in: Input parameters - * @in.gpu_addr: The GPU address of the memory region - * @in.cpu_addr: The CPU address to locate - * @in.size: A size in bytes to validate is contained within the region - * @out: Output parameters - * @out.offset: The offset from the start of the memory region to @cpu_addr - */ -union kbase_ioctl_mem_find_cpu_offset { - struct { - __u64 gpu_addr; - __u64 cpu_addr; - __u64 size; - } in; - struct { - __u64 offset; - } out; -}; - -#define KBASE_IOCTL_MEM_FIND_CPU_OFFSET \ - _IOWR(KBASE_IOCTL_TYPE, 16, union kbase_ioctl_mem_find_cpu_offset) - -/** - * struct kbase_ioctl_get_context_id - Get the kernel context ID - * - * @id: The kernel context ID - */ -struct kbase_ioctl_get_context_id { - __u32 id; -}; - -#define KBASE_IOCTL_GET_CONTEXT_ID \ - _IOR(KBASE_IOCTL_TYPE, 17, struct kbase_ioctl_get_context_id) - -/** - * struct kbase_ioctl_tlstream_acquire - Acquire a tlstream fd - * - * @flags: Flags - * - * The ioctl returns a file descriptor when 
successful - */ -struct kbase_ioctl_tlstream_acquire { - __u32 flags; -}; - -#define KBASE_IOCTL_TLSTREAM_ACQUIRE \ - _IOW(KBASE_IOCTL_TYPE, 18, struct kbase_ioctl_tlstream_acquire) - -#define KBASE_IOCTL_TLSTREAM_FLUSH \ - _IO(KBASE_IOCTL_TYPE, 19) - -/** - * struct kbase_ioctl_mem_commit - Change the amount of memory backing a region - * - * @gpu_addr: The memory region to modify - * @pages: The number of physical pages that should be present - * - * The ioctl may return on the following error codes or 0 for success: - * -ENOMEM: Out of memory - * -EINVAL: Invalid arguments - */ -struct kbase_ioctl_mem_commit { - __u64 gpu_addr; - __u64 pages; -}; - -#define KBASE_IOCTL_MEM_COMMIT \ - _IOW(KBASE_IOCTL_TYPE, 20, struct kbase_ioctl_mem_commit) - -/** - * union kbase_ioctl_mem_alias - Create an alias of memory regions - * @in: Input parameters - * @in.flags: Flags, see BASE_MEM_xxx - * @in.stride: Bytes between start of each memory region - * @in.nents: The number of regions to pack together into the alias - * @in.aliasing_info: Pointer to an array of struct base_mem_aliasing_info - * @out: Output parameters - * @out.flags: Flags, see BASE_MEM_xxx - * @out.gpu_va: Address of the new alias - * @out.va_pages: Size of the new alias - */ -union kbase_ioctl_mem_alias { - struct { - __u64 flags; - __u64 stride; - __u64 nents; - __u64 aliasing_info; - } in; - struct { - __u64 flags; - __u64 gpu_va; - __u64 va_pages; - } out; -}; - -#define KBASE_IOCTL_MEM_ALIAS \ - _IOWR(KBASE_IOCTL_TYPE, 21, union kbase_ioctl_mem_alias) - -/** - * union kbase_ioctl_mem_import - Import memory for use by the GPU - * @in: Input parameters - * @in.flags: Flags, see BASE_MEM_xxx - * @in.phandle: Handle to the external memory - * @in.type: Type of external memory, see base_mem_import_type - * @in.padding: Amount of extra VA pages to append to the imported buffer - * @out: Output parameters - * @out.flags: Flags, see BASE_MEM_xxx - * @out.gpu_va: Address of the new alias - * @out.va_pages: Size 
of the new alias - */ -union kbase_ioctl_mem_import { - struct { - __u64 flags; - __u64 phandle; - __u32 type; - __u32 padding; - } in; - struct { - __u64 flags; - __u64 gpu_va; - __u64 va_pages; - } out; -}; - -#define KBASE_IOCTL_MEM_IMPORT \ - _IOWR(KBASE_IOCTL_TYPE, 22, union kbase_ioctl_mem_import) - -/** - * struct kbase_ioctl_mem_flags_change - Change the flags for a memory region - * @gpu_va: The GPU region to modify - * @flags: The new flags to set - * @mask: Mask of the flags to modify - */ -struct kbase_ioctl_mem_flags_change { - __u64 gpu_va; - __u64 flags; - __u64 mask; -}; - -#define KBASE_IOCTL_MEM_FLAGS_CHANGE \ - _IOW(KBASE_IOCTL_TYPE, 23, struct kbase_ioctl_mem_flags_change) - -/** - * struct kbase_ioctl_stream_create - Create a synchronisation stream - * @name: A name to identify this stream. Must be NULL-terminated. - * - * Note that this is also called a "timeline", but is named stream to avoid - * confusion with other uses of the word. - * - * Unused bytes in @name (after the first NULL byte) must be also be NULL bytes. - * - * The ioctl returns a file descriptor. 
- */ -struct kbase_ioctl_stream_create { - char name[32]; -}; - -#define KBASE_IOCTL_STREAM_CREATE \ - _IOW(KBASE_IOCTL_TYPE, 24, struct kbase_ioctl_stream_create) - -/** - * struct kbase_ioctl_fence_validate - Validate a fd refers to a fence - * @fd: The file descriptor to validate - */ -struct kbase_ioctl_fence_validate { - int fd; -}; - -#define KBASE_IOCTL_FENCE_VALIDATE \ - _IOW(KBASE_IOCTL_TYPE, 25, struct kbase_ioctl_fence_validate) - -/** - * struct kbase_ioctl_mem_profile_add - Provide profiling information to kernel - * @buffer: Pointer to the information - * @len: Length - * @padding: Padding - * - * The data provided is accessible through a debugfs file - */ -struct kbase_ioctl_mem_profile_add { - __u64 buffer; - __u32 len; - __u32 padding; -}; - -#define KBASE_IOCTL_MEM_PROFILE_ADD \ - _IOW(KBASE_IOCTL_TYPE, 27, struct kbase_ioctl_mem_profile_add) - -/** - * struct kbase_ioctl_sticky_resource_map - Permanently map an external resource - * @count: Number of resources - * @address: Array of u64 GPU addresses of the external resources to map - */ -struct kbase_ioctl_sticky_resource_map { - __u64 count; - __u64 address; -}; - -#define KBASE_IOCTL_STICKY_RESOURCE_MAP \ - _IOW(KBASE_IOCTL_TYPE, 29, struct kbase_ioctl_sticky_resource_map) - -/** - * struct kbase_ioctl_sticky_resource_map - Unmap a resource mapped which was - * previously permanently mapped - * @count: Number of resources - * @address: Array of u64 GPU addresses of the external resources to unmap - */ -struct kbase_ioctl_sticky_resource_unmap { - __u64 count; - __u64 address; -}; - -#define KBASE_IOCTL_STICKY_RESOURCE_UNMAP \ - _IOW(KBASE_IOCTL_TYPE, 30, struct kbase_ioctl_sticky_resource_unmap) - -/** - * union kbase_ioctl_mem_find_gpu_start_and_offset - Find the start address of - * the GPU memory region for - * the given gpu address and - * the offset of that address - * into the region - * @in: Input parameters - * @in.gpu_addr: GPU virtual address - * @in.size: Size in bytes within the 
region - * @out: Output parameters - * @out.start: Address of the beginning of the memory region enclosing @gpu_addr - * for the length of @offset bytes - * @out.offset: The offset from the start of the memory region to @gpu_addr - */ -union kbase_ioctl_mem_find_gpu_start_and_offset { - struct { - __u64 gpu_addr; - __u64 size; - } in; - struct { - __u64 start; - __u64 offset; - } out; -}; - -#define KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET \ - _IOWR(KBASE_IOCTL_TYPE, 31, union kbase_ioctl_mem_find_gpu_start_and_offset) - - -#define KBASE_IOCTL_CINSTR_GWT_START \ - _IO(KBASE_IOCTL_TYPE, 33) - -#define KBASE_IOCTL_CINSTR_GWT_STOP \ - _IO(KBASE_IOCTL_TYPE, 34) - -/** - * union kbase_ioctl_gwt_dump - Used to collect all GPU write fault addresses. - * @in: Input parameters - * @in.addr_buffer: Address of buffer to hold addresses of gpu modified areas. - * @in.size_buffer: Address of buffer to hold size of modified areas (in pages) - * @in.len: Number of addresses the buffers can hold. - * @in.padding: padding - * @out: Output parameters - * @out.no_of_addr_collected: Number of addresses collected into addr_buffer. - * @out.more_data_available: Status indicating if more addresses are available. - * @out.padding: padding - * - * This structure is used when performing a call to dump GPU write fault - * addresses. 
- */ -union kbase_ioctl_cinstr_gwt_dump { - struct { - __u64 addr_buffer; - __u64 size_buffer; - __u32 len; - __u32 padding; - - } in; - struct { - __u32 no_of_addr_collected; - __u8 more_data_available; - __u8 padding[27]; - } out; -}; - -#define KBASE_IOCTL_CINSTR_GWT_DUMP \ - _IOWR(KBASE_IOCTL_TYPE, 35, union kbase_ioctl_cinstr_gwt_dump) - -/** - * struct kbase_ioctl_mem_exec_init - Initialise the EXEC_VA memory zone - * - * @va_pages: Number of VA pages to reserve for EXEC_VA - */ -struct kbase_ioctl_mem_exec_init { - __u64 va_pages; -}; - -#define KBASE_IOCTL_MEM_EXEC_INIT \ - _IOW(KBASE_IOCTL_TYPE, 38, struct kbase_ioctl_mem_exec_init) - -/** - * union kbase_ioctl_get_cpu_gpu_timeinfo - Request zero or more types of - * cpu/gpu time (counter values) - * @in: Input parameters - * @in.request_flags: Bit-flags indicating the requested types. - * @in.paddings: Unused, size alignment matching the out. - * @out: Output parameters - * @out.sec: Integer field of the monotonic time, unit in seconds. - * @out.nsec: Fractional sec of the monotonic time, in nano-seconds. - * @out.padding: Unused, for u64 alignment - * @out.timestamp: System wide timestamp (counter) value. - * @out.cycle_counter: GPU cycle counter value. 
- */ -union kbase_ioctl_get_cpu_gpu_timeinfo { - struct { - __u32 request_flags; - __u32 paddings[7]; - } in; - struct { - __u64 sec; - __u32 nsec; - __u32 padding; - __u64 timestamp; - __u64 cycle_counter; - } out; -}; - -#define KBASE_IOCTL_GET_CPU_GPU_TIMEINFO \ - _IOWR(KBASE_IOCTL_TYPE, 50, union kbase_ioctl_get_cpu_gpu_timeinfo) - -/** - * struct kbase_ioctl_context_priority_check - Check the max possible priority - * @priority: Input priority & output priority - */ - -struct kbase_ioctl_context_priority_check { - __u8 priority; -}; - -#define KBASE_IOCTL_CONTEXT_PRIORITY_CHECK \ - _IOWR(KBASE_IOCTL_TYPE, 54, struct kbase_ioctl_context_priority_check) - -/*************** - * test ioctls * - ***************/ -#if MALI_UNIT_TEST -/* These ioctls are purely for test purposes and are not used in the production - * driver, they therefore may change without notice - */ - -#define KBASE_IOCTL_TEST_TYPE (KBASE_IOCTL_TYPE + 1) - -/** - * struct kbase_ioctl_tlstream_test - Start a timeline stream test - * - * @tpw_count: number of trace point writers in each context - * @msg_delay: time delay between tracepoints from one writer in milliseconds - * @msg_count: number of trace points written by one writer - * @aux_msg: if non-zero aux messages will be included - */ -struct kbase_ioctl_tlstream_test { - __u32 tpw_count; - __u32 msg_delay; - __u32 msg_count; - __u32 aux_msg; -}; - -#define KBASE_IOCTL_TLSTREAM_TEST \ - _IOW(KBASE_IOCTL_TEST_TYPE, 1, struct kbase_ioctl_tlstream_test) - -/** - * struct kbase_ioctl_tlstream_stats - Read tlstream stats for test purposes - * @bytes_collected: number of bytes read by user - * @bytes_generated: number of bytes generated by tracepoints - */ -struct kbase_ioctl_tlstream_stats { - __u32 bytes_collected; - __u32 bytes_generated; -}; - -#define KBASE_IOCTL_TLSTREAM_STATS \ - _IOR(KBASE_IOCTL_TEST_TYPE, 2, struct kbase_ioctl_tlstream_stats) - -#endif /* MALI_UNIT_TEST */ - -/* Customer extension range */ -#define KBASE_IOCTL_EXTRA_TYPE 
(KBASE_IOCTL_TYPE + 2) - -/* If the integration needs extra ioctl add them there - * like this: - * - * struct my_ioctl_args { - * .... - * } - * - * #define KBASE_IOCTL_MY_IOCTL \ - * _IOWR(KBASE_IOCTL_EXTRA_TYPE, 0, struct my_ioctl_args) - */ - - -/********************************** - * Definitions for GPU properties * - **********************************/ -#define KBASE_GPUPROP_VALUE_SIZE_U8 (0x0) -#define KBASE_GPUPROP_VALUE_SIZE_U16 (0x1) -#define KBASE_GPUPROP_VALUE_SIZE_U32 (0x2) -#define KBASE_GPUPROP_VALUE_SIZE_U64 (0x3) - -#define KBASE_GPUPROP_PRODUCT_ID 1 -#define KBASE_GPUPROP_VERSION_STATUS 2 -#define KBASE_GPUPROP_MINOR_REVISION 3 -#define KBASE_GPUPROP_MAJOR_REVISION 4 -/* 5 previously used for GPU speed */ -#define KBASE_GPUPROP_GPU_FREQ_KHZ_MAX 6 -/* 7 previously used for minimum GPU speed */ -#define KBASE_GPUPROP_LOG2_PROGRAM_COUNTER_SIZE 8 -#define KBASE_GPUPROP_TEXTURE_FEATURES_0 9 -#define KBASE_GPUPROP_TEXTURE_FEATURES_1 10 -#define KBASE_GPUPROP_TEXTURE_FEATURES_2 11 -#define KBASE_GPUPROP_GPU_AVAILABLE_MEMORY_SIZE 12 - -#define KBASE_GPUPROP_L2_LOG2_LINE_SIZE 13 -#define KBASE_GPUPROP_L2_LOG2_CACHE_SIZE 14 -#define KBASE_GPUPROP_L2_NUM_L2_SLICES 15 - -#define KBASE_GPUPROP_TILER_BIN_SIZE_BYTES 16 -#define KBASE_GPUPROP_TILER_MAX_ACTIVE_LEVELS 17 - -#define KBASE_GPUPROP_MAX_THREADS 18 -#define KBASE_GPUPROP_MAX_WORKGROUP_SIZE 19 -#define KBASE_GPUPROP_MAX_BARRIER_SIZE 20 -#define KBASE_GPUPROP_MAX_REGISTERS 21 -#define KBASE_GPUPROP_MAX_TASK_QUEUE 22 -#define KBASE_GPUPROP_MAX_THREAD_GROUP_SPLIT 23 -#define KBASE_GPUPROP_IMPL_TECH 24 - -#define KBASE_GPUPROP_RAW_SHADER_PRESENT 25 -#define KBASE_GPUPROP_RAW_TILER_PRESENT 26 -#define KBASE_GPUPROP_RAW_L2_PRESENT 27 -#define KBASE_GPUPROP_RAW_STACK_PRESENT 28 -#define KBASE_GPUPROP_RAW_L2_FEATURES 29 -#define KBASE_GPUPROP_RAW_CORE_FEATURES 30 -#define KBASE_GPUPROP_RAW_MEM_FEATURES 31 -#define KBASE_GPUPROP_RAW_MMU_FEATURES 32 -#define KBASE_GPUPROP_RAW_AS_PRESENT 33 -#define 
KBASE_GPUPROP_RAW_JS_PRESENT 34 -#define KBASE_GPUPROP_RAW_JS_FEATURES_0 35 -#define KBASE_GPUPROP_RAW_JS_FEATURES_1 36 -#define KBASE_GPUPROP_RAW_JS_FEATURES_2 37 -#define KBASE_GPUPROP_RAW_JS_FEATURES_3 38 -#define KBASE_GPUPROP_RAW_JS_FEATURES_4 39 -#define KBASE_GPUPROP_RAW_JS_FEATURES_5 40 -#define KBASE_GPUPROP_RAW_JS_FEATURES_6 41 -#define KBASE_GPUPROP_RAW_JS_FEATURES_7 42 -#define KBASE_GPUPROP_RAW_JS_FEATURES_8 43 -#define KBASE_GPUPROP_RAW_JS_FEATURES_9 44 -#define KBASE_GPUPROP_RAW_JS_FEATURES_10 45 -#define KBASE_GPUPROP_RAW_JS_FEATURES_11 46 -#define KBASE_GPUPROP_RAW_JS_FEATURES_12 47 -#define KBASE_GPUPROP_RAW_JS_FEATURES_13 48 -#define KBASE_GPUPROP_RAW_JS_FEATURES_14 49 -#define KBASE_GPUPROP_RAW_JS_FEATURES_15 50 -#define KBASE_GPUPROP_RAW_TILER_FEATURES 51 -#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_0 52 -#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_1 53 -#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_2 54 -#define KBASE_GPUPROP_RAW_GPU_ID 55 -#define KBASE_GPUPROP_RAW_THREAD_MAX_THREADS 56 -#define KBASE_GPUPROP_RAW_THREAD_MAX_WORKGROUP_SIZE 57 -#define KBASE_GPUPROP_RAW_THREAD_MAX_BARRIER_SIZE 58 -#define KBASE_GPUPROP_RAW_THREAD_FEATURES 59 -#define KBASE_GPUPROP_RAW_COHERENCY_MODE 60 - -#define KBASE_GPUPROP_COHERENCY_NUM_GROUPS 61 -#define KBASE_GPUPROP_COHERENCY_NUM_CORE_GROUPS 62 -#define KBASE_GPUPROP_COHERENCY_COHERENCY 63 -#define KBASE_GPUPROP_COHERENCY_GROUP_0 64 -#define KBASE_GPUPROP_COHERENCY_GROUP_1 65 -#define KBASE_GPUPROP_COHERENCY_GROUP_2 66 -#define KBASE_GPUPROP_COHERENCY_GROUP_3 67 -#define KBASE_GPUPROP_COHERENCY_GROUP_4 68 -#define KBASE_GPUPROP_COHERENCY_GROUP_5 69 -#define KBASE_GPUPROP_COHERENCY_GROUP_6 70 -#define KBASE_GPUPROP_COHERENCY_GROUP_7 71 -#define KBASE_GPUPROP_COHERENCY_GROUP_8 72 -#define KBASE_GPUPROP_COHERENCY_GROUP_9 73 -#define KBASE_GPUPROP_COHERENCY_GROUP_10 74 -#define KBASE_GPUPROP_COHERENCY_GROUP_11 75 -#define KBASE_GPUPROP_COHERENCY_GROUP_12 76 -#define KBASE_GPUPROP_COHERENCY_GROUP_13 77 -#define 
KBASE_GPUPROP_COHERENCY_GROUP_14 78 -#define KBASE_GPUPROP_COHERENCY_GROUP_15 79 - -#define KBASE_GPUPROP_TEXTURE_FEATURES_3 80 -#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_3 81 - -#define KBASE_GPUPROP_NUM_EXEC_ENGINES 82 - -#define KBASE_GPUPROP_RAW_THREAD_TLS_ALLOC 83 -#define KBASE_GPUPROP_TLS_ALLOC 84 -#define KBASE_GPUPROP_RAW_GPU_FEATURES 85 -#ifdef __cpluscplus -} -#endif - -#endif diff --git a/mali_kbase/mali_kbase_jd.c b/mali_kbase/mali_kbase_jd.c index 1cf24a2..949c041 100644 --- a/mali_kbase/mali_kbase_jd.c +++ b/mali_kbase/mali_kbase_jd.c @@ -74,7 +74,7 @@ static void jd_mark_atom_complete(struct kbase_jd_atom *katom) { katom->status = KBASE_JD_ATOM_STATE_COMPLETED; kbase_kinstr_jm_atom_complete(katom); - dev_dbg(katom->kctx->kbdev->dev, "Atom %p status to completed\n", + dev_dbg(katom->kctx->kbdev->dev, "Atom %pK status to completed\n", (void *)katom); } @@ -89,7 +89,7 @@ static bool jd_run_atom(struct kbase_jd_atom *katom) { struct kbase_context *kctx = katom->kctx; - dev_dbg(kctx->kbdev->dev, "JD run atom %p in kctx %p\n", + dev_dbg(kctx->kbdev->dev, "JD run atom %pK in kctx %pK\n", (void *)katom, (void *)kctx); KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); @@ -99,23 +99,23 @@ static bool jd_run_atom(struct kbase_jd_atom *katom) trace_sysgraph(SGR_SUBMIT, kctx->id, kbase_jd_atom_id(katom->kctx, katom)); jd_mark_atom_complete(katom); - return 0; + return false; } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { /* Soft-job */ if (katom->will_fail_event_code) { kbase_finish_soft_job(katom); jd_mark_atom_complete(katom); - return 0; + return false; } if (kbase_process_soft_job(katom) == 0) { kbase_finish_soft_job(katom); jd_mark_atom_complete(katom); } - return 0; + return false; } katom->status = KBASE_JD_ATOM_STATE_IN_JS; - dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n", (void *)katom); + dev_dbg(kctx->kbdev->dev, "Atom %pK status to in JS\n", (void *)katom); /* Queue an action about whether we should try scheduling a 
context */ return kbasep_js_add_job(kctx, katom); } @@ -758,7 +758,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, list_del(runnable_jobs.next); node->in_jd_list = false; - dev_dbg(kctx->kbdev->dev, "List node %p has status %d\n", + dev_dbg(kctx->kbdev->dev, "List node %pK has status %d\n", node, node->status); KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED); @@ -901,7 +901,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, unsigned long flags; enum kbase_jd_atom_state status; - dev_dbg(kbdev->dev, "User did JD submit atom %p\n", (void *)katom); + dev_dbg(kbdev->dev, "User did JD submit atom %pK\n", (void *)katom); /* Update the TOTAL number of jobs. This includes those not tracked by * the scheduler: 'not ready to run' and 'dependency-only' jobs. @@ -976,7 +976,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT; katom->status = KBASE_JD_ATOM_STATE_COMPLETED; dev_dbg(kbdev->dev, - "Atom %p status to completed\n", + "Atom %pK status to completed\n", (void *)katom); /* Wrong dependency setup. Atom will be sent @@ -1019,7 +1019,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, /* Atom has completed, propagate the error code if any */ katom->event_code = dep_atom->event_code; katom->status = KBASE_JD_ATOM_STATE_QUEUED; - dev_dbg(kbdev->dev, "Atom %p status to queued\n", + dev_dbg(kbdev->dev, "Atom %pK status to queued\n", (void *)katom); /* This atom will be sent back to user space. 
@@ -1062,7 +1062,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, */ katom->event_code = BASE_JD_EVENT_DONE; katom->status = KBASE_JD_ATOM_STATE_QUEUED; - dev_dbg(kbdev->dev, "Atom %p status to queued\n", (void *)katom); + dev_dbg(kbdev->dev, "Atom %pK status to queued\n", (void *)katom); /* For invalid priority, be most lenient and choose the default */ sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio); @@ -1199,7 +1199,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, bool need_to_try_schedule_context; katom->status = KBASE_JD_ATOM_STATE_IN_JS; - dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n", + dev_dbg(kctx->kbdev->dev, "Atom %pK status to in JS\n", (void *)katom); need_to_try_schedule_context = kbasep_js_add_job(kctx, katom); @@ -1270,7 +1270,7 @@ int kbase_jd_submit(struct kbase_context *kctx, if (unlikely(jd_atom_is_v2)) { if (copy_from_user(&user_atom.jc, user_addr, sizeof(struct base_jd_atom_v2)) != 0) { - dev_err(kbdev->dev, + dev_dbg(kbdev->dev, "Invalid atom address %p passed to job_submit\n", user_addr); err = -EFAULT; @@ -1281,7 +1281,7 @@ int kbase_jd_submit(struct kbase_context *kctx, user_atom.seq_nr = 0; } else { if (copy_from_user(&user_atom, user_addr, stride) != 0) { - dev_err(kbdev->dev, + dev_dbg(kbdev->dev, "Invalid atom address %p passed to job_submit\n", user_addr); err = -EFAULT; @@ -1420,7 +1420,7 @@ void kbase_jd_done_worker(struct work_struct *data) js_kctx_info = &kctx->jctx.sched_info; js_devdata = &kbdev->js_data; - dev_dbg(kbdev->dev, "Enter atom %p done worker for kctx %p\n", + dev_dbg(kbdev->dev, "Enter atom %pK done worker for kctx %pK\n", (void *)katom, (void *)kctx); KBASE_KTRACE_ADD_JM(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0); @@ -1444,7 +1444,7 @@ void kbase_jd_done_worker(struct work_struct *data) if (katom->event_code == BASE_JD_EVENT_STOPPED) { unsigned long flags; - dev_dbg(kbdev->dev, "Atom %p has been promoted to stopped\n", + dev_dbg(kbdev->dev, "Atom %pK 
has been promoted to stopped\n", (void *)katom); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); mutex_unlock(&js_devdata->queue_mutex); @@ -1452,7 +1452,7 @@ void kbase_jd_done_worker(struct work_struct *data) spin_lock_irqsave(&kbdev->hwaccess_lock, flags); katom->status = KBASE_JD_ATOM_STATE_IN_JS; - dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n", + dev_dbg(kctx->kbdev->dev, "Atom %pK status to in JS\n", (void *)katom); kbase_js_unpull(kctx, katom); @@ -1568,7 +1568,7 @@ void kbase_jd_done_worker(struct work_struct *data) KBASE_KTRACE_ADD_JM(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0); - dev_dbg(kbdev->dev, "Leave atom %p done worker for kctx %p\n", + dev_dbg(kbdev->dev, "Leave atom %pK done worker for kctx %pK\n", (void *)katom, (void *)kctx); } @@ -1698,7 +1698,7 @@ void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom) kctx = katom->kctx; KBASE_DEBUG_ASSERT(kctx != NULL); - dev_dbg(kbdev->dev, "JD: cancelling atom %p\n", (void *)katom); + dev_dbg(kbdev->dev, "JD: cancelling atom %pK\n", (void *)katom); KBASE_KTRACE_ADD_JM(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0); /* This should only be done from a context that is not scheduled */ diff --git a/mali_kbase/mali_kbase_jd_debugfs.c b/mali_kbase/mali_kbase_jd_debugfs.c index 940b920..f423758 100644 --- a/mali_kbase/mali_kbase_jd_debugfs.c +++ b/mali_kbase/mali_kbase_jd_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,7 +28,7 @@ #if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) #include <mali_kbase_sync.h> #endif -#include <mali_kbase_ioctl.h> +#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h> struct kbase_jd_debugfs_depinfo { u8 id; @@ -46,13 +46,13 @@ static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom, case BASE_JD_REQ_SOFT_FENCE_TRIGGER: res = kbase_sync_fence_out_info_get(atom, &info); if (res == 0) - seq_printf(sfile, "Sa([%p]%d) ", + seq_printf(sfile, "Sa([%pK]%d) ", info.fence, info.status); break; case BASE_JD_REQ_SOFT_FENCE_WAIT: res = kbase_sync_fence_in_info_get(atom, &info); if (res == 0) - seq_printf(sfile, "Wa([%p]%d) ", + seq_printf(sfile, "Wa([%pK]%d) ", info.fence, info.status); break; default: diff --git a/mali_kbase/mali_kbase_jm.c b/mali_kbase/mali_kbase_jm.c index be14b45..73e9905 100644 --- a/mali_kbase/mali_kbase_jm.c +++ b/mali_kbase/mali_kbase_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -45,7 +45,7 @@ static bool kbase_jm_next_job(struct kbase_device *kbdev, int js, kctx = kbdev->hwaccess.active_kctx[js]; dev_dbg(kbdev->dev, - "Trying to run the next %d jobs in kctx %p (s:%d)\n", + "Trying to run the next %d jobs in kctx %pK (s:%d)\n", nr_jobs_to_submit, (void *)kctx, js); if (!kctx) @@ -117,7 +117,7 @@ void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { if (kbdev->hwaccess.active_kctx[js] == kctx) { - dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n", + dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%d)\n", (void *)kctx, js); kbdev->hwaccess.active_kctx[js] = NULL; } @@ -129,7 +129,7 @@ struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev, { lockdep_assert_held(&kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, "Atom %p is returning with event code 0x%x\n", + dev_dbg(kbdev->dev, "Atom %pK is returning with event code 0x%x\n", (void *)katom, katom->event_code); if (katom->event_code != BASE_JD_EVENT_STOPPED && diff --git a/mali_kbase/mali_kbase_js.c b/mali_kbase/mali_kbase_js.c index ea317b2..6bb57e6 100644 --- a/mali_kbase/mali_kbase_js.c +++ b/mali_kbase/mali_kbase_js.c @@ -162,7 +162,7 @@ jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio) none_to_pull = RB_EMPTY_ROOT(&rb->runnable_tree); dev_dbg(kctx->kbdev->dev, - "Slot %d (prio %d) is %spullable in kctx %p\n", + "Slot %d (prio %d) is %spullable in kctx %pK\n", js, prio, none_to_pull ? 
"not " : "", kctx); return none_to_pull; @@ -186,7 +186,7 @@ jsctx_rb_none_to_pull(struct kbase_context *kctx, int js) lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - for (prio = KBASE_JS_ATOM_SCHED_PRIO_REALTIME; + for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { if (!jsctx_rb_none_to_pull_prio(kctx, js, prio)) return false; @@ -236,7 +236,7 @@ jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio, WARN_ON(!(entry->core_req & BASE_JD_REQ_END_RENDERPASS)); dev_dbg(kctx->kbdev->dev, - "Del runnable atom %p from X_DEP list\n", + "Del runnable atom %pK from X_DEP list\n", (void *)entry); list_del(&entry->queue); @@ -252,7 +252,7 @@ jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio, WARN_ON(!(entry->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)); dev_dbg(kctx->kbdev->dev, - "Del blocked atom %p from X_DEP list\n", + "Del blocked atom %pK from X_DEP list\n", (void *)entry); list_del(queue->x_dep_head.next); @@ -279,7 +279,7 @@ jsctx_queue_foreach(struct kbase_context *kctx, int js, { int prio; - for (prio = KBASE_JS_ATOM_SCHED_PRIO_REALTIME; + for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) jsctx_queue_foreach_prio(kctx, js, prio, callback); } @@ -303,7 +303,7 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) lockdep_assert_held(&kctx->kbdev->hwaccess_lock); dev_dbg(kctx->kbdev->dev, - "Peeking runnable tree of kctx %p for prio %d (s:%d)\n", + "Peeking runnable tree of kctx %pK for prio %d (s:%d)\n", (void *)kctx, prio, js); node = rb_first(&rb->runnable_tree); @@ -335,7 +335,7 @@ jsctx_rb_peek(struct kbase_context *kctx, int js) lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - for (prio = KBASE_JS_ATOM_SCHED_PRIO_REALTIME; + for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { struct kbase_jd_atom *katom; @@ -365,7 +365,7 @@ jsctx_rb_pull(struct kbase_context *kctx, struct 
kbase_jd_atom *katom) lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - dev_dbg(kctx->kbdev->dev, "Erasing atom %p from runnable tree of kctx %p\n", + dev_dbg(kctx->kbdev->dev, "Erasing atom %pK from runnable tree of kctx %pK\n", (void *)katom, (void *)kctx); /* Atoms must be pulled in the correct order. */ @@ -387,7 +387,7 @@ jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, "Adding atom %p to runnable tree of kctx %p (s:%d)\n", + dev_dbg(kbdev->dev, "Adding atom %pK to runnable tree of kctx %pK (s:%d)\n", (void *)katom, (void *)kctx, js); while (*new) { @@ -542,7 +542,7 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev) sema_init(&jsdd->schedule_sem, 1); for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) { - for (j = 0; j < KBASE_JS_ATOM_SCHED_PRIO_COUNT; ++j) { + for (j = KBASE_JS_ATOM_SCHED_PRIO_FIRST; j < KBASE_JS_ATOM_SCHED_PRIO_COUNT; ++j) { INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i][j]); INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i][j]); } @@ -610,7 +610,7 @@ int kbasep_js_kctx_init(struct kbase_context *const kctx) init_waitqueue_head(&js_kctx_info->ctx.is_scheduled_wait); - for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { + for (i = KBASE_JS_ATOM_SCHED_PRIO_FIRST; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { for (j = 0; j < BASE_JM_MAX_NR_SLOTS; j++) { INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].x_dep_head); kctx->jsctx_queue[i][j].runnable_tree = RB_ROOT; @@ -684,7 +684,7 @@ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, bool ret = false; lockdep_assert_held(&kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, "Add pullable tail kctx %p (s:%d)\n", + dev_dbg(kbdev->dev, "Add pullable tail kctx %pK (s:%d)\n", (void *)kctx, js); if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) @@ -726,7 +726,7 @@ static bool kbase_js_ctx_list_add_pullable_head_nolock( bool ret = false; 
lockdep_assert_held(&kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, "Add pullable head kctx %p (s:%d)\n", + dev_dbg(kbdev->dev, "Add pullable head kctx %pK (s:%d)\n", (void *)kctx, js); if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) @@ -802,7 +802,7 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, bool ret = false; lockdep_assert_held(&kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, "Add unpullable tail kctx %p (s:%d)\n", + dev_dbg(kbdev->dev, "Add unpullable tail kctx %pK (s:%d)\n", (void *)kctx, js); list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], @@ -885,7 +885,7 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock( lockdep_assert_held(&kbdev->hwaccess_lock); - for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { + for (i = KBASE_JS_ATOM_SCHED_PRIO_FIRST; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { if (list_empty(&kbdev->js_data.ctx_list_pullable[js][i])) continue; @@ -895,7 +895,7 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock( list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); dev_dbg(kbdev->dev, - "Popped %p from the pullable queue (s:%d)\n", + "Popped %pK from the pullable queue (s:%d)\n", (void *)kctx, js); return kctx; } @@ -949,25 +949,25 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, if (is_scheduled) { if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) { - dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %p\n", + dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %pK\n", (void *)kctx); return false; } } katom = jsctx_rb_peek(kctx, js); if (!katom) { - dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %p (s:%d)\n", + dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%d)\n", (void *)kctx, js); return false; /* No pullable atoms */ } if (kctx->blocked_js[js][katom->sched_priority]) { dev_dbg(kbdev->dev, - "JS: kctx %p is blocked from submitting atoms at priority %d (s:%d)\n", + "JS: kctx %pK is blocked from 
submitting atoms at priority %d (s:%d)\n", (void *)kctx, katom->sched_priority, js); return false; } if (atomic_read(&katom->blocked)) { - dev_dbg(kbdev->dev, "JS: Atom %p is blocked in js_ctx_pullable\n", + dev_dbg(kbdev->dev, "JS: Atom %pK is blocked in js_ctx_pullable\n", (void *)katom); return false; /* next atom blocked */ } @@ -976,20 +976,20 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || katom->x_pre_dep->will_fail_event_code) { dev_dbg(kbdev->dev, - "JS: X pre-dep %p is not present in slot FIFO or will fail\n", + "JS: X pre-dep %pK is not present in slot FIFO or will fail\n", (void *)katom->x_pre_dep); return false; } if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) { dev_dbg(kbdev->dev, - "JS: Atom %p has cross-slot fail dependency and atoms on slot (s:%d)\n", + "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%d)\n", (void *)katom, js); return false; } } - dev_dbg(kbdev->dev, "JS: Atom %p is pullable in kctx %p (s:%d)\n", + dev_dbg(kbdev->dev, "JS: Atom %pK is pullable in kctx %pK (s:%d)\n", (void *)katom, (void *)kctx, js); return true; @@ -1013,7 +1013,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, int dep_prio = dep_atom->sched_priority; dev_dbg(kbdev->dev, - "Checking dep %d of atom %p (s:%d) on %p (s:%d)\n", + "Checking dep %d of atom %pK (s:%d) on %pK (s:%d)\n", i, (void *)katom, js, (void *)dep_atom, dep_js); /* Dependent atom must already have been submitted */ @@ -1115,7 +1115,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, int dep_js = kbase_js_get_slot(kbdev, dep_atom); dev_dbg(kbdev->dev, - "Clearing dep %d of atom %p (s:%d) on %p (s:%d)\n", + "Clearing dep %d of atom %pK (s:%d) on %pK (s:%d)\n", i, (void *)katom, js, (void *)dep_atom, dep_js); @@ -1130,7 +1130,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, katom->atom_flags |= 
KBASE_KATOM_FLAG_X_DEP_BLOCKED; - dev_dbg(kbdev->dev, "Set X_DEP flag on atom %p\n", + dev_dbg(kbdev->dev, "Set X_DEP flag on atom %pK\n", (void *)katom); katom->x_pre_dep = dep_atom; @@ -1154,7 +1154,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, } } else { dev_dbg(kbdev->dev, - "Deps of atom %p (s:%d) could not be represented\n", + "Deps of atom %pK (s:%d) could not be represented\n", (void *)katom, js); } @@ -1195,7 +1195,7 @@ void kbase_js_update_ctx_priority(struct kbase_context *kctx) /* Determine the new priority for context, as per the priority * of currently in-use atoms. */ - for (prio = KBASE_JS_ATOM_SCHED_PRIO_REALTIME; + for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { if (kctx->atoms_count[prio]) { new_priority = prio; @@ -1237,7 +1237,7 @@ static int js_add_start_rp(struct kbase_jd_atom *const start_katom) if (rp->state != KBASE_JD_RP_COMPLETE) return -EINVAL; - dev_dbg(kctx->kbdev->dev, "JS add start atom %p of RP %d\n", + dev_dbg(kctx->kbdev->dev, "JS add start atom %pK of RP %d\n", (void *)start_katom, start_katom->renderpass_id); /* The following members are read when updating the job slot @@ -1280,7 +1280,7 @@ static int js_add_end_rp(struct kbase_jd_atom *const end_katom) rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; - dev_dbg(kbdev->dev, "JS add end atom %p in state %d of RP %d\n", + dev_dbg(kbdev->dev, "JS add end atom %pK in state %d of RP %d\n", (void *)end_katom, (int)rp->state, end_katom->renderpass_id); if (rp->state == KBASE_JD_RP_COMPLETE) @@ -1347,7 +1347,7 @@ bool kbasep_js_add_job(struct kbase_context *kctx, /* Refcount ctx.nr_jobs */ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX); ++(js_kctx_info->ctx.nr_jobs); - dev_dbg(kbdev->dev, "Add atom %p to kctx %p; now %d in ctx\n", + dev_dbg(kbdev->dev, "Add atom %pK to kctx %pK; now %d in ctx\n", (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); /* Lock for state available during IRQ */ @@ -1360,14 
+1360,14 @@ bool kbasep_js_add_job(struct kbase_context *kctx, /* Dependencies could not be represented */ --(js_kctx_info->ctx.nr_jobs); dev_dbg(kbdev->dev, - "Remove atom %p from kctx %p; now %d in ctx\n", + "Remove atom %pK from kctx %pK; now %d in ctx\n", (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); /* Setting atom status back to queued as it still has unresolved * dependencies */ atom->status = KBASE_JD_ATOM_STATE_QUEUED; - dev_dbg(kbdev->dev, "Atom %p status to queued\n", (void *)atom); + dev_dbg(kbdev->dev, "Atom %pK status to queued\n", (void *)atom); /* Undo the count, as the atom will get added again later but * leave the context priority adjusted or boosted, in case if @@ -1430,7 +1430,7 @@ bool kbasep_js_add_job(struct kbase_context *kctx, * context on the Queue */ KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); - dev_dbg(kbdev->dev, "JS: Enqueue Context %p", kctx); + dev_dbg(kbdev->dev, "JS: Enqueue Context %pK", kctx); /* Queue was updated - caller must try to schedule the * head context @@ -1439,7 +1439,7 @@ bool kbasep_js_add_job(struct kbase_context *kctx, } } out_unlock: - dev_dbg(kbdev->dev, "Enqueue of kctx %p is %srequired\n", + dev_dbg(kbdev->dev, "Enqueue of kctx %pK is %srequired\n", kctx, enqueue_required ? 
"" : "not "); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); @@ -1468,7 +1468,7 @@ void kbasep_js_remove_job(struct kbase_device *kbdev, KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0); --(js_kctx_info->ctx.nr_jobs); dev_dbg(kbdev->dev, - "Remove atom %p from kctx %p; now %d in ctx\n", + "Remove atom %pK from kctx %pK; now %d in ctx\n", (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -1660,7 +1660,7 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( /* Last reference, and we've been told to remove this context * from the Run Pool */ - dev_dbg(kbdev->dev, "JS: RunPool Remove Context %p because refcount=%d, jobs=%d, allowed=%d", + dev_dbg(kbdev->dev, "JS: RunPool Remove Context %pK because refcount=%d, jobs=%d, allowed=%d", kctx, new_ref_count, js_kctx_info->ctx.nr_jobs, kbasep_js_is_submit_allowed(js_devdata, kctx)); @@ -1670,7 +1670,7 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( for (slot = 0; slot < num_slots; slot++) { if (kbdev->hwaccess.active_kctx[slot] == kctx) { - dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n", + dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%d)\n", (void *)kctx, slot); kbdev->hwaccess.active_kctx[slot] = NULL; } @@ -1773,7 +1773,7 @@ void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, * happens asynchronously */ dev_dbg(kbdev->dev, - "JS: ** Killing Context %p on RunPool Remove **", kctx); + "JS: ** Killing Context %pK on RunPool Remove **", kctx); kbase_js_foreach_ctx_job(kctx, &kbase_jd_cancel); } } @@ -1879,7 +1879,7 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, bool kctx_suspended = false; int as_nr; - dev_dbg(kbdev->dev, "Scheduling kctx %p (s:%d)\n", kctx, js); + dev_dbg(kbdev->dev, "Scheduling kctx %pK (s:%d)\n", kctx, js); js_devdata = &kbdev->js_data; js_kctx_info = &kctx->jctx.sched_info; @@ -2025,7 +2025,7 @@ static bool kbase_js_use_ctx(struct kbase_device 
*kbdev, kbase_backend_use_ctx_sched(kbdev, kctx, js)) { dev_dbg(kbdev->dev, - "kctx %p already has ASID - mark as active (s:%d)\n", + "kctx %pK already has ASID - mark as active (s:%d)\n", (void *)kctx, js); if (kbdev->hwaccess.active_kctx[js] != kctx) { @@ -2200,7 +2200,7 @@ void kbasep_js_resume(struct kbase_device *kbdev) mutex_lock(&js_devdata->queue_mutex); for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { - for (prio = KBASE_JS_ATOM_SCHED_PRIO_REALTIME; + for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { struct kbase_context *kctx, *n; unsigned long flags; @@ -2336,7 +2336,7 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, int js = katom->slot_nr; struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; - dev_dbg(kctx->kbdev->dev, "Add atom %p to X_DEP list (s:%d)\n", + dev_dbg(kctx->kbdev->dev, "Add atom %pK to X_DEP list (s:%d)\n", (void *)katom, js); list_add_tail(&katom->queue, &queue->x_dep_head); @@ -2346,7 +2346,7 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, add_required = false; } } else { - dev_dbg(kctx->kbdev->dev, "Atom %p not added to X_DEP list\n", + dev_dbg(kctx->kbdev->dev, "Atom %pK not added to X_DEP list\n", (void *)katom); } @@ -2360,7 +2360,7 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, } dev_dbg(kctx->kbdev->dev, - "Enqueue of kctx %p is %srequired to submit atom %p\n", + "Enqueue of kctx %pK is %srequired to submit atom %pK\n", kctx, enqueue_required ? 
"" : "not ", katom); return enqueue_required; @@ -2387,7 +2387,7 @@ static void kbase_js_move_to_tree(struct kbase_jd_atom *katom) if (!kbase_js_atom_blocked_on_x_dep(katom)) { dev_dbg(kctx->kbdev->dev, - "Del atom %p from X_DEP list in js_move_to_tree\n", + "Del atom %pK from X_DEP list in js_move_to_tree\n", (void *)katom); list_del(&katom->queue); @@ -2405,7 +2405,7 @@ static void kbase_js_move_to_tree(struct kbase_jd_atom *katom) } } else { dev_dbg(kctx->kbdev->dev, - "Atom %p blocked on x-dep in js_move_to_tree\n", + "Atom %pK blocked on x-dep in js_move_to_tree\n", (void *)katom); break; } @@ -2449,7 +2449,7 @@ static void kbase_js_evict_deps(struct kbase_context *kctx, /* Remove dependency.*/ x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; - dev_dbg(kctx->kbdev->dev, "Cleared X_DEP flag on atom %p\n", + dev_dbg(kctx->kbdev->dev, "Cleared X_DEP flag on atom %pK\n", (void *)x_dep); /* Fail if it had a data dependency. */ @@ -2471,14 +2471,14 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) KBASE_DEBUG_ASSERT(kctx); kbdev = kctx->kbdev; - dev_dbg(kbdev->dev, "JS: pulling an atom from kctx %p (s:%d)\n", + dev_dbg(kbdev->dev, "JS: pulling an atom from kctx %pK (s:%d)\n", (void *)kctx, js); js_devdata = &kbdev->js_data; lockdep_assert_held(&kbdev->hwaccess_lock); if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) { - dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %p\n", + dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %pK\n", (void *)kctx); return NULL; } @@ -2491,18 +2491,18 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) katom = jsctx_rb_peek(kctx, js); if (!katom) { - dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %p (s:%d)\n", + dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%d)\n", (void *)kctx, js); return NULL; } if (kctx->blocked_js[js][katom->sched_priority]) { dev_dbg(kbdev->dev, - "JS: kctx %p is blocked from submitting atoms at priority %d (s:%d)\n", + "JS: kctx %pK is 
blocked from submitting atoms at priority %d (s:%d)\n", (void *)kctx, katom->sched_priority, js); return NULL; } if (atomic_read(&katom->blocked)) { - dev_dbg(kbdev->dev, "JS: Atom %p is blocked in js_pull\n", + dev_dbg(kbdev->dev, "JS: Atom %pK is blocked in js_pull\n", (void *)katom); return NULL; } @@ -2524,14 +2524,14 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || katom->x_pre_dep->will_fail_event_code) { dev_dbg(kbdev->dev, - "JS: X pre-dep %p is not present in slot FIFO or will fail\n", + "JS: X pre-dep %pK is not present in slot FIFO or will fail\n", (void *)katom->x_pre_dep); return NULL; } if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && kbase_backend_nr_atoms_on_slot(kbdev, js)) { dev_dbg(kbdev->dev, - "JS: Atom %p has cross-slot fail dependency and atoms on slot (s:%d)\n", + "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%d)\n", (void *)katom, js); return NULL; } @@ -2556,7 +2556,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) katom->ticks = 0; - dev_dbg(kbdev->dev, "JS: successfully pulled atom %p from kctx %p (s:%d)\n", + dev_dbg(kbdev->dev, "JS: successfully pulled atom %pK from kctx %pK (s:%d)\n", (void *)katom, (void *)kctx, js); return katom; @@ -2599,7 +2599,7 @@ static void js_return_of_start_rp(struct kbase_jd_atom *const start_katom) return; dev_dbg(kctx->kbdev->dev, - "JS return start atom %p in state %d of RP %d\n", + "JS return start atom %pK in state %d of RP %d\n", (void *)start_katom, (int)rp->state, start_katom->renderpass_id); @@ -2627,7 +2627,7 @@ static void js_return_of_start_rp(struct kbase_jd_atom *const start_katom) /* Prevent the tiler job being pulled for execution in the * job scheduler again. 
*/ - dev_dbg(kbdev->dev, "Blocking start atom %p\n", + dev_dbg(kbdev->dev, "Blocking start atom %pK\n", (void *)start_katom); atomic_inc(&start_katom->blocked); @@ -2639,14 +2639,14 @@ static void js_return_of_start_rp(struct kbase_jd_atom *const start_katom) /* Was the fragment job chain submitted to kbase yet? */ end_katom = rp->end_katom; if (end_katom) { - dev_dbg(kctx->kbdev->dev, "JS return add end atom %p\n", + dev_dbg(kctx->kbdev->dev, "JS return add end atom %pK\n", (void *)end_katom); if (rp->state == KBASE_JD_RP_RETRY_OOM) { /* Allow the end of the renderpass to be pulled for * execution again to continue incremental rendering. */ - dev_dbg(kbdev->dev, "Unblocking end atom %p\n", + dev_dbg(kbdev->dev, "Unblocking end atom %pK\n", (void *)end_katom); atomic_dec(&end_katom->blocked); WARN_ON(!(end_katom->atom_flags & @@ -2708,7 +2708,7 @@ static void js_return_of_end_rp(struct kbase_jd_atom *const end_katom) return; dev_dbg(kctx->kbdev->dev, - "JS return end atom %p in state %d of RP %d\n", + "JS return end atom %pK in state %d of RP %d\n", (void *)end_katom, (int)rp->state, end_katom->renderpass_id); if (WARN_ON(rp->state != KBASE_JD_RP_OOM && @@ -2730,14 +2730,14 @@ static void js_return_of_end_rp(struct kbase_jd_atom *const end_katom) spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); dev_dbg(kbdev->dev, - "Reset backing to %zu pages for region %p\n", + "Reset backing to %zu pages for region %pK\n", reg->threshold_pages, (void *)reg); if (!WARN_ON(reg->flags & KBASE_REG_VA_FREED)) kbase_mem_shrink(kctx, reg, reg->threshold_pages); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - dev_dbg(kbdev->dev, "Deleting region %p from list\n", + dev_dbg(kbdev->dev, "Deleting region %pK from list\n", (void *)reg); list_del_init(®->link); kbase_va_region_alloc_put(kctx, reg); @@ -2755,7 +2755,7 @@ static void js_return_of_end_rp(struct kbase_jd_atom *const end_katom) */ start_katom = rp->start_katom; if (!WARN_ON(!start_katom)) { - dev_dbg(kbdev->dev, 
"Unblocking start atom %p\n", + dev_dbg(kbdev->dev, "Unblocking start atom %pK\n", (void *)start_katom); atomic_dec(&start_katom->blocked); (void)kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, @@ -2781,7 +2781,7 @@ static void js_return_worker(struct work_struct *data) unsigned long flags; base_jd_core_req core_req = katom->core_req; - dev_dbg(kbdev->dev, "%s for atom %p with event code 0x%x\n", + dev_dbg(kbdev->dev, "%s for atom %pK with event code 0x%x\n", __func__, (void *)katom, katom->event_code); if (katom->event_code != BASE_JD_EVENT_END_RP_DONE) @@ -2826,12 +2826,12 @@ static void js_return_worker(struct work_struct *data) if (!atomic_read(&kctx->atoms_pulled)) { dev_dbg(kbdev->dev, - "No atoms currently pulled from context %p\n", + "No atoms currently pulled from context %pK\n", (void *)kctx); if (!kctx->slots_pullable) { dev_dbg(kbdev->dev, - "Context %p %s counted as runnable\n", + "Context %pK %s counted as runnable\n", (void *)kctx, kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF) ? "is" : "isn't"); @@ -2867,7 +2867,7 @@ static void js_return_worker(struct work_struct *data) if (context_idle) { dev_dbg(kbdev->dev, - "Context %p %s counted as active\n", + "Context %pK %s counted as active\n", (void *)kctx, kbase_ctx_flag(kctx, KCTX_ACTIVE) ? 
"is" : "isn't"); @@ -2906,13 +2906,13 @@ static void js_return_worker(struct work_struct *data) kbase_backend_complete_wq_post_sched(kbdev, core_req); - dev_dbg(kbdev->dev, "Leaving %s for atom %p\n", + dev_dbg(kbdev->dev, "Leaving %s for atom %pK\n", __func__, (void *)katom); } void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) { - dev_dbg(kctx->kbdev->dev, "Unpulling atom %p in kctx %p\n", + dev_dbg(kctx->kbdev->dev, "Unpulling atom %pK in kctx %pK\n", (void *)katom, (void *)kctx); lockdep_assert_held(&kctx->kbdev->hwaccess_lock); @@ -2967,7 +2967,7 @@ static bool js_complete_start_rp(struct kbase_context *kctx, return false; dev_dbg(kctx->kbdev->dev, - "Start atom %p is done in state %d of RP %d\n", + "Start atom %pK is done in state %d of RP %d\n", (void *)start_katom, (int)rp->state, start_katom->renderpass_id); @@ -2979,7 +2979,7 @@ static bool js_complete_start_rp(struct kbase_context *kctx, unsigned long flags; dev_dbg(kctx->kbdev->dev, - "Start atom %p completed before soft-stop\n", + "Start atom %pK completed before soft-stop\n", (void *)start_katom); kbase_gpu_vm_lock(kctx); @@ -2991,7 +2991,7 @@ static bool js_complete_start_rp(struct kbase_context *kctx, struct kbase_va_region, link); WARN_ON(reg->flags & KBASE_REG_VA_FREED); - dev_dbg(kctx->kbdev->dev, "Deleting region %p from list\n", + dev_dbg(kctx->kbdev->dev, "Deleting region %pK from list\n", (void *)reg); list_del_init(®->link); kbase_va_region_alloc_put(kctx, reg); @@ -3001,7 +3001,7 @@ static bool js_complete_start_rp(struct kbase_context *kctx, kbase_gpu_vm_unlock(kctx); } else { dev_dbg(kctx->kbdev->dev, - "Start atom %p did not exceed memory threshold\n", + "Start atom %pK did not exceed memory threshold\n", (void *)start_katom); WARN_ON(rp->state != KBASE_JD_RP_START && @@ -3018,7 +3018,7 @@ static bool js_complete_start_rp(struct kbase_context *kctx, /* Allow the end of the renderpass to be pulled for * execution again to continue incremental rendering. 
*/ - dev_dbg(kbdev->dev, "Unblocking end atom %p!\n", + dev_dbg(kbdev->dev, "Unblocking end atom %pK!\n", (void *)end_katom); atomic_dec(&end_katom->blocked); @@ -3062,7 +3062,7 @@ static void js_complete_end_rp(struct kbase_context *kctx, if (WARN_ON(rp->end_katom != end_katom)) return; - dev_dbg(kbdev->dev, "End atom %p is done in state %d of RP %d\n", + dev_dbg(kbdev->dev, "End atom %pK is done in state %d of RP %d\n", (void *)end_katom, (int)rp->state, end_katom->renderpass_id); if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE) || @@ -3096,7 +3096,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, kbdev = kctx->kbdev; atom_slot = katom->slot_nr; - dev_dbg(kbdev->dev, "%s for atom %p (s:%d)\n", + dev_dbg(kbdev->dev, "%s for atom %pK (s:%d)\n", __func__, (void *)katom, atom_slot); /* Update the incremental rendering state machine. @@ -3115,7 +3115,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) { - dev_dbg(kbdev->dev, "Atom %p is in runnable_tree\n", + dev_dbg(kbdev->dev, "Atom %pK is in runnable_tree\n", (void *)katom); context_idle = !atomic_dec_return(&kctx->atoms_pulled); @@ -3136,7 +3136,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, if (!kctx->atoms_pulled_slot_pri[atom_slot][prio] && kctx->blocked_js[atom_slot][prio]) { dev_dbg(kbdev->dev, - "kctx %p is no longer blocked from submitting on slot %d at priority %d\n", + "kctx %pK is no longer blocked from submitting on slot %d at priority %d\n", (void *)kctx, atom_slot, prio); kctx->blocked_js[atom_slot][prio] = false; @@ -3190,7 +3190,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, * jd_done_worker(). 
*/ if (context_idle) { - dev_dbg(kbdev->dev, "kctx %p is no longer active\n", + dev_dbg(kbdev->dev, "kctx %pK is no longer active\n", (void *)kctx); kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); } @@ -3241,7 +3241,7 @@ static bool js_end_rp_is_complete(struct kbase_jd_atom *const end_katom) return true; dev_dbg(kbdev->dev, - "JS complete end atom %p in state %d of RP %d\n", + "JS complete end atom %pK in state %d of RP %d\n", (void *)end_katom, (int)rp->state, end_katom->renderpass_id); @@ -3270,7 +3270,7 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, struct kbase_jd_atom *x_dep = katom->x_post_dep; kbdev = kctx->kbdev; - dev_dbg(kbdev->dev, "Atom %p complete in kctx %p (post-dep %p)\n", + dev_dbg(kbdev->dev, "Atom %pK complete in kctx %pK (post-dep %pK)\n", (void *)katom, (void *)kctx, (void *)x_dep); lockdep_assert_held(&kctx->kbdev->hwaccess_lock); @@ -3286,7 +3286,7 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, katom->event_code = katom->will_fail_event_code; katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED; - dev_dbg(kbdev->dev, "Atom %p status to HW completed\n", (void *)katom); + dev_dbg(kbdev->dev, "Atom %pK status to HW completed\n", (void *)katom); if (katom->event_code != BASE_JD_EVENT_DONE) { kbase_js_evict_deps(kctx, katom, katom->slot_nr, @@ -3308,7 +3308,7 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, bool was_pullable = kbase_js_ctx_pullable(kctx, x_dep->slot_nr, false); x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; - dev_dbg(kbdev->dev, "Cleared X_DEP flag on atom %p\n", + dev_dbg(kbdev->dev, "Cleared X_DEP flag on atom %pK\n", (void *)x_dep); kbase_js_move_to_tree(x_dep); @@ -3319,13 +3319,13 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, x_dep->slot_nr); if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) { - dev_dbg(kbdev->dev, "Atom %p is in runnable tree\n", + dev_dbg(kbdev->dev, "Atom %pK is in runnable tree\n", 
(void *)x_dep); return x_dep; } } else { dev_dbg(kbdev->dev, - "No cross-slot dep to unblock for atom %p\n", + "No cross-slot dep to unblock for atom %pK\n", (void *)katom); } @@ -3356,13 +3356,13 @@ bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *const katom) if (!(katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) { - dev_dbg(kbdev->dev, "Atom %p is not blocked on a cross-slot dependency", + dev_dbg(kbdev->dev, "Atom %pK is not blocked on a cross-slot dependency", (void *)katom); return false; } if (!(katom->core_req & BASE_JD_REQ_END_RENDERPASS)) { - dev_dbg(kbdev->dev, "Atom %p is blocked on a cross-slot dependency", + dev_dbg(kbdev->dev, "Atom %pK is blocked on a cross-slot dependency", (void *)katom); return true; } @@ -3388,12 +3388,12 @@ bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *const katom) * if it only depends on the tiler job chain. */ if (katom->x_pre_dep != rp->start_katom) { - dev_dbg(kbdev->dev, "Dependency is on %p not start atom %p\n", + dev_dbg(kbdev->dev, "Dependency is on %pK not start atom %pK\n", (void *)katom->x_pre_dep, (void *)rp->start_katom); return true; } - dev_dbg(kbdev->dev, "Ignoring cross-slot dep on atom %p\n", + dev_dbg(kbdev->dev, "Ignoring cross-slot dep on atom %pK\n", (void *)katom->x_pre_dep); return false; @@ -3407,7 +3407,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) bool ctx_waiting[BASE_JM_MAX_NR_SLOTS]; int js; - dev_dbg(kbdev->dev, "%s kbdev %p mask 0x%x\n", + dev_dbg(kbdev->dev, "%s kbdev %pK mask 0x%x\n", __func__, (void *)kbdev, (unsigned int)js_mask); js_devdata = &kbdev->js_data; @@ -3442,7 +3442,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) context_idle = true; dev_dbg(kbdev->dev, - "kctx %p is not active (s:%d)\n", + "kctx %pK is not active (s:%d)\n", (void *)kctx, js); if (kbase_pm_context_active_handle_suspend( @@ -3472,7 +3472,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) &kctx->jctx.sched_info.ctx.jsctx_mutex); 
dev_dbg(kbdev->dev, - "kctx %p cannot be used at this time\n", + "kctx %pK cannot be used at this time\n", kctx); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -3514,7 +3514,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) bool pullable; dev_dbg(kbdev->dev, - "No atoms pulled from kctx %p (s:%d)\n", + "No atoms pulled from kctx %pK (s:%d)\n", (void *)kctx, js); pullable = kbase_js_ctx_pullable(kctx, js, @@ -3576,7 +3576,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) break; /* Could not run atoms on this slot */ } - dev_dbg(kbdev->dev, "Push kctx %p to back of list\n", + dev_dbg(kbdev->dev, "Push kctx %pK to back of list\n", (void *)kctx); if (kbase_js_ctx_pullable(kctx, js, true)) timer_sync |= @@ -3598,7 +3598,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { if (kbdev->hwaccess.active_kctx[js] == last_active[js] && ctx_waiting[js]) { - dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n", + dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%d)\n", (void *)last_active[js], js); kbdev->hwaccess.active_kctx[js] = NULL; } @@ -3629,7 +3629,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) mutex_lock(&js_kctx_info->ctx.jsctx_mutex); kbase_ctx_flag_set(kctx, KCTX_DYING); - dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %p", kctx); + dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %pK", kctx); /* * At this point we know: @@ -3693,7 +3693,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) KBASE_KTRACE_ADD_JM(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u, kbase_ctx_flag(kctx, KCTX_SCHEDULED)); - dev_dbg(kbdev->dev, "Zap: Ctx %p scheduled=0", kctx); + dev_dbg(kbdev->dev, "Zap: Ctx %pK scheduled=0", kctx); /* Only cancel jobs when we evicted from the * queue. No Power Manager active reference was held. 
@@ -3714,7 +3714,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) * Pool */ KBASE_KTRACE_ADD_JM(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u, kbase_ctx_flag(kctx, KCTX_SCHEDULED)); - dev_dbg(kbdev->dev, "Zap: Ctx %p is in RunPool", kctx); + dev_dbg(kbdev->dev, "Zap: Ctx %pK is in RunPool", kctx); /* Disable the ctx from submitting any more jobs */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -3732,7 +3732,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) */ KBASE_DEBUG_ASSERT(was_retained); - dev_dbg(kbdev->dev, "Zap: Ctx %p Kill Any Running jobs", kctx); + dev_dbg(kbdev->dev, "Zap: Ctx %pK Kill Any Running jobs", kctx); /* Cancel any remaining running jobs for this kctx - if any. * Submit is disallowed which takes effect immediately, so no @@ -3745,7 +3745,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) mutex_unlock(&js_devdata->queue_mutex); mutex_unlock(&kctx->jctx.lock); - dev_dbg(kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)", + dev_dbg(kbdev->dev, "Zap: Ctx %pK Release (may or may not schedule out immediately)", kctx); kbasep_js_runpool_release_ctx(kbdev, kctx); diff --git a/mali_kbase/mali_kbase_kinstr_jm.c b/mali_kbase/mali_kbase_kinstr_jm.c index 76cff41..cc8dd86 100644 --- a/mali_kbase/mali_kbase_kinstr_jm.c +++ b/mali_kbase/mali_kbase_kinstr_jm.c @@ -25,7 +25,7 @@ */ #include "mali_kbase_kinstr_jm.h" -#include "mali_kbase_kinstr_jm_reader.h" +#include <uapi/gpu/arm/midgard/mali_kbase_kinstr_jm_reader.h> #include "mali_kbase.h" #include "mali_kbase_linux.h" diff --git a/mali_kbase/mali_kbase_kinstr_jm.h b/mali_kbase/mali_kbase_kinstr_jm.h index 74fe5cf..2b81636 100644 --- a/mali_kbase/mali_kbase_kinstr_jm.h +++ b/mali_kbase/mali_kbase_kinstr_jm.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2019, 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -63,7 +63,7 @@ #ifndef _KBASE_KINSTR_JM_H_ #define _KBASE_KINSTR_JM_H_ -#include "mali_kbase_kinstr_jm_reader.h" +#include <uapi/gpu/arm/midgard/mali_kbase_kinstr_jm_reader.h> #ifdef __KERNEL__ #include <linux/version.h> diff --git a/mali_kbase/mali_kbase_kinstr_jm_reader.h b/mali_kbase/mali_kbase_kinstr_jm_reader.h deleted file mode 100644 index cbd495f..0000000 --- a/mali_kbase/mali_kbase_kinstr_jm_reader.h +++ /dev/null @@ -1,69 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -/* - * mali_kbase_kinstr_jm_reader.h - * Provides an ioctl API to read kernel atom state changes. The flow of the - * API is: - * 1. Obtain the file descriptor with ``KBASE_IOCTL_KINSTR_JM_FD`` - * 2. Determine the buffer structure layout via the above ioctl's returned - * size and version fields in ``struct kbase_kinstr_jm_fd_out`` - * 4. Poll the file descriptor for ``POLLIN`` - * 5. Get data with read() on the fd - * 6. Use the structure version to understand how to read the data from the - * buffer - * 7. Repeat 4-6 - * 8. 
Close the file descriptor - */ - -#ifndef _KBASE_KINSTR_JM_READER_H_ -#define _KBASE_KINSTR_JM_READER_H_ - -/** - * enum kbase_kinstr_jm_reader_atom_state - Determines the work state of an atom - * @KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE: Signifies that an atom has - * entered a hardware queue - * @KBASE_KINSTR_JM_READER_ATOM_STATE_START: Signifies that work has started - * on an atom - * @KBASE_KINSTR_JM_READER_ATOM_STATE_STOP: Signifies that work has stopped - * on an atom - * @KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE: Signifies that work has - * completed on an atom - * @KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT: The number of state enumerations - * - * We can add new states to the end of this if they do not break the existing - * state machine. Old user mode code can gracefully ignore states they do not - * understand. - * - * If we need to make a breaking change to the state machine, we can do that by - * changing the version reported by KBASE_IOCTL_KINSTR_JM_FD. This will - * mean that old user mode code will fail to understand the new state field in - * the structure and gracefully not use the state change API. 
- */ -enum kbase_kinstr_jm_reader_atom_state { - KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE, - KBASE_KINSTR_JM_READER_ATOM_STATE_START, - KBASE_KINSTR_JM_READER_ATOM_STATE_STOP, - KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE, - KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT -}; - -#endif /* _KBASE_KINSTR_JM_READER_H_ */ diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c index fd992e2..326917c 100644 --- a/mali_kbase/mali_kbase_mem.c +++ b/mali_kbase/mali_kbase_mem.c @@ -849,7 +849,7 @@ bool kbase_has_exec_va_zone(struct kbase_context *kctx) * * Return: true if any allocs exist on any zone, false otherwise */ -bool kbase_region_tracker_has_allocs(struct kbase_context *kctx) +static bool kbase_region_tracker_has_allocs(struct kbase_context *kctx) { unsigned int zone_idx; @@ -1393,7 +1393,7 @@ void kbase_free_alloced_region(struct kbase_va_region *reg) if (WARN_ON(kbase_is_region_invalid(reg))) return; - dev_dbg(kctx->kbdev->dev, "Freeing memory region %p\n", + dev_dbg(kctx->kbdev->dev, "Freeing memory region %pK\n", (void *)reg); #if MALI_USE_CSF if (reg->flags & KBASE_REG_CSF_EVENT) @@ -1916,7 +1916,7 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re KBASE_DEBUG_ASSERT(kctx != NULL); KBASE_DEBUG_ASSERT(reg != NULL); - dev_dbg(kctx->kbdev->dev, "%s %p in kctx %p\n", + dev_dbg(kctx->kbdev->dev, "%s %pK in kctx %pK\n", __func__, (void *)reg, (void *)kctx); lockdep_assert_held(&kctx->reg_lock); @@ -1975,7 +1975,7 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) struct kbase_va_region *reg; KBASE_DEBUG_ASSERT(kctx != NULL); - dev_dbg(kctx->kbdev->dev, "%s 0x%llx in kctx %p\n", + dev_dbg(kctx->kbdev->dev, "%s 0x%llx in kctx %pK\n", __func__, gpu_addr, (void *)kctx); if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) { @@ -2772,6 +2772,7 @@ void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, freed); } } 
+KBASE_EXPORT_TEST_API(kbase_free_phy_pages_helper_locked); #if MALI_USE_CSF /** @@ -4233,8 +4234,11 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) div_u64(old_pages * (100 - kctx->trim_level), 100)); u64 delta = old_pages - new_size; - if (delta) + if (delta) { + mutex_lock(&kctx->reg_lock); kbase_mem_shrink(kctx, reg, old_pages - delta); + mutex_unlock(&kctx->reg_lock); + } } #if MALI_JIT_PRESSURE_LIMIT_BASE diff --git a/mali_kbase/mali_kbase_mem.h b/mali_kbase/mali_kbase_mem.h index cda6b57..d12ec31 100644 --- a/mali_kbase/mali_kbase_mem.h +++ b/mali_kbase/mali_kbase_mem.h @@ -31,7 +31,7 @@ #endif #include <linux/kref.h> -#include "mali_base_kernel.h" +#include <uapi/gpu/arm/midgard/mali_base_kernel.h> #include <mali_kbase_hw.h> #include "mali_kbase_pm.h" #include "mali_kbase_defs.h" @@ -549,7 +549,7 @@ static inline struct kbase_va_region *kbase_va_region_alloc_get( WARN_ON(!region->va_refcnt); /* non-atomic as kctx->reg_lock is held */ - dev_dbg(kctx->kbdev->dev, "va_refcnt %d before get %p\n", + dev_dbg(kctx->kbdev->dev, "va_refcnt %d before get %pK\n", region->va_refcnt, (void *)region); region->va_refcnt++; @@ -566,7 +566,7 @@ static inline struct kbase_va_region *kbase_va_region_alloc_put( /* non-atomic as kctx->reg_lock is held */ region->va_refcnt--; - dev_dbg(kctx->kbdev->dev, "va_refcnt %d after put %p\n", + dev_dbg(kctx->kbdev->dev, "va_refcnt %d after put %pK\n", region->va_refcnt, (void *)region); if (!region->va_refcnt) kbase_region_refcnt_free(region); diff --git a/mali_kbase/mali_kbase_mem_linux.c b/mali_kbase/mali_kbase_mem_linux.c index 7c9c08e..cc80927 100644 --- a/mali_kbase/mali_kbase_mem_linux.c +++ b/mali_kbase/mali_kbase_mem_linux.c @@ -42,7 +42,7 @@ #include <mali_kbase.h> #include <mali_kbase_mem_linux.h> #include <tl/mali_kbase_tracepoints.h> -#include <mali_kbase_ioctl.h> +#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h> #include <mmu/mali_kbase_mmu.h> #include <mali_kbase_caps.h> #include 
<mali_kbase_trace_gpu_mem.h> @@ -1104,7 +1104,7 @@ int kbase_mem_do_sync_imported(struct kbase_context *kctx, dir); #endif /* KBASE_MEM_ION_SYNC_WORKAROUND */ break; - }; + } if (unlikely(ret)) dev_warn(kctx->kbdev->dev, @@ -2718,7 +2718,7 @@ int kbase_context_mmap(struct kbase_context *const kctx, { struct kbase_va_region *reg = NULL; void *kaddr = NULL; - size_t nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + size_t nr_pages = vma_pages(vma); int err = 0; int free_on_close = 0; struct device *dev = kctx->kbdev->dev; @@ -3333,7 +3333,7 @@ static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx, { unsigned long cookie = vma->vm_pgoff - PFN_DOWN(BASEP_MEM_CSF_USER_IO_PAGES_HANDLE); - size_t nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + size_t nr_pages = vma_pages(vma); struct kbase_queue *queue; int err = 0; diff --git a/mali_kbase/mali_kbase_mem_pool.c b/mali_kbase/mali_kbase_mem_pool.c index 9b5854a..1874a6f 100644 --- a/mali_kbase/mali_kbase_mem_pool.c +++ b/mali_kbase/mali_kbase_mem_pool.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -309,7 +309,7 @@ void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size) kbase_mem_pool_unlock(pool); } - +KBASE_EXPORT_TEST_API(kbase_mem_pool_set_max_size); static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s, struct shrink_control *sc) @@ -804,8 +804,8 @@ void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool, nr_to_pool = kbase_mem_pool_capacity(pool); nr_to_pool = min(nr_pages, nr_to_pool); - kbase_mem_pool_add_array_locked(pool, nr_pages, pages, false, - dirty); + kbase_mem_pool_add_array_locked(pool, nr_to_pool, pages, false, + dirty); i += nr_to_pool; } diff --git a/mali_kbase/mali_kbase_mipe_gen_header.h b/mali_kbase/mali_kbase_mipe_gen_header.h index 87eb65b..d1ea7ad 100644 --- a/mali_kbase/mali_kbase_mipe_gen_header.h +++ b/mali_kbase/mali_kbase_mipe_gen_header.h @@ -39,14 +39,14 @@ * defined. See documentation below: */ -/** +/* * The name of the variable where the result BLOB will be stored. */ #if !defined(MIPE_HEADER_BLOB_VAR_NAME) #error "MIPE_HEADER_BLOB_VAR_NAME must be defined!" #endif -/** +/* * A compiler attribute for the BLOB variable. * * e.g. __attribute__((section("my_section"))) @@ -77,7 +77,7 @@ #error "MIPE_HEADER_STREAM_ID must be defined!" #endif -/** +/* * MIPE packet class. * * See enum tl_packet_class. @@ -86,7 +86,7 @@ #error "MIPE_HEADER_PKT_CLASS must be defined!" #endif -/** +/* * The list of tracepoints to process. * * It should be defined as follows: @@ -105,14 +105,14 @@ #error "MIPE_HEADER_TRACEPOINT_LIST must be defined!" #endif -/** +/* * The number of entries in MIPE_HEADER_TRACEPOINT_LIST. */ #if !defined(MIPE_HEADER_TRACEPOINT_LIST_SIZE) #error "MIPE_HEADER_TRACEPOINT_LIST_SIZE must be defined!" #endif -/** +/* * The list of enums to process. 
* * It should be defined as follows: @@ -129,7 +129,7 @@ */ #if defined(MIPE_HEADER_ENUM_LIST) -/** +/* * Tracepoint message ID used for enums declaration. */ #if !defined(MIPE_HEADER_ENUM_MSG_ID) diff --git a/mali_kbase/mali_kbase_pm.c b/mali_kbase/mali_kbase_pm.c index da09a97..3ded47b 100644 --- a/mali_kbase/mali_kbase_pm.c +++ b/mali_kbase/mali_kbase_pm.c @@ -256,9 +256,15 @@ void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start) kbase_pm_context_idle(kbdev); /* Re-enable GPU hardware counters */ +#if MALI_USE_CSF + kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + kbase_csf_scheduler_spin_unlock(kbdev, flags); +#else spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +#endif /* Resume vinstr */ kbase_vinstr_resume(kbdev->vinstr_ctx); diff --git a/mali_kbase/mali_kbase_reset_gpu.h b/mali_kbase/mali_kbase_reset_gpu.h index 4f66972..cb8a082 100644 --- a/mali_kbase/mali_kbase_reset_gpu.h +++ b/mali_kbase/mali_kbase_reset_gpu.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -143,8 +143,16 @@ void kbase_reset_gpu_assert_prevented(struct kbase_device *kbdev); void kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev); /** + * Flags for kbase_prepare_to_reset_gpu + */ +#define RESET_FLAGS_NONE ((unsigned int)0) +/* This reset should be treated as an unrecoverable error by HW counter logic */ +#define RESET_FLAGS_HWC_UNRECOVERABLE_ERROR ((unsigned int)(1 << 0)) + +/** * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU. 
* @kbdev: Device pointer + * @flags: Bitfield indicating impact of reset (see flag defines) * * Caller is expected to hold the kbdev->hwaccess_lock. * @@ -153,18 +161,20 @@ void kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev); * - false - Another thread is performing a reset, kbase_reset_gpu should * not be called. */ -bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev); +bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, + unsigned int flags); /** * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU. * @kbdev: Device pointer - * + * @flags: Bitfield indicating impact of reset (see flag defines) + * Return: a boolean which should be interpreted as follows: * - true - Prepared for reset, kbase_reset_gpu should be called. * - false - Another thread is performing a reset, kbase_reset_gpu should * not be called. */ -bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev); +bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags); /** * kbase_reset_gpu - Reset the GPU diff --git a/mali_kbase/mali_kbase_softjobs.c b/mali_kbase/mali_kbase_softjobs.c index 654c029..e14a4be 100644 --- a/mali_kbase/mali_kbase_softjobs.c +++ b/mali_kbase/mali_kbase_softjobs.c @@ -27,7 +27,7 @@ #include <mali_kbase_sync.h> #endif #include <linux/dma-mapping.h> -#include <mali_base_kernel.h> +#include <uapi/gpu/arm/midgard/mali_base_kernel.h> #include <mali_kbase_hwaccess_time.h> #include <mali_kbase_kinstr_jm.h> #include <mali_kbase_mem_linux.h> @@ -145,6 +145,9 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) * delay suspend until we process the atom (which may be at the end of a * long chain of dependencies */ +#ifdef CONFIG_MALI_ARBITER_SUPPORT + atomic_inc(&kctx->kbdev->pm.gpu_users_waiting); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ pm_active_err = kbase_pm_context_active_handle_suspend(kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE); if (pm_active_err) { struct 
kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data; @@ -162,6 +165,10 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) return pm_active_err; } +#ifdef CONFIG_MALI_ARBITER_SUPPORT + else + atomic_dec(&kctx->kbdev->pm.gpu_users_waiting); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ kbase_backend_get_gpu_time(kctx->kbdev, &cycle_counter, &system_time, &ts); @@ -291,7 +298,7 @@ static void kbase_fence_debug_check_atom(struct kbase_jd_atom *katom) if (!kbase_sync_fence_in_info_get(dep, &info)) { dev_warn(dev, - "\tVictim trigger atom %d fence [%p] %s: %s\n", + "\tVictim trigger atom %d fence [%pK] %s: %s\n", kbase_jd_atom_id(kctx, dep), info.fence, info.name, @@ -320,11 +327,11 @@ static void kbase_fence_debug_wait_timeout(struct kbase_jd_atom *katom) return; } - dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%p] after %dms\n", + dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%pK] after %dms\n", kctx->tgid, kctx->id, kbase_jd_atom_id(kctx, katom), info.fence, timeout_ms); - dev_warn(dev, "\tGuilty fence [%p] %s: %s\n", + dev_warn(dev, "\tGuilty fence [%pK] %s: %s\n", info.fence, info.name, kbase_sync_status_string(info.status)); @@ -1422,41 +1429,27 @@ static int kbase_ext_res_prepare(struct kbase_jd_atom *katom) struct base_external_resource_list *ext_res; u64 count = 0; size_t copy_size; - int ret; user_ext_res = (__user struct base_external_resource_list *) (uintptr_t) katom->jc; /* Fail the job if there is no info structure */ - if (!user_ext_res) { - ret = -EINVAL; - goto fail; - } + if (!user_ext_res) + return -EINVAL; - if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) { - ret = -EINVAL; - goto fail; - } + if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) + return -EINVAL; /* Is the number of external resources in range? 
*/ - if (!count || count > BASE_EXT_RES_COUNT_MAX) { - ret = -EINVAL; - goto fail; - } + if (!count || count > BASE_EXT_RES_COUNT_MAX) + return -EINVAL; /* Copy the information for safe access and future storage */ copy_size = sizeof(*ext_res); copy_size += sizeof(struct base_external_resource) * (count - 1); - ext_res = kzalloc(copy_size, GFP_KERNEL); - if (!ext_res) { - ret = -ENOMEM; - goto fail; - } - - if (copy_from_user(ext_res, user_ext_res, copy_size) != 0) { - ret = -EINVAL; - goto free_info; - } + ext_res = memdup_user(user_ext_res, copy_size); + if (IS_ERR(ext_res)) + return PTR_ERR(ext_res); /* * Overwrite the count with the first value incase it was changed @@ -1467,11 +1460,6 @@ static int kbase_ext_res_prepare(struct kbase_jd_atom *katom) katom->softjob_data = ext_res; return 0; - -free_info: - kfree(ext_res); -fail: - return ret; } static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map) @@ -1793,6 +1781,9 @@ void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev) if (kbase_process_soft_job(katom_iter) == 0) { kbase_finish_soft_job(katom_iter); resched |= jd_done_nolock(katom_iter, NULL); +#ifdef CONFIG_MALI_ARBITER_SUPPORT + atomic_dec(&kbdev->pm.gpu_users_waiting); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ } mutex_unlock(&kctx->jctx.lock); } diff --git a/mali_kbase/mali_kbase_sync_common.c b/mali_kbase/mali_kbase_sync_common.c index 2061f53..39a68c2 100644 --- a/mali_kbase/mali_kbase_sync_common.c +++ b/mali_kbase/mali_kbase_sync_common.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2012-2016, 2018-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016, 2018-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,7 +20,7 @@ */ /* - * @file mali_kbase_sync_common.c + * @file * * Common code for our explicit fence functionality */ diff --git a/mali_kbase/mali_kbase_vinstr.c b/mali_kbase/mali_kbase_vinstr.c index bc985cb..4ac0d0e 100644 --- a/mali_kbase/mali_kbase_vinstr.c +++ b/mali_kbase/mali_kbase_vinstr.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,9 +22,9 @@ #include "mali_kbase_vinstr.h" #include "mali_kbase_hwcnt_virtualizer.h" #include "mali_kbase_hwcnt_types.h" -#include "mali_kbase_hwcnt_reader.h" +#include <uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h> #include "mali_kbase_hwcnt_gpu.h" -#include "mali_kbase_ioctl.h" +#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h> #include "mali_malisw.h" #include "mali_kbase_debug.h" @@ -898,11 +898,12 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_get_api_version( struct kbase_vinstr_client *cli, unsigned long arg, size_t size) { long ret = -EINVAL; - u8 clk_cnt = cli->vctx->metadata->clk_cnt; if (size == sizeof(u32)) { ret = put_user(HWCNT_READER_API, (u32 __user *)arg); } else if (size == sizeof(struct kbase_hwcnt_reader_api_version)) { + u8 clk_cnt = cli->vctx->metadata->clk_cnt; + unsigned long bytes = 0; struct kbase_hwcnt_reader_api_version api_version = { .version = HWCNT_READER_API, .features = KBASE_HWCNT_READER_API_VERSION_NO_FEATURE, @@ -915,8 +916,16 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_get_api_version( api_version.features |= KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_SHADER_CORES; - ret = copy_to_user( + bytes = copy_to_user( (void __user *)arg, 
&api_version, sizeof(api_version)); + + /* copy_to_user returns zero in case of success. + * If it fails, it returns the number of bytes that could NOT be copied + */ + if (bytes == 0) + ret = 0; + else + ret = -EFAULT; } return ret; } @@ -1042,7 +1051,16 @@ static int kbasep_vinstr_hwcnt_reader_mmap( return -EINVAL; vm_size = vma->vm_end - vma->vm_start; - size = cli->dump_bufs.buf_cnt * cli->vctx->metadata->dump_buf_bytes; + + /* The mapping is allowed to span the entirety of the page allocation, + * not just the chunk where the dump buffers are allocated. + * This accommodates the corner case where the combined size of the + * dump buffers is smaller than a single page. + * This does not pose a security risk as the pages are zeroed on + * allocation, and anything out of bounds of the dump buffers is never + * written to. + */ + size = (1ull << cli->dump_bufs.page_order) * PAGE_SIZE; if (vma->vm_pgoff > (size >> PAGE_SHIFT)) return -EINVAL; diff --git a/mali_kbase/mali_uk.h b/mali_kbase/mali_uk.h deleted file mode 100644 index a499e02..0000000 --- a/mali_kbase/mali_uk.h +++ /dev/null @@ -1,70 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * (C) COPYRIGHT 2010, 2012-2015, 2018, 2020-2021 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - */ - -/** - * Types and definitions that are common across OSs for both the user - * and kernel side of the User-Kernel interface. - */ - -#ifndef _UK_H_ -#define _UK_H_ - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -/** - * DOC: uk_api User-Kernel Interface API - * - * The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device - * drivers developed as part of the Midgard DDK. Currently that includes the Base driver. - * - * It exposes an OS independent API to user-side code (UKU) which routes functions calls to an OS-independent - * kernel-side API (UKK) via an OS-specific communication mechanism. - * - * This API is internal to the Midgard DDK and is not exposed to any applications. - * - */ - -/** - * enum uk_client_id - These are identifiers for kernel-side drivers - * implementing a UK interface, aka UKK clients. - * @UK_CLIENT_MALI_T600_BASE: Value used to identify the Base driver UK client. - * @UK_CLIENT_COUNT: The number of uk clients supported. This must be - * the last member of the enum - * - * The UK module maps this to an OS specific device name, e.g. "gpu_base" -> "GPU0:". Specify this - * identifier to select a UKK client to the uku_open() function. - * - * When a new UKK client driver is created a new identifier needs to be added to the uk_client_id - * enumeration and the uku_open() implemenation for the various OS ports need to be updated to - * provide a mapping of the identifier to the OS specific device name. 
- * - */ -enum uk_client_id { - UK_CLIENT_MALI_T600_BASE, - UK_CLIENT_COUNT -}; - -#ifdef __cplusplus -} -#endif /* __cplusplus */ -#endif /* _UK_H_ */ diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c index 6b7cb42..8240817 100644 --- a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c +++ b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c @@ -83,10 +83,19 @@ static void submit_work_pagefault(struct kbase_device *kbdev, u32 as_nr, .addr = fault->addr, }; - if (WARN_ON(!queue_work(as->pf_wq, &as->work_pagefault))) + /* + * A page fault work item could already be pending for the + * context's address space, when the page fault occurs for + * MCU's address space. + */ + if (!queue_work(as->pf_wq, &as->work_pagefault)) kbase_ctx_sched_release_ctx(kctx); - else + else { + dev_dbg(kbdev->dev, + "Page fault is already pending for as %u\n", + as_nr); atomic_inc(&kbdev->faults_pending); + } } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } @@ -117,15 +126,9 @@ void kbase_mmu_report_mcu_as_fault_and_reset(struct kbase_device *kbdev, for (as_no = 1; as_no < kbdev->nr_hw_address_spaces; as_no++) submit_work_pagefault(kbdev, as_no, fault); - /* MCU AS fault could mean hardware counters will stop working. - * Put the backend into the unrecoverable error state to cause - * current and subsequent counter operations to immediately - * fail, avoiding the risk of a hang. 
- */ - kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface); - /* GPU reset is required to recover */ - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu(kbdev, + RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); } KBASE_EXPORT_TEST_API(kbase_mmu_report_mcu_as_fault_and_reset); diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c index 18a74ab..ae334c1 100644 --- a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c +++ b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c @@ -206,7 +206,7 @@ static void kbase_mmu_interrupt_process(struct kbase_device *kbdev, lockdep_assert_held(&kbdev->hwaccess_lock); dev_dbg(kbdev->dev, - "Entering %s kctx %p, as %p\n", + "Entering %s kctx %pK, as %pK\n", __func__, (void *)kctx, (void *)as); if (!kctx) { @@ -255,14 +255,10 @@ static void kbase_mmu_interrupt_process(struct kbase_device *kbdev, */ kbasep_js_clear_submit_allowed(js_devdata, kctx); - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) - dev_warn(kbdev->dev, - "Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n", - as->number, fault->addr, - fault->extra_addr); - else - dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n", - as->number, fault->addr); + dev_warn(kbdev->dev, + "Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n", + as->number, fault->addr, + fault->extra_addr); /* * We need to switch to UNMAPPED mode - but we do this in a @@ -276,7 +272,7 @@ static void kbase_mmu_interrupt_process(struct kbase_device *kbdev, } dev_dbg(kbdev->dev, - "Leaving %s kctx %p, as %p\n", + "Leaving %s kctx %pK, as %pK\n", __func__, (void *)kctx, (void *)as); } @@ -375,14 +371,11 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) /* record the fault status */ fault->status = kbase_reg_read(kbdev, MMU_AS_REG(as_no, AS_FAULTSTATUS)); - - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) { - fault->extra_addr = kbase_reg_read(kbdev, - MMU_AS_REG(as_no, 
AS_FAULTEXTRA_HI)); - fault->extra_addr <<= 32; - fault->extra_addr |= kbase_reg_read(kbdev, - MMU_AS_REG(as_no, AS_FAULTEXTRA_LO)); - } + fault->extra_addr = kbase_reg_read(kbdev, + MMU_AS_REG(as_no, AS_FAULTEXTRA_HI)); + fault->extra_addr <<= 32; + fault->extra_addr |= kbase_reg_read(kbdev, + MMU_AS_REG(as_no, AS_FAULTEXTRA_LO)); if (kbase_as_has_bus_fault(as, fault)) { /* Mark bus fault as handled. @@ -423,7 +416,7 @@ int kbase_mmu_switch_to_ir(struct kbase_context *const kctx, struct kbase_va_region *const reg) { dev_dbg(kctx->kbdev->dev, - "Switching to incremental rendering for region %p\n", + "Switching to incremental rendering for region %pK\n", (void *)reg); return kbase_job_slot_softstop_start_rp(kctx, reg); } diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c index 51bee43..0761f68 100644 --- a/mali_kbase/mmu/mali_kbase_mmu.c +++ b/mali_kbase/mmu/mali_kbase_mmu.c @@ -561,7 +561,7 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); dev_dbg(kbdev->dev, - "Entering %s %p, fault_pfn %lld, as_no %d\n", + "Entering %s %pK, fault_pfn %lld, as_no %d\n", __func__, (void *)data, fault_pfn, as_no); /* Grab the context that was already refcounted in kbase_mmu_interrupt() @@ -634,21 +634,13 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) goto fault_done; case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT: - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Address size fault", fault); - else - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Unknown fault code", fault); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Address size fault", fault); goto fault_done; case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT: - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Memory attributes fault", 
fault); - else - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Unknown fault code", fault); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Memory attributes fault", fault); goto fault_done; default: @@ -852,7 +844,7 @@ page_fault_retry: if (kbase_mmu_switch_to_ir(kctx, region) >= 0) { dev_dbg(kctx->kbdev->dev, - "Get region %p for IR\n", + "Get region %pK for IR\n", (void *)region); kbase_va_region_alloc_get(kctx, region); } @@ -980,7 +972,7 @@ fault_done: release_ctx(kbdev, kctx); atomic_dec(&kbdev->faults_pending); - dev_dbg(kbdev->dev, "Leaving page_fault_worker %p\n", (void *)data); + dev_dbg(kbdev->dev, "Leaving page_fault_worker %pK\n", (void *)data); } static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, @@ -1557,7 +1549,7 @@ static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx, */ dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n"); - if (kbase_prepare_to_reset_gpu_locked(kbdev)) + if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu_locked(kbdev); } } @@ -1613,17 +1605,8 @@ static void kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev, */ dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n"); -#if MALI_USE_CSF - /* A GPU hang could mean hardware counters will stop working. - * Put the backend into the unrecoverable error state to cause - * current and subsequent counter operations to immediately - * fail, avoiding the risk of a hang. 
- */ - kbase_hwcnt_backend_csf_on_unrecoverable_error( - &kbdev->hwcnt_gpu_iface); -#endif /* MALI_USE_CSF */ - - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu( + kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); } @@ -1659,7 +1642,7 @@ static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, ctx_is_in_runpool = kbase_ctx_sched_inc_refcount(kctx); mutex_unlock(&kbdev->js_data.queue_mutex); #else - ctx_is_in_runpool = kbase_ctx_sched_refcount_mmu_flush(kctx, sync); + ctx_is_in_runpool = kbase_ctx_sched_inc_refcount_if_as_valid(kctx); #endif /* !MALI_USE_CSF */ if (ctx_is_in_runpool) { @@ -1681,11 +1664,6 @@ void kbase_mmu_update(struct kbase_device *kbdev, KBASE_DEBUG_ASSERT(as_nr != KBASEP_AS_NR_INVALID); kbdev->mmu_mode->update(kbdev, mmut, as_nr); - -#if MALI_USE_CSF - if (mmut->kctx) - mmut->kctx->mmu_flush_pend_state = KCTX_MMU_FLUSH_NOT_PEND; -#endif } KBASE_EXPORT_TEST_API(kbase_mmu_update); @@ -1719,10 +1697,6 @@ void kbase_mmu_disable(struct kbase_context *kctx) kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0, true); kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr); - -#if MALI_USE_CSF - kctx->mmu_flush_pend_state = KCTX_MMU_FLUSH_NOT_PEND; -#endif } KBASE_EXPORT_TEST_API(kbase_mmu_disable); @@ -2312,30 +2286,3 @@ void kbase_flush_mmu_wqs(struct kbase_device *kbdev) flush_workqueue(as->pf_wq); } } - -#if MALI_USE_CSF -void kbase_mmu_deferred_flush_invalidate(struct kbase_context *kctx) -{ - struct kbase_device *kbdev = kctx->kbdev; - - lockdep_assert_held(&kbdev->mmu_hw_mutex); - - if (kctx->as_nr == KBASEP_AS_NR_INVALID) - return; - - if (kctx->mmu_flush_pend_state == KCTX_MMU_FLUSH_NOT_PEND) - return; - - WARN_ON(!atomic_read(&kctx->refcount)); - - /* Specify the entire address space as the locked region. - * The flush of entire L2 cache and complete TLB invalidation will - * anyways happen for the exisiting CSF GPUs, regardless of the locked - * range. This may have to be revised later on. 
- */ - kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0, - kctx->mmu_flush_pend_state == KCTX_MMU_FLUSH_PEND_SYNC); - - kctx->mmu_flush_pend_state = KCTX_MMU_FLUSH_NOT_PEND; -} -#endif diff --git a/mali_kbase/mmu/mali_kbase_mmu.h b/mali_kbase/mmu/mali_kbase_mmu.h index 1d877ac..bf4fd91 100644 --- a/mali_kbase/mmu/mali_kbase_mmu.h +++ b/mali_kbase/mmu/mali_kbase_mmu.h @@ -152,21 +152,4 @@ int kbase_mmu_bus_fault_interrupt(struct kbase_device *kbdev, u32 status, void kbase_mmu_gpu_fault_interrupt(struct kbase_device *kbdev, u32 status, u32 as_nr, u64 address, bool as_valid); -#if MALI_USE_CSF -/** - * kbase_mmu_deferred_flush_invalidate() - Perform deferred MMU flush - * operations for a Kbase context. - * @kctx: Pointer to the Kbase context for which MMU flush operations - * are pending. - * - * This function performs the MMU flush operations that are pending for a Kbase - * context. The flush operations will be deferred if the context is inactive, - * i.e. kctx->refcount is zero which happens when all the queue groups of a - * context have gone off CSG slots. - * This needs to be called when first queue group of the context is put back - * on the CSG slot. 
- */ -void kbase_mmu_deferred_flush_invalidate(struct kbase_context *kctx); -#endif - #endif /* _KBASE_MMU_H_ */ diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c index b0596af..88fd9cf 100644 --- a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c +++ b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c @@ -124,38 +124,33 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as) struct kbase_mmu_setup *current_setup = &as->current_setup; u64 transcfg = 0; - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) { - transcfg = current_setup->transcfg; + transcfg = current_setup->transcfg; - /* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK - * Clear PTW_MEMATTR bits - */ - transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK; - /* Enable correct PTW_MEMATTR bits */ - transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK; - /* Ensure page-tables reads use read-allocate cache-policy in - * the L2 - */ - transcfg |= AS_TRANSCFG_R_ALLOCATE; - - if (kbdev->system_coherency != COHERENCY_NONE) { - /* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) - * Clear PTW_SH bits - */ - transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK); - /* Enable correct PTW_SH bits */ - transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS); - } + /* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK + * Clear PTW_MEMATTR bits + */ + transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK; + /* Enable correct PTW_MEMATTR bits */ + transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK; + /* Ensure page-tables reads use read-allocate cache-policy in + * the L2 + */ + transcfg |= AS_TRANSCFG_R_ALLOCATE; - kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO), - transcfg); - kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI), - (transcfg >> 32) & 0xFFFFFFFFUL); - } else { - if (kbdev->system_coherency != COHERENCY_NONE) - current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER; + if (kbdev->system_coherency != COHERENCY_NONE) { + /* Set flag AS_TRANSCFG_PTW_SH_OS (outer 
shareable) + * Clear PTW_SH bits + */ + transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK); + /* Enable correct PTW_SH bits */ + transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS); } + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO), + transcfg); + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI), + (transcfg >> 32) & 0xFFFFFFFFUL); + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO), current_setup->transtab & 0xFFFFFFFFUL); kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI), diff --git a/mali_kbase/mmu/mali_kbase_mmu_mode_lpae.c b/mali_kbase/mmu/mali_kbase_mmu_mode_lpae.c deleted file mode 100644 index 09793e1..0000000 --- a/mali_kbase/mmu/mali_kbase_mmu_mode_lpae.c +++ /dev/null @@ -1,195 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * - * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - */ - -#include "mali_kbase.h" -#include <gpu/mali_kbase_gpu_regmap.h> -#include "mali_kbase_defs.h" - -#define ENTRY_TYPE_MASK 3ULL -#define ENTRY_IS_ATE 1ULL -#define ENTRY_IS_INVAL 2ULL -#define ENTRY_IS_PTE 3ULL - -#define ENTRY_ATTR_BITS (7ULL << 2) /* bits 4:2 */ -#define ENTRY_RD_BIT (1ULL << 6) -#define ENTRY_WR_BIT (1ULL << 7) -#define ENTRY_SHARE_BITS (3ULL << 8) /* bits 9:8 */ -#define ENTRY_ACCESS_BIT (1ULL << 10) -#define ENTRY_NX_BIT (1ULL << 54) - -#define ENTRY_FLAGS_MASK (ENTRY_ATTR_BITS | ENTRY_RD_BIT | ENTRY_WR_BIT | \ - ENTRY_SHARE_BITS | ENTRY_ACCESS_BIT | ENTRY_NX_BIT) - -/* Helper Function to perform assignment of page table entries, to - * ensure the use of strd, which is required on LPAE systems. - */ -static inline void page_table_entry_set(u64 *pte, u64 phy) -{ - WRITE_ONCE(*pte, phy); -} - -static void mmu_get_as_setup(struct kbase_mmu_table *mmut, - struct kbase_mmu_setup * const setup) -{ - /* Set up the required caching policies at the correct indices - * in the memattr register. 
- */ - setup->memattr = - (AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY << - (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | - (AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL << - (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | - (AS_MEMATTR_LPAE_WRITE_ALLOC << - (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | - (AS_MEMATTR_LPAE_OUTER_IMPL_DEF << - (AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) | - (AS_MEMATTR_LPAE_OUTER_WA << - (AS_MEMATTR_INDEX_OUTER_WA * 8)) | - 0; /* The other indices are unused for now */ - - setup->transtab = ((u64)mmut->pgd & - ((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK)) | - AS_TRANSTAB_LPAE_ADRMODE_TABLE | - AS_TRANSTAB_LPAE_READ_INNER; - - setup->transcfg = AS_TRANSCFG_ADRMODE_LEGACY; -} - -static void mmu_update(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, - int as_nr) -{ - struct kbase_as *as; - struct kbase_mmu_setup *current_setup; - - if (WARN_ON(as_nr == KBASEP_AS_NR_INVALID)) - return; - - as = &kbdev->as[as_nr]; - current_setup = &as->current_setup; - - mmu_get_as_setup(mmut, current_setup); - - /* Apply the address space setting */ - kbase_mmu_hw_configure(kbdev, as); -} - -static void mmu_disable_as(struct kbase_device *kbdev, int as_nr) -{ - struct kbase_as * const as = &kbdev->as[as_nr]; - struct kbase_mmu_setup * const current_setup = &as->current_setup; - - current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED; - - /* Apply the address space setting */ - kbase_mmu_hw_configure(kbdev, as); -} - -static phys_addr_t pte_to_phy_addr(u64 entry) -{ - if (!(entry & 1)) - return 0; - - return entry & ~0xFFF; -} - -static int ate_is_valid(u64 ate, int const level) -{ - return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE); -} - -static int pte_is_valid(u64 pte, int const level) -{ - return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE); -} - -/* - * Map KBASE_REG flags to MMU flags - */ -static u64 get_mmu_flags(unsigned long flags) -{ - u64 mmu_flags; - unsigned long memattr_idx; - - memattr_idx = KBASE_REG_MEMATTR_VALUE(flags); - if 
(WARN(memattr_idx == AS_MEMATTR_INDEX_NON_CACHEABLE, - "Legacy Mode MMU cannot honor GPU non-cachable memory, will use default instead\n")) - memattr_idx = AS_MEMATTR_INDEX_DEFAULT; - /* store mem_attr index as 4:2, noting that: - * - macro called above ensures 3 bits already - * - all AS_MEMATTR_INDEX_<...> macros only use 3 bits - */ - mmu_flags = memattr_idx << 2; - - /* write perm if requested */ - mmu_flags |= (flags & KBASE_REG_GPU_WR) ? ENTRY_WR_BIT : 0; - /* read perm if requested */ - mmu_flags |= (flags & KBASE_REG_GPU_RD) ? ENTRY_RD_BIT : 0; - /* nx if requested */ - mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0; - - if (flags & KBASE_REG_SHARE_BOTH) { - /* inner and outer shareable */ - mmu_flags |= SHARE_BOTH_BITS; - } else if (flags & KBASE_REG_SHARE_IN) { - /* inner shareable coherency */ - mmu_flags |= SHARE_INNER_BITS; - } - - return mmu_flags; -} - -static void entry_set_ate(u64 *entry, - struct tagged_addr phy, - unsigned long flags, - int const level) -{ - page_table_entry_set(entry, as_phys_addr_t(phy) | get_mmu_flags(flags) | - ENTRY_IS_ATE); -} - -static void entry_set_pte(u64 *entry, phys_addr_t phy) -{ - page_table_entry_set(entry, (phy & ~0xFFF) | ENTRY_IS_PTE); -} - -static void entry_invalidate(u64 *entry) -{ - page_table_entry_set(entry, ENTRY_IS_INVAL); -} - -static struct kbase_mmu_mode const lpae_mode = { - .update = mmu_update, - .get_as_setup = mmu_get_as_setup, - .disable_as = mmu_disable_as, - .pte_to_phy_addr = pte_to_phy_addr, - .ate_is_valid = ate_is_valid, - .pte_is_valid = pte_is_valid, - .entry_set_ate = entry_set_ate, - .entry_set_pte = entry_set_pte, - .entry_invalidate = entry_invalidate, - .flags = 0 -}; - -struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void) -{ - return &lpae_mode; -} diff --git a/mali_kbase/tests/Mconfig b/mali_kbase/tests/Mconfig index 2630736..a21810b 100644 --- a/mali_kbase/tests/Mconfig +++ b/mali_kbase/tests/Mconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # -# (C) 
COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -40,6 +40,10 @@ config BUILD_CSF_TESTS config BUILD_ARBIF_TESTS bool - default y if UNIT_TEST_KERNEL_MODULES && MALI_ARBITER_SUPPORT + default y if UNIT_TEST_CODE && MALI_ARBITER_SUPPORT default n +config BUILD_ARBIF_KERNEL_TESTS + bool + default y if BUILD_KERNEL_MODULES && BUILD_ARBIF_TESTS + default n diff --git a/mali_kbase/tl/backend/mali_kbase_timeline_csf.c b/mali_kbase/tl/backend/mali_kbase_timeline_csf.c index 42f1e2d..7455ce2 100644 --- a/mali_kbase/tl/backend/mali_kbase_timeline_csf.c +++ b/mali_kbase/tl/backend/mali_kbase_timeline_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -159,7 +159,7 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev) * this iteration of the loop, so will start to correctly update * the object model state. */ - }; + } mutex_unlock(&timeline->tl_kctx_list_lock); diff --git a/mali_kbase/tl/backend/mali_kbase_timeline_jm.c b/mali_kbase/tl/backend/mali_kbase_timeline_jm.c index f016e8b..6659d2d 100644 --- a/mali_kbase/tl/backend/mali_kbase_timeline_jm.c +++ b/mali_kbase/tl/backend/mali_kbase_timeline_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -74,7 +74,7 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev) kctx, kctx->id, (u32)(kctx->tgid)); - }; + } /* Reset body stream buffers while holding the kctx lock. * This ensures we can't fire both summary and normal tracepoints for diff --git a/mali_kbase/tl/mali_kbase_timeline.c b/mali_kbase/tl/mali_kbase_timeline.c index 4f955a1..20d7b16 100644 --- a/mali_kbase/tl/mali_kbase_timeline.c +++ b/mali_kbase/tl/mali_kbase_timeline.c @@ -186,7 +186,7 @@ static void kbase_tlstream_current_devfreq_target(struct kbase_device *kbdev) int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags) { - int ret; + int ret = 0; u32 timeline_flags = TLSTREAM_ENABLED | flags; struct kbase_timeline *timeline = kbdev->timeline; @@ -262,6 +262,9 @@ int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags) ret = -EBUSY; } + if (ret >= 0) + timeline->last_acquire_time = ktime_get(); + return ret; } diff --git a/mali_kbase/tl/mali_kbase_timeline.h b/mali_kbase/tl/mali_kbase_timeline.h index 9315fcc..0465352 100644 --- a/mali_kbase/tl/mali_kbase_timeline.h +++ b/mali_kbase/tl/mali_kbase_timeline.h @@ -107,32 +107,6 @@ void kbase_timeline_pre_kbase_context_destroy(struct kbase_context *kctx); void kbase_timeline_post_kbase_context_destroy(struct kbase_context *kctx); #if MALI_UNIT_TEST -/** - * kbase_timeline_test - start timeline stream data generator - * @kbdev: Kernel common context - * @tpw_count: Number of trace point writers in each context - * @msg_delay: Time delay in milliseconds between trace points written by one - * writer - * @msg_count: Number of trace points written by one writer - * @aux_msg: If non-zero aux messages will be included - * - * This test starts a requested number of asynchronous writers in both IRQ and - * thread context. 
Each writer will generate required number of test - * tracepoints (tracepoints with embedded information about writer that - * should be verified by user space reader). Tracepoints will be emitted in - * all timeline body streams. If aux_msg is non-zero writer will also - * generate not testable tracepoints (tracepoints without information about - * writer). These tracepoints are used to check correctness of remaining - * timeline message generating functions. Writer will wait requested time - * between generating another set of messages. This call blocks until all - * writers finish. - */ -void kbase_timeline_test( - struct kbase_device *kbdev, - unsigned int tpw_count, - unsigned int msg_delay, - unsigned int msg_count, - int aux_msg); /** * kbase_timeline_stats - read timeline stream statistics diff --git a/mali_kbase/tl/mali_kbase_timeline_io.c b/mali_kbase/tl/mali_kbase_timeline_io.c index 8587ba0..e3b6fbc 100644 --- a/mali_kbase/tl/mali_kbase_timeline_io.c +++ b/mali_kbase/tl/mali_kbase_timeline_io.c @@ -24,6 +24,7 @@ #include "mali_kbase_tracepoints.h" #include "mali_kbase_timeline.h" +#include <linux/delay.h> #include <linux/poll.h> /* The timeline stream file operations functions. 
*/ @@ -46,7 +47,8 @@ const struct file_operations kbasep_tlstream_fops = { /** * kbasep_timeline_io_packet_pending - check timeline streams for pending - *packets + * packets + * * @timeline: Timeline instance * @ready_stream: Pointer to variable where stream will be placed * @rb_idx_raw: Pointer to variable where read buffer index will be placed @@ -86,8 +88,8 @@ kbasep_timeline_io_packet_pending(struct kbase_timeline *timeline, } /** - * kbasep_timeline_has_header_data() - - * check timeline headers for pending packets + * kbasep_timeline_has_header_data() - check timeline headers for pending + * packets * * @timeline: Timeline instance * @@ -139,6 +141,7 @@ static inline int copy_stream_header(char __user *buffer, size_t size, /** * kbasep_timeline_copy_header - copy timeline headers to the user + * * @timeline: Timeline instance * @buffer: Pointer to the buffer provided by user * @size: Maximum amount of data that can be stored in the buffer @@ -174,6 +177,7 @@ static inline int kbasep_timeline_copy_headers(struct kbase_timeline *timeline, /** * kbasep_timeline_io_read - copy data from streams to buffer provided by user + * * @filp: Pointer to file structure * @buffer: Pointer to the buffer provided by user * @size: Maximum amount of data that can be stored in the buffer @@ -198,7 +202,7 @@ static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, if (!buffer) return -EINVAL; - if ((*f_pos < 0) || (size < PACKET_SIZE)) + if (*f_pos < 0) return -EINVAL; mutex_lock(&timeline->reader_lock); @@ -217,10 +221,10 @@ static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, } /* If we already read some packets and there is no - * packet pending then return back to user. - * If we don't have any data yet, wait for packet to be - * submitted. - */ + * packet pending then return back to user. + * If we don't have any data yet, wait for packet to be + * submitted. 
+ */ if (copy_len > 0) { if (!kbasep_timeline_io_packet_pending( timeline, &stream, &rb_idx_raw)) @@ -241,8 +245,8 @@ static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, } /* Check if this packet fits into the user buffer. - * If so copy its content. - */ + * If so copy its content. + */ rb_idx = rb_idx_raw % PACKET_COUNT; rb_size = atomic_read(&stream->buffer[rb_idx].size); if (rb_size > size - copy_len) @@ -254,10 +258,10 @@ static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, } /* If the distance between read buffer index and write - * buffer index became more than PACKET_COUNT, then overflow - * happened and we need to ignore the last portion of bytes - * that we have just sent to user. - */ + * buffer index became more than PACKET_COUNT, then overflow + * happened and we need to ignore the last portion of bytes + * that we have just sent to user. + */ smp_rmb(); wb_idx_raw = atomic_read(&stream->wbi); @@ -321,6 +325,8 @@ static unsigned int kbasep_timeline_io_poll(struct file *filp, poll_table *wait) static int kbasep_timeline_io_release(struct inode *inode, struct file *filp) { struct kbase_timeline *timeline; + ktime_t elapsed_time; + s64 elapsed_time_ms, time_to_sleep; KBASE_DEBUG_ASSERT(inode); KBASE_DEBUG_ASSERT(filp); @@ -330,6 +336,18 @@ static int kbasep_timeline_io_release(struct inode *inode, struct file *filp) timeline = (struct kbase_timeline *)filp->private_data; + /* Get the amount of time passed since the timeline was acquired and ensure + * we sleep for long enough such that it has been at least + * TIMELINE_HYSTERESIS_TIMEOUT_MS amount of time between acquire and release. + * This prevents userspace from spamming acquire and release too quickly. 
+ */ + elapsed_time = ktime_sub(ktime_get(), timeline->last_acquire_time); + elapsed_time_ms = ktime_to_ms(elapsed_time); + time_to_sleep = MIN(TIMELINE_HYSTERESIS_TIMEOUT_MS, + TIMELINE_HYSTERESIS_TIMEOUT_MS - elapsed_time_ms); + if (time_to_sleep > 0) + msleep(time_to_sleep); + #if MALI_USE_CSF kbase_csf_tl_reader_stop(&timeline->csf_tl_reader); #endif diff --git a/mali_kbase/tl/mali_kbase_timeline_priv.h b/mali_kbase/tl/mali_kbase_timeline_priv.h index 2825f77..8a58a13 100644 --- a/mali_kbase/tl/mali_kbase_timeline_priv.h +++ b/mali_kbase/tl/mali_kbase_timeline_priv.h @@ -34,6 +34,11 @@ #include <linux/atomic.h> #include <linux/mutex.h> +/* The minimum amount of time timeline must be acquired for before release is + * allowed, to prevent DoS attacks. + */ +#define TIMELINE_HYSTERESIS_TIMEOUT_MS ((s64)500) + /** * struct kbase_timeline - timeline state structure * @streams: The timeline streams generated by kernel @@ -49,6 +54,7 @@ * otherwise. See kbase_timeline_io_acquire(). * @obj_header_btc: Remaining bytes to copy for the object stream header * @aux_header_btc: Remaining bytes to copy for the aux stream header + * @last_acquire_time: The time at which timeline was last acquired. * @csf_tl_reader: CSFFW timeline reader */ struct kbase_timeline { @@ -65,6 +71,7 @@ struct kbase_timeline { atomic_t *timeline_flags; size_t obj_header_btc; size_t aux_header_btc; + ktime_t last_acquire_time; #if MALI_USE_CSF struct kbase_csf_tl_reader csf_tl_reader; #endif diff --git a/mali_kbase/tl/mali_kbase_tlstream.c b/mali_kbase/tl/mali_kbase_tlstream.c index c6eb3c8..202c12f 100644 --- a/mali_kbase/tl/mali_kbase_tlstream.c +++ b/mali_kbase/tl/mali_kbase_tlstream.c @@ -56,20 +56,19 @@ static void kbasep_packet_header_setup( * @numbered: non-zero if the stream is numbered * * Function updates mutable part of packet header in the given buffer. - * Note that value of data_size must not including size of the header. 
+ * Note that value of data_size must not include size of the header. */ static void kbasep_packet_header_update( char *buffer, size_t data_size, int numbered) { - u32 word0; u32 word1 = MIPE_PACKET_HEADER_W1((u32)data_size, !!numbered); KBASE_DEBUG_ASSERT(buffer); - CSTD_UNUSED(word0); - memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1)); + /* we copy the contents of word1 to its respective position in the buffer */ + memcpy(&buffer[sizeof(u32)], &word1, sizeof(word1)); } /** diff --git a/mali_kbase/tl/mali_kbase_tracepoints.c b/mali_kbase/tl/mali_kbase_tracepoints.c index 479f0f4..ece23b3 100644 --- a/mali_kbase/tl/mali_kbase_tracepoints.c +++ b/mali_kbase/tl/mali_kbase_tracepoints.c @@ -69,6 +69,7 @@ enum tl_msg_id_obj { KBASE_TL_ARBITER_STARTED, KBASE_TL_ARBITER_STOP_REQUESTED, KBASE_TL_ARBITER_STOPPED, + KBASE_TL_ARBITER_REQUESTED, KBASE_JD_GPU_SOFT_RESET, KBASE_TL_KBASE_NEW_DEVICE, KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, @@ -288,6 +289,10 @@ enum tl_msg_id_aux { "Driver has stopped using gpu", \ "@p", \ "gpu") \ + TRACEPOINT_DESC(KBASE_TL_ARBITER_REQUESTED, \ + "Driver has requested the arbiter for gpu access", \ + "@p", \ + "gpu") \ TRACEPOINT_DESC(KBASE_JD_GPU_SOFT_RESET, \ "gpu soft reset", \ "@p", \ @@ -1565,6 +1570,28 @@ void __kbase_tlstream_tl_arbiter_stopped( kbase_tlstream_msgbuf_release(stream, acq_flags); } +void __kbase_tlstream_tl_arbiter_requested( + struct kbase_tlstream *stream, + const void *gpu) +{ + const u32 msg_id = KBASE_TL_ARBITER_REQUESTED; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + void __kbase_tlstream_jd_gpu_soft_reset( struct 
kbase_tlstream *stream, const void *gpu) diff --git a/mali_kbase/tl/mali_kbase_tracepoints.h b/mali_kbase/tl/mali_kbase_tracepoints.h index a3fd7c1..f3f554a 100644 --- a/mali_kbase/tl/mali_kbase_tracepoints.h +++ b/mali_kbase/tl/mali_kbase_tracepoints.h @@ -237,6 +237,9 @@ void __kbase_tlstream_tl_arbiter_stop_requested( void __kbase_tlstream_tl_arbiter_stopped( struct kbase_tlstream *stream, const void *gpu); +void __kbase_tlstream_tl_arbiter_requested( + struct kbase_tlstream *stream, + const void *gpu); void __kbase_tlstream_jd_gpu_soft_reset( struct kbase_tlstream *stream, const void *gpu); @@ -1301,6 +1304,25 @@ struct kbase_tlstream; } while (0) /** + * KBASE_TLSTREAM_TL_ARBITER_REQUESTED - + * Driver has requested the arbiter for gpu access + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_TL_ARBITER_REQUESTED( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_arbiter_requested( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + gpu); \ + } while (0) + +/** * KBASE_TLSTREAM_JD_GPU_SOFT_RESET - * gpu soft reset * |