author:    Sidath Senanayake <sidaths@google.com>  2020-09-11 16:44:12 +0100
committer: Sidath Senanayake <sidaths@google.com>  2020-09-11 16:44:12 +0100
commit:    d4ca6eb7268ee2db9deabd1745b505c6e1c162f9
tree:      64058c324e9e6adb30e8689d17f0a2e2b27636bc
parent:    bc3c01e61c8ce9783a8ab091053905effcae12de
Mali Valhall DDK r26p0 KMD
Provenance:
009a7d86a (collaborate/EAC/v_r26p0)
VX504X08X-BU-00000-r26p0-01eac0 - Android DDK
VX504X08X-BU-60000-r26p0-01eac0 - Android Document Bundle
Signed-off-by: Sidath Senanayake <sidaths@google.com>
Change-Id: Ic3671bdc454b706b6f98a9d1a615d1886da0c3e8
72 files changed, 6232 insertions, 1039 deletions
diff --git a/mali_kbase/Kbuild b/mali_kbase/Kbuild
index 192ac06..06dda9c 100644
--- a/mali_kbase/Kbuild
+++ b/mali_kbase/Kbuild
@@ -21,9 +21,12 @@
 # Driver version string which is returned to userspace via an ioctl
-MALI_RELEASE_NAME ?= "r25p0-01eac0"
+MALI_RELEASE_NAME ?= "r26p0-01eac0"
 
 # Paths required for build
+
+# make $(src) as absolute path if it isn't already, by prefixing $(srctree)
+src:=$(if $(patsubst /%,,$(src)),$(srctree)/$(src),$(src))
 KBASE_PATH = $(src)
 KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy
 UMP_PATH = $(src)/../../../base
@@ -34,6 +37,7 @@ MALI_USE_CSF ?= 0
 MALI_UNIT_TEST ?= 0
 MALI_KERNEL_TEST_API ?= 0
 MALI_COVERAGE ?= 0
+MALI_JIT_PRESSURE_LIMIT_BASE ?= 1
 CONFIG_MALI_PLATFORM_NAME ?= "devicetree"
 
 # Experimental features (corresponding -D definition should be appended to
 # DEFINES below, e.g. for MALI_EXPERIMENTAL_FEATURE,
@@ -41,7 +45,6 @@ CONFIG_MALI_PLATFORM_NAME ?= "devicetree"
 #
 # Experimental features must default to disabled, e.g.:
 # MALI_EXPERIMENTAL_FEATURE ?= 0
-MALI_JIT_PRESSURE_LIMIT ?= 0
 MALI_INCREMENTAL_RENDERING ?= 0
 
 # Set up our defines, which will be passed to gcc
@@ -52,7 +55,7 @@ DEFINES = \
 	-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
 	-DMALI_COVERAGE=$(MALI_COVERAGE) \
 	-DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \
-	-DMALI_JIT_PRESSURE_LIMIT=$(MALI_JIT_PRESSURE_LIMIT) \
+	-DMALI_JIT_PRESSURE_LIMIT_BASE=$(MALI_JIT_PRESSURE_LIMIT_BASE) \
 	-DMALI_INCREMENTAL_RENDERING=$(MALI_INCREMENTAL_RENDERING)
 
 ifeq ($(KBUILD_EXTMOD),)
@@ -76,6 +79,7 @@ SRC := \
 	debug/mali_kbase_debug_ktrace.c \
 	device/mali_kbase_device.c \
 	mali_kbase_cache_policy.c \
+	mali_kbase_ccswe.c \
 	mali_kbase_mem.c \
 	mali_kbase_mem_pool_group.c \
 	mali_kbase_native_mgm.c \
@@ -86,7 +90,7 @@ SRC := \
 	mali_kbase_config.c \
 	mali_kbase_vinstr.c \
 	mali_kbase_hwcnt.c \
-	mali_kbase_hwcnt_backend_gpu.c \
+	mali_kbase_hwcnt_backend_jm.c \
 	mali_kbase_hwcnt_gpu.c \
 	mali_kbase_hwcnt_legacy.c \
 	mali_kbase_hwcnt_types.c \
@@ -111,12 +115,14 @@ SRC := \
 	mali_kbase_strings.c \
 	mali_kbase_as_fault_debugfs.c \
 	mali_kbase_regs_history_debugfs.c \
+	mali_power_gpu_frequency_trace.c \
 	thirdparty/mali_kbase_mmap.c \
 	tl/mali_kbase_timeline.c \
 	tl/mali_kbase_timeline_io.c \
 	tl/mali_kbase_tlstream.c \
 	tl/mali_kbase_tracepoints.c \
-	gpu/mali_kbase_gpu.c
+	gpu/mali_kbase_gpu.c \
+	mali_kbase_trace_gpu_mem.c
 
 ifeq ($(MALI_USE_CSF),1)
 	SRC += \
@@ -135,6 +141,7 @@ else
 	mali_kbase_jd_debugfs.c \
 	mali_kbase_js.c \
 	mali_kbase_js_ctx_attr.c \
+	mali_kbase_kinstr_jm.c \
 	debug/backend/mali_kbase_debug_ktrace_jm.c \
 	device/backend/mali_kbase_device_jm.c \
 	gpu/backend/mali_kbase_gpu_fault_jm.c \

diff --git a/mali_kbase/Kconfig b/mali_kbase/Kconfig
index 58a5b0b..ca59dbb 100644
--- a/mali_kbase/Kconfig
+++ b/mali_kbase/Kconfig
@@ -230,6 +230,10 @@ config MALI_DMA_BUF_LEGACY_COMPAT
 	  maintenance where MALI_DMA_BUF_MAP_ON_DEMAND would do the mapping,
 	  including a cache flush.
 
+	  This option might work around issues related to missing cache
+	  flushes in other drivers. This only has an effect for clients using
+	  UK 11.18 or older. For later UK versions it is not possible.
+
 config MALI_HW_ERRATA_1485982_NOT_AFFECTED
 	bool "Disable workaround for BASE_HW_ISSUE_GPU2017_1336"
 	depends on MALI_MIDGARD && MALI_EXPERT

diff --git a/mali_kbase/backend/gpu/Kbuild b/mali_kbase/backend/gpu/Kbuild
index 2449e80..0b3e073 100644
--- a/mali_kbase/backend/gpu/Kbuild
+++ b/mali_kbase/backend/gpu/Kbuild
@@ -1,5 +1,5 @@
 #
-# (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -34,7 +34,8 @@ BACKEND += \
 	backend/gpu/mali_kbase_pm_coarse_demand.c \
 	backend/gpu/mali_kbase_pm_policy.c \
 	backend/gpu/mali_kbase_time.c \
-	backend/gpu/mali_kbase_l2_mmu_config.c
+	backend/gpu/mali_kbase_l2_mmu_config.c \
+	backend/gpu/mali_kbase_clk_rate_trace_mgr.c
 
 ifeq ($(MALI_USE_CSF),1)
 # empty

diff --git a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c
new file mode 100644
index 0000000..18bb117
--- /dev/null
+++ b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c
@@ -0,0 +1,280 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Implementation of the GPU clock rate trace manager.
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <linux/clk.h>
+#include <asm/div64.h>
+#include "mali_kbase_clk_rate_trace_mgr.h"
+
+#ifdef CONFIG_TRACE_POWER_GPU_FREQUENCY
+#include <trace/events/power_gpu_frequency.h>
+#else
+#include "mali_power_gpu_frequency_trace.h"
+#endif
+
+#ifndef CLK_RATE_TRACE_OPS
+#define CLK_RATE_TRACE_OPS (NULL)
+#endif
+
+static int gpu_clk_rate_change_notifier(struct notifier_block *nb,
+			unsigned long event, void *data)
+{
+	struct kbase_gpu_clk_notifier_data *ndata = data;
+	struct kbase_clk_data *clk_data =
+		container_of(nb, struct kbase_clk_data, clk_rate_change_nb);
+	struct kbase_clk_rate_trace_manager *clk_rtm = clk_data->clk_rtm;
+	unsigned long flags;
+
+	if (WARN_ON_ONCE(clk_data->gpu_clk_handle != ndata->gpu_clk_handle))
+		return NOTIFY_BAD;
+
+	spin_lock_irqsave(&clk_rtm->lock, flags);
+	if (event == POST_RATE_CHANGE) {
+		if (!clk_rtm->gpu_idle &&
+		    (clk_data->clock_val != ndata->new_rate)) {
+			kbase_clk_rate_trace_manager_notify_all(
+				clk_rtm, clk_data->index, ndata->new_rate);
+		}
+
+		clk_data->clock_val = ndata->new_rate;
+	}
+	spin_unlock_irqrestore(&clk_rtm->lock, flags);
+
+	return NOTIFY_DONE;
+}
+
+static int gpu_clk_data_init(struct kbase_device *kbdev,
+		void *gpu_clk_handle, unsigned int index)
+{
+	struct kbase_clk_rate_trace_op_conf *callbacks =
+		(struct kbase_clk_rate_trace_op_conf *)CLK_RATE_TRACE_OPS;
+	struct kbase_clk_data *clk_data;
+	struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm;
+	int ret = 0;
+
+	if (WARN_ON(!callbacks) ||
+	    WARN_ON(!gpu_clk_handle) ||
+	    WARN_ON(index >= BASE_MAX_NR_CLOCKS_REGULATORS))
+		return -EINVAL;
+
+	clk_data = kzalloc(sizeof(*clk_data), GFP_KERNEL);
+	if (!clk_data) {
+		dev_err(kbdev->dev, "Failed to allocate data for clock enumerated at index %u", index);
+		return -ENOMEM;
+	}
+
+	clk_data->index = (u8)index;
+	clk_data->gpu_clk_handle = gpu_clk_handle;
+	/* Store the initial value of clock */
+	clk_data->clock_val =
+		callbacks->get_gpu_clk_rate(kbdev, gpu_clk_handle);
+
+	{
+		/* At the initialization time, GPU is powered off. */
+		unsigned long flags;
+
+		spin_lock_irqsave(&clk_rtm->lock, flags);
+		kbase_clk_rate_trace_manager_notify_all(
+			clk_rtm, clk_data->index, 0);
+		spin_unlock_irqrestore(&clk_rtm->lock, flags);
+	}
+
+	clk_data->clk_rtm = clk_rtm;
+	clk_rtm->clks[index] = clk_data;
+
+	clk_data->clk_rate_change_nb.notifier_call =
+		gpu_clk_rate_change_notifier;
+
+	ret = callbacks->gpu_clk_notifier_register(kbdev, gpu_clk_handle,
+			&clk_data->clk_rate_change_nb);
+	if (ret) {
+		dev_err(kbdev->dev, "Failed to register notifier for clock enumerated at index %u", index);
+		kfree(clk_data);
+	}
+
+	return ret;
+}
+
+int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev)
+{
+	struct kbase_clk_rate_trace_op_conf *callbacks =
+		(struct kbase_clk_rate_trace_op_conf *)CLK_RATE_TRACE_OPS;
+	struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm;
+	unsigned int i;
+	int ret = 0;
+
+	/* Return early if no callbacks provided for clock rate tracing */
+	if (!callbacks)
+		return 0;
+
+	spin_lock_init(&clk_rtm->lock);
+	INIT_LIST_HEAD(&clk_rtm->listeners);
+
+	clk_rtm->gpu_idle = true;
+
+	for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
+		void *gpu_clk_handle =
+			callbacks->enumerate_gpu_clk(kbdev, i);
+
+		if (!gpu_clk_handle)
+			break;
+
+		ret = gpu_clk_data_init(kbdev, gpu_clk_handle, i);
+		if (ret)
+			goto error;
+	}
+
+	/* Activate clock rate trace manager if at least one GPU clock was
+	 * enumerated.
+	 */
+	if (i)
+		WRITE_ONCE(clk_rtm->clk_rate_trace_ops, callbacks);
+	else
+		dev_info(kbdev->dev, "No clock(s) available for rate tracing");
+
+	return 0;
+
+error:
+	while (i--) {
+		clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister(
+			kbdev, clk_rtm->clks[i]->gpu_clk_handle,
+			&clk_rtm->clks[i]->clk_rate_change_nb);
+		kfree(clk_rtm->clks[i]);
+	}
+
+	return ret;
+}
+
+void kbase_clk_rate_trace_manager_term(struct kbase_device *kbdev)
+{
+	struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm;
+	unsigned int i;
+
+	WARN_ON(!list_empty(&clk_rtm->listeners));
+
+	if (!clk_rtm->clk_rate_trace_ops)
+		return;
+
+	for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
+		if (!clk_rtm->clks[i])
+			break;
+
+		clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister(
+			kbdev, clk_rtm->clks[i]->gpu_clk_handle,
+			&clk_rtm->clks[i]->clk_rate_change_nb);
+		kfree(clk_rtm->clks[i]);
+	}
+
+	WRITE_ONCE(clk_rtm->clk_rate_trace_ops, NULL);
+}
+
+void kbase_clk_rate_trace_manager_gpu_active(struct kbase_device *kbdev)
+{
+	struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm;
+	unsigned int i;
+	unsigned long flags;
+
+	if (!clk_rtm->clk_rate_trace_ops)
+		return;
+
+	spin_lock_irqsave(&clk_rtm->lock, flags);
+
+	for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
+		struct kbase_clk_data *clk_data = clk_rtm->clks[i];
+
+		if (!clk_data)
+			break;
+
+		if (unlikely(!clk_data->clock_val))
+			continue;
+
+		kbase_clk_rate_trace_manager_notify_all(
+			clk_rtm, clk_data->index, clk_data->clock_val);
+	}
+
+	clk_rtm->gpu_idle = false;
+	spin_unlock_irqrestore(&clk_rtm->lock, flags);
+}
+
+void kbase_clk_rate_trace_manager_gpu_idle(struct kbase_device *kbdev)
+{
+	struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm;
+	unsigned int i;
+	unsigned long flags;
+
+	if (!clk_rtm->clk_rate_trace_ops)
+		return;
+
+	spin_lock_irqsave(&clk_rtm->lock, flags);
+
+	for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
+		struct kbase_clk_data *clk_data = clk_rtm->clks[i];
+
+		if (!clk_data)
+			break;
+
+		if (unlikely(!clk_data->clock_val))
+			continue;
+
+		kbase_clk_rate_trace_manager_notify_all(
+			clk_rtm, clk_data->index, 0);
+	}
+
+	clk_rtm->gpu_idle = true;
+	spin_unlock_irqrestore(&clk_rtm->lock, flags);
+}
+
+void kbase_clk_rate_trace_manager_notify_all(
+	struct kbase_clk_rate_trace_manager *clk_rtm,
+	u32 clk_index,
+	unsigned long new_rate)
+{
+	struct kbase_clk_rate_listener *pos;
+	struct kbase_device *kbdev;
+
+	lockdep_assert_held(&clk_rtm->lock);
+
+	kbdev = container_of(clk_rtm, struct kbase_device, pm.clk_rtm);
+
+	dev_dbg(kbdev->dev, "GPU clock %u rate changed to %lu",
+		clk_index, new_rate);
+
+	/* Raise standard `power/gpu_frequency` ftrace event */
+	{
+		unsigned long new_rate_khz = new_rate;
+
+		do_div(new_rate_khz, 1000);
+		trace_gpu_frequency(new_rate_khz, clk_index);
+	}
+
+	/* Notify the listeners. */
+	list_for_each_entry(pos, &clk_rtm->listeners, node) {
+		pos->notify(pos, clk_index, new_rate);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_clk_rate_trace_manager_notify_all);

diff --git a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h
new file mode 100644
index 0000000..dcafb26
--- /dev/null
+++ b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h
@@ -0,0 +1,155 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_CLK_RATE_TRACE_MGR_
+#define _KBASE_CLK_RATE_TRACE_MGR_
+
+/** The index of top clock domain in kbase_clk_rate_trace_manager:clks. */
+#define KBASE_CLOCK_DOMAIN_TOP (0)
+
+/** The index of shader-cores clock domain in
+ * kbase_clk_rate_trace_manager:clks.
+ */
+#define KBASE_CLOCK_DOMAIN_SHADER_CORES (1)
+
+/**
+ * struct kbase_clk_data - Data stored per enumerated GPU clock.
+ *
+ * @clk_rtm:            Pointer to clock rate trace manager object.
+ * @gpu_clk_handle:     Handle unique to the enumerated GPU clock.
+ * @plat_private:       Private data for the platform to store into
+ * @clk_rate_change_nb: Notifier block containing the pointer to callback
+ *                      function that is invoked whenever the rate of
+ *                      enumerated GPU clock changes.
+ * @clock_val:          Current rate of the enumerated GPU clock.
+ * @index:              Index at which the GPU clock was enumerated.
+ */
+struct kbase_clk_data {
+	struct kbase_clk_rate_trace_manager *clk_rtm;
+	void *gpu_clk_handle;
+	void *plat_private;
+	struct notifier_block clk_rate_change_nb;
+	unsigned long clock_val;
+	u8 index;
+};
+
+/**
+ * kbase_clk_rate_trace_manager_init - Initialize GPU clock rate trace manager.
+ *
+ * @kbdev: Device pointer
+ *
+ * Return: 0 if success, or an error code on failure.
+ */
+int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_clk_rate_trace_manager_term - Terminate GPU clock rate trace manager.
+ *
+ * @kbdev: Device pointer
+ */
+void kbase_clk_rate_trace_manager_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_clk_rate_trace_manager_gpu_active - Inform GPU clock rate trace
+ *                                           manager of GPU becoming active.
+ *
+ * @kbdev: Device pointer
+ */
+void kbase_clk_rate_trace_manager_gpu_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_clk_rate_trace_manager_gpu_idle - Inform GPU clock rate trace
+ *                                         manager of GPU becoming idle.
+ * @kbdev: Device pointer
+ */
+void kbase_clk_rate_trace_manager_gpu_idle(struct kbase_device *kbdev);
+
+/**
+ * kbase_clk_rate_trace_manager_subscribe_no_lock() - Add freq change listener.
+ *
+ * @clk_rtm:  Clock rate manager instance.
+ * @listener: Listener handle
+ *
+ * kbase_clk_rate_trace_manager:lock must be held by the caller.
+ */
+static inline void kbase_clk_rate_trace_manager_subscribe_no_lock(
+	struct kbase_clk_rate_trace_manager *clk_rtm,
+	struct kbase_clk_rate_listener *listener)
+{
+	lockdep_assert_held(&clk_rtm->lock);
+	list_add(&listener->node, &clk_rtm->listeners);
+}
+
+/**
+ * kbase_clk_rate_trace_manager_subscribe() - Add freq change listener.
+ *
+ * @clk_rtm:  Clock rate manager instance.
+ * @listener: Listener handle
+ */
+static inline void kbase_clk_rate_trace_manager_subscribe(
+	struct kbase_clk_rate_trace_manager *clk_rtm,
+	struct kbase_clk_rate_listener *listener)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&clk_rtm->lock, flags);
+	kbase_clk_rate_trace_manager_subscribe_no_lock(
+		clk_rtm, listener);
+	spin_unlock_irqrestore(&clk_rtm->lock, flags);
+}
+
+/**
+ * kbase_clk_rate_trace_manager_unsubscribe() - Remove freq change listener.
+ *
+ * @clk_rtm:  Clock rate manager instance.
+ * @listener: Listener handle
+ */
+static inline void kbase_clk_rate_trace_manager_unsubscribe(
+	struct kbase_clk_rate_trace_manager *clk_rtm,
+	struct kbase_clk_rate_listener *listener)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&clk_rtm->lock, flags);
+	list_del(&listener->node);
+	spin_unlock_irqrestore(&clk_rtm->lock, flags);
+}
+
+/**
+ * kbase_clk_rate_trace_manager_notify_all() - Notify all clock
+ *                                             rate listeners.
+ *
+ * @clk_rtm:   Clock rate manager instance.
+ * @clk_index: Clock index.
+ * @new_rate:  New clock frequency (Hz)
+ *
+ * kbase_clk_rate_trace_manager:lock must be locked.
+ * This function is exported to be used by the clock rate trace test
+ * portal.
+ */
+void kbase_clk_rate_trace_manager_notify_all(
+	struct kbase_clk_rate_trace_manager *clk_rtm,
+	u32 clk_index,
+	unsigned long new_rate);
+
+#endif /* _KBASE_CLK_RATE_TRACE_MGR_ */
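
[Editorial note] The subscribe/unsubscribe helpers above are the whole in-kernel listener API: a client embeds a struct kbase_clk_rate_listener and is called back under clk_rtm->lock (a spinlock, IRQs off) on every rate change, so the callback must not sleep. The listener type itself is declared elsewhere in the driver; the sketch below infers its node/notify members from the list handling in kbase_clk_rate_trace_manager_notify_all(), and the my_* names are illustrative only.

/* Hedged sketch: a rate observer built on the API above. */
struct my_freq_observer {
    struct kbase_clk_rate_listener listener; /* embedded handle */
    unsigned long last_top_rate_hz;
};

/* Prototype inferred from the call site pos->notify(pos, clk_index, new_rate) */
static void my_freq_notify(struct kbase_clk_rate_listener *listener,
                           u32 clk_index, unsigned long new_rate_hz)
{
    struct my_freq_observer *obs =
        container_of(listener, struct my_freq_observer, listener);

    /* Runs under clk_rtm->lock: keep it short, no sleeping. */
    if (clk_index == KBASE_CLOCK_DOMAIN_TOP)
        obs->last_top_rate_hz = new_rate_hz;
}

static void my_freq_observer_start(struct kbase_device *kbdev,
                                   struct my_freq_observer *obs)
{
    obs->listener.notify = my_freq_notify;
    kbase_clk_rate_trace_manager_subscribe(&kbdev->pm.clk_rtm,
                                           &obs->listener);
}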
diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
index 8b320c7..f9c2ec7 100644
--- a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
@@ -87,7 +87,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
 					enable->dump_buffer >> 32);
 
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
-					enable->jm_bm);
+					enable->fe_bm);
 
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
 					enable->shader_bm);

diff --git a/mali_kbase/backend/gpu/mali_kbase_irq_linux.c b/mali_kbase/backend/gpu/mali_kbase_irq_linux.c
index 21b2aa2..8696c6a 100644
--- a/mali_kbase/backend/gpu/mali_kbase_irq_linux.c
+++ b/mali_kbase/backend/gpu/mali_kbase_irq_linux.c
@@ -79,8 +79,6 @@ static irqreturn_t kbase_job_irq_handler(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-KBASE_EXPORT_TEST_API(kbase_job_irq_handler);
-
 static irqreturn_t kbase_mmu_irq_handler(int irq, void *data)
 {
 	unsigned long flags;
@@ -177,7 +175,7 @@ static irq_handler_t kbase_handler_table[] = {
  * Return: IRQ_HANDLED if the requests are from the GPU device,
  *         IRQ_NONE otherwise
  */
-static irqreturn_t kbase_gpu_irq_test_handler(int irq, void *data, u32 val)
+irqreturn_t kbase_gpu_irq_test_handler(int irq, void *data, u32 val)
 {
 	struct kbase_device *kbdev = kbase_untag(data);

diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
index fa6bc83..73c4f6b 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
@@ -33,6 +33,7 @@
 #include <mali_kbase_hwaccess_jm.h>
 #include <mali_kbase_reset_gpu.h>
 #include <mali_kbase_ctx_sched.h>
+#include <mali_kbase_kinstr_jm.h>
 #include <mali_kbase_hwcnt_context.h>
 #include <backend/gpu/mali_kbase_device_internal.h>
 #include <backend/gpu/mali_kbase_irq_internal.h>
@@ -277,6 +278,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
 			katom,
 			&kbdev->gpu_props.props.raw_props.js_features[js],
 			"ctx_nr,atom_nr");
+	kbase_kinstr_jm_atom_hw_submit(katom);
 #ifdef CONFIG_GPU_TRACEPOINTS
 	if (!kbase_backend_nr_atoms_submitted(kbdev, js)) {
 		/* If this is the only job on the slot, trace it as starting */
@@ -692,12 +694,40 @@ void kbase_backend_jm_kill_running_jobs_from_kctx(struct kbase_context *kctx)
 		kbase_job_slot_hardstop(kctx, i, NULL);
 }
 
+/**
+ * kbase_is_existing_atom_submitted_later_than_ready
+ * @ready: sequence number of the ready atom
+ * @existing: sequence number of the existing atom
+ *
+ * Returns true if the existing atom has been submitted later than the
+ * ready atom. It is used to understand if an atom that is ready has been
+ * submitted earlier than the currently running atom, so that the currently
+ * running atom should be preempted to allow the ready atom to run.
+ */
+static inline bool kbase_is_existing_atom_submitted_later_than_ready(u64 ready, u64 existing)
+{
+	/* No seq_nr set? */
+	if (!ready || !existing)
+		return false;
+
+	/* Efficiently handle the unlikely case of wrapping.
+	 * The following code assumes that the delta between the sequence
+	 * numbers of the two atoms is less than INT64_MAX.
+	 * In the extremely unlikely case where the delta is higher, the
+	 * comparison defaults to no preemption.
+	 * The code also assumes that the conversion from unsigned to signed
+	 * types works because the signed integers are 2's complement.
+	 */
+	return (s64)(ready - existing) < 0;
+}
+
 void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
 				struct kbase_jd_atom *target_katom)
 {
 	struct kbase_device *kbdev;
 	int js = target_katom->slot_nr;
 	int priority = target_katom->sched_priority;
+	int seq_nr = target_katom->seq_nr;
 	int i;
 	bool stop_sent = false;
 
@@ -719,7 +749,8 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
 		    (katom->kctx != kctx))
 			continue;
 
-		if (katom->sched_priority > priority) {
+		if ((katom->sched_priority > priority) ||
+		    (katom->kctx == kctx && kbase_is_existing_atom_submitted_later_than_ready(seq_nr, katom->seq_nr))) {
 			if (!stop_sent)
 				KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED(
 						kbdev,
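
[Editorial note] The cast-and-compare in kbase_is_existing_atom_submitted_later_than_ready() is standard serial-number arithmetic: subtracting the two u64 counters and reinterpreting the difference as s64 keeps the ordering correct even across a wrap, as long as the two values are within INT64_MAX of each other. A standalone demonstration of the same comparison (userspace C, mirroring the function above):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool existing_submitted_later(uint64_t ready, uint64_t existing)
{
    if (!ready || !existing)    /* seq_nr of 0 means "not set" */
        return false;
    return (int64_t)(ready - existing) < 0;
}

int main(void)
{
    /* Plain ordering: the existing atom (9) was submitted after the
     * ready one (5), so the ready atom should win. */
    printf("%d\n", existing_submitted_later(5, 9));          /* 1 */
    printf("%d\n", existing_submitted_later(9, 5));          /* 0 */

    /* Wrapped ordering: the unsigned subtraction keeps the small signed
     * delta, so seq_nr 1 (just past the wrap) still counts as later
     * than UINT64_MAX. */
    printf("%d\n", existing_submitted_later(UINT64_MAX, 1)); /* 1 */
    return 0;
}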
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
index ec7bcb1..8b409a0 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
@@ -33,6 +33,7 @@
 #include <tl/mali_kbase_tracepoints.h>
 #include <mali_kbase_hwcnt_context.h>
 #include <mali_kbase_reset_gpu.h>
+#include <mali_kbase_kinstr_jm.h>
 #include <backend/gpu/mali_kbase_cache_policy_backend.h>
 #include <backend/gpu/mali_kbase_device_internal.h>
 #include <backend/gpu/mali_kbase_jm_internal.h>
@@ -278,6 +279,7 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev,
 		break;
 
 	case KBASE_ATOM_GPU_RB_SUBMITTED:
+		kbase_kinstr_jm_atom_hw_release(katom);
 		/* Inform power management at start/finish of atom so it can
 		 * update its GPU utilisation metrics. Mark atom as not
 		 * submitted beforehand. */

diff --git a/mali_kbase/backend/gpu/mali_kbase_js_backend.c b/mali_kbase/backend/gpu/mali_kbase_js_backend.c
index fcc0437..d2d11a3 100644
--- a/mali_kbase/backend/gpu/mali_kbase_js_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_js_backend.c
@@ -37,7 +37,7 @@ static inline bool timer_callback_should_run(struct kbase_device *kbdev)
 {
 	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
-	s8 nr_running_ctxs;
+	int nr_running_ctxs;
 
 	lockdep_assert_held(&kbdev->js_data.runpool_mutex);
 
@@ -69,10 +69,10 @@ static inline bool timer_callback_should_run(struct kbase_device *kbdev)
 	 * don't check KBASEP_JS_CTX_ATTR_NON_COMPUTE).
 	 */
 	{
-		s8 nr_compute_ctxs =
+		int nr_compute_ctxs =
 			kbasep_js_ctx_attr_count_on_runpool(kbdev,
 					KBASEP_JS_CTX_ATTR_COMPUTE);
-		s8 nr_noncompute_ctxs = nr_running_ctxs -
+		int nr_noncompute_ctxs = nr_running_ctxs -
 					nr_compute_ctxs;
 
 		return (bool) (nr_compute_ctxs >= 2 ||

diff --git a/mali_kbase/backend/gpu/mali_kbase_time.c b/mali_kbase/backend/gpu/mali_kbase_time.c
index cb10518..a9c33e2 100644
--- a/mali_kbase/backend/gpu/mali_kbase_time.c
+++ b/mali_kbase/backend/gpu/mali_kbase_time.c
@@ -25,13 +25,13 @@
 #include <backend/gpu/mali_kbase_device_internal.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
 
-void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
-				u64 *system_time, struct timespec64 *ts)
+void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev,
+					  u64 *cycle_counter,
+					  u64 *system_time,
+					  struct timespec64 *ts)
 {
 	u32 hi1, hi2;
 
-	kbase_pm_request_gpu_cycle_counter(kbdev);
-
 	if (cycle_counter) {
 		/* Read hi, lo, hi to ensure a coherent u64 */
 		do {
@@ -65,6 +65,13 @@ void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
 #else
 	ktime_get_raw_ts64(ts);
 #endif
+}
+
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec64 *ts)
+{
+	kbase_pm_request_gpu_cycle_counter(kbdev);
+	kbase_backend_get_gpu_time_norequest(
+			kbdev, cycle_counter, system_time, ts);
 	kbase_pm_release_gpu_cycle_counter(kbdev);
 }
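
[Editorial note] The cycle-counter path above depends on the hi/lo/hi idiom named in its comment: read the high half, then the low half, then the high half again, and retry until the two high reads agree, so that a carry out of the low word can never be observed halfway. A generic standalone sketch of the pattern (read_hi()/read_lo() are hypothetical stand-ins for the CYCLE_COUNT register reads):

#include <stdint.h>

/* Hypothetical 32-bit accessors for a split 64-bit hardware counter. */
extern uint32_t read_hi(void);
extern uint32_t read_lo(void);

static uint64_t read_counter64(void)
{
    uint32_t hi1, hi2, lo;

    do {
        hi1 = read_hi();
        lo = read_lo();
        hi2 = read_hi();
    } while (hi1 != hi2);   /* a carry crossed us: retry */

    return ((uint64_t)hi1 << 32) | lo;
}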
diff --git a/mali_kbase/context/backend/mali_kbase_context_jm.c b/mali_kbase/context/backend/mali_kbase_context_jm.c
index 2cd2551..5d5b639 100644
--- a/mali_kbase/context/backend/mali_kbase_context_jm.c
+++ b/mali_kbase/context/backend/mali_kbase_context_jm.c
@@ -30,6 +30,7 @@
 #include <mali_kbase.h>
 #include <mali_kbase_ctx_sched.h>
 #include <mali_kbase_dma_fence.h>
+#include <mali_kbase_kinstr_jm.h>
 #include <mali_kbase_mem_linux.h>
 #include <mali_kbase_mem_pool_group.h>
 #include <mmu/mali_kbase_mmu.h>
@@ -70,6 +71,21 @@ void kbase_context_debugfs_term(struct kbase_context *const kctx)
 KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term);
 #endif /* CONFIG_DEBUG_FS */
 
+static int kbase_context_kbase_kinstr_jm_init(struct kbase_context *kctx)
+{
+	int ret = kbase_kinstr_jm_init(&kctx->kinstr_jm);
+
+	if (!ret)
+		return ret;
+
+	return 0;
+}
+
+static void kbase_context_kbase_kinstr_jm_term(struct kbase_context *kctx)
+{
+	kbase_kinstr_jm_term(kctx->kinstr_jm);
+}
+
 static int kbase_context_kbase_timer_setup(struct kbase_context *kctx)
 {
 	kbase_timer_setup(&kctx->soft_job_timeout,
@@ -122,6 +138,8 @@ static const struct kbase_context_init context_init[] = {
 	 "Sticky resource initialization failed"},
 	{kbase_jit_init, kbase_jit_term,
 	 "JIT initialization failed"},
+	{kbase_context_kbase_kinstr_jm_init, kbase_context_kbase_kinstr_jm_term,
+	 "JM instrumentation initialization failed"},
 	{kbase_context_kbase_timer_setup, NULL, NULL},
 	{kbase_context_submit_check, NULL, NULL},
 };

diff --git a/mali_kbase/context/mali_kbase_context.c b/mali_kbase/context/mali_kbase_context.c
index 93fe431..5c27224 100644
--- a/mali_kbase/context/mali_kbase_context.c
+++ b/mali_kbase/context/mali_kbase_context.c
@@ -36,9 +36,99 @@
 #include <mmu/mali_kbase_mmu.h>
 #include <context/mali_kbase_context_internal.h>
 
+/**
+ * find_process_node - Used to traverse the process rb_tree to find if
+ *                     the process exists already in the process rb_tree.
+ *
+ * @node: Pointer to root node to start search.
+ * @tgid: Thread group PID to search for.
+ *
+ * Return: Pointer to kbase_process if it exists, otherwise NULL.
+ */
+static struct kbase_process *find_process_node(struct rb_node *node, pid_t tgid)
+{
+	struct kbase_process *kprcs = NULL;
+
+	/* Check if the kctx creation request is from an existing process. */
+	while (node) {
+		struct kbase_process *prcs_node =
+			rb_entry(node, struct kbase_process, kprcs_node);
+		if (prcs_node->tgid == tgid) {
+			kprcs = prcs_node;
+			break;
+		}
+
+		if (tgid < prcs_node->tgid)
+			node = node->rb_left;
+		else
+			node = node->rb_right;
+	}
+
+	return kprcs;
+}
+
+/**
+ * kbase_insert_kctx_to_process - Initialise kbase process context.
+ *
+ * @kctx: Pointer to kbase context.
+ *
+ * Here we initialise the per-process rb_tree managed by kbase_device.
+ * We maintain an rb_tree of each unique process that gets created,
+ * and each process maintains a list of kbase contexts.
+ * This setup is currently used by the kernel trace functionality
+ * to trace and visualise GPU memory consumption.
+ *
+ * Return: 0 on success and error number on failure.
+ */
+static int kbase_insert_kctx_to_process(struct kbase_context *kctx)
+{
+	struct rb_root *const prcs_root = &kctx->kbdev->process_root;
+	const pid_t tgid = kctx->tgid;
+	struct kbase_process *kprcs = NULL;
+
+	lockdep_assert_held(&kctx->kbdev->kctx_list_lock);
+
+	kprcs = find_process_node(prcs_root->rb_node, tgid);
+
+	/* if the kctx is from a new process then create a new kbase_process
+	 * and add it to the &kbase_device->rb_tree
+	 */
+	if (!kprcs) {
+		struct rb_node **new = &prcs_root->rb_node, *parent = NULL;
+
+		kprcs = kzalloc(sizeof(*kprcs), GFP_KERNEL);
+		if (kprcs == NULL)
+			return -ENOMEM;
+		kprcs->tgid = tgid;
+		INIT_LIST_HEAD(&kprcs->kctx_list);
+		kprcs->dma_buf_root = RB_ROOT;
+		kprcs->total_gpu_pages = 0;
+
+		while (*new) {
+			struct kbase_process *prcs_node;
+
+			parent = *new;
+			prcs_node = rb_entry(parent, struct kbase_process,
+					     kprcs_node);
+			if (tgid < prcs_node->tgid)
+				new = &(*new)->rb_left;
+			else
+				new = &(*new)->rb_right;
+		}
+		rb_link_node(&kprcs->kprcs_node, parent, new);
+		rb_insert_color(&kprcs->kprcs_node, prcs_root);
+	}
+
+	kctx->kprcs = kprcs;
+	list_add(&kctx->kprcs_link, &kprcs->kctx_list);
+
+	return 0;
+}
+
 int kbase_context_common_init(struct kbase_context *kctx)
 {
 	const unsigned long cookies_mask = KBASE_COOKIE_MASK;
+	int err = 0;
 
 	/* creating a context is considered a disjoint event */
 	kbase_disjoint_event(kctx->kbdev);
@@ -81,13 +171,50 @@ int kbase_context_common_init(struct kbase_context *kctx)
 
 	mutex_lock(&kctx->kbdev->kctx_list_lock);
 	list_add(&kctx->kctx_list_link, &kctx->kbdev->kctx_list);
+
+	err = kbase_insert_kctx_to_process(kctx);
+	if (err)
+		dev_err(kctx->kbdev->dev,
+			"(err:%d) failed to insert kctx to kbase_process\n", err);
+
 	KBASE_TLSTREAM_TL_KBASE_NEW_CTX(kctx->kbdev, kctx->id,
 			kctx->kbdev->gpu_props.props.raw_props.gpu_id);
 	KBASE_TLSTREAM_TL_NEW_CTX(kctx->kbdev, kctx, kctx->id,
 			(u32)(kctx->tgid));
 	mutex_unlock(&kctx->kbdev->kctx_list_lock);
 
-	return 0;
+	return err;
+}
+
+/**
+ * kbase_remove_kctx_from_process - remove a terminating context from
+ *                                  the process list.
+ *
+ * @kctx: Pointer to kbase context.
+ *
+ * Remove the tracking of a context from the list of contexts maintained under
+ * the kbase process. If the list is then empty, there are no outstanding
+ * contexts and we can remove the process node as well.
+ */
+static void kbase_remove_kctx_from_process(struct kbase_context *kctx)
+{
+	struct kbase_process *kprcs = kctx->kprcs;
+
+	lockdep_assert_held(&kctx->kbdev->kctx_list_lock);
+	list_del(&kctx->kprcs_link);
+
+	/* if there are no outstanding contexts in current process node,
+	 * we can remove it from the process rb_tree.
+	 */
+	if (list_empty(&kprcs->kctx_list)) {
+		rb_erase(&kprcs->kprcs_node, &kctx->kbdev->process_root);
+		/* Add checks, so that the terminating process should not
+		 * hold any gpu_memory.
+		 */
+		WARN_ON(kprcs->total_gpu_pages);
+		WARN_ON(!RB_EMPTY_ROOT(&kprcs->dma_buf_root));
+		kfree(kprcs);
+	}
+}
 
 void kbase_context_common_term(struct kbase_context *kctx)
@@ -109,6 +236,7 @@ void kbase_context_common_term(struct kbase_context *kctx)
 	WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0);
 
 	mutex_lock(&kctx->kbdev->kctx_list_lock);
+	kbase_remove_kctx_from_process(kctx);
 	KBASE_TLSTREAM_TL_KBASE_DEL_CTX(kctx->kbdev, kctx->id);

diff --git a/mali_kbase/device/backend/mali_kbase_device_jm.c b/mali_kbase/device/backend/mali_kbase_device_jm.c
index fbba2e7..2a45a33 100644
--- a/mali_kbase/device/backend/mali_kbase_device_jm.c
+++ b/mali_kbase/device/backend/mali_kbase_device_jm.c
@@ -43,6 +43,7 @@
 #include <backend/gpu/mali_kbase_js_internal.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
 #include <mali_kbase_dummy_job_wa.h>
+#include <backend/gpu/mali_kbase_clk_rate_trace_mgr.h>
 
 /**
  * kbase_backend_late_init - Perform any backend-specific initialization.
@@ -178,8 +179,11 @@ static const struct kbase_device_init dev_init[] = {
 	 "Job JS devdata initialization failed"},
 	{kbase_device_timeline_init, kbase_device_timeline_term,
 	 "Timeline stream initialization failed"},
-	{kbase_device_hwcnt_backend_gpu_init,
-	 kbase_device_hwcnt_backend_gpu_term,
+	{kbase_clk_rate_trace_manager_init,
+	 kbase_clk_rate_trace_manager_term,
+	 "Clock rate trace manager initialization failed"},
+	{kbase_device_hwcnt_backend_jm_init,
+	 kbase_device_hwcnt_backend_jm_term,
 	 "GPU hwcnt backend creation failed"},
 	{kbase_device_hwcnt_context_init, kbase_device_hwcnt_context_term,
 	 "GPU hwcnt context initialization failed"},

diff --git a/mali_kbase/device/mali_kbase_device.c b/mali_kbase/device/mali_kbase_device.c
index 76f14e5..d0b85ba 100644
--- a/mali_kbase/device/mali_kbase_device.c
+++ b/mali_kbase/device/mali_kbase_device.c
@@ -271,14 +271,14 @@ void kbase_increment_device_id(void)
 	kbase_dev_nr++;
 }
 
-int kbase_device_hwcnt_backend_gpu_init(struct kbase_device *kbdev)
+int kbase_device_hwcnt_backend_jm_init(struct kbase_device *kbdev)
 {
-	return kbase_hwcnt_backend_gpu_create(kbdev, &kbdev->hwcnt_gpu_iface);
+	return kbase_hwcnt_backend_jm_create(kbdev, &kbdev->hwcnt_gpu_iface);
 }
 
-void kbase_device_hwcnt_backend_gpu_term(struct kbase_device *kbdev)
+void kbase_device_hwcnt_backend_jm_term(struct kbase_device *kbdev)
 {
-	kbase_hwcnt_backend_gpu_destroy(&kbdev->hwcnt_gpu_iface);
+	kbase_hwcnt_backend_jm_destroy(&kbdev->hwcnt_gpu_iface);
 }
 
 int kbase_device_hwcnt_context_init(struct kbase_device *kbdev)

diff --git a/mali_kbase/device/mali_kbase_device_internal.h b/mali_kbase/device/mali_kbase_device_internal.h
index 9f96db0..5464458 100644
--- a/mali_kbase/device/mali_kbase_device_internal.h
+++ b/mali_kbase/device/mali_kbase_device_internal.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -43,8 +43,8 @@ void kbase_device_vinstr_term(struct kbase_device *kbdev);
 int kbase_device_timeline_init(struct kbase_device *kbdev);
 void kbase_device_timeline_term(struct kbase_device *kbdev);
 
-int kbase_device_hwcnt_backend_gpu_init(struct kbase_device *kbdev);
-void kbase_device_hwcnt_backend_gpu_term(struct kbase_device *kbdev);
+int kbase_device_hwcnt_backend_jm_init(struct kbase_device *kbdev);
+void kbase_device_hwcnt_backend_jm_term(struct kbase_device *kbdev);
 
 int kbase_device_hwcnt_context_init(struct kbase_device *kbdev);
 void kbase_device_hwcnt_context_term(struct kbase_device *kbdev);

diff --git a/mali_kbase/jm/mali_base_jm_kernel.h b/mali_kbase/jm/mali_base_jm_kernel.h
index 879a436..ce36020 100644
--- a/mali_kbase/jm/mali_base_jm_kernel.h
+++ b/mali_kbase/jm/mali_base_jm_kernel.h
@@ -155,18 +155,23 @@
 /* Use the GPU VA chosen by the kernel client */
 #define BASE_MEM_FLAG_MAP_FIXED ((base_mem_alloc_flags)1 << 27)
 
+/* Bit 28 reserved for Kernel side cache sync ops flag */
+
+/* Force trimming of JIT allocations when creating a new allocation */
+#define BASEP_MEM_PERFORM_JIT_TRIM ((base_mem_alloc_flags)1 << 29)
+
 /* Number of bits used as flags for base memory management
  *
  * Must be kept in sync with the base_mem_alloc_flags flags
  */
-#define BASE_MEM_FLAGS_NR_BITS 28
+#define BASE_MEM_FLAGS_NR_BITS 30
 
 /* A mask of all the flags which are only valid for allocations within kbase,
  * and may not be passed from user space.
  */
 #define BASEP_MEM_FLAGS_KERNEL_ONLY \
 	(BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE | \
-	 BASE_MEM_FLAG_MAP_FIXED)
+	 BASE_MEM_FLAG_MAP_FIXED | BASEP_MEM_PERFORM_JIT_TRIM)
 
 /* A mask for all output bits, excluding IN/OUT bits.
  */
@@ -192,6 +197,28 @@
 #define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \
 	BASE_MEM_COOKIE_BASE)
 
+/* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the
+ * initial commit is aligned to 'extent' pages, where 'extent' must be a power
+ * of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES
+ */
+#define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP (1 << 0)
+
+/**
+ * If set, the heap info address points to a u32 holding the used size in
+ * bytes; otherwise it points to a u64 holding the lowest address of unused
+ * memory.
+ */
+#define BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE (1 << 1)
+
+/**
+ * Valid set of just-in-time memory allocation flags
+ *
+ * Note: BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE cannot be set if heap_info_gpu_addr
+ * in %base_jit_alloc_info is 0 (an atom with BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE
+ * set and heap_info_gpu_addr being 0 will be rejected).
+ */
+#define BASE_JIT_ALLOC_VALID_FLAGS \
+	(BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP | BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE)
+
 /**
  * typedef base_context_create_flags - Flags to pass to ::base_context_init.
  *
@@ -787,6 +814,54 @@ struct base_jd_atom_v2 {
 	u8 padding[7];
 };
 
+/**
+ * struct base_jd_atom - Same as base_jd_atom_v2, but has an extra seq_nr
+ *                       at the beginning.
+ *
+ * @seq_nr:        Sequence number of logical grouping of atoms.
+ * @jc:            GPU address of a job chain or (if BASE_JD_REQ_END_RENDERPASS
+ *                 is set in the base_jd_core_req) the CPU address of a
+ *                 base_jd_fragment object.
+ * @udata:         User data.
+ * @extres_list:   List of external resources.
+ * @nr_extres:     Number of external resources or JIT allocations.
+ * @jit_id:        Zero-terminated array of IDs of just-in-time memory
+ *                 allocations written to by the atom. When the atom
+ *                 completes, the value stored at the
+ *                 &struct_base_jit_alloc_info.heap_info_gpu_addr of
+ *                 each allocation is read in order to enforce an
+ *                 overall physical memory usage limit.
+ * @pre_dep:       Pre-dependencies. One needs to use the SETTER function to
+ *                 assign this field; this is done in order to reduce the
+ *                 possibility of improper assignment of a dependency field.
+ * @atom_number:   Unique number to identify the atom.
+ * @prio:          Atom priority. Refer to base_jd_prio for more details.
+ * @device_nr:     Core group when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP
+ *                 specified.
+ * @jobslot:       Job slot to use when BASE_JD_REQ_JOB_SLOT is specified.
+ * @core_req:      Core requirements.
+ * @renderpass_id: Renderpass identifier used to associate an atom that has
+ *                 BASE_JD_REQ_START_RENDERPASS set in its core requirements
+ *                 with an atom that has BASE_JD_REQ_END_RENDERPASS set.
+ * @padding:       Unused. Must be zero.
+ */
+typedef struct base_jd_atom {
+	u64 seq_nr;
+	u64 jc;
+	struct base_jd_udata udata;
+	u64 extres_list;
+	u16 nr_extres;
+	u8 jit_id[2];
+	struct base_dependency pre_dep[2];
+	base_atom_id atom_number;
+	base_jd_prio prio;
+	u8 device_nr;
+	u8 jobslot;
+	base_jd_core_req core_req;
+	u8 renderpass_id;
+	u8 padding[7];
+} base_jd_atom;
+
 /* Job chain event code bits
  * Defines the bits used to create ::base_jd_event_code
  */
@@ -982,7 +1057,7 @@ struct base_jd_event_v2 {
  * jobs.
  *
  * This structure is stored into the memory pointed to by the @jc field
- * of &struct base_jd_atom_v2.
+ * of &struct base_jd_atom.
  *
  * It must not occupy the same CPU cache line(s) as any neighboring data.
  * This is to avoid cases where access to pages containing the structure

diff --git a/mali_kbase/jm/mali_kbase_jm_defs.h b/mali_kbase/jm/mali_kbase_jm_defs.h
index aac561b..307a342 100644
--- a/mali_kbase/jm/mali_kbase_jm_defs.h
+++ b/mali_kbase/jm/mali_kbase_jm_defs.h
@@ -496,9 +496,9 @@ struct kbase_jd_atom {
 	struct list_head jd_item;
 	bool in_jd_list;
 
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
 	u8 jit_ids[2];
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
 
 	u16 nr_extres;
 	struct kbase_ext_res *extres;
@@ -608,6 +608,9 @@ struct kbase_jd_atom {
 
 	atomic_t blocked;
 
+	/* user-space sequence number, to order atoms in some temporal order */
+	u64 seq_nr;
+
 	struct kbase_jd_atom *pre_dep;
 	struct kbase_jd_atom *post_dep;

diff --git a/mali_kbase/jm/mali_kbase_jm_ioctl.h b/mali_kbase/jm/mali_kbase_jm_ioctl.h
index 408e98e..6dc57d0 100644
--- a/mali_kbase/jm/mali_kbase_jm_ioctl.h
+++ b/mali_kbase/jm/mali_kbase_jm_ioctl.h
@@ -94,16 +94,32 @@
  * - The above changes are checked for safe values in usual builds
  * 11.21:
  * - v2.0 of mali_trace debugfs file, which now versions the file separately
+ * 11.22:
+ * - Added base_jd_atom (v3), which is seq_nr + base_jd_atom_v2.
+ *   KBASE_IOCTL_JOB_SUBMIT supports both in parallel.
+ * 11.23:
+ * - Modified KBASE_IOCTL_MEM_COMMIT behavior to reject requests to modify
+ *   the physical memory backing of JIT allocations. This was not supposed
+ *   to be a valid use case, but it was allowed by the previous
+ *   implementation.
+ * 11.24:
+ * - Added a sysfs file 'serialize_jobs' inside a new sub-directory
+ *   'scheduling'.
+ * 11.25:
+ * - Enabled JIT pressure limit in base/kbase by default
+ * 11.26:
+ * - Added kinstr_jm API
+ * 11.27:
+ * - Backwards compatible extension to HWC ioctl.
 */
 #define BASE_UK_VERSION_MAJOR 11
-#define BASE_UK_VERSION_MINOR 21
+#define BASE_UK_VERSION_MINOR 27
 
 /**
  * struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel
  *
- * @addr: Memory address of an array of struct base_jd_atom_v2
+ * @addr: Memory address of an array of struct base_jd_atom_v2 or v3
  * @nr_atoms: Number of entries in the array
- * @stride: sizeof(struct base_jd_atom_v2)
+ * @stride: sizeof(struct base_jd_atom_v2) or sizeof(struct base_jd_atom)
  */
 struct kbase_ioctl_job_submit {
 	__u64 addr;
@@ -132,5 +148,47 @@ struct kbase_ioctl_soft_event_update {
 #define KBASE_IOCTL_SOFT_EVENT_UPDATE \
 	_IOW(KBASE_IOCTL_TYPE, 28, struct kbase_ioctl_soft_event_update)
 
+/**
+ * struct kbase_kinstr_jm_fd_out - Explains the compatibility information for
+ * the `struct kbase_kinstr_jm_atom_state_change` structure returned from the
+ * kernel
+ *
+ * @size:    The size of the `struct kbase_kinstr_jm_atom_state_change`
+ * @version: Represents a breaking change in the
+ *           `struct kbase_kinstr_jm_atom_state_change`
+ * @padding: Explicit padding to get the structure up to 64 bits. See
+ *           https://www.kernel.org/doc/Documentation/ioctl/botching-up-ioctls.rst
+ *
+ * The `struct kbase_kinstr_jm_atom_state_change` may have extra members at the
+ * end of the structure that older user space might not understand. If the
+ * `version` is the same, the structure is still compatible with newer kernels.
+ * The `size` can be used to cast the opaque memory returned from the kernel.
+ */
+struct kbase_kinstr_jm_fd_out {
+	__u16 size;
+	__u8 version;
+	__u8 padding[5];
+};
+
+/**
+ * struct kbase_kinstr_jm_fd_in - Options when creating the file descriptor
+ *
+ * @count:   Number of atom states that can be stored in the kernel circular
+ *           buffer. Must be a power of two
+ * @padding: Explicit padding to get the structure up to 64 bits. See
+ *           https://www.kernel.org/doc/Documentation/ioctl/botching-up-ioctls.rst
+ */
+struct kbase_kinstr_jm_fd_in {
+	__u16 count;
+	__u8 padding[6];
+};
+
+union kbase_kinstr_jm_fd {
+	struct kbase_kinstr_jm_fd_in in;
+	struct kbase_kinstr_jm_fd_out out;
+};
+
+#define KBASE_IOCTL_KINSTR_JM_FD \
+	_IOWR(KBASE_IOCTL_TYPE, 51, union kbase_kinstr_jm_fd)
 
 #endif /* _KBASE_JM_IOCTL_H_ */
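
[Editorial note] Because kbase_ioctl_job_submit carries the stride explicitly, the kernel can tell a v2 atom array from a v3 one without a new ioctl number. A hedged userspace sketch of a v3 submission; it assumes the uapi structures above plus KBASE_IOCTL_JOB_SUBMIT from mali_kbase_ioctl.h (not part of this hunk), and that 'fd' is an open kbase device file with a context already created:

#include <stdint.h>
#include <sys/ioctl.h>

static int submit_one_atom(int fd, struct base_jd_atom *atom)
{
    struct kbase_ioctl_job_submit job = { 0 };

    job.addr = (__u64)(uintptr_t)atom;
    job.nr_atoms = 1;
    /* v3 stride: seq_nr included. Legacy clients pass
     * sizeof(struct base_jd_atom_v2) here instead. */
    job.stride = sizeof(struct base_jd_atom);

    return ioctl(fd, KBASE_IOCTL_JOB_SUBMIT, &job);
}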
diff --git a/mali_kbase/mali_base_kernel.h b/mali_kbase/mali_base_kernel.h
index 1e2744d..d45092f 100644
--- a/mali_kbase/mali_base_kernel.h
+++ b/mali_kbase/mali_base_kernel.h
@@ -213,28 +213,6 @@ struct base_mem_aliasing_info {
  */
 #define BASE_JIT_ALLOC_COUNT (255)
 
-/* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the
- * initial commit is aligned to 'extent' pages, where 'extent' must be a power
- * of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES
- */
-#define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP (1 << 0)
-
-/**
- * If set, the heap info address points to a u32 holding the used size in bytes;
- * otherwise it points to a u64 holding the lowest address of unused memory.
- */
-#define BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE (1 << 1)
-
-/**
- * Valid set of just-in-time memory allocation flags
- *
- * Note: BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE cannot be set if heap_info_gpu_addr
- * in %base_jit_alloc_info is 0 (atom with BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE set
- * and heap_info_gpu_addr being 0 will be rejected).
- */
-#define BASE_JIT_ALLOC_VALID_FLAGS \
-	(BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP | BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE)
-
 /* base_jit_alloc_info in use for kernel driver versions 10.2 to early 11.5
  *
  * jit_version is 1

diff --git a/mali_kbase/mali_gpu_mem_trace.h b/mali_kbase/mali_gpu_mem_trace.h
new file mode 100644
index 0000000..183e6c4
--- /dev/null
+++ b/mali_kbase/mali_gpu_mem_trace.h
@@ -0,0 +1,73 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM gpu_mem
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE mali_gpu_mem_trace
+
+#if !defined(_TRACE_MALI_GPU_MEM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_GPU_MEM_H
+
+#include <linux/tracepoint.h>
+
+/*
+ * trace_gpu_mem_total
+ *
+ * The gpu_mem_total event indicates that there's an update to either the
+ * global or process total GPU memory counters.
+ *
+ * This event should be emitted whenever the kernel device driver allocates,
+ * frees, imports, or unimports memory in the GPU addressable space.
+ *
+ * @gpu_id: Kbase device id.
+ * @pid: This is either the thread group ID of the process for which there was
+ *       an update in the GPU memory usage or 0 so as to indicate an update in
+ *       the device wide GPU memory usage.
+ * @size: GPU memory usage in bytes.
+ */
+TRACE_EVENT(gpu_mem_total,
+	TP_PROTO(uint32_t gpu_id, uint32_t pid, uint64_t size),
+
+	TP_ARGS(gpu_id, pid, size),
+
+	TP_STRUCT__entry(
+		__field(uint32_t, gpu_id)
+		__field(uint32_t, pid)
+		__field(uint64_t, size)
+	),
+
+	TP_fast_assign(
+		__entry->gpu_id = gpu_id;
+		__entry->pid = pid;
+		__entry->size = size;
+	),
+
+	TP_printk("gpu_id=%u pid=%u size=%llu",
+		__entry->gpu_id,
+		__entry->pid,
+		__entry->size)
+);
+#endif /* _TRACE_MALI_GPU_MEM_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
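
[Editorial note] TRACE_EVENT(gpu_mem_total, ...) generates a trace_gpu_mem_total() emitter once some translation unit defines CREATE_TRACE_POINTS before including the header. A sketch of a driver-side helper raising the event for both the device-wide (pid == 0) and per-process totals; only the tracepoint signature comes from the header above, the helper and its parameters are hypothetical:

#define CREATE_TRACE_POINTS
#include "mali_gpu_mem_trace.h"

/* Hypothetical helper: called whenever GPU pages are (un)mapped. The
 * driver keeps page counts, the tracepoint wants bytes, hence PAGE_SHIFT. */
static void report_gpu_mem_usage(u32 gpu_id, u64 device_total_pages,
                                 struct kbase_process *kprcs)
{
    trace_gpu_mem_total(gpu_id, 0, device_total_pages << PAGE_SHIFT);
    trace_gpu_mem_total(gpu_id, kprcs->tgid,
                        kprcs->total_gpu_pages << PAGE_SHIFT);
}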
diff --git a/mali_kbase/mali_kbase.h b/mali_kbase/mali_kbase.h
index 0445e0c..c623e7e 100644
--- a/mali_kbase/mali_kbase.h
+++ b/mali_kbase/mali_kbase.h
@@ -213,9 +213,9 @@ void kbase_jd_exit(struct kbase_context *kctx);
  * kbase_jd_submit - Submit atoms to the job dispatcher
  *
  * @kctx: The kbase context to submit to
- * @user_addr: The address in user space of the struct base_jd_atom_v2 array
+ * @user_addr: The address in user space of the struct base_jd_atom array
  * @nr_atoms: The number of atoms in the array
- * @stride: sizeof(struct base_jd_atom_v2)
+ * @stride: sizeof(struct base_jd_atom)
  * @uk6_atom: true if the atoms are legacy atoms (struct base_jd_atom_v2_uk6)
  *
  * Return: 0 on success or error code
@@ -457,7 +457,7 @@ void kbase_pm_metrics_stop(struct kbase_device *kbdev);
 
 /**
  * Return the atom's ID, as was originally supplied by userspace in
- * base_jd_atom_v2::atom_number
+ * base_jd_atom::atom_number
  */
 static inline int kbase_jd_atom_id(struct kbase_context *kctx,
 		struct kbase_jd_atom *katom)
 {

diff --git a/mali_kbase/mali_kbase_caps.h b/mali_kbase/mali_kbase_caps.h
new file mode 100644
index 0000000..b201a60
--- /dev/null
+++ b/mali_kbase/mali_kbase_caps.h
@@ -0,0 +1,65 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * @file mali_kbase_caps.h
+ *
+ * Driver Capability Queries.
+ */
+
+#ifndef _KBASE_CAPS_H_
+#define _KBASE_CAPS_H_
+
+#include <linux/types.h>
+
+typedef enum mali_kbase_cap {
+	MALI_KBASE_CAP_SYSTEM_MONITOR = 0,
+	MALI_KBASE_CAP_JIT_PRESSURE_LIMIT,
+	MALI_KBASE_CAP_MEM_GROW_ON_GPF,
+	MALI_KBASE_CAP_MEM_PROTECTED,
+	MALI_KBASE_NUM_CAPS
+} mali_kbase_cap;
+
+extern bool mali_kbase_supports_cap(unsigned long api_version, mali_kbase_cap cap);
+
+static inline bool mali_kbase_supports_system_monitor(unsigned long api_version)
+{
+	return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_SYSTEM_MONITOR);
+}
+
+static inline bool mali_kbase_supports_jit_pressure_limit(unsigned long api_version)
+{
+	return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_JIT_PRESSURE_LIMIT);
+}
+
+static inline bool mali_kbase_supports_mem_grow_on_gpf(unsigned long api_version)
+{
+	return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_GROW_ON_GPF);
+}
+
+static inline bool mali_kbase_supports_mem_protected(unsigned long api_version)
+{
+	return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_PROTECTED);
+}
+
+#endif /* _KBASE_CAPS_H_ */

diff --git a/mali_kbase/mali_kbase_ccswe.c b/mali_kbase/mali_kbase_ccswe.c
new file mode 100644
index 0000000..87d5aaa
--- /dev/null
+++ b/mali_kbase/mali_kbase_ccswe.c
@@ -0,0 +1,105 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_ccswe.h"
+#include "mali_kbase_linux.h"
+
+#include <linux/math64.h>
+#include <linux/time.h>
+
+static u64 kbasep_ccswe_cycle_at_no_lock(
+	struct kbase_ccswe *self, u64 timestamp_ns)
+{
+	s64 diff_s, diff_ns;
+	u32 gpu_freq;
+
+	lockdep_assert_held(&self->access);
+
+	diff_ns = timestamp_ns - self->timestamp_ns;
+	gpu_freq = diff_ns > 0 ? self->gpu_freq : self->prev_gpu_freq;
+
+	diff_s = div_s64(diff_ns, NSEC_PER_SEC);
+	diff_ns -= diff_s * NSEC_PER_SEC;
+
+	return self->cycles_elapsed + diff_s * gpu_freq
+		+ div_s64(diff_ns * gpu_freq, NSEC_PER_SEC);
+}
+
+void kbase_ccswe_init(struct kbase_ccswe *self)
+{
+	memset(self, 0, sizeof(*self));
+
+	spin_lock_init(&self->access);
+}
+KBASE_EXPORT_TEST_API(kbase_ccswe_init);
+
+u64 kbase_ccswe_cycle_at(struct kbase_ccswe *self, u64 timestamp_ns)
+{
+	unsigned long flags;
+	u64 result;
+
+	spin_lock_irqsave(&self->access, flags);
+	result = kbasep_ccswe_cycle_at_no_lock(self, timestamp_ns);
+	spin_unlock_irqrestore(&self->access, flags);
+
+	return result;
+}
+KBASE_EXPORT_TEST_API(kbase_ccswe_cycle_at);
+
+void kbase_ccswe_freq_change(
+	struct kbase_ccswe *self, u64 timestamp_ns, u32 gpu_freq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&self->access, flags);
+
+	/* The time must go only forward. */
+	if (WARN_ON(timestamp_ns < self->timestamp_ns))
+		goto exit;
+
+	/* If this is the first frequency change, cycles_elapsed is zero. */
+	if (self->timestamp_ns)
+		self->cycles_elapsed = kbasep_ccswe_cycle_at_no_lock(
+			self, timestamp_ns);
+
+	self->timestamp_ns = timestamp_ns;
+	self->prev_gpu_freq = self->gpu_freq;
+	self->gpu_freq = gpu_freq;
+exit:
+	spin_unlock_irqrestore(&self->access, flags);
+}
+KBASE_EXPORT_TEST_API(kbase_ccswe_freq_change);
+
+void kbase_ccswe_reset(struct kbase_ccswe *self)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&self->access, flags);
+
+	self->timestamp_ns = 0;
+	self->cycles_elapsed = 0;
+	self->gpu_freq = 0;
+	self->prev_gpu_freq = 0;
+
+	spin_unlock_irqrestore(&self->access, flags);
+}

diff --git a/mali_kbase/mali_kbase_ccswe.h b/mali_kbase/mali_kbase_ccswe.h
new file mode 100644
index 0000000..3a7cf73
--- /dev/null
+++ b/mali_kbase/mali_kbase_ccswe.h
@@ -0,0 +1,97 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_CCSWE_H_
+#define _KBASE_CCSWE_H_
+
+#include <linux/spinlock.h>
+
+/**
+ * struct kbase_ccswe - Cycle count software estimator.
+ *
+ * @access:         Spinlock protecting this structure access.
+ * @timestamp_ns:   Timestamp (ns) when the last frequency change occurred.
+ * @cycles_elapsed: Number of cycles elapsed before the last frequency change.
+ * @gpu_freq:       Current GPU frequency (Hz) value.
+ * @prev_gpu_freq:  Previous GPU frequency (Hz) before the last frequency
+ *                  change.
+ */
+struct kbase_ccswe {
+	spinlock_t access;
+	u64 timestamp_ns;
+	u64 cycles_elapsed;
+	u32 gpu_freq;
+	u32 prev_gpu_freq;
+};
+
+/**
+ * kbase_ccswe_init() - initialize the cycle count estimator.
+ *
+ * @self: Cycles count software estimator instance.
+ */
+void kbase_ccswe_init(struct kbase_ccswe *self);
+
+/**
+ * kbase_ccswe_cycle_at() - Estimate cycle count at given timestamp.
+ *
+ * @self:         Cycles count software estimator instance.
+ * @timestamp_ns: The timestamp (ns) for cycle count estimation.
+ *
+ * The timestamp must be bigger than the timestamp of the penultimate
+ * frequency change. If only one frequency change occurred, the
+ * timestamp must be bigger than the timestamp of the frequency change.
+ * This is to allow the following code to be executed w/o synchronization.
+ * If the lines below are executed atomically, it is safe to assume that
+ * only one frequency change may happen in between.
+ *
+ *   u64 ts = ktime_get_raw_ns();
+ *   u64 cycle = kbase_ccswe_cycle_at(&ccswe, ts)
+ *
+ * Return: estimated value of cycle count at a given time.
+ */
+u64 kbase_ccswe_cycle_at(struct kbase_ccswe *self, u64 timestamp_ns);
+
+/**
+ * kbase_ccswe_freq_change() - update GPU frequency.
+ *
+ * @self:         Cycles count software estimator instance.
+ * @timestamp_ns: Timestamp (ns) when frequency change occurred.
+ * @gpu_freq:     New GPU frequency value.
+ *
+ * The timestamp must be bigger than the timestamp of the previous
+ * frequency change. The function is to be called at the frequency
+ * change moment (not later).
+ */
+void kbase_ccswe_freq_change(
+	struct kbase_ccswe *self, u64 timestamp_ns, u32 gpu_freq);
+
+/**
+ * kbase_ccswe_reset() - reset estimator state
+ *
+ * @self: Cycles count software estimator instance.
+ */
+void kbase_ccswe_reset(struct kbase_ccswe *self);
+
+#endif /* _KBASE_CCSWE_H_ */
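
[Editorial note] The estimator is a piecewise-linear extrapolation: cycles(t) = cycles_elapsed + (t - timestamp_ns) * gpu_freq / NSEC_PER_SEC, with prev_gpu_freq covering queries that land just before the latest change point. A worked sketch using only the API above (the numbers are invented for illustration):

static void ccswe_example(void)
{
    struct kbase_ccswe ccswe;

    kbase_ccswe_init(&ccswe);

    /* GPU comes up at 100 MHz at t = 1 s (times in ns). */
    kbase_ccswe_freq_change(&ccswe, 1000000000ull, 100000000u);

    /* 0.5 s later the clock rises to 200 MHz. At this point
     * 0.5 s * 100 MHz = 50M cycles are banked in cycles_elapsed. */
    kbase_ccswe_freq_change(&ccswe, 1500000000ull, 200000000u);

    /* Query at t = 2 s: 50M + 0.5 s * 200 MHz = 150M cycles. */
    WARN_ON(kbase_ccswe_cycle_at(&ccswe, 2000000000ull) != 150000000ull);
}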
+	 */
+	int (*gpu_clk_notifier_register)(struct kbase_device *kbdev,
+				void *gpu_clk_handle, struct notifier_block *nb);
+
+	/**
+	 * gpu_clk_notifier_unregister - Unregister clock rate change notifier
+	 * @kbdev          - kbase_device pointer
+	 * @gpu_clk_handle - Handle unique to the enumerated GPU clock
+	 * @nb             - notifier block containing the callback function
+	 *                   pointer
+	 *
+	 * This function pointer is used to unregister a callback function
+	 * that was previously registered to get notified of a change in the
+	 * rate of the clock corresponding to @gpu_clk_handle.
+	 */
+	void (*gpu_clk_notifier_unregister)(struct kbase_device *kbdev,
+				void *gpu_clk_handle, struct notifier_block *nb);
+};
+
 #ifdef CONFIG_OF
 struct kbase_platform_config {
 };
diff --git a/mali_kbase/mali_kbase_core_linux.c b/mali_kbase/mali_kbase_core_linux.c
index fb2353e..83a22d9 100644
--- a/mali_kbase/mali_kbase_core_linux.c
+++ b/mali_kbase/mali_kbase_core_linux.c
@@ -55,6 +55,7 @@
 #include <mali_kbase_reset_gpu.h>
 #include <backend/gpu/mali_kbase_device_internal.h>
 #include "mali_kbase_ioctl.h"
+#include "mali_kbase_kinstr_jm.h"
 #include "mali_kbase_hwcnt_context.h"
 #include "mali_kbase_hwcnt_virtualizer.h"
 #include "mali_kbase_hwcnt_legacy.h"
@@ -114,6 +115,8 @@
 #include <device/mali_kbase_device.h>
 #include <context/mali_kbase_context.h>
 
+#include <mali_kbase_caps.h>
+
 /* GPU IRQ Tags */
 #define JOB_IRQ_TAG	0
 #define MMU_IRQ_TAG	1
@@ -122,6 +125,82 @@
 
 #define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)"
 
 /**
+ * Kernel min/maj <=> API Version
+ */
+#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20)  | \
+					 (((minor) & 0xFFF) << 8) | \
+					 ((0 & 0xFF) << 0))
+
+#define KBASE_API_MIN(api_version) ((api_version >> 8) & 0xFFF)
+#define KBASE_API_MAJ(api_version) ((api_version >> 20) & 0xFFF)
+
+/**
+ * mali_kbase_api_version_to_maj_min - convert an api_version to a maj/min pair
+ *
+ * @api_version: API version to convert
+ * @maj:         Major version number (must not exceed 12 bits)
+ * @min:         Minor version number (must not exceed 12 bits)
+ */
+void mali_kbase_api_version_to_maj_min(unsigned long api_version, u16 *maj, u16 *min)
+{
+	if (WARN_ON(!maj))
+		return;
+
+	if (WARN_ON(!min))
+		return;
+
+	*maj = KBASE_API_MAJ(api_version);
+	*min = KBASE_API_MIN(api_version);
+}
+
+/**
+ * kbase capabilities table
+ */
+typedef struct mali_kbase_capability_def {
+	u16 required_major;
+	u16 required_minor;
+} mali_kbase_capability_def;
+
+/**
+ * This must be kept in-sync with mali_kbase_cap
+ *
+ * TODO: The alternative approach would be to embed the cap enum values
+ * in the table. Less efficient but potentially safer.
+ */ +static mali_kbase_capability_def kbase_caps_table[MALI_KBASE_NUM_CAPS] = { + { 11, 15 }, /* SYSTEM_MONITOR */ + { 11, 25 }, /* JIT_PRESSURE_LIMIT */ + { 11, 2 }, /* MEM_GROW_ON_GPF */ + { 11, 2 } /* MEM_PROTECTED */ +}; + +/** + * mali_kbase_supports_cap - Query whether a kbase capability is supported + * + * @api_version: API version to convert + * @cap: Capability to query for - see mali_kbase_caps.h + */ +bool mali_kbase_supports_cap(unsigned long api_version, mali_kbase_cap cap) +{ + bool supported = false; + unsigned long required_ver; + + mali_kbase_capability_def const *cap_def; + + if (WARN_ON(cap < 0)) + return false; + + if (WARN_ON(cap >= MALI_KBASE_NUM_CAPS)) + return false; + + cap_def = &kbase_caps_table[(int)cap]; + required_ver = KBASE_API_VERSION(cap_def->required_major, cap_def->required_minor); + supported = (api_version >= required_ver); + + return supported; +} + +/** * kbase_file_new - Create an object representing a device file * * @kbdev: An instance of the GPU platform device, allocated from the probe @@ -152,7 +231,7 @@ static struct kbase_file *kbase_file_new(struct kbase_device *const kbdev, } /** - * kbase_file_get_api_version - Set the application programmer interface version + * kbase_file_set_api_version - Set the application programmer interface version * * @kfile: A device file created by kbase_file_new() * @major: Major version number (must not exceed 12 bits) @@ -326,7 +405,7 @@ static int kbase_api_handshake(struct kbase_file *kfile, * the flags have been set. Originally it was created on file open * (with job submission disabled) but we don't support that usage. */ - if (kbase_file_get_api_version(kfile) < KBASE_API_VERSION(11, 15)) + if (!mali_kbase_supports_system_monitor(kbase_file_get_api_version(kfile))) err = kbase_file_create_kctx(kfile, BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED); @@ -663,7 +742,7 @@ static int kbase_api_set_flags(struct kbase_file *kfile, /* For backward compatibility, the context may have been created before * the flags were set. */ - if (api_version >= KBASE_API_VERSION(11, 15)) { + if (mali_kbase_supports_system_monitor(api_version)) { err = kbase_file_create_kctx(kfile, flags->create_flags); } else { struct kbasep_js_kctx_info *js_kctx_info = NULL; @@ -790,6 +869,12 @@ static int kbase_api_mem_free(struct kbase_context *kctx, return kbase_mem_free(kctx, free->gpu_addr); } +static int kbase_api_kinstr_jm_fd(struct kbase_context *kctx, + union kbase_kinstr_jm_fd *arg) +{ + return kbase_kinstr_jm_get_fd(kctx->kinstr_jm, arg); +} + static int kbase_api_hwcnt_reader_setup(struct kbase_context *kctx, struct kbase_ioctl_hwcnt_reader_setup *setup) { @@ -1536,6 +1621,12 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) break; /* Instrumentation. */ + case KBASE_IOCTL_KINSTR_JM_FD: + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_KINSTR_JM_FD, + kbase_api_kinstr_jm_fd, + union kbase_kinstr_jm_fd, + kctx); + break; case KBASE_IOCTL_HWCNT_READER_SETUP: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_READER_SETUP, kbase_api_hwcnt_reader_setup, @@ -1890,7 +1981,7 @@ static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, * @dev: The device with sysfs file is for * @attr: The attributes of the sysfs file * @buf: The value written to the sysfs file - * @count: The number of bytes written to the sysfs file + * @count: The number of bytes to write to the sysfs file * * Return: @count if the function succeeded. An error code on failure. 
*/ @@ -1985,7 +2076,7 @@ static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask); * @dev: The device this sysfs file is for. * @attr: The attributes of the sysfs file. * @buf: The value written to the sysfs file. - * @count: The number of bytes written to the sysfs file. + * @count: The number of bytes to write to the sysfs file. * * This allows setting the timeout for software jobs. Waiting soft event wait * jobs will be cancelled after this period expires, while soft fence wait jobs @@ -2078,7 +2169,7 @@ static u32 timeout_ms_to_ticks(struct kbase_device *kbdev, long timeout_ms, * @dev: The device with sysfs file is for * @attr: The attributes of the sysfs file * @buf: The value written to the sysfs file - * @count: The number of bytes written to the sysfs file + * @count: The number of bytes to write to the sysfs file * * Return: @count if the function succeeded. An error code on failure. */ @@ -2255,7 +2346,7 @@ static u32 get_new_js_timeout( * @dev: The device the sysfs file is for * @attr: The attributes of the sysfs file * @buf: The value written to the sysfs file - * @count: The number of bytes written to the sysfs file + * @count: The number of bytes to write to the sysfs file * * This function is called when the js_scheduling_period sysfs file is written * to. It checks the data written, and if valid updates the js_scheduling_period @@ -2495,7 +2586,7 @@ static ssize_t show_debug(struct device *dev, struct device_attribute *attr, cha * @dev: The device with sysfs file is for * @attr: The attributes of the sysfs file * @buf: The value written to the sysfs file - * @count: The number of bytes written to the sysfs file + * @count: The number of bytes to write to the sysfs file * * Return: @count if the function succeeded. An error code on failure. */ @@ -3096,7 +3187,6 @@ static DEVICE_ATTR(js_ctx_scheduling_mode, S_IRUGO | S_IWUSR, set_js_ctx_scheduling_mode); #ifdef MALI_KBASE_BUILD -#ifdef CONFIG_DEBUG_FS /* Number of entries in serialize_jobs_settings[] */ #define NR_SERIALIZE_JOBS_SETTINGS 5 @@ -3117,8 +3207,47 @@ static struct }; /** - * kbasep_serialize_jobs_seq_show - Show callback for the serialize_jobs debugfs - * file + * update_serialize_jobs_setting - Update the serialization setting for the + * submission of GPU jobs. + * + * This function is called when the serialize_jobs sysfs/debugfs file is + * written to. It matches the requested setting against the available settings + * and if a matching setting is found updates kbdev->serialize_jobs. + * + * @kbdev: An instance of the GPU platform device, allocated from the probe + * method of the driver. + * @buf: Buffer containing the value written to the sysfs/debugfs file. + * @count: The number of bytes to write to the sysfs/debugfs file. + * + * Return: @count if the function succeeded. An error code on failure. 
+ */ +static ssize_t update_serialize_jobs_setting(struct kbase_device *kbdev, + const char *buf, size_t count) +{ + int i; + bool valid = false; + + for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { + if (sysfs_streq(serialize_jobs_settings[i].name, buf)) { + kbdev->serialize_jobs = + serialize_jobs_settings[i].setting; + valid = true; + break; + } + } + + if (!valid) { + dev_err(kbdev->dev, "serialize_jobs: invalid setting"); + return -EINVAL; + } + + return count; +} + +#ifdef CONFIG_DEBUG_FS +/** + * kbasep_serialize_jobs_seq_debugfs_show - Show callback for the serialize_jobs + * debugfs file * @sfile: seq_file pointer * @data: Private callback data * @@ -3128,7 +3257,8 @@ static struct * * Return: 0 on success, or an error code on error */ -static int kbasep_serialize_jobs_seq_show(struct seq_file *sfile, void *data) +static int kbasep_serialize_jobs_seq_debugfs_show(struct seq_file *sfile, + void *data) { struct kbase_device *kbdev = sfile->private; int i; @@ -3169,8 +3299,6 @@ static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file, struct seq_file *s = file->private_data; struct kbase_device *kbdev = s->private; char buf[MAX_SERIALIZE_JOBS_NAME_LEN]; - int i; - bool valid = false; CSTD_UNUSED(ppos); @@ -3180,21 +3308,7 @@ static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file, buf[count] = 0; - for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { - if (sysfs_streq(serialize_jobs_settings[i].name, buf)) { - kbdev->serialize_jobs = - serialize_jobs_settings[i].setting; - valid = true; - break; - } - } - - if (!valid) { - dev_err(kbdev->dev, "serialize_jobs: invalid setting\n"); - return -EINVAL; - } - - return count; + return update_serialize_jobs_setting(kbdev, buf, count); } /** @@ -3208,7 +3322,8 @@ static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file, static int kbasep_serialize_jobs_debugfs_open(struct inode *in, struct file *file) { - return single_open(file, kbasep_serialize_jobs_seq_show, in->i_private); + return single_open(file, kbasep_serialize_jobs_seq_debugfs_show, + in->i_private); } static const struct file_operations kbasep_serialize_jobs_debugfs_fops = { @@ -3221,6 +3336,72 @@ static const struct file_operations kbasep_serialize_jobs_debugfs_fops = { }; #endif /* CONFIG_DEBUG_FS */ + +/** + * show_serialize_jobs_sysfs - Show callback for serialize_jobs sysfs file. + * + * This function is called to get the contents of the serialize_jobs sysfs + * file. This is a list of the available settings with the currently active + * one surrounded by square brackets. + * + * @dev: The device this sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The output buffer for the sysfs file contents + * + * Return: The number of bytes output to @buf. 
+ */ +static ssize_t show_serialize_jobs_sysfs(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kbase_device *kbdev = to_kbase_device(dev); + ssize_t ret = 0; + int i; + + for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { + if (kbdev->serialize_jobs == + serialize_jobs_settings[i].setting) + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s]", + serialize_jobs_settings[i].name); + else + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", + serialize_jobs_settings[i].name); + } + + if (ret < PAGE_SIZE - 1) { + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); + } else { + buf[PAGE_SIZE - 2] = '\n'; + buf[PAGE_SIZE - 1] = '\0'; + ret = PAGE_SIZE - 1; + } + + return ret; +} + +/** + * store_serialize_jobs_sysfs - Store callback for serialize_jobs sysfs file. + * + * This function is called when the serialize_jobs sysfs file is written to. + * It matches the requested setting against the available settings and if a + * matching setting is found updates kbdev->serialize_jobs. + * + * @dev: The device this sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes to write to the sysfs file + * + * Return: @count if the function succeeded. An error code on failure. + */ +static ssize_t store_serialize_jobs_sysfs(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return update_serialize_jobs_setting(to_kbase_device(dev), buf, count); +} + +static DEVICE_ATTR(serialize_jobs, 0600, show_serialize_jobs_sysfs, + store_serialize_jobs_sysfs); #endif /* MALI_KBASE_BUILD */ static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data) @@ -4019,6 +4200,11 @@ void buslog_term(struct kbase_device *kbdev) } #endif +static struct attribute *kbase_scheduling_attrs[] = { + &dev_attr_serialize_jobs.attr, + NULL +}; + static struct attribute *kbase_attrs[] = { #ifdef CONFIG_MALI_DEBUG &dev_attr_debug_command.attr, @@ -4041,6 +4227,12 @@ static struct attribute *kbase_attrs[] = { NULL }; +#define SYSFS_SCHEDULING_GROUP "scheduling" +static const struct attribute_group kbase_scheduling_attr_group = { + .name = SYSFS_SCHEDULING_GROUP, + .attrs = kbase_scheduling_attrs, +}; + static const struct attribute_group kbase_attr_group = { .attrs = kbase_attrs, }; @@ -4056,11 +4248,23 @@ int kbase_sysfs_init(struct kbase_device *kbdev) kbdev->mdev.mode = 0666; err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group); + if (!err) { + err = sysfs_create_group(&kbdev->dev->kobj, + &kbase_scheduling_attr_group); + if (err) { + dev_err(kbdev->dev, "Creation of %s sysfs group failed", + SYSFS_SCHEDULING_GROUP); + sysfs_remove_group(&kbdev->dev->kobj, + &kbase_attr_group); + } + } + return err; } void kbase_sysfs_term(struct kbase_device *kbdev) { + sysfs_remove_group(&kbdev->dev->kobj, &kbase_scheduling_attr_group); sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group); put_device(kbdev->dev); } diff --git a/mali_kbase/mali_kbase_cs_experimental.h b/mali_kbase/mali_kbase_cs_experimental.h index e1fffc3..caba2cd 100644 --- a/mali_kbase/mali_kbase_cs_experimental.h +++ b/mali_kbase/mali_kbase_cs_experimental.h @@ -41,9 +41,6 @@ */ static inline void mali_kbase_print_cs_experimental(void) { -#if MALI_JIT_PRESSURE_LIMIT - pr_info("mali_kbase: JIT_PRESSURE_LIMIT (experimental) enabled"); -#endif /* MALI_JIT_PRESSURE_LIMIT */ #if MALI_INCREMENTAL_RENDERING pr_info("mali_kbase: INCREMENTAL_RENDERING (experimental) enabled"); #endif /* 
MALI_INCREMENTAL_RENDERING */
diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h
index 7056d80..5cbe6a9 100644
--- a/mali_kbase/mali_kbase_defs.h
+++ b/mali_kbase/mali_kbase_defs.h
@@ -40,7 +40,7 @@
 #include <mali_kbase_instr_defs.h>
 #include <mali_kbase_pm.h>
 #include <mali_kbase_gpuprops_types.h>
-#include <mali_kbase_hwcnt_backend_gpu.h>
+#include <mali_kbase_hwcnt_backend_jm.h>
 #include <protected_mode_switcher.h>
 
 #include <linux/atomic.h>
@@ -156,6 +156,7 @@ struct kbase_device;
 struct kbase_as;
 struct kbase_mmu_setup;
 struct kbase_ipa_model_vinstr_data;
+struct kbase_kinstr_jm;
 
 /**
  * struct kbase_io_access - holds information about 1 register access
@@ -320,6 +321,58 @@ struct kbasep_mem_device {
 	atomic_t ir_threshold;
 };
 
+struct kbase_clk_rate_listener;
+
+/**
+ * kbase_clk_rate_listener_on_change_t() - Frequency change callback
+ *
+ * @listener:    Clock frequency change listener.
+ * @clk_index:   Index of the clock for which the change has occurred.
+ * @clk_rate_hz: Clock frequency(Hz).
+ *
+ * A callback invoked when the clock rate changes. The function must not
+ * sleep, and no clock rate trace manager functions may be called from it,
+ * as the manager's lock is held.
+ */
+typedef void (*kbase_clk_rate_listener_on_change_t)(
+	struct kbase_clk_rate_listener *listener,
+	u32 clk_index,
+	u32 clk_rate_hz);
+
+/**
+ * struct kbase_clk_rate_listener - Clock frequency listener
+ *
+ * @node:   List node.
+ * @notify: Callback to be called when GPU frequency changes.
+ */
+struct kbase_clk_rate_listener {
+	struct list_head node;
+	kbase_clk_rate_listener_on_change_t notify;
+};
+
+/**
+ * struct kbase_clk_rate_trace_manager - Data stored per device for GPU clock
+ *                                       rate trace manager.
+ *
+ * @gpu_idle:           Tracks the idle state of the GPU.
+ * @clks:               Array of pointers to structures storing data for every
+ *                      enumerated GPU clock.
+ * @clk_rate_trace_ops: Pointer to the platform specific GPU clock rate trace
+ *                      operations.
+ * @gpu_clk_rate_trace_write: Pointer to the function that would emit the
+ *                      tracepoint for the clock rate change.
+ * @listeners:          List of attached listeners.
+ * @lock:               Lock to serialize the actions of the GPU clock rate
+ *                      trace manager.
+ */
+struct kbase_clk_rate_trace_manager {
+	bool gpu_idle;
+	struct kbase_clk_data *clks[BASE_MAX_NR_CLOCKS_REGULATORS];
+	struct kbase_clk_rate_trace_op_conf *clk_rate_trace_ops;
+	struct list_head listeners;
+	spinlock_t lock;
+};
+
 /**
  * Data stored per device for power management.
  *
@@ -385,6 +438,11 @@
 	 */
 	struct kbase_arbiter_vm_state *arb_vm_state;
 #endif /* CONFIG_MALI_ARBITER_SUPPORT */
+
+	/**
+	 * The state of the GPU clock rate trace manager
+	 */
+	struct kbase_clk_rate_trace_manager clk_rtm;
 };
 
 /**
@@ -560,6 +618,32 @@ struct kbase_devfreq_queue_info {
 };
 
 /**
+ * struct kbase_process - Object representing a kbase process, instantiated
+ *                        when the first kbase context is created under it.
+ * @tgid:            Thread group ID.
+ * @total_gpu_pages: Total GPU pages allocated across all the contexts
+ *                   of this process; it accounts for both native allocations
+ *                   and dma_buf imported allocations.
+ * @kctx_list:       List of kbase contexts created for the process.
+ * @kprcs_node:      Node to a rb_tree, kbase_device will maintain a rb_tree
+ *                   based on key tgid, kprcs_node is the node link to
+ *                   &struct_kbase_device.process_root.
+ * @dma_buf_root:    RB tree of the dma-buf imported allocations, imported
+ *                   across all the contexts created for this process.
+ *                   Used to ensure that pages of allocation are accounted
+ *                   only once for the process, even if the allocation gets
+ *                   imported multiple times for the process.
+ */
+struct kbase_process {
+	pid_t tgid;
+	size_t total_gpu_pages;
+	struct list_head kctx_list;
+
+	struct rb_node kprcs_node;
+	struct rb_root dma_buf_root;
+};
+
+/**
 * struct kbase_device - Object representing an instance of GPU platform device,
 *                       allocated from the probe method of mali driver.
 * @hw_quirks_sc:   Configuration to be used for the shader cores as per
@@ -806,6 +890,20 @@
 *                  Job Scheduler
 * @l2_size_override: Used to set L2 cache size via device tree blob
 * @l2_hash_override: Used to set L2 cache hash via device tree blob
+ * @process_root:   rb_tree root node for maintaining a rb_tree of
+ *                  kbase_process based on key tgid (thread group ID).
+ * @dma_buf_root:   rb_tree root node for maintaining a rb_tree of
+ *                  &struct kbase_dma_buf based on key dma_buf.
+ *                  We maintain a rb_tree of dma_buf mappings under
+ *                  kbase_device and kbase_process; one tracks mappings and
+ *                  GPU memory usage at the device level, the other at the
+ *                  process level.
+ * @total_gpu_pages: Total GPU pages used for the complete GPU device.
+ * @dma_buf_lock:   This mutex should be held while accounting for
+ *                  @total_gpu_pages from imported dma buffers.
+ * @gpu_mem_usage_lock: This spinlock should be held while accounting
+ *                  @total_gpu_pages for both native and dma-buf imported
+ *                  allocations.
 */
 struct kbase_device {
 	u32 hw_quirks_sc;
@@ -1043,6 +1141,13 @@
 #endif /* CONFIG_MALI_CINSTR_GWT */
 
+	struct rb_root process_root;
+	struct rb_root dma_buf_root;
+
+	size_t total_gpu_pages;
+	struct mutex dma_buf_lock;
+	spinlock_t gpu_mem_usage_lock;
+
 	struct {
 		struct kbase_context *ctx;
 		u64 jc;
@@ -1056,10 +1161,6 @@
 #endif
 };
 
-#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20) | \
-					 (((minor) & 0xFFF) << 8) | \
-					 ((0 & 0xFF) << 0))
-
 /**
 * enum kbase_file_state - Initialization state of a file opened by @kbase_open
 *
@@ -1189,6 +1290,13 @@ enum kbase_context_flags {
 	KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13,
 	KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14,
 	KCTX_AS_DISABLED_ON_FAULT = 1U << 15,
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+	/*
+	 * Set when the JIT physical page limit is less than the JIT virtual
+	 * address page limit, so we must take care not to exceed the
+	 * physical limit.
+	 */
+	KCTX_JPL_ENABLED = 1U << 16,
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
 };
 
 struct kbase_sub_alloc {
@@ -1399,6 +1507,16 @@
 *                      that were used (i.e. the
 *                      &struct_kbase_va_region.used_pages for regions
 *                      that have had a usage report).
+ * @jit_phys_pages_to_be_allocated: Count of the physical pages that are now
+ *                      being allocated for just-in-time memory
+ *                      allocations of a context (across all the
+ *                      threads). This is supposed to be updated
+ *                      with @reg_lock held before allocating
+ *                      the backing pages. This helps ensure that
+ *                      the total physical memory usage for just-in-time
+ *                      memory allocations remains within the
+ *                      @jit_phys_pages_limit in multi-threaded
+ *                      scenarios.
 * @jit_active_head:     List containing the just-in-time memory allocations
 *                       which are in use.
 * @jit_pool_head:       List containing the just-in-time memory allocations
@@ -1425,6 +1543,10 @@
 *                       is used to determine the atom's age when it is added to
 *                       the runnable RB-tree.
* @trim_level:          Level of JIT allocation trimming to perform on free (0-100%)
+ * @kprcs:               Reference to &struct kbase_process that the current
+ *                       kbase_context belongs to.
+ * @kprcs_link:          List link for the list of kbase contexts maintained
+ *                       under kbase_process.
 * @gwt_enabled:         Indicates if tracking of GPU writes is enabled, protected by
 *                       kbase_context.reg_lock.
 * @gwt_was_enabled:     Simple sticky bit flag to know if GWT was ever enabled.
@@ -1435,6 +1557,7 @@
 *                       for context scheduling, protected by hwaccess_lock.
 * @atoms_count:         Number of GPU atoms currently in use, per priority
 * @create_flags:        Flags used in context creation.
+ * @kinstr_jm:           Kernel job manager instrumentation context handle
 *
 * A kernel base context is an entity among which the GPU is scheduled.
 * Each context has its own GPU address space.
@@ -1545,10 +1668,11 @@ struct kbase_context {
 	u8 jit_current_allocations_per_bin[256];
 	u8 jit_version;
 	u8 jit_group_id;
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
 	u64 jit_phys_pages_limit;
 	u64 jit_current_phys_pressure;
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+	u64 jit_phys_pages_to_be_allocated;
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
 	struct list_head jit_active_head;
 	struct list_head jit_pool_head;
 	struct list_head jit_destroy_head;
@@ -1559,6 +1683,9 @@
 	u8 trim_level;
 
+	struct kbase_process *kprcs;
+	struct list_head kprcs_link;
+
 #ifdef CONFIG_MALI_CINSTR_GWT
 	bool gwt_enabled;
 	bool gwt_was_enabled;
@@ -1567,6 +1694,8 @@
 #endif
 
 	base_context_create_flags create_flags;
+
+	struct kbase_kinstr_jm *kinstr_jm;
 };
 
 #ifdef CONFIG_MALI_CINSTR_GWT
diff --git a/mali_kbase/mali_kbase_hwaccess_instr.h b/mali_kbase/mali_kbase_hwaccess_instr.h
index be85491..4fd2e35 100644
--- a/mali_kbase/mali_kbase_hwaccess_instr.h
+++ b/mali_kbase/mali_kbase_hwaccess_instr.h
@@ -35,7 +35,7 @@
 * struct kbase_instr_hwcnt_enable - Enable hardware counter collection.
 * @dump_buffer:       GPU address to write counters to.
 * @dump_buffer_bytes: Size in bytes of the buffer pointed to by dump_buffer.
- * @jm_bm:             counters selection bitmask (JM).
+ * @fe_bm:             counters selection bitmask (Front End).
 * @shader_bm:         counters selection bitmask (Shader).
 * @tiler_bm:          counters selection bitmask (Tiler).
 * @mmu_l2_bm:         counters selection bitmask (MMU_L2).
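
The jm_bm to fe_bm rename above is mechanical: the field still carries the front-end counter selection bitmask taken from the physical enable map, as the new JM backend later in this patch does. A minimal sketch of how a caller might fill the renamed struct; the helper name fill_hwcnt_enable and the all-ones front-end mask are illustrative only, not part of the driver:

	static void fill_hwcnt_enable(struct kbase_instr_hwcnt_enable *enable,
				      const struct kbase_hwcnt_physical_enable_map *phys)
	{
		/* Previously this was enable->jm_bm = phys->jm_bm; only the
		 * name changed, to match the Front End counter block.
		 */
		enable->fe_bm = phys->fe_bm;
		enable->shader_bm = phys->shader_bm;
		enable->tiler_bm = phys->tiler_bm;
		enable->mmu_l2_bm = phys->mmu_l2_bm;
	}

	/* e.g. fill_hwcnt_enable(&enable, &phys); with phys filled in by
	 * kbase_hwcnt_gpu_enable_map_to_physical(), as done in this patch.
	 */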
@@ -45,7 +45,7 @@
 struct kbase_instr_hwcnt_enable {
 	u64 dump_buffer;
 	u64 dump_buffer_bytes;
-	u32 jm_bm;
+	u32 fe_bm;
 	u32 shader_bm;
 	u32 tiler_bm;
 	u32 mmu_l2_bm;
diff --git a/mali_kbase/mali_kbase_hwaccess_time.h b/mali_kbase/mali_kbase_hwaccess_time.h
index a61e5b9..94b7551 100644
--- a/mali_kbase/mali_kbase_hwaccess_time.h
+++ b/mali_kbase/mali_kbase_hwaccess_time.h
@@ -39,4 +39,18 @@ void kbase_backend_get_gpu_time(struct kbase_device *kbdev,
 				u64 *cycle_counter,
 				u64 *system_time, struct timespec64 *ts);
 
+/**
+ * kbase_backend_get_gpu_time_norequest() - Get current GPU time without
+ *                                          request/release cycle counter
+ * @kbdev:         Device pointer
+ * @cycle_counter: Pointer to u64 to store cycle counter in
+ * @system_time:   Pointer to u64 to store system time in
+ * @ts:            Pointer to struct timespec64 to store current monotonic
+ *                 time in
+ */
+void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev,
+					  u64 *cycle_counter,
+					  u64 *system_time,
+					  struct timespec64 *ts);
+
 #endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/mali_kbase/mali_kbase_hwcnt.c b/mali_kbase/mali_kbase_hwcnt.c
index 14ec5cb..2708af7 100644
--- a/mali_kbase/mali_kbase_hwcnt.c
+++ b/mali_kbase/mali_kbase_hwcnt.c
@@ -242,6 +242,7 @@ static void kbasep_hwcnt_accumulator_disable(
 	bool backend_enabled = false;
 	struct kbase_hwcnt_accumulator *accum;
 	unsigned long flags;
+	u64 dump_time_ns;
 
 	WARN_ON(!hctx);
 	lockdep_assert_held(&hctx->accum_lock);
@@ -271,7 +272,7 @@
 		goto disable;
 
 	/* Try and accumulate before disabling */
-	errcode = hctx->iface->dump_request(accum->backend);
+	errcode = hctx->iface->dump_request(accum->backend, &dump_time_ns);
 	if (errcode)
 		goto disable;
 
@@ -419,23 +420,16 @@
 	/* Initiate the dump if the backend is enabled. */
 	if ((state == ACCUM_STATE_ENABLED) && cur_map_any_enabled) {
-		/* Disable pre-emption, to make the timestamp as accurate as
-		 * possible.
-		 */
-		preempt_disable();
-		{
+		if (dump_buf) {
+			errcode = hctx->iface->dump_request(
+					accum->backend, &dump_time_ns);
+			dump_requested = true;
+		} else {
 			dump_time_ns = hctx->iface->timestamp_ns(
-				accum->backend);
-			if (dump_buf) {
-				errcode = hctx->iface->dump_request(
 					accum->backend);
-				dump_requested = true;
-			} else {
-				errcode = hctx->iface->dump_clear(
-					accum->backend);
-			}
+			errcode = hctx->iface->dump_clear(accum->backend);
 		}
-		preempt_enable();
+
 		if (errcode)
 			goto error;
 	} else {
diff --git a/mali_kbase/mali_kbase_hwcnt_backend.h b/mali_kbase/mali_kbase_hwcnt_backend.h
index b7aa0e1..3a921b7 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend.h
+++ b/mali_kbase/mali_kbase_hwcnt_backend.h
@@ -1,6 +1,6 @@
 /*
 *
- * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -137,6 +137,8 @@ typedef int (*kbase_hwcnt_backend_dump_clear_fn)(
 * typedef kbase_hwcnt_backend_dump_request_fn - Request an asynchronous counter
 *                                               dump.
 * @backend:      Non-NULL pointer to backend.
+ * @dump_time_ns: Non-NULL pointer to where the timestamp of the dump
+ *                request is written on success.
 *
 * If the backend is not enabled or another dump is already in progress,
 * returns an error.
@@ -144,7 +146,8 @@
 * Return: 0 on success, else error code.
*/ typedef int (*kbase_hwcnt_backend_dump_request_fn)( - struct kbase_hwcnt_backend *backend); + struct kbase_hwcnt_backend *backend, + u64 *dump_time_ns); /** * typedef kbase_hwcnt_backend_dump_wait_fn - Wait until the last requested diff --git a/mali_kbase/mali_kbase_hwcnt_backend_gpu.c b/mali_kbase/mali_kbase_hwcnt_backend_gpu.c deleted file mode 100644 index 407c768..0000000 --- a/mali_kbase/mali_kbase_hwcnt_backend_gpu.c +++ /dev/null @@ -1,510 +0,0 @@ -/* - * - * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include "mali_kbase_hwcnt_backend_gpu.h" -#include "mali_kbase_hwcnt_gpu.h" -#include "mali_kbase_hwcnt_types.h" -#include "mali_kbase.h" -#include "mali_kbase_pm_ca.h" -#include "mali_kbase_hwaccess_instr.h" -#ifdef CONFIG_MALI_NO_MALI -#include "backend/gpu/mali_kbase_model_dummy.h" -#endif - - -/** - * struct kbase_hwcnt_backend_gpu_info - Information used to create an instance - * of a GPU hardware counter backend. - * @kbdev: KBase device. - * @use_secondary: True if secondary performance counters should be used, - * else false. Ignored if secondary counters are not supported. - * @metadata: Hardware counter metadata. - * @dump_bytes: Bytes of GPU memory required to perform a - * hardware counter dump. - */ -struct kbase_hwcnt_backend_gpu_info { - struct kbase_device *kbdev; - bool use_secondary; - const struct kbase_hwcnt_metadata *metadata; - size_t dump_bytes; -}; - -/** - * struct kbase_hwcnt_backend_gpu - Instance of a GPU hardware counter backend. - * @info: Info used to create the backend. - * @kctx: KBase context used for GPU memory allocation and - * counter dumping. - * @gpu_dump_va: GPU hardware counter dump buffer virtual address. - * @cpu_dump_va: CPU mapping of gpu_dump_va. - * @vmap: Dump buffer vmap. - * @enabled: True if dumping has been enabled, else false. - * @pm_core_mask: PM state sync-ed shaders core mask for the enabled dumping. 
- */ -struct kbase_hwcnt_backend_gpu { - const struct kbase_hwcnt_backend_gpu_info *info; - struct kbase_context *kctx; - u64 gpu_dump_va; - void *cpu_dump_va; - struct kbase_vmap_struct *vmap; - bool enabled; - u64 pm_core_mask; -}; - -/* GPU backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */ -static u64 kbasep_hwcnt_backend_gpu_timestamp_ns( - struct kbase_hwcnt_backend *backend) -{ - (void)backend; - return ktime_get_raw_ns(); -} - -/* GPU backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */ -static int kbasep_hwcnt_backend_gpu_dump_enable_nolock( - struct kbase_hwcnt_backend *backend, - const struct kbase_hwcnt_enable_map *enable_map) -{ - int errcode; - struct kbase_hwcnt_backend_gpu *backend_gpu = - (struct kbase_hwcnt_backend_gpu *)backend; - struct kbase_context *kctx; - struct kbase_device *kbdev; - struct kbase_hwcnt_physical_enable_map phys; - struct kbase_instr_hwcnt_enable enable; - - if (!backend_gpu || !enable_map || backend_gpu->enabled || - (enable_map->metadata != backend_gpu->info->metadata)) - return -EINVAL; - - kctx = backend_gpu->kctx; - kbdev = backend_gpu->kctx->kbdev; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - kbase_hwcnt_gpu_enable_map_to_physical(&phys, enable_map); - - enable.jm_bm = phys.jm_bm; - enable.shader_bm = phys.shader_bm; - enable.tiler_bm = phys.tiler_bm; - enable.mmu_l2_bm = phys.mmu_l2_bm; - enable.use_secondary = backend_gpu->info->use_secondary; - enable.dump_buffer = backend_gpu->gpu_dump_va; - enable.dump_buffer_bytes = backend_gpu->info->dump_bytes; - - errcode = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable); - if (errcode) - goto error; - - backend_gpu->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev); - backend_gpu->enabled = true; - - return 0; -error: - return errcode; -} - -/* GPU backend implementation of kbase_hwcnt_backend_dump_enable_fn */ -static int kbasep_hwcnt_backend_gpu_dump_enable( - struct kbase_hwcnt_backend *backend, - const struct kbase_hwcnt_enable_map *enable_map) -{ - unsigned long flags; - int errcode; - struct kbase_hwcnt_backend_gpu *backend_gpu = - (struct kbase_hwcnt_backend_gpu *)backend; - struct kbase_device *kbdev; - - if (!backend_gpu) - return -EINVAL; - - kbdev = backend_gpu->kctx->kbdev; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - errcode = kbasep_hwcnt_backend_gpu_dump_enable_nolock( - backend, enable_map); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - return errcode; -} - -/* GPU backend implementation of kbase_hwcnt_backend_dump_disable_fn */ -static void kbasep_hwcnt_backend_gpu_dump_disable( - struct kbase_hwcnt_backend *backend) -{ - int errcode; - struct kbase_hwcnt_backend_gpu *backend_gpu = - (struct kbase_hwcnt_backend_gpu *)backend; - - if (WARN_ON(!backend_gpu) || !backend_gpu->enabled) - return; - - errcode = kbase_instr_hwcnt_disable_internal(backend_gpu->kctx); - WARN_ON(errcode); - - backend_gpu->enabled = false; -} - -/* GPU backend implementation of kbase_hwcnt_backend_dump_clear_fn */ -static int kbasep_hwcnt_backend_gpu_dump_clear( - struct kbase_hwcnt_backend *backend) -{ - struct kbase_hwcnt_backend_gpu *backend_gpu = - (struct kbase_hwcnt_backend_gpu *)backend; - - if (!backend_gpu || !backend_gpu->enabled) - return -EINVAL; - - return kbase_instr_hwcnt_clear(backend_gpu->kctx); -} - -/* GPU backend implementation of kbase_hwcnt_backend_dump_request_fn */ -static int kbasep_hwcnt_backend_gpu_dump_request( - struct kbase_hwcnt_backend *backend) -{ - struct kbase_hwcnt_backend_gpu *backend_gpu = - 
(struct kbase_hwcnt_backend_gpu *)backend; - - if (!backend_gpu || !backend_gpu->enabled) - return -EINVAL; - - return kbase_instr_hwcnt_request_dump(backend_gpu->kctx); -} - -/* GPU backend implementation of kbase_hwcnt_backend_dump_wait_fn */ -static int kbasep_hwcnt_backend_gpu_dump_wait( - struct kbase_hwcnt_backend *backend) -{ - struct kbase_hwcnt_backend_gpu *backend_gpu = - (struct kbase_hwcnt_backend_gpu *)backend; - - if (!backend_gpu || !backend_gpu->enabled) - return -EINVAL; - - return kbase_instr_hwcnt_wait_for_dump(backend_gpu->kctx); -} - -/* GPU backend implementation of kbase_hwcnt_backend_dump_get_fn */ -static int kbasep_hwcnt_backend_gpu_dump_get( - struct kbase_hwcnt_backend *backend, - struct kbase_hwcnt_dump_buffer *dst, - const struct kbase_hwcnt_enable_map *dst_enable_map, - bool accumulate) -{ - struct kbase_hwcnt_backend_gpu *backend_gpu = - (struct kbase_hwcnt_backend_gpu *)backend; - - if (!backend_gpu || !dst || !dst_enable_map || - (backend_gpu->info->metadata != dst->metadata) || - (dst_enable_map->metadata != dst->metadata)) - return -EINVAL; - - /* Invalidate the kernel buffer before reading from it. */ - kbase_sync_mem_regions( - backend_gpu->kctx, backend_gpu->vmap, KBASE_SYNC_TO_CPU); - - return kbase_hwcnt_gpu_dump_get( - dst, backend_gpu->cpu_dump_va, dst_enable_map, - backend_gpu->pm_core_mask, accumulate); -} - -/** - * kbasep_hwcnt_backend_gpu_dump_alloc() - Allocate a GPU dump buffer. - * @info: Non-NULL pointer to GPU backend info. - * @kctx: Non-NULL pointer to kbase context. - * @gpu_dump_va: Non-NULL pointer to where GPU dump buffer virtual address - * is stored on success. - * - * Return: 0 on success, else error code. - */ -static int kbasep_hwcnt_backend_gpu_dump_alloc( - const struct kbase_hwcnt_backend_gpu_info *info, - struct kbase_context *kctx, - u64 *gpu_dump_va) -{ - struct kbase_va_region *reg; - u64 flags; - u64 nr_pages; - - WARN_ON(!info); - WARN_ON(!kctx); - WARN_ON(!gpu_dump_va); - - flags = BASE_MEM_PROT_CPU_RD | - BASE_MEM_PROT_GPU_WR | - BASEP_MEM_PERMANENT_KERNEL_MAPPING | - BASE_MEM_CACHED_CPU; - - if (kctx->kbdev->mmu_mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE) - flags |= BASE_MEM_UNCACHED_GPU; - - nr_pages = PFN_UP(info->dump_bytes); - - reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va); - - if (!reg) - return -ENOMEM; - - return 0; -} - -/** - * kbasep_hwcnt_backend_gpu_dump_free() - Free an allocated GPU dump buffer. - * @kctx: Non-NULL pointer to kbase context. - * @gpu_dump_va: GPU dump buffer virtual address. - */ -static void kbasep_hwcnt_backend_gpu_dump_free( - struct kbase_context *kctx, - u64 gpu_dump_va) -{ - WARN_ON(!kctx); - if (gpu_dump_va) - kbase_mem_free(kctx, gpu_dump_va); -} - -/** - * kbasep_hwcnt_backend_gpu_destroy() - Destroy a GPU backend. - * @backend: Pointer to GPU backend to destroy. - * - * Can be safely called on a backend in any state of partial construction. - */ -static void kbasep_hwcnt_backend_gpu_destroy( - struct kbase_hwcnt_backend_gpu *backend) -{ - if (!backend) - return; - - if (backend->kctx) { - struct kbase_context *kctx = backend->kctx; - struct kbase_device *kbdev = kctx->kbdev; - - if (backend->cpu_dump_va) - kbase_phy_alloc_mapping_put(kctx, backend->vmap); - - if (backend->gpu_dump_va) - kbasep_hwcnt_backend_gpu_dump_free( - kctx, backend->gpu_dump_va); - - kbasep_js_release_privileged_ctx(kbdev, kctx); - kbase_destroy_context(kctx); - } - - kfree(backend); -} - -/** - * kbasep_hwcnt_backend_gpu_create() - Create a GPU backend. 
- * @info: Non-NULL pointer to backend info. - * @out_backend: Non-NULL pointer to where backend is stored on success. - * - * Return: 0 on success, else error code. - */ -static int kbasep_hwcnt_backend_gpu_create( - const struct kbase_hwcnt_backend_gpu_info *info, - struct kbase_hwcnt_backend_gpu **out_backend) -{ - - int errcode; - struct kbase_device *kbdev; - struct kbase_hwcnt_backend_gpu *backend = NULL; - - WARN_ON(!info); - WARN_ON(!out_backend); - - kbdev = info->kbdev; - - backend = kzalloc(sizeof(*backend), GFP_KERNEL); - if (!backend) - goto alloc_error; - - backend->info = info; - - backend->kctx = kbase_create_context(kbdev, true, - BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED, 0, NULL); - if (!backend->kctx) - goto alloc_error; - - kbasep_js_schedule_privileged_ctx(kbdev, backend->kctx); - - errcode = kbasep_hwcnt_backend_gpu_dump_alloc( - info, backend->kctx, &backend->gpu_dump_va); - if (errcode) - goto error; - - backend->cpu_dump_va = kbase_phy_alloc_mapping_get(backend->kctx, - backend->gpu_dump_va, &backend->vmap); - if (!backend->cpu_dump_va) - goto alloc_error; - -#ifdef CONFIG_MALI_NO_MALI - /* The dummy model needs the CPU mapping. */ - gpu_model_set_dummy_prfcnt_base_cpu(backend->cpu_dump_va); -#endif - - *out_backend = backend; - return 0; - -alloc_error: - errcode = -ENOMEM; -error: - kbasep_hwcnt_backend_gpu_destroy(backend); - return errcode; -} - -/* GPU backend implementation of kbase_hwcnt_backend_init_fn */ -static int kbasep_hwcnt_backend_gpu_init( - const struct kbase_hwcnt_backend_info *info, - struct kbase_hwcnt_backend **out_backend) -{ - int errcode; - struct kbase_hwcnt_backend_gpu *backend = NULL; - - if (!info || !out_backend) - return -EINVAL; - - errcode = kbasep_hwcnt_backend_gpu_create( - (const struct kbase_hwcnt_backend_gpu_info *) info, &backend); - if (errcode) - return errcode; - - *out_backend = (struct kbase_hwcnt_backend *)backend; - - return 0; -} - -/* GPU backend implementation of kbase_hwcnt_backend_term_fn */ -static void kbasep_hwcnt_backend_gpu_term(struct kbase_hwcnt_backend *backend) -{ - if (!backend) - return; - - kbasep_hwcnt_backend_gpu_dump_disable(backend); - kbasep_hwcnt_backend_gpu_destroy( - (struct kbase_hwcnt_backend_gpu *)backend); -} - -/** - * kbasep_hwcnt_backend_gpu_info_destroy() - Destroy a GPU backend info. - * @info: Pointer to info to destroy. - * - * Can be safely called on a backend info in any state of partial construction. - */ -static void kbasep_hwcnt_backend_gpu_info_destroy( - const struct kbase_hwcnt_backend_gpu_info *info) -{ - if (!info) - return; - - kbase_hwcnt_gpu_metadata_destroy(info->metadata); - kfree(info); -} - -/** - * kbasep_hwcnt_backend_gpu_info_create() - Create a GPU backend info. - * @kbdev: Non_NULL pointer to kbase device. - * @out_info: Non-NULL pointer to where info is stored on success. - * - * Return 0 on success, else error code. 
- */ -static int kbasep_hwcnt_backend_gpu_info_create( - struct kbase_device *kbdev, - const struct kbase_hwcnt_backend_gpu_info **out_info) -{ - int errcode = -ENOMEM; - struct kbase_hwcnt_gpu_info hwcnt_gpu_info; - struct kbase_hwcnt_backend_gpu_info *info = NULL; - - WARN_ON(!kbdev); - WARN_ON(!out_info); - - errcode = kbase_hwcnt_gpu_info_init(kbdev, &hwcnt_gpu_info); - if (errcode) - return errcode; - - info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) - goto error; - - info->kbdev = kbdev; - -#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY - info->use_secondary = true; -#else - info->use_secondary = false; -#endif - - errcode = kbase_hwcnt_gpu_metadata_create( - &hwcnt_gpu_info, info->use_secondary, - &info->metadata, - &info->dump_bytes); - if (errcode) - goto error; - - *out_info = info; - - return 0; -error: - kbasep_hwcnt_backend_gpu_info_destroy(info); - return errcode; -} - -int kbase_hwcnt_backend_gpu_create( - struct kbase_device *kbdev, - struct kbase_hwcnt_backend_interface *iface) -{ - int errcode; - const struct kbase_hwcnt_backend_gpu_info *info = NULL; - - if (!kbdev || !iface) - return -EINVAL; - - errcode = kbasep_hwcnt_backend_gpu_info_create(kbdev, &info); - - if (errcode) - return errcode; - - iface->metadata = info->metadata; - iface->info = (struct kbase_hwcnt_backend_info *)info; - iface->init = kbasep_hwcnt_backend_gpu_init; - iface->term = kbasep_hwcnt_backend_gpu_term; - iface->timestamp_ns = kbasep_hwcnt_backend_gpu_timestamp_ns; - iface->dump_enable = kbasep_hwcnt_backend_gpu_dump_enable; - iface->dump_enable_nolock = kbasep_hwcnt_backend_gpu_dump_enable_nolock; - iface->dump_disable = kbasep_hwcnt_backend_gpu_dump_disable; - iface->dump_clear = kbasep_hwcnt_backend_gpu_dump_clear; - iface->dump_request = kbasep_hwcnt_backend_gpu_dump_request; - iface->dump_wait = kbasep_hwcnt_backend_gpu_dump_wait; - iface->dump_get = kbasep_hwcnt_backend_gpu_dump_get; - - return 0; -} - -void kbase_hwcnt_backend_gpu_destroy( - struct kbase_hwcnt_backend_interface *iface) -{ - if (!iface) - return; - - kbasep_hwcnt_backend_gpu_info_destroy( - (const struct kbase_hwcnt_backend_gpu_info *)iface->info); - memset(iface, 0, sizeof(*iface)); -} diff --git a/mali_kbase/mali_kbase_hwcnt_backend_jm.c b/mali_kbase/mali_kbase_hwcnt_backend_jm.c new file mode 100644 index 0000000..02a42bf --- /dev/null +++ b/mali_kbase/mali_kbase_hwcnt_backend_jm.c @@ -0,0 +1,707 @@ +/* + * + * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_hwcnt_backend_jm.h"
+#include "mali_kbase_hwcnt_gpu.h"
+#include "mali_kbase_hwcnt_types.h"
+#include "mali_kbase.h"
+#include "mali_kbase_pm_ca.h"
+#include "mali_kbase_hwaccess_instr.h"
+#include "mali_kbase_hwaccess_time.h"
+#include "mali_kbase_ccswe.h"
+
+#ifdef CONFIG_MALI_NO_MALI
+#include "backend/gpu/mali_kbase_model_dummy.h"
+#endif
+#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
+
+#include "backend/gpu/mali_kbase_pm_internal.h"
+
+/**
+ * struct kbase_hwcnt_backend_jm_info - Information used to create an instance
+ *                                      of a JM hardware counter backend.
+ * @kbdev:         KBase device.
+ * @use_secondary: True if secondary performance counters should be used,
+ *                 else false. Ignored if secondary counters are not supported.
+ * @metadata:      Hardware counter metadata.
+ * @dump_bytes:    Bytes of GPU memory required to perform a
+ *                 hardware counter dump.
+ */
+struct kbase_hwcnt_backend_jm_info {
+	struct kbase_device *kbdev;
+	bool use_secondary;
+	const struct kbase_hwcnt_metadata *metadata;
+	size_t dump_bytes;
+};
+
+/**
+ * struct kbase_hwcnt_backend_jm - Instance of a JM hardware counter backend.
+ * @info:            Info used to create the backend.
+ * @kctx:            KBase context used for GPU memory allocation and
+ *                   counter dumping.
+ * @gpu_dump_va:     GPU hardware counter dump buffer virtual address.
+ * @cpu_dump_va:     CPU mapping of gpu_dump_va.
+ * @vmap:            Dump buffer vmap.
+ * @enabled:         True if dumping has been enabled, else false.
+ * @pm_core_mask:    PM state sync-ed shaders core mask for the enabled
+ *                   dumping.
+ * @clk_enable_map:  The enable map specifying enabled clock domains.
+ * @cycle_count_elapsed:
+ *                   Cycle count elapsed for a given sample period.
+ *                   The top clock domain, index 0, is read directly from
+ *                   hardware; the other clock domains are calculated by
+ *                   software estimation.
+ * @prev_cycle_count: Previous cycle count used to calculate the cycle count
+ *                   for the sample period.
+ * @rate_listener:   Clock rate listener callback state.
+ * @ccswe_shader_cores: Shader cores cycle count software estimator.
+ */
+struct kbase_hwcnt_backend_jm {
+	const struct kbase_hwcnt_backend_jm_info *info;
+	struct kbase_context *kctx;
+	u64 gpu_dump_va;
+	void *cpu_dump_va;
+	struct kbase_vmap_struct *vmap;
+	bool enabled;
+	u64 pm_core_mask;
+	u64 clk_enable_map;
+	u64 cycle_count_elapsed[BASE_MAX_NR_CLOCKS_REGULATORS];
+	u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS];
+	struct kbase_clk_rate_listener rate_listener;
+	struct kbase_ccswe ccswe_shader_cores;
+};
+
+/**
+ * kbasep_hwcnt_backend_jm_on_freq_change() - On freq change callback
+ *
+ * @rate_listener: Callback state
+ * @clk_index:     Clock index
+ * @clk_rate_hz:   Clock frequency(Hz)
+ */
+static void kbasep_hwcnt_backend_jm_on_freq_change(
+	struct kbase_clk_rate_listener *rate_listener,
+	u32 clk_index,
+	u32 clk_rate_hz)
+{
+	struct kbase_hwcnt_backend_jm *backend_jm = container_of(
+		rate_listener, struct kbase_hwcnt_backend_jm, rate_listener);
+	u64 timestamp_ns;
+
+	if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES)
+		return;
+
+	timestamp_ns = ktime_get_raw_ns();
+	kbase_ccswe_freq_change(
+		&backend_jm->ccswe_shader_cores, timestamp_ns, clk_rate_hz);
+}
+
+/**
+ * kbasep_hwcnt_backend_jm_cc_enable() - Enable cycle count tracking
+ *
+ * @backend_jm:   Non-NULL pointer to JM backend.
+ * @enable_map:   Non-NULL pointer to enable map specifying enabled counters.
+ * @timestamp_ns: Timestamp(ns) when HWCNTs were enabled.
+ */
+static void kbasep_hwcnt_backend_jm_cc_enable(
+	struct kbase_hwcnt_backend_jm *backend_jm,
+	const struct kbase_hwcnt_enable_map *enable_map,
+	u64 timestamp_ns)
+{
+	struct kbase_device *kbdev = backend_jm->kctx->kbdev;
+	u64 clk_enable_map = enable_map->clk_enable_map;
+	u64 cycle_count;
+
+	if (kbase_hwcnt_clk_enable_map_enabled(
+		clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) {
+		/* turn on the cycle counter */
+		kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev);
+		/* Read cycle count for top clock domain. */
+		kbase_backend_get_gpu_time_norequest(
+			kbdev, &cycle_count, NULL, NULL);
+
+		backend_jm->prev_cycle_count[KBASE_CLOCK_DOMAIN_TOP] =
+			cycle_count;
+	}
+
+	if (kbase_hwcnt_clk_enable_map_enabled(
+		clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
+		/* software estimation for non-top clock domains */
+		struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
+		const struct kbase_clk_data *clk_data =
+			rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES];
+		u32 cur_freq;
+		unsigned long flags;
+
+		spin_lock_irqsave(&rtm->lock, flags);
+
+		cur_freq = (u32) clk_data->clock_val;
+		kbase_ccswe_reset(&backend_jm->ccswe_shader_cores);
+		kbase_ccswe_freq_change(
+			&backend_jm->ccswe_shader_cores,
+			timestamp_ns,
+			cur_freq);
+
+		kbase_clk_rate_trace_manager_subscribe_no_lock(
+			rtm, &backend_jm->rate_listener);
+
+		spin_unlock_irqrestore(&rtm->lock, flags);
+
+		/* ccswe was reset. The estimated cycle is zero. */
+		backend_jm->prev_cycle_count[
+			KBASE_CLOCK_DOMAIN_SHADER_CORES] = 0;
+	}
+
+	/* Keep clk_enable_map for dump_request. */
+	backend_jm->clk_enable_map = clk_enable_map;
+}
+
+/**
+ * kbasep_hwcnt_backend_jm_cc_disable() - Disable cycle count tracking
+ *
+ * @backend_jm: Non-NULL pointer to JM backend.
+ */
+static void kbasep_hwcnt_backend_jm_cc_disable(
+	struct kbase_hwcnt_backend_jm *backend_jm)
+{
+	struct kbase_device *kbdev = backend_jm->kctx->kbdev;
+	struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
+	u64 clk_enable_map = backend_jm->clk_enable_map;
+
+	if (kbase_hwcnt_clk_enable_map_enabled(
+		clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) {
+		/* turn off the cycle counter */
+		kbase_pm_release_gpu_cycle_counter(backend_jm->kctx->kbdev);
+	}
+	if (kbase_hwcnt_clk_enable_map_enabled(
+		clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
+
+		kbase_clk_rate_trace_manager_unsubscribe(
+			rtm, &backend_jm->rate_listener);
+	}
+}
+
+
+/* JM backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */
+static u64 kbasep_hwcnt_backend_jm_timestamp_ns(
+	struct kbase_hwcnt_backend *backend)
+{
+	(void)backend;
+	return ktime_get_raw_ns();
+}
+
+/* JM backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */
+static int kbasep_hwcnt_backend_jm_dump_enable_nolock(
+	struct kbase_hwcnt_backend *backend,
+	const struct kbase_hwcnt_enable_map *enable_map)
+{
+	int errcode;
+	struct kbase_hwcnt_backend_jm *backend_jm =
+		(struct kbase_hwcnt_backend_jm *)backend;
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+	struct kbase_hwcnt_physical_enable_map phys;
+	struct kbase_instr_hwcnt_enable enable;
+	u64 timestamp_ns;
+
+	if (!backend_jm || !enable_map || backend_jm->enabled ||
+	    (enable_map->metadata != backend_jm->info->metadata))
+		return -EINVAL;
+
+	kctx = backend_jm->kctx;
+	kbdev = backend_jm->kctx->kbdev;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbase_hwcnt_gpu_enable_map_to_physical(&phys, enable_map);
+
+	enable.fe_bm = phys.fe_bm;
+	enable.shader_bm = phys.shader_bm;
+	enable.tiler_bm = phys.tiler_bm;
+	enable.mmu_l2_bm =
phys.mmu_l2_bm; + enable.use_secondary = backend_jm->info->use_secondary; + enable.dump_buffer = backend_jm->gpu_dump_va; + enable.dump_buffer_bytes = backend_jm->info->dump_bytes; + + timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend); + + errcode = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable); + if (errcode) + goto error; + + backend_jm->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev); + backend_jm->enabled = true; + + kbasep_hwcnt_backend_jm_cc_enable(backend_jm, enable_map, timestamp_ns); + + return 0; +error: + return errcode; +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_enable_fn */ +static int kbasep_hwcnt_backend_jm_dump_enable( + struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map) +{ + unsigned long flags; + int errcode; + struct kbase_hwcnt_backend_jm *backend_jm = + (struct kbase_hwcnt_backend_jm *)backend; + struct kbase_device *kbdev; + + if (!backend_jm) + return -EINVAL; + + kbdev = backend_jm->kctx->kbdev; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + errcode = kbasep_hwcnt_backend_jm_dump_enable_nolock( + backend, enable_map); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return errcode; +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_disable_fn */ +static void kbasep_hwcnt_backend_jm_dump_disable( + struct kbase_hwcnt_backend *backend) +{ + int errcode; + struct kbase_hwcnt_backend_jm *backend_jm = + (struct kbase_hwcnt_backend_jm *)backend; + + if (WARN_ON(!backend_jm) || !backend_jm->enabled) + return; + + kbasep_hwcnt_backend_jm_cc_disable(backend_jm); + + errcode = kbase_instr_hwcnt_disable_internal(backend_jm->kctx); + WARN_ON(errcode); + + backend_jm->enabled = false; +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_clear_fn */ +static int kbasep_hwcnt_backend_jm_dump_clear( + struct kbase_hwcnt_backend *backend) +{ + struct kbase_hwcnt_backend_jm *backend_jm = + (struct kbase_hwcnt_backend_jm *)backend; + + if (!backend_jm || !backend_jm->enabled) + return -EINVAL; + + return kbase_instr_hwcnt_clear(backend_jm->kctx); +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_request_fn */ +static int kbasep_hwcnt_backend_jm_dump_request( + struct kbase_hwcnt_backend *backend, + u64 *dump_time_ns) +{ + struct kbase_hwcnt_backend_jm *backend_jm = + (struct kbase_hwcnt_backend_jm *)backend; + struct kbase_device *kbdev; + const struct kbase_hwcnt_metadata *metadata; + u64 current_cycle_count; + size_t clk; + int ret; + + if (!backend_jm || !backend_jm->enabled) + return -EINVAL; + + kbdev = backend_jm->kctx->kbdev; + metadata = backend_jm->info->metadata; + + /* Disable pre-emption, to make the timestamp as accurate as possible */ + preempt_disable(); + { + *dump_time_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend); + ret = kbase_instr_hwcnt_request_dump(backend_jm->kctx); + + kbase_hwcnt_metadata_for_each_clock(metadata, clk) { + if (!kbase_hwcnt_clk_enable_map_enabled( + backend_jm->clk_enable_map, clk)) + continue; + + if (clk == KBASE_CLOCK_DOMAIN_TOP) { + /* Read cycle count for top clock domain. */ + kbase_backend_get_gpu_time_norequest( + kbdev, ¤t_cycle_count, + NULL, NULL); + } else { + /* + * Estimate cycle count for non-top clock + * domain. + */ + current_cycle_count = kbase_ccswe_cycle_at( + &backend_jm->ccswe_shader_cores, + *dump_time_ns); + } + backend_jm->cycle_count_elapsed[clk] = + current_cycle_count - + backend_jm->prev_cycle_count[clk]; + + /* + * Keep the current cycle count for later calculation. 
+ */ + backend_jm->prev_cycle_count[clk] = current_cycle_count; + } + } + preempt_enable(); + + return ret; +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_wait_fn */ +static int kbasep_hwcnt_backend_jm_dump_wait( + struct kbase_hwcnt_backend *backend) +{ + struct kbase_hwcnt_backend_jm *backend_jm = + (struct kbase_hwcnt_backend_jm *)backend; + + if (!backend_jm || !backend_jm->enabled) + return -EINVAL; + + return kbase_instr_hwcnt_wait_for_dump(backend_jm->kctx); +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_get_fn */ +static int kbasep_hwcnt_backend_jm_dump_get( + struct kbase_hwcnt_backend *backend, + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map, + bool accumulate) +{ + struct kbase_hwcnt_backend_jm *backend_jm = + (struct kbase_hwcnt_backend_jm *)backend; + size_t clk; + + if (!backend_jm || !dst || !dst_enable_map || + (backend_jm->info->metadata != dst->metadata) || + (dst_enable_map->metadata != dst->metadata)) + return -EINVAL; + + /* Invalidate the kernel buffer before reading from it. */ + kbase_sync_mem_regions( + backend_jm->kctx, backend_jm->vmap, KBASE_SYNC_TO_CPU); + + kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) { + if (!kbase_hwcnt_clk_enable_map_enabled( + dst_enable_map->clk_enable_map, clk)) + continue; + + /* Extract elapsed cycle count for each clock domain. */ + dst->clk_cnt_buf[clk] = backend_jm->cycle_count_elapsed[clk]; + } + + return kbase_hwcnt_gpu_dump_get( + dst, backend_jm->cpu_dump_va, dst_enable_map, + backend_jm->pm_core_mask, accumulate); +} + +/** + * kbasep_hwcnt_backend_jm_dump_alloc() - Allocate a GPU dump buffer. + * @info: Non-NULL pointer to JM backend info. + * @kctx: Non-NULL pointer to kbase context. + * @gpu_dump_va: Non-NULL pointer to where GPU dump buffer virtual address + * is stored on success. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_backend_jm_dump_alloc( + const struct kbase_hwcnt_backend_jm_info *info, + struct kbase_context *kctx, + u64 *gpu_dump_va) +{ + struct kbase_va_region *reg; + u64 flags; + u64 nr_pages; + + WARN_ON(!info); + WARN_ON(!kctx); + WARN_ON(!gpu_dump_va); + + flags = BASE_MEM_PROT_CPU_RD | + BASE_MEM_PROT_GPU_WR | + BASEP_MEM_PERMANENT_KERNEL_MAPPING | + BASE_MEM_CACHED_CPU; + + if (kctx->kbdev->mmu_mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE) + flags |= BASE_MEM_UNCACHED_GPU; + + nr_pages = PFN_UP(info->dump_bytes); + + reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va); + + if (!reg) + return -ENOMEM; + + return 0; +} + +/** + * kbasep_hwcnt_backend_jm_dump_free() - Free an allocated GPU dump buffer. + * @kctx: Non-NULL pointer to kbase context. + * @gpu_dump_va: GPU dump buffer virtual address. + */ +static void kbasep_hwcnt_backend_jm_dump_free( + struct kbase_context *kctx, + u64 gpu_dump_va) +{ + WARN_ON(!kctx); + if (gpu_dump_va) + kbase_mem_free(kctx, gpu_dump_va); +} + +/** + * kbasep_hwcnt_backend_jm_destroy() - Destroy a JM backend. + * @backend: Pointer to JM backend to destroy. + * + * Can be safely called on a backend in any state of partial construction. 
+ */ +static void kbasep_hwcnt_backend_jm_destroy( + struct kbase_hwcnt_backend_jm *backend) +{ + if (!backend) + return; + + if (backend->kctx) { + struct kbase_context *kctx = backend->kctx; + struct kbase_device *kbdev = kctx->kbdev; + + if (backend->cpu_dump_va) + kbase_phy_alloc_mapping_put(kctx, backend->vmap); + + if (backend->gpu_dump_va) + kbasep_hwcnt_backend_jm_dump_free( + kctx, backend->gpu_dump_va); + + kbasep_js_release_privileged_ctx(kbdev, kctx); + kbase_destroy_context(kctx); + } + + kfree(backend); +} + +/** + * kbasep_hwcnt_backend_jm_create() - Create a JM backend. + * @info: Non-NULL pointer to backend info. + * @out_backend: Non-NULL pointer to where backend is stored on success. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_backend_jm_create( + const struct kbase_hwcnt_backend_jm_info *info, + struct kbase_hwcnt_backend_jm **out_backend) +{ + + int errcode; + struct kbase_device *kbdev; + struct kbase_hwcnt_backend_jm *backend = NULL; + + WARN_ON(!info); + WARN_ON(!out_backend); + + kbdev = info->kbdev; + + backend = kzalloc(sizeof(*backend), GFP_KERNEL); + if (!backend) + goto alloc_error; + + backend->info = info; + + backend->kctx = kbase_create_context(kbdev, true, + BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED, 0, NULL); + if (!backend->kctx) + goto alloc_error; + + kbasep_js_schedule_privileged_ctx(kbdev, backend->kctx); + + errcode = kbasep_hwcnt_backend_jm_dump_alloc( + info, backend->kctx, &backend->gpu_dump_va); + if (errcode) + goto error; + + backend->cpu_dump_va = kbase_phy_alloc_mapping_get(backend->kctx, + backend->gpu_dump_va, &backend->vmap); + if (!backend->cpu_dump_va) + goto alloc_error; + + kbase_ccswe_init(&backend->ccswe_shader_cores); + backend->rate_listener.notify = kbasep_hwcnt_backend_jm_on_freq_change; + +#ifdef CONFIG_MALI_NO_MALI + /* The dummy model needs the CPU mapping. */ + gpu_model_set_dummy_prfcnt_base_cpu(backend->cpu_dump_va); +#endif + + *out_backend = backend; + return 0; + +alloc_error: + errcode = -ENOMEM; +error: + kbasep_hwcnt_backend_jm_destroy(backend); + return errcode; +} + +/* JM backend implementation of kbase_hwcnt_backend_init_fn */ +static int kbasep_hwcnt_backend_jm_init( + const struct kbase_hwcnt_backend_info *info, + struct kbase_hwcnt_backend **out_backend) +{ + int errcode; + struct kbase_hwcnt_backend_jm *backend = NULL; + + if (!info || !out_backend) + return -EINVAL; + + errcode = kbasep_hwcnt_backend_jm_create( + (const struct kbase_hwcnt_backend_jm_info *) info, &backend); + if (errcode) + return errcode; + + *out_backend = (struct kbase_hwcnt_backend *)backend; + + return 0; +} + +/* JM backend implementation of kbase_hwcnt_backend_term_fn */ +static void kbasep_hwcnt_backend_jm_term(struct kbase_hwcnt_backend *backend) +{ + if (!backend) + return; + + kbasep_hwcnt_backend_jm_dump_disable(backend); + kbasep_hwcnt_backend_jm_destroy( + (struct kbase_hwcnt_backend_jm *)backend); +} + +/** + * kbasep_hwcnt_backend_jm_info_destroy() - Destroy a JM backend info. + * @info: Pointer to info to destroy. + * + * Can be safely called on a backend info in any state of partial construction. + */ +static void kbasep_hwcnt_backend_jm_info_destroy( + const struct kbase_hwcnt_backend_jm_info *info) +{ + if (!info) + return; + + kbase_hwcnt_gpu_metadata_destroy(info->metadata); + kfree(info); +} + +/** + * kbasep_hwcnt_backend_jm_info_create() - Create a JM backend info. + * @kbdev: Non-NULL pointer to kbase device. + * @out_info: Non-NULL pointer to where info is stored on success.
+ * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_backend_jm_info_create( + struct kbase_device *kbdev, + const struct kbase_hwcnt_backend_jm_info **out_info) +{ + int errcode = -ENOMEM; + struct kbase_hwcnt_gpu_info hwcnt_gpu_info; + struct kbase_hwcnt_backend_jm_info *info = NULL; + + WARN_ON(!kbdev); + WARN_ON(!out_info); + + errcode = kbase_hwcnt_gpu_info_init(kbdev, &hwcnt_gpu_info); + if (errcode) + return errcode; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + goto error; + + info->kbdev = kbdev; + +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY + info->use_secondary = true; +#else + info->use_secondary = false; +#endif + + errcode = kbase_hwcnt_gpu_metadata_create( + &hwcnt_gpu_info, info->use_secondary, + &info->metadata, + &info->dump_bytes); + if (errcode) + goto error; + + *out_info = info; + + return 0; +error: + kbasep_hwcnt_backend_jm_info_destroy(info); + return errcode; +} + +int kbase_hwcnt_backend_jm_create( + struct kbase_device *kbdev, + struct kbase_hwcnt_backend_interface *iface) +{ + int errcode; + const struct kbase_hwcnt_backend_jm_info *info = NULL; + + if (!kbdev || !iface) + return -EINVAL; + + errcode = kbasep_hwcnt_backend_jm_info_create(kbdev, &info); + + if (errcode) + return errcode; + + iface->metadata = info->metadata; + iface->info = (struct kbase_hwcnt_backend_info *)info; + iface->init = kbasep_hwcnt_backend_jm_init; + iface->term = kbasep_hwcnt_backend_jm_term; + iface->timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns; + iface->dump_enable = kbasep_hwcnt_backend_jm_dump_enable; + iface->dump_enable_nolock = kbasep_hwcnt_backend_jm_dump_enable_nolock; + iface->dump_disable = kbasep_hwcnt_backend_jm_dump_disable; + iface->dump_clear = kbasep_hwcnt_backend_jm_dump_clear; + iface->dump_request = kbasep_hwcnt_backend_jm_dump_request; + iface->dump_wait = kbasep_hwcnt_backend_jm_dump_wait; + iface->dump_get = kbasep_hwcnt_backend_jm_dump_get; + + return 0; +} + +void kbase_hwcnt_backend_jm_destroy( + struct kbase_hwcnt_backend_interface *iface) +{ + if (!iface) + return; + + kbasep_hwcnt_backend_jm_info_destroy( + (const struct kbase_hwcnt_backend_jm_info *)iface->info); + memset(iface, 0, sizeof(*iface)); +} diff --git a/mali_kbase/mali_kbase_hwcnt_backend_gpu.h b/mali_kbase/mali_kbase_hwcnt_backend_jm.h index 7712f14..f15faeb 100644 --- a/mali_kbase/mali_kbase_hwcnt_backend_gpu.h +++ b/mali_kbase/mali_kbase_hwcnt_backend_jm.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -21,19 +21,19 @@ */ /** - * Concrete implementation of mali_kbase_hwcnt_backend interface for GPU + * Concrete implementation of mali_kbase_hwcnt_backend interface for JM * backend. */ -#ifndef _KBASE_HWCNT_BACKEND_GPU_H_ -#define _KBASE_HWCNT_BACKEND_GPU_H_ +#ifndef _KBASE_HWCNT_BACKEND_JM_H_ +#define _KBASE_HWCNT_BACKEND_JM_H_ #include "mali_kbase_hwcnt_backend.h" struct kbase_device; /** - * kbase_hwcnt_backend_gpu_create() - Create a GPU hardware counter backend + * kbase_hwcnt_backend_jm_create() - Create a JM hardware counter backend * interface. * @kbdev: Non-NULL pointer to kbase device. * @iface: Non-NULL pointer to backend interface structure that is filled in @@ -43,19 +43,19 @@ struct kbase_device; * * Return: 0 on success, else error code.
*/ -int kbase_hwcnt_backend_gpu_create( +int kbase_hwcnt_backend_jm_create( struct kbase_device *kbdev, struct kbase_hwcnt_backend_interface *iface); /** - * kbase_hwcnt_backend_gpu_destroy() - Destroy a GPU hardware counter backend + * kbase_hwcnt_backend_jm_destroy() - Destroy a JM hardware counter backend * interface. * @iface: Pointer to interface to destroy. * * Can be safely called on an all-zeroed interface, or on an already destroyed * interface. */ -void kbase_hwcnt_backend_gpu_destroy( +void kbase_hwcnt_backend_jm_destroy( struct kbase_hwcnt_backend_interface *iface); -#endif /* _KBASE_HWCNT_BACKEND_GPU_H_ */ +#endif /* _KBASE_HWCNT_BACKEND_JM_H_ */ diff --git a/mali_kbase/mali_kbase_hwcnt_gpu.c b/mali_kbase/mali_kbase_hwcnt_gpu.c index 095c765..1034328 100644 --- a/mali_kbase/mali_kbase_hwcnt_gpu.c +++ b/mali_kbase/mali_kbase_hwcnt_gpu.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2018-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -138,6 +138,8 @@ static int kbasep_hwcnt_backend_gpu_metadata_v4_create( } } + desc.clk_cnt = v4_info->clk_cnt; + errcode = kbase_hwcnt_metadata_create(&desc, metadata); /* Always clean up, as metadata will make a copy of the input args */ @@ -258,6 +260,7 @@ static int kbasep_hwcnt_backend_gpu_metadata_v5_create( desc.grp_cnt = 1; desc.grps = &group; + desc.clk_cnt = v5_info->clk_cnt; /* The JM, Tiler, and L2s are always available, and are before cores */ desc.avail_mask = (1ull << non_sc_block_count) - 1; @@ -287,6 +290,8 @@ int kbase_hwcnt_gpu_info_init( struct kbase_device *kbdev, struct kbase_hwcnt_gpu_info *info) { + size_t clk; + if (!kbdev || !info) return -EINVAL; @@ -307,6 +312,14 @@ int kbase_hwcnt_gpu_info_init( info->v5.core_mask = core_mask; } #endif + + /* Determine the number of available clock domains. 
*/ + for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) { + if (kbdev->pm.clk_rtm.clks[clk] == NULL) + break; + } + info->v5.clk_cnt = clk; + return 0; } @@ -563,7 +576,7 @@ void kbase_hwcnt_gpu_enable_map_to_physical( { const struct kbase_hwcnt_metadata *metadata; - u64 jm_bm = 0; + u64 fe_bm = 0; u64 shader_bm = 0; u64 tiler_bm = 0; u64 mmu_l2_bm = 0; @@ -601,7 +614,7 @@ void kbase_hwcnt_gpu_enable_map_to_physical( mmu_l2_bm |= *blk_map; break; case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM: - jm_bm |= *blk_map; + fe_bm |= *blk_map; break; case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED: break; @@ -613,7 +626,7 @@ void kbase_hwcnt_gpu_enable_map_to_physical( WARN_ON(blk_val_cnt != KBASE_HWCNT_V5_VALUES_PER_BLOCK); switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM: - jm_bm |= *blk_map; + fe_bm |= *blk_map; break; case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: tiler_bm |= *blk_map; @@ -635,8 +648,8 @@ void kbase_hwcnt_gpu_enable_map_to_physical( } } - dst->jm_bm = - kbasep_hwcnt_backend_gpu_block_map_to_physical(jm_bm, 0); + dst->fe_bm = + kbasep_hwcnt_backend_gpu_block_map_to_physical(fe_bm, 0); dst->shader_bm = kbasep_hwcnt_backend_gpu_block_map_to_physical(shader_bm, 0); dst->tiler_bm = @@ -653,7 +666,7 @@ void kbase_hwcnt_gpu_enable_map_from_physical( const struct kbase_hwcnt_metadata *metadata; u64 ignored_hi; - u64 jm_bm; + u64 fe_bm; u64 shader_bm; u64 tiler_bm; u64 mmu_l2_bm; @@ -665,7 +678,7 @@ void kbase_hwcnt_gpu_enable_map_from_physical( metadata = dst->metadata; kbasep_hwcnt_backend_gpu_block_map_from_physical( - src->jm_bm, &jm_bm, &ignored_hi); + src->fe_bm, &fe_bm, &ignored_hi); kbasep_hwcnt_backend_gpu_block_map_from_physical( src->shader_bm, &shader_bm, &ignored_hi); kbasep_hwcnt_backend_gpu_block_map_from_physical( @@ -698,7 +711,7 @@ void kbase_hwcnt_gpu_enable_map_from_physical( *blk_map = mmu_l2_bm; break; case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM: - *blk_map = jm_bm; + *blk_map = fe_bm; break; case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED: break; @@ -710,7 +723,7 @@ void kbase_hwcnt_gpu_enable_map_from_physical( WARN_ON(blk_val_cnt != KBASE_HWCNT_V5_VALUES_PER_BLOCK); switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM: - *blk_map = jm_bm; + *blk_map = fe_bm; break; case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: *blk_map = tiler_bm; diff --git a/mali_kbase/mali_kbase_hwcnt_gpu.h b/mali_kbase/mali_kbase_hwcnt_gpu.h index 12891e0..13c1af3 100644 --- a/mali_kbase/mali_kbase_hwcnt_gpu.h +++ b/mali_kbase/mali_kbase_hwcnt_gpu.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -80,13 +80,13 @@ enum kbase_hwcnt_gpu_v5_block_type { /** * struct kbase_hwcnt_physical_enable_map - Representation of enable map * directly used by GPU. - * @jm_bm: Job Manager counters selection bitmask. + * @fe_bm: Front end (JM/CSHW) counters selection bitmask. * @shader_bm: Shader counters selection bitmask. * @tiler_bm: Tiler counters selection bitmask. * @mmu_l2_bm: MMU_L2 counters selection bitmask. */ struct kbase_hwcnt_physical_enable_map { - u32 jm_bm; + u32 fe_bm; u32 shader_bm; u32 tiler_bm; u32 mmu_l2_bm; @@ -96,6 +96,7 @@ struct kbase_hwcnt_physical_enable_map { * struct kbase_hwcnt_gpu_v4_info - Information about hwcnt blocks on v4 GPUs. 
* @cg_count: Core group count. * @cgs: Non-NULL pointer to array of cg_count coherent group structures. + * @clk_cnt: Number of clock domains available. * * V4 devices are Mali-T6xx or Mali-T72x, and have one or more core groups, * where each core group may have a physically different layout. @@ -103,16 +104,19 @@ struct kbase_hwcnt_physical_enable_map { struct kbase_hwcnt_gpu_v4_info { size_t cg_count; const struct mali_base_gpu_coherent_group *cgs; + u8 clk_cnt; }; /** * struct kbase_hwcnt_gpu_v5_info - Information about hwcnt blocks on v5 GPUs. * @l2_count: L2 cache count. * @core_mask: Shader core mask. May be sparse. + * @clk_cnt: Number of clock domains available. */ struct kbase_hwcnt_gpu_v5_info { size_t l2_count; u64 core_mask; + u8 clk_cnt; }; /** diff --git a/mali_kbase/mali_kbase_hwcnt_legacy.c b/mali_kbase/mali_kbase_hwcnt_legacy.c index b0e6aee..794ef39 100644 --- a/mali_kbase/mali_kbase_hwcnt_legacy.c +++ b/mali_kbase/mali_kbase_hwcnt_legacy.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -69,7 +69,7 @@ int kbase_hwcnt_legacy_client_create( goto error; /* Translate from the ioctl enable map to the internal one */ - phys_em.jm_bm = enable->jm_bm; + phys_em.fe_bm = enable->fe_bm; phys_em.shader_bm = enable->shader_bm; phys_em.tiler_bm = enable->tiler_bm; phys_em.mmu_l2_bm = enable->mmu_l2_bm; diff --git a/mali_kbase/mali_kbase_hwcnt_reader.h b/mali_kbase/mali_kbase_hwcnt_reader.h index 10706b8..8cd3835 100644 --- a/mali_kbase/mali_kbase_hwcnt_reader.h +++ b/mali_kbase/mali_kbase_hwcnt_reader.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015, 2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,31 +23,53 @@ #ifndef _KBASE_HWCNT_READER_H_ #define _KBASE_HWCNT_READER_H_ +#include <stddef.h> + /* The ids of ioctl commands. 
*/ #define KBASE_HWCNT_READER 0xBE #define KBASE_HWCNT_READER_GET_HWVER _IOR(KBASE_HWCNT_READER, 0x00, u32) #define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, u32) #define KBASE_HWCNT_READER_DUMP _IOW(KBASE_HWCNT_READER, 0x10, u32) #define KBASE_HWCNT_READER_CLEAR _IOW(KBASE_HWCNT_READER, 0x11, u32) -#define KBASE_HWCNT_READER_GET_BUFFER _IOR(KBASE_HWCNT_READER, 0x20,\ +#define KBASE_HWCNT_READER_GET_BUFFER _IOC(_IOC_READ, KBASE_HWCNT_READER, 0x20,\ + offsetof(struct kbase_hwcnt_reader_metadata, cycles)) +#define KBASE_HWCNT_READER_GET_BUFFER_WITH_CYCLES _IOR(KBASE_HWCNT_READER, 0x20,\ struct kbase_hwcnt_reader_metadata) -#define KBASE_HWCNT_READER_PUT_BUFFER _IOW(KBASE_HWCNT_READER, 0x21,\ +#define KBASE_HWCNT_READER_PUT_BUFFER _IOC(_IOC_WRITE, KBASE_HWCNT_READER, 0x21,\ + offsetof(struct kbase_hwcnt_reader_metadata, cycles)) +#define KBASE_HWCNT_READER_PUT_BUFFER_WITH_CYCLES _IOW(KBASE_HWCNT_READER, 0x21,\ struct kbase_hwcnt_reader_metadata) #define KBASE_HWCNT_READER_SET_INTERVAL _IOW(KBASE_HWCNT_READER, 0x30, u32) #define KBASE_HWCNT_READER_ENABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x40, u32) #define KBASE_HWCNT_READER_DISABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x41, u32) #define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, u32) +#define KBASE_HWCNT_READER_GET_API_VERSION_WITH_FEATURES \ + _IOW(KBASE_HWCNT_READER, 0xFF, \ + struct kbase_hwcnt_reader_api_version) + +/** + * struct kbase_hwcnt_reader_metadata_cycles - GPU clock cycles + * @top: the number of cycles associated with the main clock for the + * GPU + * @shader_cores: the cycles that have elapsed on the GPU shader cores + */ +struct kbase_hwcnt_reader_metadata_cycles { + u64 top; + u64 shader_cores; +}; /** * struct kbase_hwcnt_reader_metadata - hwcnt reader sample buffer metadata * @timestamp: time when sample was collected * @event_id: id of an event that triggered sample collection * @buffer_idx: position in sampling area where sample buffer was stored + * @cycles: the GPU cycles that occurred since the last sample */ struct kbase_hwcnt_reader_metadata { u64 timestamp; u32 event_id; u32 buffer_idx; + struct kbase_hwcnt_reader_metadata_cycles cycles; }; /** @@ -67,5 +89,18 @@ enum base_hwcnt_reader_event { BASE_HWCNT_READER_EVENT_COUNT }; +/** + * struct kbase_hwcnt_reader_api_version - hwcnt reader API version + * @version: API version + * @features: available features in this API version + */ +#define KBASE_HWCNT_READER_API_VERSION_NO_FEATURE (0) +#define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_TOP (1 << 0) +#define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_SHADER_CORES (1 << 1) +struct kbase_hwcnt_reader_api_version { + u32 version; + u32 features; +}; + #endif /* _KBASE_HWCNT_READER_H_ */ diff --git a/mali_kbase/mali_kbase_hwcnt_types.c b/mali_kbase/mali_kbase_hwcnt_types.c index 1e9efde..73ea609 100644 --- a/mali_kbase/mali_kbase_hwcnt_types.c +++ b/mali_kbase/mali_kbase_hwcnt_types.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -55,6 +55,10 @@ int kbase_hwcnt_metadata_create( if (!desc || !out_metadata) return -EINVAL; + /* The maximum number of clock domains is 64.
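The redefinition of KBASE_HWCNT_READER_GET_BUFFER above exploits the fact that an ioctl command number encodes its argument size: building the command from offsetof(struct kbase_hwcnt_reader_metadata, cycles) reproduces the command value that pre-cycles clients were compiled against, while the _WITH_CYCLES variants encode the full extended struct. A small userspace program demonstrating the size encoding (struct layout and defines copied from the header above; a sketch, not a complete reader client):

#include <linux/ioctl.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;
typedef uint32_t u32;

/* Layout copied from mali_kbase_hwcnt_reader.h */
struct kbase_hwcnt_reader_metadata_cycles {
        u64 top;
        u64 shader_cores;
};

struct kbase_hwcnt_reader_metadata {
        u64 timestamp;
        u32 event_id;
        u32 buffer_idx;
        struct kbase_hwcnt_reader_metadata_cycles cycles;
};

#define KBASE_HWCNT_READER 0xBE
#define KBASE_HWCNT_READER_GET_BUFFER \
        _IOC(_IOC_READ, KBASE_HWCNT_READER, 0x20, \
             offsetof(struct kbase_hwcnt_reader_metadata, cycles))
#define KBASE_HWCNT_READER_GET_BUFFER_WITH_CYCLES \
        _IOR(KBASE_HWCNT_READER, 0x20, struct kbase_hwcnt_reader_metadata)

int main(void)
{
        /* Same command number (0x20), different encoded payload sizes:
         * 16 bytes for the legacy layout, 32 once the cycles are appended.
         */
        printf("nr=%u legacy size=%u, with-cycles size=%u\n",
               _IOC_NR(KBASE_HWCNT_READER_GET_BUFFER),
               _IOC_SIZE(KBASE_HWCNT_READER_GET_BUFFER),
               _IOC_SIZE(KBASE_HWCNT_READER_GET_BUFFER_WITH_CYCLES));
        return 0;
}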
*/ + if (desc->clk_cnt > (sizeof(u64) * BITS_PER_BYTE)) + return -EINVAL; + /* Calculate the bytes needed to tightly pack the metadata */ /* Top level metadata */ @@ -158,6 +162,7 @@ int kbase_hwcnt_metadata_create( enable_map_count * KBASE_HWCNT_BITFIELD_BYTES; metadata->dump_buf_bytes = dump_buf_count * KBASE_HWCNT_VALUE_BYTES; metadata->avail_mask = desc->avail_mask; + metadata->clk_cnt = desc->clk_cnt; WARN_ON(size != offset); /* Due to the block alignment, there should be exactly one enable map @@ -187,12 +192,17 @@ int kbase_hwcnt_enable_map_alloc( if (!metadata || !enable_map) return -EINVAL; - enable_map_buf = kzalloc(metadata->enable_map_bytes, GFP_KERNEL); - if (!enable_map_buf) - return -ENOMEM; + if (metadata->enable_map_bytes > 0) { + enable_map_buf = + kzalloc(metadata->enable_map_bytes, GFP_KERNEL); + if (!enable_map_buf) + return -ENOMEM; + } else { + enable_map_buf = NULL; + } enable_map->metadata = metadata; - enable_map->enable_map = enable_map_buf; + enable_map->hwcnt_enable_map = enable_map_buf; return 0; } KBASE_EXPORT_TEST_API(kbase_hwcnt_enable_map_alloc); @@ -202,8 +212,8 @@ void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map) if (!enable_map) return; - kfree(enable_map->enable_map); - enable_map->enable_map = NULL; + kfree(enable_map->hwcnt_enable_map); + enable_map->hwcnt_enable_map = NULL; enable_map->metadata = NULL; } KBASE_EXPORT_TEST_API(kbase_hwcnt_enable_map_free); @@ -212,17 +222,25 @@ int kbase_hwcnt_dump_buffer_alloc( const struct kbase_hwcnt_metadata *metadata, struct kbase_hwcnt_dump_buffer *dump_buf) { - u32 *buf; + size_t dump_buf_bytes; + size_t clk_cnt_buf_bytes; + u8 *buf; if (!metadata || !dump_buf) return -EINVAL; - buf = kmalloc(metadata->dump_buf_bytes, GFP_KERNEL); + dump_buf_bytes = metadata->dump_buf_bytes; + clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * metadata->clk_cnt; + + /* Make a single allocation for both dump_buf and clk_cnt_buf. */ + buf = kmalloc(dump_buf_bytes + clk_cnt_buf_bytes, GFP_KERNEL); if (!buf) return -ENOMEM; dump_buf->metadata = metadata; - dump_buf->dump_buf = buf; + dump_buf->dump_buf = (u32 *)buf; + dump_buf->clk_cnt_buf = (u64 *)(buf + dump_buf_bytes); + return 0; } KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_alloc); @@ -246,10 +264,16 @@ int kbase_hwcnt_dump_buffer_array_alloc( size_t buf_idx; unsigned int order; unsigned long addr; + size_t dump_buf_bytes; + size_t clk_cnt_buf_bytes; if (!metadata || !dump_bufs) return -EINVAL; + dump_buf_bytes = metadata->dump_buf_bytes; + clk_cnt_buf_bytes = + sizeof(*dump_bufs->bufs->clk_cnt_buf) * metadata->clk_cnt; + /* Allocate memory for the dump buffer struct array */ buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL); if (!buffers) @@ -258,7 +282,7 @@ int kbase_hwcnt_dump_buffer_array_alloc( /* Allocate pages for the actual dump buffers, as they tend to be fairly * large. 
*/ - order = get_order(metadata->dump_buf_bytes * n); + order = get_order((dump_buf_bytes + clk_cnt_buf_bytes) * n); addr = __get_free_pages(GFP_KERNEL, order); if (!addr) { @@ -273,10 +297,14 @@ int kbase_hwcnt_dump_buffer_array_alloc( /* Set the buffer of each dump buf */ for (buf_idx = 0; buf_idx < n; buf_idx++) { - const size_t offset = metadata->dump_buf_bytes * buf_idx; + const size_t dump_buf_offset = dump_buf_bytes * buf_idx; + const size_t clk_cnt_buf_offset = + (dump_buf_bytes * n) + (clk_cnt_buf_bytes * buf_idx); buffers[buf_idx].metadata = metadata; - buffers[buf_idx].dump_buf = (u32 *)(addr + offset); + buffers[buf_idx].dump_buf = (u32 *)(addr + dump_buf_offset); + buffers[buf_idx].clk_cnt_buf = + (u64 *)(addr + clk_cnt_buf_offset); } return 0; @@ -324,6 +352,9 @@ void kbase_hwcnt_dump_buffer_zero( kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt); } + + memset(dst->clk_cnt_buf, 0, + sizeof(*dst->clk_cnt_buf) * metadata->clk_cnt); } KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero); @@ -334,6 +365,9 @@ void kbase_hwcnt_dump_buffer_zero_strict( return; memset(dst->dump_buf, 0, dst->metadata->dump_buf_bytes); + + memset(dst->clk_cnt_buf, 0, + sizeof(*dst->clk_cnt_buf) * dst->metadata->clk_cnt); } KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero_strict); @@ -384,6 +418,7 @@ void kbase_hwcnt_dump_buffer_copy( { const struct kbase_hwcnt_metadata *metadata; size_t grp, blk, blk_inst; + size_t clk; if (WARN_ON(!dst) || WARN_ON(!src) || @@ -413,6 +448,12 @@ void kbase_hwcnt_dump_buffer_copy( kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, val_cnt); } + + kbase_hwcnt_metadata_for_each_clock(metadata, clk) { + if (kbase_hwcnt_clk_enable_map_enabled( + dst_enable_map->clk_enable_map, clk)) + dst->clk_cnt_buf[clk] = src->clk_cnt_buf[clk]; + } } KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_copy); @@ -423,6 +464,7 @@ void kbase_hwcnt_dump_buffer_copy_strict( { const struct kbase_hwcnt_metadata *metadata; size_t grp, blk, blk_inst; + size_t clk; if (WARN_ON(!dst) || WARN_ON(!src) || @@ -451,6 +493,14 @@ void kbase_hwcnt_dump_buffer_copy_strict( kbase_hwcnt_dump_buffer_block_copy_strict( dst_blk, src_blk, blk_em, val_cnt); } + + kbase_hwcnt_metadata_for_each_clock(metadata, clk) { + bool clk_enabled = + kbase_hwcnt_clk_enable_map_enabled( + dst_enable_map->clk_enable_map, clk); + + dst->clk_cnt_buf[clk] = clk_enabled ? 
src->clk_cnt_buf[clk] : 0; + } } KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_copy_strict); @@ -461,6 +511,7 @@ void kbase_hwcnt_dump_buffer_accumulate( { const struct kbase_hwcnt_metadata *metadata; size_t grp, blk, blk_inst; + size_t clk; if (WARN_ON(!dst) || WARN_ON(!src) || @@ -494,6 +545,12 @@ void kbase_hwcnt_dump_buffer_accumulate( kbase_hwcnt_dump_buffer_block_accumulate( dst_blk, src_blk, hdr_cnt, ctr_cnt); } + + kbase_hwcnt_metadata_for_each_clock(metadata, clk) { + if (kbase_hwcnt_clk_enable_map_enabled( + dst_enable_map->clk_enable_map, clk)) + dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk]; + } } KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_accumulate); @@ -504,6 +561,7 @@ void kbase_hwcnt_dump_buffer_accumulate_strict( { const struct kbase_hwcnt_metadata *metadata; size_t grp, blk, blk_inst; + size_t clk; if (WARN_ON(!dst) || WARN_ON(!src) || @@ -534,5 +592,13 @@ void kbase_hwcnt_dump_buffer_accumulate_strict( kbase_hwcnt_dump_buffer_block_accumulate_strict( dst_blk, src_blk, blk_em, hdr_cnt, ctr_cnt); } + + kbase_hwcnt_metadata_for_each_clock(metadata, clk) { + if (kbase_hwcnt_clk_enable_map_enabled( + dst_enable_map->clk_enable_map, clk)) + dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk]; + else + dst->clk_cnt_buf[clk] = 0; + } } KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_accumulate_strict); diff --git a/mali_kbase/mali_kbase_hwcnt_types.h b/mali_kbase/mali_kbase_hwcnt_types.h index 4d78c84..6a2640f 100644 --- a/mali_kbase/mali_kbase_hwcnt_types.h +++ b/mali_kbase/mali_kbase_hwcnt_types.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -136,11 +136,13 @@ struct kbase_hwcnt_group_description { * @grps: Non-NULL pointer to an array of grp_cnt group descriptions, * describing each Hardware Counter Group in the system. * @avail_mask: Flat Availability Mask for all block instances in the system. + * @clk_cnt: The number of clock domains in the system. The maximum is 64. */ struct kbase_hwcnt_description { size_t grp_cnt; const struct kbase_hwcnt_group_description *grps; u64 avail_mask; + u8 clk_cnt; }; /** @@ -220,6 +222,7 @@ struct kbase_hwcnt_group_metadata { * @enable_map_bytes: The size in bytes of an Enable Map needed for the system. * @dump_buf_bytes: The size in bytes of a Dump Buffer needed for the system. * @avail_mask: The Availability Mask for the system. + * @clk_cnt: The number of clock domains in the system. */ struct kbase_hwcnt_metadata { size_t grp_cnt; @@ -227,6 +230,7 @@ struct kbase_hwcnt_metadata { size_t enable_map_bytes; size_t dump_buf_bytes; u64 avail_mask; + u8 clk_cnt; }; /** @@ -234,13 +238,16 @@ struct kbase_hwcnt_metadata { * bitfields. * @metadata: Non-NULL pointer to metadata used to identify, and to describe * the layout of the enable map. - * @enable_map: Non-NULL pointer of size metadata->enable_map_bytes to an array - * of u64 bitfields, each bit of which enables one hardware + * @hwcnt_enable_map: Non-NULL pointer of size metadata->enable_map_bytes to an + * array of u64 bitfields, each bit of which enables one hardware * counter. + * @clk_enable_map: An array of u64 bitfields, each bit of which enables cycle + * counter for a given clock domain. 
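kbase_hwcnt_dump_buffer_alloc() above carves one allocation into the u32 counter region followed by the u64 cycle-count region, avoiding a second allocation and keeping the two regions adjacent. The standalone sketch below shows the same carving with hypothetical sizes; it assumes dump_buf_bytes is a multiple of 8 so the u64 tail stays naturally aligned, an assumption that holds when the counter area is built from whole counter blocks.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct dump_buffer {
        uint32_t *dump_buf;    /* counter values */
        uint64_t *clk_cnt_buf; /* per-clock cycle counts */
        void *raw;             /* single backing allocation */
};

/* One allocation carved into two regions; dump_buf_bytes must keep
 * the u64 tail correctly aligned (true when it is a multiple of 8).
 */
static int dump_buffer_alloc(struct dump_buffer *buf,
                             size_t dump_buf_bytes, size_t clk_cnt)
{
        uint8_t *raw = malloc(dump_buf_bytes + clk_cnt * sizeof(uint64_t));

        if (!raw)
                return -1;
        buf->raw = raw;
        buf->dump_buf = (uint32_t *)raw;
        buf->clk_cnt_buf = (uint64_t *)(raw + dump_buf_bytes);
        return 0;
}

int main(void)
{
        struct dump_buffer buf;

        if (dump_buffer_alloc(&buf, 256, 2)) /* hypothetical sizes */
                return 1;
        buf.clk_cnt_buf[0] = 12345; /* cycle count for the top clock */
        printf("%p %p\n", (void *)buf.dump_buf, (void *)buf.clk_cnt_buf);
        free(buf.raw);
        return 0;
}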
*/ struct kbase_hwcnt_enable_map { const struct kbase_hwcnt_metadata *metadata; - u64 *enable_map; + u64 *hwcnt_enable_map; + u64 clk_enable_map; }; /** @@ -250,10 +257,13 @@ struct kbase_hwcnt_enable_map { * the layout of the Dump Buffer. * @dump_buf: Non-NULL pointer of size metadata->dump_buf_bytes to an array * of u32 values. + * @clk_cnt_buf: A pointer to an array of u64 values for cycle count elapsed + * for each clock domain. */ struct kbase_hwcnt_dump_buffer { const struct kbase_hwcnt_metadata *metadata; u32 *dump_buf; + u64 *clk_cnt_buf; }; /** @@ -473,7 +483,7 @@ void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map); * block instance. */ #define kbase_hwcnt_enable_map_block_instance(map, grp, blk, blk_inst) \ - ((map)->enable_map + \ + ((map)->hwcnt_enable_map + \ (map)->metadata->grp_metadata[(grp)].enable_map_index + \ (map)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].enable_map_index + \ (map)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].enable_map_stride * (blk_inst)) @@ -520,7 +530,11 @@ static inline void kbase_hwcnt_enable_map_block_disable_all( static inline void kbase_hwcnt_enable_map_disable_all( struct kbase_hwcnt_enable_map *dst) { - memset(dst->enable_map, 0, dst->metadata->enable_map_bytes); + if (dst->hwcnt_enable_map != NULL) + memset(dst->hwcnt_enable_map, 0, + dst->metadata->enable_map_bytes); + + dst->clk_enable_map = 0; } /** @@ -569,6 +583,8 @@ static inline void kbase_hwcnt_enable_map_enable_all( kbase_hwcnt_metadata_for_each_block(dst->metadata, grp, blk, blk_inst) kbase_hwcnt_enable_map_block_enable_all( dst, grp, blk, blk_inst); + + dst->clk_enable_map = (1ull << dst->metadata->clk_cnt) - 1; } /** @@ -582,9 +598,13 @@ static inline void kbase_hwcnt_enable_map_copy( struct kbase_hwcnt_enable_map *dst, const struct kbase_hwcnt_enable_map *src) { - memcpy(dst->enable_map, - src->enable_map, - dst->metadata->enable_map_bytes); + if (dst->hwcnt_enable_map != NULL) { + memcpy(dst->hwcnt_enable_map, + src->hwcnt_enable_map, + dst->metadata->enable_map_bytes); + } + + dst->clk_enable_map = src->clk_enable_map; } /** @@ -602,8 +622,12 @@ static inline void kbase_hwcnt_enable_map_union( dst->metadata->enable_map_bytes / KBASE_HWCNT_BITFIELD_BYTES; size_t i; - for (i = 0; i < bitfld_count; i++) - dst->enable_map[i] |= src->enable_map[i]; + if (dst->hwcnt_enable_map != NULL) { + for (i = 0; i < bitfld_count; i++) + dst->hwcnt_enable_map[i] |= src->hwcnt_enable_map[i]; + } + + dst->clk_enable_map |= src->clk_enable_map; } /** @@ -656,6 +680,12 @@ static inline bool kbase_hwcnt_enable_map_any_enabled( const struct kbase_hwcnt_enable_map *enable_map) { size_t grp, blk, blk_inst; + const u64 clk_enable_map_mask = + (1ull << enable_map->metadata->clk_cnt) - 1; + + if (enable_map->metadata->clk_cnt > 0 && + (enable_map->clk_enable_map & clk_enable_map_mask)) + return true; kbase_hwcnt_metadata_for_each_block( enable_map->metadata, grp, blk, blk_inst) { @@ -1084,4 +1114,29 @@ static inline void kbase_hwcnt_dump_buffer_block_accumulate_strict( } } +/** + * @brief Iterate over each clock domain in the metadata. + * + * @param[in] md Non-NULL pointer to metadata. + * @param[in] clk size_t variable used as clock iterator. + */ +#define kbase_hwcnt_metadata_for_each_clock(md, clk) \ + for ((clk) = 0; (clk) < (md)->clk_cnt; (clk)++) + +/** + * kbase_hwcnt_clk_enable_map_enabled() - Check if the given index is enabled + * in clk_enable_map. + * @clk_enable_map: An enable map for clock domains. + * @index: Index of the enable map for clock domain. 
+ * + * Return: true if the index of the clock domain is enabled, else false. + */ +static inline bool kbase_hwcnt_clk_enable_map_enabled( + const u64 clk_enable_map, const size_t index) +{ + if (clk_enable_map & (1ull << index)) + return true; + return false; +} + #endif /* _KBASE_HWCNT_TYPES_H_ */ diff --git a/mali_kbase/mali_kbase_ioctl.h b/mali_kbase/mali_kbase_ioctl.h index 977b194..17e7601 100644 --- a/mali_kbase/mali_kbase_ioctl.h +++ b/mali_kbase/mali_kbase_ioctl.h @@ -166,7 +166,7 @@ struct kbase_ioctl_mem_free { /** * struct kbase_ioctl_hwcnt_reader_setup - Setup HWC dumper/reader * @buffer_count: requested number of dumping buffers - * @jm_bm: counters selection bitmask (JM) + * @fe_bm: counters selection bitmask (Front end) * @shader_bm: counters selection bitmask (Shader) * @tiler_bm: counters selection bitmask (Tiler) * @mmu_l2_bm: counters selection bitmask (MMU_L2) @@ -175,7 +175,7 @@ struct kbase_ioctl_mem_free { */ struct kbase_ioctl_hwcnt_reader_setup { __u32 buffer_count; - __u32 jm_bm; + __u32 fe_bm; __u32 shader_bm; __u32 tiler_bm; __u32 mmu_l2_bm; @@ -187,14 +187,14 @@ struct kbase_ioctl_hwcnt_reader_setup { /** * struct kbase_ioctl_hwcnt_enable - Enable hardware counter collection * @dump_buffer: GPU address to write counters to - * @jm_bm: counters selection bitmask (JM) + * @fe_bm: counters selection bitmask (Front end) * @shader_bm: counters selection bitmask (Shader) * @tiler_bm: counters selection bitmask (Tiler) * @mmu_l2_bm: counters selection bitmask (MMU_L2) */ struct kbase_ioctl_hwcnt_enable { __u64 dump_buffer; - __u32 jm_bm; + __u32 fe_bm; __u32 shader_bm; __u32 tiler_bm; __u32 mmu_l2_bm; diff --git a/mali_kbase/mali_kbase_jd.c b/mali_kbase/mali_kbase_jd.c index 1a830dd..8f22ceb 100644 --- a/mali_kbase/mali_kbase_jd.c +++ b/mali_kbase/mali_kbase_jd.c @@ -32,6 +32,7 @@ #include <linux/ratelimit.h> #include <mali_kbase_jm.h> +#include <mali_kbase_kinstr_jm.h> #include <mali_kbase_hwaccess_jm.h> #include <tl/mali_kbase_tracepoints.h> #include <mali_linux_trace.h> @@ -39,6 +40,8 @@ #include "mali_kbase_dma_fence.h" #include <mali_kbase_cs_experimental.h> +#include <mali_kbase_caps.h> + #define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0) @@ -52,11 +55,6 @@ ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == \ BASE_JD_REQ_DEP))) -/* Minimum API version that supports the just-in-time memory allocation pressure - * limit feature. - */ -#define MIN_API_VERSION_WITH_JPL KBASE_API_VERSION(11, 20) - /* * This is the kernel side of the API. Only entry points are: * - kbase_jd_submit(): Called from userspace to submit a single bag @@ -77,6 +75,15 @@ get_compat_pointer(struct kbase_context *kctx, const u64 p) return u64_to_user_ptr(p); } +/* Mark an atom as complete, and trace it in kinstr_jm */ +static void jd_mark_atom_complete(struct kbase_jd_atom *katom) +{ + katom->status = KBASE_JD_ATOM_STATE_COMPLETED; + kbase_kinstr_jm_atom_complete(katom); + dev_dbg(katom->kctx->kbdev->dev, "Atom %p status to completed\n", + (void *)katom); +} + /* Runs an atom, either by handing to the JS or by immediately running it in the case of soft-jobs * * Returns whether the JS needs a reschedule. 
@@ -97,24 +104,18 @@ static bool jd_run_atom(struct kbase_jd_atom *katom) /* Dependency only atom */ trace_sysgraph(SGR_SUBMIT, kctx->id, kbase_jd_atom_id(katom->kctx, katom)); - katom->status = KBASE_JD_ATOM_STATE_COMPLETED; - dev_dbg(kctx->kbdev->dev, "Atom %p status to completed\n", - (void *)katom); + jd_mark_atom_complete(katom); return 0; } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { /* Soft-job */ if (katom->will_fail_event_code) { kbase_finish_soft_job(katom); - katom->status = KBASE_JD_ATOM_STATE_COMPLETED; - dev_dbg(kctx->kbdev->dev, - "Atom %p status to completed\n", (void *)katom); + jd_mark_atom_complete(katom); return 0; } if (kbase_process_soft_job(katom) == 0) { kbase_finish_soft_job(katom); - katom->status = KBASE_JD_ATOM_STATE_COMPLETED; - dev_dbg(kctx->kbdev->dev, - "Atom %p status to completed\n", (void *)katom); + jd_mark_atom_complete(katom); } return 0; } @@ -205,7 +206,7 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) * jctx.lock must be held when this is called. */ -static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom_v2 *user_atom) +static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom *user_atom) { int err_ret_val = -EINVAL; u32 res_no; @@ -465,8 +466,6 @@ static inline void jd_resolve_dep(struct list_head *out_list, } } -KBASE_EXPORT_TEST_API(jd_resolve_dep); - /** * is_dep_valid - Validate that a dependency is valid for early dependency * submission @@ -558,7 +557,7 @@ static void jd_try_submitting_deps(struct list_head *out_list, } } -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /** * jd_update_jit_usage - Update just-in-time physical memory usage for an atom. * @@ -698,7 +697,7 @@ static void jd_update_jit_usage(struct kbase_jd_atom *katom) kbase_jit_retry_pending_alloc(kctx); } -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ /* * Perform the necessary handling of an atom that has finished running @@ -723,9 +722,10 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); -#if MALI_JIT_PRESSURE_LIMIT - jd_update_jit_usage(katom); -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (kbase_ctx_flag(kctx, KCTX_JPL_ENABLED)) + jd_update_jit_usage(katom); +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ /* This is needed in case an atom is failed due to being invalid, this * can happen *before* the jobs that the atom depends on have completed */ @@ -736,9 +736,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, } } - katom->status = KBASE_JD_ATOM_STATE_COMPLETED; - dev_dbg(kctx->kbdev->dev, "Atom %p status to completed\n", - (void *)katom); + jd_mark_atom_complete(katom); list_add_tail(&katom->jd_item, &completed_jobs); while (!list_empty(&completed_jobs)) { @@ -870,8 +868,23 @@ static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req) } #endif +/* Trace an atom submission. 
*/ +static void jd_trace_atom_submit(struct kbase_context *const kctx, + struct kbase_jd_atom *const katom, + int *priority) +{ + struct kbase_device *const kbdev = kctx->kbdev; + + KBASE_TLSTREAM_TL_NEW_ATOM(kbdev, katom, kbase_jd_atom_id(kctx, katom)); + KBASE_TLSTREAM_TL_RET_ATOM_CTX(kbdev, katom, kctx); + if (priority) + KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(kbdev, katom, *priority); + KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_IDLE); + kbase_kinstr_jm_atom_queue(katom); +} + static bool jd_submit_atom(struct kbase_context *const kctx, - const struct base_jd_atom_v2 *const user_atom, + const struct base_jd_atom *const user_atom, const struct base_jd_fragment *const user_jc_incr, struct kbase_jd_atom *const katom) { @@ -901,6 +914,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, katom->jc = user_atom->jc; katom->core_req = user_atom->core_req; katom->jobslot = user_atom->jobslot; + katom->seq_nr = user_atom->seq_nr; katom->atom_flags = 0; katom->retry_count = 0; katom->need_cache_flush_cores_retained = 0; @@ -913,19 +927,19 @@ static bool jd_submit_atom(struct kbase_context *const kctx, trace_sysgraph(SGR_ARRIVE, kctx->id, user_atom->atom_number); -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /* Older API version atoms might have random values where jit_id now * lives, but we must maintain backwards compatibility - handle the * issue. */ - if (kctx->api_version < MIN_API_VERSION_WITH_JPL) { + if (!mali_kbase_supports_jit_pressure_limit(kctx->api_version)) { katom->jit_ids[0] = 0; katom->jit_ids[1] = 0; } else { katom->jit_ids[0] = user_atom->jit_id[0]; katom->jit_ids[1] = user_atom->jit_id[1]; } -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ katom->renderpass_id = user_atom->renderpass_id; @@ -961,17 +975,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, /* Wrong dependency setup. Atom will be sent * back to user space. Do not record any * dependencies. */ - KBASE_TLSTREAM_TL_NEW_ATOM( - kbdev, - katom, - kbase_jd_atom_id(kctx, katom)); - KBASE_TLSTREAM_TL_RET_ATOM_CTX( - kbdev, - katom, kctx); - KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE( - kbdev, - katom, - TL_ATOM_STATE_IDLE); + jd_trace_atom_submit(kctx, katom, NULL); return jd_done_nolock(katom, NULL); } @@ -1013,13 +1017,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, /* This atom will be sent back to user space. * Do not record any dependencies. */ - KBASE_TLSTREAM_TL_NEW_ATOM( - kbdev, - katom, - kbase_jd_atom_id(kctx, katom)); - KBASE_TLSTREAM_TL_RET_ATOM_CTX(kbdev, katom, kctx); - KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, - TL_ATOM_STATE_IDLE); + jd_trace_atom_submit(kctx, katom, NULL); will_fail = true; @@ -1078,13 +1076,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, katom->sched_priority = sched_prio; /* Create a new atom. 
*/ - KBASE_TLSTREAM_TL_NEW_ATOM( - kbdev, - katom, - kbase_jd_atom_id(kctx, katom)); - KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_IDLE); - KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(kbdev, katom, katom->sched_priority); - KBASE_TLSTREAM_TL_RET_ATOM_CTX(kbdev, katom, kctx); + jd_trace_atom_submit(kctx, katom, &katom->sched_priority); #if !MALI_INCREMENTAL_RENDERING /* Reject atoms for incremental rendering if not supported */ @@ -1151,8 +1143,8 @@ static bool jd_submit_atom(struct kbase_context *const kctx, } } -#if !MALI_JIT_PRESSURE_LIMIT - if ((kctx->api_version >= MIN_API_VERSION_WITH_JPL) && +#if !MALI_JIT_PRESSURE_LIMIT_BASE + if (mali_kbase_supports_jit_pressure_limit(kctx->api_version) && (user_atom->jit_id[0] || user_atom->jit_id[1])) { /* JIT pressure limit is disabled, but we are receiving non-0 * JIT IDs - atom is invalid. @@ -1160,7 +1152,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, katom->event_code = BASE_JD_EVENT_JOB_INVALID; return jd_done_nolock(katom, NULL); } -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ /* Validate the atom. Function will return error if the atom is * malformed. @@ -1233,6 +1225,9 @@ int kbase_jd_submit(struct kbase_context *kctx, struct kbase_device *kbdev; u32 latest_flush; + bool jd_atom_is_v2 = (stride == sizeof(struct base_jd_atom_v2) || + stride == offsetof(struct base_jd_atom_v2, renderpass_id)); + /* * kbase_jd_submit isn't expected to fail and so all errors with the * jobs are reported by immediately failing them (through event system) @@ -1247,7 +1242,9 @@ int kbase_jd_submit(struct kbase_context *kctx, } if (stride != offsetof(struct base_jd_atom_v2, renderpass_id) && - stride != sizeof(struct base_jd_atom_v2)) { + stride != sizeof(struct base_jd_atom_v2) && + stride != offsetof(struct base_jd_atom, renderpass_id) && + stride != sizeof(struct base_jd_atom)) { dev_err(kbdev->dev, "Stride %u passed to job_submit isn't supported by the kernel\n", stride); @@ -1258,16 +1255,29 @@ int kbase_jd_submit(struct kbase_context *kctx, latest_flush = kbase_backend_get_current_flush_id(kbdev); for (i = 0; i < nr_atoms; i++) { - struct base_jd_atom_v2 user_atom; + struct base_jd_atom user_atom; struct base_jd_fragment user_jc_incr; struct kbase_jd_atom *katom; - if (copy_from_user(&user_atom, user_addr, stride) != 0) { - dev_err(kbdev->dev, - "Invalid atom address %p passed to job_submit\n", - user_addr); - err = -EFAULT; - break; + if (unlikely(jd_atom_is_v2)) { + if (copy_from_user(&user_atom.jc, user_addr, sizeof(struct base_jd_atom_v2)) != 0) { + dev_err(kbdev->dev, + "Invalid atom address %p passed to job_submit\n", + user_addr); + err = -EFAULT; + break; + } + + /* no seq_nr in v2 */ + user_atom.seq_nr = 0; + } else { + if (copy_from_user(&user_atom, user_addr, stride) != 0) { + dev_err(kbdev->dev, + "Invalid atom address %p passed to job_submit\n", + user_addr); + err = -EFAULT; + break; + } } if (stride == offsetof(struct base_jd_atom_v2, renderpass_id)) { diff --git a/mali_kbase/mali_kbase_kinstr_jm.c b/mali_kbase/mali_kbase_kinstr_jm.c new file mode 100644 index 0000000..1e91a7c --- /dev/null +++ b/mali_kbase/mali_kbase_kinstr_jm.c @@ -0,0 +1,896 @@ +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * mali_kbase_kinstr_jm.c + * Kernel driver public interface to job manager atom tracing + */ + +#include "mali_kbase_kinstr_jm.h" +#include "mali_kbase_kinstr_jm_reader.h" + +#include "mali_kbase.h" +#include "mali_kbase_linux.h" + +#include <mali_kbase_jm_rb.h> + +#include <asm/barrier.h> +#include <linux/anon_inodes.h> +#include <linux/circ_buf.h> +#include <linux/fs.h> +#include <linux/kref.h> +#include <linux/ktime.h> +#include <linux/log2.h> +#include <linux/mutex.h> +#include <linux/rculist_bl.h> +#include <linux/poll.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/version.h> +#include <linux/wait.h> + +#if KERNEL_VERSION(5, 1, 0) <= LINUX_VERSION_CODE +#include <linux/build_bug.h> +#else +// Stringify the expression if no message is given. +#define static_assert(e, ...) __static_assert(e, #__VA_ARGS__, #e) +#define __static_assert(e, msg, ...) _Static_assert(e, msg) +#endif + +#if KERNEL_VERSION(4, 16, 0) >= LINUX_VERSION_CODE +typedef unsigned int __poll_t; +#endif + +#ifndef ENOTSUP +#define ENOTSUP EOPNOTSUPP +#endif + +/* The module printing prefix */ +#define PR_ "mali_kbase_kinstr_jm: " + +/* Allows us to perform ASM goto for the tracing + * https://www.kernel.org/doc/Documentation/static-keys.txt + */ +#if KERNEL_VERSION(4, 3, 0) <= LINUX_VERSION_CODE +DEFINE_STATIC_KEY_FALSE(basep_kinstr_jm_reader_static_key); +#else +struct static_key basep_kinstr_jm_reader_static_key = STATIC_KEY_INIT_FALSE; +#define static_branch_inc(key) static_key_slow_inc(key) +#define static_branch_dec(key) static_key_slow_dec(key) +#endif /* KERNEL_VERSION(4 ,3, 0) <= LINUX_VERSION_CODE */ + +#define KBASE_KINSTR_JM_VERSION 1 + +/** + * struct kbase_kinstr_jm - The context for the kernel job manager atom tracing + * @readers: a bitlocked list of opened readers. Readers are attached to the + * private data of a file descriptor that the user opens with the + * KBASE_IOCTL_KINSTR_JM_FD IO control call. + * @refcount: reference count for the context. Any reader will have a link + * back to the context so that they can remove themselves from the + * list. + * + * This is opaque outside this compilation unit + */ +struct kbase_kinstr_jm { + struct hlist_bl_head readers; + struct kref refcount; +}; + +/** + * struct kbase_kinstr_jm_atom_state_change - Represents an atom changing to a + * new state + * @timestamp: Raw monotonic nanoseconds of the state change + * @state: The state that the atom has moved to + * @atom: The atom number that has changed state + * @flags: Flags associated with the state change. See + * KBASE_KINSTR_JM_ATOM_STATE_FLAG_* defines. + * @reserved: Reserved for future use. + * @data: Extra data for the state change. Active member depends on state. + * + * We can add new fields to the structure and old user code will gracefully + * ignore the new fields. + * + * We can change the size of the structure and old user code will gracefully + * skip over the new size via `struct kbase_kinstr_jm_fd_out->size`. 
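A consequence of this scheme is that a reader must walk the change stream using the record size reported by the kernel rather than its own compiled-in sizeof, interpreting only the leading fields it understands. Below is a hedged userspace sketch of such a parser: the v1 prefix struct mirrors the fields named in this file, the record size would come from the KBASE_IOCTL_KINSTR_JM_FD reply, and the buffer in main() is fabricated purely for illustration.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Prefix of the v1 state-change record that this client understands;
 * the kernel's record may be larger in later minor versions.
 */
struct state_change_v1 {
        uint64_t timestamp;
        int8_t state;
        uint8_t atom;
        uint8_t flags;
};

/* Walk a buffer of records using the kernel-reported record size
 * (from the fd setup reply), not sizeof(struct ...), so a newer,
 * bigger record layout is stepped over correctly.
 */
static void parse_changes(const uint8_t *buf, size_t buf_bytes,
                          size_t kernel_record_size)
{
        size_t off;

        if (kernel_record_size < sizeof(struct state_change_v1))
                return; /* kernel older than this client understands */

        for (off = 0; off + kernel_record_size <= buf_bytes;
             off += kernel_record_size) {
                struct state_change_v1 rec;

                memcpy(&rec, buf + off, sizeof(rec));
                printf("atom %u -> state %d at %llu\n", (unsigned)rec.atom,
                       (int)rec.state, (unsigned long long)rec.timestamp);
        }
}

int main(void)
{
        uint8_t buf[32] = { 0 }; /* two fake 16-byte records */

        buf[8] = 1; /* state */
        buf[9] = 7; /* atom number */
        parse_changes(buf, sizeof(buf), 16);
        return 0;
}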
+ * + * If we remove fields, the version field in `struct + * kbase_kinstr_jm_fd_out->version` will be incremented and old user code will + * gracefully fail and tell the user that the kernel API is too new and has + * backwards-incompatible changes. Note that userspace can opt to handle + * multiple kernel major versions of the structure. + * + * If we need to change the _meaning_ of one of the fields, i.e. the state + * machine has had an incompatible change, we can keep the same members in the + * structure and update the version as above. User code will no longer + * recognise that it has the supported field and can gracefully explain to the + * user that the kernel API is no longer supported. + * + * When making changes to this structure, make sure they are either: + * - additions to the end (for minor version bumps (i.e. only a size increase)) + * such that the layout of existing fields doesn't change, or; + * - update the version reported to userspace so that it can fail explicitly. + */ +struct kbase_kinstr_jm_atom_state_change { + u64 timestamp; + s8 state; /* enum kbase_kinstr_jm_reader_atom_state */ + u8 atom; + u8 flags; + u8 reserved[1]; + /* Tagged union based on state. Ensure members are aligned correctly! */ + union { + struct { + u8 slot; + } start; + u8 padding[4]; + } data; +}; +static_assert( + ((1 << 8 * sizeof(((struct kbase_kinstr_jm_atom_state_change *)0)->state)) - 1) >= + KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT); + +#define KBASE_KINSTR_JM_ATOM_STATE_FLAG_OVERFLOW BIT(0) + +/** + * struct reader_changes - The circular buffer of kernel atom state changes + * @data: The allocated buffer. This is allocated when the user requests + * the reader file descriptor. It is released when the user calls + * close() on the fd. When accessing this, lock the producer spin + * lock to prevent races on the allocated memory. The consumer lock + * does not need to be held because newly-inserted data will always + * be outside the currently-read range. + * @producer: The producing spinlock which allows us to push changes into the + * buffer at the same time as a user read occurring. This needs to + * be locked when saving/restoring the IRQ because we can receive an + * interrupt from the GPU when an atom completes. The CPU could have + * a task preempted that is holding this lock. + * @consumer: The consuming mutex which locks around the user read(). + * Must be held when updating the tail of the circular buffer. + * @head: The head of the circular buffer. Can be used with Linux @c CIRC_ + * helpers. The producer should lock and update this with an SMP + * store when a new change lands. The consumer can read with an + * SMP load. This allows the producer to safely insert new changes + * into the circular buffer. + * @tail: The tail of the circular buffer. Can be used with Linux @c CIRC_ + * helpers. The producer should do a READ_ONCE load and the consumer + * should SMP store. + * @size: The number of changes that are allowed in @c data. Can be used + * with Linux @c CIRC_ helpers. Will always be a power of two. The + * producer lock should be held when updating this and stored with + * an SMP release memory barrier. This means that the consumer can + * do an SMP load. + * @threshold: The number of changes above which threads polling on the reader + * file descriptor will be woken up.
+ */ +struct reader_changes { + struct kbase_kinstr_jm_atom_state_change *data; + spinlock_t producer; + struct mutex consumer; + u32 head; + u32 tail; + u32 size; + u32 threshold; +}; + +/** + * reader_changes_is_valid_size() - Determines if requested changes buffer size + * is valid. + * @size: The requested memory size + * + * We have a constraint that the underlying physical buffer must be a + * power of two so that we can use the efficient circular buffer helpers that + * the kernel provides. It also needs to be representable within a u32. + * + * Return: + * * true - the size is valid + * * false - the size is invalid + */ +static inline bool reader_changes_is_valid_size(const size_t size) +{ + typedef struct reader_changes changes_t; + const size_t elem_size = sizeof(*((changes_t *)0)->data); + const size_t size_size = sizeof(((changes_t *)0)->size); + const size_t size_max = (1ull << (size_size * 8)) - 1; + + return is_power_of_2(size) && /* Is a power of two */ + ((size / elem_size) <= size_max); /* Small enough */ +} + +/** + * reader_changes_init() - Initializes the reader changes and allocates the + * changes buffer + * @changes: The context pointer, must point to a zero-inited allocated reader + * changes structure. We may support allocating the structure in the + * future. + * @size: The requested changes buffer size + * + * Return: + * (0, U16_MAX] - the number of data elements allocated + * -EINVAL - a pointer was invalid + * -ENOTSUP - we do not support allocation of the context + * -ERANGE - the requested memory size was invalid + * -ENOMEM - could not allocate the memory + * -EADDRINUSE - the buffer memory was already allocated + */ +static int reader_changes_init(struct reader_changes *const changes, + const size_t size) +{ + BUILD_BUG_ON((PAGE_SIZE % sizeof(*changes->data)) != 0); + + if (!reader_changes_is_valid_size(size)) { + pr_warn(PR_ "invalid size %zu\n", size); + return -ERANGE; + } + + changes->data = vmalloc(size); + if (!changes->data) + return -ENOMEM; + + spin_lock_init(&changes->producer); + mutex_init(&changes->consumer); + + changes->size = size / sizeof(*changes->data); + changes->threshold = min(((size_t)(changes->size)) / 4, + ((size_t)(PAGE_SIZE)) / sizeof(*changes->data)); + + return changes->size; +} + +/** + * reader_changes_term() - Cleans up a reader changes structure + * @changes: The context to clean up + * + * Releases the allocated state changes memory + */ +static void reader_changes_term(struct reader_changes *const changes) +{ + struct kbase_kinstr_jm_atom_state_change *data = NULL; + unsigned long irq; + + /* + * Although changes->data is used on the consumer side, too, no active + * consumer is possible by the time we clean up the reader changes, so + * no need to take the consumer lock. However, we do need the producer + * lock because the list removal can race with list traversal. + */ + spin_lock_irqsave(&changes->producer, irq); + swap(changes->data, data); + spin_unlock_irqrestore(&changes->producer, irq); + + mutex_destroy(&changes->consumer); + vfree(data); +} + +/** + * reader_changes_count_locked() - Retrieves the count of state changes from the + * tail to the physical end of the buffer + * @changes: The state changes context + * + * The consumer mutex must be held. Uses the CIRC_CNT_TO_END macro to + * determine the count, so there may be more items. However, that's the maximum + * number that can be read in one contiguous read. 
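The head/tail arithmetic in this file comes from the CIRC_* helpers in linux/circ_buf.h, which is why reader_changes_is_valid_size() insists on a power-of-two size: index wrap-around then reduces to a bitwise AND, and one slot is sacrificed to distinguish full from empty. The standalone functions below re-derive the three helpers used here with the same arithmetic as the kernel macros:

#include <stdio.h>

/* Same arithmetic as the CIRC_* macros in linux/circ_buf.h, written
 * as functions; size must be a power of two.
 */
static unsigned int circ_cnt(unsigned int head, unsigned int tail,
                             unsigned int size)
{
        return (head - tail) & (size - 1); /* items available to read */
}

static unsigned int circ_space(unsigned int head, unsigned int tail,
                               unsigned int size)
{
        /* one slot is kept empty to distinguish full from empty */
        return circ_cnt(tail, head + 1, size);
}

static unsigned int circ_cnt_to_end(unsigned int head, unsigned int tail,
                                    unsigned int size)
{
        unsigned int end = size - tail; /* slots before the wrap point */
        unsigned int n = (head + end) & (size - 1);

        return n < end ? n : end; /* readable without wrapping */
}

int main(void)
{
        /* 8-slot buffer; producer at 2 after wrapping, consumer at 6 */
        printf("cnt=%u space=%u to_end=%u\n",
               circ_cnt(2, 6, 8), circ_space(2, 6, 8),
               circ_cnt_to_end(2, 6, 8)); /* cnt=4 space=3 to_end=2 */
        return 0;
}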
+ * + * Return: the number of changes in the circular buffer until the end of the + * allocation + */ +static u32 reader_changes_count_locked(struct reader_changes *const changes) +{ + u32 head; + + lockdep_assert_held_once(&changes->consumer); + + head = smp_load_acquire(&changes->head); + + return CIRC_CNT_TO_END(head, changes->tail, changes->size); +} + +/** + * reader_changes_count() - Retrieves the count of state changes from the + * tail to the physical end of the buffer + * @changes: The state changes context + * + * Return: the number of changes in the circular buffer until the end of the + * allocation + */ +static u32 reader_changes_count(struct reader_changes *const changes) +{ + u32 ret; + + mutex_lock(&changes->consumer); + ret = reader_changes_count_locked(changes); + mutex_unlock(&changes->consumer); + return ret; +} + +/** + * reader_changes_push() - Pushes a change into the reader circular buffer. + * @changes: The buffer to insert the change into + * @change: Kernel atom change to insert + * @wait_queue: The queue to be kicked when changes should be read from + * userspace. Kicked when a threshold is reached or there is + * overflow. + */ +static void reader_changes_push( + struct reader_changes *const changes, + const struct kbase_kinstr_jm_atom_state_change *const change, + wait_queue_head_t *const wait_queue) +{ + u32 head, tail, size, space; + unsigned long irq; + struct kbase_kinstr_jm_atom_state_change *data; + + spin_lock_irqsave(&changes->producer, irq); + + /* We may be called for a reader_changes that's awaiting cleanup. */ + data = changes->data; + if (!data) + goto unlock; + + size = changes->size; + head = changes->head; + tail = smp_load_acquire(&changes->tail); + + space = CIRC_SPACE(head, tail, size); + if (space >= 1) { + data[head] = *change; + if (space == 1) { + data[head].flags |= + KBASE_KINSTR_JM_ATOM_STATE_FLAG_OVERFLOW; + pr_warn(PR_ "overflow of circular buffer\n"); + } + smp_store_release(&changes->head, (head + 1) & (size - 1)); + } + + /* Wake for either overflow or over-threshold cases. */ + if (CIRC_CNT(head + 1, tail, size) >= changes->threshold) + wake_up_interruptible(wait_queue); + +unlock: + spin_unlock_irqrestore(&changes->producer, irq); +} + +/** + * struct reader - Allows the kernel state changes to be read by user space. + * @node: The node in the @c readers locked list + * @rcu_head: storage for the RCU callback to free this reader (see kfree_rcu) + * @changes: The circular buffer of user changes + * @wait_queue: A wait queue for poll + * @context: a pointer to the parent context that created this reader. Can be + * used to remove the reader from the list of readers. Reference + * counted. + * + * The reader is a circular buffer in kernel space. State changes are pushed + * into the buffer. The flow from user space is: + * + * * Request file descriptor with KBASE_IOCTL_KINSTR_JM_FD. This will + * allocate the kernel side circular buffer with a size specified in the + * ioctl argument. + * * The user will then poll the file descriptor for data + * * Upon receiving POLLIN, perform a read() on the file descriptor to get + * the data out. 
+ * * The buffer memory will be freed when the file descriptor is closed + */ +struct reader { + struct hlist_bl_node node; + struct rcu_head rcu_head; + struct reader_changes changes; + wait_queue_head_t wait_queue; + struct kbase_kinstr_jm *context; +}; + +static struct kbase_kinstr_jm * +kbase_kinstr_jm_ref_get(struct kbase_kinstr_jm *const ctx); +static void kbase_kinstr_jm_ref_put(struct kbase_kinstr_jm *const ctx); +static int kbase_kinstr_jm_readers_add(struct kbase_kinstr_jm *const ctx, + struct reader *const reader); +static void kbase_kinstr_jm_readers_del(struct kbase_kinstr_jm *const ctx, + struct reader *const reader); + +/** + * reader_term() - Terminate an instrumentation job manager reader context. + * @reader: Pointer to context to be terminated. + */ +static void reader_term(struct reader *const reader) +{ + if (!reader) + return; + + kbase_kinstr_jm_readers_del(reader->context, reader); + reader_changes_term(&reader->changes); + kbase_kinstr_jm_ref_put(reader->context); + + kfree_rcu(reader, rcu_head); +} + +/** + * reader_init() - Initialise an instrumentation job manager reader context. + * @out_reader: Non-NULL pointer to where the pointer to the created context + * will be stored on success. + * @ctx: the pointer to the parent context. Reference count will be + * increased if initialization is successful + * @num_changes: The number of changes to allocate a buffer for + * + * Return: 0 on success, else error code. + */ +static int reader_init(struct reader **const out_reader, + struct kbase_kinstr_jm *const ctx, + size_t const num_changes) +{ + struct reader *reader = NULL; + const size_t change_size = sizeof(struct kbase_kinstr_jm_atom_state_change); + int status; + + if (!out_reader || !ctx || !num_changes) + return -EINVAL; + + reader = kzalloc(sizeof(*reader), GFP_KERNEL); + if (!reader) + return -ENOMEM; + + INIT_HLIST_BL_NODE(&reader->node); + init_waitqueue_head(&reader->wait_queue); + + reader->context = kbase_kinstr_jm_ref_get(ctx); + + status = reader_changes_init(&reader->changes, num_changes * change_size); + if (status < 0) + goto fail; + + status = kbase_kinstr_jm_readers_add(ctx, reader); + if (status < 0) + goto fail; + + *out_reader = reader; + + return 0; + +fail: + kbase_kinstr_jm_ref_put(reader->context); + kfree(reader); + return status; +} + +/** + * reader_release() - Invoked when the reader file descriptor is released + * @node: The inode that backs the file descriptor. In our case the reader + * file descriptor is backed by an anonymous node, so not much is in + * this. + * @file: the file data. Our reader context is held in the private data + * Return: zero on success + */ +static int reader_release(struct inode *const node, struct file *const file) +{ + struct reader *const reader = file->private_data; + + reader_term(reader); + file->private_data = NULL; + + return 0; +} + +/** + * reader_changes_copy_to_user() - Copy any changes from a changes structure to + * the user-provided buffer. + * @changes: The changes structure from which to copy. + * @buffer: The user buffer to copy the data to. + * @buffer_size: The number of bytes in the buffer. + * Return: The number of bytes copied or negative errno on failure.
+ */
+static ssize_t reader_changes_copy_to_user(struct reader_changes *const changes,
+ char __user *buffer,
+ size_t buffer_size)
+{
+ ssize_t ret = 0;
+ struct kbase_kinstr_jm_atom_state_change const *src_buf = READ_ONCE(
+ changes->data);
+ size_t const entry_size = sizeof(*src_buf);
+ size_t changes_tail, changes_count, read_size;
+
+ /* Needed for the quick buffer capacity calculation below.
+ * Note that we can't use is_power_of_2() since old compilers don't
+ * understand it's a constant expression.
+ */
+#define is_power_of_two(x) ((x) && !((x) & ((x) - 1)))
+ static_assert(is_power_of_two(
+ sizeof(struct kbase_kinstr_jm_atom_state_change)));
+#undef is_power_of_two
+
+ lockdep_assert_held_once(&changes->consumer);
+
+ /* Read continuously until either:
+ * - we've filled the output buffer, or
+ * - there are no changes when we check.
+ *
+ * If more changes arrive while we're copying to the user, we can copy
+ * those as well, space permitting.
+ */
+ do {
+ changes_tail = changes->tail;
+ changes_count = reader_changes_count_locked(changes);
+ read_size = min(changes_count * entry_size,
+ buffer_size & ~(entry_size - 1));
+
+ if (!read_size)
+ break;
+
+ if (copy_to_user(buffer, &(src_buf[changes_tail]), read_size))
+ return -EFAULT;
+
+ buffer += read_size;
+ buffer_size -= read_size;
+ ret += read_size;
+ changes_tail = (changes_tail + read_size / entry_size) &
+ (changes->size - 1);
+ smp_store_release(&changes->tail, changes_tail);
+ } while (read_size);
+
+ return ret;
+}
+
+/**
+ * reader_read() - Handles a read call on the reader file descriptor
+ *
+ * @filp: The file that the read was performed on
+ * @buffer: The destination buffer
+ * @buffer_size: The maximum number of bytes to read
+ * @offset: The offset into the 'file' to read from.
+ *
+ * Note that the destination buffer needs to be fully mapped in userspace or
+ * the read will fault.
+ *
+ * Return:
+ * * The number of bytes read or:
+ * * -EBADF - the file descriptor did not have an attached reader
+ * * -EFAULT - memory access fault
+ * * -ENOBUFS - the buffer is too small to hold even a single state change
+ * * -EIO - the destination buffer could not be verified as writable
+ * * -EAGAIN - if the file is set to nonblocking reads with O_NONBLOCK and there
+ * is no data available
+ * * -EINTR - a blocking wait for new data was interrupted by a signal
+ *
+ * Note: The number of bytes read will always be a multiple of the size of an
+ * entry.
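+ *
+ * As an illustrative sketch of the wrap-around arithmetic (the 16-byte entry
+ * size below is assumed purely for the example): with a 16-entry buffer,
+ * head = 3 and tail = 14, CIRC_CNT_TO_END() yields 2 entries (slots 14 and
+ * 15) that can be copied in one contiguous chunk before the tail wraps back
+ * to slot 0. Similarly, a 100-byte user buffer is first rounded down to
+ * 100 & ~(16 - 1) = 96 bytes, so at most six whole entries are copied and a
+ * partial entry never reaches user space.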
+ */
+static ssize_t reader_read(struct file *const filp,
+ char __user *const buffer,
+ size_t const buffer_size,
+ loff_t *const offset)
+{
+ struct reader *const reader = filp->private_data;
+ struct reader_changes *changes;
+ ssize_t ret;
+
+ if (!reader)
+ return -EBADF;
+
+ if (buffer_size < sizeof(struct kbase_kinstr_jm_atom_state_change))
+ return -ENOBUFS;
+
+#if KERNEL_VERSION(5, 0, 0) <= LINUX_VERSION_CODE
+ if (!access_ok(buffer, buffer_size))
+ return -EIO;
+#else
+ if (!access_ok(VERIFY_WRITE, buffer, buffer_size))
+ return -EIO;
+#endif
+
+ changes = &reader->changes;
+
+ mutex_lock(&changes->consumer);
+ if (!reader_changes_count_locked(changes)) {
+ if (filp->f_flags & O_NONBLOCK) {
+ ret = -EAGAIN;
+ goto exit;
+ }
+
+ if (wait_event_interruptible(
+ reader->wait_queue,
+ !!reader_changes_count_locked(changes))) {
+ ret = -EINTR;
+ goto exit;
+ }
+ }
+
+ ret = reader_changes_copy_to_user(changes, buffer, buffer_size);
+
+exit:
+ mutex_unlock(&changes->consumer);
+ return ret;
+}
+
+/**
+ * reader_poll() - Handles a poll call on the reader file descriptor
+ * @file: The file that the poll was performed on
+ * @wait: The poll table
+ *
+ * The results of the poll will be unreliable if there is no mapped memory as
+ * there is no circular buffer to push atom state changes into.
+ *
+ * Return:
+ * * 0 - no data ready
+ * * POLLIN - state changes have been buffered
+ * * -EBADF - the file descriptor did not have an attached reader
+ * * -EINVAL - the file or poll table pointer was invalid
+ */
+static __poll_t reader_poll(struct file *const file,
+ struct poll_table_struct *const wait)
+{
+ struct reader *reader;
+ struct reader_changes *changes;
+
+ if (unlikely(!file || !wait))
+ return -EINVAL;
+
+ reader = file->private_data;
+ if (unlikely(!reader))
+ return -EBADF;
+
+ changes = &reader->changes;
+
+ if (reader_changes_count(changes) >= changes->threshold)
+ return POLLIN;
+
+ poll_wait(file, &reader->wait_queue, wait);
+
+ return (reader_changes_count(changes) > 0) ? POLLIN : 0;
+}
+
+/* The file operations virtual function table */
+static const struct file_operations file_operations = {
+ .owner = THIS_MODULE,
+ .llseek = no_llseek,
+ .read = reader_read,
+ .poll = reader_poll,
+ .release = reader_release
+};
+
+/* The maximum number of readers that can be created on a context. */
+static const size_t kbase_kinstr_jm_readers_max = 16;
+
+/**
+ * kbase_kinstr_jm_release() - Invoked when the reference count is dropped
+ * @ref: the context reference count
+ */
+static void kbase_kinstr_jm_release(struct kref *const ref)
+{
+ struct kbase_kinstr_jm *const ctx =
+ container_of(ref, struct kbase_kinstr_jm, refcount);
+
+ kfree(ctx);
+}
+
+/**
+ * kbase_kinstr_jm_ref_get() - Reference counts the instrumentation context
+ * @ctx: the context to reference count
+ * Return: the reference counted context
+ */
+static struct kbase_kinstr_jm *
+kbase_kinstr_jm_ref_get(struct kbase_kinstr_jm *const ctx)
+{
+ if (likely(ctx))
+ kref_get(&ctx->refcount);
+ return ctx;
+}
+
+/**
+ * kbase_kinstr_jm_ref_put() - Drops a reference to the instrumentation context
+ * @ctx: the context to lower the reference count on
+ */
+static void kbase_kinstr_jm_ref_put(struct kbase_kinstr_jm *const ctx)
+{
+ if (likely(ctx))
+ kref_put(&ctx->refcount, kbase_kinstr_jm_release);
+}
+
+/**
+ * kbase_kinstr_jm_readers_add() - Adds a reader to the list of readers
+ * @ctx: the instrumentation context
+ * @reader: the reader to add
+ *
+ * Return:
+ * 0 - success
+ * -ENOMEM - too many readers already added.
+ */
+static int kbase_kinstr_jm_readers_add(struct kbase_kinstr_jm *const ctx,
+ struct reader *const reader)
+{
+ struct hlist_bl_head *const readers = &ctx->readers;
+ struct hlist_bl_node *node;
+ struct reader *temp;
+ size_t count = 0;
+
+ hlist_bl_lock(readers);
+
+ hlist_bl_for_each_entry_rcu(temp, node, readers, node)
+ ++count;
+
+ if (count >= kbase_kinstr_jm_readers_max) {
+ hlist_bl_unlock(readers);
+ return -ENOMEM;
+ }
+
+ hlist_bl_add_head_rcu(&reader->node, readers);
+
+ hlist_bl_unlock(readers);
+
+ static_branch_inc(&basep_kinstr_jm_reader_static_key);
+
+ return 0;
+}
+
+/**
+ * kbase_kinstr_jm_readers_del() - Deletes a reader from the list of readers
+ * @ctx: the instrumentation context
+ * @reader: the reader to delete
+ */
+static void kbase_kinstr_jm_readers_del(struct kbase_kinstr_jm *const ctx,
+ struct reader *const reader)
+{
+ struct hlist_bl_head *const readers = &ctx->readers;
+
+ hlist_bl_lock(readers);
+ hlist_bl_del_rcu(&reader->node);
+ hlist_bl_unlock(readers);
+
+ static_branch_dec(&basep_kinstr_jm_reader_static_key);
+}
+
+int kbase_kinstr_jm_get_fd(struct kbase_kinstr_jm *const ctx,
+ union kbase_kinstr_jm_fd *jm_fd_arg)
+{
+ struct kbase_kinstr_jm_fd_in const *in;
+ struct reader *reader;
+ size_t const change_size = sizeof(struct
+ kbase_kinstr_jm_atom_state_change);
+ int status;
+ int fd;
+ int i;
+
+ if (!ctx || !jm_fd_arg)
+ return -EINVAL;
+
+ in = &jm_fd_arg->in;
+
+ if (!is_power_of_2(in->count))
+ return -EINVAL;
+
+ for (i = 0; i < sizeof(in->padding); ++i)
+ if (in->padding[i])
+ return -EINVAL;
+
+ status = reader_init(&reader, ctx, in->count);
+ if (status < 0)
+ return status;
+
+ jm_fd_arg->out.version = KBASE_KINSTR_JM_VERSION;
+ jm_fd_arg->out.size = change_size;
+ memset(&jm_fd_arg->out.padding, 0, sizeof(jm_fd_arg->out.padding));
+
+ fd = anon_inode_getfd("[mali_kinstr_jm]", &file_operations, reader,
+ O_CLOEXEC);
+ if (fd < 0)
+ reader_term(reader);
+
+ return fd;
+}
+
+int kbase_kinstr_jm_init(struct kbase_kinstr_jm **const out_ctx)
+{
+ struct kbase_kinstr_jm *ctx = NULL;
+
+ if (!out_ctx)
+ return -EINVAL;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ INIT_HLIST_BL_HEAD(&ctx->readers);
+ kref_init(&ctx->refcount);
+
+ *out_ctx = ctx;
+
+ return 0;
+}
+
+void kbase_kinstr_jm_term(struct kbase_kinstr_jm *const ctx)
+{
+ kbase_kinstr_jm_ref_put(ctx);
+}
+
+void kbasep_kinstr_jm_atom_state(
+ struct kbase_jd_atom *const katom,
+ const enum kbase_kinstr_jm_reader_atom_state state)
+{
+ struct kbase_context *const kctx = katom->kctx;
+ struct kbase_kinstr_jm *const ctx = kctx->kinstr_jm;
+ const u8 id = kbase_jd_atom_id(kctx, katom);
+ struct kbase_kinstr_jm_atom_state_change change = {
+ .timestamp = ktime_get_raw_ns(), .atom = id, .state = state
+ };
+ struct reader *reader;
+ struct hlist_bl_node *node;
+
+ WARN(state >= KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT || state < 0,
+ PR_ "unsupported katom (%u) state (%i)", id, state);
+
+ switch (state) {
+ case KBASE_KINSTR_JM_READER_ATOM_STATE_START:
+ change.data.start.slot = katom->jobslot;
+ break;
+ default:
+ break;
+ }
+
+ rcu_read_lock();
+ hlist_bl_for_each_entry_rcu(reader, node, &ctx->readers, node)
+ reader_changes_push(
+ &reader->changes, &change, &reader->wait_queue);
+ rcu_read_unlock();
+}
+
+KBASE_EXPORT_TEST_API(kbasep_kinstr_jm_atom_state);
+
+void kbasep_kinstr_jm_atom_hw_submit(struct kbase_jd_atom *const katom)
+{
+ struct kbase_context *const kctx = katom->kctx;
+ struct kbase_device *const kbdev = kctx->kbdev;
+ const int slot = katom->slot_nr;
+
struct kbase_jd_atom *const submitted = kbase_gpu_inspect(kbdev, slot, 0); + + BUILD_BUG_ON(SLOT_RB_SIZE != 2); + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (WARN_ON(slot < 0 || slot >= GPU_MAX_JOB_SLOTS)) + return; + if (WARN_ON(!submitted)) + return; + + if (submitted == katom) + kbase_kinstr_jm_atom_state_start(katom); +} + +void kbasep_kinstr_jm_atom_hw_release(struct kbase_jd_atom *const katom) +{ + struct kbase_context *const kctx = katom->kctx; + struct kbase_device *const kbdev = kctx->kbdev; + const int slot = katom->slot_nr; + struct kbase_jd_atom *const submitted = kbase_gpu_inspect(kbdev, slot, 0); + struct kbase_jd_atom *const queued = kbase_gpu_inspect(kbdev, slot, 1); + + BUILD_BUG_ON(SLOT_RB_SIZE != 2); + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (WARN_ON(slot < 0 || slot >= GPU_MAX_JOB_SLOTS)) + return; + if (WARN_ON(!submitted)) + return; + if (WARN_ON((submitted != katom) && (queued != katom))) + return; + + if (queued == katom) + return; + + if (katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) + kbase_kinstr_jm_atom_state_stop(katom); + if (queued && queued->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) + kbase_kinstr_jm_atom_state_start(queued); +} diff --git a/mali_kbase/mali_kbase_kinstr_jm.h b/mali_kbase/mali_kbase_kinstr_jm.h new file mode 100644 index 0000000..555edfe --- /dev/null +++ b/mali_kbase/mali_kbase_kinstr_jm.h @@ -0,0 +1,283 @@ +/* + * + * (C) COPYRIGHT 2019,2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * mali_kbase_kinstr_jm.h + * Kernel driver public interface to job manager atom tracing. This API provides + * a method to get the atom state changes into user space. + * + * The flow of operation is: + * + * | kernel | user | + * | ----------------------------------- | ----------------------------------- | + * | Initialize API with | | + * | kbase_kinstr_jm_init() | | + * | | | + * | Kernel code injects states with | | + * | kbase_kinstr_jm_atom_state_*() APIs | | + * | | Call ioctl() to get file descriptor | + * | | via KBASE_IOCTL_KINSTR_JM_FD | + * | Allocates a reader attached to FD | | + * | Allocates circular buffer and | | + * | patches, via ASM goto, the | | + * | kbase_kinstr_jm_atom_state_*() | | + * | | loop: | + * | | Call poll() on FD for POLLIN | + * | When threshold of changes is hit, | | + * | the poll is interrupted with | | + * | POLLIN. If circular buffer is | | + * | full then store the missed count | | + * | and interrupt poll | Call read() to get data from | + * | | circular buffer via the fd | + * | Kernel advances tail of circular | | + * | buffer | | + * | | Close file descriptor | + * | Deallocates circular buffer | | + * | | | + * | Terminate API with | | + * | kbase_kinstr_jm_term() | | + * + * All tracepoints are guarded on a static key. 
The static key is activated when
+ * a user space reader gets created. This means that there is negligible cost
+ * inserting the tracepoints into code when there are no readers.
+ */
+
+#ifndef _KBASE_KINSTR_JM_H_
+#define _KBASE_KINSTR_JM_H_
+
+#include "mali_kbase_kinstr_jm_reader.h"
+
+#ifdef __KERNEL__
+#include <linux/version.h>
+#include <linux/static_key.h>
+#else
+/* empty wrapper macros for userspace */
+#define static_branch_unlikely(key) (1)
+#define KERNEL_VERSION(a, b, c) (0)
+#define LINUX_VERSION_CODE (1)
+#endif /* __KERNEL__ */
+
+/* Forward declarations */
+struct kbase_context;
+struct kbase_kinstr_jm;
+struct kbase_jd_atom;
+union kbase_kinstr_jm_fd;
+
+/**
+ * kbase_kinstr_jm_init() - Initialise an instrumentation job manager context.
+ * @ctx: Non-NULL pointer to where the pointer to the created context will
+ * be stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_kinstr_jm_init(struct kbase_kinstr_jm **ctx);
+
+/**
+ * kbase_kinstr_jm_term() - Terminate an instrumentation job manager context.
+ * @ctx: Pointer to context to be terminated.
+ */
+void kbase_kinstr_jm_term(struct kbase_kinstr_jm *ctx);
+
+/**
+ * kbase_kinstr_jm_get_fd() - Retrieves a file descriptor that can be used to
+ * read the atom state changes from userspace
+ *
+ * @ctx: Pointer to the initialized context
+ * @jm_fd_arg: Pointer to the union containing the in/out params
+ * Return: file descriptor on success, else a negative error code
+ */
+int kbase_kinstr_jm_get_fd(struct kbase_kinstr_jm *const ctx,
+ union kbase_kinstr_jm_fd *jm_fd_arg);
+
+/**
+ * kbasep_kinstr_jm_atom_state() - Signifies that an atom has changed state
+ * @atom: The atom that has changed state
+ * @state: The new state of the atom
+ *
+ * This performs the actual storage of the state ready for user space to
+ * read the data. It is only called when the static key is enabled from
+ * kbase_kinstr_jm_atom_state(). There is almost never a need to invoke this
+ * function directly.
+ */
+void kbasep_kinstr_jm_atom_state(
+ struct kbase_jd_atom *const atom,
+ const enum kbase_kinstr_jm_reader_atom_state state);
+
+/* Allows ASM goto patching to reduce tracing overhead. This is
+ * incremented/decremented when readers are created and terminated. This really
+ * shouldn't be changed externally, but if you do, make sure you use
+ * a static_branch_inc()/static_branch_dec() pair.
+ */
+#if KERNEL_VERSION(4, 3, 0) <= LINUX_VERSION_CODE
+extern struct static_key_false basep_kinstr_jm_reader_static_key;
+#else
+/* Pre-4.3 kernels have a different API for static keys, but it works
+ * mostly the same, with less type safety. */
+extern struct static_key basep_kinstr_jm_reader_static_key;
+#define static_branch_unlikely(key) static_key_false(key)
+#endif /* KERNEL_VERSION(4, 3, 0) <= LINUX_VERSION_CODE */
+
+/**
+ * kbase_kinstr_jm_atom_state() - Signifies that an atom has changed state
+ * @atom: The atom that has changed state
+ * @state: The new state of the atom
+ *
+ * This uses a static key to reduce overhead when tracing is disabled
+ */
+static inline void kbase_kinstr_jm_atom_state(
+ struct kbase_jd_atom *const atom,
+ const enum kbase_kinstr_jm_reader_atom_state state)
+{
+ if (static_branch_unlikely(&basep_kinstr_jm_reader_static_key))
+ kbasep_kinstr_jm_atom_state(atom, state);
+}
+
+/**
+ * kbase_kinstr_jm_atom_state_queue() - Signifies that an atom has entered a
+ * hardware or software queue.
+ * @atom: The atom that has changed state + */ +static inline void kbase_kinstr_jm_atom_state_queue( + struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state( + atom, KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE); +} + +/** + * kbase_kinstr_jm_atom_state_start() - Signifies that work has started on an + * atom + * @atom: The atom that has changed state + */ +static inline void kbase_kinstr_jm_atom_state_start( + struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state( + atom, KBASE_KINSTR_JM_READER_ATOM_STATE_START); +} + +/** + * kbase_kinstr_jm_atom_state_stop() - Signifies that work has stopped on an + * atom + * @atom: The atom that has changed state + */ +static inline void kbase_kinstr_jm_atom_state_stop( + struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state( + atom, KBASE_KINSTR_JM_READER_ATOM_STATE_STOP); +} + +/** + * kbase_kinstr_jm_atom_state_complete() - Signifies that all work has completed + * on an atom + * @atom: The atom that has changed state + */ +static inline void kbase_kinstr_jm_atom_state_complete( + struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state( + atom, KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE); +} + +/** + * kbase_kinstr_jm_atom_queue() - A software *or* hardware atom is queued for + * execution + * @atom: The atom that has changed state + */ +static inline void kbase_kinstr_jm_atom_queue(struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state_queue(atom); +} + +/** + * kbase_kinstr_jm_atom_complete() - A software *or* hardware atom is fully + * completed + * @atom: The atom that has changed state + */ +static inline void kbase_kinstr_jm_atom_complete( + struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state_complete(atom); +} + +/** + * kbase_kinstr_jm_atom_sw_start() - A software atom has started work + * @atom: The atom that has changed state + */ +static inline void kbase_kinstr_jm_atom_sw_start( + struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state_start(atom); +} + +/** + * kbase_kinstr_jm_atom_sw_stop() - A software atom has stopped work + * @atom: The atom that has changed state + */ +static inline void kbase_kinstr_jm_atom_sw_stop( + struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state_stop(atom); +} + +/** + * kbasep_kinstr_jm_atom_hw_submit() - A hardware atom has been submitted + * @atom: The atom that has been submitted + * + * This private implementation should not be called directly, it is protected + * by a static key in kbase_kinstr_jm_atom_hw_submit(). Use that instead. + */ +void kbasep_kinstr_jm_atom_hw_submit(struct kbase_jd_atom *const atom); + +/** + * kbase_kinstr_jm_atom_hw_submit() - A hardware atom has been submitted + * @atom: The atom that has been submitted + */ +static inline void kbase_kinstr_jm_atom_hw_submit( + struct kbase_jd_atom *const atom) +{ + if (static_branch_unlikely(&basep_kinstr_jm_reader_static_key)) + kbasep_kinstr_jm_atom_hw_submit(atom); +} + +/** + * kbasep_kinstr_jm_atom_hw_release() - A hardware atom has been released + * @atom: The atom that has been released + * + * This private implementation should not be called directly, it is protected + * by a static key in kbase_kinstr_jm_atom_hw_release(). Use that instead. 
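+ *
+ * For illustration of the release logic (a sketch of the behaviour
+ * implemented in mali_kbase_kinstr_jm.c): consider a job slot ringbuffer
+ * holding atom A at index 0 (submitted) and atom B at index 1 (queued).
+ * When A is released:
+ * - A is in the KBASE_ATOM_GPU_RB_SUBMITTED state, so a STOP state change
+ *   is pushed for A;
+ * - if B has also reached KBASE_ATOM_GPU_RB_SUBMITTED, a START state change
+ *   is pushed for B, which is now the running atom.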
+ */
+void kbasep_kinstr_jm_atom_hw_release(struct kbase_jd_atom *const atom);
+
+/**
+ * kbase_kinstr_jm_atom_hw_release() - A hardware atom has been released
+ * @atom: The atom that has been released
+ */
+static inline void kbase_kinstr_jm_atom_hw_release(
+ struct kbase_jd_atom *const atom)
+{
+ if (static_branch_unlikely(&basep_kinstr_jm_reader_static_key))
+ kbasep_kinstr_jm_atom_hw_release(atom);
+}
+
+#endif /* _KBASE_KINSTR_JM_H_ */
diff --git a/mali_kbase/mali_kbase_kinstr_jm_reader.h b/mali_kbase/mali_kbase_kinstr_jm_reader.h
new file mode 100644
index 0000000..e267e6b
--- /dev/null
+++ b/mali_kbase/mali_kbase_kinstr_jm_reader.h
@@ -0,0 +1,70 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * mali_kbase_kinstr_jm_reader.h
+ * Provides an ioctl API to read kernel atom state changes. The flow of the
+ * API is:
+ * 1. Obtain the file descriptor with ``KBASE_IOCTL_KINSTR_JM_FD``
+ * 2. Determine the buffer structure layout via the above ioctl's returned
+ * size and version fields in ``struct kbase_kinstr_jm_fd_out``
+ * 3. Poll the file descriptor for ``POLLIN``
+ * 4. Get data with read() on the fd
+ * 5. Use the structure version to understand how to read the data from the
+ * buffer
+ * 6. Repeat 3-5
+ * 7. Close the file descriptor
+ */
+
+#ifndef _KBASE_KINSTR_JM_READER_H_
+#define _KBASE_KINSTR_JM_READER_H_
+
+/**
+ * enum kbase_kinstr_jm_reader_atom_state - Determines the work state of an atom
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE: Signifies that an atom has
+ * entered a hardware or software queue
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_START: Signifies that work has started
+ * on an atom
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_STOP: Signifies that work has stopped
+ * on an atom
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE: Signifies that work has
+ * completed on an atom
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT: The number of state enumerations
+ *
+ * We can add new states to the end of this if they do not break the existing
+ * state machine. Old user mode code can gracefully ignore states it does not
+ * understand.
+ *
+ * If we need to make a breaking change to the state machine, we can do that by
+ * changing the version reported by KBASE_IOCTL_KINSTR_JM_FD. This will
+ * mean that old user mode code will fail to understand the new state field in
+ * the structure and gracefully not use the state change API.
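+ *
+ * A hypothetical user-space consumer of the flow above (the kbase device
+ * descriptor kbase_fd and all error handling are assumed; this is an
+ * illustrative sketch, not driver code):
+ *
+ *	union kbase_kinstr_jm_fd args = { .in = { .count = 256 } };
+ *	int fd = ioctl(kbase_fd, KBASE_IOCTL_KINSTR_JM_FD, &args);
+ *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
+ *	while (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN)) {
+ *		char buf[4096];
+ *		ssize_t bytes = read(fd, buf, sizeof(buf));
+ *		... walk buf in args.out.size byte strides, using
+ *		args.out.version to decide how to decode each entry ...
+ *	}
+ *	close(fd);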
+ */ +enum kbase_kinstr_jm_reader_atom_state { + KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE, + KBASE_KINSTR_JM_READER_ATOM_STATE_START, + KBASE_KINSTR_JM_READER_ATOM_STATE_STOP, + KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE, + KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT +}; + +#endif /* _KBASE_KINSTR_JM_READER_H_ */ diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c index 4a1004b..8cf7e5d 100644 --- a/mali_kbase/mali_kbase_mem.c +++ b/mali_kbase/mali_kbase_mem.c @@ -43,6 +43,7 @@ #include <mali_kbase_mem_pool_group.h> #include <mmu/mali_kbase_mmu.h> #include <mali_kbase_config_defaults.h> +#include <mali_kbase_trace_gpu_mem.h> /* * Alignment of objects allocated by the GPU inside a just-in-time memory @@ -847,13 +848,14 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, if (group_id < 0 || group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) return -EINVAL; -#if MALI_JIT_PRESSURE_LIMIT if (phys_pages_limit > jit_va_pages) -#else - if (phys_pages_limit != jit_va_pages) -#endif /* MALI_JIT_PRESSURE_LIMIT */ return -EINVAL; +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (phys_pages_limit != jit_va_pages) + kbase_ctx_flag_set(kctx, KCTX_JPL_ENABLED); +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + kbase_gpu_vm_lock(kctx); #ifdef CONFIG_64BIT @@ -870,11 +872,11 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, kctx->trim_level = trim_level; kctx->jit_va = true; kctx->jit_group_id = group_id; -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE kctx->jit_phys_pages_limit = phys_pages_limit; dev_dbg(kctx->kbdev->dev, "phys_pages_limit set to %llu\n", phys_pages_limit); -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ } kbase_gpu_vm_unlock(kctx); @@ -976,6 +978,12 @@ int kbase_mem_init(struct kbase_device *kbdev) /* Initialize memory usage */ atomic_set(&memdev->used_pages, 0); + spin_lock_init(&kbdev->gpu_mem_usage_lock); + kbdev->total_gpu_pages = 0; + kbdev->process_root = RB_ROOT; + kbdev->dma_buf_root = RB_ROOT; + mutex_init(&kbdev->dma_buf_lock); + #ifdef IR_THRESHOLD atomic_set(&memdev->ir_threshold, IR_THRESHOLD); #else @@ -1053,6 +1061,11 @@ void kbase_mem_term(struct kbase_device *kbdev) kbase_mem_pool_group_term(&kbdev->mem_pools); + WARN_ON(kbdev->total_gpu_pages); + WARN_ON(!RB_EMPTY_ROOT(&kbdev->process_root)); + WARN_ON(!RB_EMPTY_ROOT(&kbdev->dma_buf_root)); + mutex_destroy(&kbdev->dma_buf_lock); + if (kbdev->mgm_dev) module_put(kbdev->mgm_dev->owner); } @@ -2033,6 +2046,9 @@ no_new_partial: (u64)new_page_count); alloc->nents += nr_pages_requested; + + kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested); + done: return 0; @@ -2209,6 +2225,9 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked( (u64)new_page_count); alloc->nents += nr_pages_requested; + + kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested); + done: return new_pages; @@ -2374,6 +2393,8 @@ int kbase_free_phy_pages_helper( kbdev, kctx->id, (u64)new_page_count); + + kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, freed); } return 0; @@ -2496,6 +2517,8 @@ void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, kbdev, kctx->id, (u64)new_page_count); + + kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, freed); } } @@ -2558,6 +2581,8 @@ void kbase_mem_kref_free(struct kref *kref) alloc->imported.umm.dma_attachment, alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); + kbase_remove_dma_buf_usage(alloc->imported.umm.kctx, + alloc); } dma_buf_detach(alloc->imported.umm.dma_buf, 
alloc->imported.umm.dma_attachment); @@ -2643,18 +2668,28 @@ bool kbase_check_alloc_flags(unsigned long flags) /* GPU executable memory cannot: * - Be written by the GPU * - Be grown on GPU page fault - * - Have the top of its initial commit aligned to 'extent' */ + */ if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & - (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF | - BASE_MEM_TILER_ALIGN_TOP))) + (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF))) + return false; + + /* GPU executable memory also cannot have the top of its initial + * commit aligned to 'extent' + */ + if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & + BASE_MEM_TILER_ALIGN_TOP)) return false; /* To have an allocation lie within a 4GB chunk is required only for - * TLS memory, which will never be used to contain executable code - * and also used for Tiler heap. + * TLS memory, which will never be used to contain executable code. */ if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags & - (BASE_MEM_PROT_GPU_EX | BASE_MEM_TILER_ALIGN_TOP))) + BASE_MEM_PROT_GPU_EX)) + return false; + + /* TLS memory should also not be used for tiler heap */ + if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags & + BASE_MEM_TILER_ALIGN_TOP)) return false; /* GPU should have at least read or write access otherwise there is no @@ -2751,9 +2786,13 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, return -EINVAL; } - if ((flags & (BASE_MEM_GROW_ON_GPF | BASE_MEM_TILER_ALIGN_TOP)) && - test_reg.extent == 0) { - dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GROW_ON_GPF or BASE_MEM_TILER_ALIGN_TOP but extent == 0\n"); + if ((flags & BASE_MEM_GROW_ON_GPF) && (test_reg.extent == 0)) { + dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GROW_ON_GPF but extent == 0\n"); + return -EINVAL; + } + + if ((flags & BASE_MEM_TILER_ALIGN_TOP) && (test_reg.extent == 0)) { + dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_TILER_ALIGN_TOP but extent == 0\n"); return -EINVAL; } @@ -2983,7 +3022,7 @@ static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data) KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops, kbase_jit_debugfs_phys_get); -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE static int kbase_jit_debugfs_used_get(struct kbase_jit_debugfs_data *data) { struct kbase_context *kctx = data->kctx; @@ -3038,7 +3077,7 @@ static int kbase_jit_debugfs_trim_get(struct kbase_jit_debugfs_data *data) KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_trim_fops, kbase_jit_debugfs_trim_get); -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ void kbase_jit_debugfs_init(struct kbase_context *kctx) { @@ -3078,7 +3117,7 @@ void kbase_jit_debugfs_init(struct kbase_context *kctx) */ debugfs_create_file("mem_jit_phys", mode, kctx->kctx_dentry, kctx, &kbase_jit_debugfs_phys_fops); -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /* * Debugfs entry for getting the number of pages used * by JIT allocations for estimating the physical pressure @@ -3093,7 +3132,7 @@ void kbase_jit_debugfs_init(struct kbase_context *kctx) */ debugfs_create_file("mem_jit_trim", mode, kctx->kctx_dentry, kctx, &kbase_jit_debugfs_trim_fops); -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ } #endif /* CONFIG_DEBUG_FS */ @@ -3153,14 +3192,16 @@ int kbase_jit_init(struct kbase_context *kctx) * allocation and also, if BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP is set, meets * the alignment requirements. 
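 *
 * As a worked illustration (values assumed for the example): with
 * info->extent = 512 pages, a candidate whose initial commit ends on a
 * 512-page boundary (say a commit of 1024 pages) keeps the top of the commit
 * aligned as required, whereas a commit ending at page 1000 does not.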
*/ -static bool meet_size_and_tiler_align_top_requirements(struct kbase_context *kctx, - struct kbase_va_region *walker, const struct base_jit_alloc_info *info) +static bool meet_size_and_tiler_align_top_requirements( + const struct kbase_va_region *walker, + const struct base_jit_alloc_info *info) { bool meet_reqs = true; if (walker->nr_pages != info->va_pages) meet_reqs = false; - else if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) { + + if (meet_reqs && (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP)) { size_t align = info->extent; size_t align_mask = align - 1; @@ -3171,7 +3212,7 @@ static bool meet_size_and_tiler_align_top_requirements(struct kbase_context *kct return meet_reqs; } -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /* Function will guarantee *@freed will not exceed @pages_needed */ static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx, @@ -3308,8 +3349,10 @@ static size_t kbase_mem_jit_trim_pages(struct kbase_context *kctx, struct kbase_va_region *reg, *tmp; size_t total_freed = 0; - kbase_gpu_vm_lock(kctx); - mutex_lock(&kctx->jit_evict_lock); + lockdep_assert_held(&kctx->jctx.lock); + lockdep_assert_held(&kctx->reg_lock); + lockdep_assert_held(&kctx->jit_evict_lock); + list_for_each_entry_safe(reg, tmp, &kctx->jit_active_head, jit_node) { int err; size_t freed = 0u; @@ -3328,18 +3371,17 @@ static size_t kbase_mem_jit_trim_pages(struct kbase_context *kctx, if (!pages_needed) break; } - mutex_unlock(&kctx->jit_evict_lock); - kbase_gpu_vm_unlock(kctx); trace_mali_jit_trim(total_freed); return total_freed; } -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ static int kbase_jit_grow(struct kbase_context *kctx, - const struct base_jit_alloc_info *info, - struct kbase_va_region *reg) + const struct base_jit_alloc_info *info, + struct kbase_va_region *reg, + struct kbase_sub_alloc **prealloc_sas) { size_t delta; size_t pages_required; @@ -3347,15 +3389,13 @@ static int kbase_jit_grow(struct kbase_context *kctx, struct kbase_mem_pool *pool; int ret = -ENOMEM; struct tagged_addr *gpu_pages; - struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; - int i; if (info->commit_pages > reg->nr_pages) { /* Attempted to grow larger than maximum size */ return -EINVAL; } - kbase_gpu_vm_lock(kctx); + lockdep_assert_held(&kctx->reg_lock); /* Make the physical backing no longer reclaimable */ if (!kbase_mem_evictable_unmake(reg->gpu_alloc)) @@ -3372,14 +3412,6 @@ static int kbase_jit_grow(struct kbase_context *kctx, pages_required = delta; #ifdef CONFIG_MALI_2MB_ALLOC - /* Preallocate memory for the sub-allocation structs */ - for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { - prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), - GFP_KERNEL); - if (!prealloc_sas[i]) - goto update_failed; - } - if (pages_required >= (SZ_2M / SZ_4K)) { pool = &kctx->mem_pools.large[kctx->jit_group_id]; /* Round up to number of 2 MB pages required */ @@ -3405,15 +3437,18 @@ static int kbase_jit_grow(struct kbase_context *kctx, */ while (kbase_mem_pool_size(pool) < pages_required) { int pool_delta = pages_required - kbase_mem_pool_size(pool); + int ret; kbase_mem_pool_unlock(pool); spin_unlock(&kctx->mem_partials_lock); + kbase_gpu_vm_unlock(kctx); + ret = kbase_mem_pool_grow(pool, pool_delta); + kbase_gpu_vm_lock(kctx); - if (kbase_mem_pool_grow(pool, pool_delta)) - goto update_failed_unlocked; + if (ret) + goto update_failed; - kbase_gpu_vm_lock(kctx); spin_lock(&kctx->mem_partials_lock); kbase_mem_pool_lock(pool); } @@ -3459,11 
+3494,6 @@ done: reg->extent = info->extent; update_failed: - kbase_gpu_vm_unlock(kctx); -update_failed_unlocked: - for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) - kfree(prealloc_sas[i]); - return ret; } @@ -3492,9 +3522,9 @@ static void trace_jit_stats(struct kbase_context *kctx, max_allocations, alloc_count, va_pages, ph_pages); } -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /** - * get_jit_backed_pressure() - calculate the physical backing of all JIT + * get_jit_phys_backing() - calculate the physical backing of all JIT * allocations * * @kctx: Pointer to the kbase context whose active JIT allocations will be @@ -3502,83 +3532,48 @@ static void trace_jit_stats(struct kbase_context *kctx, * * Return: number of pages that are committed by JIT allocations */ -static size_t get_jit_backed_pressure(struct kbase_context *kctx) +static size_t get_jit_phys_backing(struct kbase_context *kctx) { - size_t backed_pressure = 0; - int jit_id; - - lockdep_assert_held(&kctx->jctx.lock); + struct kbase_va_region *walker; + size_t backing = 0; - kbase_gpu_vm_lock(kctx); - for (jit_id = 0; jit_id <= BASE_JIT_ALLOC_COUNT; jit_id++) { - struct kbase_va_region *reg = kctx->jit_alloc[jit_id]; + lockdep_assert_held(&kctx->jit_evict_lock); - if (reg && (reg != KBASE_RESERVED_REG_JIT_ALLOC)) { - /* If region has no report, be pessimistic */ - if (reg->used_pages == reg->nr_pages) { - backed_pressure += reg->nr_pages; - } else { - backed_pressure += - kbase_reg_current_backed_size(reg); - } - } + list_for_each_entry(walker, &kctx->jit_active_head, jit_node) { + backing += kbase_reg_current_backed_size(walker); } - kbase_gpu_vm_unlock(kctx); - return backed_pressure; + return backing; } -/** - * jit_trim_necessary_pages() - calculate and trim the least pages possible to - * satisfy a new JIT allocation - * - * @kctx: Pointer to the kbase context - * @info: Pointer to JIT allocation information for the new allocation - * - * Before allocating a new just-in-time memory region or reusing a previous - * one, ensure that the total JIT physical page usage also will not exceed the - * pressure limit. - * - * If there are no reported-on allocations, then we already guarantee this will - * be the case - because our current pressure then only comes from the va_pages - * of each JIT region, hence JIT physical page usage is guaranteed to be - * bounded by this. - * - * However as soon as JIT allocations become "reported on", the pressure is - * lowered to allow new JIT regions to be allocated. It is after such a point - * that the total JIT physical page usage could (either now or in the future on - * a grow-on-GPU-page-fault) exceed the pressure limit, but only on newly - * allocated JIT regions. Hence, trim any "reported on" regions. - * - * Any pages freed will go into the pool and be allocated from there in - * kbase_mem_alloc(). - */ -static void jit_trim_necessary_pages(struct kbase_context *kctx, - const struct base_jit_alloc_info *info) +void kbase_jit_trim_necessary_pages(struct kbase_context *kctx, + size_t needed_pages) { - size_t backed_pressure = 0; - size_t needed_pages = 0; + size_t jit_backing = 0; + size_t pages_to_trim = 0; - backed_pressure = get_jit_backed_pressure(kctx); + lockdep_assert_held(&kctx->jctx.lock); + lockdep_assert_held(&kctx->reg_lock); + lockdep_assert_held(&kctx->jit_evict_lock); + + jit_backing = get_jit_phys_backing(kctx); /* It is possible that this is the case - if this is the first * allocation after "ignore_pressure_limit" allocation. 
*/ - if (backed_pressure > kctx->jit_phys_pages_limit) { - needed_pages += - (backed_pressure - kctx->jit_phys_pages_limit) - + info->va_pages; + if (jit_backing > kctx->jit_phys_pages_limit) { + pages_to_trim += (jit_backing - kctx->jit_phys_pages_limit) + + needed_pages; } else { - size_t backed_diff = - kctx->jit_phys_pages_limit - backed_pressure; + size_t backed_diff = kctx->jit_phys_pages_limit - jit_backing; - if (info->va_pages > backed_diff) - needed_pages += info->va_pages - backed_diff; + if (needed_pages > backed_diff) + pages_to_trim += needed_pages - backed_diff; } - if (needed_pages) { - size_t trimmed_pages = kbase_mem_jit_trim_pages(kctx, - needed_pages); + if (pages_to_trim) { + size_t trimmed_pages = + kbase_mem_jit_trim_pages(kctx, pages_to_trim); /* This should never happen - we already asserted that * we are not violating JIT pressure limit in earlier @@ -3586,10 +3581,10 @@ static void jit_trim_necessary_pages(struct kbase_context *kctx, * must have enough unused pages to satisfy the new * allocation */ - WARN_ON(trimmed_pages < needed_pages); + WARN_ON(trimmed_pages < pages_to_trim); } } -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ /** * jit_allow_allocate() - check whether basic conditions are satisfied to allow @@ -3608,8 +3603,8 @@ static bool jit_allow_allocate(struct kbase_context *kctx, { lockdep_assert_held(&kctx->jctx.lock); -#if MALI_JIT_PRESSURE_LIMIT - if (likely(!ignore_pressure_limit) && +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (!ignore_pressure_limit && ((kctx->jit_phys_pages_limit <= kctx->jit_current_phys_pressure) || (info->va_pages > (kctx->jit_phys_pages_limit - kctx->jit_current_phys_pressure)))) { dev_dbg(kctx->kbdev->dev, @@ -3618,7 +3613,7 @@ static bool jit_allow_allocate(struct kbase_context *kctx, kctx->jit_phys_pages_limit); return false; } -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ if (kctx->jit_current_allocations >= kctx->jit_max_allocations) { /* Too many current allocations */ @@ -3644,123 +3639,152 @@ static bool jit_allow_allocate(struct kbase_context *kctx, return true; } +static struct kbase_va_region * +find_reasonable_region(const struct base_jit_alloc_info *info, + struct list_head *pool_head, bool ignore_usage_id) +{ + struct kbase_va_region *closest_reg = NULL; + struct kbase_va_region *walker; + size_t current_diff = SIZE_MAX; + + list_for_each_entry(walker, pool_head, jit_node) { + if ((ignore_usage_id || + walker->jit_usage_id == info->usage_id) && + walker->jit_bin_id == info->bin_id && + meet_size_and_tiler_align_top_requirements(walker, info)) { + size_t min_size, max_size, diff; + + /* + * The JIT allocations VA requirements have been met, + * it's suitable but other allocations might be a + * better fit. 
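+ *
+ * For illustration (sizes assumed): with candidate regions backed
+ * by 100, 260 and 300 pages and info->commit_pages = 256, the
+ * differences are 156, 4 and 44 pages, so the 260-page region is
+ * kept; a difference of 0 is an exact match and stops the scan
+ * early.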
+ */ + min_size = min_t(size_t, walker->gpu_alloc->nents, + info->commit_pages); + max_size = max_t(size_t, walker->gpu_alloc->nents, + info->commit_pages); + diff = max_size - min_size; + + if (current_diff > diff) { + current_diff = diff; + closest_reg = walker; + } + + /* The allocation is an exact match */ + if (current_diff == 0) + break; + } + } + + return closest_reg; +} + struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, const struct base_jit_alloc_info *info, bool ignore_pressure_limit) { struct kbase_va_region *reg = NULL; + struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; + int i; lockdep_assert_held(&kctx->jctx.lock); if (!jit_allow_allocate(kctx, info, ignore_pressure_limit)) return NULL; -#if MALI_JIT_PRESSURE_LIMIT - if (!ignore_pressure_limit) - jit_trim_necessary_pages(kctx, info); -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#ifdef CONFIG_MALI_2MB_ALLOC + /* Preallocate memory for the sub-allocation structs */ + for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { + prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL); + if (!prealloc_sas[i]) + goto end; + } +#endif + kbase_gpu_vm_lock(kctx); mutex_lock(&kctx->jit_evict_lock); /* * Scan the pool for an existing allocation which meets our * requirements and remove it. */ - if (info->usage_id != 0) { + if (info->usage_id != 0) /* First scan for an allocation with the same usage ID */ - struct kbase_va_region *walker; - size_t current_diff = SIZE_MAX; - - list_for_each_entry(walker, &kctx->jit_pool_head, jit_node) { - - if (walker->jit_usage_id == info->usage_id && - walker->jit_bin_id == info->bin_id && - meet_size_and_tiler_align_top_requirements( - kctx, walker, info)) { - size_t min_size, max_size, diff; - - /* - * The JIT allocations VA requirements have been - * met, it's suitable but other allocations - * might be a better fit. - */ - min_size = min_t(size_t, - walker->gpu_alloc->nents, - info->commit_pages); - max_size = max_t(size_t, - walker->gpu_alloc->nents, - info->commit_pages); - diff = max_size - min_size; - - if (current_diff > diff) { - current_diff = diff; - reg = walker; - } + reg = find_reasonable_region(info, &kctx->jit_pool_head, false); - /* The allocation is an exact match */ - if (current_diff == 0) - break; - } - } - } - - if (!reg) { + if (!reg) /* No allocation with the same usage ID, or usage IDs not in * use. Search for an allocation we can reuse. */ - struct kbase_va_region *walker; - size_t current_diff = SIZE_MAX; - - list_for_each_entry(walker, &kctx->jit_pool_head, jit_node) { - - if (walker->jit_bin_id == info->bin_id && - meet_size_and_tiler_align_top_requirements( - kctx, walker, info)) { - size_t min_size, max_size, diff; - - /* - * The JIT allocations VA requirements have been - * met, it's suitable but other allocations - * might be a better fit. - */ - min_size = min_t(size_t, - walker->gpu_alloc->nents, - info->commit_pages); - max_size = max_t(size_t, - walker->gpu_alloc->nents, - info->commit_pages); - diff = max_size - min_size; - - if (current_diff > diff) { - current_diff = diff; - reg = walker; - } - - /* The allocation is an exact match, so stop - * looking. - */ - if (current_diff == 0) - break; - } - } - } + reg = find_reasonable_region(info, &kctx->jit_pool_head, true); if (reg) { +#if MALI_JIT_PRESSURE_LIMIT_BASE + size_t needed_pages = 0; +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + int ret; + /* * Remove the found region from the pool and add it to the * active list. 
 */
 list_move(&reg->jit_node, &kctx->jit_active_head);
+
+ WARN_ON(reg->gpu_alloc->evicted);
+
 /*
 * Remove the allocation from the eviction list as it's no
 * longer eligible for eviction. This must be done before
 * dropping the jit_evict_lock
 */
 list_del_init(&reg->gpu_alloc->evict_node);
+
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+ if (!ignore_pressure_limit) {
+ if (info->commit_pages > reg->gpu_alloc->nents)
+ needed_pages = info->commit_pages -
+ reg->gpu_alloc->nents;
+
+ /* Update early the recycled JIT region's estimate of
+ * used_pages to ensure it doesn't get trimmed
+ * undesirably. This is needed as the recycled JIT
+ * region has been added to the active list but the
+ * number of used pages for it would be zero, so it
+ * could get trimmed instead of other allocations only
+ * to be regrown later resulting in a breach of the JIT
+ * physical pressure limit.
+ * Also that trimming would disturb the accounting of
+ * physical pages, i.e. the VM stats, as the number of
+ * backing pages would have changed when the call to
+ * kbase_mem_evictable_unmark_reclaim is made.
+ *
+ * The second call to update pressure at the end of
+ * this function would effectively be a nop.
+ */
+ kbase_jit_report_update_pressure(
+ kctx, reg, info->va_pages,
+ KBASE_JIT_REPORT_ON_ALLOC_OR_FREE);
+
+ kbase_jit_request_phys_increase_locked(kctx,
+ needed_pages);
+ }
+#endif
 mutex_unlock(&kctx->jit_evict_lock);
- if (kbase_jit_grow(kctx, info, reg) < 0) {
+ /* kbase_jit_grow() can release & reacquire 'kctx->reg_lock',
+ * so any state protected by that lock might need to be
+ * re-evaluated if more code is added here in future.
+ */
+ ret = kbase_jit_grow(kctx, info, reg, prealloc_sas);
+
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+ if (!ignore_pressure_limit)
+ kbase_jit_done_phys_increase(kctx, needed_pages);
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
+
+ kbase_gpu_vm_unlock(kctx);
+
+ if (ret < 0) {
 /*
 * An update to an allocation from the pool failed,
 * chances are slim a new allocation would fair any
@@ -3770,10 +3794,21 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
 dev_dbg(kctx->kbdev->dev,
 "JIT allocation resize failed: va_pages 0x%llx, commit_pages 0x%llx\n",
 info->va_pages, info->commit_pages);
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+ /* Undo the early change made to the recycled JIT
+ * region's estimate of used_pages.
+ */
+ if (!ignore_pressure_limit) {
+ kbase_jit_report_update_pressure(
+ kctx, reg, 0,
+ KBASE_JIT_REPORT_ON_ALLOC_OR_FREE);
+ }
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
 mutex_lock(&kctx->jit_evict_lock);
 list_move(&reg->jit_node, &kctx->jit_pool_head);
 mutex_unlock(&kctx->jit_evict_lock);
- return NULL;
+ reg = NULL;
+ goto end;
 }
 } else {
 /* No suitable JIT allocation was found so create a new one */
@@ -3783,12 +3818,23 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
 BASEP_MEM_NO_USER_FREE;
 u64 gpu_addr;
- mutex_unlock(&kctx->jit_evict_lock);
-
 if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP)
 flags |= BASE_MEM_TILER_ALIGN_TOP;
 flags |= base_mem_group_id_set(kctx->jit_group_id);
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+ if (!ignore_pressure_limit) {
+ flags |= BASEP_MEM_PERFORM_JIT_TRIM;
+ /* The corresponding call to 'done_phys_increase' would
+ * be made inside the kbase_mem_alloc().
+ */
+ kbase_jit_request_phys_increase_locked(
+ kctx, info->commit_pages);
+ }
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
+
+ mutex_unlock(&kctx->jit_evict_lock);
+ kbase_gpu_vm_unlock(kctx);
 reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages,
 info->extent, &flags, &gpu_addr);
@@ -3799,12 +3845,22 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
 dev_dbg(kctx->kbdev->dev,
 "Failed to allocate JIT memory: va_pages 0x%llx, commit_pages 0x%llx\n",
 info->va_pages, info->commit_pages);
- return NULL;
+ goto end;
 }
- mutex_lock(&kctx->jit_evict_lock);
- list_add(&reg->jit_node, &kctx->jit_active_head);
- mutex_unlock(&kctx->jit_evict_lock);
+ if (!ignore_pressure_limit) {
+ /* Due to the enforcement of the pressure limit,
+ * kbase_mem_alloc was instructed to perform the
+ * trimming which in turn would have ensured that the
+ * new JIT allocation is already in the
+ * jit_active_head list, so nothing to do here.
+ */
+ WARN_ON(list_empty(&reg->jit_node));
+ } else {
+ mutex_lock(&kctx->jit_evict_lock);
+ list_add(&reg->jit_node, &kctx->jit_active_head);
+ mutex_unlock(&kctx->jit_evict_lock);
+ }
 }
 trace_mali_jit_alloc(reg, info->id);
@@ -3816,13 +3872,18 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
 reg->jit_usage_id = info->usage_id;
 reg->jit_bin_id = info->bin_id;
-#if MALI_JIT_PRESSURE_LIMIT
+ reg->flags |= KBASE_REG_ACTIVE_JIT_ALLOC;
+#if MALI_JIT_PRESSURE_LIMIT_BASE
 if (info->flags & BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE)
 reg->flags = reg->flags | KBASE_REG_HEAP_INFO_IS_SIZE;
 reg->heap_info_gpu_addr = info->heap_info_gpu_addr;
 kbase_jit_report_update_pressure(kctx, reg, info->va_pages,
 KBASE_JIT_REPORT_ON_ALLOC_OR_FREE);
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
+
+end:
+ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i)
+ kfree(prealloc_sas[i]);
 return reg;
 }
@@ -3848,11 +3909,11 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
 kbase_mem_shrink(kctx, reg, old_pages - delta);
 }
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
 reg->heap_info_gpu_addr = 0;
 kbase_jit_report_update_pressure(kctx, reg, 0,
 KBASE_JIT_REPORT_ON_ALLOC_OR_FREE);
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
 kctx->jit_current_allocations--;
 kctx->jit_current_allocations_per_bin[reg->jit_bin_id]--;
@@ -3863,6 +3924,7 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
 kbase_gpu_vm_lock(kctx);
 reg->flags |= KBASE_REG_DONT_NEED;
+ reg->flags &= ~KBASE_REG_ACTIVE_JIT_ALLOC;
 kbase_mem_shrink_cpu_mapping(kctx, reg, 0, reg->gpu_alloc->nents);
 kbase_gpu_vm_unlock(kctx);
@@ -3962,6 +4024,9 @@ void kbase_jit_term(struct kbase_context *kctx)
 kbase_mem_free_region(kctx, walker);
 mutex_lock(&kctx->jit_evict_lock);
 }
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+ WARN_ON(kctx->jit_phys_pages_to_be_allocated);
+#endif
 mutex_unlock(&kctx->jit_evict_lock);
 kbase_gpu_vm_unlock(kctx);
@@ -3972,7 +4037,7 @@ void kbase_jit_term(struct kbase_context *kctx)
 cancel_work_sync(&kctx->jit_work);
 }
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
 void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx,
 struct kbase_va_region *reg, unsigned int flags)
 {
@@ -4015,9 +4080,9 @@ void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx,
 out:
 return;
 }
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
 void
kbase_jit_report_update_pressure(struct kbase_context *kctx, struct kbase_va_region *reg, u64 new_used_pages, unsigned int flags) @@ -4053,7 +4118,7 @@ void kbase_jit_report_update_pressure(struct kbase_context *kctx, } } -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ bool kbase_has_exec_va_zone(struct kbase_context *kctx) { diff --git a/mali_kbase/mali_kbase_mem.h b/mali_kbase/mali_kbase_mem.h index 6e921ec..a057f61 100644 --- a/mali_kbase/mali_kbase_mem.h +++ b/mali_kbase/mali_kbase_mem.h @@ -141,6 +141,7 @@ struct kbase_mem_phy_alloc { union { struct { + struct kbase_context *kctx; struct dma_buf *dma_buf; struct dma_buf_attachment *dma_attachment; unsigned int current_mapping_usage_count; @@ -330,7 +331,8 @@ struct kbase_va_region { /* Bit 22 is reserved. * - * Do not remove, use the next unreserved bit for new flags */ + * Do not remove, use the next unreserved bit for new flags + */ #define KBASE_REG_RESERVED_BIT_22 (1ul << 22) /* The top of the initial commit is aligned to extent pages. @@ -367,6 +369,9 @@ struct kbase_va_region { */ #define KBASE_REG_HEAP_INFO_IS_SIZE (1ul << 27) +/* Allocation is actively used for JIT memory */ +#define KBASE_REG_ACTIVE_JIT_ALLOC (1ul << 28) + #define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0) /* only used with 32-bit clients */ @@ -398,7 +403,7 @@ struct kbase_va_region { struct list_head jit_node; u16 jit_usage_id; u8 jit_bin_id; -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /* Pointer to an object in GPU memory defining an end of an allocated * region * @@ -423,7 +428,7 @@ struct kbase_va_region { * gpu_alloc->nents) */ size_t used_pages; -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ int va_refcnt; }; @@ -1497,7 +1502,7 @@ bool kbase_jit_evict(struct kbase_context *kctx); */ void kbase_jit_term(struct kbase_context *kctx); -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /** * kbase_trace_jit_report_gpu_mem_trace_enabled - variant of * kbase_trace_jit_report_gpu_mem() that should only be called once the @@ -1508,7 +1513,7 @@ void kbase_jit_term(struct kbase_context *kctx); */ void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, struct kbase_va_region *reg, unsigned int flags); -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ /** * kbase_trace_jit_report_gpu_mem - Trace information about the GPU memory used @@ -1530,7 +1535,7 @@ void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, * been included. Also gives no opportunity for the compiler to mess up * inlining it. 
 */
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
 #define kbase_trace_jit_report_gpu_mem(kctx, reg, flags) \
 do { \
 if (trace_mali_jit_report_gpu_mem_enabled()) \
@@ -1540,9 +1545,9 @@ void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx,
 #else
 #define kbase_trace_jit_report_gpu_mem(kctx, reg, flags) \
 CSTD_NOP(kctx, reg, flags)
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
 /**
 * kbase_jit_report_update_pressure - safely update the JIT physical page
 * pressure and JIT region's estimate of used_pages
@@ -1562,7 +1567,123 @@ void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx,
 void kbase_jit_report_update_pressure(struct kbase_context *kctx,
 struct kbase_va_region *reg, u64 new_used_pages,
 unsigned int flags);
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+
+/**
+ * kbase_jit_trim_necessary_pages() - calculate and trim the least pages
+ * possible to satisfy a new JIT allocation
+ *
+ * @kctx: Pointer to the kbase context
+ * @needed_pages: Number of JIT physical pages by which trimming is requested.
+ * The actual number of pages trimmed could differ.
+ *
+ * Before allocating a new just-in-time memory region or reusing a previous
+ * one, ensure that the total JIT physical page usage also will not exceed the
+ * pressure limit.
+ *
+ * If there are no reported-on allocations, then we already guarantee this will
+ * be the case - because our current pressure then only comes from the va_pages
+ * of each JIT region, hence JIT physical page usage is guaranteed to be
+ * bounded by this.
+ *
+ * However as soon as JIT allocations become "reported on", the pressure is
+ * lowered to allow new JIT regions to be allocated. It is after such a point
+ * that the total JIT physical page usage could (either now or in the future on
+ * a grow-on-GPU-page-fault) exceed the pressure limit, but only on newly
+ * allocated JIT regions. Hence, trim any "reported on" regions.
+ *
+ * Any pages freed will go into the pool and be allocated from there in
+ * kbase_mem_alloc().
+ */
+void kbase_jit_trim_necessary_pages(struct kbase_context *kctx,
+ size_t needed_pages);
+
+/*
+ * Same as kbase_jit_request_phys_increase(), except that the caller is
+ * supposed to take jit_evict_lock also on @kctx before calling this function.
+ */
+static inline void
+kbase_jit_request_phys_increase_locked(struct kbase_context *kctx,
+ size_t needed_pages)
+{
+ lockdep_assert_held(&kctx->jctx.lock);
+ lockdep_assert_held(&kctx->reg_lock);
+ lockdep_assert_held(&kctx->jit_evict_lock);
+
+ kctx->jit_phys_pages_to_be_allocated += needed_pages;
+
+ kbase_jit_trim_necessary_pages(kctx,
+ kctx->jit_phys_pages_to_be_allocated);
+}
+
+/**
+ * kbase_jit_request_phys_increase() - Increment the backing pages count and do
+ * the required trimming before allocating pages for a JIT allocation.
+ *
+ * @kctx: Pointer to the kbase context
+ * @needed_pages: Number of pages to be allocated for the JIT allocation.
+ *
+ * This function needs to be called before allocating backing pages for a
+ * just-in-time memory region. The backing pages are currently allocated when:
+ *
+ * - A new JIT region is created.
+ * - An old JIT region is reused from the cached pool.
+ * - GPU page fault occurs for the active JIT region.
+ * - Backing is grown for the JIT region through the commit ioctl.
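+ *
+ * A minimal sketch of the expected pairing (illustrative only; locking and
+ * error handling elided):
+ *
+ *	kbase_jit_request_phys_increase(kctx, needed_pages);
+ *	... allocate and account the backing pages ...
+ *	kbase_jit_done_phys_increase(kctx, needed_pages);
+ *
+ * The trim this requests is sized to the shortfall. For example, with
+ * assumed values of jit_phys_pages_limit = 1000, a current JIT backing of
+ * 900 pages and needed_pages = 300, kbase_jit_trim_necessary_pages() is
+ * asked to free 300 - (1000 - 900) = 200 pages.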
+ *
+ * This function ensures that the total JIT physical page usage does not
+ * exceed the pressure limit even when the backing pages get allocated
+ * simultaneously for multiple JIT allocations from different threads.
+ *
+ * There should be a matching call to kbase_jit_done_phys_increase(), after
+ * the pages have been allocated and accounted against the active JIT
+ * allocation.
+ *
+ * The caller is supposed to take reg_lock on @kctx before calling this
+ * function.
+ */
+static inline void kbase_jit_request_phys_increase(struct kbase_context *kctx,
+ size_t needed_pages)
+{
+ lockdep_assert_held(&kctx->jctx.lock);
+ lockdep_assert_held(&kctx->reg_lock);
+
+ mutex_lock(&kctx->jit_evict_lock);
+ kbase_jit_request_phys_increase_locked(kctx, needed_pages);
+ mutex_unlock(&kctx->jit_evict_lock);
+}
+
+/**
+ * kbase_jit_done_phys_increase() - Decrement the backing pages count after the
+ * allocation of pages for a JIT allocation.
+ *
+ * @kctx: Pointer to the kbase context
+ * @needed_pages: Number of pages that were allocated for the JIT allocation.
+ *
+ * This function should be called after backing pages have been allocated and
+ * accounted against the active JIT allocation.
+ * The call should be made once all of the following hold:
+ * - the allocation is on the jit_active_head,
+ * - the additional needed_pages have been allocated, and
+ * - kctx->reg_lock was held throughout the above and has not yet been
+ *   unlocked.
+ * Failing to call this function before kctx->reg_lock is unlocked, or once
+ * any of the above has changed, may result in over-accounting the memory.
+ * This ensures kbase_jit_trim_necessary_pages() gets a consistent count of
+ * the memory.
+ *
+ * A matching call to kbase_jit_request_phys_increase() should have been made
+ * before the allocation of backing pages.
+ *
+ * The caller is supposed to take reg_lock on @kctx before calling this
+ * function.
+ */
+static inline void kbase_jit_done_phys_increase(struct kbase_context *kctx,
+ size_t needed_pages)
+{
+ lockdep_assert_held(&kctx->reg_lock);
+
+ WARN_ON(kctx->jit_phys_pages_to_be_allocated < needed_pages);
+
+ kctx->jit_phys_pages_to_be_allocated -= needed_pages;
+}
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
 /**
 * kbase_has_exec_va_zone - EXEC_VA zone predicate
@@ -1742,7 +1863,6 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx,
 int kbase_mem_do_sync_imported(struct kbase_context *kctx,
 struct kbase_va_region *reg, enum kbase_sync_type sync_fn);
-
 /**
 * kbase_mem_copy_to_pinned_user_pages - Memcpy from source input page to
 * an unaligned address at a given offset from the start of a target page.
diff --git a/mali_kbase/mali_kbase_mem_linux.c b/mali_kbase/mali_kbase_mem_linux.c index 219e0af..d7863e1 100644 --- a/mali_kbase/mali_kbase_mem_linux.c +++ b/mali_kbase/mali_kbase_mem_linux.c @@ -49,6 +49,8 @@ #include <tl/mali_kbase_tracepoints.h> #include <mali_kbase_ioctl.h> #include <mmu/mali_kbase_mmu.h> +#include <mali_kbase_caps.h> +#include <mali_kbase_trace_gpu_mem.h> #if ((KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) || \ (KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE)) @@ -372,10 +374,12 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, } else reg->threshold_pages = 0; - if (*flags & (BASE_MEM_GROW_ON_GPF|BASE_MEM_TILER_ALIGN_TOP)) { + if (*flags & BASE_MEM_GROW_ON_GPF) { /* kbase_check_alloc_sizes() already checks extent is valid for * assigning to reg->extent */ reg->extent = extent; + } else if (*flags & BASE_MEM_TILER_ALIGN_TOP) { + reg->extent = extent; } else { reg->extent = 0; } @@ -436,6 +440,17 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, *gpu_va = reg->start_pfn << PAGE_SHIFT; } +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (*flags & BASEP_MEM_PERFORM_JIT_TRIM) { + kbase_jit_done_phys_increase(kctx, commit_pages); + + mutex_lock(&kctx->jit_evict_lock); + WARN_ON(!list_empty(&reg->jit_node)); + list_add(&reg->jit_node, &kctx->jit_active_head); + mutex_unlock(&kctx->jit_evict_lock); + } +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + kbase_gpu_vm_unlock(kctx); return reg; @@ -443,6 +458,13 @@ no_mmap: no_cookie: no_kern_mapping: no_mem: +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (*flags & BASEP_MEM_PERFORM_JIT_TRIM) { + kbase_gpu_vm_lock(kctx); + kbase_jit_done_phys_increase(kctx, commit_pages); + kbase_gpu_vm_unlock(kctx); + } +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ kbase_mem_phy_alloc_put(reg->cpu_alloc); kbase_mem_phy_alloc_put(reg->gpu_alloc); invalid_flags: @@ -511,14 +533,23 @@ int kbase_mem_query(struct kbase_context *kctx, *out |= BASE_MEM_COHERENT_SYSTEM; if (KBASE_REG_SHARE_IN & reg->flags) *out |= BASE_MEM_COHERENT_LOCAL; - if (kctx->api_version >= KBASE_API_VERSION(11, 2)) { - /* Prior to 11.2, these were known about by user-side - * but we did not return them. Returning some of these - * caused certain clients that were not expecting them - * to fail, so we omit all of them as a special-case - * for compatibility reasons */ + if (mali_kbase_supports_mem_grow_on_gpf(kctx->api_version)) { + /* Prior to this version, this was known about by + * user-side but we did not return it. Returning + * it caused certain clients that were not expecting + * it to fail, so we omit it as a special-case for + * compatibility reasons + */ if (KBASE_REG_PF_GROW & reg->flags) *out |= BASE_MEM_GROW_ON_GPF; + } + if (mali_kbase_supports_mem_protected(kctx->api_version)) { + /* Prior to this version, this was known about by + * user-side but we did not return it.
Returning + * it caused certain clients that were not expecting + * it to fail, so we omit it as a special-case for + * compatibility reasons + */ if (KBASE_REG_PROTECTED & reg->flags) *out |= BASE_MEM_PROTECTED; } @@ -705,6 +736,7 @@ void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc) kbdev, kctx->id, (u64)new_page_count); + kbase_trace_gpu_mem_usage_dec(kbdev, kctx, alloc->nents); } /** @@ -731,6 +763,7 @@ void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc) kbdev, kctx->id, (u64)new_page_count); + kbase_trace_gpu_mem_usage_inc(kbdev, kctx, alloc->nents); } int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) @@ -1056,6 +1089,8 @@ static void kbase_mem_umm_unmap_attachment(struct kbase_context *kctx, alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); alloc->imported.umm.sgt = NULL; + kbase_remove_dma_buf_usage(kctx, alloc); + memset(pa, 0xff, sizeof(*pa) * alloc->nents); alloc->nents = 0; } @@ -1123,6 +1158,7 @@ static int kbase_mem_umm_map_attachment(struct kbase_context *kctx, /* Update nents as we now have pages to map */ alloc->nents = count; + kbase_add_dma_buf_usage(kctx, alloc); return 0; @@ -1383,6 +1419,7 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment; reg->gpu_alloc->imported.umm.current_mapping_usage_count = 0; reg->gpu_alloc->imported.umm.need_sync = need_sync; + reg->gpu_alloc->imported.umm.kctx = kctx; reg->extent = 0; if (!IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND)) { @@ -2024,7 +2061,7 @@ static int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx, int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) { u64 old_pages; - u64 delta; + u64 delta = 0; int res = -EINVAL; struct kbase_va_region *reg; bool read_locked = false; @@ -2054,6 +2091,9 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) if (0 == (reg->flags & KBASE_REG_GROWABLE)) goto out_unlock; + if (reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC) + goto out_unlock; + /* Would overflow the VA region */ if (new_pages > reg->nr_pages) goto out_unlock; @@ -2216,8 +2256,6 @@ static void kbase_cpu_vm_close(struct vm_area_struct *vma) kfree(map); } -KBASE_EXPORT_TEST_API(kbase_cpu_vm_close); - static struct kbase_aliased *get_aliased_alloc(struct vm_area_struct *vma, struct kbase_va_region *reg, pgoff_t *start_off, @@ -2935,9 +2973,9 @@ KBASE_EXPORT_TEST_API(kbase_vunmap); static void kbasep_add_mm_counter(struct mm_struct *mm, int member, long value) { -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)) +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 0)) /* To avoid the build breakage due to an unexported kernel symbol - * 'mm_trace_rss_stat' from later kernels, i.e. from V5.5.0 onwards, + * 'mm_trace_rss_stat' from later kernels, i.e. from V4.19.0 onwards, * we inline here the equivalent of 'add_mm_counter()' from linux * kernel V5.4.0~8. 
*/ diff --git a/mali_kbase/mali_kbase_pm.c b/mali_kbase/mali_kbase_pm.c index b9ed8c3..7263b58 100644 --- a/mali_kbase/mali_kbase_pm.c +++ b/mali_kbase/mali_kbase_pm.c @@ -39,6 +39,8 @@ #include <arbiter/mali_kbase_arbiter_pm.h> #endif /* CONFIG_MALI_ARBITER_SUPPORT */ +#include <mali_kbase_clk_rate_trace_mgr.h> + int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags) { return kbase_hwaccess_pm_powerup(kbdev, flags); @@ -101,6 +103,7 @@ int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, #ifdef CONFIG_MALI_ARBITER_SUPPORT kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_REF_EVENT); #endif /* CONFIG_MALI_ARBITER_SUPPORT */ + kbase_clk_rate_trace_manager_gpu_active(kbdev); } kbase_pm_unlock(kbdev); @@ -128,6 +131,7 @@ void kbase_pm_context_idle(struct kbase_device *kbdev) if (c == 0) { /* Last context has gone idle */ kbase_hwaccess_pm_gpu_idle(kbdev); + kbase_clk_rate_trace_manager_gpu_idle(kbdev); /* Wake up anyone waiting for this to become 0 (e.g. suspend). * The waiters must synchronize with us by locking the pm.lock diff --git a/mali_kbase/mali_kbase_softjobs.c b/mali_kbase/mali_kbase_softjobs.c index cbb0c76..7a784ac 100644 --- a/mali_kbase/mali_kbase_softjobs.c +++ b/mali_kbase/mali_kbase_softjobs.c @@ -32,6 +32,7 @@ #include <linux/dma-mapping.h> #include <mali_base_kernel.h> #include <mali_kbase_hwaccess_time.h> +#include <mali_kbase_kinstr_jm.h> #include <mali_kbase_mem_linux.h> #include <tl/mali_kbase_tracepoints.h> #include <mali_linux_trace.h> @@ -899,7 +900,7 @@ int kbasep_jit_alloc_validate(struct kbase_context *kctx, if (info->flags & ~(BASE_JIT_ALLOC_VALID_FLAGS)) return -EINVAL; -#if !MALI_JIT_PRESSURE_LIMIT +#if !MALI_JIT_PRESSURE_LIMIT_BASE /* If just-in-time memory allocation pressure limit feature is disabled, * heap_info_gpu_addr must be zeroed-out */ @@ -1091,14 +1092,19 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) } } -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /** - * If this is the only JIT_ALLOC atom in-flight then allow it to exceed - * the defined pressure limit. + * If this is the only JIT_ALLOC atom in-flight or if JIT pressure limit + * is disabled at the context scope, then bypass JIT pressure limit + * logic in kbase_jit_allocate(). 
*/ - if (kctx->jit_current_allocations == 0) + if (!kbase_ctx_flag(kctx, KCTX_JPL_ENABLED) + || (kctx->jit_current_allocations == 0)) { ignore_pressure_limit = true; -#endif /* MALI_JIT_PRESSURE_LIMIT */ + } +#else + ignore_pressure_limit = true; +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ for (i = 0, info = katom->softjob_data; i < count; i++, info++) { if (kctx->jit_alloc[info->id]) { @@ -1358,12 +1364,16 @@ void kbase_jit_retry_pending_alloc(struct kbase_context *kctx) list_for_each_safe(i, tmp, &jit_pending_alloc_list) { struct kbase_jd_atom *pending_atom = list_entry(i, struct kbase_jd_atom, queue); + KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(kctx->kbdev, pending_atom); + kbase_kinstr_jm_atom_sw_start(pending_atom); if (kbase_jit_allocate_process(pending_atom) == 0) { /* Atom has completed */ INIT_WORK(&pending_atom->work, kbasep_jit_finish_worker); queue_work(kctx->jctx.job_done_wq, &pending_atom->work); } + KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(kctx->kbdev, pending_atom); + kbase_kinstr_jm_atom_sw_stop(pending_atom); } } @@ -1538,6 +1548,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) struct kbase_device *kbdev = kctx->kbdev; KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(kbdev, katom); + kbase_kinstr_jm_atom_sw_start(katom); trace_sysgraph(SGR_SUBMIT, kctx->id, kbase_jd_atom_id(kctx, katom)); @@ -1600,6 +1611,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) /* Atom is complete */ KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(kbdev, katom); + kbase_kinstr_jm_atom_sw_stop(katom); return ret; } diff --git a/mali_kbase/mali_kbase_trace_gpu_mem.c b/mali_kbase/mali_kbase_trace_gpu_mem.c new file mode 100644 index 0000000..0a053da --- /dev/null +++ b/mali_kbase/mali_kbase_trace_gpu_mem.c @@ -0,0 +1,227 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include <mali_kbase.h> +#include <mali_kbase_mem_linux.h> +#include <mali_kbase_defs.h> +#include <mali_kbase_trace_gpu_mem.h> + +/** + * struct kbase_dma_buf - Object instantiated when a dma-buf imported allocation + * is mapped to the GPU for the first time within a + * process. Another instance is created when the same + * allocation is first mapped to the GPU on the device, + * since separate rb_trees are maintained at the + * kbase_device and kbase_process levels. + * + * @dma_buf: Reference to the dma_buf being imported. + * @dma_buf_node: Link node to maintain a rb_tree of kbase_dma_buf. + * @import_count: The number of times the dma_buf was imported. + */ +struct kbase_dma_buf { + struct dma_buf *dma_buf; + struct rb_node dma_buf_node; + u32 import_count; +}; + +/** + * kbase_delete_dma_buf_mapping - Delete a dma buffer mapping. + * + * @kctx: Pointer to kbase context. + * @dma_buf: Pointer to a dma buffer mapping. + * @tree: Pointer to the root of the rb_tree containing the mapped dma_bufs.
+ * + * When we unmap a dma-buf mapping, we need to remove it from the rb_tree. + * An rb_tree is maintained at the kbase_device level and at the + * kbase_process level; by passing in the root of either one, this helper + * removes the node from that tree. + */ +static bool kbase_delete_dma_buf_mapping(struct kbase_context *kctx, + struct dma_buf *dma_buf, + struct rb_root *tree) +{ + struct kbase_dma_buf *buf_node = NULL; + struct rb_node *node = tree->rb_node; + bool mapping_removed = false; + + lockdep_assert_held(&kctx->kbdev->dma_buf_lock); + + while (node) { + buf_node = rb_entry(node, struct kbase_dma_buf, dma_buf_node); + + if (dma_buf == buf_node->dma_buf) { + WARN_ON(!buf_node->import_count); + + buf_node->import_count--; + + if (!buf_node->import_count) { + rb_erase(&buf_node->dma_buf_node, tree); + kfree(buf_node); + mapping_removed = true; + } + + break; + } + + if (dma_buf < buf_node->dma_buf) + node = node->rb_left; + else + node = node->rb_right; + } + + WARN_ON(!buf_node); + return mapping_removed; +} + +/** + * kbase_capture_dma_buf_mapping - capture a dma buffer mapping. + * + * @kctx: Pointer to kbase context. + * @dma_buf: Pointer to a dma buffer mapping. + * @root: Pointer to root of rb_tree containing the dma_buf's. + * + * We maintain a kbase_device-level and a kbase_process-level rb_tree of + * all unique dma_bufs mapped to GPU memory, so whenever a dma_buf is + * attached it is added to both rb_trees. To keep each mapping unique, we + * first check that it is not a duplicate and only then add it. + */ +static bool kbase_capture_dma_buf_mapping(struct kbase_context *kctx, + struct dma_buf *dma_buf, + struct rb_root *root) +{ + struct kbase_dma_buf *buf_node = NULL; + struct rb_node *node = root->rb_node; + bool unique_buf_imported = true; + + lockdep_assert_held(&kctx->kbdev->dma_buf_lock); + + while (node) { + buf_node = rb_entry(node, struct kbase_dma_buf, dma_buf_node); + + if (dma_buf == buf_node->dma_buf) { + unique_buf_imported = false; + break; + } + + if (dma_buf < buf_node->dma_buf) + node = node->rb_left; + else + node = node->rb_right; + } + + if (unique_buf_imported) { + struct kbase_dma_buf *buf_node = + kzalloc(sizeof(*buf_node), GFP_KERNEL); + + if (buf_node == NULL) { + dev_err(kctx->kbdev->dev, "Error allocating memory for kbase_dma_buf\n"); + /* Don't account for it if we fail to allocate memory */ + unique_buf_imported = false; + } else { + struct rb_node **new = &(root->rb_node), *parent = NULL; + + buf_node->dma_buf = dma_buf; + buf_node->import_count = 1; + while (*new) { + struct kbase_dma_buf *node; + + parent = *new; + node = rb_entry(parent, struct kbase_dma_buf, + dma_buf_node); + if (dma_buf < node->dma_buf) + new = &(*new)->rb_left; + else + new = &(*new)->rb_right; + } + rb_link_node(&buf_node->dma_buf_node, parent, new); + rb_insert_color(&buf_node->dma_buf_node, root); + } + } else if (!WARN_ON(!buf_node)) { + buf_node->import_count++; + } + + return unique_buf_imported; +} + +void kbase_remove_dma_buf_usage(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc) +{ + struct kbase_device *kbdev = kctx->kbdev; + bool dev_mapping_removed, prcs_mapping_removed; + + mutex_lock(&kbdev->dma_buf_lock); + + dev_mapping_removed = kbase_delete_dma_buf_mapping( + kctx, alloc->imported.umm.dma_buf, &kbdev->dma_buf_root); + + prcs_mapping_removed = kbase_delete_dma_buf_mapping( + kctx, alloc->imported.umm.dma_buf, &kctx->kprcs->dma_buf_root); + + WARN_ON(dev_mapping_removed && !prcs_mapping_removed); + + spin_lock(&kbdev->gpu_mem_usage_lock); + if
(dev_mapping_removed) + kbdev->total_gpu_pages -= alloc->nents; + + if (prcs_mapping_removed) + kctx->kprcs->total_gpu_pages -= alloc->nents; + + if (dev_mapping_removed || prcs_mapping_removed) + kbase_trace_gpu_mem_usage(kbdev, kctx); + spin_unlock(&kbdev->gpu_mem_usage_lock); + + mutex_unlock(&kbdev->dma_buf_lock); +} + +void kbase_add_dma_buf_usage(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc) +{ + struct kbase_device *kbdev = kctx->kbdev; + bool unique_dev_dmabuf, unique_prcs_dmabuf; + + mutex_lock(&kbdev->dma_buf_lock); + + /* add dma_buf to device and process. */ + unique_dev_dmabuf = kbase_capture_dma_buf_mapping( + kctx, alloc->imported.umm.dma_buf, &kbdev->dma_buf_root); + + unique_prcs_dmabuf = kbase_capture_dma_buf_mapping( + kctx, alloc->imported.umm.dma_buf, &kctx->kprcs->dma_buf_root); + + WARN_ON(unique_dev_dmabuf && !unique_prcs_dmabuf); + + spin_lock(&kbdev->gpu_mem_usage_lock); + if (unique_dev_dmabuf) + kbdev->total_gpu_pages += alloc->nents; + + if (unique_prcs_dmabuf) + kctx->kprcs->total_gpu_pages += alloc->nents; + + if (unique_prcs_dmabuf || unique_dev_dmabuf) + kbase_trace_gpu_mem_usage(kbdev, kctx); + spin_unlock(&kbdev->gpu_mem_usage_lock); + + mutex_unlock(&kbdev->dma_buf_lock); +} + +#ifndef CONFIG_TRACE_GPU_MEM +#define CREATE_TRACE_POINTS +#include "mali_gpu_mem_trace.h" +#endif diff --git a/mali_kbase/mali_kbase_trace_gpu_mem.h b/mali_kbase/mali_kbase_trace_gpu_mem.h new file mode 100644 index 0000000..b621525 --- /dev/null +++ b/mali_kbase/mali_kbase_trace_gpu_mem.h @@ -0,0 +1,101 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_TRACE_GPU_MEM_H_ +#define _KBASE_TRACE_GPU_MEM_H_ + +#ifdef CONFIG_TRACE_GPU_MEM +#include <trace/events/gpu_mem.h> +#else +#include "mali_gpu_mem_trace.h" +#endif + +#define DEVICE_TGID ((u32) 0U) + +static inline void kbase_trace_gpu_mem_usage(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + lockdep_assert_held(&kbdev->gpu_mem_usage_lock); + + trace_gpu_mem_total(kbdev->id, DEVICE_TGID, + kbdev->total_gpu_pages << PAGE_SHIFT); + + if (likely(kctx)) + trace_gpu_mem_total(kbdev->id, kctx->kprcs->tgid, + kctx->kprcs->total_gpu_pages << PAGE_SHIFT); +} + +static inline void kbase_trace_gpu_mem_usage_dec(struct kbase_device *kbdev, + struct kbase_context *kctx, size_t pages) +{ + spin_lock(&kbdev->gpu_mem_usage_lock); + + if (likely(kctx)) + kctx->kprcs->total_gpu_pages -= pages; + + kbdev->total_gpu_pages -= pages; + + kbase_trace_gpu_mem_usage(kbdev, kctx); + + spin_unlock(&kbdev->gpu_mem_usage_lock); +} + +static inline void kbase_trace_gpu_mem_usage_inc(struct kbase_device *kbdev, + struct kbase_context *kctx, size_t pages) +{ + spin_lock(&kbdev->gpu_mem_usage_lock); + + if (likely(kctx)) + kctx->kprcs->total_gpu_pages += pages; + + kbdev->total_gpu_pages += pages; + + kbase_trace_gpu_mem_usage(kbdev, kctx); + + spin_unlock(&kbdev->gpu_mem_usage_lock); +} + +/** + * kbase_remove_dma_buf_usage - Remove a captured dma-buf entry. + * + * @kctx: Pointer to the kbase context + * @alloc: Pointer to the alloc to unmap + * + * Removes the reference to the dma-buf being unmapped from both the + * kbase_device-level and kbase_process-level dma-buf rb_trees. + */ +void kbase_remove_dma_buf_usage(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc); + +/** + * kbase_add_dma_buf_usage - Capture a dma-buf entry. + * + * @kctx: Pointer to the kbase context + * @alloc: Pointer to the alloc to map in + * + * Adds a reference for the dma-buf being mapped to both the + * kbase_device-level and kbase_process-level dma-buf rb_trees. + */ +void kbase_add_dma_buf_usage(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc); + +#endif /* _KBASE_TRACE_GPU_MEM_H_ */ diff --git a/mali_kbase/mali_kbase_vinstr.c b/mali_kbase/mali_kbase_vinstr.c index d96b565..72cec13 100644 --- a/mali_kbase/mali_kbase_vinstr.c +++ b/mali_kbase/mali_kbase_vinstr.c @@ -184,6 +184,7 @@ static int kbasep_vinstr_client_dump( unsigned int read_idx; struct kbase_hwcnt_dump_buffer *dump_buf; struct kbase_hwcnt_reader_metadata *meta; + u8 clk_cnt; WARN_ON(!vcli); lockdep_assert_held(&vcli->vctx->lock); @@ -212,9 +213,14 @@ static int kbasep_vinstr_client_dump( /* Zero all non-enabled counters (current values are undefined) */ kbase_hwcnt_dump_buffer_zero_non_enabled(dump_buf, &vcli->enable_map); + clk_cnt = vcli->vctx->metadata->clk_cnt; + meta->timestamp = ts_end_ns; meta->event_id = event_id; meta->buffer_idx = write_idx; + meta->cycles.top = (clk_cnt > 0) ? dump_buf->clk_cnt_buf[0] : 0; + meta->cycles.shader_cores = + (clk_cnt > 1) ? dump_buf->clk_cnt_buf[1] : 0; /* Notify client. Make sure all changes to memory are visible. */ wmb(); @@ -404,12 +410,15 @@ static int kbasep_vinstr_client_create( if (errcode) goto error; - phys_em.jm_bm = setup->jm_bm; + phys_em.fe_bm = setup->fe_bm; phys_em.shader_bm = setup->shader_bm; phys_em.tiler_bm = setup->tiler_bm; phys_em.mmu_l2_bm = setup->mmu_l2_bm; kbase_hwcnt_gpu_enable_map_from_physical(&vcli->enable_map, &phys_em); + /* Enable all the available clk_enable_map.
*/ + vcli->enable_map.clk_enable_map = (1ull << vctx->metadata->clk_cnt) - 1; + errcode = kbase_hwcnt_dump_buffer_array_alloc( vctx->metadata, setup->buffer_count, &vcli->dump_bufs); if (errcode) @@ -675,23 +684,26 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( unsigned int idx = meta_idx % cli->dump_bufs.buf_cnt; struct kbase_hwcnt_reader_metadata *meta = &cli->dump_bufs_meta[idx]; + const size_t meta_size = sizeof(struct kbase_hwcnt_reader_metadata); + const size_t min_size = min(size, meta_size); /* Metadata sanity check. */ WARN_ON(idx != meta->buffer_idx); - if (sizeof(struct kbase_hwcnt_reader_metadata) != size) - return -EINVAL; - /* Check if there is any buffer available. */ - if (atomic_read(&cli->write_idx) == meta_idx) + if (unlikely(atomic_read(&cli->write_idx) == meta_idx)) return -EAGAIN; /* Check if previously taken buffer was put back. */ - if (atomic_read(&cli->read_idx) != meta_idx) + if (unlikely(atomic_read(&cli->read_idx) != meta_idx)) return -EBUSY; + /* Clear user buffer to zero. */ + if (unlikely(meta_size < size && clear_user(buffer, size))) + return -EFAULT; + /* Copy next available buffer's metadata to user. */ - if (copy_to_user(buffer, meta, size)) + if (unlikely(copy_to_user(buffer, meta, min_size))) return -EFAULT; atomic_inc(&cli->meta_idx); @@ -715,24 +727,62 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( unsigned int read_idx = atomic_read(&cli->read_idx); unsigned int idx = read_idx % cli->dump_bufs.buf_cnt; - struct kbase_hwcnt_reader_metadata meta; - - if (sizeof(struct kbase_hwcnt_reader_metadata) != size) - return -EINVAL; + struct kbase_hwcnt_reader_metadata *meta; + const size_t meta_size = sizeof(struct kbase_hwcnt_reader_metadata); + const size_t max_size = max(size, meta_size); + int ret = 0; + u8 stack_kbuf[64]; + u8 *kbuf = NULL; + size_t i; /* Check if any buffer was taken. */ - if (atomic_read(&cli->meta_idx) == read_idx) + if (unlikely(atomic_read(&cli->meta_idx) == read_idx)) return -EPERM; + if (likely(max_size <= sizeof(stack_kbuf))) { + /* Use stack buffer when the size is small enough. */ + if (unlikely(meta_size > size)) + memset(stack_kbuf, 0, sizeof(stack_kbuf)); + kbuf = stack_kbuf; + } else { + kbuf = kzalloc(max_size, GFP_KERNEL); + if (unlikely(!kbuf)) + return -ENOMEM; + } + + /* + * Copy user buffer to zero cleared kernel buffer which has enough + * space for both user buffer and kernel metadata. + */ + if (unlikely(copy_from_user(kbuf, buffer, size))) { + ret = -EFAULT; + goto out; + } + + /* + * Make sure any "extra" data passed from userspace is zero. + * It's meaningful only in case meta_size < size. + */ + for (i = meta_size; i < size; i++) { + /* Check if user data beyond meta size is zero. */ + if (unlikely(kbuf[i] != 0)) { + ret = -EINVAL; + goto out; + } + } + /* Check if correct buffer is put back. */ - if (copy_from_user(&meta, buffer, size)) - return -EFAULT; - if (idx != meta.buffer_idx) - return -EINVAL; + meta = (struct kbase_hwcnt_reader_metadata *)kbuf; + if (unlikely(idx != meta->buffer_idx)) { + ret = -EINVAL; + goto out; + } atomic_inc(&cli->read_idx); - - return 0; +out: + if (unlikely(kbuf != stack_kbuf)) + kfree(kbuf); + return ret; } /** @@ -836,6 +886,42 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( } /** + * The hwcnt reader's ioctl command - get API version. + * @cli: The non-NULL pointer to the client + * @arg: Command's argument. + * @size: Size of arg. + * + * @return 0 on success, else error code. 
+ */ +static long kbasep_vinstr_hwcnt_reader_ioctl_get_api_version( + struct kbase_vinstr_client *cli, unsigned long arg, size_t size) +{ + long ret = -EINVAL; + u8 clk_cnt = cli->vctx->metadata->clk_cnt; + + if (size == sizeof(u32)) { + ret = put_user(HWCNT_READER_API, (u32 __user *)arg); + } else if (size == sizeof(struct kbase_hwcnt_reader_api_version)) { + struct kbase_hwcnt_reader_api_version api_version = { + .version = HWCNT_READER_API, + .features = KBASE_HWCNT_READER_API_VERSION_NO_FEATURE, + }; + + if (clk_cnt > 0) + api_version.features |= + KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_TOP; + if (clk_cnt > 1) + api_version.features |= + KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_SHADER_CORES; + + ret = put_user(api_version, + (struct kbase_hwcnt_reader_api_version __user *) + arg); + } + return ret; +} + +/** * kbasep_vinstr_hwcnt_reader_ioctl() - hwcnt reader's ioctl. * @filp: Non-NULL pointer to file structure. * @cmd: User command. @@ -858,42 +944,43 @@ static long kbasep_vinstr_hwcnt_reader_ioctl( if (!cli) return -EINVAL; - switch (cmd) { - case KBASE_HWCNT_READER_GET_API_VERSION: - rcode = put_user(HWCNT_READER_API, (u32 __user *)arg); + switch (_IOC_NR(cmd)) { + case _IOC_NR(KBASE_HWCNT_READER_GET_API_VERSION): + rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_api_version( + cli, arg, _IOC_SIZE(cmd)); break; - case KBASE_HWCNT_READER_GET_HWVER: + case _IOC_NR(KBASE_HWCNT_READER_GET_HWVER): rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( cli, (u32 __user *)arg); break; - case KBASE_HWCNT_READER_GET_BUFFER_SIZE: + case _IOC_NR(KBASE_HWCNT_READER_GET_BUFFER_SIZE): rcode = put_user( (u32)cli->vctx->metadata->dump_buf_bytes, (u32 __user *)arg); break; - case KBASE_HWCNT_READER_DUMP: + case _IOC_NR(KBASE_HWCNT_READER_DUMP): rcode = kbasep_vinstr_hwcnt_reader_ioctl_dump(cli); break; - case KBASE_HWCNT_READER_CLEAR: + case _IOC_NR(KBASE_HWCNT_READER_CLEAR): rcode = kbasep_vinstr_hwcnt_reader_ioctl_clear(cli); break; - case KBASE_HWCNT_READER_GET_BUFFER: + case _IOC_NR(KBASE_HWCNT_READER_GET_BUFFER): rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( cli, (void __user *)arg, _IOC_SIZE(cmd)); break; - case KBASE_HWCNT_READER_PUT_BUFFER: + case _IOC_NR(KBASE_HWCNT_READER_PUT_BUFFER): rcode = kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( cli, (void __user *)arg, _IOC_SIZE(cmd)); break; - case KBASE_HWCNT_READER_SET_INTERVAL: + case _IOC_NR(KBASE_HWCNT_READER_SET_INTERVAL): rcode = kbasep_vinstr_hwcnt_reader_ioctl_set_interval( cli, (u32)arg); break; - case KBASE_HWCNT_READER_ENABLE_EVENT: + case _IOC_NR(KBASE_HWCNT_READER_ENABLE_EVENT): rcode = kbasep_vinstr_hwcnt_reader_ioctl_enable_event( cli, (enum base_hwcnt_reader_event)arg); break; - case KBASE_HWCNT_READER_DISABLE_EVENT: + case _IOC_NR(KBASE_HWCNT_READER_DISABLE_EVENT): rcode = kbasep_vinstr_hwcnt_reader_ioctl_disable_event( cli, (enum base_hwcnt_reader_event)arg); break; diff --git a/mali_kbase/mali_linux_trace.h b/mali_kbase/mali_linux_trace.h index f618755..36bfd09 100644 --- a/mali_kbase/mali_linux_trace.h +++ b/mali_kbase/mali_linux_trace.h @@ -288,7 +288,7 @@ DEFINE_EVENT_PRINT(mali_jit_softjob_template, mali_jit_free, TP_printk("start=0x%llx va_pages=0x%zx backed_size=0x%zx", __entry->start_addr, __entry->nr_pages, __entry->backed_pages)); -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /* trace_mali_jit_report * * Tracepoint about the GPU data structure read to form a just-in-time memory @@ -326,13 +326,13 @@ TRACE_EVENT(mali_jit_report, ), __entry->read_val, __entry->used_pages) ); -#endif /* 
MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ #if (KERNEL_VERSION(4, 1, 0) <= LINUX_VERSION_CODE) TRACE_DEFINE_ENUM(KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); #endif -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /* trace_mali_jit_report_pressure * * Tracepoint about change in physical memory pressure, due to the information @@ -366,7 +366,7 @@ TRACE_EVENT(mali_jit_report_pressure, { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE, "HAPPENED_ON_ALLOC_OR_FREE" })) ); -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ #ifndef __TRACE_SYSGRAPH_ENUM #define __TRACE_SYSGRAPH_ENUM diff --git a/mali_kbase/mali_power_gpu_frequency_trace.c b/mali_kbase/mali_power_gpu_frequency_trace.c new file mode 100644 index 0000000..b6fb5a0 --- /dev/null +++ b/mali_kbase/mali_power_gpu_frequency_trace.c @@ -0,0 +1,27 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* Create the trace point if not configured in kernel */ +#ifndef CONFIG_TRACE_POWER_GPU_FREQUENCY +#define CREATE_TRACE_POINTS +#include "mali_power_gpu_frequency_trace.h" +#endif diff --git a/mali_kbase/mali_power_gpu_frequency_trace.h b/mali_kbase/mali_power_gpu_frequency_trace.h new file mode 100644 index 0000000..3b90ae4 --- /dev/null +++ b/mali_kbase/mali_power_gpu_frequency_trace.h @@ -0,0 +1,69 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _TRACE_POWER_GPU_FREQUENCY_MALI +#define _TRACE_POWER_GPU_FREQUENCY_MALI +#endif + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM power +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE mali_power_gpu_frequency_trace +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . 
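A note on the kernel tracepoint boilerplate in this new header: TRACE_INCLUDE_FILE and TRACE_INCLUDE_PATH tell trace/define_trace.h (pulled in at the bottom of the file) where to re-read this header, so the single translation unit that defines CREATE_TRACE_POINTS (mali_power_gpu_frequency_trace.c above) expands the event definitions exactly once, while every other includer only gets the declaration. A hedged sketch of a caller emitting the event; the helper name is illustrative only:

#include "mali_power_gpu_frequency_trace.h"

static void sketch_report_gpu_frequency(unsigned int frequency,
					unsigned int gpu_id)
{
	/* trace_gpu_frequency() is generated from
	 * DEFINE_EVENT(gpu, gpu_frequency, ...) below */
	trace_gpu_frequency(frequency, gpu_id);
}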
+ +#if !defined(_TRACE_POWER_GPU_FREQUENCY_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_POWER_GPU_FREQUENCY_H + +#include <linux/tracepoint.h> + +DECLARE_EVENT_CLASS(gpu, + + TP_PROTO(unsigned int state, unsigned int gpu_id), + + TP_ARGS(state, gpu_id), + + TP_STRUCT__entry( + __field( u32, state ) + __field( u32, gpu_id ) + ), + + TP_fast_assign( + __entry->state = state; + __entry->gpu_id = gpu_id; + ), + + TP_printk("state=%lu gpu_id=%lu", (unsigned long)__entry->state, + (unsigned long)__entry->gpu_id) +); + +DEFINE_EVENT(gpu, gpu_frequency, + + TP_PROTO(unsigned int frequency, unsigned int gpu_id), + + TP_ARGS(frequency, gpu_id) +); + +#endif /* _TRACE_POWER_GPU_FREQUENCY_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c index db27832..734c9de 100644 --- a/mali_kbase/mmu/mali_kbase_mmu.c +++ b/mali_kbase/mmu/mali_kbase_mmu.c @@ -45,6 +45,7 @@ #include <mmu/mali_kbase_mmu_internal.h> #include <mali_kbase_cs_experimental.h> +#include <mali_kbase_trace_gpu_mem.h> #define KBASE_MMU_PAGE_ENTRIES 512 /** @@ -150,6 +151,13 @@ static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev, * Depending on reg's flags, the base used for calculating multiples is * different */ + + /* multiple is based from the current backed size, even if the + * current backed size/pfn for end of committed memory are not + * themselves aligned to multiple + */ + remainder = minimum_extra % multiple; + if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { /* multiple is based from the top of the initial commit, which * has been allocated in such a way that (start_pfn + @@ -175,12 +183,6 @@ static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev, remainder = pages_after_initial % multiple; } - } else { - /* multiple is based from the current backed size, even if the - * current backed size/pfn for end of committed memory are not - * themselves aligned to multiple - */ - remainder = minimum_extra % multiple; } if (remainder == 0) @@ -544,7 +546,9 @@ void page_fault_worker(struct work_struct *data) struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; int i; size_t current_backed_size; - +#if MALI_JIT_PRESSURE_LIMIT_BASE + size_t pages_trimmed = 0; +#endif faulting_as = container_of(data, struct kbase_as, work_pagefault); fault = &faulting_as->pf_data; @@ -568,6 +572,10 @@ void page_fault_worker(struct work_struct *data) KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev); +#if MALI_JIT_PRESSURE_LIMIT_BASE + mutex_lock(&kctx->jctx.lock); +#endif + if (unlikely(fault->protected_mode)) { kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Protected mode fault", fault); @@ -758,6 +766,13 @@ page_fault_retry: pages_to_grow = 0; +#if MALI_JIT_PRESSURE_LIMIT_BASE + if ((region->flags & KBASE_REG_ACTIVE_JIT_ALLOC) && !pages_trimmed) { + kbase_jit_request_phys_increase(kctx, new_pages); + pages_trimmed = new_pages; + } +#endif + spin_lock(&kctx->mem_partials_lock); grown = page_fault_try_alloc(kctx, region, new_pages, &pages_to_grow, &grow_2mb_pool, prealloc_sas); @@ -872,6 +887,13 @@ page_fault_retry: } } #endif + +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (pages_trimmed) { + kbase_jit_done_phys_increase(kctx, pages_trimmed); + pages_trimmed = 0; + } +#endif kbase_gpu_vm_unlock(kctx); } else { int ret = -ENOMEM; @@ -918,6 +940,15 @@ page_fault_retry: } fault_done: +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (pages_trimmed) { + kbase_gpu_vm_lock(kctx); + kbase_jit_done_phys_increase(kctx, pages_trimmed); + 
kbase_gpu_vm_unlock(kctx); + } + mutex_unlock(&kctx->jctx.lock); +#endif + for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) kfree(prealloc_sas[i]); @@ -964,6 +995,8 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, atomic_add(1, &kbdev->memdev.used_pages); + kbase_trace_gpu_mem_usage_inc(kbdev, mmut->kctx, 1); + for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) kbdev->mmu_mode->entry_invalidate(&page[i]); @@ -1290,6 +1323,8 @@ static inline void cleanup_empty_pte(struct kbase_device *kbdev, atomic_sub(1, &mmut->kctx->used_pages); } atomic_sub(1, &kbdev->memdev.used_pages); + + kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1); } u64 kbase_mmu_create_ate(struct kbase_device *const kbdev, @@ -1932,6 +1967,8 @@ static void mmu_teardown_level(struct kbase_device *kbdev, kbase_process_page_usage_dec(mmut->kctx, 1); atomic_sub(1, &mmut->kctx->used_pages); } + + kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1); } int kbase_mmu_init(struct kbase_device *const kbdev, diff --git a/mali_kbase/platform/devicetree/Kbuild b/mali_kbase/platform/devicetree/Kbuild index ce637fb..78343c0 100644 --- a/mali_kbase/platform/devicetree/Kbuild +++ b/mali_kbase/platform/devicetree/Kbuild @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012-2017, 2020 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -21,4 +21,5 @@ mali_kbase-y += \ $(MALI_PLATFORM_DIR)/mali_kbase_config_devicetree.o \ - $(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o + $(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o \ + $(MALI_PLATFORM_DIR)/mali_kbase_clk_rate_trace.o diff --git a/mali_kbase/platform/devicetree/mali_kbase_clk_rate_trace.c b/mali_kbase/platform/devicetree/mali_kbase_clk_rate_trace.c new file mode 100644 index 0000000..11a8b77 --- /dev/null +++ b/mali_kbase/platform/devicetree/mali_kbase_clk_rate_trace.c @@ -0,0 +1,68 @@ +/* + * + * (C) COPYRIGHT 2015, 2017-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include <mali_kbase.h> +#include <mali_kbase_defs.h> +#include <linux/clk.h> +#include "mali_kbase_config_platform.h" + +static void *enumerate_gpu_clk(struct kbase_device *kbdev, + unsigned int index) +{ + if (index >= kbdev->nr_clocks) + return NULL; + + return kbdev->clocks[index]; +} + +static unsigned long get_gpu_clk_rate(struct kbase_device *kbdev, + void *gpu_clk_handle) +{ + return clk_get_rate((struct clk *)gpu_clk_handle); +} + +static int gpu_clk_notifier_register(struct kbase_device *kbdev, + void *gpu_clk_handle, struct notifier_block *nb) +{ + compiletime_assert(offsetof(struct clk_notifier_data, clk) == + offsetof(struct kbase_gpu_clk_notifier_data, gpu_clk_handle), + "mismatch in the offset of clk member"); + + compiletime_assert(sizeof(((struct clk_notifier_data *)0)->clk) == + sizeof(((struct kbase_gpu_clk_notifier_data *)0)->gpu_clk_handle), + "mismatch in the size of clk member"); + + return clk_notifier_register((struct clk *)gpu_clk_handle, nb); +} + +static void gpu_clk_notifier_unregister(struct kbase_device *kbdev, + void *gpu_clk_handle, struct notifier_block *nb) +{ + clk_notifier_unregister((struct clk *)gpu_clk_handle, nb); +} + +struct kbase_clk_rate_trace_op_conf clk_rate_trace_ops = { + .get_gpu_clk_rate = get_gpu_clk_rate, + .enumerate_gpu_clk = enumerate_gpu_clk, + .gpu_clk_notifier_register = gpu_clk_notifier_register, + .gpu_clk_notifier_unregister = gpu_clk_notifier_unregister, +}; diff --git a/mali_kbase/platform/devicetree/mali_kbase_config_platform.h b/mali_kbase/platform/devicetree/mali_kbase_config_platform.h index 5990313..2137b42 100644 --- a/mali_kbase/platform/devicetree/mali_kbase_config_platform.h +++ b/mali_kbase/platform/devicetree/mali_kbase_config_platform.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2017, 2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,7 +36,10 @@ */ #define PLATFORM_FUNCS (NULL) +#define CLK_RATE_TRACE_OPS (&clk_rate_trace_ops) + extern struct kbase_pm_callback_conf pm_callbacks; +extern struct kbase_clk_rate_trace_op_conf clk_rate_trace_ops; /** * Autosuspend delay diff --git a/mali_kbase/tests/Kbuild b/mali_kbase/tests/Kbuild index df16a77..c26bef7 100644 --- a/mali_kbase/tests/Kbuild +++ b/mali_kbase/tests/Kbuild @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2017, 2020 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -21,3 +21,4 @@ obj-$(CONFIG_MALI_KUTF) += kutf/ obj-$(CONFIG_MALI_IRQ_LATENCY) += mali_kutf_irq_test/ +obj-$(CONFIG_MALI_CLK_RATE_TRACE_PORTAL) += mali_kutf_clk_rate_trace/kernel/ diff --git a/mali_kbase/tests/Kconfig b/mali_kbase/tests/Kconfig index fa91aea..83a4d77 100644 --- a/mali_kbase/tests/Kconfig +++ b/mali_kbase/tests/Kconfig @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2017, 2020 ARM Limited. All rights reserved. 
# # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -21,3 +21,4 @@ source "drivers/gpu/arm/midgard/tests/kutf/Kconfig" source "drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig" +source "drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/kernel/Kconfig" diff --git a/mali_kbase/tests/include/kutf/kutf_helpers.h b/mali_kbase/tests/include/kutf/kutf_helpers.h index 15e168c..858b9c3 100644 --- a/mali_kbase/tests/include/kutf/kutf_helpers.h +++ b/mali_kbase/tests/include/kutf/kutf_helpers.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017, 2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -34,6 +34,14 @@ #include <kutf/kutf_suite.h> /** + * kutf_helper_pending_input() - Check any pending lines sent by user space + * @context: KUTF context + * + * Return: true if there are pending lines, otherwise false + */ +bool kutf_helper_pending_input(struct kutf_context *context); + +/** * kutf_helper_input_dequeue() - Dequeue a line sent by user space * @context: KUTF context * @str_size: Pointer to an integer to receive the size of the string diff --git a/mali_kbase/tests/kutf/kutf_helpers.c b/mali_kbase/tests/kutf/kutf_helpers.c index cab5add..4463b04 100644 --- a/mali_kbase/tests/kutf/kutf_helpers.c +++ b/mali_kbase/tests/kutf/kutf_helpers.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017, 2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,10 +29,11 @@ #include <linux/preempt.h> #include <linux/wait.h> #include <linux/uaccess.h> +#include <linux/export.h> static DEFINE_SPINLOCK(kutf_input_lock); -static bool pending_input(struct kutf_context *context) +bool kutf_helper_pending_input(struct kutf_context *context) { bool input_pending; @@ -44,6 +45,7 @@ static bool pending_input(struct kutf_context *context) return input_pending; } +EXPORT_SYMBOL(kutf_helper_pending_input); char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size) { @@ -59,7 +61,7 @@ char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size) spin_unlock(&kutf_input_lock); err = wait_event_interruptible(context->userdata.input_waitq, - pending_input(context)); + kutf_helper_pending_input(context)); if (err) return ERR_PTR(-EINTR); diff --git a/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/Kbuild b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/Kbuild new file mode 100644 index 0000000..f5565d3 --- /dev/null +++ b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/Kbuild @@ -0,0 +1,26 @@ +# +# (C) COPYRIGHT 2020 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + +ccflags-y += -I$(src)/../include -I$(src)/../../../ -I$(src)/../../ -I$(src)/../../backend/gpu -I$(srctree)/drivers/staging/android + +obj-$(CONFIG_MALI_CLK_RATE_TRACE_PORTAL) += mali_kutf_clk_rate_trace_test_portal.o + +mali_kutf_clk_rate_trace_test_portal-y := mali_kutf_clk_rate_trace_test.o diff --git a/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/Kconfig b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/Kconfig new file mode 100644 index 0000000..04b44cf --- /dev/null +++ b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/Kconfig @@ -0,0 +1,30 @@ +# +# (C) COPYRIGHT 2020 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + +config MALI_CLK_RATE_TRACE_PORTAL + tristate "Mali GPU Clock Trace Test portal" + depends on MALI_MIDGARD && MALI_DEBUG && MALI_KUTF + default m + help + This option will build a test module mali_kutf_clk_rate_trace_test_portal + that can test the clock integration on the platform and exercise some + basic trace tests in the system. Choosing M here will generate a single + module called mali_kutf_clk_rate_trace_test_portal. diff --git a/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/Makefile b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/Makefile new file mode 100644 index 0000000..71c78b8 --- /dev/null +++ b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/Makefile @@ -0,0 +1,57 @@ +# +# (C) COPYRIGHT 2020 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details.
+# +# SPDX-License-Identifier: GPL-2.0 +# +# + +ifneq ($(KERNELRELEASE),) + +ccflags-y := \ + -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \ + -DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \ + -I$(src)/../../include \ + -I$(src)/../../../../../../../include \ + -I$(src)/../../../../ \ + -I$(src)/../../../ \ + -I$(src)/../../../backend/gpu \ + -I$(src)/../../../debug \ + -I$(src)/../../../debug/backend \ + -I$(src)/ \ + -I$(srctree)/drivers/staging/android \ + -I$(srctree)/include/linux + +obj-m := mali_kutf_clk_rate_trace_test_portal.o +mali_kutf_clk_rate_trace_test_portal-y := mali_kutf_clk_rate_trace_test.o + +else +# linux build system bootstrap for out-of-tree module + +# default to building for the host +ARCH ?= $(shell uname -m) + +ifeq ($(KDIR),) +$(error Must specify KDIR to point to the kernel to target) +endif + +all: + $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) KBUILD_EXTRA_SYMBOLS="$(CURDIR)/../../kutf/Module.symvers $(CURDIR)/../../../Module.symvers" modules + +clean: + $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean + +endif diff --git a/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/build.bp b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/build.bp new file mode 100644 index 0000000..0cc2904 --- /dev/null +++ b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/build.bp @@ -0,0 +1,34 @@ +/* + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +bob_kernel_module { + name: "mali_kutf_clk_rate_trace_test_portal", + defaults: [ + "mali_kbase_shared_config_defaults", + "kernel_test_includes", + ], + srcs: [ + "../mali_kutf_clk_rate_trace_test.h", + "Makefile", + "mali_kutf_clk_rate_trace_test.c", + ], + extra_symbols: [ + "mali_kbase", + "kutf", + ], + enabled: false, + base_build_kutf: { + enabled: true, + kbuild_options: ["CONFIG_MALI_CLK_RATE_TRACE_PORTAL=m"], + }, +} diff --git a/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c new file mode 100644 index 0000000..d466661 --- /dev/null +++ b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c @@ -0,0 +1,886 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html.
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include <linux/fdtable.h> +#include <linux/module.h> + +#include <linux/delay.h> +#include <linux/mutex.h> +#include <linux/ktime.h> +#include <linux/version.h> +#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE) +#include <linux/sched/task.h> +#else +#include <linux/sched.h> +#endif +#include "mali_kbase.h" +#include "mali_kbase_irq_internal.h" +#include "mali_kbase_pm_internal.h" +#include "mali_kbase_clk_rate_trace_mgr.h" + +#include <kutf/kutf_suite.h> +#include <kutf/kutf_utils.h> +#include <kutf/kutf_helpers.h> +#include <kutf/kutf_helpers_user.h> + +#include "../mali_kutf_clk_rate_trace_test.h" + +#define MINOR_FOR_FIRST_KBASE_DEV (-1) + +/* KUTF test application pointer for this test */ +struct kutf_application *kutf_app; + +enum portal_server_state { + PORTAL_STATE_NO_CLK, + PORTAL_STATE_LIVE, + PORTAL_STATE_CLOSING, +}; + +/** + * struct clk_trace_snapshot - Trace info data on a clock. + * @previous_rate: Snapshot start point clock rate. + * @current_rate: End point clock rate. It becomes the start rate of the + * next trace snapshot. + * @rate_up_cnt: Count in the snapshot duration when the clock trace + * write is a rate of higher value than the last. + * @rate_down_cnt: Count in the snapshot duration when the clock trace write + * is a rate of lower value than the last. + */ +struct clk_trace_snapshot { + unsigned long previous_rate; + unsigned long current_rate; + u32 rate_up_cnt; + u32 rate_down_cnt; +}; + +/** + * struct kutf_clk_rate_trace_fixture_data - Fixture data for the test. + * @kbdev: kbase device for the GPU. + * @listener: Clock rate change listener structure. + * @invoke_notify: When true, invoke notify command is being executed. + * @snapshot: Clock trace update snapshot data array. A snapshot + * for each clock contains info accumulated between two + * GET_TRACE_SNAPSHOT requests. + * @nclks: Number of clocks visible to the trace portal. + * @pm_ctx_cnt: Net count of PM (Power Management) context INC/DEC + * PM_CTX_CNT requests made to the portal. On change from + * 0 to 1 (INC), or, 1 to 0 (DEC), a PM context action is + * triggered. + * @total_update_cnt: Total number of received trace write callbacks. + * @server_state: Portal server operational state. + * @result_msg: Message for the test result. + * @test_status: Portal test result status. + */ +struct kutf_clk_rate_trace_fixture_data { + struct kbase_device *kbdev; + struct kbase_clk_rate_listener listener; + bool invoke_notify; + struct clk_trace_snapshot snapshot[BASE_MAX_NR_CLOCKS_REGULATORS]; + unsigned int nclks; + unsigned int pm_ctx_cnt; + unsigned int total_update_cnt; + enum portal_server_state server_state; + char const *result_msg; + enum kutf_result_status test_status; +}; + +struct clk_trace_portal_input { + struct kutf_helper_named_val cmd_input; + enum kbasep_clk_rate_trace_req portal_cmd; + int named_val_err; +}; + +struct kbasep_cmd_name_pair { + enum kbasep_clk_rate_trace_req cmd; + const char *name; +}; + +struct kbasep_cmd_name_pair kbasep_portal_cmd_name_map[] = { + {PORTAL_CMD_GET_CLK_RATE_MGR, GET_CLK_RATE_MGR}, + {PORTAL_CMD_GET_CLK_RATE_TRACE, GET_CLK_RATE_TRACE}, + {PORTAL_CMD_GET_TRACE_SNAPSHOT, GET_TRACE_SNAPSHOT}, + {PORTAL_CMD_INC_PM_CTX_CNT, INC_PM_CTX_CNT}, + {PORTAL_CMD_DEC_PM_CTX_CNT, DEC_PM_CTX_CNT}, + {PORTAL_CMD_CLOSE_PORTAL, CLOSE_PORTAL}, + {PORTAL_CMD_INVOKE_NOTIFY_42KHZ, INVOKE_NOTIFY_42KHZ}, + }; + +/* Global pointer for the kutf_portal_trace_write() to use.
When + * this pointer is engaged, new requests to create a fixture will fail, + * hence limiting use of the portal to a singleton at any time. + */ +struct kutf_clk_rate_trace_fixture_data *g_ptr_portal_data; + +#define PORTAL_MSG_LEN (KUTF_MAX_LINE_LENGTH - MAX_REPLY_NAME_LEN) +static char portal_msg_buf[PORTAL_MSG_LEN]; + +static void kutf_portal_trace_write( + struct kbase_clk_rate_listener *listener, + u32 index, u32 new_rate) +{ + struct clk_trace_snapshot *snapshot; + struct kutf_clk_rate_trace_fixture_data *data = container_of( + listener, struct kutf_clk_rate_trace_fixture_data, listener); + + lockdep_assert_held(&data->kbdev->pm.clk_rtm.lock); + + if (WARN_ON(g_ptr_portal_data == NULL)) + return; + if (WARN_ON(index >= g_ptr_portal_data->nclks)) + return; + + /* This callback is triggered by invoke notify command, skipping */ + if (data->invoke_notify) + return; + + snapshot = &g_ptr_portal_data->snapshot[index]; + if (new_rate > snapshot->current_rate) + snapshot->rate_up_cnt++; + else + snapshot->rate_down_cnt++; + snapshot->current_rate = new_rate; + g_ptr_portal_data->total_update_cnt++; +} + +static void kutf_set_pm_ctx_active(struct kutf_context *context) +{ + struct kutf_clk_rate_trace_fixture_data *data = context->fixture; + + if (WARN_ON(data->pm_ctx_cnt != 1)) + return; + + kbase_pm_context_active(data->kbdev); + kbase_pm_wait_for_desired_state(data->kbdev); + kbase_pm_request_gpu_cycle_counter(data->kbdev); +} + +static void kutf_set_pm_ctx_idle(struct kutf_context *context) +{ + struct kutf_clk_rate_trace_fixture_data *data = context->fixture; + + if (WARN_ON(data->pm_ctx_cnt > 0)) + return; + + kbase_pm_context_idle(data->kbdev); + kbase_pm_release_gpu_cycle_counter(data->kbdev); +} + +static char const *kutf_clk_trace_do_change_pm_ctx(struct kutf_context *context, + struct clk_trace_portal_input *cmd) +{ + struct kutf_clk_rate_trace_fixture_data *data = context->fixture; + int seq = cmd->cmd_input.u.val_u64 & 0xFF; + const unsigned int cnt = data->pm_ctx_cnt; + const enum kbasep_clk_rate_trace_req req = cmd->portal_cmd; + char const *errmsg = NULL; + + WARN_ON(req != PORTAL_CMD_INC_PM_CTX_CNT && + req != PORTAL_CMD_DEC_PM_CTX_CNT); + + if (req == PORTAL_CMD_INC_PM_CTX_CNT && cnt < UINT_MAX) { + data->pm_ctx_cnt++; + if (data->pm_ctx_cnt == 1) + kutf_set_pm_ctx_active(context); + } + + if (req == PORTAL_CMD_DEC_PM_CTX_CNT && cnt > 0) { + data->pm_ctx_cnt--; + if (data->pm_ctx_cnt == 0) + kutf_set_pm_ctx_idle(context); + } + + /* Skip the length check, no chance of overflow for two ints */ + snprintf(portal_msg_buf, PORTAL_MSG_LEN, + "{SEQ:%d, PM_CTX_CNT:%u}", seq, data->pm_ctx_cnt); + + if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { + pr_warn("Error in sending ack for adjusting pm_ctx_cnt\n"); + errmsg = kutf_dsprintf(&context->fixture_pool, + "Error in sending ack for adjusting pm_ctx_cnt"); + } + + return errmsg; +} + +static char const *kutf_clk_trace_do_get_rate(struct kutf_context *context, + struct clk_trace_portal_input *cmd) +{ + struct kutf_clk_rate_trace_fixture_data *data = context->fixture; + struct kbase_device *kbdev = data->kbdev; + int seq = cmd->cmd_input.u.val_u64 & 0xFF; + unsigned long rate; + bool idle; + int ret; + int i; + char const *errmsg = NULL; + + WARN_ON((cmd->portal_cmd != PORTAL_CMD_GET_CLK_RATE_MGR) && + (cmd->portal_cmd != PORTAL_CMD_GET_CLK_RATE_TRACE)); + + ret = snprintf(portal_msg_buf, PORTAL_MSG_LEN, + "{SEQ:%d, RATE:[", seq); + + for (i = 0; i < data->nclks; i++) { + spin_lock(&kbdev->pm.clk_rtm.lock); + if
+			rate = kbdev->pm.clk_rtm.clks[i]->clock_val;
+		else
+			rate = data->snapshot[i].current_rate;
+		idle = kbdev->pm.clk_rtm.gpu_idle;
+		spin_unlock(&kbdev->pm.clk_rtm.lock);
+
+		if ((i + 1) == data->nclks)
+			ret += snprintf(portal_msg_buf + ret,
+				PORTAL_MSG_LEN - ret, "0x%lx], GPU_IDLE:%d}",
+				rate, idle);
+		else
+			ret += snprintf(portal_msg_buf + ret,
+				PORTAL_MSG_LEN - ret, "0x%lx, ", rate);
+
+		if (ret >= PORTAL_MSG_LEN) {
+			pr_warn("Message buf overflow with rate array data\n");
+			return kutf_dsprintf(&context->fixture_pool,
+				"Message buf overflow with rate array data");
+		}
+	}
+
+	if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) {
+		pr_warn("Error in sending back rate array\n");
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+			"Error in sending rate array");
+	}
+
+	return errmsg;
+}
+
+/**
+ * kutf_clk_trace_do_get_snapshot() - Send back the current snapshot
+ * @context: KUTF context
+ * @cmd: The decoded portal input request
+ *
+ * The accumulated clock rate trace information is kept inside as a snapshot
+ * record. A user request to get the snapshot marks the closure of the
+ * current snapshot record, and the start of the next one. The response
+ * message contains the current snapshot record, with each clock's
+ * data placed sequentially inside array markers [ ].
+ */
+static char const *kutf_clk_trace_do_get_snapshot(struct kutf_context *context,
+				struct clk_trace_portal_input *cmd)
+{
+	struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
+	struct clk_trace_snapshot snapshot;
+	int seq = cmd->cmd_input.u.val_u64 & 0xFF;
+	int ret;
+	int i;
+	char const *fmt;
+	char const *errmsg = NULL;
+
+	WARN_ON(cmd->portal_cmd != PORTAL_CMD_GET_TRACE_SNAPSHOT);
+
+	ret = snprintf(portal_msg_buf, PORTAL_MSG_LEN,
+		"{SEQ:%d, SNAPSHOT_ARRAY:[", seq);
+
+	for (i = 0; i < data->nclks; i++) {
+		spin_lock(&data->kbdev->pm.clk_rtm.lock);
+		/* Copy out the snapshot of the clock */
+		snapshot = data->snapshot[i];
+		/* Set the next snapshot start condition */
+		data->snapshot[i].previous_rate = snapshot.current_rate;
+		data->snapshot[i].rate_up_cnt = 0;
+		data->snapshot[i].rate_down_cnt = 0;
+		spin_unlock(&data->kbdev->pm.clk_rtm.lock);
+
+		/* Check whether i corresponds to the last clock */
+		if ((i + 1) == data->nclks)
+			fmt = "(0x%lx, 0x%lx, %u, %u)]}";
+		else
+			fmt = "(0x%lx, 0x%lx, %u, %u), ";
+		ret += snprintf(portal_msg_buf + ret, PORTAL_MSG_LEN - ret,
+			fmt, snapshot.previous_rate, snapshot.current_rate,
+			snapshot.rate_up_cnt, snapshot.rate_down_cnt);
+		if (ret >= PORTAL_MSG_LEN) {
+			pr_warn("Message buf overflow with snapshot data\n");
+			return kutf_dsprintf(&context->fixture_pool,
+				"Message buf overflow with snapshot data");
+		}
+	}
+
+	if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) {
+		pr_warn("Error in sending back snapshot array\n");
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+			"Error in sending snapshot array");
+	}
+
+	return errmsg;
+}
+
+/**
+ * kutf_clk_trace_do_invoke_notify_42k() - Invoke the stored notification callbacks
+ * @context: KUTF context
+ * @cmd: The decoded portal input request
+ *
+ * Invokes the frequency change notification callbacks with a fake
+ * GPU frequency of 42 kHz for the top clock domain.
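+ * The notification is raised with data->invoke_notify set, so the
+ * portal's own listener (kutf_portal_trace_write) recognizes and skips
+ * it, leaving the snapshot counters undisturbed.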
+ */
+static char const *kutf_clk_trace_do_invoke_notify_42k(
+				struct kutf_context *context,
+				struct clk_trace_portal_input *cmd)
+{
+	struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
+	int seq = cmd->cmd_input.u.val_u64 & 0xFF;
+	const unsigned long new_rate_hz = 42000;
+	int ret;
+	char const *errmsg = NULL;
+	struct kbase_clk_rate_trace_manager *clk_rtm = &data->kbdev->pm.clk_rtm;
+
+	WARN_ON(cmd->portal_cmd != PORTAL_CMD_INVOKE_NOTIFY_42KHZ);
+
+	spin_lock(&clk_rtm->lock);
+
+	data->invoke_notify = true;
+	kbase_clk_rate_trace_manager_notify_all(
+		clk_rtm, 0, new_rate_hz);
+	data->invoke_notify = false;
+
+	spin_unlock(&clk_rtm->lock);
+
+	ret = snprintf(portal_msg_buf, PORTAL_MSG_LEN,
+		"{SEQ:%d, HZ:%lu}", seq, new_rate_hz);
+
+	if (ret >= PORTAL_MSG_LEN) {
+		pr_warn("Message buf overflow with invoked data\n");
+		return kutf_dsprintf(&context->fixture_pool,
+			"Message buf overflow with invoked data");
+	}
+
+	if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) {
+		pr_warn("Error in sending ack for " INVOKE_NOTIFY_42KHZ " request\n");
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+			"Error in sending ack for " INVOKE_NOTIFY_42KHZ " request");
+	}
+
+	return errmsg;
+}
+
+static char const *kutf_clk_trace_do_close_portal(struct kutf_context *context,
+				struct clk_trace_portal_input *cmd)
+{
+	struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
+	int seq = cmd->cmd_input.u.val_u64 & 0xFF;
+	char const *errmsg = NULL;
+
+	WARN_ON(cmd->portal_cmd != PORTAL_CMD_CLOSE_PORTAL);
+
+	data->server_state = PORTAL_STATE_CLOSING;
+
+	/* Skip the length check, no chance of overflow for two ints */
+	snprintf(portal_msg_buf, PORTAL_MSG_LEN,
+		"{SEQ:%d, PM_CTX_CNT:%u}", seq, data->pm_ctx_cnt);
+
+	if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) {
+		pr_warn("Error in sending ack for " CLOSE_PORTAL " request\n");
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+			"Error in sending ack for " CLOSE_PORTAL " request");
+	}
+
+	return errmsg;
+}
+
+static bool kutf_clk_trace_dequeue_portal_cmd(struct kutf_context *context,
+				struct clk_trace_portal_input *cmd)
+{
+	int i;
+	int err = kutf_helper_receive_named_val(context, &cmd->cmd_input);
+
+	cmd->named_val_err = err;
+	if (err == KUTF_HELPER_ERR_NONE &&
+	    cmd->cmd_input.type == KUTF_HELPER_VALTYPE_U64) {
+		/* All portal request commands are of the format (named u64):
+		 *   CMD_NAME=1234
+		 * where 1234 is a (variable) sequence number tag.
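+		 * For example: GET_TRACE_SNAPSHOT=9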
+		 */
+		for (i = 0; i < PORTAL_TOTAL_CMDS; i++) {
+			if (strcmp(cmd->cmd_input.val_name,
+				kbasep_portal_cmd_name_map[i].name))
+				continue;
+
+			cmd->portal_cmd = kbasep_portal_cmd_name_map[i].cmd;
+			return true;
+		}
+	}
+
+	cmd->portal_cmd = PORTAL_CMD_INVALID;
+	return false;
+}
+
+static void kutf_clk_trace_flag_result(struct kutf_context *context,
+				enum kutf_result_status result, char const *msg)
+{
+	struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
+
+	if (result > data->test_status) {
+		data->test_status = result;
+		if (msg)
+			data->result_msg = msg;
+		if (data->server_state == PORTAL_STATE_LIVE &&
+		    result > KUTF_RESULT_WARN) {
+			data->server_state = PORTAL_STATE_CLOSING;
+		}
+	}
+}
+
+static bool kutf_clk_trace_process_portal_cmd(struct kutf_context *context,
+				struct clk_trace_portal_input *cmd)
+{
+	char const *errmsg = NULL;
+
+	BUILD_BUG_ON(ARRAY_SIZE(kbasep_portal_cmd_name_map) !=
+		PORTAL_TOTAL_CMDS);
+	WARN_ON(cmd->portal_cmd == PORTAL_CMD_INVALID);
+
+	switch (cmd->portal_cmd) {
+	case PORTAL_CMD_GET_CLK_RATE_MGR:
+		/* Fall through */
+	case PORTAL_CMD_GET_CLK_RATE_TRACE:
+		errmsg = kutf_clk_trace_do_get_rate(context, cmd);
+		break;
+	case PORTAL_CMD_GET_TRACE_SNAPSHOT:
+		errmsg = kutf_clk_trace_do_get_snapshot(context, cmd);
+		break;
+	case PORTAL_CMD_INC_PM_CTX_CNT:
+		/* Fall through */
+	case PORTAL_CMD_DEC_PM_CTX_CNT:
+		errmsg = kutf_clk_trace_do_change_pm_ctx(context, cmd);
+		break;
+	case PORTAL_CMD_CLOSE_PORTAL:
+		errmsg = kutf_clk_trace_do_close_portal(context, cmd);
+		break;
+	case PORTAL_CMD_INVOKE_NOTIFY_42KHZ:
+		errmsg = kutf_clk_trace_do_invoke_notify_42k(context, cmd);
+		break;
+	default:
+		pr_warn("Don't know how to handle portal_cmd: %d, abort session.\n",
+			cmd->portal_cmd);
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+			"Don't know how to handle portal_cmd: %d",
+			cmd->portal_cmd);
+		break;
+	}
+
+	if (errmsg)
+		kutf_clk_trace_flag_result(context, KUTF_RESULT_FAIL, errmsg);
+
+	return (errmsg == NULL);
+}
+
+/**
+ * kutf_clk_trace_do_nack_response() - Respond with a NACK to erroneous input
+ * @context: KUTF context
+ * @cmd: The erroneous input request
+ *
+ * This function deals with an erroneous input request and responds with
+ * a proper 'NACK' message.
+ */
+static int kutf_clk_trace_do_nack_response(struct kutf_context *context,
+				struct clk_trace_portal_input *cmd)
+{
+	int seq;
+	int err;
+	char const *errmsg = NULL;
+
+	WARN_ON(cmd->portal_cmd != PORTAL_CMD_INVALID);
+
+	if (cmd->named_val_err == KUTF_HELPER_ERR_NONE &&
+	    cmd->cmd_input.type == KUTF_HELPER_VALTYPE_U64) {
+		/* Keep the seq number as % 256 */
+		seq = cmd->cmd_input.u.val_u64 & 255;
+		snprintf(portal_msg_buf, PORTAL_MSG_LEN,
+			"{SEQ:%d, MSG: Unknown command '%s'.}", seq,
+			cmd->cmd_input.val_name);
+		err = kutf_helper_send_named_str(context, "NACK",
+			portal_msg_buf);
+	} else
+		err = kutf_helper_send_named_str(context, "NACK",
+			"Wrong portal cmd format (Ref example: CMD_NAME=0x16)");
+
+	if (err) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+			"Failed to send portal NACK response");
+		kutf_clk_trace_flag_result(context, KUTF_RESULT_FAIL, errmsg);
+	}
+
+	return err;
+}
+
+/**
+ * kutf_clk_trace_barebone_check() - Sanity test on the clock tracing
+ * @context: KUTF context
+ *
+ * This function carries out some basic tests on the tracing operation:
+ * 1) If the GPU is idle at test start, the trace rate should be 0
+ *    (low power state).
+ * 2) Make sure the GPU is powered up; the trace rate should then match
+ *    the clock manager's internally recorded rate.
+ * 3) If a GPU active transition occurs following 2), there must be a
+ *    rate change event from tracing.
+ */
+void kutf_clk_trace_barebone_check(struct kutf_context *context)
+{
+	struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
+	struct kbase_device *kbdev = data->kbdev;
+	bool fail = false;
+	bool idle[2] = { false };
+	char const *msg = NULL;
+	int i;
+
+	/* Check consistency if the GPU happens to be idle */
+	spin_lock(&kbdev->pm.clk_rtm.lock);
+	idle[0] = kbdev->pm.clk_rtm.gpu_idle;
+	if (kbdev->pm.clk_rtm.gpu_idle) {
+		for (i = 0; i < data->nclks; i++) {
+			if (data->snapshot[i].current_rate) {
+				/* Idle should mean a rate of 0 */
+				fail = true;
+				break;
+			}
+		}
+	}
+	spin_unlock(&kbdev->pm.clk_rtm.lock);
+	if (fail) {
+		msg = kutf_dsprintf(&context->fixture_pool,
+			"GPU Idle not yielding 0-rate");
+		pr_err("Trace did not see idle rate\n");
+	} else {
+		/* Make local PM active if not done so yet */
+		if (data->pm_ctx_cnt == 0) {
+			/* Ensure the GPU is powered */
+			data->pm_ctx_cnt++;
+			kutf_set_pm_ctx_active(context);
+		}
+		/* Check that the rate is consistent */
+		spin_lock(&kbdev->pm.clk_rtm.lock);
+		idle[1] = kbdev->pm.clk_rtm.gpu_idle;
+		for (i = 0; i < data->nclks; i++) {
+			/* Rate match between the manager and the trace */
+			if (kbdev->pm.clk_rtm.clks[i]->clock_val !=
+			    data->snapshot[i].current_rate) {
+				fail = true;
+				break;
+			}
+		}
+		spin_unlock(&kbdev->pm.clk_rtm.lock);
+
+		if (idle[1]) {
+			msg = kutf_dsprintf(&context->fixture_pool,
+				"GPU still idle after set_pm_ctx_active");
+			pr_err("GPU still idle after set_pm_ctx_active\n");
+		}
+
+		if (!msg && fail) {
+			msg = kutf_dsprintf(&context->fixture_pool,
+				"Trace rate not matching Clk manager's read");
+			pr_err("Trace rate not matching Clk manager's read\n");
+		}
+	}
+
+	if (!msg && idle[0] && !idle[1] && !data->total_update_cnt) {
+		msg = kutf_dsprintf(&context->fixture_pool,
+			"Trace update did not occur");
+		pr_err("Trace update did not occur\n");
+	}
+	if (msg)
+		kutf_clk_trace_flag_result(context, KUTF_RESULT_FAIL, msg);
+	else if (!data->total_update_cnt) {
+		msg = kutf_dsprintf(&context->fixture_pool,
+			"No trace update seen during the test!");
+		kutf_clk_trace_flag_result(context, KUTF_RESULT_WARN, msg);
+	}
+}
+
+static bool kutf_clk_trace_end_of_stream(struct clk_trace_portal_input *cmd)
+{
+	return (cmd->named_val_err == -EBUSY);
+}
+
+void kutf_clk_trace_no_clks_dummy(struct kutf_context *context)
+{
+	struct clk_trace_portal_input cmd;
+	unsigned long timeout = jiffies + HZ * 2;
+	bool has_cmd;
+
+	while (time_before(jiffies, timeout)) {
+		if (kutf_helper_pending_input(context)) {
+			has_cmd = kutf_clk_trace_dequeue_portal_cmd(context,
+				&cmd);
+			if (!has_cmd && kutf_clk_trace_end_of_stream(&cmd))
+				break;
+
+			kutf_helper_send_named_str(context, "NACK",
+				"Fatal! No clocks visible, aborting");
+		}
+		msleep(20);
+	}
+
+	kutf_clk_trace_flag_result(context, KUTF_RESULT_FATAL,
+		"No clocks visible to the portal");
+}
+
+/**
+ * mali_kutf_clk_rate_trace_test_portal() - Service portal input
+ * @context: KUTF context
+ *
+ * The test portal operates on input requests. If the input request is one
+ * of the recognized portal commands, it is handled accordingly. Otherwise
+ * a negative response 'NACK' is returned. The portal service terminates
+ * when a 'CLOSE_PORTAL' request is received, or due to an internal error.
+ * In both cases the server_state transitions to CLOSING.
+ *
+ * If the portal is closed on request, a sanity test on the clock rate
+ * trace operation is undertaken via the function
+ * kutf_clk_trace_barebone_check().
+ */
+static void mali_kutf_clk_rate_trace_test_portal(struct kutf_context *context)
+{
+	struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
+	struct clk_trace_portal_input new_cmd;
+
+	pr_debug("Test portal service start\n");
+
+	while (data->server_state == PORTAL_STATE_LIVE) {
+		if (kutf_clk_trace_dequeue_portal_cmd(context, &new_cmd))
+			kutf_clk_trace_process_portal_cmd(context, &new_cmd);
+		else if (kutf_clk_trace_end_of_stream(&new_cmd))
+			/* Dequeue on portal input, end of stream */
+			data->server_state = PORTAL_STATE_CLOSING;
+		else
+			kutf_clk_trace_do_nack_response(context, &new_cmd);
+	}
+
+	/* Closing: exhaust all the pending inputs with NACKs */
+	if (data->server_state == PORTAL_STATE_CLOSING) {
+		while (kutf_helper_pending_input(context) &&
+		       (kutf_clk_trace_dequeue_portal_cmd(context, &new_cmd) ||
+			!kutf_clk_trace_end_of_stream(&new_cmd))) {
+			kutf_helper_send_named_str(context, "NACK",
+				"Portal closing down");
+		}
+	}
+
+	/* If there was no portal error, do a barebone test here irrespective
+	 * of whatever the portal live session has been testing, which
+	 * is entirely driven by the user side via portal requests.
+	 */
+	if (data->test_status <= KUTF_RESULT_WARN) {
+		if (data->server_state != PORTAL_STATE_NO_CLK)
+			kutf_clk_trace_barebone_check(context);
+		else {
+			/* No clocks case, NACK 2-sec for the fatal situation */
+			kutf_clk_trace_no_clks_dummy(context);
+		}
+	}
+
+	/* If we have changed the pm_ctx count, drop it back */
+	if (data->pm_ctx_cnt) {
+		/* Although the count is driven by portal requests, it only
+		 * has a material impact on the 0 -> 1 transition, so the
+		 * reverse is a simple one-off reset.
+		 */
+		data->pm_ctx_cnt = 0;
+		kutf_set_pm_ctx_idle(context);
+	}
+
+	/* Finally log the test result line */
+	if (data->test_status < KUTF_RESULT_WARN)
+		kutf_test_pass(context, data->result_msg);
+	else if (data->test_status == KUTF_RESULT_WARN)
+		kutf_test_warn(context, data->result_msg);
+	else if (data->test_status == KUTF_RESULT_FATAL)
+		kutf_test_fatal(context, data->result_msg);
+	else
+		kutf_test_fail(context, data->result_msg);
+
+	pr_debug("Test end\n");
+}
+
+/**
+ * mali_kutf_clk_rate_trace_create_fixture() - Creates the fixture data
+ *                 required for mali_kutf_clk_rate_trace_test_portal.
+ * @context: KUTF context.
+ *
+ * Return: Fixture data created on success or NULL on failure
+ */
+static void *mali_kutf_clk_rate_trace_create_fixture(
+		struct kutf_context *context)
+{
+	struct kutf_clk_rate_trace_fixture_data *data;
+	struct kbase_device *kbdev;
+	unsigned long rate;
+	int i;
+
+	/* Acquire the kbase device */
+	pr_debug("Finding device\n");
+	kbdev = kbase_find_device(MINOR_FOR_FIRST_KBASE_DEV);
+	if (kbdev == NULL) {
+		kutf_test_fail(context, "Failed to find kbase device");
+		return NULL;
+	}
+
+	pr_debug("Creating fixture\n");
+	data = kutf_mempool_alloc(&context->fixture_pool,
+		sizeof(struct kutf_clk_rate_trace_fixture_data));
+	if (!data)
+		return NULL;
+
+	*data = (const struct kutf_clk_rate_trace_fixture_data) { 0 };
+	pr_debug("Hooking up the test portal to kbdev clk rate trace\n");
+	spin_lock(&kbdev->pm.clk_rtm.lock);
+
+	if (g_ptr_portal_data != NULL) {
+		pr_warn("Test portal is already in use, run aborted\n");
+		kutf_test_fail(context, "Portal allows single session only");
+		spin_unlock(&kbdev->pm.clk_rtm.lock);
+		return NULL;
+	}
+
+	for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
+		if (kbdev->pm.clk_rtm.clks[i]) {
+			data->nclks++;
+			if (kbdev->pm.clk_rtm.gpu_idle)
+				rate = 0;
+			else
+				rate = kbdev->pm.clk_rtm.clks[i]->clock_val;
+			data->snapshot[i].previous_rate = rate;
+			data->snapshot[i].current_rate = rate;
+		}
+	}
+
+	spin_unlock(&kbdev->pm.clk_rtm.lock);
+
+	if (data->nclks) {
+		/* Subscribe this test server portal */
+		data->listener.notify = kutf_portal_trace_write;
+		data->invoke_notify = false;
+
+		kbase_clk_rate_trace_manager_subscribe(
+			&kbdev->pm.clk_rtm, &data->listener);
+		/* Update the kutf_server_portal fixture_data pointer */
+		g_ptr_portal_data = data;
+	}
+
+	data->kbdev = kbdev;
+	data->result_msg = NULL;
+	data->test_status = KUTF_RESULT_PASS;
+
+	if (data->nclks == 0) {
+		data->server_state = PORTAL_STATE_NO_CLK;
+		pr_debug("Kbdev has no clocks for rate trace\n");
+	} else {
+		data->server_state = PORTAL_STATE_LIVE;
+	}
+
+	pr_debug("Created fixture\n");
+
+	return data;
+}
+
+/**
+ * mali_kutf_clk_rate_trace_remove_fixture() - Destroy fixture data previously
+ * created by mali_kutf_clk_rate_trace_create_fixture.
+ *
+ * @context: KUTF context.
+ */
+static void mali_kutf_clk_rate_trace_remove_fixture(
+		struct kutf_context *context)
+{
+	struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
+	struct kbase_device *kbdev = data->kbdev;
+
+	if (data->nclks) {
+		/* Clean up the portal trace write arrangement */
+		g_ptr_portal_data = NULL;
+
+		kbase_clk_rate_trace_manager_unsubscribe(
+			&kbdev->pm.clk_rtm, &data->listener);
+	}
+	pr_debug("Destroying fixture\n");
+	kbase_release_device(kbdev);
+	pr_debug("Destroyed fixture\n");
+}
+
+/**
+ * mali_kutf_clk_rate_trace_test_module_init() - Entry point for test module.
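+ *
+ * Return: 0 on success, or -ENOMEM if the KUTF application or suite
+ * cannot be created.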
+ */
+int mali_kutf_clk_rate_trace_test_module_init(void)
+{
+	struct kutf_suite *suite;
+	unsigned int filters;
+	union kutf_callback_data suite_data = { 0 };
+
+	pr_debug("Creating app\n");
+
+	g_ptr_portal_data = NULL;
+	kutf_app = kutf_create_application(CLK_RATE_TRACE_APP_NAME);
+
+	if (!kutf_app) {
+		pr_warn("Creation of app " CLK_RATE_TRACE_APP_NAME
+			" failed!\n");
+		return -ENOMEM;
+	}
+
+	pr_debug("Create suite %s\n", CLK_RATE_TRACE_SUITE_NAME);
+	suite = kutf_create_suite_with_filters_and_data(
+		kutf_app, CLK_RATE_TRACE_SUITE_NAME, 1,
+		mali_kutf_clk_rate_trace_create_fixture,
+		mali_kutf_clk_rate_trace_remove_fixture,
+		KUTF_F_TEST_GENERIC,
+		suite_data);
+
+	if (!suite) {
+		pr_warn("Creation of suite %s failed!\n",
+			CLK_RATE_TRACE_SUITE_NAME);
+		kutf_destroy_application(kutf_app);
+		return -ENOMEM;
+	}
+
+	filters = suite->suite_default_flags;
+	kutf_add_test_with_filters(
+		suite, 0x0, CLK_RATE_TRACE_PORTAL,
+		mali_kutf_clk_rate_trace_test_portal,
+		filters);
+
+	pr_debug("Init complete\n");
+	return 0;
+}
+
+/**
+ * mali_kutf_clk_rate_trace_test_module_exit() - Module exit point for this
+ * test.
+ */
+void mali_kutf_clk_rate_trace_test_module_exit(void)
+{
+	pr_debug("Exit start\n");
+	kutf_destroy_application(kutf_app);
+	pr_debug("Exit complete\n");
+}
+
+
+module_init(mali_kutf_clk_rate_trace_test_module_init);
+module_exit(mali_kutf_clk_rate_trace_test_module_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/mali_kbase/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h b/mali_kbase/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h
new file mode 100644
index 0000000..f46afd5
--- /dev/null
+++ b/mali_kbase/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h
@@ -0,0 +1,148 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KUTF_CLK_RATE_TRACE_TEST_H_
+#define _KUTF_CLK_RATE_TRACE_TEST_H_
+
+#define CLK_RATE_TRACE_APP_NAME "clk_rate_trace"
+#define CLK_RATE_TRACE_SUITE_NAME "rate_trace"
+#define CLK_RATE_TRACE_PORTAL "portal"
+
+/**
+ * enum kbasep_clk_rate_trace_req - request command to the clock rate trace
+ *                                  service portal.
+ *
+ * @PORTAL_CMD_GET_CLK_RATE_MGR:   Request the clock trace manager internal
+ *                                 data record. On a positive acknowledgement
+ *                                 the prevailing clock rates and the GPU idle
+ *                                 condition flag are returned.
+ * @PORTAL_CMD_GET_CLK_RATE_TRACE: Request the clock trace portal to return its
+ *                                 data record. On a positive acknowledgement
+ *                                 the last trace recorded clock rates and the
+ *                                 GPU idle condition flag are returned.
+ * @PORTAL_CMD_GET_TRACE_SNAPSHOT: Request the clock trace portal to return its
+ *                                 current snapshot data record. On a positive
+ *                                 acknowledgement the snapshot array matching
+ *                                 the number of clocks is returned.
+ *                                 It also starts a fresh snapshot inside
+ *                                 the clock trace portal.
+ * @PORTAL_CMD_INC_PM_CTX_CNT:     Request the clock trace portal to increase
+ *                                 its internal PM_CTX_COUNT. If this increase
+ *                                 yields a count change of 0 -> 1, the portal
+ *                                 will initiate a PM_CTX_ACTIVE call to the
+ *                                 Kbase power management. Further increase
+ *                                 requests will only affect the portal's
+ *                                 internal count value.
+ * @PORTAL_CMD_DEC_PM_CTX_CNT:     Request the clock trace portal to decrease
+ *                                 its internal PM_CTX_COUNT. If this decrease
+ *                                 yields a count change of 1 -> 0, the portal
+ *                                 will initiate a PM_CTX_IDLE call to the
+ *                                 Kbase power management.
+ * @PORTAL_CMD_CLOSE_PORTAL:       Inform the clock trace portal service that
+ *                                 the client has completed its session. The
+ *                                 portal will start the close down action. If
+ *                                 no error has occurred during the dynamic
+ *                                 interactive session, an inherent basic test
+ *                                 carrying out some sanity checks on the
+ *                                 clock trace is undertaken.
+ * @PORTAL_CMD_INVOKE_NOTIFY_42KHZ: Invoke all clock rate trace manager
+ *                                 callbacks for the top clock domain with a
+ *                                 new GPU frequency set to 42 kHz.
+ * @PORTAL_CMD_INVALID:            Valid commands termination marker. Must be
+ *                                 the highest enumeration value, as it
+ *                                 represents the valid command array size.
+ * @PORTAL_TOTAL_CMDS:             Alias of PORTAL_CMD_INVALID.
+ */
+/* PORTAL_CMD_INVALID must be the last one, serving the size */
+enum kbasep_clk_rate_trace_req {
+	PORTAL_CMD_GET_CLK_RATE_MGR,
+	PORTAL_CMD_GET_CLK_RATE_TRACE,
+	PORTAL_CMD_GET_TRACE_SNAPSHOT,
+	PORTAL_CMD_INC_PM_CTX_CNT,
+	PORTAL_CMD_DEC_PM_CTX_CNT,
+	PORTAL_CMD_CLOSE_PORTAL,
+	PORTAL_CMD_INVOKE_NOTIFY_42KHZ,
+	PORTAL_CMD_INVALID,
+	PORTAL_TOTAL_CMDS = PORTAL_CMD_INVALID,
+};
+
+/**
+ * Portal service request command names. The portal request consists of a kutf
+ * named u64-value. For the PORTAL_CMDs enumerated above, the names defined
+ * here are used as the command name, followed by a sequence number value.
+ * Example (manual script here for illustration):
+ *   exec 5<>run                   # open the portal kutf run as fd-5
+ *   echo GET_CLK_RATE_MGR=1 >&5   # send the cmd and sequence number 1
+ *   head -n 1 <&5                 # read back the 1-line server response
+ *   ACK="{SEQ:1, RATE:[0x1ad27480], GPU_IDLE:1}"   # response string
+ *   echo GET_TRACE_SNAPSHOT=1 >&5 # send the cmd and sequence number 1
+ *   head -n 1 <&5                 # read back the 1-line server response
+ *   ACK="{SEQ:1, SNAPSHOT_ARRAY:[(0x0, 0x1ad27480, 1, 0)]}"
+ *   echo CLOSE_PORTAL=1 >&5       # close the portal
+ *   cat <&5                       # read back all the response lines
+ *   ACK="{SEQ:1, PM_CTX_CNT:0}"   # response to the close command
+ *   KUTF_RESULT_PASS:(explicit pass)   # internal sanity test passed
+ *   exec 5>&-                     # close the service portal fd
+ *
+ * Expected request command return format:
+ * GET_CLK_RATE_MGR: ACK="{SEQ:12, RATE:[1080, 1280], GPU_IDLE:1}"
+ *   Note, the above contains 2 clocks with rates in [], GPU idle.
+ * GET_CLK_RATE_TRACE: ACK="{SEQ:6, RATE:[0x1ad27480], GPU_IDLE:0}"
+ *   Note, 1 clock with its rate in [], GPU not idle.
+ * GET_TRACE_SNAPSHOT: ACK="{SEQ:8, SNAPSHOT_ARRAY:[(0x0, 0x1ad27480, 1, 0)]}"
+ *   Note, 1 clock, (start_rate: 0, last_rate: 0x1ad27480,
+ *   trace_rate_up_count: 1, trace_rate_down_count: 0).
+ *   For this specific sample case, there is a single rate_trace event
+ *   that yielded a rate increase change. No rate drop event was recorded
+ *   in the reporting snapshot duration.
+ * INC_PM_CTX_CNT: ACK="{SEQ:1, PM_CTX_CNT:1}"
+ *   Note, after the increment, PM_CTX_CNT is 1 (i.e. 0 -> 1).
+ * DEC_PM_CTX_CNT: ACK="{SEQ:3, PM_CTX_CNT:0}"
+ *   Note, after the decrement, PM_CTX_CNT is 0 (i.e. 1 -> 0).
+ * CLOSE_PORTAL: ACK="{SEQ:1, PM_CTX_CNT:1}"
+ *   Note, at the close, PM_CTX_CNT is 1. The PM_CTX_CNT will internally be
+ *   dropped down to 0 as part of the portal close clean up.
+ */
+#define GET_CLK_RATE_MGR "GET_CLK_RATE_MGR"
+#define GET_CLK_RATE_TRACE "GET_CLK_RATE_TRACE"
+#define GET_TRACE_SNAPSHOT "GET_TRACE_SNAPSHOT"
+#define INC_PM_CTX_CNT "INC_PM_CTX_CNT"
+#define DEC_PM_CTX_CNT "DEC_PM_CTX_CNT"
+#define CLOSE_PORTAL "CLOSE_PORTAL"
+#define INVOKE_NOTIFY_42KHZ "INVOKE_NOTIFY_42KHZ"
+
+/**
+ * Portal service response tag names. The response consists of a kutf
+ * named string-value. In the case of a 'NACK' (negative acknowledgement),
+ * it can be one of two formats:
+ * 1. NACK="{SEQ:2, MSG:xyzed}"   # NACK on the command with sequence tag-2.
+ *    Note, the portal has received a valid name and a valid sequence number
+ *    but can't carry out the request, reason in the MSG field.
+ * 2. NACK="Failing-message"
+ *    Note, unable to parse a valid name or a valid sequence number,
+ *    or some internal error condition. Reason in the quoted string.
+ */
+#define ACK "ACK"
+#define NACK "NACK"
+#define MAX_REPLY_NAME_LEN 32
+
+#endif /* _KUTF_CLK_RATE_TRACE_TEST_H_ */
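For reference, the sketch below shows what a minimal user-space client for this portal could look like. It is illustrative only and not part of the patch: the debugfs mount point and the exact KUTF run-file path (built here from the app/suite/test names above) are assumptions that may need adjusting to the target's actual KUTF directory layout.

    /* Hypothetical user-space client for the clk_rate_trace portal.
     * PORTAL_RUN is an assumed path; KUTF exposes a "run" file per test
     * under debugfs, but the exact layout may differ on the target.
     */
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    #define PORTAL_RUN \
        "/sys/kernel/debug/kutf_tests/clk_rate_trace/rate_trace/portal/run"

    int main(void)
    {
        char reply[256];
        ssize_t n;
        int fd = open(PORTAL_RUN, O_RDWR);

        if (fd < 0) {
            perror("open portal run file");
            return 1;
        }

        /* Send a named u64 command: CMD_NAME=<sequence tag> */
        dprintf(fd, "GET_CLK_RATE_MGR=1\n");
        /* Expect one reply line: ACK="{SEQ:1, RATE:[...], GPU_IDLE:x}" */
        n = read(fd, reply, sizeof(reply) - 1);
        if (n > 0) {
            reply[n] = '\0';
            printf("%s", reply);
        }

        /* Close the portal; this triggers the barebone sanity check */
        dprintf(fd, "CLOSE_PORTAL=2\n");
        while ((n = read(fd, reply, sizeof(reply) - 1)) > 0) {
            reply[n] = '\0';
            printf("%s", reply);    /* last lines carry the KUTF result */
        }

        close(fd);
        return 0;
    }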