author    Sidath Senanayake <sidaths@google.com>    2020-09-11 16:44:12 +0100
committer Sidath Senanayake <sidaths@google.com>    2020-09-11 16:44:12 +0100
commit    d4ca6eb7268ee2db9deabd1745b505c6e1c162f9 (patch)
tree      64058c324e9e6adb30e8689d17f0a2e2b27636bc
parent    bc3c01e61c8ce9783a8ab091053905effcae12de (diff)
download  gpu-d4ca6eb7268ee2db9deabd1745b505c6e1c162f9.tar.gz
Mali Valhall DDK r26p0 KMD
Provenance: 009a7d86a (collaborate/EAC/v_r26p0)
VX504X08X-BU-00000-r26p0-01eac0 - Android DDK
VX504X08X-BU-60000-r26p0-01eac0 - Android Document Bundle

Signed-off-by: Sidath Senanayake <sidaths@google.com>
Change-Id: Ic3671bdc454b706b6f98a9d1a615d1886da0c3e8
-rw-r--r--  mali_kbase/Kbuild | 17
-rw-r--r--  mali_kbase/Kconfig | 4
-rw-r--r--  mali_kbase/backend/gpu/Kbuild | 5
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c | 280
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h | 155
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_instr_backend.c | 2
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_irq_linux.c | 4
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_hw.c | 33
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_rb.c | 2
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_js_backend.c | 6
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_time.c | 15
-rw-r--r--  mali_kbase/context/backend/mali_kbase_context_jm.c | 18
-rw-r--r--  mali_kbase/context/mali_kbase_context.c | 130
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_jm.c | 8
-rw-r--r--  mali_kbase/device/mali_kbase_device.c | 8
-rw-r--r--  mali_kbase/device/mali_kbase_device_internal.h | 6
-rw-r--r--  mali_kbase/jm/mali_base_jm_kernel.h | 81
-rw-r--r--  mali_kbase/jm/mali_kbase_jm_defs.h | 7
-rw-r--r--  mali_kbase/jm/mali_kbase_jm_ioctl.h | 64
-rw-r--r--  mali_kbase/mali_base_kernel.h | 22
-rw-r--r--  mali_kbase/mali_gpu_mem_trace.h | 73
-rw-r--r--  mali_kbase/mali_kbase.h | 6
-rw-r--r--  mali_kbase/mali_kbase_caps.h | 65
-rw-r--r--  mali_kbase/mali_kbase_ccswe.c | 105
-rw-r--r--  mali_kbase/mali_kbase_ccswe.h | 97
-rw-r--r--  mali_kbase/mali_kbase_config.h | 84
-rw-r--r--  mali_kbase/mali_kbase_core_linux.c | 264
-rw-r--r--  mali_kbase/mali_kbase_cs_experimental.h | 3
-rw-r--r--  mali_kbase/mali_kbase_defs.h | 143
-rw-r--r--  mali_kbase/mali_kbase_hwaccess_instr.h | 4
-rw-r--r--  mali_kbase/mali_kbase_hwaccess_time.h | 14
-rw-r--r--  mali_kbase/mali_kbase_hwcnt.c | 24
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_backend.h | 7
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_backend_gpu.c | 510
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_backend_jm.c | 707
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_backend_jm.h (renamed from mali_kbase/mali_kbase_hwcnt_backend_gpu.h) | 18
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_gpu.c | 33
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_gpu.h | 10
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_legacy.c | 4
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_reader.h | 41
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_types.c | 92
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_types.h | 77
-rw-r--r--  mali_kbase/mali_kbase_ioctl.h | 8
-rw-r--r--  mali_kbase/mali_kbase_jd.c | 140
-rw-r--r--  mali_kbase/mali_kbase_kinstr_jm.c | 896
-rw-r--r--  mali_kbase/mali_kbase_kinstr_jm.h | 283
-rw-r--r--  mali_kbase/mali_kbase_kinstr_jm_reader.h | 70
-rw-r--r--  mali_kbase/mali_kbase_mem.c | 483
-rw-r--r--  mali_kbase/mali_kbase_mem.h | 140
-rw-r--r--  mali_kbase/mali_kbase_mem_linux.c | 62
-rw-r--r--  mali_kbase/mali_kbase_pm.c | 4
-rw-r--r--  mali_kbase/mali_kbase_softjobs.c | 24
-rw-r--r--  mali_kbase/mali_kbase_trace_gpu_mem.c | 227
-rw-r--r--  mali_kbase/mali_kbase_trace_gpu_mem.h | 101
-rw-r--r--  mali_kbase/mali_kbase_vinstr.c | 147
-rw-r--r--  mali_kbase/mali_linux_trace.h | 8
-rw-r--r--  mali_kbase/mali_power_gpu_frequency_trace.c | 27
-rw-r--r--  mali_kbase/mali_power_gpu_frequency_trace.h | 69
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu.c | 51
-rw-r--r--  mali_kbase/platform/devicetree/Kbuild | 5
-rw-r--r--  mali_kbase/platform/devicetree/mali_kbase_clk_rate_trace.c | 68
-rw-r--r--  mali_kbase/platform/devicetree/mali_kbase_config_platform.h | 5
-rw-r--r--  mali_kbase/tests/Kbuild | 3
-rw-r--r--  mali_kbase/tests/Kconfig | 3
-rw-r--r--  mali_kbase/tests/include/kutf/kutf_helpers.h | 10
-rw-r--r--  mali_kbase/tests/kutf/kutf_helpers.c | 8
-rw-r--r--  mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/Kbuild | 26
-rw-r--r--  mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/Kconfig | 30
-rw-r--r--  mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/Makefile | 57
-rw-r--r--  mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/build.bp | 34
-rw-r--r--  mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c | 886
-rw-r--r--  mali_kbase/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h | 148
72 files changed, 6232 insertions, 1039 deletions
diff --git a/mali_kbase/Kbuild b/mali_kbase/Kbuild
index 192ac06..06dda9c 100644
--- a/mali_kbase/Kbuild
+++ b/mali_kbase/Kbuild
@@ -21,9 +21,12 @@
# Driver version string which is returned to userspace via an ioctl
-MALI_RELEASE_NAME ?= "r25p0-01eac0"
+MALI_RELEASE_NAME ?= "r26p0-01eac0"
# Paths required for build
+
+# Make $(src) an absolute path if it isn't already, by prefixing $(srctree)
+src:=$(if $(patsubst /%,,$(src)),$(srctree)/$(src),$(src))
KBASE_PATH = $(src)
KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy
UMP_PATH = $(src)/../../../base
@@ -34,6 +37,7 @@ MALI_USE_CSF ?= 0
MALI_UNIT_TEST ?= 0
MALI_KERNEL_TEST_API ?= 0
MALI_COVERAGE ?= 0
+MALI_JIT_PRESSURE_LIMIT_BASE ?= 1
CONFIG_MALI_PLATFORM_NAME ?= "devicetree"
# Experimental features (corresponding -D definition should be appended to
# DEFINES below, e.g. for MALI_EXPERIMENTAL_FEATURE,
@@ -41,7 +45,6 @@ CONFIG_MALI_PLATFORM_NAME ?= "devicetree"
#
# Experimental features must default to disabled, e.g.:
# MALI_EXPERIMENTAL_FEATURE ?= 0
-MALI_JIT_PRESSURE_LIMIT ?= 0
MALI_INCREMENTAL_RENDERING ?= 0
# Set up our defines, which will be passed to gcc
@@ -52,7 +55,7 @@ DEFINES = \
-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
-DMALI_COVERAGE=$(MALI_COVERAGE) \
-DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \
- -DMALI_JIT_PRESSURE_LIMIT=$(MALI_JIT_PRESSURE_LIMIT) \
+ -DMALI_JIT_PRESSURE_LIMIT_BASE=$(MALI_JIT_PRESSURE_LIMIT_BASE) \
-DMALI_INCREMENTAL_RENDERING=$(MALI_INCREMENTAL_RENDERING)
ifeq ($(KBUILD_EXTMOD),)
@@ -76,6 +79,7 @@ SRC := \
debug/mali_kbase_debug_ktrace.c \
device/mali_kbase_device.c \
mali_kbase_cache_policy.c \
+ mali_kbase_ccswe.c \
mali_kbase_mem.c \
mali_kbase_mem_pool_group.c \
mali_kbase_native_mgm.c \
@@ -86,7 +90,7 @@ SRC := \
mali_kbase_config.c \
mali_kbase_vinstr.c \
mali_kbase_hwcnt.c \
- mali_kbase_hwcnt_backend_gpu.c \
+ mali_kbase_hwcnt_backend_jm.c \
mali_kbase_hwcnt_gpu.c \
mali_kbase_hwcnt_legacy.c \
mali_kbase_hwcnt_types.c \
@@ -111,12 +115,14 @@ SRC := \
mali_kbase_strings.c \
mali_kbase_as_fault_debugfs.c \
mali_kbase_regs_history_debugfs.c \
+ mali_power_gpu_frequency_trace.c \
thirdparty/mali_kbase_mmap.c \
tl/mali_kbase_timeline.c \
tl/mali_kbase_timeline_io.c \
tl/mali_kbase_tlstream.c \
tl/mali_kbase_tracepoints.c \
- gpu/mali_kbase_gpu.c
+ gpu/mali_kbase_gpu.c \
+ mali_kbase_trace_gpu_mem.c
ifeq ($(MALI_USE_CSF),1)
SRC += \
@@ -135,6 +141,7 @@ else
mali_kbase_jd_debugfs.c \
mali_kbase_js.c \
mali_kbase_js_ctx_attr.c \
+ mali_kbase_kinstr_jm.c \
debug/backend/mali_kbase_debug_ktrace_jm.c \
device/backend/mali_kbase_device_jm.c \
gpu/backend/mali_kbase_gpu_fault_jm.c \
diff --git a/mali_kbase/Kconfig b/mali_kbase/Kconfig
index 58a5b0b..ca59dbb 100644
--- a/mali_kbase/Kconfig
+++ b/mali_kbase/Kconfig
@@ -230,6 +230,10 @@ config MALI_DMA_BUF_LEGACY_COMPAT
maintenance where MALI_DMA_BUF_MAP_ON_DEMAND would do the mapping,
including a cache flush.
+ This option might work around issues related to missing cache
+ flushes in other drivers. This only has an effect for clients using
+ UK 11.18 or older. For later UK versions it is not possible.
+
config MALI_HW_ERRATA_1485982_NOT_AFFECTED
bool "Disable workaround for BASE_HW_ISSUE_GPU2017_1336"
depends on MALI_MIDGARD && MALI_EXPERT
diff --git a/mali_kbase/backend/gpu/Kbuild b/mali_kbase/backend/gpu/Kbuild
index 2449e80..0b3e073 100644
--- a/mali_kbase/backend/gpu/Kbuild
+++ b/mali_kbase/backend/gpu/Kbuild
@@ -1,5 +1,5 @@
#
-# (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -34,7 +34,8 @@ BACKEND += \
backend/gpu/mali_kbase_pm_coarse_demand.c \
backend/gpu/mali_kbase_pm_policy.c \
backend/gpu/mali_kbase_time.c \
- backend/gpu/mali_kbase_l2_mmu_config.c
+ backend/gpu/mali_kbase_l2_mmu_config.c \
+ backend/gpu/mali_kbase_clk_rate_trace_mgr.c
ifeq ($(MALI_USE_CSF),1)
# empty
diff --git a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c
new file mode 100644
index 0000000..18bb117
--- /dev/null
+++ b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c
@@ -0,0 +1,280 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Implementation of the GPU clock rate trace manager.
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <linux/clk.h>
+#include <asm/div64.h>
+#include "mali_kbase_clk_rate_trace_mgr.h"
+
+#ifdef CONFIG_TRACE_POWER_GPU_FREQUENCY
+#include <trace/events/power_gpu_frequency.h>
+#else
+#include "mali_power_gpu_frequency_trace.h"
+#endif
+
+#ifndef CLK_RATE_TRACE_OPS
+#define CLK_RATE_TRACE_OPS (NULL)
+#endif
+
+static int gpu_clk_rate_change_notifier(struct notifier_block *nb,
+ unsigned long event, void *data)
+{
+ struct kbase_gpu_clk_notifier_data *ndata = data;
+ struct kbase_clk_data *clk_data =
+ container_of(nb, struct kbase_clk_data, clk_rate_change_nb);
+ struct kbase_clk_rate_trace_manager *clk_rtm = clk_data->clk_rtm;
+ unsigned long flags;
+
+ if (WARN_ON_ONCE(clk_data->gpu_clk_handle != ndata->gpu_clk_handle))
+ return NOTIFY_BAD;
+
+ spin_lock_irqsave(&clk_rtm->lock, flags);
+ if (event == POST_RATE_CHANGE) {
+ if (!clk_rtm->gpu_idle &&
+ (clk_data->clock_val != ndata->new_rate)) {
+ kbase_clk_rate_trace_manager_notify_all(
+ clk_rtm, clk_data->index, ndata->new_rate);
+ }
+
+ clk_data->clock_val = ndata->new_rate;
+ }
+ spin_unlock_irqrestore(&clk_rtm->lock, flags);
+
+ return NOTIFY_DONE;
+}
+
+static int gpu_clk_data_init(struct kbase_device *kbdev,
+ void *gpu_clk_handle, unsigned int index)
+{
+ struct kbase_clk_rate_trace_op_conf *callbacks =
+ (struct kbase_clk_rate_trace_op_conf *)CLK_RATE_TRACE_OPS;
+ struct kbase_clk_data *clk_data;
+ struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm;
+ int ret = 0;
+
+ if (WARN_ON(!callbacks) ||
+ WARN_ON(!gpu_clk_handle) ||
+ WARN_ON(index >= BASE_MAX_NR_CLOCKS_REGULATORS))
+ return -EINVAL;
+
+ clk_data = kzalloc(sizeof(*clk_data), GFP_KERNEL);
+ if (!clk_data) {
+ dev_err(kbdev->dev, "Failed to allocate data for clock enumerated at index %u", index);
+ return -ENOMEM;
+ }
+
+ clk_data->index = (u8)index;
+ clk_data->gpu_clk_handle = gpu_clk_handle;
+ /* Store the initial value of clock */
+ clk_data->clock_val =
+ callbacks->get_gpu_clk_rate(kbdev, gpu_clk_handle);
+
+ {
+ /* At the initialization time, GPU is powered off. */
+ unsigned long flags;
+
+ spin_lock_irqsave(&clk_rtm->lock, flags);
+ kbase_clk_rate_trace_manager_notify_all(
+ clk_rtm, clk_data->index, 0);
+ spin_unlock_irqrestore(&clk_rtm->lock, flags);
+ }
+
+ clk_data->clk_rtm = clk_rtm;
+ clk_rtm->clks[index] = clk_data;
+
+ clk_data->clk_rate_change_nb.notifier_call =
+ gpu_clk_rate_change_notifier;
+
+ ret = callbacks->gpu_clk_notifier_register(kbdev, gpu_clk_handle,
+ &clk_data->clk_rate_change_nb);
+ if (ret) {
+ dev_err(kbdev->dev, "Failed to register notifier for clock enumerated at index %u", index);
+ kfree(clk_data);
+ }
+
+ return ret;
+}
+
+int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev)
+{
+ struct kbase_clk_rate_trace_op_conf *callbacks =
+ (struct kbase_clk_rate_trace_op_conf *)CLK_RATE_TRACE_OPS;
+ struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm;
+ unsigned int i;
+ int ret = 0;
+
+ /* Return early if no callbacks provided for clock rate tracing */
+ if (!callbacks)
+ return 0;
+
+ spin_lock_init(&clk_rtm->lock);
+ INIT_LIST_HEAD(&clk_rtm->listeners);
+
+ clk_rtm->gpu_idle = true;
+
+ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
+ void *gpu_clk_handle =
+ callbacks->enumerate_gpu_clk(kbdev, i);
+
+ if (!gpu_clk_handle)
+ break;
+
+ ret = gpu_clk_data_init(kbdev, gpu_clk_handle, i);
+ if (ret)
+ goto error;
+ }
+
+ /* Activate clock rate trace manager if at least one GPU clock was
+ * enumerated.
+ */
+ if (i)
+ WRITE_ONCE(clk_rtm->clk_rate_trace_ops, callbacks);
+ else
+ dev_info(kbdev->dev, "No clock(s) available for rate tracing");
+
+ return 0;
+
+error:
+ while (i--) {
+ clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister(
+ kbdev, clk_rtm->clks[i]->gpu_clk_handle,
+ &clk_rtm->clks[i]->clk_rate_change_nb);
+ kfree(clk_rtm->clks[i]);
+ }
+
+ return ret;
+}
+
+void kbase_clk_rate_trace_manager_term(struct kbase_device *kbdev)
+{
+ struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm;
+ unsigned int i;
+
+ WARN_ON(!list_empty(&clk_rtm->listeners));
+
+ if (!clk_rtm->clk_rate_trace_ops)
+ return;
+
+ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
+ if (!clk_rtm->clks[i])
+ break;
+
+ clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister(
+ kbdev, clk_rtm->clks[i]->gpu_clk_handle,
+ &clk_rtm->clks[i]->clk_rate_change_nb);
+ kfree(clk_rtm->clks[i]);
+ }
+
+ WRITE_ONCE(clk_rtm->clk_rate_trace_ops, NULL);
+}
+
+void kbase_clk_rate_trace_manager_gpu_active(struct kbase_device *kbdev)
+{
+ struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm;
+ unsigned int i;
+ unsigned long flags;
+
+ if (!clk_rtm->clk_rate_trace_ops)
+ return;
+
+ spin_lock_irqsave(&clk_rtm->lock, flags);
+
+ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
+ struct kbase_clk_data *clk_data = clk_rtm->clks[i];
+
+ if (!clk_data)
+ break;
+
+ if (unlikely(!clk_data->clock_val))
+ continue;
+
+ kbase_clk_rate_trace_manager_notify_all(
+ clk_rtm, clk_data->index, clk_data->clock_val);
+ }
+
+ clk_rtm->gpu_idle = false;
+ spin_unlock_irqrestore(&clk_rtm->lock, flags);
+}
+
+void kbase_clk_rate_trace_manager_gpu_idle(struct kbase_device *kbdev)
+{
+ struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm;
+ unsigned int i;
+ unsigned long flags;
+
+ if (!clk_rtm->clk_rate_trace_ops)
+ return;
+
+ spin_lock_irqsave(&clk_rtm->lock, flags);
+
+ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
+ struct kbase_clk_data *clk_data = clk_rtm->clks[i];
+
+ if (!clk_data)
+ break;
+
+ if (unlikely(!clk_data->clock_val))
+ continue;
+
+ kbase_clk_rate_trace_manager_notify_all(
+ clk_rtm, clk_data->index, 0);
+ }
+
+ clk_rtm->gpu_idle = true;
+ spin_unlock_irqrestore(&clk_rtm->lock, flags);
+}
+
+void kbase_clk_rate_trace_manager_notify_all(
+ struct kbase_clk_rate_trace_manager *clk_rtm,
+ u32 clk_index,
+ unsigned long new_rate)
+{
+ struct kbase_clk_rate_listener *pos;
+ struct kbase_device *kbdev;
+
+ lockdep_assert_held(&clk_rtm->lock);
+
+ kbdev = container_of(clk_rtm, struct kbase_device, pm.clk_rtm);
+
+ dev_dbg(kbdev->dev, "GPU clock %u rate changed to %lu",
+ clk_index, new_rate);
+
+ /* Raise standard `power/gpu_frequency` ftrace event */
+ {
+ unsigned long new_rate_khz = new_rate;
+
+ do_div(new_rate_khz, 1000);
+ trace_gpu_frequency(new_rate_khz, clk_index);
+ }
+
+ /* Notify the listeners. */
+ list_for_each_entry(pos, &clk_rtm->listeners, node) {
+ pos->notify(pos, clk_index, new_rate);
+ }
+}
+KBASE_EXPORT_TEST_API(kbase_clk_rate_trace_manager_notify_all);
+
diff --git a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h
new file mode 100644
index 0000000..dcafb26
--- /dev/null
+++ b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h
@@ -0,0 +1,155 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_CLK_RATE_TRACE_MGR_
+#define _KBASE_CLK_RATE_TRACE_MGR_
+
+/** The index of top clock domain in kbase_clk_rate_trace_manager:clks. */
+#define KBASE_CLOCK_DOMAIN_TOP (0)
+
+/** The index of shader-cores clock domain in
+ * kbase_clk_rate_trace_manager:clks.
+ */
+#define KBASE_CLOCK_DOMAIN_SHADER_CORES (1)
+
+/**
+ * struct kbase_clk_data - Data stored per enumerated GPU clock.
+ *
+ * @clk_rtm: Pointer to clock rate trace manager object.
+ * @gpu_clk_handle: Handle unique to the enumerated GPU clock.
+ * @plat_private: Private data for the platform to store into
+ * @clk_rate_change_nb: notifier block containing the pointer to callback
+ * function that is invoked whenever the rate of
+ * enumerated GPU clock changes.
+ * @clock_val: Current rate of the enumerated GPU clock.
+ * @index: Index at which the GPU clock was enumerated.
+ */
+struct kbase_clk_data {
+ struct kbase_clk_rate_trace_manager *clk_rtm;
+ void *gpu_clk_handle;
+ void *plat_private;
+ struct notifier_block clk_rate_change_nb;
+ unsigned long clock_val;
+ u8 index;
+};
+
+/**
+ * kbase_clk_rate_trace_manager_init - Initialize GPU clock rate trace manager.
+ *
+ * @kbdev: Device pointer
+ *
+ * Return: 0 if success, or an error code on failure.
+ */
+int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_clk_rate_trace_manager_term - Terminate GPU clock rate trace manager.
+ *
+ * @kbdev: Device pointer
+ */
+void kbase_clk_rate_trace_manager_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_clk_rate_trace_manager_gpu_active - Inform GPU clock rate trace
+ * manager of GPU becoming active.
+ *
+ * @kbdev: Device pointer
+ */
+void kbase_clk_rate_trace_manager_gpu_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_clk_rate_trace_manager_gpu_idle - Inform GPU clock rate trace
+ * manager of GPU becoming idle.
+ * @kbdev: Device pointer
+ */
+void kbase_clk_rate_trace_manager_gpu_idle(struct kbase_device *kbdev);
+
+/**
+ * kbase_clk_rate_trace_manager_subscribe_no_lock() - Add freq change listener.
+ *
+ * @clk_rtm: Clock rate manager instance.
+ * @listener: Listener handle
+ *
+ * kbase_clk_rate_trace_manager:lock must be held by the caller.
+ */
+static inline void kbase_clk_rate_trace_manager_subscribe_no_lock(
+ struct kbase_clk_rate_trace_manager *clk_rtm,
+ struct kbase_clk_rate_listener *listener)
+{
+ lockdep_assert_held(&clk_rtm->lock);
+ list_add(&listener->node, &clk_rtm->listeners);
+}
+
+/**
+ * kbase_clk_rate_trace_manager_subscribe() - Add freq change listener.
+ *
+ * @clk_rtm: Clock rate manager instance.
+ * @listener: Listener handle
+ */
+static inline void kbase_clk_rate_trace_manager_subscribe(
+ struct kbase_clk_rate_trace_manager *clk_rtm,
+ struct kbase_clk_rate_listener *listener)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&clk_rtm->lock, flags);
+ kbase_clk_rate_trace_manager_subscribe_no_lock(
+ clk_rtm, listener);
+ spin_unlock_irqrestore(&clk_rtm->lock, flags);
+}
+
+/**
+ * kbase_clk_rate_trace_manager_unsubscribe() - Remove freq change listener.
+ *
+ * @clk_rtm: Clock rate manager instance.
+ * @listener: Listener handle
+ */
+static inline void kbase_clk_rate_trace_manager_unsubscribe(
+ struct kbase_clk_rate_trace_manager *clk_rtm,
+ struct kbase_clk_rate_listener *listener)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&clk_rtm->lock, flags);
+ list_del(&listener->node);
+ spin_unlock_irqrestore(&clk_rtm->lock, flags);
+}
+
+/**
+ * kbase_clk_rate_trace_manager_notify_all() - Notify all clock rate listeners.
+ *
+ * @clk_rtm: Clock rate manager instance.
+ * @clk_index: Clock index.
+ * @new_rate: New clock frequency(Hz)
+ *
+ * kbase_clk_rate_trace_manager:lock must be locked.
+ * This function is exported to be used by clock rate trace test
+ * portal.
+ */
+void kbase_clk_rate_trace_manager_notify_all(
+ struct kbase_clk_rate_trace_manager *clk_rtm,
+ u32 clk_index,
+ unsigned long new_rate);
+
+#endif /* _KBASE_CLK_RATE_TRACE_MGR_ */
+
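The subscribe/unsubscribe helpers above take a struct kbase_clk_rate_listener whose notify callback is invoked, under clk_rtm->lock, by kbase_clk_rate_trace_manager_notify_all() on every rate change. A minimal, hypothetical consumer sketch follows; the tracker structure and function names are illustrative, and it assumes struct kbase_clk_rate_listener exposes the notify callback exactly as notify_all() invokes it:

    /* Hypothetical listener: remembers the last reported rate per clock domain. */
    struct my_freq_tracker {
        struct kbase_clk_rate_listener listener;
        unsigned long last_rate[BASE_MAX_NR_CLOCKS_REGULATORS];
    };

    static void my_freq_tracker_notify(struct kbase_clk_rate_listener *listener,
                                       u32 clk_index, unsigned long new_rate)
    {
        struct my_freq_tracker *tracker =
                container_of(listener, struct my_freq_tracker, listener);

        if (clk_index < BASE_MAX_NR_CLOCKS_REGULATORS)
                tracker->last_rate[clk_index] = new_rate;
    }

    static void my_freq_tracker_start(struct kbase_device *kbdev,
                                      struct my_freq_tracker *tracker)
    {
        tracker->listener.notify = my_freq_tracker_notify;
        kbase_clk_rate_trace_manager_subscribe(&kbdev->pm.clk_rtm,
                                               &tracker->listener);
    }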
diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
index 8b320c7..f9c2ec7 100644
--- a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
@@ -87,7 +87,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
enable->dump_buffer >> 32);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
- enable->jm_bm);
+ enable->fe_bm);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
enable->shader_bm);
diff --git a/mali_kbase/backend/gpu/mali_kbase_irq_linux.c b/mali_kbase/backend/gpu/mali_kbase_irq_linux.c
index 21b2aa2..8696c6a 100644
--- a/mali_kbase/backend/gpu/mali_kbase_irq_linux.c
+++ b/mali_kbase/backend/gpu/mali_kbase_irq_linux.c
@@ -79,8 +79,6 @@ static irqreturn_t kbase_job_irq_handler(int irq, void *data)
return IRQ_HANDLED;
}
-KBASE_EXPORT_TEST_API(kbase_job_irq_handler);
-
static irqreturn_t kbase_mmu_irq_handler(int irq, void *data)
{
unsigned long flags;
@@ -177,7 +175,7 @@ static irq_handler_t kbase_handler_table[] = {
* Return: IRQ_HANDLED if the requests are from the GPU device,
* IRQ_NONE otherwise
*/
-static irqreturn_t kbase_gpu_irq_test_handler(int irq, void *data, u32 val)
+irqreturn_t kbase_gpu_irq_test_handler(int irq, void *data, u32 val)
{
struct kbase_device *kbdev = kbase_untag(data);
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
index fa6bc83..73c4f6b 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
@@ -33,6 +33,7 @@
#include <mali_kbase_hwaccess_jm.h>
#include <mali_kbase_reset_gpu.h>
#include <mali_kbase_ctx_sched.h>
+#include <mali_kbase_kinstr_jm.h>
#include <mali_kbase_hwcnt_context.h>
#include <backend/gpu/mali_kbase_device_internal.h>
#include <backend/gpu/mali_kbase_irq_internal.h>
@@ -277,6 +278,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
katom,
&kbdev->gpu_props.props.raw_props.js_features[js],
"ctx_nr,atom_nr");
+ kbase_kinstr_jm_atom_hw_submit(katom);
#ifdef CONFIG_GPU_TRACEPOINTS
if (!kbase_backend_nr_atoms_submitted(kbdev, js)) {
/* If this is the only job on the slot, trace it as starting */
@@ -692,12 +694,40 @@ void kbase_backend_jm_kill_running_jobs_from_kctx(struct kbase_context *kctx)
kbase_job_slot_hardstop(kctx, i, NULL);
}
+/**
+ * kbase_is_existing_atom_submitted_later_than_ready() - Check whether the
+ * existing atom was submitted after the ready atom
+ * @ready: sequence number of the ready atom
+ * @existing: sequence number of the existing atom
+ *
+ * This is used to check whether an atom that is ready to run was submitted
+ * earlier than the currently running atom, in which case the running atom
+ * should be preempted to allow the ready atom to run.
+ *
+ * Return: true if the existing atom was submitted later than the ready atom.
+ */
+static inline bool kbase_is_existing_atom_submitted_later_than_ready(u64 ready, u64 existing)
+{
+ /* No seq_nr set? */
+ if (!ready || !existing)
+ return false;
+
+ /* Efficiently handle the unlikely case of wrapping.
+ * The following code assumes that the delta between the sequence numbers
+ * of the two atoms is less than INT64_MAX.
+ * In the extremely unlikely case where the delta is higher, the comparison
+ * defaults to no preemption.
+ * The code also assumes that the conversion from unsigned to signed types
+ * works because the signed integers are 2's complement.
+ */
+ return (s64)(ready - existing) < 0;
+}
+
void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
struct kbase_jd_atom *target_katom)
{
struct kbase_device *kbdev;
int js = target_katom->slot_nr;
int priority = target_katom->sched_priority;
+ int seq_nr = target_katom->seq_nr;
int i;
bool stop_sent = false;
@@ -719,7 +749,8 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
(katom->kctx != kctx))
continue;
- if (katom->sched_priority > priority) {
+ if ((katom->sched_priority > priority) ||
+ (katom->kctx == kctx && kbase_is_existing_atom_submitted_later_than_ready(seq_nr, katom->seq_nr))) {
if (!stop_sent)
KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED(
kbdev,
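The wrap-safe comparison above relies on unsigned subtraction followed by a signed cast, so it stays correct even if the 64-bit sequence counter overflows, provided the two atoms are less than INT64_MAX apart. A small user-space sketch (illustrative values only, not driver code) showing the interesting cases:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Mirrors kbase_is_existing_atom_submitted_later_than_ready() above. */
    static bool later_than_ready(uint64_t ready, uint64_t existing)
    {
        if (!ready || !existing)
                return false;
        return (int64_t)(ready - existing) < 0;
    }

    int main(void)
    {
        printf("%d\n", later_than_ready(10, 20));            /* 1: existing was submitted later */
        printf("%d\n", later_than_ready(20, 10));            /* 0: existing was submitted earlier */
        printf("%d\n", later_than_ready(UINT64_MAX - 5, 3)); /* 1: existing wrapped past UINT64_MAX */
        return 0;
    }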
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
index ec7bcb1..8b409a0 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
@@ -33,6 +33,7 @@
#include <tl/mali_kbase_tracepoints.h>
#include <mali_kbase_hwcnt_context.h>
#include <mali_kbase_reset_gpu.h>
+#include <mali_kbase_kinstr_jm.h>
#include <backend/gpu/mali_kbase_cache_policy_backend.h>
#include <backend/gpu/mali_kbase_device_internal.h>
#include <backend/gpu/mali_kbase_jm_internal.h>
@@ -278,6 +279,7 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev,
break;
case KBASE_ATOM_GPU_RB_SUBMITTED:
+ kbase_kinstr_jm_atom_hw_release(katom);
/* Inform power management at start/finish of atom so it can
* update its GPU utilisation metrics. Mark atom as not
* submitted beforehand. */
diff --git a/mali_kbase/backend/gpu/mali_kbase_js_backend.c b/mali_kbase/backend/gpu/mali_kbase_js_backend.c
index fcc0437..d2d11a3 100644
--- a/mali_kbase/backend/gpu/mali_kbase_js_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_js_backend.c
@@ -37,7 +37,7 @@
static inline bool timer_callback_should_run(struct kbase_device *kbdev)
{
struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
- s8 nr_running_ctxs;
+ int nr_running_ctxs;
lockdep_assert_held(&kbdev->js_data.runpool_mutex);
@@ -69,10 +69,10 @@ static inline bool timer_callback_should_run(struct kbase_device *kbdev)
* don't check KBASEP_JS_CTX_ATTR_NON_COMPUTE).
*/
{
- s8 nr_compute_ctxs =
+ int nr_compute_ctxs =
kbasep_js_ctx_attr_count_on_runpool(kbdev,
KBASEP_JS_CTX_ATTR_COMPUTE);
- s8 nr_noncompute_ctxs = nr_running_ctxs -
+ int nr_noncompute_ctxs = nr_running_ctxs -
nr_compute_ctxs;
return (bool) (nr_compute_ctxs >= 2 ||
diff --git a/mali_kbase/backend/gpu/mali_kbase_time.c b/mali_kbase/backend/gpu/mali_kbase_time.c
index cb10518..a9c33e2 100644
--- a/mali_kbase/backend/gpu/mali_kbase_time.c
+++ b/mali_kbase/backend/gpu/mali_kbase_time.c
@@ -25,13 +25,13 @@
#include <backend/gpu/mali_kbase_device_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
-void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
- u64 *system_time, struct timespec64 *ts)
+void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev,
+ u64 *cycle_counter,
+ u64 *system_time,
+ struct timespec64 *ts)
{
u32 hi1, hi2;
- kbase_pm_request_gpu_cycle_counter(kbdev);
-
if (cycle_counter) {
/* Read hi, lo, hi to ensure a coherent u64 */
do {
@@ -65,6 +65,13 @@ void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
#else
ktime_get_raw_ts64(ts);
#endif
+}
+
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+ u64 *system_time, struct timespec64 *ts)
+{
+ kbase_pm_request_gpu_cycle_counter(kbdev);
+ kbase_backend_get_gpu_time_norequest(
+ kbdev, cycle_counter, system_time, ts);
kbase_pm_release_gpu_cycle_counter(kbdev);
}
diff --git a/mali_kbase/context/backend/mali_kbase_context_jm.c b/mali_kbase/context/backend/mali_kbase_context_jm.c
index 2cd2551..5d5b639 100644
--- a/mali_kbase/context/backend/mali_kbase_context_jm.c
+++ b/mali_kbase/context/backend/mali_kbase_context_jm.c
@@ -30,6 +30,7 @@
#include <mali_kbase.h>
#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_dma_fence.h>
+#include <mali_kbase_kinstr_jm.h>
#include <mali_kbase_mem_linux.h>
#include <mali_kbase_mem_pool_group.h>
#include <mmu/mali_kbase_mmu.h>
@@ -70,6 +71,21 @@ void kbase_context_debugfs_term(struct kbase_context *const kctx)
KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term);
#endif /* CONFIG_DEBUG_FS */
+static int kbase_context_kbase_kinstr_jm_init(struct kbase_context *kctx)
+{
+ int ret = kbase_kinstr_jm_init(&kctx->kinstr_jm);
+
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static void kbase_context_kbase_kinstr_jm_term(struct kbase_context *kctx)
+{
+ kbase_kinstr_jm_term(kctx->kinstr_jm);
+}
+
static int kbase_context_kbase_timer_setup(struct kbase_context *kctx)
{
kbase_timer_setup(&kctx->soft_job_timeout,
@@ -122,6 +138,8 @@ static const struct kbase_context_init context_init[] = {
"Sticky resource initialization failed"},
{kbase_jit_init, kbase_jit_term,
"JIT initialization failed"},
+ {kbase_context_kbase_kinstr_jm_init, kbase_context_kbase_kinstr_jm_term,
+ "JM instrumentation initialization failed"},
{kbase_context_kbase_timer_setup, NULL, NULL},
{kbase_context_submit_check, NULL, NULL},
};
diff --git a/mali_kbase/context/mali_kbase_context.c b/mali_kbase/context/mali_kbase_context.c
index 93fe431..5c27224 100644
--- a/mali_kbase/context/mali_kbase_context.c
+++ b/mali_kbase/context/mali_kbase_context.c
@@ -36,9 +36,99 @@
#include <mmu/mali_kbase_mmu.h>
#include <context/mali_kbase_context_internal.h>
+/**
+ * find_process_node - Traverse the process rb_tree to find out whether the
+ * process already exists in it.
+ *
+ * @node: Pointer to root node to start search.
+ * @tgid: Thread group PID to search for.
+ *
+ * Return: Pointer to kbase_process if the process exists, otherwise NULL.
+ */
+static struct kbase_process *find_process_node(struct rb_node *node, pid_t tgid)
+{
+ struct kbase_process *kprcs = NULL;
+
+ /* Check if the kctx creation request is from an existing process. */
+ while (node) {
+ struct kbase_process *prcs_node =
+ rb_entry(node, struct kbase_process, kprcs_node);
+ if (prcs_node->tgid == tgid) {
+ kprcs = prcs_node;
+ break;
+ }
+
+ if (tgid < prcs_node->tgid)
+ node = node->rb_left;
+ else
+ node = node->rb_right;
+ }
+
+ return kprcs;
+}
+
+/**
+ * kbase_insert_kctx_to_process - Initialise kbase process context.
+ *
+ * @kctx: Pointer to kbase context.
+ *
+ * Here we initialise the per-process rb_tree managed by kbase_device.
+ * We maintain an rb_tree node for each unique process that gets created,
+ * and each process maintains a list of its kbase contexts.
+ * This setup is currently used by the kernel trace functionality
+ * to trace and visualise GPU memory consumption.
+ *
+ * Return: 0 on success and error number on failure.
+ */
+static int kbase_insert_kctx_to_process(struct kbase_context *kctx)
+{
+ struct rb_root *const prcs_root = &kctx->kbdev->process_root;
+ const pid_t tgid = kctx->tgid;
+ struct kbase_process *kprcs = NULL;
+
+ lockdep_assert_held(&kctx->kbdev->kctx_list_lock);
+
+ kprcs = find_process_node(prcs_root->rb_node, tgid);
+
+ /* If the kctx is from a new process then create a new kbase_process
+ * and add it to the rb_tree of processes maintained by kbase_device.
+ */
+ if (!kprcs) {
+ struct rb_node **new = &prcs_root->rb_node, *parent = NULL;
+
+ kprcs = kzalloc(sizeof(*kprcs), GFP_KERNEL);
+ if (kprcs == NULL)
+ return -ENOMEM;
+ kprcs->tgid = tgid;
+ INIT_LIST_HEAD(&kprcs->kctx_list);
+ kprcs->dma_buf_root = RB_ROOT;
+ kprcs->total_gpu_pages = 0;
+
+ while (*new) {
+ struct kbase_process *prcs_node;
+
+ parent = *new;
+ prcs_node = rb_entry(parent, struct kbase_process,
+ kprcs_node);
+ if (tgid < prcs_node->tgid)
+ new = &(*new)->rb_left;
+ else
+ new = &(*new)->rb_right;
+ }
+ rb_link_node(&kprcs->kprcs_node, parent, new);
+ rb_insert_color(&kprcs->kprcs_node, prcs_root);
+ }
+
+ kctx->kprcs = kprcs;
+ list_add(&kctx->kprcs_link, &kprcs->kctx_list);
+
+ return 0;
+}
+
int kbase_context_common_init(struct kbase_context *kctx)
{
const unsigned long cookies_mask = KBASE_COOKIE_MASK;
+ int err = 0;
/* creating a context is considered a disjoint event */
kbase_disjoint_event(kctx->kbdev);
@@ -81,13 +171,50 @@ int kbase_context_common_init(struct kbase_context *kctx)
mutex_lock(&kctx->kbdev->kctx_list_lock);
list_add(&kctx->kctx_list_link, &kctx->kbdev->kctx_list);
+ err = kbase_insert_kctx_to_process(kctx);
+ if (err)
+ dev_err(kctx->kbdev->dev,
+ "(err:%d) failed to insert kctx to kbase_process\n", err);
+
KBASE_TLSTREAM_TL_KBASE_NEW_CTX(kctx->kbdev, kctx->id,
kctx->kbdev->gpu_props.props.raw_props.gpu_id);
KBASE_TLSTREAM_TL_NEW_CTX(kctx->kbdev, kctx, kctx->id,
(u32)(kctx->tgid));
mutex_unlock(&kctx->kbdev->kctx_list_lock);
- return 0;
+ return err;
+}
+
+/**
+ * kbase_remove_kctx_from_process - remove a terminating context from
+ * the process list.
+ *
+ * @kctx: Pointer to kbase context.
+ *
+ * Remove the tracking of context from the list of contexts maintained under
+ * kbase process and if the list if empty then there no outstanding contexts
+ * we can remove the process node as well.
+ */
+
+static void kbase_remove_kctx_from_process(struct kbase_context *kctx)
+{
+ struct kbase_process *kprcs = kctx->kprcs;
+
+ lockdep_assert_held(&kctx->kbdev->kctx_list_lock);
+ list_del(&kctx->kprcs_link);
+
+ /* if there are no outstanding contexts in current process node,
+ * we can remove it from the process rb_tree.
+ */
+ if (list_empty(&kprcs->kctx_list)) {
+ rb_erase(&kprcs->kprcs_node, &kctx->kbdev->process_root);
+ /* Sanity checks: the terminating process should not be holding
+ * any GPU memory.
+ */
+ WARN_ON(kprcs->total_gpu_pages);
+ WARN_ON(!RB_EMPTY_ROOT(&kprcs->dma_buf_root));
+ kfree(kprcs);
+ }
}
void kbase_context_common_term(struct kbase_context *kctx)
@@ -109,6 +236,7 @@ void kbase_context_common_term(struct kbase_context *kctx)
WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0);
mutex_lock(&kctx->kbdev->kctx_list_lock);
+ kbase_remove_kctx_from_process(kctx);
KBASE_TLSTREAM_TL_KBASE_DEL_CTX(kctx->kbdev, kctx->id);
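Because every context now links itself into a per-tgid kbase_process node, per-process GPU memory can be attributed by looking up kbdev->process_root under kctx_list_lock. A hypothetical helper sketch built on the structures added above (not part of this patch; since find_process_node() is static, it would have to live in the same file, and the function name is made up):

    /* Look up the GPU pages currently attributed to a given tgid. */
    static u64 kbase_process_gpu_pages(struct kbase_device *kbdev, pid_t tgid)
    {
        struct kbase_process *kprcs;
        u64 pages = 0;

        mutex_lock(&kbdev->kctx_list_lock);
        kprcs = find_process_node(kbdev->process_root.rb_node, tgid);
        if (kprcs)
                pages = kprcs->total_gpu_pages;
        mutex_unlock(&kbdev->kctx_list_lock);

        return pages;
    }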
diff --git a/mali_kbase/device/backend/mali_kbase_device_jm.c b/mali_kbase/device/backend/mali_kbase_device_jm.c
index fbba2e7..2a45a33 100644
--- a/mali_kbase/device/backend/mali_kbase_device_jm.c
+++ b/mali_kbase/device/backend/mali_kbase_device_jm.c
@@ -43,6 +43,7 @@
#include <backend/gpu/mali_kbase_js_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <mali_kbase_dummy_job_wa.h>
+#include <backend/gpu/mali_kbase_clk_rate_trace_mgr.h>
/**
* kbase_backend_late_init - Perform any backend-specific initialization.
@@ -178,8 +179,11 @@ static const struct kbase_device_init dev_init[] = {
"Job JS devdata initialization failed"},
{kbase_device_timeline_init, kbase_device_timeline_term,
"Timeline stream initialization failed"},
- {kbase_device_hwcnt_backend_gpu_init,
- kbase_device_hwcnt_backend_gpu_term,
+ {kbase_clk_rate_trace_manager_init,
+ kbase_clk_rate_trace_manager_term,
+ "Clock rate trace manager initialization failed"},
+ {kbase_device_hwcnt_backend_jm_init,
+ kbase_device_hwcnt_backend_jm_term,
"GPU hwcnt backend creation failed"},
{kbase_device_hwcnt_context_init, kbase_device_hwcnt_context_term,
"GPU hwcnt context initialization failed"},
diff --git a/mali_kbase/device/mali_kbase_device.c b/mali_kbase/device/mali_kbase_device.c
index 76f14e5..d0b85ba 100644
--- a/mali_kbase/device/mali_kbase_device.c
+++ b/mali_kbase/device/mali_kbase_device.c
@@ -271,14 +271,14 @@ void kbase_increment_device_id(void)
kbase_dev_nr++;
}
-int kbase_device_hwcnt_backend_gpu_init(struct kbase_device *kbdev)
+int kbase_device_hwcnt_backend_jm_init(struct kbase_device *kbdev)
{
- return kbase_hwcnt_backend_gpu_create(kbdev, &kbdev->hwcnt_gpu_iface);
+ return kbase_hwcnt_backend_jm_create(kbdev, &kbdev->hwcnt_gpu_iface);
}
-void kbase_device_hwcnt_backend_gpu_term(struct kbase_device *kbdev)
+void kbase_device_hwcnt_backend_jm_term(struct kbase_device *kbdev)
{
- kbase_hwcnt_backend_gpu_destroy(&kbdev->hwcnt_gpu_iface);
+ kbase_hwcnt_backend_jm_destroy(&kbdev->hwcnt_gpu_iface);
}
int kbase_device_hwcnt_context_init(struct kbase_device *kbdev)
diff --git a/mali_kbase/device/mali_kbase_device_internal.h b/mali_kbase/device/mali_kbase_device_internal.h
index 9f96db0..5464458 100644
--- a/mali_kbase/device/mali_kbase_device_internal.h
+++ b/mali_kbase/device/mali_kbase_device_internal.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -43,8 +43,8 @@ void kbase_device_vinstr_term(struct kbase_device *kbdev);
int kbase_device_timeline_init(struct kbase_device *kbdev);
void kbase_device_timeline_term(struct kbase_device *kbdev);
-int kbase_device_hwcnt_backend_gpu_init(struct kbase_device *kbdev);
-void kbase_device_hwcnt_backend_gpu_term(struct kbase_device *kbdev);
+int kbase_device_hwcnt_backend_jm_init(struct kbase_device *kbdev);
+void kbase_device_hwcnt_backend_jm_term(struct kbase_device *kbdev);
int kbase_device_hwcnt_context_init(struct kbase_device *kbdev);
void kbase_device_hwcnt_context_term(struct kbase_device *kbdev);
diff --git a/mali_kbase/jm/mali_base_jm_kernel.h b/mali_kbase/jm/mali_base_jm_kernel.h
index 879a436..ce36020 100644
--- a/mali_kbase/jm/mali_base_jm_kernel.h
+++ b/mali_kbase/jm/mali_base_jm_kernel.h
@@ -155,18 +155,23 @@
/* Use the GPU VA chosen by the kernel client */
#define BASE_MEM_FLAG_MAP_FIXED ((base_mem_alloc_flags)1 << 27)
+/* Bit 28 reserved for Kernel side cache sync ops flag */
+
+/* Force trimming of JIT allocations when creating a new allocation */
+#define BASEP_MEM_PERFORM_JIT_TRIM ((base_mem_alloc_flags)1 << 29)
+
/* Number of bits used as flags for base memory management
*
* Must be kept in sync with the base_mem_alloc_flags flags
*/
-#define BASE_MEM_FLAGS_NR_BITS 28
+#define BASE_MEM_FLAGS_NR_BITS 30
/* A mask of all the flags which are only valid for allocations within kbase,
* and may not be passed from user space.
*/
#define BASEP_MEM_FLAGS_KERNEL_ONLY \
(BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE | \
- BASE_MEM_FLAG_MAP_FIXED)
+ BASE_MEM_FLAG_MAP_FIXED | BASEP_MEM_PERFORM_JIT_TRIM)
/* A mask for all output bits, excluding IN/OUT bits.
*/
@@ -192,6 +197,28 @@
#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \
BASE_MEM_COOKIE_BASE)
+/* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the
+ * initial commit is aligned to 'extent' pages, where 'extent' must be a power
+ * of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES
+ */
+#define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP (1 << 0)
+
+/**
+ * If set, the heap info address points to a u32 holding the used size in bytes;
+ * otherwise it points to a u64 holding the lowest address of unused memory.
+ */
+#define BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE (1 << 1)
+
+/**
+ * Valid set of just-in-time memory allocation flags
+ *
+ * Note: BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE cannot be set if heap_info_gpu_addr
+ * in %base_jit_alloc_info is 0 (atom with BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE set
+ * and heap_info_gpu_addr being 0 will be rejected).
+ */
+#define BASE_JIT_ALLOC_VALID_FLAGS \
+ (BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP | BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE)
+
/**
* typedef base_context_create_flags - Flags to pass to ::base_context_init.
*
@@ -787,6 +814,54 @@ struct base_jd_atom_v2 {
u8 padding[7];
};
+/**
+ * struct base_jd_atom - Same as base_jd_atom_v2, but has an extra seq_nr
+ * at the beginning.
+ *
+ * @seq_nr: Sequence number of logical grouping of atoms.
+ * @jc: GPU address of a job chain or (if BASE_JD_REQ_END_RENDERPASS
+ * is set in the base_jd_core_req) the CPU address of a
+ * base_jd_fragment object.
+ * @udata: User data.
+ * @extres_list: List of external resources.
+ * @nr_extres: Number of external resources or JIT allocations.
+ * @jit_id: Zero-terminated array of IDs of just-in-time memory
+ * allocations written to by the atom. When the atom
+ * completes, the value stored at the
+ * &struct_base_jit_alloc_info.heap_info_gpu_addr of
+ * each allocation is read in order to enforce an
+ * overall physical memory usage limit.
+ * @pre_dep: Pre-dependencies. One needs to use the setter function to assign
+ * this field; this is done in order to reduce the possibility of
+ * improper assignment of a dependency field.
+ * @atom_number: Unique number to identify the atom.
+ * @prio: Atom priority. Refer to base_jd_prio for more details.
+ * @device_nr: Core group when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP
+ * specified.
+ * @jobslot: Job slot to use when BASE_JD_REQ_JOB_SLOT is specified.
+ * @core_req: Core requirements.
+ * @renderpass_id: Renderpass identifier used to associate an atom that has
+ * BASE_JD_REQ_START_RENDERPASS set in its core requirements
+ * with an atom that has BASE_JD_REQ_END_RENDERPASS set.
+ * @padding: Unused. Must be zero.
+ */
+typedef struct base_jd_atom {
+ u64 seq_nr;
+ u64 jc;
+ struct base_jd_udata udata;
+ u64 extres_list;
+ u16 nr_extres;
+ u8 jit_id[2];
+ struct base_dependency pre_dep[2];
+ base_atom_id atom_number;
+ base_jd_prio prio;
+ u8 device_nr;
+ u8 jobslot;
+ base_jd_core_req core_req;
+ u8 renderpass_id;
+ u8 padding[7];
+} base_jd_atom;
+
/* Job chain event code bits
* Defines the bits used to create ::base_jd_event_code
*/
@@ -982,7 +1057,7 @@ struct base_jd_event_v2 {
* jobs.
*
* This structure is stored into the memory pointed to by the @jc field
- * of &struct base_jd_atom_v2.
+ * of &struct base_jd_atom.
*
* It must not occupy the same CPU cache line(s) as any neighboring data.
* This is to avoid cases where access to pages containing the structure
diff --git a/mali_kbase/jm/mali_kbase_jm_defs.h b/mali_kbase/jm/mali_kbase_jm_defs.h
index aac561b..307a342 100644
--- a/mali_kbase/jm/mali_kbase_jm_defs.h
+++ b/mali_kbase/jm/mali_kbase_jm_defs.h
@@ -496,9 +496,9 @@ struct kbase_jd_atom {
struct list_head jd_item;
bool in_jd_list;
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
u8 jit_ids[2];
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
u16 nr_extres;
struct kbase_ext_res *extres;
@@ -608,6 +608,9 @@ struct kbase_jd_atom {
atomic_t blocked;
+ /* user-space sequence number, to order atoms in some temporal order */
+ u64 seq_nr;
+
struct kbase_jd_atom *pre_dep;
struct kbase_jd_atom *post_dep;
diff --git a/mali_kbase/jm/mali_kbase_jm_ioctl.h b/mali_kbase/jm/mali_kbase_jm_ioctl.h
index 408e98e..6dc57d0 100644
--- a/mali_kbase/jm/mali_kbase_jm_ioctl.h
+++ b/mali_kbase/jm/mali_kbase_jm_ioctl.h
@@ -94,16 +94,32 @@
* - The above changes are checked for safe values in usual builds
* 11.21:
* - v2.0 of mali_trace debugfs file, which now versions the file separately
+ * 11.22:
+ * - Added base_jd_atom (v3), which is seq_nr + base_jd_atom_v2.
+ * KBASE_IOCTL_JOB_SUBMIT supports both in parallel.
+ * 11.23:
+ * - Modified KBASE_IOCTL_MEM_COMMIT behavior to reject requests to modify
+ * the physical memory backing of JIT allocations. This was not supposed
+ * to be a valid use case, but it was allowed by the previous implementation.
+ * 11.24:
+ * - Added a sysfs file 'serialize_jobs' inside a new sub-directory
+ * 'scheduling'.
+ * 11.25:
+ * - Enabled JIT pressure limit in base/kbase by default
+ * 11.26:
+ * - Added kinstr_jm API
+ * 11.27:
+ * - Backwards compatible extension to HWC ioctl.
*/
#define BASE_UK_VERSION_MAJOR 11
-#define BASE_UK_VERSION_MINOR 21
+#define BASE_UK_VERSION_MINOR 27
/**
* struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel
*
- * @addr: Memory address of an array of struct base_jd_atom_v2
+ * @addr: Memory address of an array of struct base_jd_atom_v2 or v3
* @nr_atoms: Number of entries in the array
- * @stride: sizeof(struct base_jd_atom_v2)
+ * @stride: sizeof(struct base_jd_atom_v2) or sizeof(struct base_jd_atom)
*/
struct kbase_ioctl_job_submit {
__u64 addr;
@@ -132,5 +148,47 @@ struct kbase_ioctl_soft_event_update {
#define KBASE_IOCTL_SOFT_EVENT_UPDATE \
_IOW(KBASE_IOCTL_TYPE, 28, struct kbase_ioctl_soft_event_update)
+/**
+ * struct kbase_kinstr_jm_fd_out - Explains the compatibility information for
+ * the `struct kbase_kinstr_jm_atom_state_change` structure returned from the
+ * kernel
+ *
+ * @size: The size of the `struct kbase_kinstr_jm_atom_state_change`
+ * @version: Represents a breaking change in the
+ * `struct kbase_kinstr_jm_atom_state_change`
+ * @padding: Explicit padding to get the structure up to 64bits. See
+ * https://www.kernel.org/doc/Documentation/ioctl/botching-up-ioctls.rst
+ *
+ * The `struct kbase_kinstr_jm_atom_state_change` may have extra members at the
+ * end of the structure that older user space might not understand. If the
+ * `version` is the same, the structure is still compatible with newer kernels.
+ * The `size` can be used to cast the opaque memory returned from the kernel.
+ */
+struct kbase_kinstr_jm_fd_out {
+ __u16 size;
+ __u8 version;
+ __u8 padding[5];
+};
+
+/**
+ * struct kbase_kinstr_jm_fd_in - Options when creating the file descriptor
+ *
+ * @count: Number of atom states that can be stored in the kernel circular
+ * buffer. Must be a power of two
+ * @padding: Explicit padding to get the structure up to 64bits. See
+ * https://www.kernel.org/doc/Documentation/ioctl/botching-up-ioctls.rst
+ */
+struct kbase_kinstr_jm_fd_in {
+ __u16 count;
+ __u8 padding[6];
+};
+
+union kbase_kinstr_jm_fd {
+ struct kbase_kinstr_jm_fd_in in;
+ struct kbase_kinstr_jm_fd_out out;
+};
+
+#define KBASE_IOCTL_KINSTR_JM_FD \
+ _IOWR(KBASE_IOCTL_TYPE, 51, union kbase_kinstr_jm_fd)
#endif /* _KBASE_JM_IOCTL_H_ */
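A rough user-space sketch of the fd_in/fd_out handshake described above (the open context fd, header inclusion and function name are assumptions; only the union and the ioctl number come from this file):

    #include <linux/types.h>
    #include <string.h>
    #include <sys/ioctl.h>
    /* Assumes the kbase uapi headers defining union kbase_kinstr_jm_fd and
     * KBASE_IOCTL_KINSTR_JM_FD are on the include path.
     */

    static int query_kinstr_jm_layout(int kbase_fd, __u16 *record_size,
                                      __u8 *version)
    {
        union kbase_kinstr_jm_fd args;

        memset(&args, 0, sizeof(args));
        args.in.count = 256;    /* ring-buffer slots; must be a power of two */

        if (ioctl(kbase_fd, KBASE_IOCTL_KINSTR_JM_FD, &args) < 0)
                return -1;

        /* The same memory now holds the "out" view: 'size' is the stride of
         * each kbase_kinstr_jm_atom_state_change record and 'version' flags
         * any breaking layout change, so older user space can bail out early.
         */
        *record_size = args.out.size;
        *version = args.out.version;
        return 0;
    }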
diff --git a/mali_kbase/mali_base_kernel.h b/mali_kbase/mali_base_kernel.h
index 1e2744d..d45092f 100644
--- a/mali_kbase/mali_base_kernel.h
+++ b/mali_kbase/mali_base_kernel.h
@@ -213,28 +213,6 @@ struct base_mem_aliasing_info {
*/
#define BASE_JIT_ALLOC_COUNT (255)
-/* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the
- * initial commit is aligned to 'extent' pages, where 'extent' must be a power
- * of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES
- */
-#define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP (1 << 0)
-
-/**
- * If set, the heap info address points to a u32 holding the used size in bytes;
- * otherwise it points to a u64 holding the lowest address of unused memory.
- */
-#define BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE (1 << 1)
-
-/**
- * Valid set of just-in-time memory allocation flags
- *
- * Note: BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE cannot be set if heap_info_gpu_addr
- * in %base_jit_alloc_info is 0 (atom with BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE set
- * and heap_info_gpu_addr being 0 will be rejected).
- */
-#define BASE_JIT_ALLOC_VALID_FLAGS \
- (BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP | BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE)
-
/* base_jit_alloc_info in use for kernel driver versions 10.2 to early 11.5
*
* jit_version is 1
diff --git a/mali_kbase/mali_gpu_mem_trace.h b/mali_kbase/mali_gpu_mem_trace.h
new file mode 100644
index 0000000..183e6c4
--- /dev/null
+++ b/mali_kbase/mali_gpu_mem_trace.h
@@ -0,0 +1,73 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM gpu_mem
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE mali_gpu_mem_trace
+
+#if !defined(_TRACE_MALI_GPU_MEM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_GPU_MEM_H
+
+#include <linux/tracepoint.h>
+
+/*
+ * trace_gpu_mem_total
+ *
+ * The gpu_mem_total event indicates that there's an update to either the
+ * global or process total gpu memory counters.
+ *
+ * This event should be emitted whenever the kernel device driver allocates,
+ * frees, imports or unimports memory in the GPU addressable space.
+ *
+ * @gpu_id: Kbase device id.
+ * @pid: This is either the thread group ID of the process for which there was
+ * an update in the GPU memory usage or 0 so as to indicate an update in
+ * the device wide GPU memory usage.
+ * @size: GPU memory usage in bytes.
+ */
+TRACE_EVENT(gpu_mem_total,
+ TP_PROTO(uint32_t gpu_id, uint32_t pid, uint64_t size),
+
+ TP_ARGS(gpu_id, pid, size),
+
+ TP_STRUCT__entry(
+ __field(uint32_t, gpu_id)
+ __field(uint32_t, pid)
+ __field(uint64_t, size)
+ ),
+
+ TP_fast_assign(
+ __entry->gpu_id = gpu_id;
+ __entry->pid = pid;
+ __entry->size = size;
+ ),
+
+ TP_printk("gpu_id=%u pid=%u size=%llu",
+ __entry->gpu_id,
+ __entry->pid,
+ __entry->size)
+);
+#endif /* _TRACE_MALI_GPU_MEM_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
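TRACE_EVENT(gpu_mem_total, ...) generates a trace_gpu_mem_total() emitter. A hedged sketch of how the driver side (presumably the new mali_kbase_trace_gpu_mem.c, whose body is not shown here) could report the two kinds of totals the event description distinguishes; the function and field names are assumptions:

    /* Exactly one .c file must define CREATE_TRACE_POINTS before including
     * this header so the tracepoint bodies are instantiated.
     */
    #define CREATE_TRACE_POINTS
    #include "mali_gpu_mem_trace.h"

    /* Illustrative only: report the device-wide total (pid == 0) and the
     * per-process total whenever the counters change.
     */
    static void report_gpu_mem_totals(u32 gpu_id, u32 tgid,
                                      u64 device_total_bytes,
                                      u64 process_total_bytes)
    {
        trace_gpu_mem_total(gpu_id, 0, device_total_bytes);
        trace_gpu_mem_total(gpu_id, tgid, process_total_bytes);
    }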
diff --git a/mali_kbase/mali_kbase.h b/mali_kbase/mali_kbase.h
index 0445e0c..c623e7e 100644
--- a/mali_kbase/mali_kbase.h
+++ b/mali_kbase/mali_kbase.h
@@ -213,9 +213,9 @@ void kbase_jd_exit(struct kbase_context *kctx);
* kbase_jd_submit - Submit atoms to the job dispatcher
*
* @kctx: The kbase context to submit to
- * @user_addr: The address in user space of the struct base_jd_atom_v2 array
+ * @user_addr: The address in user space of the struct base_jd_atom array
* @nr_atoms: The number of atoms in the array
- * @stride: sizeof(struct base_jd_atom_v2)
+ * @stride: sizeof(struct base_jd_atom)
* @uk6_atom: true if the atoms are legacy atoms (struct base_jd_atom_v2_uk6)
*
* Return: 0 on success or error code
@@ -457,7 +457,7 @@ void kbase_pm_metrics_stop(struct kbase_device *kbdev);
/**
* Return the atom's ID, as was originally supplied by userspace in
- * base_jd_atom_v2::atom_number
+ * base_jd_atom::atom_number
*/
static inline int kbase_jd_atom_id(struct kbase_context *kctx, struct kbase_jd_atom *katom)
{
diff --git a/mali_kbase/mali_kbase_caps.h b/mali_kbase/mali_kbase_caps.h
new file mode 100644
index 0000000..b201a60
--- /dev/null
+++ b/mali_kbase/mali_kbase_caps.h
@@ -0,0 +1,65 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/**
+ * @file mali_kbase_caps.h
+ *
+ * Driver Capability Queries.
+ */
+
+#ifndef _KBASE_CAPS_H_
+#define _KBASE_CAPS_H_
+
+#include <linux/types.h>
+
+typedef enum mali_kbase_cap {
+ MALI_KBASE_CAP_SYSTEM_MONITOR = 0,
+ MALI_KBASE_CAP_JIT_PRESSURE_LIMIT,
+ MALI_KBASE_CAP_MEM_GROW_ON_GPF,
+ MALI_KBASE_CAP_MEM_PROTECTED,
+ MALI_KBASE_NUM_CAPS
+} mali_kbase_cap;
+
+extern bool mali_kbase_supports_cap(unsigned long api_version, mali_kbase_cap cap);
+
+static inline bool mali_kbase_supports_system_monitor(unsigned long api_version)
+{
+ return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_SYSTEM_MONITOR);
+}
+
+static inline bool mali_kbase_supports_jit_pressure_limit(unsigned long api_version)
+{
+ return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_JIT_PRESSURE_LIMIT);
+}
+
+static inline bool mali_kbase_supports_mem_grow_on_gpf(unsigned long api_version)
+{
+ return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_GROW_ON_GPF);
+}
+
+static inline bool mali_kbase_supports_mem_protected(unsigned long api_version)
+{
+ return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_PROTECTED);
+}
+
+#endif /* _KBASE_CAPS_H_ */
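These predicates are meant to gate behaviour on the UK version negotiated with the client. A minimal sketch of the intended usage (the api_version argument would come from the version handshake; the surrounding guard function is hypothetical):

    #include <linux/errno.h>
    #include "mali_kbase_caps.h"

    /* Hypothetical guard: reject a JIT-pressure-limit request from a client
     * whose negotiated UK version does not advertise the capability.
     */
    static int check_jit_pressure_limit_allowed(unsigned long api_version)
    {
        if (!mali_kbase_supports_jit_pressure_limit(api_version))
                return -EINVAL;

        return 0;
    }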
diff --git a/mali_kbase/mali_kbase_ccswe.c b/mali_kbase/mali_kbase_ccswe.c
new file mode 100644
index 0000000..87d5aaa
--- /dev/null
+++ b/mali_kbase/mali_kbase_ccswe.c
@@ -0,0 +1,105 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_ccswe.h"
+#include "mali_kbase_linux.h"
+
+#include <linux/math64.h>
+#include <linux/time.h>
+
+static u64 kbasep_ccswe_cycle_at_no_lock(
+ struct kbase_ccswe *self, u64 timestamp_ns)
+{
+ s64 diff_s, diff_ns;
+ u32 gpu_freq;
+
+ lockdep_assert_held(&self->access);
+
+ diff_ns = timestamp_ns - self->timestamp_ns;
+ gpu_freq = diff_ns > 0 ? self->gpu_freq : self->prev_gpu_freq;
+
+ diff_s = div_s64(diff_ns, NSEC_PER_SEC);
+ diff_ns -= diff_s * NSEC_PER_SEC;
+
+ return self->cycles_elapsed + diff_s * gpu_freq
+ + div_s64(diff_ns * gpu_freq, NSEC_PER_SEC);
+}
+
+void kbase_ccswe_init(struct kbase_ccswe *self)
+{
+ memset(self, 0, sizeof(*self));
+
+ spin_lock_init(&self->access);
+}
+KBASE_EXPORT_TEST_API(kbase_ccswe_init);
+
+u64 kbase_ccswe_cycle_at(struct kbase_ccswe *self, u64 timestamp_ns)
+{
+ unsigned long flags;
+ u64 result;
+
+ spin_lock_irqsave(&self->access, flags);
+ result = kbasep_ccswe_cycle_at_no_lock(self, timestamp_ns);
+ spin_unlock_irqrestore(&self->access, flags);
+
+ return result;
+}
+KBASE_EXPORT_TEST_API(kbase_ccswe_cycle_at);
+
+void kbase_ccswe_freq_change(
+ struct kbase_ccswe *self, u64 timestamp_ns, u32 gpu_freq)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&self->access, flags);
+
+ /* The time must go only forward. */
+ if (WARN_ON(timestamp_ns < self->timestamp_ns))
+ goto exit;
+
+ /* If this is the first frequency change, cycles_elapsed is zero. */
+ if (self->timestamp_ns)
+ self->cycles_elapsed = kbasep_ccswe_cycle_at_no_lock(
+ self, timestamp_ns);
+
+ self->timestamp_ns = timestamp_ns;
+ self->prev_gpu_freq = self->gpu_freq;
+ self->gpu_freq = gpu_freq;
+exit:
+ spin_unlock_irqrestore(&self->access, flags);
+}
+KBASE_EXPORT_TEST_API(kbase_ccswe_freq_change);
+
+void kbase_ccswe_reset(struct kbase_ccswe *self)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&self->access, flags);
+
+ self->timestamp_ns = 0;
+ self->cycles_elapsed = 0;
+ self->gpu_freq = 0;
+ self->prev_gpu_freq = 0;
+
+ spin_unlock_irqrestore(&self->access, flags);
+}
+
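A worked example of the estimator arithmetic above (the numbers are made up): after a single frequency change to 100 MHz at t = 1 s, a query at t = 3.5 s sees diff_ns = 2.5e9, i.e. diff_s = 2 with a 0.5e9 ns remainder, so the estimate is 0 + 2 * 100e6 + (0.5e9 * 100e6) / 1e9 = 250,000,000 cycles. As a sketch using only the functions added in this file:

    #include "mali_kbase_ccswe.h"

    static u64 example_ccswe_estimate(void)
    {
        struct kbase_ccswe ccswe;

        kbase_ccswe_init(&ccswe);
        /* First change: cycles_elapsed stays 0 because no earlier timestamp exists. */
        kbase_ccswe_freq_change(&ccswe, 1000000000ULL, 100000000); /* 100 MHz at 1 s */

        /* Returns 250000000 for a query at 3.5 s, per the arithmetic above. */
        return kbase_ccswe_cycle_at(&ccswe, 3500000000ULL);
    }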
diff --git a/mali_kbase/mali_kbase_ccswe.h b/mali_kbase/mali_kbase_ccswe.h
new file mode 100644
index 0000000..3a7cf73
--- /dev/null
+++ b/mali_kbase/mali_kbase_ccswe.h
@@ -0,0 +1,97 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_CCSWE_H_
+#define _KBASE_CCSWE_H_
+
+#include <linux/spinlock.h>
+
+/**
+ * struct kbase_ccswe - Cycle count software estimator.
+ *
+ * @access: Spinlock protecting this structure access.
+ * @timestamp_ns: Timestamp(ns) when the last frequency change
+ * occurred.
+ * @cycles_elapsed: Number of cycles elapsed before the last frequency
+ * change
+ * @gpu_freq: Current GPU frequency(Hz) value.
+ * @prev_gpu_freq: Previous GPU frequency(Hz) before the last frequency
+ * change.
+ */
+struct kbase_ccswe {
+ spinlock_t access;
+ u64 timestamp_ns;
+ u64 cycles_elapsed;
+ u32 gpu_freq;
+ u32 prev_gpu_freq;
+};
+
+/**
+ * kbase_ccswe_init() - initialize the cycle count estimator.
+ *
+ * @self: Cycles count software estimator instance.
+ */
+void kbase_ccswe_init(struct kbase_ccswe *self);
+
+
+/**
+ * kbase_ccswe_cycle_at() - Estimate cycle count at given timestamp.
+ *
+ * @self: Cycles count software estimator instance.
+ * @timestamp_ns: The timestamp(ns) for cycle count estimation.
+ *
+ * The timestamp must be later than the timestamp of the penultimate
+ * frequency change. If only one frequency change has occurred, the
+ * timestamp must be later than the timestamp of that change.
+ * This allows the following code to be executed without synchronization:
+ * if the two lines below run atomically, at most one frequency change
+ * can happen in between.
+ *
+ *     u64 ts = ktime_get_raw_ns();
+ *     u64 cycle = kbase_ccswe_cycle_at(&ccswe, ts);
+ *
+ * Returns: estimated value of cycle count at a given time.
+ */
+u64 kbase_ccswe_cycle_at(struct kbase_ccswe *self, u64 timestamp_ns);
+
+/**
+ * kbase_ccswe_freq_change() - update GPU frequency.
+ *
+ * @self: Cycles count software estimator instance.
+ * @timestamp_ns: Timestamp(ns) when frequency change occurred.
+ * @gpu_freq: New GPU frequency value.
+ *
+ * The timestamp must be bigger than the timestamp of the previous
+ * frequency change. The function is to be called at the frequency
+ * change moment (not later).
+ */
+void kbase_ccswe_freq_change(
+ struct kbase_ccswe *self, u64 timestamp_ns, u32 gpu_freq);
+
+/**
+ * kbase_ccswe_reset() - reset estimator state
+ *
+ * @self: Cycles count software estimator instance.
+ */
+void kbase_ccswe_reset(struct kbase_ccswe *self);
+
+#endif /* _KBASE_CCSWE_H_ */
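For reference, a minimal usage sketch of the cycle count software estimator declared above; the ccswe_example() wrapper and the 700 MHz figure are illustrative only, not part of the driver:

#include "mali_kbase_ccswe.h"
#include <linux/ktime.h>
#include <linux/printk.h>

static void ccswe_example(void)
{
	struct kbase_ccswe ccswe;
	u64 ts, cycles;

	kbase_ccswe_init(&ccswe);

	/* Record the new frequency at the moment the change happens. */
	kbase_ccswe_freq_change(&ccswe, ktime_get_raw_ns(), 700000000 /* Hz */);

	/* Later, estimate the cycles elapsed up to the current time. */
	ts = ktime_get_raw_ns();
	cycles = kbase_ccswe_cycle_at(&ccswe, ts);

	pr_info("estimated %llu cycles since the frequency change\n",
		(unsigned long long)cycles);
}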
diff --git a/mali_kbase/mali_kbase_config.h b/mali_kbase/mali_kbase_config.h
index 69723ea..57456e2 100644
--- a/mali_kbase/mali_kbase_config.h
+++ b/mali_kbase/mali_kbase_config.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2010-2017, 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2017, 2019-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -223,6 +223,88 @@ struct kbase_pm_callback_conf {
int (*soft_reset_callback)(struct kbase_device *kbdev);
};
+/* struct kbase_gpu_clk_notifier_data - Data for clock rate change notifier.
+ *
+ * Pointer to this structure is supposed to be passed to the gpu clock rate
+ * change notifier function. This structure is deliberately aligned with the
+ * common clock framework notification structure 'struct clk_notifier_data'
+ * and such alignment should be maintained.
+ *
+ * @gpu_clk_handle: Handle of the GPU clock for which notifier was registered.
+ * @old_rate: Previous rate of this GPU clock.
+ * @new_rate: New rate of this GPU clock.
+ */
+struct kbase_gpu_clk_notifier_data {
+ void *gpu_clk_handle;
+ unsigned long old_rate;
+ unsigned long new_rate;
+};
+
+/**
+ * struct kbase_clk_rate_trace_op_conf - GPU clock rate trace operations.
+ *
+ * Specifies the function pointers for platform-specific GPU clock rate trace
+ * operations. By default no functions are required.
+ */
+struct kbase_clk_rate_trace_op_conf {
+ /**
+ * enumerate_gpu_clk - Enumerate a GPU clock on the given index
+ * @kbdev - kbase_device pointer
+ * @index - GPU clock index
+ *
+ * Returns a handle unique to the given GPU clock, or NULL if the clock
+ * array has been exhausted at the given index value.
+ *
+ * Kbase will use this function pointer to enumerate the existence of a
+ * GPU clock on the given index.
+ */
+ void *(*enumerate_gpu_clk)(struct kbase_device *kbdev,
+ unsigned int index);
+
+ /**
+ * get_gpu_clk_rate - Get the current rate for an enumerated clock.
+ * @kbdev - kbase_device pointer
+ * @gpu_clk_handle - Handle unique to the enumerated GPU clock
+ *
+ * Returns current rate of the GPU clock in unit of Hz.
+ */
+ unsigned long (*get_gpu_clk_rate)(struct kbase_device *kbdev,
+ void *gpu_clk_handle);
+
+ /**
+ * gpu_clk_notifier_register - Register a clock rate change notifier.
+ * @kbdev - kbase_device pointer
+ * @gpu_clk_handle - Handle unique to the enumerated GPU clock
+ * @nb - notifier block containing the callback function
+ * pointer
+ *
+ * Returns 0 on success, negative error code otherwise.
+ *
+ * This function pointer is used to register a callback function that
+ * is supposed to be invoked whenever the rate of clock corresponding
+ * to @gpu_clk_handle changes.
+ * @nb contains the pointer to callback function.
+ * The callback function expects the pointer of type
+ * 'struct kbase_gpu_clk_notifier_data' as the third argument.
+ */
+ int (*gpu_clk_notifier_register)(struct kbase_device *kbdev,
+ void *gpu_clk_handle, struct notifier_block *nb);
+
+ /**
+ * gpu_clk_notifier_unregister - Unregister clock rate change notifier
+ * @kbdev - kbase_device pointer
+ * @gpu_clk_handle - Handle unique to the enumerated GPU clock
+ * @nb - notifier block containing the callback function
+ * pointer
+ *
+ * This function pointer is used to unregister a callback function that
+ * was previously registered to get notified of the change in rate
+ * of clock corresponding to @gpu_clk_handle.
+ */
+ void (*gpu_clk_notifier_unregister)(struct kbase_device *kbdev,
+ void *gpu_clk_handle, struct notifier_block *nb);
+};
+
#ifdef CONFIG_OF
struct kbase_platform_config {
};
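A minimal sketch of platform glue for the kbase_clk_rate_trace_op_conf hooks added above, assuming a single GPU clock obtained from the common clock framework; the my_platform_* names and the static clock variable are illustrative assumptions, not part of this patch:

#include <linux/clk.h>
#include <linux/notifier.h>
#include "mali_kbase_config.h"

static struct clk *my_platform_gpu_clk; /* set up during platform probe */

static void *my_platform_enumerate_gpu_clk(struct kbase_device *kbdev,
		unsigned int index)
{
	/* A single clock: index 0 returns its handle, anything else NULL. */
	return (index == 0) ? my_platform_gpu_clk : NULL;
}

static unsigned long my_platform_get_gpu_clk_rate(struct kbase_device *kbdev,
		void *gpu_clk_handle)
{
	return clk_get_rate((struct clk *)gpu_clk_handle);
}

static int my_platform_gpu_clk_notifier_register(struct kbase_device *kbdev,
		void *gpu_clk_handle, struct notifier_block *nb)
{
	return clk_notifier_register((struct clk *)gpu_clk_handle, nb);
}

static void my_platform_gpu_clk_notifier_unregister(struct kbase_device *kbdev,
		void *gpu_clk_handle, struct notifier_block *nb)
{
	clk_notifier_unregister((struct clk *)gpu_clk_handle, nb);
}

static struct kbase_clk_rate_trace_op_conf my_platform_clk_rate_trace_ops = {
	.enumerate_gpu_clk = my_platform_enumerate_gpu_clk,
	.get_gpu_clk_rate = my_platform_get_gpu_clk_rate,
	.gpu_clk_notifier_register = my_platform_gpu_clk_notifier_register,
	.gpu_clk_notifier_unregister = my_platform_gpu_clk_notifier_unregister,
};

How this structure is wired up is platform-integration specific; the clock rate trace manager added to the power management data later in this patch holds a pointer to it in its clk_rate_trace_ops field.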
diff --git a/mali_kbase/mali_kbase_core_linux.c b/mali_kbase/mali_kbase_core_linux.c
index fb2353e..83a22d9 100644
--- a/mali_kbase/mali_kbase_core_linux.c
+++ b/mali_kbase/mali_kbase_core_linux.c
@@ -55,6 +55,7 @@
#include <mali_kbase_reset_gpu.h>
#include <backend/gpu/mali_kbase_device_internal.h>
#include "mali_kbase_ioctl.h"
+#include "mali_kbase_kinstr_jm.h"
#include "mali_kbase_hwcnt_context.h"
#include "mali_kbase_hwcnt_virtualizer.h"
#include "mali_kbase_hwcnt_legacy.h"
@@ -114,6 +115,8 @@
#include <device/mali_kbase_device.h>
#include <context/mali_kbase_context.h>
+#include <mali_kbase_caps.h>
+
/* GPU IRQ Tags */
#define JOB_IRQ_TAG 0
#define MMU_IRQ_TAG 1
@@ -122,6 +125,82 @@
#define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)"
/**
+ * Kernel min/maj <=> API Version
+ */
+#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20) | \
+ (((minor) & 0xFFF) << 8) | \
+ ((0 & 0xFF) << 0))
+
+#define KBASE_API_MIN(api_version) ((api_version >> 8) & 0xFFF)
+#define KBASE_API_MAJ(api_version) ((api_version >> 20) & 0xFFF)
+
+/**
+ * mali_kbase_api_version_to_maj_min - convert an api_version to a min/maj pair
+ *
+ * @api_version: API version to convert
+ * @maj: Pointer to where the major version number (12 bits) is stored
+ * @min: Pointer to where the minor version number (12 bits) is stored
+ */
+void mali_kbase_api_version_to_maj_min(unsigned long api_version, u16 *maj, u16 *min)
+{
+ if (WARN_ON(!maj))
+ return;
+
+ if (WARN_ON(!min))
+ return;
+
+ *maj = KBASE_API_MAJ(api_version);
+ *min = KBASE_API_MIN(api_version);
+}
+
+/**
+ * kbase capabilities table
+ */
+typedef struct mali_kbase_capability_def {
+ u16 required_major;
+ u16 required_minor;
+} mali_kbase_capability_def;
+
+/**
+ * This must be kept in-sync with mali_kbase_cap
+ *
+ * TODO: The alternative approach would be to embed the cap enum values
+ * in the table. Less efficient but potentially safer.
+ */
+static mali_kbase_capability_def kbase_caps_table[MALI_KBASE_NUM_CAPS] = {
+ { 11, 15 }, /* SYSTEM_MONITOR */
+ { 11, 25 }, /* JIT_PRESSURE_LIMIT */
+ { 11, 2 }, /* MEM_GROW_ON_GPF */
+ { 11, 2 } /* MEM_PROTECTED */
+};
+
+/**
+ * mali_kbase_supports_cap - Query whether a kbase capability is supported
+ *
+ * @api_version: API version to check the capability against
+ * @cap: Capability to query for - see mali_kbase_caps.h
+ */
+bool mali_kbase_supports_cap(unsigned long api_version, mali_kbase_cap cap)
+{
+ bool supported = false;
+ unsigned long required_ver;
+
+ mali_kbase_capability_def const *cap_def;
+
+ if (WARN_ON(cap < 0))
+ return false;
+
+ if (WARN_ON(cap >= MALI_KBASE_NUM_CAPS))
+ return false;
+
+ cap_def = &kbase_caps_table[(int)cap];
+ required_ver = KBASE_API_VERSION(cap_def->required_major, cap_def->required_minor);
+ supported = (api_version >= required_ver);
+
+ return supported;
+}
+
+/**
* kbase_file_new - Create an object representing a device file
*
* @kbdev: An instance of the GPU platform device, allocated from the probe
@@ -152,7 +231,7 @@ static struct kbase_file *kbase_file_new(struct kbase_device *const kbdev,
}
/**
- * kbase_file_get_api_version - Set the application programmer interface version
+ * kbase_file_set_api_version - Set the application programmer interface version
*
* @kfile: A device file created by kbase_file_new()
* @major: Major version number (must not exceed 12 bits)
@@ -326,7 +405,7 @@ static int kbase_api_handshake(struct kbase_file *kfile,
* the flags have been set. Originally it was created on file open
* (with job submission disabled) but we don't support that usage.
*/
- if (kbase_file_get_api_version(kfile) < KBASE_API_VERSION(11, 15))
+ if (!mali_kbase_supports_system_monitor(kbase_file_get_api_version(kfile)))
err = kbase_file_create_kctx(kfile,
BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED);
@@ -663,7 +742,7 @@ static int kbase_api_set_flags(struct kbase_file *kfile,
/* For backward compatibility, the context may have been created before
* the flags were set.
*/
- if (api_version >= KBASE_API_VERSION(11, 15)) {
+ if (mali_kbase_supports_system_monitor(api_version)) {
err = kbase_file_create_kctx(kfile, flags->create_flags);
} else {
struct kbasep_js_kctx_info *js_kctx_info = NULL;
@@ -790,6 +869,12 @@ static int kbase_api_mem_free(struct kbase_context *kctx,
return kbase_mem_free(kctx, free->gpu_addr);
}
+static int kbase_api_kinstr_jm_fd(struct kbase_context *kctx,
+ union kbase_kinstr_jm_fd *arg)
+{
+ return kbase_kinstr_jm_get_fd(kctx->kinstr_jm, arg);
+}
+
static int kbase_api_hwcnt_reader_setup(struct kbase_context *kctx,
struct kbase_ioctl_hwcnt_reader_setup *setup)
{
@@ -1536,6 +1621,12 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
break;
/* Instrumentation. */
+ case KBASE_IOCTL_KINSTR_JM_FD:
+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_KINSTR_JM_FD,
+ kbase_api_kinstr_jm_fd,
+ union kbase_kinstr_jm_fd,
+ kctx);
+ break;
case KBASE_IOCTL_HWCNT_READER_SETUP:
KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_READER_SETUP,
kbase_api_hwcnt_reader_setup,
@@ -1890,7 +1981,7 @@ static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr,
* @dev: The device with sysfs file is for
* @attr: The attributes of the sysfs file
* @buf: The value written to the sysfs file
- * @count: The number of bytes written to the sysfs file
+ * @count: The number of bytes to write to the sysfs file
*
* Return: @count if the function succeeded. An error code on failure.
*/
@@ -1985,7 +2076,7 @@ static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask);
* @dev: The device this sysfs file is for.
* @attr: The attributes of the sysfs file.
* @buf: The value written to the sysfs file.
- * @count: The number of bytes written to the sysfs file.
+ * @count: The number of bytes to write to the sysfs file.
*
* This allows setting the timeout for software jobs. Waiting soft event wait
* jobs will be cancelled after this period expires, while soft fence wait jobs
@@ -2078,7 +2169,7 @@ static u32 timeout_ms_to_ticks(struct kbase_device *kbdev, long timeout_ms,
* @dev: The device with sysfs file is for
* @attr: The attributes of the sysfs file
* @buf: The value written to the sysfs file
- * @count: The number of bytes written to the sysfs file
+ * @count: The number of bytes to write to the sysfs file
*
* Return: @count if the function succeeded. An error code on failure.
*/
@@ -2255,7 +2346,7 @@ static u32 get_new_js_timeout(
* @dev: The device the sysfs file is for
* @attr: The attributes of the sysfs file
* @buf: The value written to the sysfs file
- * @count: The number of bytes written to the sysfs file
+ * @count: The number of bytes to write to the sysfs file
*
* This function is called when the js_scheduling_period sysfs file is written
* to. It checks the data written, and if valid updates the js_scheduling_period
@@ -2495,7 +2586,7 @@ static ssize_t show_debug(struct device *dev, struct device_attribute *attr, cha
* @dev: The device with sysfs file is for
* @attr: The attributes of the sysfs file
* @buf: The value written to the sysfs file
- * @count: The number of bytes written to the sysfs file
+ * @count: The number of bytes to write to the sysfs file
*
* Return: @count if the function succeeded. An error code on failure.
*/
@@ -3096,7 +3187,6 @@ static DEVICE_ATTR(js_ctx_scheduling_mode, S_IRUGO | S_IWUSR,
set_js_ctx_scheduling_mode);
#ifdef MALI_KBASE_BUILD
-#ifdef CONFIG_DEBUG_FS
/* Number of entries in serialize_jobs_settings[] */
#define NR_SERIALIZE_JOBS_SETTINGS 5
@@ -3117,8 +3207,47 @@ static struct
};
/**
- * kbasep_serialize_jobs_seq_show - Show callback for the serialize_jobs debugfs
- * file
+ * update_serialize_jobs_setting - Update the serialization setting for the
+ * submission of GPU jobs.
+ *
+ * This function is called when the serialize_jobs sysfs/debugfs file is
+ * written to. It matches the requested setting against the available settings
+ * and if a matching setting is found updates kbdev->serialize_jobs.
+ *
+ * @kbdev: An instance of the GPU platform device, allocated from the probe
+ * method of the driver.
+ * @buf: Buffer containing the value written to the sysfs/debugfs file.
+ * @count: The number of bytes to write to the sysfs/debugfs file.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t update_serialize_jobs_setting(struct kbase_device *kbdev,
+ const char *buf, size_t count)
+{
+ int i;
+ bool valid = false;
+
+ for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) {
+ if (sysfs_streq(serialize_jobs_settings[i].name, buf)) {
+ kbdev->serialize_jobs =
+ serialize_jobs_settings[i].setting;
+ valid = true;
+ break;
+ }
+ }
+
+ if (!valid) {
+ dev_err(kbdev->dev, "serialize_jobs: invalid setting");
+ return -EINVAL;
+ }
+
+ return count;
+}
+
+#ifdef CONFIG_DEBUG_FS
+/**
+ * kbasep_serialize_jobs_seq_debugfs_show - Show callback for the serialize_jobs
+ * debugfs file
* @sfile: seq_file pointer
* @data: Private callback data
*
@@ -3128,7 +3257,8 @@ static struct
*
* Return: 0 on success, or an error code on error
*/
-static int kbasep_serialize_jobs_seq_show(struct seq_file *sfile, void *data)
+static int kbasep_serialize_jobs_seq_debugfs_show(struct seq_file *sfile,
+ void *data)
{
struct kbase_device *kbdev = sfile->private;
int i;
@@ -3169,8 +3299,6 @@ static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file,
struct seq_file *s = file->private_data;
struct kbase_device *kbdev = s->private;
char buf[MAX_SERIALIZE_JOBS_NAME_LEN];
- int i;
- bool valid = false;
CSTD_UNUSED(ppos);
@@ -3180,21 +3308,7 @@ static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file,
buf[count] = 0;
- for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) {
- if (sysfs_streq(serialize_jobs_settings[i].name, buf)) {
- kbdev->serialize_jobs =
- serialize_jobs_settings[i].setting;
- valid = true;
- break;
- }
- }
-
- if (!valid) {
- dev_err(kbdev->dev, "serialize_jobs: invalid setting\n");
- return -EINVAL;
- }
-
- return count;
+ return update_serialize_jobs_setting(kbdev, buf, count);
}
/**
@@ -3208,7 +3322,8 @@ static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file,
static int kbasep_serialize_jobs_debugfs_open(struct inode *in,
struct file *file)
{
- return single_open(file, kbasep_serialize_jobs_seq_show, in->i_private);
+ return single_open(file, kbasep_serialize_jobs_seq_debugfs_show,
+ in->i_private);
}
static const struct file_operations kbasep_serialize_jobs_debugfs_fops = {
@@ -3221,6 +3336,72 @@ static const struct file_operations kbasep_serialize_jobs_debugfs_fops = {
};
#endif /* CONFIG_DEBUG_FS */
+
+/**
+ * show_serialize_jobs_sysfs - Show callback for serialize_jobs sysfs file.
+ *
+ * This function is called to get the contents of the serialize_jobs sysfs
+ * file. This is a list of the available settings with the currently active
+ * one surrounded by square brackets.
+ *
+ * @dev: The device this sysfs file is for
+ * @attr: The attributes of the sysfs file
+ * @buf: The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_serialize_jobs_sysfs(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct kbase_device *kbdev = to_kbase_device(dev);
+ ssize_t ret = 0;
+ int i;
+
+ for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) {
+ if (kbdev->serialize_jobs ==
+ serialize_jobs_settings[i].setting)
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s]",
+ serialize_jobs_settings[i].name);
+ else
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ",
+ serialize_jobs_settings[i].name);
+ }
+
+ if (ret < PAGE_SIZE - 1) {
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+ } else {
+ buf[PAGE_SIZE - 2] = '\n';
+ buf[PAGE_SIZE - 1] = '\0';
+ ret = PAGE_SIZE - 1;
+ }
+
+ return ret;
+}
+
+/**
+ * store_serialize_jobs_sysfs - Store callback for serialize_jobs sysfs file.
+ *
+ * This function is called when the serialize_jobs sysfs file is written to.
+ * It matches the requested setting against the available settings and if a
+ * matching setting is found updates kbdev->serialize_jobs.
+ *
+ * @dev: The device this sysfs file is for
+ * @attr: The attributes of the sysfs file
+ * @buf: The value written to the sysfs file
+ * @count: The number of bytes to write to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t store_serialize_jobs_sysfs(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ return update_serialize_jobs_setting(to_kbase_device(dev), buf, count);
+}
+
+static DEVICE_ATTR(serialize_jobs, 0600, show_serialize_jobs_sysfs,
+ store_serialize_jobs_sysfs);
#endif /* MALI_KBASE_BUILD */
static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data)
@@ -4019,6 +4200,11 @@ void buslog_term(struct kbase_device *kbdev)
}
#endif
+static struct attribute *kbase_scheduling_attrs[] = {
+ &dev_attr_serialize_jobs.attr,
+ NULL
+};
+
static struct attribute *kbase_attrs[] = {
#ifdef CONFIG_MALI_DEBUG
&dev_attr_debug_command.attr,
@@ -4041,6 +4227,12 @@ static struct attribute *kbase_attrs[] = {
NULL
};
+#define SYSFS_SCHEDULING_GROUP "scheduling"
+static const struct attribute_group kbase_scheduling_attr_group = {
+ .name = SYSFS_SCHEDULING_GROUP,
+ .attrs = kbase_scheduling_attrs,
+};
+
static const struct attribute_group kbase_attr_group = {
.attrs = kbase_attrs,
};
@@ -4056,11 +4248,23 @@ int kbase_sysfs_init(struct kbase_device *kbdev)
kbdev->mdev.mode = 0666;
err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group);
+ if (!err) {
+ err = sysfs_create_group(&kbdev->dev->kobj,
+ &kbase_scheduling_attr_group);
+ if (err) {
+ dev_err(kbdev->dev, "Creation of %s sysfs group failed",
+ SYSFS_SCHEDULING_GROUP);
+ sysfs_remove_group(&kbdev->dev->kobj,
+ &kbase_attr_group);
+ }
+ }
+
return err;
}
void kbase_sysfs_term(struct kbase_device *kbdev)
{
+ sysfs_remove_group(&kbdev->dev->kobj, &kbase_scheduling_attr_group);
sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group);
put_device(kbdev->dev);
}
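As a worked example of the KBASE_API_VERSION encoding and the capability table introduced earlier in this file: KBASE_API_VERSION(11, 15) packs the major number into bits 20-31 and the minor into bits 8-19, giving 0x00B00F00, so a client that negotiated version 11.25 (0x00B01900) satisfies the 11.15 SYSTEM_MONITOR requirement while one at 11.2 (0x00B00200) does not. A sketch using the helper already referenced above (illustrative only, not driver code):

static void api_version_example(void)
{
	unsigned long api_version = KBASE_API_VERSION(11, 25); /* 0x00B01900 */

	if (mali_kbase_supports_system_monitor(api_version))
		pr_info("client may create system-monitor capable contexts\n");
}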
diff --git a/mali_kbase/mali_kbase_cs_experimental.h b/mali_kbase/mali_kbase_cs_experimental.h
index e1fffc3..caba2cd 100644
--- a/mali_kbase/mali_kbase_cs_experimental.h
+++ b/mali_kbase/mali_kbase_cs_experimental.h
@@ -41,9 +41,6 @@
*/
static inline void mali_kbase_print_cs_experimental(void)
{
-#if MALI_JIT_PRESSURE_LIMIT
- pr_info("mali_kbase: JIT_PRESSURE_LIMIT (experimental) enabled");
-#endif /* MALI_JIT_PRESSURE_LIMIT */
#if MALI_INCREMENTAL_RENDERING
pr_info("mali_kbase: INCREMENTAL_RENDERING (experimental) enabled");
#endif /* MALI_INCREMENTAL_RENDERING */
diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h
index 7056d80..5cbe6a9 100644
--- a/mali_kbase/mali_kbase_defs.h
+++ b/mali_kbase/mali_kbase_defs.h
@@ -40,7 +40,7 @@
#include <mali_kbase_instr_defs.h>
#include <mali_kbase_pm.h>
#include <mali_kbase_gpuprops_types.h>
-#include <mali_kbase_hwcnt_backend_gpu.h>
+#include <mali_kbase_hwcnt_backend_jm.h>
#include <protected_mode_switcher.h>
#include <linux/atomic.h>
@@ -156,6 +156,7 @@ struct kbase_device;
struct kbase_as;
struct kbase_mmu_setup;
struct kbase_ipa_model_vinstr_data;
+struct kbase_kinstr_jm;
/**
* struct kbase_io_access - holds information about 1 register access
@@ -320,6 +321,58 @@ struct kbasep_mem_device {
atomic_t ir_threshold;
};
+struct kbase_clk_rate_listener;
+
+/**
+ * kbase_clk_rate_listener_on_change_t() - Frequency change callback
+ *
+ * @listener: Clock frequency change listener.
+ * @clk_index: Index of the clock for which the change has occurred.
+ * @clk_rate_hz: Clock frequency(Hz).
+ *
+ * A callback invoked when the clock rate changes. The function must not
+ * sleep, and it must not call back into the clock rate trace manager,
+ * whose lock is held while listeners are notified.
+ */
+typedef void (*kbase_clk_rate_listener_on_change_t)(
+ struct kbase_clk_rate_listener *listener,
+ u32 clk_index,
+ u32 clk_rate_hz);
+
+/**
+ * struct kbase_clk_rate_listener - Clock frequency listener
+ *
+ * @node: List node.
+ * @notify: Callback to be called when GPU frequency changes.
+ */
+struct kbase_clk_rate_listener {
+ struct list_head node;
+ kbase_clk_rate_listener_on_change_t notify;
+};
+
+/**
+ * struct kbase_clk_rate_trace_manager - Data stored per device for GPU clock
+ * rate trace manager.
+ *
+ * @gpu_idle: Tracks the idle state of GPU.
+ * @clks: Array of pointer to structures storing data for every
+ * enumerated GPU clock.
+ * @clk_rate_trace_ops: Pointer to the platform specific GPU clock rate trace
+ * operations.
+ * @listeners:          List of attached listeners.
+ * @lock: Lock to serialize the actions of GPU clock rate trace
+ * manager.
+ */
+struct kbase_clk_rate_trace_manager {
+ bool gpu_idle;
+ struct kbase_clk_data *clks[BASE_MAX_NR_CLOCKS_REGULATORS];
+ struct kbase_clk_rate_trace_op_conf *clk_rate_trace_ops;
+ struct list_head listeners;
+ spinlock_t lock;
+};
+
/**
* Data stored per device for power management.
*
@@ -385,6 +438,11 @@ struct kbase_pm_device_data {
*/
struct kbase_arbiter_vm_state *arb_vm_state;
#endif /* CONFIG_MALI_ARBITER_SUPPORT */
+
+ /**
+ * The state of the GPU clock rate trace manager
+ */
+ struct kbase_clk_rate_trace_manager clk_rtm;
};
/**
@@ -560,6 +618,32 @@ struct kbase_devfreq_queue_info {
};
/**
+ * struct kbase_process - Representing an object of a kbase process instantiated
+ * when the first kbase context is created under it.
+ * @tgid: Thread group ID.
+ * @total_gpu_pages:   Total GPU pages allocated across all the contexts
+ * of this process, it accounts for both native allocations
+ * and dma_buf imported allocations.
+ * @kctx_list: List of kbase contexts created for the process.
+ * @kprcs_node: Node to a rb_tree, kbase_device will maintain a rb_tree
+ * based on key tgid, kprcs_node is the node link to
+ * &struct_kbase_device.process_root.
+ * @dma_buf_root: RB tree of the dma-buf imported allocations, imported
+ * across all the contexts created for this process.
+ * Used to ensure that pages of allocation are accounted
+ * only once for the process, even if the allocation gets
+ * imported multiple times for the process.
+ */
+struct kbase_process {
+ pid_t tgid;
+ size_t total_gpu_pages;
+ struct list_head kctx_list;
+
+ struct rb_node kprcs_node;
+ struct rb_root dma_buf_root;
+};
+
+/**
* struct kbase_device - Object representing an instance of GPU platform device,
* allocated from the probe method of mali driver.
* @hw_quirks_sc: Configuration to be used for the shader cores as per
@@ -806,6 +890,20 @@ struct kbase_devfreq_queue_info {
* Job Scheduler
* @l2_size_override: Used to set L2 cache size via device tree blob
* @l2_hash_override: Used to set L2 cache hash via device tree blob
+ * @process_root: rb_tree root node for maintaining a rb_tree of
+ * kbase_process based on key tgid(thread group ID).
+ * @dma_buf_root: rb_tree root node for maintaining a rb_tree of
+ * &struct kbase_dma_buf based on key dma_buf.
+ *                         An rb_tree of dma_buf mappings is maintained under
+ *                         both kbase_device and kbase_process: one tracks
+ *                         mappings and GPU memory usage at the device level,
+ *                         the other at the process level.
+ * @total_gpu_pages: Total GPU pages used for the complete GPU device.
+ * @dma_buf_lock: This mutex should be held while accounting for
+ * @total_gpu_pages from imported dma buffers.
+ * @gpu_mem_usage_lock: This spinlock should be held while accounting
+ * @total_gpu_pages for both native and dma-buf imported
+ * allocations.
*/
struct kbase_device {
u32 hw_quirks_sc;
@@ -1043,6 +1141,13 @@ struct kbase_device {
#endif /* CONFIG_MALI_CINSTR_GWT */
+ struct rb_root process_root;
+ struct rb_root dma_buf_root;
+
+ size_t total_gpu_pages;
+ struct mutex dma_buf_lock;
+ spinlock_t gpu_mem_usage_lock;
+
struct {
struct kbase_context *ctx;
u64 jc;
@@ -1056,10 +1161,6 @@ struct kbase_device {
#endif
};
-#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20) | \
- (((minor) & 0xFFF) << 8) | \
- ((0 & 0xFF) << 0))
-
/**
* enum kbase_file_state - Initialization state of a file opened by @kbase_open
*
@@ -1189,6 +1290,13 @@ enum kbase_context_flags {
KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13,
KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14,
KCTX_AS_DISABLED_ON_FAULT = 1U << 15,
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+ /*
+ * Set when JIT physical page limit is less than JIT virtual address
+ * page limit, so we must take care to not exceed the physical limit
+ */
+ KCTX_JPL_ENABLED = 1U << 16,
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
};
struct kbase_sub_alloc {
@@ -1399,6 +1507,16 @@ struct kbase_sub_alloc {
* that were used (i.e. the
* &struct_kbase_va_region.used_pages for regions
* that have had a usage report).
+ * @jit_phys_pages_to_be_allocated: Count of the physical pages that are being
+ * now allocated for just-in-time memory
+ * allocations of a context (across all the
+ * threads). This is supposed to be updated
+ * with @reg_lock held before allocating
+ * the backing pages. This helps ensure that
+ * total physical memory usage for just in
+ * time memory allocation remains within the
+ * @jit_phys_pages_limit in multi-threaded
+ * scenarios.
* @jit_active_head: List containing the just-in-time memory allocations
* which are in use.
* @jit_pool_head: List containing the just-in-time memory allocations
@@ -1425,6 +1543,10 @@ struct kbase_sub_alloc {
* is used to determine the atom's age when it is added to
* the runnable RB-tree.
* @trim_level: Level of JIT allocation trimming to perform on free (0-100%)
+ * @kprcs: Reference to @struct kbase_process that the current
+ * kbase_context belongs to.
+ * @kprcs_link: List link for the list of kbase context maintained
+ * under kbase_process.
* @gwt_enabled: Indicates if tracking of GPU writes is enabled, protected by
* kbase_context.reg_lock.
* @gwt_was_enabled: Simple sticky bit flag to know if GWT was ever enabled.
@@ -1435,6 +1557,7 @@ struct kbase_sub_alloc {
* for context scheduling, protected by hwaccess_lock.
* @atoms_count: Number of GPU atoms currently in use, per priority
* @create_flags: Flags used in context creation.
+ * @kinstr_jm: Kernel job manager instrumentation context handle
*
* A kernel base context is an entity among which the GPU is scheduled.
* Each context has its own GPU address space.
@@ -1545,10 +1668,11 @@ struct kbase_context {
u8 jit_current_allocations_per_bin[256];
u8 jit_version;
u8 jit_group_id;
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
u64 jit_phys_pages_limit;
u64 jit_current_phys_pressure;
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+ u64 jit_phys_pages_to_be_allocated;
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
struct list_head jit_active_head;
struct list_head jit_pool_head;
struct list_head jit_destroy_head;
@@ -1559,6 +1683,9 @@ struct kbase_context {
u8 trim_level;
+ struct kbase_process *kprcs;
+ struct list_head kprcs_link;
+
#ifdef CONFIG_MALI_CINSTR_GWT
bool gwt_enabled;
bool gwt_was_enabled;
@@ -1567,6 +1694,8 @@ struct kbase_context {
#endif
base_context_create_flags create_flags;
+
+ struct kbase_kinstr_jm *kinstr_jm;
};
#ifdef CONFIG_MALI_CINSTR_GWT
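A minimal sketch of a consumer of the kbase_clk_rate_listener type declared above, following the same container_of() pattern the JM hardware counter backend uses later in this patch; the my_consumer and my_on_change names are illustrative:

#include <linux/kernel.h>

struct my_consumer {
	struct kbase_clk_rate_listener listener;
	u32 last_rate_hz;
};

static void my_on_change(struct kbase_clk_rate_listener *listener,
		u32 clk_index, u32 clk_rate_hz)
{
	struct my_consumer *me =
		container_of(listener, struct my_consumer, listener);

	/* Must not sleep and must not call back into the clock rate trace
	 * manager: its lock is held while listeners are notified.
	 */
	me->last_rate_hz = clk_rate_hz;
}

static struct my_consumer my_consumer_instance = {
	.listener = { .notify = my_on_change },
};

The listener is then attached through the clock rate trace manager's subscribe call while its lock is held, as the JM backend does with kbase_clk_rate_trace_manager_subscribe_no_lock().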
diff --git a/mali_kbase/mali_kbase_hwaccess_instr.h b/mali_kbase/mali_kbase_hwaccess_instr.h
index be85491..4fd2e35 100644
--- a/mali_kbase/mali_kbase_hwaccess_instr.h
+++ b/mali_kbase/mali_kbase_hwaccess_instr.h
@@ -35,7 +35,7 @@
* struct kbase_instr_hwcnt_enable - Enable hardware counter collection.
* @dump_buffer: GPU address to write counters to.
* @dump_buffer_bytes: Size in bytes of the buffer pointed to by dump_buffer.
- * @jm_bm: counters selection bitmask (JM).
+ * @fe_bm: counters selection bitmask (Front End).
* @shader_bm: counters selection bitmask (Shader).
* @tiler_bm: counters selection bitmask (Tiler).
* @mmu_l2_bm: counters selection bitmask (MMU_L2).
@@ -45,7 +45,7 @@
struct kbase_instr_hwcnt_enable {
u64 dump_buffer;
u64 dump_buffer_bytes;
- u32 jm_bm;
+ u32 fe_bm;
u32 shader_bm;
u32 tiler_bm;
u32 mmu_l2_bm;
diff --git a/mali_kbase/mali_kbase_hwaccess_time.h b/mali_kbase/mali_kbase_hwaccess_time.h
index a61e5b9..94b7551 100644
--- a/mali_kbase/mali_kbase_hwaccess_time.h
+++ b/mali_kbase/mali_kbase_hwaccess_time.h
@@ -39,4 +39,18 @@
void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
u64 *system_time, struct timespec64 *ts);
+/**
+ * kbase_backend_get_gpu_time_norequest() - Get current GPU time without
+ * request/release cycle counter
+ * @kbdev: Device pointer
+ * @cycle_counter: Pointer to u64 to store cycle counter in
+ * @system_time: Pointer to u64 to store system time in
+ * @ts:             Pointer to struct timespec64 to store current monotonic
+ *                  time in
+ */
+void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev,
+ u64 *cycle_counter,
+ u64 *system_time,
+ struct timespec64 *ts);
+
#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/mali_kbase/mali_kbase_hwcnt.c b/mali_kbase/mali_kbase_hwcnt.c
index 14ec5cb..2708af7 100644
--- a/mali_kbase/mali_kbase_hwcnt.c
+++ b/mali_kbase/mali_kbase_hwcnt.c
@@ -242,6 +242,7 @@ static void kbasep_hwcnt_accumulator_disable(
bool backend_enabled = false;
struct kbase_hwcnt_accumulator *accum;
unsigned long flags;
+ u64 dump_time_ns;
WARN_ON(!hctx);
lockdep_assert_held(&hctx->accum_lock);
@@ -271,7 +272,7 @@ static void kbasep_hwcnt_accumulator_disable(
goto disable;
/* Try and accumulate before disabling */
- errcode = hctx->iface->dump_request(accum->backend);
+ errcode = hctx->iface->dump_request(accum->backend, &dump_time_ns);
if (errcode)
goto disable;
@@ -419,23 +420,16 @@ static int kbasep_hwcnt_accumulator_dump(
/* Initiate the dump if the backend is enabled. */
if ((state == ACCUM_STATE_ENABLED) && cur_map_any_enabled) {
- /* Disable pre-emption, to make the timestamp as accurate as
- * possible.
- */
- preempt_disable();
- {
+ if (dump_buf) {
+ errcode = hctx->iface->dump_request(
+ accum->backend, &dump_time_ns);
+ dump_requested = true;
+ } else {
dump_time_ns = hctx->iface->timestamp_ns(
- accum->backend);
- if (dump_buf) {
- errcode = hctx->iface->dump_request(
accum->backend);
- dump_requested = true;
- } else {
- errcode = hctx->iface->dump_clear(
- accum->backend);
- }
+ errcode = hctx->iface->dump_clear(accum->backend);
}
- preempt_enable();
+
if (errcode)
goto error;
} else {
diff --git a/mali_kbase/mali_kbase_hwcnt_backend.h b/mali_kbase/mali_kbase_hwcnt_backend.h
index b7aa0e1..3a921b7 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend.h
+++ b/mali_kbase/mali_kbase_hwcnt_backend.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -137,6 +137,8 @@ typedef int (*kbase_hwcnt_backend_dump_clear_fn)(
* typedef kbase_hwcnt_backend_dump_request_fn - Request an asynchronous counter
* dump.
* @backend: Non-NULL pointer to backend.
+ * @dump_time_ns: Non-NULL pointer where the timestamp of when the dump was
+ *                requested will be written out on success.
*
* If the backend is not enabled or another dump is already in progress,
* returns an error.
@@ -144,7 +146,8 @@ typedef int (*kbase_hwcnt_backend_dump_clear_fn)(
* Return: 0 on success, else error code.
*/
typedef int (*kbase_hwcnt_backend_dump_request_fn)(
- struct kbase_hwcnt_backend *backend);
+ struct kbase_hwcnt_backend *backend,
+ u64 *dump_time_ns);
/**
* typedef kbase_hwcnt_backend_dump_wait_fn - Wait until the last requested
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_gpu.c b/mali_kbase/mali_kbase_hwcnt_backend_gpu.c
deleted file mode 100644
index 407c768..0000000
--- a/mali_kbase/mali_kbase_hwcnt_backend_gpu.c
+++ /dev/null
@@ -1,510 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- */
-
-#include "mali_kbase_hwcnt_backend_gpu.h"
-#include "mali_kbase_hwcnt_gpu.h"
-#include "mali_kbase_hwcnt_types.h"
-#include "mali_kbase.h"
-#include "mali_kbase_pm_ca.h"
-#include "mali_kbase_hwaccess_instr.h"
-#ifdef CONFIG_MALI_NO_MALI
-#include "backend/gpu/mali_kbase_model_dummy.h"
-#endif
-
-
-/**
- * struct kbase_hwcnt_backend_gpu_info - Information used to create an instance
- * of a GPU hardware counter backend.
- * @kbdev: KBase device.
- * @use_secondary: True if secondary performance counters should be used,
- * else false. Ignored if secondary counters are not supported.
- * @metadata: Hardware counter metadata.
- * @dump_bytes: Bytes of GPU memory required to perform a
- * hardware counter dump.
- */
-struct kbase_hwcnt_backend_gpu_info {
- struct kbase_device *kbdev;
- bool use_secondary;
- const struct kbase_hwcnt_metadata *metadata;
- size_t dump_bytes;
-};
-
-/**
- * struct kbase_hwcnt_backend_gpu - Instance of a GPU hardware counter backend.
- * @info: Info used to create the backend.
- * @kctx: KBase context used for GPU memory allocation and
- * counter dumping.
- * @gpu_dump_va: GPU hardware counter dump buffer virtual address.
- * @cpu_dump_va: CPU mapping of gpu_dump_va.
- * @vmap: Dump buffer vmap.
- * @enabled: True if dumping has been enabled, else false.
- * @pm_core_mask: PM state sync-ed shaders core mask for the enabled dumping.
- */
-struct kbase_hwcnt_backend_gpu {
- const struct kbase_hwcnt_backend_gpu_info *info;
- struct kbase_context *kctx;
- u64 gpu_dump_va;
- void *cpu_dump_va;
- struct kbase_vmap_struct *vmap;
- bool enabled;
- u64 pm_core_mask;
-};
-
-/* GPU backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */
-static u64 kbasep_hwcnt_backend_gpu_timestamp_ns(
- struct kbase_hwcnt_backend *backend)
-{
- (void)backend;
- return ktime_get_raw_ns();
-}
-
-/* GPU backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */
-static int kbasep_hwcnt_backend_gpu_dump_enable_nolock(
- struct kbase_hwcnt_backend *backend,
- const struct kbase_hwcnt_enable_map *enable_map)
-{
- int errcode;
- struct kbase_hwcnt_backend_gpu *backend_gpu =
- (struct kbase_hwcnt_backend_gpu *)backend;
- struct kbase_context *kctx;
- struct kbase_device *kbdev;
- struct kbase_hwcnt_physical_enable_map phys;
- struct kbase_instr_hwcnt_enable enable;
-
- if (!backend_gpu || !enable_map || backend_gpu->enabled ||
- (enable_map->metadata != backend_gpu->info->metadata))
- return -EINVAL;
-
- kctx = backend_gpu->kctx;
- kbdev = backend_gpu->kctx->kbdev;
-
- lockdep_assert_held(&kbdev->hwaccess_lock);
-
- kbase_hwcnt_gpu_enable_map_to_physical(&phys, enable_map);
-
- enable.jm_bm = phys.jm_bm;
- enable.shader_bm = phys.shader_bm;
- enable.tiler_bm = phys.tiler_bm;
- enable.mmu_l2_bm = phys.mmu_l2_bm;
- enable.use_secondary = backend_gpu->info->use_secondary;
- enable.dump_buffer = backend_gpu->gpu_dump_va;
- enable.dump_buffer_bytes = backend_gpu->info->dump_bytes;
-
- errcode = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable);
- if (errcode)
- goto error;
-
- backend_gpu->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev);
- backend_gpu->enabled = true;
-
- return 0;
-error:
- return errcode;
-}
-
-/* GPU backend implementation of kbase_hwcnt_backend_dump_enable_fn */
-static int kbasep_hwcnt_backend_gpu_dump_enable(
- struct kbase_hwcnt_backend *backend,
- const struct kbase_hwcnt_enable_map *enable_map)
-{
- unsigned long flags;
- int errcode;
- struct kbase_hwcnt_backend_gpu *backend_gpu =
- (struct kbase_hwcnt_backend_gpu *)backend;
- struct kbase_device *kbdev;
-
- if (!backend_gpu)
- return -EINVAL;
-
- kbdev = backend_gpu->kctx->kbdev;
-
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-
- errcode = kbasep_hwcnt_backend_gpu_dump_enable_nolock(
- backend, enable_map);
-
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
- return errcode;
-}
-
-/* GPU backend implementation of kbase_hwcnt_backend_dump_disable_fn */
-static void kbasep_hwcnt_backend_gpu_dump_disable(
- struct kbase_hwcnt_backend *backend)
-{
- int errcode;
- struct kbase_hwcnt_backend_gpu *backend_gpu =
- (struct kbase_hwcnt_backend_gpu *)backend;
-
- if (WARN_ON(!backend_gpu) || !backend_gpu->enabled)
- return;
-
- errcode = kbase_instr_hwcnt_disable_internal(backend_gpu->kctx);
- WARN_ON(errcode);
-
- backend_gpu->enabled = false;
-}
-
-/* GPU backend implementation of kbase_hwcnt_backend_dump_clear_fn */
-static int kbasep_hwcnt_backend_gpu_dump_clear(
- struct kbase_hwcnt_backend *backend)
-{
- struct kbase_hwcnt_backend_gpu *backend_gpu =
- (struct kbase_hwcnt_backend_gpu *)backend;
-
- if (!backend_gpu || !backend_gpu->enabled)
- return -EINVAL;
-
- return kbase_instr_hwcnt_clear(backend_gpu->kctx);
-}
-
-/* GPU backend implementation of kbase_hwcnt_backend_dump_request_fn */
-static int kbasep_hwcnt_backend_gpu_dump_request(
- struct kbase_hwcnt_backend *backend)
-{
- struct kbase_hwcnt_backend_gpu *backend_gpu =
- (struct kbase_hwcnt_backend_gpu *)backend;
-
- if (!backend_gpu || !backend_gpu->enabled)
- return -EINVAL;
-
- return kbase_instr_hwcnt_request_dump(backend_gpu->kctx);
-}
-
-/* GPU backend implementation of kbase_hwcnt_backend_dump_wait_fn */
-static int kbasep_hwcnt_backend_gpu_dump_wait(
- struct kbase_hwcnt_backend *backend)
-{
- struct kbase_hwcnt_backend_gpu *backend_gpu =
- (struct kbase_hwcnt_backend_gpu *)backend;
-
- if (!backend_gpu || !backend_gpu->enabled)
- return -EINVAL;
-
- return kbase_instr_hwcnt_wait_for_dump(backend_gpu->kctx);
-}
-
-/* GPU backend implementation of kbase_hwcnt_backend_dump_get_fn */
-static int kbasep_hwcnt_backend_gpu_dump_get(
- struct kbase_hwcnt_backend *backend,
- struct kbase_hwcnt_dump_buffer *dst,
- const struct kbase_hwcnt_enable_map *dst_enable_map,
- bool accumulate)
-{
- struct kbase_hwcnt_backend_gpu *backend_gpu =
- (struct kbase_hwcnt_backend_gpu *)backend;
-
- if (!backend_gpu || !dst || !dst_enable_map ||
- (backend_gpu->info->metadata != dst->metadata) ||
- (dst_enable_map->metadata != dst->metadata))
- return -EINVAL;
-
- /* Invalidate the kernel buffer before reading from it. */
- kbase_sync_mem_regions(
- backend_gpu->kctx, backend_gpu->vmap, KBASE_SYNC_TO_CPU);
-
- return kbase_hwcnt_gpu_dump_get(
- dst, backend_gpu->cpu_dump_va, dst_enable_map,
- backend_gpu->pm_core_mask, accumulate);
-}
-
-/**
- * kbasep_hwcnt_backend_gpu_dump_alloc() - Allocate a GPU dump buffer.
- * @info: Non-NULL pointer to GPU backend info.
- * @kctx: Non-NULL pointer to kbase context.
- * @gpu_dump_va: Non-NULL pointer to where GPU dump buffer virtual address
- * is stored on success.
- *
- * Return: 0 on success, else error code.
- */
-static int kbasep_hwcnt_backend_gpu_dump_alloc(
- const struct kbase_hwcnt_backend_gpu_info *info,
- struct kbase_context *kctx,
- u64 *gpu_dump_va)
-{
- struct kbase_va_region *reg;
- u64 flags;
- u64 nr_pages;
-
- WARN_ON(!info);
- WARN_ON(!kctx);
- WARN_ON(!gpu_dump_va);
-
- flags = BASE_MEM_PROT_CPU_RD |
- BASE_MEM_PROT_GPU_WR |
- BASEP_MEM_PERMANENT_KERNEL_MAPPING |
- BASE_MEM_CACHED_CPU;
-
- if (kctx->kbdev->mmu_mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE)
- flags |= BASE_MEM_UNCACHED_GPU;
-
- nr_pages = PFN_UP(info->dump_bytes);
-
- reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va);
-
- if (!reg)
- return -ENOMEM;
-
- return 0;
-}
-
-/**
- * kbasep_hwcnt_backend_gpu_dump_free() - Free an allocated GPU dump buffer.
- * @kctx: Non-NULL pointer to kbase context.
- * @gpu_dump_va: GPU dump buffer virtual address.
- */
-static void kbasep_hwcnt_backend_gpu_dump_free(
- struct kbase_context *kctx,
- u64 gpu_dump_va)
-{
- WARN_ON(!kctx);
- if (gpu_dump_va)
- kbase_mem_free(kctx, gpu_dump_va);
-}
-
-/**
- * kbasep_hwcnt_backend_gpu_destroy() - Destroy a GPU backend.
- * @backend: Pointer to GPU backend to destroy.
- *
- * Can be safely called on a backend in any state of partial construction.
- */
-static void kbasep_hwcnt_backend_gpu_destroy(
- struct kbase_hwcnt_backend_gpu *backend)
-{
- if (!backend)
- return;
-
- if (backend->kctx) {
- struct kbase_context *kctx = backend->kctx;
- struct kbase_device *kbdev = kctx->kbdev;
-
- if (backend->cpu_dump_va)
- kbase_phy_alloc_mapping_put(kctx, backend->vmap);
-
- if (backend->gpu_dump_va)
- kbasep_hwcnt_backend_gpu_dump_free(
- kctx, backend->gpu_dump_va);
-
- kbasep_js_release_privileged_ctx(kbdev, kctx);
- kbase_destroy_context(kctx);
- }
-
- kfree(backend);
-}
-
-/**
- * kbasep_hwcnt_backend_gpu_create() - Create a GPU backend.
- * @info: Non-NULL pointer to backend info.
- * @out_backend: Non-NULL pointer to where backend is stored on success.
- *
- * Return: 0 on success, else error code.
- */
-static int kbasep_hwcnt_backend_gpu_create(
- const struct kbase_hwcnt_backend_gpu_info *info,
- struct kbase_hwcnt_backend_gpu **out_backend)
-{
-
- int errcode;
- struct kbase_device *kbdev;
- struct kbase_hwcnt_backend_gpu *backend = NULL;
-
- WARN_ON(!info);
- WARN_ON(!out_backend);
-
- kbdev = info->kbdev;
-
- backend = kzalloc(sizeof(*backend), GFP_KERNEL);
- if (!backend)
- goto alloc_error;
-
- backend->info = info;
-
- backend->kctx = kbase_create_context(kbdev, true,
- BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED, 0, NULL);
- if (!backend->kctx)
- goto alloc_error;
-
- kbasep_js_schedule_privileged_ctx(kbdev, backend->kctx);
-
- errcode = kbasep_hwcnt_backend_gpu_dump_alloc(
- info, backend->kctx, &backend->gpu_dump_va);
- if (errcode)
- goto error;
-
- backend->cpu_dump_va = kbase_phy_alloc_mapping_get(backend->kctx,
- backend->gpu_dump_va, &backend->vmap);
- if (!backend->cpu_dump_va)
- goto alloc_error;
-
-#ifdef CONFIG_MALI_NO_MALI
- /* The dummy model needs the CPU mapping. */
- gpu_model_set_dummy_prfcnt_base_cpu(backend->cpu_dump_va);
-#endif
-
- *out_backend = backend;
- return 0;
-
-alloc_error:
- errcode = -ENOMEM;
-error:
- kbasep_hwcnt_backend_gpu_destroy(backend);
- return errcode;
-}
-
-/* GPU backend implementation of kbase_hwcnt_backend_init_fn */
-static int kbasep_hwcnt_backend_gpu_init(
- const struct kbase_hwcnt_backend_info *info,
- struct kbase_hwcnt_backend **out_backend)
-{
- int errcode;
- struct kbase_hwcnt_backend_gpu *backend = NULL;
-
- if (!info || !out_backend)
- return -EINVAL;
-
- errcode = kbasep_hwcnt_backend_gpu_create(
- (const struct kbase_hwcnt_backend_gpu_info *) info, &backend);
- if (errcode)
- return errcode;
-
- *out_backend = (struct kbase_hwcnt_backend *)backend;
-
- return 0;
-}
-
-/* GPU backend implementation of kbase_hwcnt_backend_term_fn */
-static void kbasep_hwcnt_backend_gpu_term(struct kbase_hwcnt_backend *backend)
-{
- if (!backend)
- return;
-
- kbasep_hwcnt_backend_gpu_dump_disable(backend);
- kbasep_hwcnt_backend_gpu_destroy(
- (struct kbase_hwcnt_backend_gpu *)backend);
-}
-
-/**
- * kbasep_hwcnt_backend_gpu_info_destroy() - Destroy a GPU backend info.
- * @info: Pointer to info to destroy.
- *
- * Can be safely called on a backend info in any state of partial construction.
- */
-static void kbasep_hwcnt_backend_gpu_info_destroy(
- const struct kbase_hwcnt_backend_gpu_info *info)
-{
- if (!info)
- return;
-
- kbase_hwcnt_gpu_metadata_destroy(info->metadata);
- kfree(info);
-}
-
-/**
- * kbasep_hwcnt_backend_gpu_info_create() - Create a GPU backend info.
- * @kbdev: Non_NULL pointer to kbase device.
- * @out_info: Non-NULL pointer to where info is stored on success.
- *
- * Return 0 on success, else error code.
- */
-static int kbasep_hwcnt_backend_gpu_info_create(
- struct kbase_device *kbdev,
- const struct kbase_hwcnt_backend_gpu_info **out_info)
-{
- int errcode = -ENOMEM;
- struct kbase_hwcnt_gpu_info hwcnt_gpu_info;
- struct kbase_hwcnt_backend_gpu_info *info = NULL;
-
- WARN_ON(!kbdev);
- WARN_ON(!out_info);
-
- errcode = kbase_hwcnt_gpu_info_init(kbdev, &hwcnt_gpu_info);
- if (errcode)
- return errcode;
-
- info = kzalloc(sizeof(*info), GFP_KERNEL);
- if (!info)
- goto error;
-
- info->kbdev = kbdev;
-
-#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
- info->use_secondary = true;
-#else
- info->use_secondary = false;
-#endif
-
- errcode = kbase_hwcnt_gpu_metadata_create(
- &hwcnt_gpu_info, info->use_secondary,
- &info->metadata,
- &info->dump_bytes);
- if (errcode)
- goto error;
-
- *out_info = info;
-
- return 0;
-error:
- kbasep_hwcnt_backend_gpu_info_destroy(info);
- return errcode;
-}
-
-int kbase_hwcnt_backend_gpu_create(
- struct kbase_device *kbdev,
- struct kbase_hwcnt_backend_interface *iface)
-{
- int errcode;
- const struct kbase_hwcnt_backend_gpu_info *info = NULL;
-
- if (!kbdev || !iface)
- return -EINVAL;
-
- errcode = kbasep_hwcnt_backend_gpu_info_create(kbdev, &info);
-
- if (errcode)
- return errcode;
-
- iface->metadata = info->metadata;
- iface->info = (struct kbase_hwcnt_backend_info *)info;
- iface->init = kbasep_hwcnt_backend_gpu_init;
- iface->term = kbasep_hwcnt_backend_gpu_term;
- iface->timestamp_ns = kbasep_hwcnt_backend_gpu_timestamp_ns;
- iface->dump_enable = kbasep_hwcnt_backend_gpu_dump_enable;
- iface->dump_enable_nolock = kbasep_hwcnt_backend_gpu_dump_enable_nolock;
- iface->dump_disable = kbasep_hwcnt_backend_gpu_dump_disable;
- iface->dump_clear = kbasep_hwcnt_backend_gpu_dump_clear;
- iface->dump_request = kbasep_hwcnt_backend_gpu_dump_request;
- iface->dump_wait = kbasep_hwcnt_backend_gpu_dump_wait;
- iface->dump_get = kbasep_hwcnt_backend_gpu_dump_get;
-
- return 0;
-}
-
-void kbase_hwcnt_backend_gpu_destroy(
- struct kbase_hwcnt_backend_interface *iface)
-{
- if (!iface)
- return;
-
- kbasep_hwcnt_backend_gpu_info_destroy(
- (const struct kbase_hwcnt_backend_gpu_info *)iface->info);
- memset(iface, 0, sizeof(*iface));
-}
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_jm.c b/mali_kbase/mali_kbase_hwcnt_backend_jm.c
new file mode 100644
index 0000000..02a42bf
--- /dev/null
+++ b/mali_kbase/mali_kbase_hwcnt_backend_jm.c
@@ -0,0 +1,707 @@
+/*
+ *
+ * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_hwcnt_backend_jm.h"
+#include "mali_kbase_hwcnt_gpu.h"
+#include "mali_kbase_hwcnt_types.h"
+#include "mali_kbase.h"
+#include "mali_kbase_pm_ca.h"
+#include "mali_kbase_hwaccess_instr.h"
+#include "mali_kbase_hwaccess_time.h"
+#include "mali_kbase_ccswe.h"
+
+#ifdef CONFIG_MALI_NO_MALI
+#include "backend/gpu/mali_kbase_model_dummy.h"
+#endif
+#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
+
+#include "backend/gpu/mali_kbase_pm_internal.h"
+
+/**
+ * struct kbase_hwcnt_backend_jm_info - Information used to create an instance
+ * of a JM hardware counter backend.
+ * @kbdev: KBase device.
+ * @use_secondary: True if secondary performance counters should be used,
+ * else false. Ignored if secondary counters are not supported.
+ * @metadata: Hardware counter metadata.
+ * @dump_bytes: Bytes of GPU memory required to perform a
+ * hardware counter dump.
+ */
+struct kbase_hwcnt_backend_jm_info {
+ struct kbase_device *kbdev;
+ bool use_secondary;
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t dump_bytes;
+};
+
+/**
+ * struct kbase_hwcnt_backend_jm - Instance of a JM hardware counter backend.
+ * @info: Info used to create the backend.
+ * @kctx: KBase context used for GPU memory allocation and
+ * counter dumping.
+ * @gpu_dump_va: GPU hardware counter dump buffer virtual address.
+ * @cpu_dump_va: CPU mapping of gpu_dump_va.
+ * @vmap: Dump buffer vmap.
+ * @enabled: True if dumping has been enabled, else false.
+ * @pm_core_mask: PM state synced shader core mask for the enabled
+ *                 dumping.
+ * @clk_enable_map: The enable map specifying enabled clock domains.
+ * @cycle_count_elapsed:
+ * Cycle count elapsed for a given sample period.
+ *                      The cycle count for the top clock domain, index 0, is
+ *                      read directly from hardware; the other clock domains
+ *                      are estimated in software.
+ * @prev_cycle_count: Previous cycle count to calculate the cycle count for
+ * sample period.
+ * @rate_listener: Clock rate listener callback state.
+ * @ccswe_shader_cores: Shader cores cycle count software estimator.
+ */
+struct kbase_hwcnt_backend_jm {
+ const struct kbase_hwcnt_backend_jm_info *info;
+ struct kbase_context *kctx;
+ u64 gpu_dump_va;
+ void *cpu_dump_va;
+ struct kbase_vmap_struct *vmap;
+ bool enabled;
+ u64 pm_core_mask;
+ u64 clk_enable_map;
+ u64 cycle_count_elapsed[BASE_MAX_NR_CLOCKS_REGULATORS];
+ u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS];
+ struct kbase_clk_rate_listener rate_listener;
+ struct kbase_ccswe ccswe_shader_cores;
+};
+
+/**
+ * kbasep_hwcnt_backend_jm_on_freq_change() - On freq change callback
+ *
+ * @rate_listener: Callback state
+ * @clk_index: Clock index
+ * @clk_rate_hz: Clock frequency (Hz)
+ */
+static void kbasep_hwcnt_backend_jm_on_freq_change(
+ struct kbase_clk_rate_listener *rate_listener,
+ u32 clk_index,
+ u32 clk_rate_hz)
+{
+ struct kbase_hwcnt_backend_jm *backend_jm = container_of(
+ rate_listener, struct kbase_hwcnt_backend_jm, rate_listener);
+ u64 timestamp_ns;
+
+ if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES)
+ return;
+
+ timestamp_ns = ktime_get_raw_ns();
+ kbase_ccswe_freq_change(
+ &backend_jm->ccswe_shader_cores, timestamp_ns, clk_rate_hz);
+}
+
+/**
+ * kbasep_hwcnt_backend_jm_cc_enable() - Enable cycle count tracking
+ *
+ * @backend_jm: Non-NULL pointer to the JM backend.
+ * @enable_map: Non-NULL pointer to enable map specifying enabled counters.
+ * @timestamp_ns: Timestamp(ns) when HWCNT were enabled.
+ */
+static void kbasep_hwcnt_backend_jm_cc_enable(
+ struct kbase_hwcnt_backend_jm *backend_jm,
+ const struct kbase_hwcnt_enable_map *enable_map,
+ u64 timestamp_ns)
+{
+ struct kbase_device *kbdev = backend_jm->kctx->kbdev;
+ u64 clk_enable_map = enable_map->clk_enable_map;
+ u64 cycle_count;
+
+ if (kbase_hwcnt_clk_enable_map_enabled(
+ clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) {
+ /* turn on the cycle counter */
+ kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev);
+ /* Read cycle count for top clock domain. */
+ kbase_backend_get_gpu_time_norequest(
+ kbdev, &cycle_count, NULL, NULL);
+
+ backend_jm->prev_cycle_count[KBASE_CLOCK_DOMAIN_TOP] =
+ cycle_count;
+ }
+
+ if (kbase_hwcnt_clk_enable_map_enabled(
+ clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
+ /* software estimation for non-top clock domains */
+ struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
+ const struct kbase_clk_data *clk_data =
+ rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES];
+ u32 cur_freq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rtm->lock, flags);
+
+ cur_freq = (u32) clk_data->clock_val;
+ kbase_ccswe_reset(&backend_jm->ccswe_shader_cores);
+ kbase_ccswe_freq_change(
+ &backend_jm->ccswe_shader_cores,
+ timestamp_ns,
+ cur_freq);
+
+ kbase_clk_rate_trace_manager_subscribe_no_lock(
+ rtm, &backend_jm->rate_listener);
+
+ spin_unlock_irqrestore(&rtm->lock, flags);
+
+ /* ccswe was reset. The estimated cycle is zero. */
+ backend_jm->prev_cycle_count[
+ KBASE_CLOCK_DOMAIN_SHADER_CORES] = 0;
+ }
+
+ /* Keep clk_enable_map for dump_request. */
+ backend_jm->clk_enable_map = clk_enable_map;
+}
+
+/**
+ * kbasep_hwcnt_backend_jm_cc_disable() - Disable cycle count tracking
+ *
+ * @backend_jm: Non-NULL pointer to the JM backend.
+ */
+static void kbasep_hwcnt_backend_jm_cc_disable(
+ struct kbase_hwcnt_backend_jm *backend_jm)
+{
+ struct kbase_device *kbdev = backend_jm->kctx->kbdev;
+ struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
+ u64 clk_enable_map = backend_jm->clk_enable_map;
+
+ if (kbase_hwcnt_clk_enable_map_enabled(
+ clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) {
+ /* turn off the cycle counter */
+ kbase_pm_release_gpu_cycle_counter(backend_jm->kctx->kbdev);
+ }
+ if (kbase_hwcnt_clk_enable_map_enabled(
+ clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
+
+ kbase_clk_rate_trace_manager_unsubscribe(
+ rtm, &backend_jm->rate_listener);
+ }
+}
+
+
+/* JM backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */
+static u64 kbasep_hwcnt_backend_jm_timestamp_ns(
+ struct kbase_hwcnt_backend *backend)
+{
+ (void)backend;
+ return ktime_get_raw_ns();
+}
+
+/* JM backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */
+static int kbasep_hwcnt_backend_jm_dump_enable_nolock(
+ struct kbase_hwcnt_backend *backend,
+ const struct kbase_hwcnt_enable_map *enable_map)
+{
+ int errcode;
+ struct kbase_hwcnt_backend_jm *backend_jm =
+ (struct kbase_hwcnt_backend_jm *)backend;
+ struct kbase_context *kctx;
+ struct kbase_device *kbdev;
+ struct kbase_hwcnt_physical_enable_map phys;
+ struct kbase_instr_hwcnt_enable enable;
+ u64 timestamp_ns;
+
+ if (!backend_jm || !enable_map || backend_jm->enabled ||
+ (enable_map->metadata != backend_jm->info->metadata))
+ return -EINVAL;
+
+ kctx = backend_jm->kctx;
+ kbdev = backend_jm->kctx->kbdev;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ kbase_hwcnt_gpu_enable_map_to_physical(&phys, enable_map);
+
+ enable.fe_bm = phys.fe_bm;
+ enable.shader_bm = phys.shader_bm;
+ enable.tiler_bm = phys.tiler_bm;
+ enable.mmu_l2_bm = phys.mmu_l2_bm;
+ enable.use_secondary = backend_jm->info->use_secondary;
+ enable.dump_buffer = backend_jm->gpu_dump_va;
+ enable.dump_buffer_bytes = backend_jm->info->dump_bytes;
+
+ timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend);
+
+ errcode = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable);
+ if (errcode)
+ goto error;
+
+ backend_jm->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev);
+ backend_jm->enabled = true;
+
+ kbasep_hwcnt_backend_jm_cc_enable(backend_jm, enable_map, timestamp_ns);
+
+ return 0;
+error:
+ return errcode;
+}
+
+/* JM backend implementation of kbase_hwcnt_backend_dump_enable_fn */
+static int kbasep_hwcnt_backend_jm_dump_enable(
+ struct kbase_hwcnt_backend *backend,
+ const struct kbase_hwcnt_enable_map *enable_map)
+{
+ unsigned long flags;
+ int errcode;
+ struct kbase_hwcnt_backend_jm *backend_jm =
+ (struct kbase_hwcnt_backend_jm *)backend;
+ struct kbase_device *kbdev;
+
+ if (!backend_jm)
+ return -EINVAL;
+
+ kbdev = backend_jm->kctx->kbdev;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ errcode = kbasep_hwcnt_backend_jm_dump_enable_nolock(
+ backend, enable_map);
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ return errcode;
+}
+
+/* JM backend implementation of kbase_hwcnt_backend_dump_disable_fn */
+static void kbasep_hwcnt_backend_jm_dump_disable(
+ struct kbase_hwcnt_backend *backend)
+{
+ int errcode;
+ struct kbase_hwcnt_backend_jm *backend_jm =
+ (struct kbase_hwcnt_backend_jm *)backend;
+
+ if (WARN_ON(!backend_jm) || !backend_jm->enabled)
+ return;
+
+ kbasep_hwcnt_backend_jm_cc_disable(backend_jm);
+
+ errcode = kbase_instr_hwcnt_disable_internal(backend_jm->kctx);
+ WARN_ON(errcode);
+
+ backend_jm->enabled = false;
+}
+
+/* JM backend implementation of kbase_hwcnt_backend_dump_clear_fn */
+static int kbasep_hwcnt_backend_jm_dump_clear(
+ struct kbase_hwcnt_backend *backend)
+{
+ struct kbase_hwcnt_backend_jm *backend_jm =
+ (struct kbase_hwcnt_backend_jm *)backend;
+
+ if (!backend_jm || !backend_jm->enabled)
+ return -EINVAL;
+
+ return kbase_instr_hwcnt_clear(backend_jm->kctx);
+}
+
+/* JM backend implementation of kbase_hwcnt_backend_dump_request_fn */
+static int kbasep_hwcnt_backend_jm_dump_request(
+ struct kbase_hwcnt_backend *backend,
+ u64 *dump_time_ns)
+{
+ struct kbase_hwcnt_backend_jm *backend_jm =
+ (struct kbase_hwcnt_backend_jm *)backend;
+ struct kbase_device *kbdev;
+ const struct kbase_hwcnt_metadata *metadata;
+ u64 current_cycle_count;
+ size_t clk;
+ int ret;
+
+ if (!backend_jm || !backend_jm->enabled)
+ return -EINVAL;
+
+ kbdev = backend_jm->kctx->kbdev;
+ metadata = backend_jm->info->metadata;
+
+ /* Disable pre-emption, to make the timestamp as accurate as possible */
+ preempt_disable();
+ {
+ *dump_time_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend);
+ ret = kbase_instr_hwcnt_request_dump(backend_jm->kctx);
+
+ kbase_hwcnt_metadata_for_each_clock(metadata, clk) {
+ if (!kbase_hwcnt_clk_enable_map_enabled(
+ backend_jm->clk_enable_map, clk))
+ continue;
+
+ if (clk == KBASE_CLOCK_DOMAIN_TOP) {
+ /* Read cycle count for top clock domain. */
+ kbase_backend_get_gpu_time_norequest(
+ kbdev, &current_cycle_count,
+ NULL, NULL);
+ } else {
+ /*
+ * Estimate cycle count for non-top clock
+ * domain.
+ */
+ current_cycle_count = kbase_ccswe_cycle_at(
+ &backend_jm->ccswe_shader_cores,
+ *dump_time_ns);
+ }
+ backend_jm->cycle_count_elapsed[clk] =
+ current_cycle_count -
+ backend_jm->prev_cycle_count[clk];
+
+ /*
+ * Keep the current cycle count for later calculation.
+ */
+ backend_jm->prev_cycle_count[clk] = current_cycle_count;
+ }
+ }
+ preempt_enable();
+
+ return ret;
+}
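For the shader-cores domain, the elapsed value produced above comes from kbase_ccswe_cycle_at(), a software estimate that integrates the clock frequency reported by the rate-trace manager over time; the kbase_ccswe helpers themselves are not part of this excerpt. Purely to illustrate the idea, here is a stand-alone sketch with hypothetical names (the real state layout may differ, and overflow handling is omitted):

#include <linux/ktime.h>
#include <linux/math64.h>
#include <linux/types.h>

/* Hypothetical snapshot of the most recent frequency change; the real
 * kbase_ccswe state is defined elsewhere and may differ.
 */
struct ccswe_sketch {
	u64 timestamp_ns;	/* time of the last frequency change */
	u64 cycles_elapsed;	/* cycles accumulated up to that change */
	u32 freq_hz;		/* clock frequency since that change */
};

static u64 ccswe_sketch_cycle_at(const struct ccswe_sketch *s, u64 now_ns)
{
	u64 delta_ns = now_ns - s->timestamp_ns;

	/* cycles so far + frequency * time since the last change */
	return s->cycles_elapsed + div_u64(delta_ns * s->freq_hz, NSEC_PER_SEC);
}

This is consistent with the enable path earlier in the file, where kbase_ccswe_reset() and kbase_ccswe_freq_change() seed exactly this kind of snapshot and the previous cycle count is set to zero.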
+
+/* JM backend implementation of kbase_hwcnt_backend_dump_wait_fn */
+static int kbasep_hwcnt_backend_jm_dump_wait(
+ struct kbase_hwcnt_backend *backend)
+{
+ struct kbase_hwcnt_backend_jm *backend_jm =
+ (struct kbase_hwcnt_backend_jm *)backend;
+
+ if (!backend_jm || !backend_jm->enabled)
+ return -EINVAL;
+
+ return kbase_instr_hwcnt_wait_for_dump(backend_jm->kctx);
+}
+
+/* JM backend implementation of kbase_hwcnt_backend_dump_get_fn */
+static int kbasep_hwcnt_backend_jm_dump_get(
+ struct kbase_hwcnt_backend *backend,
+ struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_enable_map *dst_enable_map,
+ bool accumulate)
+{
+ struct kbase_hwcnt_backend_jm *backend_jm =
+ (struct kbase_hwcnt_backend_jm *)backend;
+ size_t clk;
+
+ if (!backend_jm || !dst || !dst_enable_map ||
+ (backend_jm->info->metadata != dst->metadata) ||
+ (dst_enable_map->metadata != dst->metadata))
+ return -EINVAL;
+
+ /* Invalidate the kernel buffer before reading from it. */
+ kbase_sync_mem_regions(
+ backend_jm->kctx, backend_jm->vmap, KBASE_SYNC_TO_CPU);
+
+ kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) {
+ if (!kbase_hwcnt_clk_enable_map_enabled(
+ dst_enable_map->clk_enable_map, clk))
+ continue;
+
+ /* Extract elapsed cycle count for each clock domain. */
+ dst->clk_cnt_buf[clk] = backend_jm->cycle_count_elapsed[clk];
+ }
+
+ return kbase_hwcnt_gpu_dump_get(
+ dst, backend_jm->cpu_dump_va, dst_enable_map,
+ backend_jm->pm_core_mask, accumulate);
+}
+
+/**
+ * kbasep_hwcnt_backend_jm_dump_alloc() - Allocate a GPU dump buffer.
+ * @info: Non-NULL pointer to JM backend info.
+ * @kctx: Non-NULL pointer to kbase context.
+ * @gpu_dump_va: Non-NULL pointer to where GPU dump buffer virtual address
+ * is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_backend_jm_dump_alloc(
+ const struct kbase_hwcnt_backend_jm_info *info,
+ struct kbase_context *kctx,
+ u64 *gpu_dump_va)
+{
+ struct kbase_va_region *reg;
+ u64 flags;
+ u64 nr_pages;
+
+ WARN_ON(!info);
+ WARN_ON(!kctx);
+ WARN_ON(!gpu_dump_va);
+
+ flags = BASE_MEM_PROT_CPU_RD |
+ BASE_MEM_PROT_GPU_WR |
+ BASEP_MEM_PERMANENT_KERNEL_MAPPING |
+ BASE_MEM_CACHED_CPU;
+
+ if (kctx->kbdev->mmu_mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE)
+ flags |= BASE_MEM_UNCACHED_GPU;
+
+ nr_pages = PFN_UP(info->dump_bytes);
+
+ reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va);
+
+ if (!reg)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/**
+ * kbasep_hwcnt_backend_jm_dump_free() - Free an allocated GPU dump buffer.
+ * @kctx: Non-NULL pointer to kbase context.
+ * @gpu_dump_va: GPU dump buffer virtual address.
+ */
+static void kbasep_hwcnt_backend_jm_dump_free(
+ struct kbase_context *kctx,
+ u64 gpu_dump_va)
+{
+ WARN_ON(!kctx);
+ if (gpu_dump_va)
+ kbase_mem_free(kctx, gpu_dump_va);
+}
+
+/**
+ * kbasep_hwcnt_backend_jm_destroy() - Destroy a JM backend.
+ * @backend: Pointer to JM backend to destroy.
+ *
+ * Can be safely called on a backend in any state of partial construction.
+ */
+static void kbasep_hwcnt_backend_jm_destroy(
+ struct kbase_hwcnt_backend_jm *backend)
+{
+ if (!backend)
+ return;
+
+ if (backend->kctx) {
+ struct kbase_context *kctx = backend->kctx;
+ struct kbase_device *kbdev = kctx->kbdev;
+
+ if (backend->cpu_dump_va)
+ kbase_phy_alloc_mapping_put(kctx, backend->vmap);
+
+ if (backend->gpu_dump_va)
+ kbasep_hwcnt_backend_jm_dump_free(
+ kctx, backend->gpu_dump_va);
+
+ kbasep_js_release_privileged_ctx(kbdev, kctx);
+ kbase_destroy_context(kctx);
+ }
+
+ kfree(backend);
+}
+
+/**
+ * kbasep_hwcnt_backend_jm_create() - Create a JM backend.
+ * @info: Non-NULL pointer to backend info.
+ * @out_backend: Non-NULL pointer to where backend is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_backend_jm_create(
+ const struct kbase_hwcnt_backend_jm_info *info,
+ struct kbase_hwcnt_backend_jm **out_backend)
+{
+
+ int errcode;
+ struct kbase_device *kbdev;
+ struct kbase_hwcnt_backend_jm *backend = NULL;
+
+ WARN_ON(!info);
+ WARN_ON(!out_backend);
+
+ kbdev = info->kbdev;
+
+ backend = kzalloc(sizeof(*backend), GFP_KERNEL);
+ if (!backend)
+ goto alloc_error;
+
+ backend->info = info;
+
+ backend->kctx = kbase_create_context(kbdev, true,
+ BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED, 0, NULL);
+ if (!backend->kctx)
+ goto alloc_error;
+
+ kbasep_js_schedule_privileged_ctx(kbdev, backend->kctx);
+
+ errcode = kbasep_hwcnt_backend_jm_dump_alloc(
+ info, backend->kctx, &backend->gpu_dump_va);
+ if (errcode)
+ goto error;
+
+ backend->cpu_dump_va = kbase_phy_alloc_mapping_get(backend->kctx,
+ backend->gpu_dump_va, &backend->vmap);
+ if (!backend->cpu_dump_va)
+ goto alloc_error;
+
+ kbase_ccswe_init(&backend->ccswe_shader_cores);
+ backend->rate_listener.notify = kbasep_hwcnt_backend_jm_on_freq_change;
+
+#ifdef CONFIG_MALI_NO_MALI
+ /* The dummy model needs the CPU mapping. */
+ gpu_model_set_dummy_prfcnt_base_cpu(backend->cpu_dump_va);
+#endif
+
+ *out_backend = backend;
+ return 0;
+
+alloc_error:
+ errcode = -ENOMEM;
+error:
+ kbasep_hwcnt_backend_jm_destroy(backend);
+ return errcode;
+}
+
+/* JM backend implementation of kbase_hwcnt_backend_init_fn */
+static int kbasep_hwcnt_backend_jm_init(
+ const struct kbase_hwcnt_backend_info *info,
+ struct kbase_hwcnt_backend **out_backend)
+{
+ int errcode;
+ struct kbase_hwcnt_backend_jm *backend = NULL;
+
+ if (!info || !out_backend)
+ return -EINVAL;
+
+ errcode = kbasep_hwcnt_backend_jm_create(
+ (const struct kbase_hwcnt_backend_jm_info *) info, &backend);
+ if (errcode)
+ return errcode;
+
+ *out_backend = (struct kbase_hwcnt_backend *)backend;
+
+ return 0;
+}
+
+/* JM backend implementation of kbase_hwcnt_backend_term_fn */
+static void kbasep_hwcnt_backend_jm_term(struct kbase_hwcnt_backend *backend)
+{
+ if (!backend)
+ return;
+
+ kbasep_hwcnt_backend_jm_dump_disable(backend);
+ kbasep_hwcnt_backend_jm_destroy(
+ (struct kbase_hwcnt_backend_jm *)backend);
+}
+
+/**
+ * kbasep_hwcnt_backend_jm_info_destroy() - Destroy a JM backend info.
+ * @info: Pointer to info to destroy.
+ *
+ * Can be safely called on a backend info in any state of partial construction.
+ */
+static void kbasep_hwcnt_backend_jm_info_destroy(
+ const struct kbase_hwcnt_backend_jm_info *info)
+{
+ if (!info)
+ return;
+
+ kbase_hwcnt_gpu_metadata_destroy(info->metadata);
+ kfree(info);
+}
+
+/**
+ * kbasep_hwcnt_backend_jm_info_create() - Create a JM backend info.
+ * @kbdev: Non-NULL pointer to kbase device.
+ * @out_info: Non-NULL pointer to where info is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_backend_jm_info_create(
+ struct kbase_device *kbdev,
+ const struct kbase_hwcnt_backend_jm_info **out_info)
+{
+ int errcode = -ENOMEM;
+ struct kbase_hwcnt_gpu_info hwcnt_gpu_info;
+ struct kbase_hwcnt_backend_jm_info *info = NULL;
+
+ WARN_ON(!kbdev);
+ WARN_ON(!out_info);
+
+ errcode = kbase_hwcnt_gpu_info_init(kbdev, &hwcnt_gpu_info);
+ if (errcode)
+ return errcode;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ goto error;
+
+ info->kbdev = kbdev;
+
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+ info->use_secondary = true;
+#else
+ info->use_secondary = false;
+#endif
+
+ errcode = kbase_hwcnt_gpu_metadata_create(
+ &hwcnt_gpu_info, info->use_secondary,
+ &info->metadata,
+ &info->dump_bytes);
+ if (errcode)
+ goto error;
+
+ *out_info = info;
+
+ return 0;
+error:
+ kbasep_hwcnt_backend_jm_info_destroy(info);
+ return errcode;
+}
+
+int kbase_hwcnt_backend_jm_create(
+ struct kbase_device *kbdev,
+ struct kbase_hwcnt_backend_interface *iface)
+{
+ int errcode;
+ const struct kbase_hwcnt_backend_jm_info *info = NULL;
+
+ if (!kbdev || !iface)
+ return -EINVAL;
+
+ errcode = kbasep_hwcnt_backend_jm_info_create(kbdev, &info);
+
+ if (errcode)
+ return errcode;
+
+ iface->metadata = info->metadata;
+ iface->info = (struct kbase_hwcnt_backend_info *)info;
+ iface->init = kbasep_hwcnt_backend_jm_init;
+ iface->term = kbasep_hwcnt_backend_jm_term;
+ iface->timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns;
+ iface->dump_enable = kbasep_hwcnt_backend_jm_dump_enable;
+ iface->dump_enable_nolock = kbasep_hwcnt_backend_jm_dump_enable_nolock;
+ iface->dump_disable = kbasep_hwcnt_backend_jm_dump_disable;
+ iface->dump_clear = kbasep_hwcnt_backend_jm_dump_clear;
+ iface->dump_request = kbasep_hwcnt_backend_jm_dump_request;
+ iface->dump_wait = kbasep_hwcnt_backend_jm_dump_wait;
+ iface->dump_get = kbasep_hwcnt_backend_jm_dump_get;
+
+ return 0;
+}
+
+void kbase_hwcnt_backend_jm_destroy(
+ struct kbase_hwcnt_backend_interface *iface)
+{
+ if (!iface)
+ return;
+
+ kbasep_hwcnt_backend_jm_info_destroy(
+ (const struct kbase_hwcnt_backend_jm_info *)iface->info);
+ memset(iface, 0, sizeof(*iface));
+}
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_gpu.h b/mali_kbase/mali_kbase_hwcnt_backend_jm.h
index 7712f14..f15faeb 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_gpu.h
+++ b/mali_kbase/mali_kbase_hwcnt_backend_jm.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -21,19 +21,19 @@
*/
/**
- * Concrete implementation of mali_kbase_hwcnt_backend interface for GPU
+ * Concrete implementation of mali_kbase_hwcnt_backend interface for JM
* backend.
*/
-#ifndef _KBASE_HWCNT_BACKEND_GPU_H_
-#define _KBASE_HWCNT_BACKEND_GPU_H_
+#ifndef _KBASE_HWCNT_BACKEND_JM_H_
+#define _KBASE_HWCNT_BACKEND_JM_H_
#include "mali_kbase_hwcnt_backend.h"
struct kbase_device;
/**
- * kbase_hwcnt_backend_gpu_create() - Create a GPU hardware counter backend
+ * kbase_hwcnt_backend_jm_create() - Create a JM hardware counter backend
* interface.
* @kbdev: Non-NULL pointer to kbase device.
* @iface: Non-NULL pointer to backend interface structure that is filled in
@@ -43,19 +43,19 @@ struct kbase_device;
*
* Return: 0 on success, else error code.
*/
-int kbase_hwcnt_backend_gpu_create(
+int kbase_hwcnt_backend_jm_create(
struct kbase_device *kbdev,
struct kbase_hwcnt_backend_interface *iface);
/**
- * kbase_hwcnt_backend_gpu_destroy() - Destroy a GPU hardware counter backend
+ * kbase_hwcnt_backend_jm_destroy() - Destroy a JM hardware counter backend
* interface.
* @iface: Pointer to interface to destroy.
*
* Can be safely called on an all-zeroed interface, or on an already destroyed
* interface.
*/
-void kbase_hwcnt_backend_gpu_destroy(
+void kbase_hwcnt_backend_jm_destroy(
struct kbase_hwcnt_backend_interface *iface);
-#endif /* _KBASE_HWCNT_BACKEND_GPU_H_ */
+#endif /* _KBASE_HWCNT_BACKEND_JM_H_ */
diff --git a/mali_kbase/mali_kbase_hwcnt_gpu.c b/mali_kbase/mali_kbase_hwcnt_gpu.c
index 095c765..1034328 100644
--- a/mali_kbase/mali_kbase_hwcnt_gpu.c
+++ b/mali_kbase/mali_kbase_hwcnt_gpu.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2018-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -138,6 +138,8 @@ static int kbasep_hwcnt_backend_gpu_metadata_v4_create(
}
}
+ desc.clk_cnt = v4_info->clk_cnt;
+
errcode = kbase_hwcnt_metadata_create(&desc, metadata);
/* Always clean up, as metadata will make a copy of the input args */
@@ -258,6 +260,7 @@ static int kbasep_hwcnt_backend_gpu_metadata_v5_create(
desc.grp_cnt = 1;
desc.grps = &group;
+ desc.clk_cnt = v5_info->clk_cnt;
/* The JM, Tiler, and L2s are always available, and are before cores */
desc.avail_mask = (1ull << non_sc_block_count) - 1;
@@ -287,6 +290,8 @@ int kbase_hwcnt_gpu_info_init(
struct kbase_device *kbdev,
struct kbase_hwcnt_gpu_info *info)
{
+ size_t clk;
+
if (!kbdev || !info)
return -EINVAL;
@@ -307,6 +312,14 @@ int kbase_hwcnt_gpu_info_init(
info->v5.core_mask = core_mask;
}
#endif
+
+ /* Determine the number of available clock domains. */
+ for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) {
+ if (kbdev->pm.clk_rtm.clks[clk] == NULL)
+ break;
+ }
+ info->v5.clk_cnt = clk;
+
return 0;
}
@@ -563,7 +576,7 @@ void kbase_hwcnt_gpu_enable_map_to_physical(
{
const struct kbase_hwcnt_metadata *metadata;
- u64 jm_bm = 0;
+ u64 fe_bm = 0;
u64 shader_bm = 0;
u64 tiler_bm = 0;
u64 mmu_l2_bm = 0;
@@ -601,7 +614,7 @@ void kbase_hwcnt_gpu_enable_map_to_physical(
mmu_l2_bm |= *blk_map;
break;
case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM:
- jm_bm |= *blk_map;
+ fe_bm |= *blk_map;
break;
case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED:
break;
@@ -613,7 +626,7 @@ void kbase_hwcnt_gpu_enable_map_to_physical(
WARN_ON(blk_val_cnt != KBASE_HWCNT_V5_VALUES_PER_BLOCK);
switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) {
case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM:
- jm_bm |= *blk_map;
+ fe_bm |= *blk_map;
break;
case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
tiler_bm |= *blk_map;
@@ -635,8 +648,8 @@ void kbase_hwcnt_gpu_enable_map_to_physical(
}
}
- dst->jm_bm =
- kbasep_hwcnt_backend_gpu_block_map_to_physical(jm_bm, 0);
+ dst->fe_bm =
+ kbasep_hwcnt_backend_gpu_block_map_to_physical(fe_bm, 0);
dst->shader_bm =
kbasep_hwcnt_backend_gpu_block_map_to_physical(shader_bm, 0);
dst->tiler_bm =
@@ -653,7 +666,7 @@ void kbase_hwcnt_gpu_enable_map_from_physical(
const struct kbase_hwcnt_metadata *metadata;
u64 ignored_hi;
- u64 jm_bm;
+ u64 fe_bm;
u64 shader_bm;
u64 tiler_bm;
u64 mmu_l2_bm;
@@ -665,7 +678,7 @@ void kbase_hwcnt_gpu_enable_map_from_physical(
metadata = dst->metadata;
kbasep_hwcnt_backend_gpu_block_map_from_physical(
- src->jm_bm, &jm_bm, &ignored_hi);
+ src->fe_bm, &fe_bm, &ignored_hi);
kbasep_hwcnt_backend_gpu_block_map_from_physical(
src->shader_bm, &shader_bm, &ignored_hi);
kbasep_hwcnt_backend_gpu_block_map_from_physical(
@@ -698,7 +711,7 @@ void kbase_hwcnt_gpu_enable_map_from_physical(
*blk_map = mmu_l2_bm;
break;
case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM:
- *blk_map = jm_bm;
+ *blk_map = fe_bm;
break;
case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED:
break;
@@ -710,7 +723,7 @@ void kbase_hwcnt_gpu_enable_map_from_physical(
WARN_ON(blk_val_cnt != KBASE_HWCNT_V5_VALUES_PER_BLOCK);
switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) {
case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM:
- *blk_map = jm_bm;
+ *blk_map = fe_bm;
break;
case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
*blk_map = tiler_bm;
diff --git a/mali_kbase/mali_kbase_hwcnt_gpu.h b/mali_kbase/mali_kbase_hwcnt_gpu.h
index 12891e0..13c1af3 100644
--- a/mali_kbase/mali_kbase_hwcnt_gpu.h
+++ b/mali_kbase/mali_kbase_hwcnt_gpu.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -80,13 +80,13 @@ enum kbase_hwcnt_gpu_v5_block_type {
/**
* struct kbase_hwcnt_physical_enable_map - Representation of enable map
* directly used by GPU.
- * @jm_bm: Job Manager counters selection bitmask.
+ * @fe_bm: Front end (JM/CSHW) counters selection bitmask.
* @shader_bm: Shader counters selection bitmask.
* @tiler_bm: Tiler counters selection bitmask.
* @mmu_l2_bm: MMU_L2 counters selection bitmask.
*/
struct kbase_hwcnt_physical_enable_map {
- u32 jm_bm;
+ u32 fe_bm;
u32 shader_bm;
u32 tiler_bm;
u32 mmu_l2_bm;
@@ -96,6 +96,7 @@ struct kbase_hwcnt_physical_enable_map {
* struct kbase_hwcnt_gpu_v4_info - Information about hwcnt blocks on v4 GPUs.
* @cg_count: Core group count.
* @cgs: Non-NULL pointer to array of cg_count coherent group structures.
+ * @clk_cnt: Number of clock domains available.
*
* V4 devices are Mali-T6xx or Mali-T72x, and have one or more core groups,
* where each core group may have a physically different layout.
@@ -103,16 +104,19 @@ struct kbase_hwcnt_physical_enable_map {
struct kbase_hwcnt_gpu_v4_info {
size_t cg_count;
const struct mali_base_gpu_coherent_group *cgs;
+ u8 clk_cnt;
};
/**
* struct kbase_hwcnt_gpu_v5_info - Information about hwcnt blocks on v5 GPUs.
* @l2_count: L2 cache count.
* @core_mask: Shader core mask. May be sparse.
+ * @clk_cnt: Number of clock domains available.
*/
struct kbase_hwcnt_gpu_v5_info {
size_t l2_count;
u64 core_mask;
+ u8 clk_cnt;
};
/**
diff --git a/mali_kbase/mali_kbase_hwcnt_legacy.c b/mali_kbase/mali_kbase_hwcnt_legacy.c
index b0e6aee..794ef39 100644
--- a/mali_kbase/mali_kbase_hwcnt_legacy.c
+++ b/mali_kbase/mali_kbase_hwcnt_legacy.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -69,7 +69,7 @@ int kbase_hwcnt_legacy_client_create(
goto error;
/* Translate from the ioctl enable map to the internal one */
- phys_em.jm_bm = enable->jm_bm;
+ phys_em.fe_bm = enable->fe_bm;
phys_em.shader_bm = enable->shader_bm;
phys_em.tiler_bm = enable->tiler_bm;
phys_em.mmu_l2_bm = enable->mmu_l2_bm;
diff --git a/mali_kbase/mali_kbase_hwcnt_reader.h b/mali_kbase/mali_kbase_hwcnt_reader.h
index 10706b8..8cd3835 100644
--- a/mali_kbase/mali_kbase_hwcnt_reader.h
+++ b/mali_kbase/mali_kbase_hwcnt_reader.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015, 2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -23,31 +23,53 @@
#ifndef _KBASE_HWCNT_READER_H_
#define _KBASE_HWCNT_READER_H_
+#include <stddef.h>
+
/* The ids of ioctl commands. */
#define KBASE_HWCNT_READER 0xBE
#define KBASE_HWCNT_READER_GET_HWVER _IOR(KBASE_HWCNT_READER, 0x00, u32)
#define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, u32)
#define KBASE_HWCNT_READER_DUMP _IOW(KBASE_HWCNT_READER, 0x10, u32)
#define KBASE_HWCNT_READER_CLEAR _IOW(KBASE_HWCNT_READER, 0x11, u32)
-#define KBASE_HWCNT_READER_GET_BUFFER _IOR(KBASE_HWCNT_READER, 0x20,\
+#define KBASE_HWCNT_READER_GET_BUFFER _IOC(_IOC_READ, KBASE_HWCNT_READER, 0x20,\
+ offsetof(struct kbase_hwcnt_reader_metadata, cycles))
+#define KBASE_HWCNT_READER_GET_BUFFER_WITH_CYCLES _IOR(KBASE_HWCNT_READER, 0x20,\
struct kbase_hwcnt_reader_metadata)
-#define KBASE_HWCNT_READER_PUT_BUFFER _IOW(KBASE_HWCNT_READER, 0x21,\
+#define KBASE_HWCNT_READER_PUT_BUFFER _IOC(_IOC_WRITE, KBASE_HWCNT_READER, 0x21,\
+ offsetof(struct kbase_hwcnt_reader_metadata, cycles))
+#define KBASE_HWCNT_READER_PUT_BUFFER_WITH_CYCLES _IOW(KBASE_HWCNT_READER, 0x21,\
struct kbase_hwcnt_reader_metadata)
#define KBASE_HWCNT_READER_SET_INTERVAL _IOW(KBASE_HWCNT_READER, 0x30, u32)
#define KBASE_HWCNT_READER_ENABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x40, u32)
#define KBASE_HWCNT_READER_DISABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x41, u32)
#define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, u32)
+#define KBASE_HWCNT_READER_GET_API_VERSION_WITH_FEATURES \
+ _IOW(KBASE_HWCNT_READER, 0xFF, \
+ struct kbase_hwcnt_reader_api_version)
+
+/**
+ * struct kbase_hwcnt_reader_metadata_cycles - GPU clock cycles
+ * @top: the number of cycles elapsed on the top-level (main) clock
+ * domain of the GPU
+ * @shader_cores: the number of cycles elapsed on the GPU shader cores
+ */
+struct kbase_hwcnt_reader_metadata_cycles {
+ u64 top;
+ u64 shader_cores;
+};
/**
* struct kbase_hwcnt_reader_metadata - hwcnt reader sample buffer metadata
* @timestamp: time when sample was collected
* @event_id: id of an event that triggered sample collection
* @buffer_idx: position in sampling area where sample buffer was stored
+ * @cycles: the GPU cycles that occurred since the last sample
*/
struct kbase_hwcnt_reader_metadata {
u64 timestamp;
u32 event_id;
u32 buffer_idx;
+ struct kbase_hwcnt_reader_metadata_cycles cycles;
};
/**
@@ -67,5 +89,18 @@ enum base_hwcnt_reader_event {
BASE_HWCNT_READER_EVENT_COUNT
};
+/**
+ * struct kbase_hwcnt_reader_api_version - hwcnt reader API version
+ * @version: API version
+ * @features: available features in this API version
+ */
+#define KBASE_HWCNT_READER_API_VERSION_NO_FEATURE (0)
+#define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_TOP (1 << 0)
+#define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_SHADER_CORES (1 << 1)
+struct kbase_hwcnt_reader_api_version {
+ u32 version;
+ u32 features;
+};
+
#endif /* _KBASE_HWCNT_READER_H_ */
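The *_WITH_CYCLES and *_WITH_FEATURES requests added above are how userspace discovers and consumes the new per-clock-domain cycle counts. As a rough sketch (illustrative only; it assumes a hwcnt reader file descriptor has already been obtained through the existing reader setup path, and trims error handling), a client might negotiate like this:

#include <stdint.h>
#include <sys/ioctl.h>

/* Returns non-zero if the kernel can report cycle counts with each sample.
 * `fd` is assumed to be an already-open hwcnt reader file descriptor.
 */
static int reader_supports_cycles(int fd)
{
	struct kbase_hwcnt_reader_api_version api = { 0 };

	/* Older kernels only implement the plain GET_API_VERSION request,
	 * so treat any failure here as "no cycle counts available".
	 */
	if (ioctl(fd, KBASE_HWCNT_READER_GET_API_VERSION_WITH_FEATURES, &api) != 0)
		return 0;

	return (api.features &
		(KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_TOP |
		 KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_SHADER_CORES)) != 0;
}

A client that sees either feature bit can then exchange buffers with KBASE_HWCNT_READER_GET_BUFFER_WITH_CYCLES / KBASE_HWCNT_READER_PUT_BUFFER_WITH_CYCLES and read the new cycles member of struct kbase_hwcnt_reader_metadata; otherwise it keeps using the shorter GET_BUFFER / PUT_BUFFER layout, which deliberately stops at offsetof(struct kbase_hwcnt_reader_metadata, cycles).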
diff --git a/mali_kbase/mali_kbase_hwcnt_types.c b/mali_kbase/mali_kbase_hwcnt_types.c
index 1e9efde..73ea609 100644
--- a/mali_kbase/mali_kbase_hwcnt_types.c
+++ b/mali_kbase/mali_kbase_hwcnt_types.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -55,6 +55,10 @@ int kbase_hwcnt_metadata_create(
if (!desc || !out_metadata)
return -EINVAL;
+ /* The maximum number of clock domains is 64. */
+ if (desc->clk_cnt > (sizeof(u64) * BITS_PER_BYTE))
+ return -EINVAL;
+
/* Calculate the bytes needed to tightly pack the metadata */
/* Top level metadata */
@@ -158,6 +162,7 @@ int kbase_hwcnt_metadata_create(
enable_map_count * KBASE_HWCNT_BITFIELD_BYTES;
metadata->dump_buf_bytes = dump_buf_count * KBASE_HWCNT_VALUE_BYTES;
metadata->avail_mask = desc->avail_mask;
+ metadata->clk_cnt = desc->clk_cnt;
WARN_ON(size != offset);
/* Due to the block alignment, there should be exactly one enable map
@@ -187,12 +192,17 @@ int kbase_hwcnt_enable_map_alloc(
if (!metadata || !enable_map)
return -EINVAL;
- enable_map_buf = kzalloc(metadata->enable_map_bytes, GFP_KERNEL);
- if (!enable_map_buf)
- return -ENOMEM;
+ if (metadata->enable_map_bytes > 0) {
+ enable_map_buf =
+ kzalloc(metadata->enable_map_bytes, GFP_KERNEL);
+ if (!enable_map_buf)
+ return -ENOMEM;
+ } else {
+ enable_map_buf = NULL;
+ }
enable_map->metadata = metadata;
- enable_map->enable_map = enable_map_buf;
+ enable_map->hwcnt_enable_map = enable_map_buf;
return 0;
}
KBASE_EXPORT_TEST_API(kbase_hwcnt_enable_map_alloc);
@@ -202,8 +212,8 @@ void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map)
if (!enable_map)
return;
- kfree(enable_map->enable_map);
- enable_map->enable_map = NULL;
+ kfree(enable_map->hwcnt_enable_map);
+ enable_map->hwcnt_enable_map = NULL;
enable_map->metadata = NULL;
}
KBASE_EXPORT_TEST_API(kbase_hwcnt_enable_map_free);
@@ -212,17 +222,25 @@ int kbase_hwcnt_dump_buffer_alloc(
const struct kbase_hwcnt_metadata *metadata,
struct kbase_hwcnt_dump_buffer *dump_buf)
{
- u32 *buf;
+ size_t dump_buf_bytes;
+ size_t clk_cnt_buf_bytes;
+ u8 *buf;
if (!metadata || !dump_buf)
return -EINVAL;
- buf = kmalloc(metadata->dump_buf_bytes, GFP_KERNEL);
+ dump_buf_bytes = metadata->dump_buf_bytes;
+ clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * metadata->clk_cnt;
+
+ /* Make a single allocation for both dump_buf and clk_cnt_buf. */
+ buf = kmalloc(dump_buf_bytes + clk_cnt_buf_bytes, GFP_KERNEL);
if (!buf)
return -ENOMEM;
dump_buf->metadata = metadata;
- dump_buf->dump_buf = buf;
+ dump_buf->dump_buf = (u32 *)buf;
+ dump_buf->clk_cnt_buf = (u64 *)(buf + dump_buf_bytes);
+
return 0;
}
KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_alloc);
@@ -246,10 +264,16 @@ int kbase_hwcnt_dump_buffer_array_alloc(
size_t buf_idx;
unsigned int order;
unsigned long addr;
+ size_t dump_buf_bytes;
+ size_t clk_cnt_buf_bytes;
if (!metadata || !dump_bufs)
return -EINVAL;
+ dump_buf_bytes = metadata->dump_buf_bytes;
+ clk_cnt_buf_bytes =
+ sizeof(*dump_bufs->bufs->clk_cnt_buf) * metadata->clk_cnt;
+
/* Allocate memory for the dump buffer struct array */
buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL);
if (!buffers)
@@ -258,7 +282,7 @@ int kbase_hwcnt_dump_buffer_array_alloc(
/* Allocate pages for the actual dump buffers, as they tend to be fairly
* large.
*/
- order = get_order(metadata->dump_buf_bytes * n);
+ order = get_order((dump_buf_bytes + clk_cnt_buf_bytes) * n);
addr = __get_free_pages(GFP_KERNEL, order);
if (!addr) {
@@ -273,10 +297,14 @@ int kbase_hwcnt_dump_buffer_array_alloc(
/* Set the buffer of each dump buf */
for (buf_idx = 0; buf_idx < n; buf_idx++) {
- const size_t offset = metadata->dump_buf_bytes * buf_idx;
+ const size_t dump_buf_offset = dump_buf_bytes * buf_idx;
+ const size_t clk_cnt_buf_offset =
+ (dump_buf_bytes * n) + (clk_cnt_buf_bytes * buf_idx);
buffers[buf_idx].metadata = metadata;
- buffers[buf_idx].dump_buf = (u32 *)(addr + offset);
+ buffers[buf_idx].dump_buf = (u32 *)(addr + dump_buf_offset);
+ buffers[buf_idx].clk_cnt_buf =
+ (u64 *)(addr + clk_cnt_buf_offset);
}
return 0;
@@ -324,6 +352,9 @@ void kbase_hwcnt_dump_buffer_zero(
kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt);
}
+
+ memset(dst->clk_cnt_buf, 0,
+ sizeof(*dst->clk_cnt_buf) * metadata->clk_cnt);
}
KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero);
@@ -334,6 +365,9 @@ void kbase_hwcnt_dump_buffer_zero_strict(
return;
memset(dst->dump_buf, 0, dst->metadata->dump_buf_bytes);
+
+ memset(dst->clk_cnt_buf, 0,
+ sizeof(*dst->clk_cnt_buf) * dst->metadata->clk_cnt);
}
KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero_strict);
@@ -384,6 +418,7 @@ void kbase_hwcnt_dump_buffer_copy(
{
const struct kbase_hwcnt_metadata *metadata;
size_t grp, blk, blk_inst;
+ size_t clk;
if (WARN_ON(!dst) ||
WARN_ON(!src) ||
@@ -413,6 +448,12 @@ void kbase_hwcnt_dump_buffer_copy(
kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, val_cnt);
}
+
+ kbase_hwcnt_metadata_for_each_clock(metadata, clk) {
+ if (kbase_hwcnt_clk_enable_map_enabled(
+ dst_enable_map->clk_enable_map, clk))
+ dst->clk_cnt_buf[clk] = src->clk_cnt_buf[clk];
+ }
}
KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_copy);
@@ -423,6 +464,7 @@ void kbase_hwcnt_dump_buffer_copy_strict(
{
const struct kbase_hwcnt_metadata *metadata;
size_t grp, blk, blk_inst;
+ size_t clk;
if (WARN_ON(!dst) ||
WARN_ON(!src) ||
@@ -451,6 +493,14 @@ void kbase_hwcnt_dump_buffer_copy_strict(
kbase_hwcnt_dump_buffer_block_copy_strict(
dst_blk, src_blk, blk_em, val_cnt);
}
+
+ kbase_hwcnt_metadata_for_each_clock(metadata, clk) {
+ bool clk_enabled =
+ kbase_hwcnt_clk_enable_map_enabled(
+ dst_enable_map->clk_enable_map, clk);
+
+ dst->clk_cnt_buf[clk] = clk_enabled ? src->clk_cnt_buf[clk] : 0;
+ }
}
KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_copy_strict);
@@ -461,6 +511,7 @@ void kbase_hwcnt_dump_buffer_accumulate(
{
const struct kbase_hwcnt_metadata *metadata;
size_t grp, blk, blk_inst;
+ size_t clk;
if (WARN_ON(!dst) ||
WARN_ON(!src) ||
@@ -494,6 +545,12 @@ void kbase_hwcnt_dump_buffer_accumulate(
kbase_hwcnt_dump_buffer_block_accumulate(
dst_blk, src_blk, hdr_cnt, ctr_cnt);
}
+
+ kbase_hwcnt_metadata_for_each_clock(metadata, clk) {
+ if (kbase_hwcnt_clk_enable_map_enabled(
+ dst_enable_map->clk_enable_map, clk))
+ dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk];
+ }
}
KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_accumulate);
@@ -504,6 +561,7 @@ void kbase_hwcnt_dump_buffer_accumulate_strict(
{
const struct kbase_hwcnt_metadata *metadata;
size_t grp, blk, blk_inst;
+ size_t clk;
if (WARN_ON(!dst) ||
WARN_ON(!src) ||
@@ -534,5 +592,13 @@ void kbase_hwcnt_dump_buffer_accumulate_strict(
kbase_hwcnt_dump_buffer_block_accumulate_strict(
dst_blk, src_blk, blk_em, hdr_cnt, ctr_cnt);
}
+
+ kbase_hwcnt_metadata_for_each_clock(metadata, clk) {
+ if (kbase_hwcnt_clk_enable_map_enabled(
+ dst_enable_map->clk_enable_map, clk))
+ dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk];
+ else
+ dst->clk_cnt_buf[clk] = 0;
+ }
}
KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_accumulate_strict);
diff --git a/mali_kbase/mali_kbase_hwcnt_types.h b/mali_kbase/mali_kbase_hwcnt_types.h
index 4d78c84..6a2640f 100644
--- a/mali_kbase/mali_kbase_hwcnt_types.h
+++ b/mali_kbase/mali_kbase_hwcnt_types.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -136,11 +136,13 @@ struct kbase_hwcnt_group_description {
* @grps: Non-NULL pointer to an array of grp_cnt group descriptions,
* describing each Hardware Counter Group in the system.
* @avail_mask: Flat Availability Mask for all block instances in the system.
+ * @clk_cnt: The number of clock domains in the system. The maximum is 64.
*/
struct kbase_hwcnt_description {
size_t grp_cnt;
const struct kbase_hwcnt_group_description *grps;
u64 avail_mask;
+ u8 clk_cnt;
};
/**
@@ -220,6 +222,7 @@ struct kbase_hwcnt_group_metadata {
* @enable_map_bytes: The size in bytes of an Enable Map needed for the system.
* @dump_buf_bytes: The size in bytes of a Dump Buffer needed for the system.
* @avail_mask: The Availability Mask for the system.
+ * @clk_cnt: The number of clock domains in the system.
*/
struct kbase_hwcnt_metadata {
size_t grp_cnt;
@@ -227,6 +230,7 @@ struct kbase_hwcnt_metadata {
size_t enable_map_bytes;
size_t dump_buf_bytes;
u64 avail_mask;
+ u8 clk_cnt;
};
/**
@@ -234,13 +238,16 @@ struct kbase_hwcnt_metadata {
* bitfields.
* @metadata: Non-NULL pointer to metadata used to identify, and to describe
* the layout of the enable map.
- * @enable_map: Non-NULL pointer of size metadata->enable_map_bytes to an array
- * of u64 bitfields, each bit of which enables one hardware
+ * @hwcnt_enable_map: Non-NULL pointer of size metadata->enable_map_bytes to an
+ * array of u64 bitfields, each bit of which enables one hardware
* counter.
+ * @clk_enable_map: A u64 bitfield, each bit of which enables the cycle
+ * counter for a given clock domain.
*/
struct kbase_hwcnt_enable_map {
const struct kbase_hwcnt_metadata *metadata;
- u64 *enable_map;
+ u64 *hwcnt_enable_map;
+ u64 clk_enable_map;
};
/**
@@ -250,10 +257,13 @@ struct kbase_hwcnt_enable_map {
* the layout of the Dump Buffer.
* @dump_buf: Non-NULL pointer of size metadata->dump_buf_bytes to an array
* of u32 values.
+ * @clk_cnt_buf: A pointer to an array of u64 values holding the elapsed
+ * cycle count for each clock domain.
*/
struct kbase_hwcnt_dump_buffer {
const struct kbase_hwcnt_metadata *metadata;
u32 *dump_buf;
+ u64 *clk_cnt_buf;
};
/**
@@ -473,7 +483,7 @@ void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map);
* block instance.
*/
#define kbase_hwcnt_enable_map_block_instance(map, grp, blk, blk_inst) \
- ((map)->enable_map + \
+ ((map)->hwcnt_enable_map + \
(map)->metadata->grp_metadata[(grp)].enable_map_index + \
(map)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].enable_map_index + \
(map)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].enable_map_stride * (blk_inst))
@@ -520,7 +530,11 @@ static inline void kbase_hwcnt_enable_map_block_disable_all(
static inline void kbase_hwcnt_enable_map_disable_all(
struct kbase_hwcnt_enable_map *dst)
{
- memset(dst->enable_map, 0, dst->metadata->enable_map_bytes);
+ if (dst->hwcnt_enable_map != NULL)
+ memset(dst->hwcnt_enable_map, 0,
+ dst->metadata->enable_map_bytes);
+
+ dst->clk_enable_map = 0;
}
/**
@@ -569,6 +583,8 @@ static inline void kbase_hwcnt_enable_map_enable_all(
kbase_hwcnt_metadata_for_each_block(dst->metadata, grp, blk, blk_inst)
kbase_hwcnt_enable_map_block_enable_all(
dst, grp, blk, blk_inst);
+
+ dst->clk_enable_map = (1ull << dst->metadata->clk_cnt) - 1;
}
/**
@@ -582,9 +598,13 @@ static inline void kbase_hwcnt_enable_map_copy(
struct kbase_hwcnt_enable_map *dst,
const struct kbase_hwcnt_enable_map *src)
{
- memcpy(dst->enable_map,
- src->enable_map,
- dst->metadata->enable_map_bytes);
+ if (dst->hwcnt_enable_map != NULL) {
+ memcpy(dst->hwcnt_enable_map,
+ src->hwcnt_enable_map,
+ dst->metadata->enable_map_bytes);
+ }
+
+ dst->clk_enable_map = src->clk_enable_map;
}
/**
@@ -602,8 +622,12 @@ static inline void kbase_hwcnt_enable_map_union(
dst->metadata->enable_map_bytes / KBASE_HWCNT_BITFIELD_BYTES;
size_t i;
- for (i = 0; i < bitfld_count; i++)
- dst->enable_map[i] |= src->enable_map[i];
+ if (dst->hwcnt_enable_map != NULL) {
+ for (i = 0; i < bitfld_count; i++)
+ dst->hwcnt_enable_map[i] |= src->hwcnt_enable_map[i];
+ }
+
+ dst->clk_enable_map |= src->clk_enable_map;
}
/**
@@ -656,6 +680,12 @@ static inline bool kbase_hwcnt_enable_map_any_enabled(
const struct kbase_hwcnt_enable_map *enable_map)
{
size_t grp, blk, blk_inst;
+ const u64 clk_enable_map_mask =
+ (1ull << enable_map->metadata->clk_cnt) - 1;
+
+ if (enable_map->metadata->clk_cnt > 0 &&
+ (enable_map->clk_enable_map & clk_enable_map_mask))
+ return true;
kbase_hwcnt_metadata_for_each_block(
enable_map->metadata, grp, blk, blk_inst) {
@@ -1084,4 +1114,29 @@ static inline void kbase_hwcnt_dump_buffer_block_accumulate_strict(
}
}
+/**
+ * kbase_hwcnt_metadata_for_each_clock() - Iterate over each clock domain in
+ * the metadata.
+ * @md: Non-NULL pointer to metadata.
+ * @clk: size_t variable used as clock iterator.
+ */
+#define kbase_hwcnt_metadata_for_each_clock(md, clk) \
+ for ((clk) = 0; (clk) < (md)->clk_cnt; (clk)++)
+
+/**
+ * kbase_hwcnt_clk_enable_map_enabled() - Check if the given index is enabled
+ * in clk_enable_map.
+ * @clk_enable_map: An enable map for clock domains.
+ * @index: Index of the enable map for clock domain.
+ *
+ * Return: true if the index of the clock domain is enabled, else false.
+ */
+static inline bool kbase_hwcnt_clk_enable_map_enabled(
+ const u64 clk_enable_map, const size_t index)
+{
+ if (clk_enable_map & (1ull << index))
+ return true;
+ return false;
+}
+
#endif /* _KBASE_HWCNT_TYPES_H_ */
diff --git a/mali_kbase/mali_kbase_ioctl.h b/mali_kbase/mali_kbase_ioctl.h
index 977b194..17e7601 100644
--- a/mali_kbase/mali_kbase_ioctl.h
+++ b/mali_kbase/mali_kbase_ioctl.h
@@ -166,7 +166,7 @@ struct kbase_ioctl_mem_free {
/**
* struct kbase_ioctl_hwcnt_reader_setup - Setup HWC dumper/reader
* @buffer_count: requested number of dumping buffers
- * @jm_bm: counters selection bitmask (JM)
+ * @fe_bm: counters selection bitmask (Front end)
* @shader_bm: counters selection bitmask (Shader)
* @tiler_bm: counters selection bitmask (Tiler)
* @mmu_l2_bm: counters selection bitmask (MMU_L2)
@@ -175,7 +175,7 @@ struct kbase_ioctl_mem_free {
*/
struct kbase_ioctl_hwcnt_reader_setup {
__u32 buffer_count;
- __u32 jm_bm;
+ __u32 fe_bm;
__u32 shader_bm;
__u32 tiler_bm;
__u32 mmu_l2_bm;
@@ -187,14 +187,14 @@ struct kbase_ioctl_hwcnt_reader_setup {
/**
* struct kbase_ioctl_hwcnt_enable - Enable hardware counter collection
* @dump_buffer: GPU address to write counters to
- * @jm_bm: counters selection bitmask (JM)
+ * @fe_bm: counters selection bitmask (Front end)
* @shader_bm: counters selection bitmask (Shader)
* @tiler_bm: counters selection bitmask (Tiler)
* @mmu_l2_bm: counters selection bitmask (MMU_L2)
*/
struct kbase_ioctl_hwcnt_enable {
__u64 dump_buffer;
- __u32 jm_bm;
+ __u32 fe_bm;
__u32 shader_bm;
__u32 tiler_bm;
__u32 mmu_l2_bm;
diff --git a/mali_kbase/mali_kbase_jd.c b/mali_kbase/mali_kbase_jd.c
index 1a830dd..8f22ceb 100644
--- a/mali_kbase/mali_kbase_jd.c
+++ b/mali_kbase/mali_kbase_jd.c
@@ -32,6 +32,7 @@
#include <linux/ratelimit.h>
#include <mali_kbase_jm.h>
+#include <mali_kbase_kinstr_jm.h>
#include <mali_kbase_hwaccess_jm.h>
#include <tl/mali_kbase_tracepoints.h>
#include <mali_linux_trace.h>
@@ -39,6 +40,8 @@
#include "mali_kbase_dma_fence.h"
#include <mali_kbase_cs_experimental.h>
+#include <mali_kbase_caps.h>
+
#define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
@@ -52,11 +55,6 @@
((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == \
BASE_JD_REQ_DEP)))
-/* Minimum API version that supports the just-in-time memory allocation pressure
- * limit feature.
- */
-#define MIN_API_VERSION_WITH_JPL KBASE_API_VERSION(11, 20)
-
/*
* This is the kernel side of the API. Only entry points are:
* - kbase_jd_submit(): Called from userspace to submit a single bag
@@ -77,6 +75,15 @@ get_compat_pointer(struct kbase_context *kctx, const u64 p)
return u64_to_user_ptr(p);
}
+/* Mark an atom as complete, and trace it in kinstr_jm */
+static void jd_mark_atom_complete(struct kbase_jd_atom *katom)
+{
+ katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+ kbase_kinstr_jm_atom_complete(katom);
+ dev_dbg(katom->kctx->kbdev->dev, "Atom %p status to completed\n",
+ (void *)katom);
+}
+
/* Runs an atom, either by handing to the JS or by immediately running it in the case of soft-jobs
*
* Returns whether the JS needs a reschedule.
@@ -97,24 +104,18 @@ static bool jd_run_atom(struct kbase_jd_atom *katom)
/* Dependency only atom */
trace_sysgraph(SGR_SUBMIT, kctx->id,
kbase_jd_atom_id(katom->kctx, katom));
- katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
- dev_dbg(kctx->kbdev->dev, "Atom %p status to completed\n",
- (void *)katom);
+ jd_mark_atom_complete(katom);
return 0;
} else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
/* Soft-job */
if (katom->will_fail_event_code) {
kbase_finish_soft_job(katom);
- katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
- dev_dbg(kctx->kbdev->dev,
- "Atom %p status to completed\n", (void *)katom);
+ jd_mark_atom_complete(katom);
return 0;
}
if (kbase_process_soft_job(katom) == 0) {
kbase_finish_soft_job(katom);
- katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
- dev_dbg(kctx->kbdev->dev,
- "Atom %p status to completed\n", (void *)katom);
+ jd_mark_atom_complete(katom);
}
return 0;
}
@@ -205,7 +206,7 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom)
* jctx.lock must be held when this is called.
*/
-static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom_v2 *user_atom)
+static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom *user_atom)
{
int err_ret_val = -EINVAL;
u32 res_no;
@@ -465,8 +466,6 @@ static inline void jd_resolve_dep(struct list_head *out_list,
}
}
-KBASE_EXPORT_TEST_API(jd_resolve_dep);
-
/**
* is_dep_valid - Validate that a dependency is valid for early dependency
* submission
@@ -558,7 +557,7 @@ static void jd_try_submitting_deps(struct list_head *out_list,
}
}
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
/**
* jd_update_jit_usage - Update just-in-time physical memory usage for an atom.
*
@@ -698,7 +697,7 @@ static void jd_update_jit_usage(struct kbase_jd_atom *katom)
kbase_jit_retry_pending_alloc(kctx);
}
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
/*
* Perform the necessary handling of an atom that has finished running
@@ -723,9 +722,10 @@ bool jd_done_nolock(struct kbase_jd_atom *katom,
KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
-#if MALI_JIT_PRESSURE_LIMIT
- jd_update_jit_usage(katom);
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+ if (kbase_ctx_flag(kctx, KCTX_JPL_ENABLED))
+ jd_update_jit_usage(katom);
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
/* This is needed in case an atom is failed due to being invalid, this
* can happen *before* the jobs that the atom depends on have completed */
@@ -736,9 +736,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom,
}
}
- katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
- dev_dbg(kctx->kbdev->dev, "Atom %p status to completed\n",
- (void *)katom);
+ jd_mark_atom_complete(katom);
list_add_tail(&katom->jd_item, &completed_jobs);
while (!list_empty(&completed_jobs)) {
@@ -870,8 +868,23 @@ static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req)
}
#endif
+/* Trace an atom submission. */
+static void jd_trace_atom_submit(struct kbase_context *const kctx,
+ struct kbase_jd_atom *const katom,
+ int *priority)
+{
+ struct kbase_device *const kbdev = kctx->kbdev;
+
+ KBASE_TLSTREAM_TL_NEW_ATOM(kbdev, katom, kbase_jd_atom_id(kctx, katom));
+ KBASE_TLSTREAM_TL_RET_ATOM_CTX(kbdev, katom, kctx);
+ if (priority)
+ KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(kbdev, katom, *priority);
+ KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_IDLE);
+ kbase_kinstr_jm_atom_queue(katom);
+}
+
static bool jd_submit_atom(struct kbase_context *const kctx,
- const struct base_jd_atom_v2 *const user_atom,
+ const struct base_jd_atom *const user_atom,
const struct base_jd_fragment *const user_jc_incr,
struct kbase_jd_atom *const katom)
{
@@ -901,6 +914,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
katom->jc = user_atom->jc;
katom->core_req = user_atom->core_req;
katom->jobslot = user_atom->jobslot;
+ katom->seq_nr = user_atom->seq_nr;
katom->atom_flags = 0;
katom->retry_count = 0;
katom->need_cache_flush_cores_retained = 0;
@@ -913,19 +927,19 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
trace_sysgraph(SGR_ARRIVE, kctx->id, user_atom->atom_number);
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
/* Older API version atoms might have random values where jit_id now
* lives, but we must maintain backwards compatibility - handle the
* issue.
*/
- if (kctx->api_version < MIN_API_VERSION_WITH_JPL) {
+ if (!mali_kbase_supports_jit_pressure_limit(kctx->api_version)) {
katom->jit_ids[0] = 0;
katom->jit_ids[1] = 0;
} else {
katom->jit_ids[0] = user_atom->jit_id[0];
katom->jit_ids[1] = user_atom->jit_id[1];
}
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
katom->renderpass_id = user_atom->renderpass_id;
@@ -961,17 +975,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
/* Wrong dependency setup. Atom will be sent
* back to user space. Do not record any
* dependencies. */
- KBASE_TLSTREAM_TL_NEW_ATOM(
- kbdev,
- katom,
- kbase_jd_atom_id(kctx, katom));
- KBASE_TLSTREAM_TL_RET_ATOM_CTX(
- kbdev,
- katom, kctx);
- KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(
- kbdev,
- katom,
- TL_ATOM_STATE_IDLE);
+ jd_trace_atom_submit(kctx, katom, NULL);
return jd_done_nolock(katom, NULL);
}
@@ -1013,13 +1017,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
/* This atom will be sent back to user space.
* Do not record any dependencies.
*/
- KBASE_TLSTREAM_TL_NEW_ATOM(
- kbdev,
- katom,
- kbase_jd_atom_id(kctx, katom));
- KBASE_TLSTREAM_TL_RET_ATOM_CTX(kbdev, katom, kctx);
- KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom,
- TL_ATOM_STATE_IDLE);
+ jd_trace_atom_submit(kctx, katom, NULL);
will_fail = true;
@@ -1078,13 +1076,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
katom->sched_priority = sched_prio;
/* Create a new atom. */
- KBASE_TLSTREAM_TL_NEW_ATOM(
- kbdev,
- katom,
- kbase_jd_atom_id(kctx, katom));
- KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_IDLE);
- KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(kbdev, katom, katom->sched_priority);
- KBASE_TLSTREAM_TL_RET_ATOM_CTX(kbdev, katom, kctx);
+ jd_trace_atom_submit(kctx, katom, &katom->sched_priority);
#if !MALI_INCREMENTAL_RENDERING
/* Reject atoms for incremental rendering if not supported */
@@ -1151,8 +1143,8 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
}
}
-#if !MALI_JIT_PRESSURE_LIMIT
- if ((kctx->api_version >= MIN_API_VERSION_WITH_JPL) &&
+#if !MALI_JIT_PRESSURE_LIMIT_BASE
+ if (mali_kbase_supports_jit_pressure_limit(kctx->api_version) &&
(user_atom->jit_id[0] || user_atom->jit_id[1])) {
/* JIT pressure limit is disabled, but we are receiving non-0
* JIT IDs - atom is invalid.
@@ -1160,7 +1152,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
return jd_done_nolock(katom, NULL);
}
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
/* Validate the atom. Function will return error if the atom is
* malformed.
@@ -1233,6 +1225,9 @@ int kbase_jd_submit(struct kbase_context *kctx,
struct kbase_device *kbdev;
u32 latest_flush;
+ bool jd_atom_is_v2 = (stride == sizeof(struct base_jd_atom_v2) ||
+ stride == offsetof(struct base_jd_atom_v2, renderpass_id));
+
/*
* kbase_jd_submit isn't expected to fail and so all errors with the
* jobs are reported by immediately failing them (through event system)
@@ -1247,7 +1242,9 @@ int kbase_jd_submit(struct kbase_context *kctx,
}
if (stride != offsetof(struct base_jd_atom_v2, renderpass_id) &&
- stride != sizeof(struct base_jd_atom_v2)) {
+ stride != sizeof(struct base_jd_atom_v2) &&
+ stride != offsetof(struct base_jd_atom, renderpass_id) &&
+ stride != sizeof(struct base_jd_atom)) {
dev_err(kbdev->dev,
"Stride %u passed to job_submit isn't supported by the kernel\n",
stride);
@@ -1258,16 +1255,29 @@ int kbase_jd_submit(struct kbase_context *kctx,
latest_flush = kbase_backend_get_current_flush_id(kbdev);
for (i = 0; i < nr_atoms; i++) {
- struct base_jd_atom_v2 user_atom;
+ struct base_jd_atom user_atom;
struct base_jd_fragment user_jc_incr;
struct kbase_jd_atom *katom;
- if (copy_from_user(&user_atom, user_addr, stride) != 0) {
- dev_err(kbdev->dev,
- "Invalid atom address %p passed to job_submit\n",
- user_addr);
- err = -EFAULT;
- break;
+ if (unlikely(jd_atom_is_v2)) {
+ if (copy_from_user(&user_atom.jc, user_addr, sizeof(struct base_jd_atom_v2)) != 0) {
+ dev_err(kbdev->dev,
+ "Invalid atom address %p passed to job_submit\n",
+ user_addr);
+ err = -EFAULT;
+ break;
+ }
+
+ /* no seq_nr in v2 */
+ user_atom.seq_nr = 0;
+ } else {
+ if (copy_from_user(&user_atom, user_addr, stride) != 0) {
+ dev_err(kbdev->dev,
+ "Invalid atom address %p passed to job_submit\n",
+ user_addr);
+ err = -EFAULT;
+ break;
+ }
}
if (stride == offsetof(struct base_jd_atom_v2, renderpass_id)) {
diff --git a/mali_kbase/mali_kbase_kinstr_jm.c b/mali_kbase/mali_kbase_kinstr_jm.c
new file mode 100644
index 0000000..1e91a7c
--- /dev/null
+++ b/mali_kbase/mali_kbase_kinstr_jm.c
@@ -0,0 +1,896 @@
+/*
+ *
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * mali_kbase_kinstr_jm.c
+ * Kernel driver public interface to job manager atom tracing
+ */
+
+#include "mali_kbase_kinstr_jm.h"
+#include "mali_kbase_kinstr_jm_reader.h"
+
+#include "mali_kbase.h"
+#include "mali_kbase_linux.h"
+
+#include <mali_kbase_jm_rb.h>
+
+#include <asm/barrier.h>
+#include <linux/anon_inodes.h>
+#include <linux/circ_buf.h>
+#include <linux/fs.h>
+#include <linux/kref.h>
+#include <linux/ktime.h>
+#include <linux/log2.h>
+#include <linux/mutex.h>
+#include <linux/rculist_bl.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/version.h>
+#include <linux/wait.h>
+
+#if KERNEL_VERSION(5, 1, 0) <= LINUX_VERSION_CODE
+#include <linux/build_bug.h>
+#else
+// Stringify the expression if no message is given.
+#define static_assert(e, ...) __static_assert(e, #__VA_ARGS__, #e)
+#define __static_assert(e, msg, ...) _Static_assert(e, msg)
+#endif
+
+#if KERNEL_VERSION(4, 16, 0) >= LINUX_VERSION_CODE
+typedef unsigned int __poll_t;
+#endif
+
+#ifndef ENOTSUP
+#define ENOTSUP EOPNOTSUPP
+#endif
+
+/* The module printing prefix */
+#define PR_ "mali_kbase_kinstr_jm: "
+
+/* Allows us to perform ASM goto for the tracing
+ * https://www.kernel.org/doc/Documentation/static-keys.txt
+ */
+#if KERNEL_VERSION(4, 3, 0) <= LINUX_VERSION_CODE
+DEFINE_STATIC_KEY_FALSE(basep_kinstr_jm_reader_static_key);
+#else
+struct static_key basep_kinstr_jm_reader_static_key = STATIC_KEY_INIT_FALSE;
+#define static_branch_inc(key) static_key_slow_inc(key)
+#define static_branch_dec(key) static_key_slow_dec(key)
+#endif /* KERNEL_VERSION(4 ,3, 0) <= LINUX_VERSION_CODE */
+
+#define KBASE_KINSTR_JM_VERSION 1
+
+/**
+ * struct kbase_kinstr_jm - The context for the kernel job manager atom tracing
+ * @readers: a bitlocked list of opened readers. Readers are attached to the
+ * private data of a file descriptor that the user opens with the
+ * KBASE_IOCTL_KINSTR_JM_FD IO control call.
+ * @refcount: reference count for the context. Any reader will have a link
+ * back to the context so that they can remove themselves from the
+ * list.
+ *
+ * This is opaque outside this compilation unit
+ */
+struct kbase_kinstr_jm {
+ struct hlist_bl_head readers;
+ struct kref refcount;
+};
+
+/**
+ * struct kbase_kinstr_jm_atom_state_change - Represents an atom changing to a
+ * new state
+ * @timestamp: Raw monotonic nanoseconds of the state change
+ * @state: The state that the atom has moved to
+ * @atom: The atom number that has changed state
+ * @flags: Flags associated with the state change. See
+ * KBASE_KINSTR_JM_ATOM_STATE_FLAG_* defines.
+ * @reserved: Reserved for future use.
+ * @data: Extra data for the state change. Active member depends on state.
+ *
+ * We can add new fields to the structure and old user code will gracefully
+ * ignore the new fields.
+ *
+ * We can change the size of the structure and old user code will gracefully
+ * skip over the new size via `struct kbase_kinstr_jm_fd_out->size`.
+ *
+ * If we remove fields, the version field in `struct
+ * kbase_kinstr_jm_fd_out->version` will be incremented and old user code will
+ * gracefully fail and tell the user that the kernel API is too new and has
+ * backwards-incompatible changes. Note that a userspace client can opt to
+ * handle multiple kernel major versions of the structure.
+ *
+ * If we need to change the _meaning_ of one of the fields, i.e. the state
+ * machine has had an incompatible change, we can keep the same members in the
+ * structure and update the version as above. User code will no longer
+ * recognise that it has the supported field and can gracefully explain to the
+ * user that the kernel API is no longer supported.
+ *
+ * When making changes to this structure, make sure they are either:
+ * - additions to the end (for minor version bumps (i.e. only a size increase))
+ * such that the layout of existing fields doesn't change, or;
+ * - update the version reported to userspace so that it can fail explicitly.
+ */
+struct kbase_kinstr_jm_atom_state_change {
+ u64 timestamp;
+ s8 state; /* enum kbase_kinstr_jm_reader_atom_state */
+ u8 atom;
+ u8 flags;
+ u8 reserved[1];
+ /* Tagged union based on state. Ensure members are aligned correctly! */
+ union {
+ struct {
+ u8 slot;
+ } start;
+ u8 padding[4];
+ } data;
+};
+static_assert(
+ ((1 << 8 * sizeof(((struct kbase_kinstr_jm_atom_state_change *)0)->state)) - 1) >=
+ KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT);
+
+#define KBASE_KINSTR_JM_ATOM_STATE_FLAG_OVERFLOW BIT(0)
+
+/**
+ * struct reader_changes - The circular buffer of kernel atom state changes
+ * @data: The allocated buffer. This is allocated when the user requests
+ * the reader file descriptor. It is released when the user calls
+ * close() on the fd. When accessing this, lock the producer spin
+ * lock to prevent races on the allocated memory. The consume lock
+ * does not need to be held because newly-inserted data will always
+ * be outside the currently-read range.
+ * @producer: The producing spinlock which allows us to push changes into the
+ * buffer at the same time as a user read occurring. This needs to
+ * be locked when saving/restoring the IRQ because we can receive an
+ * interrupt from the GPU when an atom completes. The CPU could have
+ * a task preempted that is holding this lock.
+ * @consumer: The consuming mutex which locks around the user read().
+ * Must be held when updating the tail of the circular buffer.
+ * @head: The head of the circular buffer. Can be used with Linux @c CIRC_
+ * helpers. The producer should lock and update this with an SMP
+ * store when a new change lands. The consumer can read with an
+ * SMP load. This allows the producer to safely insert new changes
+ * into the circular buffer.
+ * @tail: The tail of the circular buffer. Can be used with Linux @c CIRC_
+ * helpers. The producer should do a READ_ONCE load and the consumer
+ * should SMP store.
+ * @size: The number of changes that are allowed in @c data. Can be used
+ * with Linux @c CIRC_ helpers. Will always be a power of two. The
+ * producer lock should be held when updating this and stored with
+ * an SMP release memory barrier. This means that the consumer can
+ * do an SMP load.
+ * @threshold: The number of changes above which threads polling on the reader
+ * file descriptor will be woken up.
+ */
+struct reader_changes {
+ struct kbase_kinstr_jm_atom_state_change *data;
+ spinlock_t producer;
+ struct mutex consumer;
+ u32 head;
+ u32 tail;
+ u32 size;
+ u32 threshold;
+};
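To make the locking and memory-ordering rules described in the comment above concrete, a minimal producer-side sketch could look like the following. This is illustrative only: the actual insertion helper appears later in this file and additionally handles a full buffer (see the OVERFLOW flag above), which is omitted here.

static void reader_changes_push_sketch(struct reader_changes *changes,
		const struct kbase_kinstr_jm_atom_state_change *change)
{
	unsigned long irq;
	u32 head, tail;

	/* Producer lock: may be contended from IRQ context when an atom
	 * completes, hence the irqsave variant.
	 */
	spin_lock_irqsave(&changes->producer, irq);

	head = changes->head;
	tail = READ_ONCE(changes->tail);

	if (CIRC_SPACE(head, tail, changes->size) >= 1) {
		changes->data[head] = *change;
		/* Publish the element before moving head so the consumer
		 * never observes an unwritten slot.
		 */
		smp_store_release(&changes->head,
				(head + 1) & (changes->size - 1));
	}

	spin_unlock_irqrestore(&changes->producer, irq);
}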
+
+/**
+ * reader_changes_is_valid_size() - Determines if requested changes buffer size
+ * is valid.
+ * @size: The requested memory size
+ *
+ * We have a constraint that the underlying physical buffer size must be a
+ * power of two so that we can use the efficient circular buffer helpers that
+ * the kernel provides. It also needs to be representable within a u32.
+ *
+ * Return:
+ * * true - the size is valid
+ * * false - the size is invalid
+ */
+static inline bool reader_changes_is_valid_size(const size_t size)
+{
+ typedef struct reader_changes changes_t;
+ const size_t elem_size = sizeof(*((changes_t *)0)->data);
+ const size_t size_size = sizeof(((changes_t *)0)->size);
+ const size_t size_max = (1ull << (size_size * 8)) - 1;
+
+ return is_power_of_2(size) && /* Is a power of two */
+ ((size / elem_size) <= size_max); /* Small enough */
+}
+
+/**
+ * reader_changes_init() - Initializes the reader changes and allocates the
+ * changes buffer
+ * @changes: The context pointer, must point to a zero-inited allocated reader
+ * changes structure. We may support allocating the structure in the
+ * future.
+ * @size: The requested changes buffer size
+ *
+ * Return:
+ * (0, U16_MAX] - the number of data elements allocated
+ * -EINVAL - a pointer was invalid
+ * -ENOTSUP - we do not support allocation of the context
+ * -ERANGE - the requested memory size was invalid
+ * -ENOMEM - could not allocate the memory
+ * -EADDRINUSE - the buffer memory was already allocated
+ */
+static int reader_changes_init(struct reader_changes *const changes,
+ const size_t size)
+{
+ BUILD_BUG_ON((PAGE_SIZE % sizeof(*changes->data)) != 0);
+
+ if (!reader_changes_is_valid_size(size)) {
+ pr_warn(PR_ "invalid size %zu\n", size);
+ return -ERANGE;
+ }
+
+ changes->data = vmalloc(size);
+ if (!changes->data)
+ return -ENOMEM;
+
+ spin_lock_init(&changes->producer);
+ mutex_init(&changes->consumer);
+
+ changes->size = size / sizeof(*changes->data);
+ changes->threshold = min(((size_t)(changes->size)) / 4,
+ ((size_t)(PAGE_SIZE)) / sizeof(*changes->data));
+
+ return changes->size;
+}
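+
+/*
+ * Worked example (illustrative, assuming 4096-byte pages and the 16-byte
+ * state change record above): a request for 256 changes arrives here as a
+ * 4096-byte buffer, so changes->size = 4096 / 16 = 256 entries and
+ * changes->threshold = min(256 / 4, 4096 / 16) = 64 entries, i.e. pollers
+ * are woken once a quarter of the buffer is occupied.
+ */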
+
+/**
+ * reader_changes_term() - Cleans up a reader changes structure
+ * @changes: The context to clean up
+ *
+ * Releases the allocated state changes memory
+ */
+static void reader_changes_term(struct reader_changes *const changes)
+{
+ struct kbase_kinstr_jm_atom_state_change *data = NULL;
+ unsigned long irq;
+
+ /*
+ * Although changes->data is used on the consumer side, too, no active
+ * consumer is possible by the time we clean up the reader changes, so
+ * no need to take the consumer lock. However, we do need the producer
+ * lock because the list removal can race with list traversal.
+ */
+ spin_lock_irqsave(&changes->producer, irq);
+ swap(changes->data, data);
+ spin_unlock_irqrestore(&changes->producer, irq);
+
+ mutex_destroy(&changes->consumer);
+ vfree(data);
+}
+
+/**
+ * reader_changes_count_locked() - Retrieves the count of state changes from the
+ * tail to the physical end of the buffer
+ * @changes: The state changes context
+ *
+ * The consumer mutex must be held. Uses the CIRC_CNT_TO_END macro to
+ * determine the count, so there may be more items. However, that's the maximum
+ * number that can be read in one contiguous read.
+ *
+ * Return: the number of changes in the circular buffer until the end of the
+ * allocation
+ */
+static u32 reader_changes_count_locked(struct reader_changes *const changes)
+{
+ u32 head;
+
+ lockdep_assert_held_once(&changes->consumer);
+
+ head = smp_load_acquire(&changes->head);
+
+ return CIRC_CNT_TO_END(head, changes->tail, changes->size);
+}
+
+/**
+ * reader_changes_count() - Retrieves the count of state changes from the
+ * tail to the physical end of the buffer
+ * @changes: The state changes context
+ *
+ * Return: the number of changes in the circular buffer until the end of the
+ * allocation
+ */
+static u32 reader_changes_count(struct reader_changes *const changes)
+{
+ u32 ret;
+
+ mutex_lock(&changes->consumer);
+ ret = reader_changes_count_locked(changes);
+ mutex_unlock(&changes->consumer);
+ return ret;
+}
+
+/**
+ * reader_changes_push() - Pushes a change into the reader circular buffer.
+ * @changes: The buffer to insert the change into
+ * @change: Kernel atom change to insert
+ * @wait_queue: The queue to be kicked when changes should be read from
+ * userspace. Kicked when a threshold is reached or there is
+ * overflow.
+ */
+static void reader_changes_push(
+ struct reader_changes *const changes,
+ const struct kbase_kinstr_jm_atom_state_change *const change,
+ wait_queue_head_t *const wait_queue)
+{
+ u32 head, tail, size, space;
+ unsigned long irq;
+ struct kbase_kinstr_jm_atom_state_change *data;
+
+ spin_lock_irqsave(&changes->producer, irq);
+
+ /* We may be called for a reader_changes that's awaiting cleanup. */
+ data = changes->data;
+ if (!data)
+ goto unlock;
+
+ size = changes->size;
+ head = changes->head;
+ tail = smp_load_acquire(&changes->tail);
+
+ space = CIRC_SPACE(head, tail, size);
+ if (space >= 1) {
+ data[head] = *change;
+ if (space == 1) {
+ data[head].flags |=
+ KBASE_KINSTR_JM_ATOM_STATE_FLAG_OVERFLOW;
+ pr_warn(PR_ "overflow of circular buffer\n");
+ }
+ smp_store_release(&changes->head, (head + 1) & (size - 1));
+ }
+
+ /* Wake for either overflow or over-threshold cases. */
+ if (CIRC_CNT(head + 1, tail, size) >= changes->threshold)
+ wake_up_interruptible(wait_queue);
+
+unlock:
+ spin_unlock_irqrestore(&changes->producer, irq);
+}
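+
+/*
+ * Worked example (illustrative): with size = 8, tail = 0 and head = 6,
+ * CIRC_SPACE() returns 1, so the change stored at index 6 is tagged with
+ * KBASE_KINSTR_JM_ATOM_STATE_FLAG_OVERFLOW and head advances to 7. Any
+ * further pushes then find no space and are dropped until the consumer
+ * advances the tail.
+ */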
+
+/**
+ * struct reader - Allows the kernel state changes to be read by user space.
+ * @node: The node in the @c readers locked list
+ * @rcu_head: storage for the RCU callback to free this reader (see kfree_rcu)
+ * @changes: The circular buffer of user changes
+ * @wait_queue: A wait queue for poll
+ * @context: a pointer to the parent context that created this reader. Can be
+ * used to remove the reader from the list of readers. Reference
+ * counted.
+ *
+ * The reader is a circular buffer in kernel space. State changes are pushed
+ * into the buffer. The flow from user space is:
+ *
+ * * Request file descriptor with KBASE_IOCTL_KINSTR_JM_FD. This will
+ * allocate the kernel side circular buffer with a size specified in the
+ * ioctl argument.
+ * * The user will then poll the file descriptor for data
+ * * Upon receiving POLLIN, perform a read() on the file descriptor to get
+ * the data out.
+ * * The buffer memory will be freed when the file descriptor is closed
+ */
+struct reader {
+ struct hlist_bl_node node;
+ struct rcu_head rcu_head;
+ struct reader_changes changes;
+ wait_queue_head_t wait_queue;
+ struct kbase_kinstr_jm *context;
+};
+
+static struct kbase_kinstr_jm *
+kbase_kinstr_jm_ref_get(struct kbase_kinstr_jm *const ctx);
+static void kbase_kinstr_jm_ref_put(struct kbase_kinstr_jm *const ctx);
+static int kbase_kinstr_jm_readers_add(struct kbase_kinstr_jm *const ctx,
+ struct reader *const reader);
+static void kbase_kinstr_jm_readers_del(struct kbase_kinstr_jm *const ctx,
+ struct reader *const reader);
+
+/**
+ * reader_term() - Terminate an instrumentation job manager reader context.
+ * @reader: Pointer to context to be terminated.
+ */
+static void reader_term(struct reader *const reader)
+{
+ if (!reader)
+ return;
+
+ kbase_kinstr_jm_readers_del(reader->context, reader);
+ reader_changes_term(&reader->changes);
+ kbase_kinstr_jm_ref_put(reader->context);
+
+ kfree_rcu(reader, rcu_head);
+}
+
+/**
+ * reader_init() - Initialise an instrumentation job manager reader context.
+ * @out_reader: Non-NULL pointer to where the pointer to the created context
+ * will be stored on success.
+ * @ctx: the pointer to the parent context. Reference count will be
+ * increased if initialization is successful
+ * @num_changes: The number of changes to allocate a buffer for
+ *
+ * Return: 0 on success, else error code.
+ */
+static int reader_init(struct reader **const out_reader,
+ struct kbase_kinstr_jm *const ctx,
+ size_t const num_changes)
+{
+ struct reader *reader = NULL;
+ const size_t change_size = sizeof(struct kbase_kinstr_jm_atom_state_change);
+ int status;
+
+ if (!out_reader || !ctx || !num_changes)
+ return -EINVAL;
+
+ reader = kzalloc(sizeof(*reader), GFP_KERNEL);
+ if (!reader)
+ return -ENOMEM;
+
+ INIT_HLIST_BL_NODE(&reader->node);
+ init_waitqueue_head(&reader->wait_queue);
+
+ reader->context = kbase_kinstr_jm_ref_get(ctx);
+
+ status = reader_changes_init(&reader->changes, num_changes * change_size);
+ if (status < 0)
+ goto fail;
+
+ status = kbase_kinstr_jm_readers_add(ctx, reader);
+ if (status < 0)
+ goto fail;
+
+ *out_reader = reader;
+
+ return 0;
+
+fail:
+ kbase_kinstr_jm_ref_put(reader->context);
+ kfree(reader);
+ return status;
+}
+
+/**
+ * reader_release() - Invoked when the reader file descriptor is released
+ * @node: The inode that the file descriptor corresponds to. In our case the
+ * reader file descriptor is backed by an anonymous inode, so not much is
+ * in it.
+ * @file: the file data. Our reader context is held in the private data
+ * Return: zero on success
+ */
+static int reader_release(struct inode *const node, struct file *const file)
+{
+ struct reader *const reader = file->private_data;
+
+ reader_term(reader);
+ file->private_data = NULL;
+
+ return 0;
+}
+
+/**
+ * reader_changes_copy_to_user() - Copy any changes from a changes structure to
+ * the user-provided buffer.
+ * @changes: The changes structure from which to copy.
+ * @buffer: The user buffer to copy the data to.
+ * @buffer_size: The number of bytes in the buffer.
+ * Return: The number of bytes copied or negative errno on failure.
+ */
+static ssize_t reader_changes_copy_to_user(struct reader_changes *const changes,
+ char __user *buffer,
+ size_t buffer_size)
+{
+ ssize_t ret = 0;
+ struct kbase_kinstr_jm_atom_state_change const *src_buf = READ_ONCE(
+ changes->data);
+ size_t const entry_size = sizeof(*src_buf);
+ size_t changes_tail, changes_count, read_size;
+
+ /* Needed for the quick buffer capacity calculation below.
+ * Note that we can't use is_power_of_2() since old compilers don't
+ * understand it's a constant expression.
+ */
+#define is_power_of_two(x) ((x) && !((x) & ((x) - 1)))
+ static_assert(is_power_of_two(
+ sizeof(struct kbase_kinstr_jm_atom_state_change)));
+#undef is_power_of_two
+
+ lockdep_assert_held_once(&changes->consumer);
+
+ /* Read continuously until either:
+ * - we've filled the output buffer, or
+ * - there are no changes when we check.
+ *
+ * If more changes arrive while we're copying to the user, we can copy
+ * those as well, space permitting.
+ */
+ do {
+ changes_tail = changes->tail;
+ changes_count = reader_changes_count_locked(changes);
+ read_size = min(changes_count * entry_size,
+ buffer_size & ~(entry_size - 1));
+
+ if (!read_size)
+ break;
+
+ if (copy_to_user(buffer, &(src_buf[changes_tail]), read_size))
+ return -EFAULT;
+
+ buffer += read_size;
+ buffer_size -= read_size;
+ ret += read_size;
+ changes_tail = (changes_tail + read_size / entry_size) &
+ (changes->size - 1);
+ smp_store_release(&changes->tail, changes_tail);
+ } while (read_size);
+
+ return ret;
+}
+
+/**
+ * reader_read() - Handles a read call on the reader file descriptor
+ *
+ * @filp: The file that the read was performed on
+ * @buffer: The destination buffer
+ * @buffer_size: The maximum number of bytes to read
+ * @offset: The offset into the 'file' to read from.
+ *
+ * Note the destination buffer needs to be fully mapped in userspace or the read
+ * will fault.
+ *
+ * Return:
+ * * The number of bytes read, or:
+ * * -EBADF - the file descriptor did not have an attached reader
+ * * -ENOBUFS - the buffer is too small to hold even a single state change
+ * * -EIO - the destination buffer is not accessible for writing
+ * * -EFAULT - memory access fault while copying to the destination buffer
+ * * -EINTR - the wait for data was interrupted by a signal
+ * * -EAGAIN - the file is set for nonblocking reads with O_NONBLOCK and there
+ * is no data available
+ *
+ * Note: The number of bytes read will always be a multiple of the size of an
+ * entry.
+ */
+static ssize_t reader_read(struct file *const filp,
+ char __user *const buffer,
+ size_t const buffer_size,
+ loff_t *const offset)
+{
+ struct reader *const reader = filp->private_data;
+ struct reader_changes *changes;
+ ssize_t ret;
+
+ if (!reader)
+ return -EBADF;
+
+ if (buffer_size < sizeof(struct kbase_kinstr_jm_atom_state_change))
+ return -ENOBUFS;
+
+#if KERNEL_VERSION(5, 0, 0) <= LINUX_VERSION_CODE
+ if (!access_ok(buffer, buffer_size))
+ return -EIO;
+#else
+ if (!access_ok(VERIFY_WRITE, buffer, buffer_size))
+ return -EIO;
+#endif
+
+ changes = &reader->changes;
+
+ mutex_lock(&changes->consumer);
+ if (!reader_changes_count_locked(changes)) {
+ if (filp->f_flags & O_NONBLOCK) {
+ ret = -EAGAIN;
+ goto exit;
+ }
+
+ if (wait_event_interruptible(
+ reader->wait_queue,
+ !!reader_changes_count_locked(changes))) {
+ ret = -EINTR;
+ goto exit;
+ }
+ }
+
+ ret = reader_changes_copy_to_user(changes, buffer, buffer_size);
+
+exit:
+ mutex_unlock(&changes->consumer);
+ return ret;
+}
+
+/**
+ * reader_poll() - Handles a poll call on the reader file descriptor
+ * @file: The file that the poll was performed on
+ * @wait: The poll table
+ *
+ * The results of the poll will be unreliable if there is no mapped memory as
+ * there is no circular buffer to push atom state changes into.
+ *
+ * Return:
+ * * 0 - no data ready
+ * * POLLIN - state changes have been buffered
+ * * -EBADF - the file descriptor did not have an attached reader
+ * * -EINVAL - the file or poll table pointer was NULL
+ */
+static __poll_t reader_poll(struct file *const file,
+ struct poll_table_struct *const wait)
+{
+ struct reader *reader;
+ struct reader_changes *changes;
+
+ if (unlikely(!file || !wait))
+ return -EINVAL;
+
+ reader = file->private_data;
+ if (unlikely(!reader))
+ return -EBADF;
+
+ changes = &reader->changes;
+
+ if (reader_changes_count(changes) >= changes->threshold)
+ return POLLIN;
+
+ poll_wait(file, &reader->wait_queue, wait);
+
+ return (reader_changes_count(changes) > 0) ? POLLIN : 0;
+}
+
+/* The file operations virtual function table */
+static const struct file_operations file_operations = {
+ .owner = THIS_MODULE,
+ .llseek = no_llseek,
+ .read = reader_read,
+ .poll = reader_poll,
+ .release = reader_release
+};
+
+/* The maximum amount of readers that can be created on a context. */
+static const size_t kbase_kinstr_jm_readers_max = 16;
+
+/**
+ * kbase_kinstr_jm_release() - Invoked when the reference count is dropped
+ * @ref: the context reference count
+ */
+static void kbase_kinstr_jm_release(struct kref *const ref)
+{
+ struct kbase_kinstr_jm *const ctx =
+ container_of(ref, struct kbase_kinstr_jm, refcount);
+
+ kfree(ctx);
+}
+
+/**
+ * kbase_kinstr_jm_ref_get() - Reference counts the instrumentation context
+ * @ctx: the context to reference count
+ * Return: the reference counted context
+ */
+static struct kbase_kinstr_jm *
+kbase_kinstr_jm_ref_get(struct kbase_kinstr_jm *const ctx)
+{
+ if (likely(ctx))
+ kref_get(&ctx->refcount);
+ return ctx;
+}
+
+/**
+ * kbase_kinstr_jm_ref_put() - Dereferences the instrumentation context
+ * @ctx: the context to lower the reference count on
+ */
+static void kbase_kinstr_jm_ref_put(struct kbase_kinstr_jm *const ctx)
+{
+ if (likely(ctx))
+ kref_put(&ctx->refcount, kbase_kinstr_jm_release);
+}
+
+/**
+ * kbase_kinstr_jm_readers_add() - Adds a reader to the list of readers
+ * @ctx: the instrumentation context
+ * @reader: the reader to add
+ *
+ * Return:
+ * 0 - success
+ * -ENOMEM - too many readers already added.
+ */
+static int kbase_kinstr_jm_readers_add(struct kbase_kinstr_jm *const ctx,
+ struct reader *const reader)
+{
+ struct hlist_bl_head *const readers = &ctx->readers;
+ struct hlist_bl_node *node;
+ struct reader *temp;
+ size_t count = 0;
+
+ hlist_bl_lock(readers);
+
+ hlist_bl_for_each_entry_rcu(temp, node, readers, node)
+ ++count;
+
+ if (kbase_kinstr_jm_readers_max < count) {
+ hlist_bl_unlock(readers);
+ return -ENOMEM;
+ }
+
+ hlist_bl_add_head_rcu(&reader->node, readers);
+
+ hlist_bl_unlock(readers);
+
+ static_branch_inc(&basep_kinstr_jm_reader_static_key);
+
+ return 0;
+}
+
+/**
+ * kbase_kinstr_jm_readers_del() - Deletes a reader from the list of readers
+ * @ctx: the instrumentation context
+ * @reader: the reader to delete
+ */
+static void kbase_kinstr_jm_readers_del(struct kbase_kinstr_jm *const ctx,
+ struct reader *const reader)
+{
+ struct hlist_bl_head *const readers = &ctx->readers;
+
+ hlist_bl_lock(readers);
+ hlist_bl_del_rcu(&reader->node);
+ hlist_bl_unlock(readers);
+
+ static_branch_dec(&basep_kinstr_jm_reader_static_key);
+}
+
+int kbase_kinstr_jm_get_fd(struct kbase_kinstr_jm *const ctx,
+ union kbase_kinstr_jm_fd *jm_fd_arg)
+{
+ struct kbase_kinstr_jm_fd_in const *in;
+ struct reader *reader;
+ size_t const change_size = sizeof(struct
+ kbase_kinstr_jm_atom_state_change);
+ int status;
+ int fd;
+ int i;
+
+ if (!ctx || !jm_fd_arg)
+ return -EINVAL;
+
+ in = &jm_fd_arg->in;
+
+ if (!is_power_of_2(in->count))
+ return -EINVAL;
+
+ for (i = 0; i < sizeof(in->padding); ++i)
+ if (in->padding[i])
+ return -EINVAL;
+
+ status = reader_init(&reader, ctx, in->count);
+ if (status < 0)
+ return status;
+
+ jm_fd_arg->out.version = KBASE_KINSTR_JM_VERSION;
+ jm_fd_arg->out.size = change_size;
+ memset(&jm_fd_arg->out.padding, 0, sizeof(jm_fd_arg->out.padding));
+
+ fd = anon_inode_getfd("[mali_kinstr_jm]", &file_operations, reader,
+ O_CLOEXEC);
+ if (fd < 0)
+ reader_term(reader);
+
+ return fd;
+}
+
+int kbase_kinstr_jm_init(struct kbase_kinstr_jm **const out_ctx)
+{
+ struct kbase_kinstr_jm *ctx = NULL;
+
+ if (!out_ctx)
+ return -EINVAL;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ INIT_HLIST_BL_HEAD(&ctx->readers);
+ kref_init(&ctx->refcount);
+
+ *out_ctx = ctx;
+
+ return 0;
+}
+
+void kbase_kinstr_jm_term(struct kbase_kinstr_jm *const ctx)
+{
+ kbase_kinstr_jm_ref_put(ctx);
+}
+
+void kbasep_kinstr_jm_atom_state(
+ struct kbase_jd_atom *const katom,
+ const enum kbase_kinstr_jm_reader_atom_state state)
+{
+ struct kbase_context *const kctx = katom->kctx;
+ struct kbase_kinstr_jm *const ctx = kctx->kinstr_jm;
+ const u8 id = kbase_jd_atom_id(kctx, katom);
+ struct kbase_kinstr_jm_atom_state_change change = {
+ .timestamp = ktime_get_raw_ns(), .atom = id, .state = state
+ };
+ struct reader *reader;
+ struct hlist_bl_node *node;
+
+ WARN(KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT < state || 0 > state,
+ PR_ "unsupported katom (%u) state (%i)", id, state);
+
+ switch (state) {
+ case KBASE_KINSTR_JM_READER_ATOM_STATE_START:
+ change.data.start.slot = katom->jobslot;
+ break;
+ default:
+ break;
+ }
+
+ rcu_read_lock();
+ hlist_bl_for_each_entry_rcu(reader, node, &ctx->readers, node)
+ reader_changes_push(
+ &reader->changes, &change, &reader->wait_queue);
+ rcu_read_unlock();
+}
+
+KBASE_EXPORT_TEST_API(kbasep_kinstr_jm_atom_state);
+
+void kbasep_kinstr_jm_atom_hw_submit(struct kbase_jd_atom *const katom)
+{
+ struct kbase_context *const kctx = katom->kctx;
+ struct kbase_device *const kbdev = kctx->kbdev;
+ const int slot = katom->slot_nr;
+ struct kbase_jd_atom *const submitted = kbase_gpu_inspect(kbdev, slot, 0);
+
+ BUILD_BUG_ON(SLOT_RB_SIZE != 2);
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ if (WARN_ON(slot < 0 || slot >= GPU_MAX_JOB_SLOTS))
+ return;
+ if (WARN_ON(!submitted))
+ return;
+
+ if (submitted == katom)
+ kbase_kinstr_jm_atom_state_start(katom);
+}
+
+void kbasep_kinstr_jm_atom_hw_release(struct kbase_jd_atom *const katom)
+{
+ struct kbase_context *const kctx = katom->kctx;
+ struct kbase_device *const kbdev = kctx->kbdev;
+ const int slot = katom->slot_nr;
+ struct kbase_jd_atom *const submitted = kbase_gpu_inspect(kbdev, slot, 0);
+ struct kbase_jd_atom *const queued = kbase_gpu_inspect(kbdev, slot, 1);
+
+ BUILD_BUG_ON(SLOT_RB_SIZE != 2);
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ if (WARN_ON(slot < 0 || slot >= GPU_MAX_JOB_SLOTS))
+ return;
+ if (WARN_ON(!submitted))
+ return;
+ if (WARN_ON((submitted != katom) && (queued != katom)))
+ return;
+
+ if (queued == katom)
+ return;
+
+ if (katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED)
+ kbase_kinstr_jm_atom_state_stop(katom);
+ if (queued && queued->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED)
+ kbase_kinstr_jm_atom_state_start(queued);
+}
diff --git a/mali_kbase/mali_kbase_kinstr_jm.h b/mali_kbase/mali_kbase_kinstr_jm.h
new file mode 100644
index 0000000..555edfe
--- /dev/null
+++ b/mali_kbase/mali_kbase_kinstr_jm.h
@@ -0,0 +1,283 @@
+/*
+ *
+ * (C) COPYRIGHT 2019,2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * mali_kbase_kinstr_jm.h
+ * Kernel driver public interface to job manager atom tracing. This API provides
+ * a method to get the atom state changes into user space.
+ *
+ * The flow of operation is:
+ *
+ * | kernel | user |
+ * | ----------------------------------- | ----------------------------------- |
+ * | Initialize API with | |
+ * | kbase_kinstr_jm_init() | |
+ * | | |
+ * | Kernel code injects states with | |
+ * | kbase_kinstr_jm_atom_state_*() APIs | |
+ * | | Call ioctl() to get file descriptor |
+ * | | via KBASE_IOCTL_KINSTR_JM_FD |
+ * | Allocates a reader attached to FD | |
+ * | Allocates circular buffer and | |
+ * | patches, via ASM goto, the | |
+ * | kbase_kinstr_jm_atom_state_*() | |
+ * | | loop: |
+ * | | Call poll() on FD for POLLIN |
+ * | When threshold of changes is hit, | |
+ * | the poll is interrupted with | |
+ * | POLLIN. If circular buffer is | |
+ * | full then store the missed count | |
+ * | and interrupt poll | Call read() to get data from |
+ * | | circular buffer via the fd |
+ * | Kernel advances tail of circular | |
+ * | buffer | |
+ * | | Close file descriptor |
+ * | Deallocates circular buffer | |
+ * | | |
+ * | Terminate API with | |
+ * | kbase_kinstr_jm_term() | |
+ *
+ * All tracepoints are guarded by a static key. The static key is activated
+ * when a user space reader is created, so the tracepoints add negligible
+ * overhead while there are no readers.
+ */
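+
+/*
+ * Minimal kernel-side usage sketch (illustrative only; error handling,
+ * locking and the surrounding driver structures are assumed):
+ *
+ *   struct kbase_kinstr_jm *kinstr_jm;
+ *   int err;
+ *
+ *   // once, when the owning context is created
+ *   err = kbase_kinstr_jm_init(&kinstr_jm);
+ *   if (err)
+ *       return err;
+ *
+ *   // at the relevant points of the atom lifecycle
+ *   kbase_kinstr_jm_atom_queue(katom);
+ *   kbase_kinstr_jm_atom_sw_start(katom);
+ *   kbase_kinstr_jm_atom_sw_stop(katom);
+ *   kbase_kinstr_jm_atom_complete(katom);
+ *
+ *   // once, when the owning context is destroyed
+ *   kbase_kinstr_jm_term(kinstr_jm);
+ */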
+
+#ifndef _KBASE_KINSTR_JM_H_
+#define _KBASE_KINSTR_JM_H_
+
+#include "mali_kbase_kinstr_jm_reader.h"
+
+#ifdef __KERNEL__
+#include <linux/version.h>
+#include <linux/static_key.h>
+#else
+/* empty wrapper macros for userspace */
+#define static_branch_unlikely(key) (1)
+#define KERNEL_VERSION(a, b, c) (0)
+#define LINUX_VERSION_CODE (1)
+#endif /* __KERNEL__ */
+
+/* Forward declarations */
+struct kbase_context;
+struct kbase_kinstr_jm;
+struct kbase_jd_atom;
+union kbase_kinstr_jm_fd;
+
+/**
+ * kbase_kinstr_jm_init() - Initialise an instrumentation job manager context.
+ * @ctx: Non-NULL pointer to where the pointer to the created context will
+ * be stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_kinstr_jm_init(struct kbase_kinstr_jm **ctx);
+
+/**
+ * kbase_kinstr_jm_term() - Terminate an instrumentation job manager context.
+ * @ctx: Pointer to context to be terminated.
+ */
+void kbase_kinstr_jm_term(struct kbase_kinstr_jm *ctx);
+
+/**
+ * kbase_kinstr_jm_get_fd() - Retrieves a file descriptor that can be used to
+ * read the atom state changes from userspace
+ *
+ * @ctx: Pointer to the initialized context
+ * @jm_fd_arg: Pointer to the union containing the in/out params
+ * Return: a valid file descriptor on success, else a negative error code
+ */
+int kbase_kinstr_jm_get_fd(struct kbase_kinstr_jm *const ctx,
+ union kbase_kinstr_jm_fd *jm_fd_arg);
+
+/**
+ * kbasep_kinstr_jm_atom_state() - Signifies that an atom has changed state
+ * @atom: The atom that has changed state
+ * @state: The new state of the atom
+ *
+ * This performs the actual storage of the state ready for user space to
+ * read the data. It is only called when the static key is enabled from
+ * kbase_kinstr_jm_atom_state(). There is almost never a need to invoke this
+ * function directly.
+ */
+void kbasep_kinstr_jm_atom_state(
+ struct kbase_jd_atom *const atom,
+ const enum kbase_kinstr_jm_reader_atom_state state);
+
+/* Allows ASM goto patching to reduce tracing overhead. This is
+ * incremented/decremented when readers are created and terminated. This really
+ * shouldn't be changed externally, but if you do, make sure you use
+ * a static_key_inc()/static_key_dec() pair.
+ */
+#if KERNEL_VERSION(4, 3, 0) <= LINUX_VERSION_CODE
+extern struct static_key_false basep_kinstr_jm_reader_static_key;
+#else
+/* Pre-4.3 kernels have a different API for static keys, but work
+ * mostly the same with less type safety.
+ */
+extern struct static_key basep_kinstr_jm_reader_static_key;
+#define static_branch_unlikely(key) static_key_false(key)
+#endif /* KERNEL_VERSION(4, 3, 0) <= LINUX_VERSION_CODE */
+
+/**
+ * kbase_kinstr_jm_atom_state() - Signifies that an atom has changed state
+ * @atom: The atom that has changed state
+ * @state: The new state of the atom
+ *
+ * This uses a static key to reduce overhead when tracing is disabled
+ */
+static inline void kbase_kinstr_jm_atom_state(
+ struct kbase_jd_atom *const atom,
+ const enum kbase_kinstr_jm_reader_atom_state state)
+{
+ if (static_branch_unlikely(&basep_kinstr_jm_reader_static_key))
+ kbasep_kinstr_jm_atom_state(atom, state);
+}
+
+/**
+ * kbase_kinstr_jm_atom_state_queue() - Signifies that an atom has entered a
+ * hardware or software queue.
+ * @atom: The atom that has changed state
+ */
+static inline void kbase_kinstr_jm_atom_state_queue(
+ struct kbase_jd_atom *const atom)
+{
+ kbase_kinstr_jm_atom_state(
+ atom, KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE);
+}
+
+/**
+ * kbase_kinstr_jm_atom_state_start() - Signifies that work has started on an
+ * atom
+ * @atom: The atom that has changed state
+ */
+static inline void kbase_kinstr_jm_atom_state_start(
+ struct kbase_jd_atom *const atom)
+{
+ kbase_kinstr_jm_atom_state(
+ atom, KBASE_KINSTR_JM_READER_ATOM_STATE_START);
+}
+
+/**
+ * kbase_kinstr_jm_atom_state_stop() - Signifies that work has stopped on an
+ * atom
+ * @atom: The atom that has changed state
+ */
+static inline void kbase_kinstr_jm_atom_state_stop(
+ struct kbase_jd_atom *const atom)
+{
+ kbase_kinstr_jm_atom_state(
+ atom, KBASE_KINSTR_JM_READER_ATOM_STATE_STOP);
+}
+
+/**
+ * kbase_kinstr_jm_atom_state_complete() - Signifies that all work has completed
+ * on an atom
+ * @atom: The atom that has changed state
+ */
+static inline void kbase_kinstr_jm_atom_state_complete(
+ struct kbase_jd_atom *const atom)
+{
+ kbase_kinstr_jm_atom_state(
+ atom, KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE);
+}
+
+/**
+ * kbase_kinstr_jm_atom_queue() - A software *or* hardware atom is queued for
+ * execution
+ * @atom: The atom that has changed state
+ */
+static inline void kbase_kinstr_jm_atom_queue(struct kbase_jd_atom *const atom)
+{
+ kbase_kinstr_jm_atom_state_queue(atom);
+}
+
+/**
+ * kbase_kinstr_jm_atom_complete() - A software *or* hardware atom is fully
+ * completed
+ * @atom: The atom that has changed state
+ */
+static inline void kbase_kinstr_jm_atom_complete(
+ struct kbase_jd_atom *const atom)
+{
+ kbase_kinstr_jm_atom_state_complete(atom);
+}
+
+/**
+ * kbase_kinstr_jm_atom_sw_start() - A software atom has started work
+ * @atom: The atom that has changed state
+ */
+static inline void kbase_kinstr_jm_atom_sw_start(
+ struct kbase_jd_atom *const atom)
+{
+ kbase_kinstr_jm_atom_state_start(atom);
+}
+
+/**
+ * kbase_kinstr_jm_atom_sw_stop() - A software atom has stopped work
+ * @atom: The atom that has changed state
+ */
+static inline void kbase_kinstr_jm_atom_sw_stop(
+ struct kbase_jd_atom *const atom)
+{
+ kbase_kinstr_jm_atom_state_stop(atom);
+}
+
+/**
+ * kbasep_kinstr_jm_atom_hw_submit() - A hardware atom has been submitted
+ * @atom: The atom that has been submitted
+ *
+ * This private implementation should not be called directly; it is protected
+ * by a static key in kbase_kinstr_jm_atom_hw_submit(). Use that instead.
+ */
+void kbasep_kinstr_jm_atom_hw_submit(struct kbase_jd_atom *const atom);
+
+/**
+ * kbase_kinstr_jm_atom_hw_submit() - A hardware atom has been submitted
+ * @atom: The atom that has been submitted
+ */
+static inline void kbase_kinstr_jm_atom_hw_submit(
+ struct kbase_jd_atom *const atom)
+{
+ if (static_branch_unlikely(&basep_kinstr_jm_reader_static_key))
+ kbasep_kinstr_jm_atom_hw_submit(atom);
+}
+
+/**
+ * kbasep_kinstr_jm_atom_hw_release() - A hardware atom has been released
+ * @atom: The atom that has been released
+ *
+ * This private implementation should not be called directly; it is protected
+ * by a static key in kbase_kinstr_jm_atom_hw_release(). Use that instead.
+ */
+void kbasep_kinstr_jm_atom_hw_release(struct kbase_jd_atom *const atom);
+
+/**
+ * kbase_kinstr_jm_atom_hw_release() - A hardware atom has been released
+ * @atom: The atom that has been released
+ */
+static inline void kbase_kinstr_jm_atom_hw_release(
+ struct kbase_jd_atom *const atom)
+{
+ if (static_branch_unlikely(&basep_kinstr_jm_reader_static_key))
+ kbasep_kinstr_jm_atom_hw_release(atom);
+}
+
+#endif /* _KBASE_KINSTR_JM_H_ */
diff --git a/mali_kbase/mali_kbase_kinstr_jm_reader.h b/mali_kbase/mali_kbase_kinstr_jm_reader.h
new file mode 100644
index 0000000..e267e6b
--- /dev/null
+++ b/mali_kbase/mali_kbase_kinstr_jm_reader.h
@@ -0,0 +1,70 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * mali_kbase_kinstr_jm_reader.h
+ * Provides an ioctl API to read kernel atom state changes. The flow of the
+ * API is:
+ * 1. Obtain the file descriptor with ``KBASE_IOCTL_KINSTR_JM_FD``
+ * 2. Determine the buffer structure layout via the above ioctl's returned
+ * size and version fields in ``struct kbase_kinstr_jm_fd_out``
+ * 3. Poll the file descriptor for ``POLLIN``
+ * 4. Get data with read() on the fd
+ * 5. Use the structure version to understand how to read the data from the
+ *    buffer
+ * 6. Repeat 3-5
+ * 7. Close the file descriptor
+ */
+
+#ifndef _KBASE_KINSTR_JM_READER_H_
+#define _KBASE_KINSTR_JM_READER_H_
+
+/**
+ * enum kbase_kinstr_jm_reader_atom_state - Determines the work state of an atom
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE: Signifies that an atom has
+ * entered a hardware or software queue
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_START: Signifies that work has started
+ * on an atom
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_STOP: Signifies that work has stopped
+ * on an atom
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE: Signifies that work has
+ * completed on an atom
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT: The number of state enumerations
+ *
+ * We can add new states to the end of this if they do not break the existing
+ * state machine. Old user mode code can gracefully ignore states they do not
+ * understand.
+ *
+ * If we need to make a breaking change to the state machine, we can do that by
+ * changing the version reported by KBASE_IOCTL_KINSTR_JM_FD. This will
+ * mean that old user mode code will fail to understand the new state field in
+ * the structure and gracefully not use the state change API.
+ */
+enum kbase_kinstr_jm_reader_atom_state {
+ KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE,
+ KBASE_KINSTR_JM_READER_ATOM_STATE_START,
+ KBASE_KINSTR_JM_READER_ATOM_STATE_STOP,
+ KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE,
+ KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT
+};
+
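+/*
+ * Illustrative user space sketch of the flow above (not a complete program:
+ * obtaining the kbase device fd and the exact ioctl plumbing are assumed,
+ * and error handling is omitted):
+ *
+ *   union kbase_kinstr_jm_fd args = { .in = { .count = 256 } };
+ *   int fd = ioctl(kbase_fd, KBASE_IOCTL_KINSTR_JM_FD, &args);
+ *   struct pollfd pfd = { .fd = fd, .events = POLLIN };
+ *
+ *   while (running) {
+ *       char buf[4096];
+ *       ssize_t bytes;
+ *
+ *       if (poll(&pfd, 1, -1) <= 0)
+ *           break;
+ *       bytes = read(fd, buf, sizeof(buf));
+ *       if (bytes <= 0)
+ *           break;
+ *       // parse 'bytes' of data using args.out.size and args.out.version
+ *   }
+ *   close(fd);
+ */
+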
+#endif /* _KBASE_KINSTR_JM_READER_H_ */
diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c
index 4a1004b..8cf7e5d 100644
--- a/mali_kbase/mali_kbase_mem.c
+++ b/mali_kbase/mali_kbase_mem.c
@@ -43,6 +43,7 @@
#include <mali_kbase_mem_pool_group.h>
#include <mmu/mali_kbase_mmu.h>
#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_trace_gpu_mem.h>
/*
* Alignment of objects allocated by the GPU inside a just-in-time memory
@@ -847,13 +848,14 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
if (group_id < 0 || group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)
return -EINVAL;
-#if MALI_JIT_PRESSURE_LIMIT
if (phys_pages_limit > jit_va_pages)
-#else
- if (phys_pages_limit != jit_va_pages)
-#endif /* MALI_JIT_PRESSURE_LIMIT */
return -EINVAL;
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+ if (phys_pages_limit != jit_va_pages)
+ kbase_ctx_flag_set(kctx, KCTX_JPL_ENABLED);
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
+
kbase_gpu_vm_lock(kctx);
#ifdef CONFIG_64BIT
@@ -870,11 +872,11 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
kctx->trim_level = trim_level;
kctx->jit_va = true;
kctx->jit_group_id = group_id;
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
kctx->jit_phys_pages_limit = phys_pages_limit;
dev_dbg(kctx->kbdev->dev, "phys_pages_limit set to %llu\n",
phys_pages_limit);
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
}
kbase_gpu_vm_unlock(kctx);
@@ -976,6 +978,12 @@ int kbase_mem_init(struct kbase_device *kbdev)
/* Initialize memory usage */
atomic_set(&memdev->used_pages, 0);
+ spin_lock_init(&kbdev->gpu_mem_usage_lock);
+ kbdev->total_gpu_pages = 0;
+ kbdev->process_root = RB_ROOT;
+ kbdev->dma_buf_root = RB_ROOT;
+ mutex_init(&kbdev->dma_buf_lock);
+
#ifdef IR_THRESHOLD
atomic_set(&memdev->ir_threshold, IR_THRESHOLD);
#else
@@ -1053,6 +1061,11 @@ void kbase_mem_term(struct kbase_device *kbdev)
kbase_mem_pool_group_term(&kbdev->mem_pools);
+ WARN_ON(kbdev->total_gpu_pages);
+ WARN_ON(!RB_EMPTY_ROOT(&kbdev->process_root));
+ WARN_ON(!RB_EMPTY_ROOT(&kbdev->dma_buf_root));
+ mutex_destroy(&kbdev->dma_buf_lock);
+
if (kbdev->mgm_dev)
module_put(kbdev->mgm_dev->owner);
}
@@ -2033,6 +2046,9 @@ no_new_partial:
(u64)new_page_count);
alloc->nents += nr_pages_requested;
+
+ kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested);
+
done:
return 0;
@@ -2209,6 +2225,9 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
(u64)new_page_count);
alloc->nents += nr_pages_requested;
+
+ kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested);
+
done:
return new_pages;
@@ -2374,6 +2393,8 @@ int kbase_free_phy_pages_helper(
kbdev,
kctx->id,
(u64)new_page_count);
+
+ kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, freed);
}
return 0;
@@ -2496,6 +2517,8 @@ void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc,
kbdev,
kctx->id,
(u64)new_page_count);
+
+ kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, freed);
}
}
@@ -2558,6 +2581,8 @@ void kbase_mem_kref_free(struct kref *kref)
alloc->imported.umm.dma_attachment,
alloc->imported.umm.sgt,
DMA_BIDIRECTIONAL);
+ kbase_remove_dma_buf_usage(alloc->imported.umm.kctx,
+ alloc);
}
dma_buf_detach(alloc->imported.umm.dma_buf,
alloc->imported.umm.dma_attachment);
@@ -2643,18 +2668,28 @@ bool kbase_check_alloc_flags(unsigned long flags)
/* GPU executable memory cannot:
* - Be written by the GPU
* - Be grown on GPU page fault
- * - Have the top of its initial commit aligned to 'extent' */
+ */
if ((flags & BASE_MEM_PROT_GPU_EX) && (flags &
- (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF |
- BASE_MEM_TILER_ALIGN_TOP)))
+ (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF)))
+ return false;
+
+ /* GPU executable memory also cannot have the top of its initial
+ * commit aligned to 'extent'
+ */
+ if ((flags & BASE_MEM_PROT_GPU_EX) && (flags &
+ BASE_MEM_TILER_ALIGN_TOP))
return false;
/* To have an allocation lie within a 4GB chunk is required only for
- * TLS memory, which will never be used to contain executable code
- * and also used for Tiler heap.
+ * TLS memory, which will never be used to contain executable code.
*/
if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags &
- (BASE_MEM_PROT_GPU_EX | BASE_MEM_TILER_ALIGN_TOP)))
+ BASE_MEM_PROT_GPU_EX))
+ return false;
+
+ /* TLS memory should also not be used for tiler heap */
+ if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags &
+ BASE_MEM_TILER_ALIGN_TOP))
return false;
/* GPU should have at least read or write access otherwise there is no
@@ -2751,9 +2786,13 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags,
return -EINVAL;
}
- if ((flags & (BASE_MEM_GROW_ON_GPF | BASE_MEM_TILER_ALIGN_TOP)) &&
- test_reg.extent == 0) {
- dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GROW_ON_GPF or BASE_MEM_TILER_ALIGN_TOP but extent == 0\n");
+ if ((flags & BASE_MEM_GROW_ON_GPF) && (test_reg.extent == 0)) {
+ dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GROW_ON_GPF but extent == 0\n");
+ return -EINVAL;
+ }
+
+ if ((flags & BASE_MEM_TILER_ALIGN_TOP) && (test_reg.extent == 0)) {
+ dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_TILER_ALIGN_TOP but extent == 0\n");
return -EINVAL;
}
@@ -2983,7 +3022,7 @@ static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data)
KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops,
kbase_jit_debugfs_phys_get);
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
static int kbase_jit_debugfs_used_get(struct kbase_jit_debugfs_data *data)
{
struct kbase_context *kctx = data->kctx;
@@ -3038,7 +3077,7 @@ static int kbase_jit_debugfs_trim_get(struct kbase_jit_debugfs_data *data)
KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_trim_fops,
kbase_jit_debugfs_trim_get);
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
void kbase_jit_debugfs_init(struct kbase_context *kctx)
{
@@ -3078,7 +3117,7 @@ void kbase_jit_debugfs_init(struct kbase_context *kctx)
*/
debugfs_create_file("mem_jit_phys", mode, kctx->kctx_dentry,
kctx, &kbase_jit_debugfs_phys_fops);
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
/*
* Debugfs entry for getting the number of pages used
* by JIT allocations for estimating the physical pressure
@@ -3093,7 +3132,7 @@ void kbase_jit_debugfs_init(struct kbase_context *kctx)
*/
debugfs_create_file("mem_jit_trim", mode, kctx->kctx_dentry,
kctx, &kbase_jit_debugfs_trim_fops);
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
}
#endif /* CONFIG_DEBUG_FS */
@@ -3153,14 +3192,16 @@ int kbase_jit_init(struct kbase_context *kctx)
* allocation and also, if BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP is set, meets
* the alignment requirements.
*/
-static bool meet_size_and_tiler_align_top_requirements(struct kbase_context *kctx,
- struct kbase_va_region *walker, const struct base_jit_alloc_info *info)
+static bool meet_size_and_tiler_align_top_requirements(
+ const struct kbase_va_region *walker,
+ const struct base_jit_alloc_info *info)
{
bool meet_reqs = true;
if (walker->nr_pages != info->va_pages)
meet_reqs = false;
- else if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) {
+
+ if (meet_reqs && (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP)) {
size_t align = info->extent;
size_t align_mask = align - 1;
@@ -3171,7 +3212,7 @@ static bool meet_size_and_tiler_align_top_requirements(struct kbase_context *kct
return meet_reqs;
}
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
/* Function will guarantee *@freed will not exceed @pages_needed
*/
static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx,
@@ -3308,8 +3349,10 @@ static size_t kbase_mem_jit_trim_pages(struct kbase_context *kctx,
struct kbase_va_region *reg, *tmp;
size_t total_freed = 0;
- kbase_gpu_vm_lock(kctx);
- mutex_lock(&kctx->jit_evict_lock);
+ lockdep_assert_held(&kctx->jctx.lock);
+ lockdep_assert_held(&kctx->reg_lock);
+ lockdep_assert_held(&kctx->jit_evict_lock);
+
list_for_each_entry_safe(reg, tmp, &kctx->jit_active_head, jit_node) {
int err;
size_t freed = 0u;
@@ -3328,18 +3371,17 @@ static size_t kbase_mem_jit_trim_pages(struct kbase_context *kctx,
if (!pages_needed)
break;
}
- mutex_unlock(&kctx->jit_evict_lock);
- kbase_gpu_vm_unlock(kctx);
trace_mali_jit_trim(total_freed);
return total_freed;
}
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
static int kbase_jit_grow(struct kbase_context *kctx,
- const struct base_jit_alloc_info *info,
- struct kbase_va_region *reg)
+ const struct base_jit_alloc_info *info,
+ struct kbase_va_region *reg,
+ struct kbase_sub_alloc **prealloc_sas)
{
size_t delta;
size_t pages_required;
@@ -3347,15 +3389,13 @@ static int kbase_jit_grow(struct kbase_context *kctx,
struct kbase_mem_pool *pool;
int ret = -ENOMEM;
struct tagged_addr *gpu_pages;
- struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL };
- int i;
if (info->commit_pages > reg->nr_pages) {
/* Attempted to grow larger than maximum size */
return -EINVAL;
}
- kbase_gpu_vm_lock(kctx);
+ lockdep_assert_held(&kctx->reg_lock);
/* Make the physical backing no longer reclaimable */
if (!kbase_mem_evictable_unmake(reg->gpu_alloc))
@@ -3372,14 +3412,6 @@ static int kbase_jit_grow(struct kbase_context *kctx,
pages_required = delta;
#ifdef CONFIG_MALI_2MB_ALLOC
- /* Preallocate memory for the sub-allocation structs */
- for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
- prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]),
- GFP_KERNEL);
- if (!prealloc_sas[i])
- goto update_failed;
- }
-
if (pages_required >= (SZ_2M / SZ_4K)) {
pool = &kctx->mem_pools.large[kctx->jit_group_id];
/* Round up to number of 2 MB pages required */
@@ -3405,15 +3437,18 @@ static int kbase_jit_grow(struct kbase_context *kctx,
*/
while (kbase_mem_pool_size(pool) < pages_required) {
int pool_delta = pages_required - kbase_mem_pool_size(pool);
+ int ret;
kbase_mem_pool_unlock(pool);
spin_unlock(&kctx->mem_partials_lock);
+
kbase_gpu_vm_unlock(kctx);
+ ret = kbase_mem_pool_grow(pool, pool_delta);
+ kbase_gpu_vm_lock(kctx);
- if (kbase_mem_pool_grow(pool, pool_delta))
- goto update_failed_unlocked;
+ if (ret)
+ goto update_failed;
- kbase_gpu_vm_lock(kctx);
spin_lock(&kctx->mem_partials_lock);
kbase_mem_pool_lock(pool);
}
@@ -3459,11 +3494,6 @@ done:
reg->extent = info->extent;
update_failed:
- kbase_gpu_vm_unlock(kctx);
-update_failed_unlocked:
- for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i)
- kfree(prealloc_sas[i]);
-
return ret;
}
@@ -3492,9 +3522,9 @@ static void trace_jit_stats(struct kbase_context *kctx,
max_allocations, alloc_count, va_pages, ph_pages);
}
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
/**
- * get_jit_backed_pressure() - calculate the physical backing of all JIT
+ * get_jit_phys_backing() - calculate the physical backing of all JIT
* allocations
*
* @kctx: Pointer to the kbase context whose active JIT allocations will be
@@ -3502,83 +3532,48 @@ static void trace_jit_stats(struct kbase_context *kctx,
*
* Return: number of pages that are committed by JIT allocations
*/
-static size_t get_jit_backed_pressure(struct kbase_context *kctx)
+static size_t get_jit_phys_backing(struct kbase_context *kctx)
{
- size_t backed_pressure = 0;
- int jit_id;
-
- lockdep_assert_held(&kctx->jctx.lock);
+ struct kbase_va_region *walker;
+ size_t backing = 0;
- kbase_gpu_vm_lock(kctx);
- for (jit_id = 0; jit_id <= BASE_JIT_ALLOC_COUNT; jit_id++) {
- struct kbase_va_region *reg = kctx->jit_alloc[jit_id];
+ lockdep_assert_held(&kctx->jit_evict_lock);
- if (reg && (reg != KBASE_RESERVED_REG_JIT_ALLOC)) {
- /* If region has no report, be pessimistic */
- if (reg->used_pages == reg->nr_pages) {
- backed_pressure += reg->nr_pages;
- } else {
- backed_pressure +=
- kbase_reg_current_backed_size(reg);
- }
- }
+ list_for_each_entry(walker, &kctx->jit_active_head, jit_node) {
+ backing += kbase_reg_current_backed_size(walker);
}
- kbase_gpu_vm_unlock(kctx);
- return backed_pressure;
+ return backing;
}
-/**
- * jit_trim_necessary_pages() - calculate and trim the least pages possible to
- * satisfy a new JIT allocation
- *
- * @kctx: Pointer to the kbase context
- * @info: Pointer to JIT allocation information for the new allocation
- *
- * Before allocating a new just-in-time memory region or reusing a previous
- * one, ensure that the total JIT physical page usage also will not exceed the
- * pressure limit.
- *
- * If there are no reported-on allocations, then we already guarantee this will
- * be the case - because our current pressure then only comes from the va_pages
- * of each JIT region, hence JIT physical page usage is guaranteed to be
- * bounded by this.
- *
- * However as soon as JIT allocations become "reported on", the pressure is
- * lowered to allow new JIT regions to be allocated. It is after such a point
- * that the total JIT physical page usage could (either now or in the future on
- * a grow-on-GPU-page-fault) exceed the pressure limit, but only on newly
- * allocated JIT regions. Hence, trim any "reported on" regions.
- *
- * Any pages freed will go into the pool and be allocated from there in
- * kbase_mem_alloc().
- */
-static void jit_trim_necessary_pages(struct kbase_context *kctx,
- const struct base_jit_alloc_info *info)
+void kbase_jit_trim_necessary_pages(struct kbase_context *kctx,
+ size_t needed_pages)
{
- size_t backed_pressure = 0;
- size_t needed_pages = 0;
+ size_t jit_backing = 0;
+ size_t pages_to_trim = 0;
- backed_pressure = get_jit_backed_pressure(kctx);
+ lockdep_assert_held(&kctx->jctx.lock);
+ lockdep_assert_held(&kctx->reg_lock);
+ lockdep_assert_held(&kctx->jit_evict_lock);
+
+ jit_backing = get_jit_phys_backing(kctx);
/* It is possible that this is the case - if this is the first
* allocation after "ignore_pressure_limit" allocation.
*/
- if (backed_pressure > kctx->jit_phys_pages_limit) {
- needed_pages +=
- (backed_pressure - kctx->jit_phys_pages_limit)
- + info->va_pages;
+ if (jit_backing > kctx->jit_phys_pages_limit) {
+ pages_to_trim += (jit_backing - kctx->jit_phys_pages_limit) +
+ needed_pages;
} else {
- size_t backed_diff =
- kctx->jit_phys_pages_limit - backed_pressure;
+ size_t backed_diff = kctx->jit_phys_pages_limit - jit_backing;
- if (info->va_pages > backed_diff)
- needed_pages += info->va_pages - backed_diff;
+ if (needed_pages > backed_diff)
+ pages_to_trim += needed_pages - backed_diff;
}
- if (needed_pages) {
- size_t trimmed_pages = kbase_mem_jit_trim_pages(kctx,
- needed_pages);
+ if (pages_to_trim) {
+ size_t trimmed_pages =
+ kbase_mem_jit_trim_pages(kctx, pages_to_trim);
/* This should never happen - we already asserted that
* we are not violating JIT pressure limit in earlier
@@ -3586,10 +3581,10 @@ static void jit_trim_necessary_pages(struct kbase_context *kctx,
* must have enough unused pages to satisfy the new
* allocation
*/
- WARN_ON(trimmed_pages < needed_pages);
+ WARN_ON(trimmed_pages < pages_to_trim);
}
}
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
/**
* jit_allow_allocate() - check whether basic conditions are satisfied to allow
@@ -3608,8 +3603,8 @@ static bool jit_allow_allocate(struct kbase_context *kctx,
{
lockdep_assert_held(&kctx->jctx.lock);
-#if MALI_JIT_PRESSURE_LIMIT
- if (likely(!ignore_pressure_limit) &&
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+ if (!ignore_pressure_limit &&
((kctx->jit_phys_pages_limit <= kctx->jit_current_phys_pressure) ||
(info->va_pages > (kctx->jit_phys_pages_limit - kctx->jit_current_phys_pressure)))) {
dev_dbg(kctx->kbdev->dev,
@@ -3618,7 +3613,7 @@ static bool jit_allow_allocate(struct kbase_context *kctx,
kctx->jit_phys_pages_limit);
return false;
}
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
if (kctx->jit_current_allocations >= kctx->jit_max_allocations) {
/* Too many current allocations */
@@ -3644,123 +3639,152 @@ static bool jit_allow_allocate(struct kbase_context *kctx,
return true;
}
+static struct kbase_va_region *
+find_reasonable_region(const struct base_jit_alloc_info *info,
+ struct list_head *pool_head, bool ignore_usage_id)
+{
+ struct kbase_va_region *closest_reg = NULL;
+ struct kbase_va_region *walker;
+ size_t current_diff = SIZE_MAX;
+
+ list_for_each_entry(walker, pool_head, jit_node) {
+ if ((ignore_usage_id ||
+ walker->jit_usage_id == info->usage_id) &&
+ walker->jit_bin_id == info->bin_id &&
+ meet_size_and_tiler_align_top_requirements(walker, info)) {
+ size_t min_size, max_size, diff;
+
+ /*
+			 * The JIT allocation's VA requirements have been met;
+			 * it's suitable, but other allocations might be a
+ * better fit.
+ */
+ min_size = min_t(size_t, walker->gpu_alloc->nents,
+ info->commit_pages);
+ max_size = max_t(size_t, walker->gpu_alloc->nents,
+ info->commit_pages);
+ diff = max_size - min_size;
+
+ if (current_diff > diff) {
+ current_diff = diff;
+ closest_reg = walker;
+ }
+
+ /* The allocation is an exact match */
+ if (current_diff == 0)
+ break;
+ }
+ }
+
+ return closest_reg;
+}
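+
+/*
+ * Worked example (illustrative): for info->commit_pages = 100, a candidate
+ * region with gpu_alloc->nents = 80 gives diff = 20 while one with
+ * nents = 130 gives diff = 30, so the 80-page region is preferred; a
+ * candidate with nents = 100 is an exact match and ends the search early.
+ */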
+
struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
const struct base_jit_alloc_info *info,
bool ignore_pressure_limit)
{
struct kbase_va_region *reg = NULL;
+ struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL };
+ int i;
lockdep_assert_held(&kctx->jctx.lock);
if (!jit_allow_allocate(kctx, info, ignore_pressure_limit))
return NULL;
-#if MALI_JIT_PRESSURE_LIMIT
- if (!ignore_pressure_limit)
- jit_trim_necessary_pages(kctx, info);
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+#ifdef CONFIG_MALI_2MB_ALLOC
+ /* Preallocate memory for the sub-allocation structs */
+ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
+ prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
+ if (!prealloc_sas[i])
+ goto end;
+ }
+#endif
+ kbase_gpu_vm_lock(kctx);
mutex_lock(&kctx->jit_evict_lock);
/*
* Scan the pool for an existing allocation which meets our
* requirements and remove it.
*/
- if (info->usage_id != 0) {
+ if (info->usage_id != 0)
/* First scan for an allocation with the same usage ID */
- struct kbase_va_region *walker;
- size_t current_diff = SIZE_MAX;
-
- list_for_each_entry(walker, &kctx->jit_pool_head, jit_node) {
-
- if (walker->jit_usage_id == info->usage_id &&
- walker->jit_bin_id == info->bin_id &&
- meet_size_and_tiler_align_top_requirements(
- kctx, walker, info)) {
- size_t min_size, max_size, diff;
-
- /*
- * The JIT allocations VA requirements have been
- * met, it's suitable but other allocations
- * might be a better fit.
- */
- min_size = min_t(size_t,
- walker->gpu_alloc->nents,
- info->commit_pages);
- max_size = max_t(size_t,
- walker->gpu_alloc->nents,
- info->commit_pages);
- diff = max_size - min_size;
-
- if (current_diff > diff) {
- current_diff = diff;
- reg = walker;
- }
+ reg = find_reasonable_region(info, &kctx->jit_pool_head, false);
- /* The allocation is an exact match */
- if (current_diff == 0)
- break;
- }
- }
- }
-
- if (!reg) {
+ if (!reg)
/* No allocation with the same usage ID, or usage IDs not in
* use. Search for an allocation we can reuse.
*/
- struct kbase_va_region *walker;
- size_t current_diff = SIZE_MAX;
-
- list_for_each_entry(walker, &kctx->jit_pool_head, jit_node) {
-
- if (walker->jit_bin_id == info->bin_id &&
- meet_size_and_tiler_align_top_requirements(
- kctx, walker, info)) {
- size_t min_size, max_size, diff;
-
- /*
- * The JIT allocations VA requirements have been
- * met, it's suitable but other allocations
- * might be a better fit.
- */
- min_size = min_t(size_t,
- walker->gpu_alloc->nents,
- info->commit_pages);
- max_size = max_t(size_t,
- walker->gpu_alloc->nents,
- info->commit_pages);
- diff = max_size - min_size;
-
- if (current_diff > diff) {
- current_diff = diff;
- reg = walker;
- }
-
- /* The allocation is an exact match, so stop
- * looking.
- */
- if (current_diff == 0)
- break;
- }
- }
- }
+ reg = find_reasonable_region(info, &kctx->jit_pool_head, true);
if (reg) {
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+ size_t needed_pages = 0;
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
+ int ret;
+
/*
* Remove the found region from the pool and add it to the
* active list.
*/
list_move(&reg->jit_node, &kctx->jit_active_head);
+ WARN_ON(reg->gpu_alloc->evicted);
+
/*
* Remove the allocation from the eviction list as it's no
* longer eligible for eviction. This must be done before
* dropping the jit_evict_lock
*/
list_del_init(&reg->gpu_alloc->evict_node);
+
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+ if (!ignore_pressure_limit) {
+ if (info->commit_pages > reg->gpu_alloc->nents)
+ needed_pages = info->commit_pages -
+ reg->gpu_alloc->nents;
+
+ /* Update early the recycled JIT region's estimate of
+ * used_pages to ensure it doesn't get trimmed
+ * undesirably. This is needed as the recycled JIT
+ * region has been added to the active list but the
+ * number of used pages for it would be zero, so it
+ * could get trimmed instead of other allocations only
+ * to be regrown later resulting in a breach of the JIT
+ * physical pressure limit.
+ * Also that trimming would disturb the accounting of
+ * physical pages, i.e. the VM stats, as the number of
+ * backing pages would have changed when the call to
+ * kbase_mem_evictable_unmark_reclaim is made.
+ *
+ * The second call to update pressure at the end of
+ * this function would effectively be a nop.
+ */
+ kbase_jit_report_update_pressure(
+ kctx, reg, info->va_pages,
+ KBASE_JIT_REPORT_ON_ALLOC_OR_FREE);
+
+ kbase_jit_request_phys_increase_locked(kctx,
+ needed_pages);
+ }
+#endif
mutex_unlock(&kctx->jit_evict_lock);
- if (kbase_jit_grow(kctx, info, reg) < 0) {
+ /* kbase_jit_grow() can release & reacquire 'kctx->reg_lock',
+ * so any state protected by that lock might need to be
+ * re-evaluated if more code is added here in future.
+ */
+ ret = kbase_jit_grow(kctx, info, reg, prealloc_sas);
+
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+ if (!ignore_pressure_limit)
+ kbase_jit_done_phys_increase(kctx, needed_pages);
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
+
+ kbase_gpu_vm_unlock(kctx);
+
+ if (ret < 0) {
/*
* An update to an allocation from the pool failed,
		 * chances are slim a new allocation would fare any
@@ -3770,10 +3794,21 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
dev_dbg(kctx->kbdev->dev,
"JIT allocation resize failed: va_pages 0x%llx, commit_pages 0x%llx\n",
info->va_pages, info->commit_pages);
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+ /* Undo the early change made to the recycled JIT
+ * region's estimate of used_pages.
+ */
+ if (!ignore_pressure_limit) {
+ kbase_jit_report_update_pressure(
+ kctx, reg, 0,
+ KBASE_JIT_REPORT_ON_ALLOC_OR_FREE);
+ }
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
mutex_lock(&kctx->jit_evict_lock);
list_move(&reg->jit_node, &kctx->jit_pool_head);
mutex_unlock(&kctx->jit_evict_lock);
- return NULL;
+ reg = NULL;
+ goto end;
}
} else {
/* No suitable JIT allocation was found so create a new one */
@@ -3783,12 +3818,23 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
BASEP_MEM_NO_USER_FREE;
u64 gpu_addr;
- mutex_unlock(&kctx->jit_evict_lock);
-
if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP)
flags |= BASE_MEM_TILER_ALIGN_TOP;
flags |= base_mem_group_id_set(kctx->jit_group_id);
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+ if (!ignore_pressure_limit) {
+ flags |= BASEP_MEM_PERFORM_JIT_TRIM;
+ /* The corresponding call to 'done_phys_increase' would
+ * be made inside the kbase_mem_alloc().
+ */
+ kbase_jit_request_phys_increase_locked(
+ kctx, info->commit_pages);
+ }
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
+
+ mutex_unlock(&kctx->jit_evict_lock);
+ kbase_gpu_vm_unlock(kctx);
reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages,
info->extent, &flags, &gpu_addr);
@@ -3799,12 +3845,22 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
dev_dbg(kctx->kbdev->dev,
"Failed to allocate JIT memory: va_pages 0x%llx, commit_pages 0x%llx\n",
info->va_pages, info->commit_pages);
- return NULL;
+ goto end;
}
- mutex_lock(&kctx->jit_evict_lock);
- list_add(&reg->jit_node, &kctx->jit_active_head);
- mutex_unlock(&kctx->jit_evict_lock);
+ if (!ignore_pressure_limit) {
+			/* Due to enforcement of the pressure limit, kbase_mem_alloc
+ * was instructed to perform the trimming which in turn
+ * would have ensured that the new JIT allocation is
+ * already in the jit_active_head list, so nothing to
+ * do here.
+ */
+ WARN_ON(list_empty(&reg->jit_node));
+ } else {
+ mutex_lock(&kctx->jit_evict_lock);
+ list_add(&reg->jit_node, &kctx->jit_active_head);
+ mutex_unlock(&kctx->jit_evict_lock);
+ }
}
trace_mali_jit_alloc(reg, info->id);
@@ -3816,13 +3872,18 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
reg->jit_usage_id = info->usage_id;
reg->jit_bin_id = info->bin_id;
-#if MALI_JIT_PRESSURE_LIMIT
+ reg->flags |= KBASE_REG_ACTIVE_JIT_ALLOC;
+#if MALI_JIT_PRESSURE_LIMIT_BASE
if (info->flags & BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE)
reg->flags = reg->flags | KBASE_REG_HEAP_INFO_IS_SIZE;
reg->heap_info_gpu_addr = info->heap_info_gpu_addr;
kbase_jit_report_update_pressure(kctx, reg, info->va_pages,
KBASE_JIT_REPORT_ON_ALLOC_OR_FREE);
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
+
+end:
+ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i)
+ kfree(prealloc_sas[i]);
return reg;
}
@@ -3848,11 +3909,11 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
kbase_mem_shrink(kctx, reg, old_pages - delta);
}
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
reg->heap_info_gpu_addr = 0;
kbase_jit_report_update_pressure(kctx, reg, 0,
KBASE_JIT_REPORT_ON_ALLOC_OR_FREE);
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
kctx->jit_current_allocations--;
kctx->jit_current_allocations_per_bin[reg->jit_bin_id]--;
@@ -3863,6 +3924,7 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
kbase_gpu_vm_lock(kctx);
reg->flags |= KBASE_REG_DONT_NEED;
+ reg->flags &= ~KBASE_REG_ACTIVE_JIT_ALLOC;
kbase_mem_shrink_cpu_mapping(kctx, reg, 0, reg->gpu_alloc->nents);
kbase_gpu_vm_unlock(kctx);
@@ -3962,6 +4024,9 @@ void kbase_jit_term(struct kbase_context *kctx)
kbase_mem_free_region(kctx, walker);
mutex_lock(&kctx->jit_evict_lock);
}
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+ WARN_ON(kctx->jit_phys_pages_to_be_allocated);
+#endif
mutex_unlock(&kctx->jit_evict_lock);
kbase_gpu_vm_unlock(kctx);
@@ -3972,7 +4037,7 @@ void kbase_jit_term(struct kbase_context *kctx)
cancel_work_sync(&kctx->jit_work);
}
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx,
struct kbase_va_region *reg, unsigned int flags)
{
@@ -4015,9 +4080,9 @@ void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx,
out:
return;
}
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
void kbase_jit_report_update_pressure(struct kbase_context *kctx,
struct kbase_va_region *reg, u64 new_used_pages,
unsigned int flags)
@@ -4053,7 +4118,7 @@ void kbase_jit_report_update_pressure(struct kbase_context *kctx,
}
}
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
bool kbase_has_exec_va_zone(struct kbase_context *kctx)
{
diff --git a/mali_kbase/mali_kbase_mem.h b/mali_kbase/mali_kbase_mem.h
index 6e921ec..a057f61 100644
--- a/mali_kbase/mali_kbase_mem.h
+++ b/mali_kbase/mali_kbase_mem.h
@@ -141,6 +141,7 @@ struct kbase_mem_phy_alloc {
union {
struct {
+ struct kbase_context *kctx;
struct dma_buf *dma_buf;
struct dma_buf_attachment *dma_attachment;
unsigned int current_mapping_usage_count;
@@ -330,7 +331,8 @@ struct kbase_va_region {
/* Bit 22 is reserved.
*
- * Do not remove, use the next unreserved bit for new flags */
+ * Do not remove, use the next unreserved bit for new flags
+ */
#define KBASE_REG_RESERVED_BIT_22 (1ul << 22)
/* The top of the initial commit is aligned to extent pages.
@@ -367,6 +369,9 @@ struct kbase_va_region {
*/
#define KBASE_REG_HEAP_INFO_IS_SIZE (1ul << 27)
+/* Allocation is actively used for JIT memory */
+#define KBASE_REG_ACTIVE_JIT_ALLOC (1ul << 28)
+
#define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0)
/* only used with 32-bit clients */
@@ -398,7 +403,7 @@ struct kbase_va_region {
struct list_head jit_node;
u16 jit_usage_id;
u8 jit_bin_id;
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
/* Pointer to an object in GPU memory defining an end of an allocated
* region
*
@@ -423,7 +428,7 @@ struct kbase_va_region {
* gpu_alloc->nents)
*/
size_t used_pages;
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
int va_refcnt;
};
@@ -1497,7 +1502,7 @@ bool kbase_jit_evict(struct kbase_context *kctx);
*/
void kbase_jit_term(struct kbase_context *kctx);
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
/**
* kbase_trace_jit_report_gpu_mem_trace_enabled - variant of
* kbase_trace_jit_report_gpu_mem() that should only be called once the
@@ -1508,7 +1513,7 @@ void kbase_jit_term(struct kbase_context *kctx);
*/
void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx,
struct kbase_va_region *reg, unsigned int flags);
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
/**
* kbase_trace_jit_report_gpu_mem - Trace information about the GPU memory used
@@ -1530,7 +1535,7 @@ void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx,
* been included. Also gives no opportunity for the compiler to mess up
* inlining it.
*/
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
#define kbase_trace_jit_report_gpu_mem(kctx, reg, flags) \
do { \
if (trace_mali_jit_report_gpu_mem_enabled()) \
@@ -1540,9 +1545,9 @@ void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx,
#else
#define kbase_trace_jit_report_gpu_mem(kctx, reg, flags) \
CSTD_NOP(kctx, reg, flags)
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
/**
* kbase_jit_report_update_pressure - safely update the JIT physical page
* pressure and JIT region's estimate of used_pages
@@ -1562,7 +1567,123 @@ void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx,
void kbase_jit_report_update_pressure(struct kbase_context *kctx,
struct kbase_va_region *reg, u64 new_used_pages,
unsigned int flags);
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+
+/**
+ * kbase_jit_trim_necessary_pages() - calculate and trim the fewest pages
+ * possible to satisfy a new JIT allocation
+ *
+ * @kctx: Pointer to the kbase context
+ * @needed_pages: Number of JIT physical pages by which trimming is requested.
+ * The actual number of pages trimmed could differ.
+ *
+ * Before allocating a new just-in-time memory region or reusing a previous
+ * one, ensure that the total JIT physical page usage also will not exceed the
+ * pressure limit.
+ *
+ * If there are no reported-on allocations, then we already guarantee this will
+ * be the case - because our current pressure then only comes from the va_pages
+ * of each JIT region, hence JIT physical page usage is guaranteed to be
+ * bounded by this.
+ *
+ * However as soon as JIT allocations become "reported on", the pressure is
+ * lowered to allow new JIT regions to be allocated. It is after such a point
+ * that the total JIT physical page usage could (either now or in the future on
+ * a grow-on-GPU-page-fault) exceed the pressure limit, but only on newly
+ * allocated JIT regions. Hence, trim any "reported on" regions.
+ *
+ * Any pages freed will go into the pool and be allocated from there in
+ * kbase_mem_alloc().
+ */
+void kbase_jit_trim_necessary_pages(struct kbase_context *kctx,
+ size_t needed_pages);
+
+/*
+ * Same as kbase_jit_request_phys_increase(), except that the caller must
+ * also hold jit_evict_lock on @kctx before calling this function.
+ */
+static inline void
+kbase_jit_request_phys_increase_locked(struct kbase_context *kctx,
+ size_t needed_pages)
+{
+ lockdep_assert_held(&kctx->jctx.lock);
+ lockdep_assert_held(&kctx->reg_lock);
+ lockdep_assert_held(&kctx->jit_evict_lock);
+
+ kctx->jit_phys_pages_to_be_allocated += needed_pages;
+
+ kbase_jit_trim_necessary_pages(kctx,
+ kctx->jit_phys_pages_to_be_allocated);
+}
+
+/**
+ * kbase_jit_request_phys_increase() - Increment the backing pages count and do
+ * the required trimming before allocating pages for a JIT allocation.
+ *
+ * @kctx: Pointer to the kbase context
+ * @needed_pages: Number of pages to be allocated for the JIT allocation.
+ *
+ * This function needs to be called before allocating backing pages for a
+ * just-in-time memory region. The backing pages are currently allocated when:
+ *
+ * - A new JIT region is created.
+ * - An old JIT region is reused from the cached pool.
+ * - GPU page fault occurs for the active JIT region.
+ * - Backing is grown for the JIT region through the commit ioctl.
+ *
+ * This function would ensure that the total JIT physical page usage does not
+ * exceed the pressure limit even when the backing pages get allocated
+ * simultaneously for multiple JIT allocations from different threads.
+ *
+ * There should be a matching call to kbase_jit_done_phys_increase(), after
+ * the pages have been allocated and accounted against the active JIT
+ * allocation.
+ *
+ * Caller is supposed to take reg_lock on @kctx before calling this function.
+ */
+static inline void kbase_jit_request_phys_increase(struct kbase_context *kctx,
+ size_t needed_pages)
+{
+ lockdep_assert_held(&kctx->jctx.lock);
+ lockdep_assert_held(&kctx->reg_lock);
+
+ mutex_lock(&kctx->jit_evict_lock);
+ kbase_jit_request_phys_increase_locked(kctx, needed_pages);
+ mutex_unlock(&kctx->jit_evict_lock);
+}
+
+/**
+ * kbase_jit_done_phys_increase() - Decrement the backing pages count after the
+ * allocation of pages for a JIT allocation.
+ *
+ * @kctx: Pointer to the kbase context
+ * @needed_pages: Number of pages that were allocated for the JIT allocation.
+ *
+ * This function should be called after backing pages have been allocated and
+ * accounted against the active JIT allocation.
+ * The call should be made once all of the following are true:
+ * - the allocation is on the jit_active_head,
+ * - the additional needed_pages have been allocated,
+ * - kctx->reg_lock has been held throughout and has not yet been released.
+ *
+ * Failing to call this function before releasing kctx->reg_lock, once either
+ * of the first two conditions changes, may result in over-accounting the
+ * memory; calling it at the right point keeps the count seen by
+ * kbase_jit_trim_necessary_pages() consistent.
+ *
+ * A matching call to kbase_jit_request_phys_increase() should have been made,
+ * before the allocation of backing pages.
+ *
+ * Caller is supposed to take reg_lock on @kctx before calling this function.
+ */
+static inline void kbase_jit_done_phys_increase(struct kbase_context *kctx,
+ size_t needed_pages)
+{
+ lockdep_assert_held(&kctx->reg_lock);
+
+ WARN_ON(kctx->jit_phys_pages_to_be_allocated < needed_pages);
+
+ kctx->jit_phys_pages_to_be_allocated -= needed_pages;
+}
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
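
Note: the request/done pairing documented above can be summarised in a short sketch.
This is illustrative only: grow_jit_backing() is a hypothetical caller and
alloc_backing_pages() is a stand-in for whichever path actually adds the physical
pages; only the kbase_jit_request_phys_increase()/kbase_jit_done_phys_increase()
calls and the locking come from the declarations above.

static int grow_jit_backing(struct kbase_context *kctx,
			    struct kbase_va_region *reg, size_t needed_pages)
{
	int err;

	/* Caller already holds kctx->jctx.lock; take kctx->reg_lock. */
	kbase_gpu_vm_lock(kctx);

	/* Account for the pages up front, trimming "reported on" regions so
	 * the pressure limit still holds once the pages are added.
	 */
	kbase_jit_request_phys_increase(kctx, needed_pages);

	err = alloc_backing_pages(kctx, reg, needed_pages);	/* hypothetical */

	/* Drop the pending-pages count before releasing reg_lock, whether or
	 * not the allocation succeeded, to keep the accounting consistent.
	 */
	kbase_jit_done_phys_increase(kctx, needed_pages);

	kbase_gpu_vm_unlock(kctx);
	return err;
}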
/**
* kbase_has_exec_va_zone - EXEC_VA zone predicate
@@ -1742,7 +1863,6 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx,
int kbase_mem_do_sync_imported(struct kbase_context *kctx,
struct kbase_va_region *reg, enum kbase_sync_type sync_fn);
-
/**
* kbase_mem_copy_to_pinned_user_pages - Memcpy from source input page to
* an unaligned address at a given offset from the start of a target page.
diff --git a/mali_kbase/mali_kbase_mem_linux.c b/mali_kbase/mali_kbase_mem_linux.c
index 219e0af..d7863e1 100644
--- a/mali_kbase/mali_kbase_mem_linux.c
+++ b/mali_kbase/mali_kbase_mem_linux.c
@@ -49,6 +49,8 @@
#include <tl/mali_kbase_tracepoints.h>
#include <mali_kbase_ioctl.h>
#include <mmu/mali_kbase_mmu.h>
+#include <mali_kbase_caps.h>
+#include <mali_kbase_trace_gpu_mem.h>
#if ((KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) || \
(KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE))
@@ -372,10 +374,12 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
} else
reg->threshold_pages = 0;
- if (*flags & (BASE_MEM_GROW_ON_GPF|BASE_MEM_TILER_ALIGN_TOP)) {
+ if (*flags & BASE_MEM_GROW_ON_GPF) {
/* kbase_check_alloc_sizes() already checks extent is valid for
* assigning to reg->extent */
reg->extent = extent;
+ } else if (*flags & BASE_MEM_TILER_ALIGN_TOP) {
+ reg->extent = extent;
} else {
reg->extent = 0;
}
@@ -436,6 +440,17 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
*gpu_va = reg->start_pfn << PAGE_SHIFT;
}
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+ if (*flags & BASEP_MEM_PERFORM_JIT_TRIM) {
+ kbase_jit_done_phys_increase(kctx, commit_pages);
+
+ mutex_lock(&kctx->jit_evict_lock);
+ WARN_ON(!list_empty(&reg->jit_node));
+ list_add(&reg->jit_node, &kctx->jit_active_head);
+ mutex_unlock(&kctx->jit_evict_lock);
+ }
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
+
kbase_gpu_vm_unlock(kctx);
return reg;
@@ -443,6 +458,13 @@ no_mmap:
no_cookie:
no_kern_mapping:
no_mem:
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+ if (*flags & BASEP_MEM_PERFORM_JIT_TRIM) {
+ kbase_gpu_vm_lock(kctx);
+ kbase_jit_done_phys_increase(kctx, commit_pages);
+ kbase_gpu_vm_unlock(kctx);
+ }
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
kbase_mem_phy_alloc_put(reg->cpu_alloc);
kbase_mem_phy_alloc_put(reg->gpu_alloc);
invalid_flags:
@@ -511,14 +533,23 @@ int kbase_mem_query(struct kbase_context *kctx,
*out |= BASE_MEM_COHERENT_SYSTEM;
if (KBASE_REG_SHARE_IN & reg->flags)
*out |= BASE_MEM_COHERENT_LOCAL;
- if (kctx->api_version >= KBASE_API_VERSION(11, 2)) {
- /* Prior to 11.2, these were known about by user-side
- * but we did not return them. Returning some of these
- * caused certain clients that were not expecting them
- * to fail, so we omit all of them as a special-case
- * for compatibility reasons */
+ if (mali_kbase_supports_mem_grow_on_gpf(kctx->api_version)) {
+ /* Prior to this version, this was known about by
+		 * user-side but we did not return it. Returning
+ * it caused certain clients that were not expecting
+ * it to fail, so we omit it as a special-case for
+ * compatibility reasons
+ */
if (KBASE_REG_PF_GROW & reg->flags)
*out |= BASE_MEM_GROW_ON_GPF;
+ }
+ if (mali_kbase_supports_mem_protected(kctx->api_version)) {
+ /* Prior to this version, this was known about by
+		 * user-side but we did not return it. Returning
+ * it caused certain clients that were not expecting
+ * it to fail, so we omit it as a special-case for
+ * compatibility reasons
+ */
if (KBASE_REG_PROTECTED & reg->flags)
*out |= BASE_MEM_PROTECTED;
}
@@ -705,6 +736,7 @@ void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc)
kbdev,
kctx->id,
(u64)new_page_count);
+ kbase_trace_gpu_mem_usage_dec(kbdev, kctx, alloc->nents);
}
/**
@@ -731,6 +763,7 @@ void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc)
kbdev,
kctx->id,
(u64)new_page_count);
+ kbase_trace_gpu_mem_usage_inc(kbdev, kctx, alloc->nents);
}
int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc)
@@ -1056,6 +1089,8 @@ static void kbase_mem_umm_unmap_attachment(struct kbase_context *kctx,
alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
alloc->imported.umm.sgt = NULL;
+ kbase_remove_dma_buf_usage(kctx, alloc);
+
memset(pa, 0xff, sizeof(*pa) * alloc->nents);
alloc->nents = 0;
}
@@ -1123,6 +1158,7 @@ static int kbase_mem_umm_map_attachment(struct kbase_context *kctx,
/* Update nents as we now have pages to map */
alloc->nents = count;
+ kbase_add_dma_buf_usage(kctx, alloc);
return 0;
@@ -1383,6 +1419,7 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx,
reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment;
reg->gpu_alloc->imported.umm.current_mapping_usage_count = 0;
reg->gpu_alloc->imported.umm.need_sync = need_sync;
+ reg->gpu_alloc->imported.umm.kctx = kctx;
reg->extent = 0;
if (!IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND)) {
@@ -2024,7 +2061,7 @@ static int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx,
int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages)
{
u64 old_pages;
- u64 delta;
+ u64 delta = 0;
int res = -EINVAL;
struct kbase_va_region *reg;
bool read_locked = false;
@@ -2054,6 +2091,9 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages)
if (0 == (reg->flags & KBASE_REG_GROWABLE))
goto out_unlock;
+ if (reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC)
+ goto out_unlock;
+
/* Would overflow the VA region */
if (new_pages > reg->nr_pages)
goto out_unlock;
@@ -2216,8 +2256,6 @@ static void kbase_cpu_vm_close(struct vm_area_struct *vma)
kfree(map);
}
-KBASE_EXPORT_TEST_API(kbase_cpu_vm_close);
-
static struct kbase_aliased *get_aliased_alloc(struct vm_area_struct *vma,
struct kbase_va_region *reg,
pgoff_t *start_off,
@@ -2935,9 +2973,9 @@ KBASE_EXPORT_TEST_API(kbase_vunmap);
static void kbasep_add_mm_counter(struct mm_struct *mm, int member, long value)
{
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0))
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 0))
/* To avoid the build breakage due to an unexported kernel symbol
- * 'mm_trace_rss_stat' from later kernels, i.e. from V5.5.0 onwards,
+ * 'mm_trace_rss_stat' from later kernels, i.e. from V4.19.0 onwards,
* we inline here the equivalent of 'add_mm_counter()' from linux
* kernel V5.4.0~8.
*/
diff --git a/mali_kbase/mali_kbase_pm.c b/mali_kbase/mali_kbase_pm.c
index b9ed8c3..7263b58 100644
--- a/mali_kbase/mali_kbase_pm.c
+++ b/mali_kbase/mali_kbase_pm.c
@@ -39,6 +39,8 @@
#include <arbiter/mali_kbase_arbiter_pm.h>
#endif /* CONFIG_MALI_ARBITER_SUPPORT */
+#include <mali_kbase_clk_rate_trace_mgr.h>
+
int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags)
{
return kbase_hwaccess_pm_powerup(kbdev, flags);
@@ -101,6 +103,7 @@ int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev,
#ifdef CONFIG_MALI_ARBITER_SUPPORT
kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_REF_EVENT);
#endif /* CONFIG_MALI_ARBITER_SUPPORT */
+ kbase_clk_rate_trace_manager_gpu_active(kbdev);
}
kbase_pm_unlock(kbdev);
@@ -128,6 +131,7 @@ void kbase_pm_context_idle(struct kbase_device *kbdev)
if (c == 0) {
/* Last context has gone idle */
kbase_hwaccess_pm_gpu_idle(kbdev);
+ kbase_clk_rate_trace_manager_gpu_idle(kbdev);
/* Wake up anyone waiting for this to become 0 (e.g. suspend).
* The waiters must synchronize with us by locking the pm.lock
diff --git a/mali_kbase/mali_kbase_softjobs.c b/mali_kbase/mali_kbase_softjobs.c
index cbb0c76..7a784ac 100644
--- a/mali_kbase/mali_kbase_softjobs.c
+++ b/mali_kbase/mali_kbase_softjobs.c
@@ -32,6 +32,7 @@
#include <linux/dma-mapping.h>
#include <mali_base_kernel.h>
#include <mali_kbase_hwaccess_time.h>
+#include <mali_kbase_kinstr_jm.h>
#include <mali_kbase_mem_linux.h>
#include <tl/mali_kbase_tracepoints.h>
#include <mali_linux_trace.h>
@@ -899,7 +900,7 @@ int kbasep_jit_alloc_validate(struct kbase_context *kctx,
if (info->flags & ~(BASE_JIT_ALLOC_VALID_FLAGS))
return -EINVAL;
-#if !MALI_JIT_PRESSURE_LIMIT
+#if !MALI_JIT_PRESSURE_LIMIT_BASE
/* If just-in-time memory allocation pressure limit feature is disabled,
* heap_info_gpu_addr must be zeroed-out
*/
@@ -1091,14 +1092,19 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom)
}
}
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
/**
- * If this is the only JIT_ALLOC atom in-flight then allow it to exceed
- * the defined pressure limit.
+ * If this is the only JIT_ALLOC atom in-flight or if JIT pressure limit
+ * is disabled at the context scope, then bypass JIT pressure limit
+ * logic in kbase_jit_allocate().
*/
- if (kctx->jit_current_allocations == 0)
+ if (!kbase_ctx_flag(kctx, KCTX_JPL_ENABLED)
+ || (kctx->jit_current_allocations == 0)) {
ignore_pressure_limit = true;
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+ }
+#else
+ ignore_pressure_limit = true;
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
for (i = 0, info = katom->softjob_data; i < count; i++, info++) {
if (kctx->jit_alloc[info->id]) {
@@ -1358,12 +1364,16 @@ void kbase_jit_retry_pending_alloc(struct kbase_context *kctx)
list_for_each_safe(i, tmp, &jit_pending_alloc_list) {
struct kbase_jd_atom *pending_atom = list_entry(i,
struct kbase_jd_atom, queue);
+ KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(kctx->kbdev, pending_atom);
+ kbase_kinstr_jm_atom_sw_start(pending_atom);
if (kbase_jit_allocate_process(pending_atom) == 0) {
/* Atom has completed */
INIT_WORK(&pending_atom->work,
kbasep_jit_finish_worker);
queue_work(kctx->jctx.job_done_wq, &pending_atom->work);
}
+ KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(kctx->kbdev, pending_atom);
+ kbase_kinstr_jm_atom_sw_stop(pending_atom);
}
}
@@ -1538,6 +1548,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom)
struct kbase_device *kbdev = kctx->kbdev;
KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(kbdev, katom);
+ kbase_kinstr_jm_atom_sw_start(katom);
trace_sysgraph(SGR_SUBMIT, kctx->id,
kbase_jd_atom_id(kctx, katom));
@@ -1600,6 +1611,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom)
/* Atom is complete */
KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(kbdev, katom);
+ kbase_kinstr_jm_atom_sw_stop(katom);
return ret;
}
diff --git a/mali_kbase/mali_kbase_trace_gpu_mem.c b/mali_kbase/mali_kbase_trace_gpu_mem.c
new file mode 100644
index 0000000..0a053da
--- /dev/null
+++ b/mali_kbase/mali_kbase_trace_gpu_mem.c
@@ -0,0 +1,227 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_trace_gpu_mem.h>
+
+/**
+ * struct kbase_dma_buf - Object instantiated when a dma-buf imported
+ *                        allocation is mapped to the GPU for the first time
+ *                        within a process. A second instance is created for
+ *                        the device-wide tracking when that allocation is
+ *                        first mapped to the GPU on the device.
+ *
+ * @dma_buf: Reference to the dma_buf that was imported.
+ * @dma_buf_node: Link node to maintain an rb_tree of kbase_dma_buf.
+ * @import_count: The number of times the dma_buf was imported.
+ */
+struct kbase_dma_buf {
+ struct dma_buf *dma_buf;
+ struct rb_node dma_buf_node;
+ u32 import_count;
+};
+
+/**
+ * kbase_delete_dma_buf_mapping - Delete a dma buffer mapping.
+ *
+ * @kctx: Pointer to kbase context.
+ * @dma_buf: Pointer to a dma buffer mapping.
+ * @tree: Pointer to root of rb_tree containing the dma_buf's mapped.
+ *
+ * When a dma-buf mapping is unmapped, its node must be removed from the
+ * rb_tree. An rb_tree is maintained at both kbase_device level and
+ * kbase_process level; passing the root of either the kbase_device or the
+ * kbase_process tree selects which one the node is removed from.
+ */
+static bool kbase_delete_dma_buf_mapping(struct kbase_context *kctx,
+ struct dma_buf *dma_buf,
+ struct rb_root *tree)
+{
+ struct kbase_dma_buf *buf_node = NULL;
+ struct rb_node *node = tree->rb_node;
+ bool mapping_removed = false;
+
+ lockdep_assert_held(&kctx->kbdev->dma_buf_lock);
+
+ while (node) {
+ buf_node = rb_entry(node, struct kbase_dma_buf, dma_buf_node);
+
+ if (dma_buf == buf_node->dma_buf) {
+ WARN_ON(!buf_node->import_count);
+
+ buf_node->import_count--;
+
+ if (!buf_node->import_count) {
+ rb_erase(&buf_node->dma_buf_node, tree);
+ kfree(buf_node);
+ mapping_removed = true;
+ }
+
+ break;
+ }
+
+ if (dma_buf < buf_node->dma_buf)
+ node = node->rb_left;
+ else
+ node = node->rb_right;
+ }
+
+ WARN_ON(!buf_node);
+ return mapping_removed;
+}
+
+/**
+ * kbase_capture_dma_buf_mapping - capture a dma buffer mapping.
+ *
+ * @kctx: Pointer to kbase context.
+ * @dma_buf: Pointer to a dma buffer mapping.
+ * @root: Pointer to root of rb_tree containing the dma_buf's.
+ *
+ * An rb_tree of all unique dma_buf's mapped to GPU memory is maintained at
+ * both kbase_device level and kbase_process level. When a dma_buf is
+ * attached it is added to the relevant trees, after first checking that the
+ * mapping is not a duplicate, so that only unique imports are counted.
+ */
+static bool kbase_capture_dma_buf_mapping(struct kbase_context *kctx,
+ struct dma_buf *dma_buf,
+ struct rb_root *root)
+{
+ struct kbase_dma_buf *buf_node = NULL;
+ struct rb_node *node = root->rb_node;
+ bool unique_buf_imported = true;
+
+ lockdep_assert_held(&kctx->kbdev->dma_buf_lock);
+
+ while (node) {
+ buf_node = rb_entry(node, struct kbase_dma_buf, dma_buf_node);
+
+ if (dma_buf == buf_node->dma_buf) {
+ unique_buf_imported = false;
+ break;
+ }
+
+ if (dma_buf < buf_node->dma_buf)
+ node = node->rb_left;
+ else
+ node = node->rb_right;
+ }
+
+ if (unique_buf_imported) {
+ struct kbase_dma_buf *buf_node =
+ kzalloc(sizeof(*buf_node), GFP_KERNEL);
+
+ if (buf_node == NULL) {
+ dev_err(kctx->kbdev->dev, "Error allocating memory for kbase_dma_buf\n");
+			/* Don't account for it if we fail to allocate memory */
+ unique_buf_imported = false;
+ } else {
+ struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+ buf_node->dma_buf = dma_buf;
+ buf_node->import_count = 1;
+ while (*new) {
+ struct kbase_dma_buf *node;
+
+ parent = *new;
+ node = rb_entry(parent, struct kbase_dma_buf,
+ dma_buf_node);
+ if (dma_buf < node->dma_buf)
+ new = &(*new)->rb_left;
+ else
+ new = &(*new)->rb_right;
+ }
+ rb_link_node(&buf_node->dma_buf_node, parent, new);
+ rb_insert_color(&buf_node->dma_buf_node, root);
+ }
+ } else if (!WARN_ON(!buf_node)) {
+ buf_node->import_count++;
+ }
+
+ return unique_buf_imported;
+}
+
+void kbase_remove_dma_buf_usage(struct kbase_context *kctx,
+ struct kbase_mem_phy_alloc *alloc)
+{
+ struct kbase_device *kbdev = kctx->kbdev;
+ bool dev_mapping_removed, prcs_mapping_removed;
+
+ mutex_lock(&kbdev->dma_buf_lock);
+
+ dev_mapping_removed = kbase_delete_dma_buf_mapping(
+ kctx, alloc->imported.umm.dma_buf, &kbdev->dma_buf_root);
+
+ prcs_mapping_removed = kbase_delete_dma_buf_mapping(
+ kctx, alloc->imported.umm.dma_buf, &kctx->kprcs->dma_buf_root);
+
+ WARN_ON(dev_mapping_removed && !prcs_mapping_removed);
+
+ spin_lock(&kbdev->gpu_mem_usage_lock);
+ if (dev_mapping_removed)
+ kbdev->total_gpu_pages -= alloc->nents;
+
+ if (prcs_mapping_removed)
+ kctx->kprcs->total_gpu_pages -= alloc->nents;
+
+ if (dev_mapping_removed || prcs_mapping_removed)
+ kbase_trace_gpu_mem_usage(kbdev, kctx);
+ spin_unlock(&kbdev->gpu_mem_usage_lock);
+
+ mutex_unlock(&kbdev->dma_buf_lock);
+}
+
+void kbase_add_dma_buf_usage(struct kbase_context *kctx,
+ struct kbase_mem_phy_alloc *alloc)
+{
+ struct kbase_device *kbdev = kctx->kbdev;
+ bool unique_dev_dmabuf, unique_prcs_dmabuf;
+
+ mutex_lock(&kbdev->dma_buf_lock);
+
+ /* add dma_buf to device and process. */
+ unique_dev_dmabuf = kbase_capture_dma_buf_mapping(
+ kctx, alloc->imported.umm.dma_buf, &kbdev->dma_buf_root);
+
+ unique_prcs_dmabuf = kbase_capture_dma_buf_mapping(
+ kctx, alloc->imported.umm.dma_buf, &kctx->kprcs->dma_buf_root);
+
+ WARN_ON(unique_dev_dmabuf && !unique_prcs_dmabuf);
+
+ spin_lock(&kbdev->gpu_mem_usage_lock);
+ if (unique_dev_dmabuf)
+ kbdev->total_gpu_pages += alloc->nents;
+
+ if (unique_prcs_dmabuf)
+ kctx->kprcs->total_gpu_pages += alloc->nents;
+
+ if (unique_prcs_dmabuf || unique_dev_dmabuf)
+ kbase_trace_gpu_mem_usage(kbdev, kctx);
+ spin_unlock(&kbdev->gpu_mem_usage_lock);
+
+ mutex_unlock(&kbdev->dma_buf_lock);
+}
+
+#ifndef CONFIG_TRACE_GPU_MEM
+#define CREATE_TRACE_POINTS
+#include "mali_gpu_mem_trace.h"
+#endif
diff --git a/mali_kbase/mali_kbase_trace_gpu_mem.h b/mali_kbase/mali_kbase_trace_gpu_mem.h
new file mode 100644
index 0000000..b621525
--- /dev/null
+++ b/mali_kbase/mali_kbase_trace_gpu_mem.h
@@ -0,0 +1,101 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_TRACE_GPU_MEM_H_
+#define _KBASE_TRACE_GPU_MEM_H_
+
+#ifdef CONFIG_TRACE_GPU_MEM
+#include <trace/events/gpu_mem.h>
+#else
+#include "mali_gpu_mem_trace.h"
+#endif
+
+#define DEVICE_TGID ((u32) 0U)
+
+static inline void kbase_trace_gpu_mem_usage(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ lockdep_assert_held(&kbdev->gpu_mem_usage_lock);
+
+ trace_gpu_mem_total(kbdev->id, DEVICE_TGID,
+ kbdev->total_gpu_pages << PAGE_SHIFT);
+
+ if (likely(kctx))
+ trace_gpu_mem_total(kbdev->id, kctx->kprcs->tgid,
+ kctx->kprcs->total_gpu_pages << PAGE_SHIFT);
+}
+
+static inline void kbase_trace_gpu_mem_usage_dec(struct kbase_device *kbdev,
+ struct kbase_context *kctx, size_t pages)
+{
+ spin_lock(&kbdev->gpu_mem_usage_lock);
+
+ if (likely(kctx))
+ kctx->kprcs->total_gpu_pages -= pages;
+
+ kbdev->total_gpu_pages -= pages;
+
+ kbase_trace_gpu_mem_usage(kbdev, kctx);
+
+ spin_unlock(&kbdev->gpu_mem_usage_lock);
+}
+
+static inline void kbase_trace_gpu_mem_usage_inc(struct kbase_device *kbdev,
+ struct kbase_context *kctx, size_t pages)
+{
+ spin_lock(&kbdev->gpu_mem_usage_lock);
+
+ if (likely(kctx))
+ kctx->kprcs->total_gpu_pages += pages;
+
+ kbdev->total_gpu_pages += pages;
+
+ kbase_trace_gpu_mem_usage(kbdev, kctx);
+
+ spin_unlock(&kbdev->gpu_mem_usage_lock);
+}
+
+/**
+ * kbase_remove_dma_buf_usage - Remove a dma-buf entry captured.
+ *
+ * @kctx: Pointer to the kbase context
+ * @alloc: Pointer to the alloc to unmap
+ *
+ * Removes the reference to the dma-buf being unmapped from both the
+ * kbase_device level and kbase_process level dma-buf rb_trees.
+ */
+void kbase_remove_dma_buf_usage(struct kbase_context *kctx,
+ struct kbase_mem_phy_alloc *alloc);
+
+/**
+ * kbase_add_dma_buf_usage - Add a dma-buf entry captured.
+ *
+ * @kctx: Pointer to the kbase context
+ * @alloc: Pointer to the alloc to map in
+ *
+ * Adds a reference to the dma-buf being mapped, to both the kbase_device
+ * level and kbase_process level dma-buf rb_trees.
+ */
+void kbase_add_dma_buf_usage(struct kbase_context *kctx,
+ struct kbase_mem_phy_alloc *alloc);
+
+#endif /* _KBASE_TRACE_GPU_MEM_H_ */
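
Note: the accounting outcome the rb_tree de-duplication above aims for can be
sketched as follows. This is not driver code: the allocation set-up is elided and
the calls simply mirror the declarations in this header; the page totals only move
on the first capture and the last removal of a given dma_buf.

static void dma_buf_usage_example(struct kbase_context *kctx,
				  struct kbase_mem_phy_alloc *alloc)
{
	kbase_add_dma_buf_usage(kctx, alloc);	/* first map: totals += alloc->nents */
	kbase_add_dma_buf_usage(kctx, alloc);	/* duplicate: only import_count++ */

	kbase_remove_dma_buf_usage(kctx, alloc);	/* totals unchanged */
	kbase_remove_dma_buf_usage(kctx, alloc);	/* last ref: totals -= alloc->nents */
}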
diff --git a/mali_kbase/mali_kbase_vinstr.c b/mali_kbase/mali_kbase_vinstr.c
index d96b565..72cec13 100644
--- a/mali_kbase/mali_kbase_vinstr.c
+++ b/mali_kbase/mali_kbase_vinstr.c
@@ -184,6 +184,7 @@ static int kbasep_vinstr_client_dump(
unsigned int read_idx;
struct kbase_hwcnt_dump_buffer *dump_buf;
struct kbase_hwcnt_reader_metadata *meta;
+ u8 clk_cnt;
WARN_ON(!vcli);
lockdep_assert_held(&vcli->vctx->lock);
@@ -212,9 +213,14 @@ static int kbasep_vinstr_client_dump(
/* Zero all non-enabled counters (current values are undefined) */
kbase_hwcnt_dump_buffer_zero_non_enabled(dump_buf, &vcli->enable_map);
+ clk_cnt = vcli->vctx->metadata->clk_cnt;
+
meta->timestamp = ts_end_ns;
meta->event_id = event_id;
meta->buffer_idx = write_idx;
+ meta->cycles.top = (clk_cnt > 0) ? dump_buf->clk_cnt_buf[0] : 0;
+ meta->cycles.shader_cores =
+ (clk_cnt > 1) ? dump_buf->clk_cnt_buf[1] : 0;
/* Notify client. Make sure all changes to memory are visible. */
wmb();
@@ -404,12 +410,15 @@ static int kbasep_vinstr_client_create(
if (errcode)
goto error;
- phys_em.jm_bm = setup->jm_bm;
+ phys_em.fe_bm = setup->fe_bm;
phys_em.shader_bm = setup->shader_bm;
phys_em.tiler_bm = setup->tiler_bm;
phys_em.mmu_l2_bm = setup->mmu_l2_bm;
kbase_hwcnt_gpu_enable_map_from_physical(&vcli->enable_map, &phys_em);
+ /* Enable all the available clk_enable_map. */
+ vcli->enable_map.clk_enable_map = (1ull << vctx->metadata->clk_cnt) - 1;
+
errcode = kbase_hwcnt_dump_buffer_array_alloc(
vctx->metadata, setup->buffer_count, &vcli->dump_bufs);
if (errcode)
@@ -675,23 +684,26 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer(
unsigned int idx = meta_idx % cli->dump_bufs.buf_cnt;
struct kbase_hwcnt_reader_metadata *meta = &cli->dump_bufs_meta[idx];
+ const size_t meta_size = sizeof(struct kbase_hwcnt_reader_metadata);
+ const size_t min_size = min(size, meta_size);
/* Metadata sanity check. */
WARN_ON(idx != meta->buffer_idx);
- if (sizeof(struct kbase_hwcnt_reader_metadata) != size)
- return -EINVAL;
-
/* Check if there is any buffer available. */
- if (atomic_read(&cli->write_idx) == meta_idx)
+ if (unlikely(atomic_read(&cli->write_idx) == meta_idx))
return -EAGAIN;
/* Check if previously taken buffer was put back. */
- if (atomic_read(&cli->read_idx) != meta_idx)
+ if (unlikely(atomic_read(&cli->read_idx) != meta_idx))
return -EBUSY;
+ /* Clear user buffer to zero. */
+ if (unlikely(meta_size < size && clear_user(buffer, size)))
+ return -EFAULT;
+
/* Copy next available buffer's metadata to user. */
- if (copy_to_user(buffer, meta, size))
+ if (unlikely(copy_to_user(buffer, meta, min_size)))
return -EFAULT;
atomic_inc(&cli->meta_idx);
@@ -715,24 +727,62 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer(
unsigned int read_idx = atomic_read(&cli->read_idx);
unsigned int idx = read_idx % cli->dump_bufs.buf_cnt;
- struct kbase_hwcnt_reader_metadata meta;
-
- if (sizeof(struct kbase_hwcnt_reader_metadata) != size)
- return -EINVAL;
+ struct kbase_hwcnt_reader_metadata *meta;
+ const size_t meta_size = sizeof(struct kbase_hwcnt_reader_metadata);
+ const size_t max_size = max(size, meta_size);
+ int ret = 0;
+ u8 stack_kbuf[64];
+ u8 *kbuf = NULL;
+ size_t i;
/* Check if any buffer was taken. */
- if (atomic_read(&cli->meta_idx) == read_idx)
+ if (unlikely(atomic_read(&cli->meta_idx) == read_idx))
return -EPERM;
+ if (likely(max_size <= sizeof(stack_kbuf))) {
+ /* Use stack buffer when the size is small enough. */
+ if (unlikely(meta_size > size))
+ memset(stack_kbuf, 0, sizeof(stack_kbuf));
+ kbuf = stack_kbuf;
+ } else {
+ kbuf = kzalloc(max_size, GFP_KERNEL);
+ if (unlikely(!kbuf))
+ return -ENOMEM;
+ }
+
+ /*
+	 * Copy the user buffer into a zero-cleared kernel buffer that has
+	 * enough space for both the user data and the kernel metadata.
+ */
+ if (unlikely(copy_from_user(kbuf, buffer, size))) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ /*
+ * Make sure any "extra" data passed from userspace is zero.
+	 * This check is only meaningful when meta_size < size.
+ */
+ for (i = meta_size; i < size; i++) {
+ /* Check if user data beyond meta size is zero. */
+ if (unlikely(kbuf[i] != 0)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
/* Check if correct buffer is put back. */
- if (copy_from_user(&meta, buffer, size))
- return -EFAULT;
- if (idx != meta.buffer_idx)
- return -EINVAL;
+ meta = (struct kbase_hwcnt_reader_metadata *)kbuf;
+ if (unlikely(idx != meta->buffer_idx)) {
+ ret = -EINVAL;
+ goto out;
+ }
atomic_inc(&cli->read_idx);
-
- return 0;
+out:
+ if (unlikely(kbuf != stack_kbuf))
+ kfree(kbuf);
+ return ret;
}
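
Note: the trailing-byte check above mirrors the semantics later kernels provide in
copy_struct_from_user(): zero-fill a short user buffer, reject non-zero bytes beyond
the kernel struct with -E2BIG. A minimal sketch of that alternative, assuming a
kernel recent enough (roughly v5.4+) to have the helper; it is illustrative only,
not a drop-in replacement for the hand-rolled version above.

#include <linux/uaccess.h>

static long put_buffer_copy_sketch(struct kbase_hwcnt_reader_metadata *meta,
				   const void __user *buffer, size_t size)
{
	/* Zero-fills *meta when size < sizeof(*meta); returns -E2BIG when the
	 * user bytes beyond sizeof(*meta) are not all zero, -EFAULT on a bad
	 * user pointer, and 0 on success.
	 */
	return copy_struct_from_user(meta, sizeof(*meta), buffer, size);
}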
/**
@@ -836,6 +886,42 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver(
}
/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_get_api_version() - get API version ioctl
+ * @cli:  The non-NULL pointer to the client
+ * @arg:  Command's argument.
+ * @size: Size of arg.
+ *
+ * Return: 0 on success, else error code.
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_get_api_version(
+ struct kbase_vinstr_client *cli, unsigned long arg, size_t size)
+{
+ long ret = -EINVAL;
+ u8 clk_cnt = cli->vctx->metadata->clk_cnt;
+
+ if (size == sizeof(u32)) {
+ ret = put_user(HWCNT_READER_API, (u32 __user *)arg);
+ } else if (size == sizeof(struct kbase_hwcnt_reader_api_version)) {
+ struct kbase_hwcnt_reader_api_version api_version = {
+ .version = HWCNT_READER_API,
+ .features = KBASE_HWCNT_READER_API_VERSION_NO_FEATURE,
+ };
+
+ if (clk_cnt > 0)
+ api_version.features |=
+ KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_TOP;
+ if (clk_cnt > 1)
+ api_version.features |=
+ KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_SHADER_CORES;
+
+ ret = put_user(api_version,
+ (struct kbase_hwcnt_reader_api_version __user *)
+ arg);
+ }
+ return ret;
+}
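
Note: a hedged userspace view of the new versioned query follows. Only the struct and
feature-flag names visible in the change above are taken as given; the request macro
KBASE_HWCNT_READER_GET_API_VERSION_WITH_FEATURES and the header path are hypothetical
stand-ins for whatever the uapi header actually defines for the struct-sized variant
(the kernel now dispatches on _IOC_NR and uses _IOC_SIZE to pick the u32 or struct form).

#include <stdio.h>
#include <sys/ioctl.h>
#include "mali_kbase_hwcnt_reader.h"	/* assumed uapi header */

static void query_reader_api(int hwcnt_fd)
{
	struct kbase_hwcnt_reader_api_version api = { 0 };

	/* Hypothetical macro name; it must encode sizeof(api) so that the
	 * kernel takes the struct-sized path.
	 */
	if (ioctl(hwcnt_fd, KBASE_HWCNT_READER_GET_API_VERSION_WITH_FEATURES, &api))
		return;

	printf("hwcnt reader API %u\n", api.version);
	if (api.features & KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_TOP)
		printf("top-level cycle counts available\n");
	if (api.features & KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_SHADER_CORES)
		printf("shader-core cycle counts available\n");
}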
+
+/**
* kbasep_vinstr_hwcnt_reader_ioctl() - hwcnt reader's ioctl.
* @filp: Non-NULL pointer to file structure.
* @cmd: User command.
@@ -858,42 +944,43 @@ static long kbasep_vinstr_hwcnt_reader_ioctl(
if (!cli)
return -EINVAL;
- switch (cmd) {
- case KBASE_HWCNT_READER_GET_API_VERSION:
- rcode = put_user(HWCNT_READER_API, (u32 __user *)arg);
+ switch (_IOC_NR(cmd)) {
+ case _IOC_NR(KBASE_HWCNT_READER_GET_API_VERSION):
+ rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_api_version(
+ cli, arg, _IOC_SIZE(cmd));
break;
- case KBASE_HWCNT_READER_GET_HWVER:
+ case _IOC_NR(KBASE_HWCNT_READER_GET_HWVER):
rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_hwver(
cli, (u32 __user *)arg);
break;
- case KBASE_HWCNT_READER_GET_BUFFER_SIZE:
+ case _IOC_NR(KBASE_HWCNT_READER_GET_BUFFER_SIZE):
rcode = put_user(
(u32)cli->vctx->metadata->dump_buf_bytes,
(u32 __user *)arg);
break;
- case KBASE_HWCNT_READER_DUMP:
+ case _IOC_NR(KBASE_HWCNT_READER_DUMP):
rcode = kbasep_vinstr_hwcnt_reader_ioctl_dump(cli);
break;
- case KBASE_HWCNT_READER_CLEAR:
+ case _IOC_NR(KBASE_HWCNT_READER_CLEAR):
rcode = kbasep_vinstr_hwcnt_reader_ioctl_clear(cli);
break;
- case KBASE_HWCNT_READER_GET_BUFFER:
+ case _IOC_NR(KBASE_HWCNT_READER_GET_BUFFER):
rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_buffer(
cli, (void __user *)arg, _IOC_SIZE(cmd));
break;
- case KBASE_HWCNT_READER_PUT_BUFFER:
+ case _IOC_NR(KBASE_HWCNT_READER_PUT_BUFFER):
rcode = kbasep_vinstr_hwcnt_reader_ioctl_put_buffer(
cli, (void __user *)arg, _IOC_SIZE(cmd));
break;
- case KBASE_HWCNT_READER_SET_INTERVAL:
+ case _IOC_NR(KBASE_HWCNT_READER_SET_INTERVAL):
rcode = kbasep_vinstr_hwcnt_reader_ioctl_set_interval(
cli, (u32)arg);
break;
- case KBASE_HWCNT_READER_ENABLE_EVENT:
+ case _IOC_NR(KBASE_HWCNT_READER_ENABLE_EVENT):
rcode = kbasep_vinstr_hwcnt_reader_ioctl_enable_event(
cli, (enum base_hwcnt_reader_event)arg);
break;
- case KBASE_HWCNT_READER_DISABLE_EVENT:
+ case _IOC_NR(KBASE_HWCNT_READER_DISABLE_EVENT):
rcode = kbasep_vinstr_hwcnt_reader_ioctl_disable_event(
cli, (enum base_hwcnt_reader_event)arg);
break;
diff --git a/mali_kbase/mali_linux_trace.h b/mali_kbase/mali_linux_trace.h
index f618755..36bfd09 100644
--- a/mali_kbase/mali_linux_trace.h
+++ b/mali_kbase/mali_linux_trace.h
@@ -288,7 +288,7 @@ DEFINE_EVENT_PRINT(mali_jit_softjob_template, mali_jit_free,
TP_printk("start=0x%llx va_pages=0x%zx backed_size=0x%zx",
__entry->start_addr, __entry->nr_pages, __entry->backed_pages));
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
/* trace_mali_jit_report
*
* Tracepoint about the GPU data structure read to form a just-in-time memory
@@ -326,13 +326,13 @@ TRACE_EVENT(mali_jit_report,
),
__entry->read_val, __entry->used_pages)
);
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
#if (KERNEL_VERSION(4, 1, 0) <= LINUX_VERSION_CODE)
TRACE_DEFINE_ENUM(KBASE_JIT_REPORT_ON_ALLOC_OR_FREE);
#endif
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
/* trace_mali_jit_report_pressure
*
* Tracepoint about change in physical memory pressure, due to the information
@@ -366,7 +366,7 @@ TRACE_EVENT(mali_jit_report_pressure,
{ KBASE_JIT_REPORT_ON_ALLOC_OR_FREE,
"HAPPENED_ON_ALLOC_OR_FREE" }))
);
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
#ifndef __TRACE_SYSGRAPH_ENUM
#define __TRACE_SYSGRAPH_ENUM
diff --git a/mali_kbase/mali_power_gpu_frequency_trace.c b/mali_kbase/mali_power_gpu_frequency_trace.c
new file mode 100644
index 0000000..b6fb5a0
--- /dev/null
+++ b/mali_kbase/mali_power_gpu_frequency_trace.c
@@ -0,0 +1,27 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/* Create the trace point if not configured in kernel */
+#ifndef CONFIG_TRACE_POWER_GPU_FREQUENCY
+#define CREATE_TRACE_POINTS
+#include "mali_power_gpu_frequency_trace.h"
+#endif
diff --git a/mali_kbase/mali_power_gpu_frequency_trace.h b/mali_kbase/mali_power_gpu_frequency_trace.h
new file mode 100644
index 0000000..3b90ae4
--- /dev/null
+++ b/mali_kbase/mali_power_gpu_frequency_trace.h
@@ -0,0 +1,69 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _TRACE_POWER_GPU_FREQUENCY_MALI
+#define _TRACE_POWER_GPU_FREQUENCY_MALI
+#endif
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM power
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE mali_power_gpu_frequency_trace
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+
+#if !defined(_TRACE_POWER_GPU_FREQUENCY_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_POWER_GPU_FREQUENCY_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(gpu,
+
+ TP_PROTO(unsigned int state, unsigned int gpu_id),
+
+ TP_ARGS(state, gpu_id),
+
+ TP_STRUCT__entry(
+ __field( u32, state )
+ __field( u32, gpu_id )
+ ),
+
+ TP_fast_assign(
+ __entry->state = state;
+ __entry->gpu_id = gpu_id;
+ ),
+
+ TP_printk("state=%lu gpu_id=%lu", (unsigned long)__entry->state,
+ (unsigned long)__entry->gpu_id)
+);
+
+DEFINE_EVENT(gpu, gpu_frequency,
+
+ TP_PROTO(unsigned int frequency, unsigned int gpu_id),
+
+ TP_ARGS(frequency, gpu_id)
+);
+
+#endif /* _TRACE_POWER_GPU_FREQUENCY_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
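
Note: a minimal sketch of emitting the event defined above. The real call sites live
in the clock rate trace manager, which is not part of this hunk, so the notifier
wiring and the kHz convention here are assumptions; kbdev->id matches the gpu_id
used by the gpu_mem tracepoints.

#include "mali_power_gpu_frequency_trace.h"

static void report_gpu_freq_change(struct kbase_device *kbdev,
				   unsigned long new_rate_hz)
{
	/* Assumed convention: report the frequency in kHz. */
	trace_gpu_frequency(new_rate_hz / 1000, kbdev->id);
}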
diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c
index db27832..734c9de 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.c
+++ b/mali_kbase/mmu/mali_kbase_mmu.c
@@ -45,6 +45,7 @@
#include <mmu/mali_kbase_mmu_internal.h>
#include <mali_kbase_cs_experimental.h>
+#include <mali_kbase_trace_gpu_mem.h>
#define KBASE_MMU_PAGE_ENTRIES 512
/**
@@ -150,6 +151,13 @@ static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev,
* Depending on reg's flags, the base used for calculating multiples is
* different
*/
+
+ /* multiple is based from the current backed size, even if the
+ * current backed size/pfn for end of committed memory are not
+ * themselves aligned to multiple
+ */
+ remainder = minimum_extra % multiple;
+
if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) {
/* multiple is based from the top of the initial commit, which
* has been allocated in such a way that (start_pfn +
@@ -175,12 +183,6 @@ static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev,
remainder = pages_after_initial % multiple;
}
- } else {
- /* multiple is based from the current backed size, even if the
- * current backed size/pfn for end of committed memory are not
- * themselves aligned to multiple
- */
- remainder = minimum_extra % multiple;
}
if (remainder == 0)
@@ -544,7 +546,9 @@ void page_fault_worker(struct work_struct *data)
struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL };
int i;
size_t current_backed_size;
-
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+ size_t pages_trimmed = 0;
+#endif
faulting_as = container_of(data, struct kbase_as, work_pagefault);
fault = &faulting_as->pf_data;
@@ -568,6 +572,10 @@ void page_fault_worker(struct work_struct *data)
KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev);
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+ mutex_lock(&kctx->jctx.lock);
+#endif
+
if (unlikely(fault->protected_mode)) {
kbase_mmu_report_fault_and_kill(kctx, faulting_as,
"Protected mode fault", fault);
@@ -758,6 +766,13 @@ page_fault_retry:
pages_to_grow = 0;
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+ if ((region->flags & KBASE_REG_ACTIVE_JIT_ALLOC) && !pages_trimmed) {
+ kbase_jit_request_phys_increase(kctx, new_pages);
+ pages_trimmed = new_pages;
+ }
+#endif
+
spin_lock(&kctx->mem_partials_lock);
grown = page_fault_try_alloc(kctx, region, new_pages, &pages_to_grow,
&grow_2mb_pool, prealloc_sas);
@@ -872,6 +887,13 @@ page_fault_retry:
}
}
#endif
+
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+ if (pages_trimmed) {
+ kbase_jit_done_phys_increase(kctx, pages_trimmed);
+ pages_trimmed = 0;
+ }
+#endif
kbase_gpu_vm_unlock(kctx);
} else {
int ret = -ENOMEM;
@@ -918,6 +940,15 @@ page_fault_retry:
}
fault_done:
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+ if (pages_trimmed) {
+ kbase_gpu_vm_lock(kctx);
+ kbase_jit_done_phys_increase(kctx, pages_trimmed);
+ kbase_gpu_vm_unlock(kctx);
+ }
+ mutex_unlock(&kctx->jctx.lock);
+#endif
+
for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i)
kfree(prealloc_sas[i]);
@@ -964,6 +995,8 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev,
atomic_add(1, &kbdev->memdev.used_pages);
+ kbase_trace_gpu_mem_usage_inc(kbdev, mmut->kctx, 1);
+
for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++)
kbdev->mmu_mode->entry_invalidate(&page[i]);
@@ -1290,6 +1323,8 @@ static inline void cleanup_empty_pte(struct kbase_device *kbdev,
atomic_sub(1, &mmut->kctx->used_pages);
}
atomic_sub(1, &kbdev->memdev.used_pages);
+
+ kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1);
}
u64 kbase_mmu_create_ate(struct kbase_device *const kbdev,
@@ -1932,6 +1967,8 @@ static void mmu_teardown_level(struct kbase_device *kbdev,
kbase_process_page_usage_dec(mmut->kctx, 1);
atomic_sub(1, &mmut->kctx->used_pages);
}
+
+ kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1);
}
int kbase_mmu_init(struct kbase_device *const kbdev,
diff --git a/mali_kbase/platform/devicetree/Kbuild b/mali_kbase/platform/devicetree/Kbuild
index ce637fb..78343c0 100644
--- a/mali_kbase/platform/devicetree/Kbuild
+++ b/mali_kbase/platform/devicetree/Kbuild
@@ -1,5 +1,5 @@
#
-# (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2012-2017, 2020 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -21,4 +21,5 @@
mali_kbase-y += \
$(MALI_PLATFORM_DIR)/mali_kbase_config_devicetree.o \
- $(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o
+ $(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o \
+ $(MALI_PLATFORM_DIR)/mali_kbase_clk_rate_trace.o
diff --git a/mali_kbase/platform/devicetree/mali_kbase_clk_rate_trace.c b/mali_kbase/platform/devicetree/mali_kbase_clk_rate_trace.c
new file mode 100644
index 0000000..11a8b77
--- /dev/null
+++ b/mali_kbase/platform/devicetree/mali_kbase_clk_rate_trace.c
@@ -0,0 +1,68 @@
+/*
+ *
+ * (C) COPYRIGHT 2015, 2017-2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <linux/clk.h>
+#include "mali_kbase_config_platform.h"
+
+static void *enumerate_gpu_clk(struct kbase_device *kbdev,
+ unsigned int index)
+{
+ if (index >= kbdev->nr_clocks)
+ return NULL;
+
+ return kbdev->clocks[index];
+}
+
+static unsigned long get_gpu_clk_rate(struct kbase_device *kbdev,
+ void *gpu_clk_handle)
+{
+ return clk_get_rate((struct clk *)gpu_clk_handle);
+}
+
+static int gpu_clk_notifier_register(struct kbase_device *kbdev,
+ void *gpu_clk_handle, struct notifier_block *nb)
+{
+ compiletime_assert(offsetof(struct clk_notifier_data, clk) ==
+ offsetof(struct kbase_gpu_clk_notifier_data, gpu_clk_handle),
+ "mismatch in the offset of clk member");
+
+ compiletime_assert(sizeof(((struct clk_notifier_data *)0)->clk) ==
+ sizeof(((struct kbase_gpu_clk_notifier_data *)0)->gpu_clk_handle),
+ "mismatch in the size of clk member");
+
+ return clk_notifier_register((struct clk *)gpu_clk_handle, nb);
+}
+
+static void gpu_clk_notifier_unregister(struct kbase_device *kbdev,
+ void *gpu_clk_handle, struct notifier_block *nb)
+{
+ clk_notifier_unregister((struct clk *)gpu_clk_handle, nb);
+}
+
+struct kbase_clk_rate_trace_op_conf clk_rate_trace_ops = {
+ .get_gpu_clk_rate = get_gpu_clk_rate,
+ .enumerate_gpu_clk = enumerate_gpu_clk,
+ .gpu_clk_notifier_register = gpu_clk_notifier_register,
+ .gpu_clk_notifier_unregister = gpu_clk_notifier_unregister,
+};
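
Note: to illustrate how these platform ops might be consumed, here is a hedged sketch
of a manager-side loop. The real consumer is the clock rate trace manager (not shown
in this hunk), so the function below is illustrative only and relies just on the
enumerate/get-rate callbacks and kbdev->nr_clocks seen above.

static void log_gpu_clk_rates(struct kbase_device *kbdev,
			      struct kbase_clk_rate_trace_op_conf *ops)
{
	unsigned int i;

	for (i = 0; i < kbdev->nr_clocks; i++) {
		void *handle = ops->enumerate_gpu_clk(kbdev, i);

		if (!handle)
			break;

		dev_dbg(kbdev->dev, "GPU clk %u at %lu Hz\n", i,
			ops->get_gpu_clk_rate(kbdev, handle));
	}
}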
diff --git a/mali_kbase/platform/devicetree/mali_kbase_config_platform.h b/mali_kbase/platform/devicetree/mali_kbase_config_platform.h
index 5990313..2137b42 100644
--- a/mali_kbase/platform/devicetree/mali_kbase_config_platform.h
+++ b/mali_kbase/platform/devicetree/mali_kbase_config_platform.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2017, 2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -36,7 +36,10 @@
*/
#define PLATFORM_FUNCS (NULL)
+#define CLK_RATE_TRACE_OPS (&clk_rate_trace_ops)
+
extern struct kbase_pm_callback_conf pm_callbacks;
+extern struct kbase_clk_rate_trace_op_conf clk_rate_trace_ops;
/**
* Autosuspend delay
diff --git a/mali_kbase/tests/Kbuild b/mali_kbase/tests/Kbuild
index df16a77..c26bef7 100644
--- a/mali_kbase/tests/Kbuild
+++ b/mali_kbase/tests/Kbuild
@@ -1,5 +1,5 @@
#
-# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2017, 2020 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -21,3 +21,4 @@
obj-$(CONFIG_MALI_KUTF) += kutf/
obj-$(CONFIG_MALI_IRQ_LATENCY) += mali_kutf_irq_test/
+obj-$(CONFIG_MALI_CLK_RATE_TRACE_PORTAL) += mali_kutf_clk_rate_trace/kernel/
diff --git a/mali_kbase/tests/Kconfig b/mali_kbase/tests/Kconfig
index fa91aea..83a4d77 100644
--- a/mali_kbase/tests/Kconfig
+++ b/mali_kbase/tests/Kconfig
@@ -1,5 +1,5 @@
#
-# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2017, 2020 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -21,3 +21,4 @@
source "drivers/gpu/arm/midgard/tests/kutf/Kconfig"
source "drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig"
+source "drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/kernel/Kconfig"
diff --git a/mali_kbase/tests/include/kutf/kutf_helpers.h b/mali_kbase/tests/include/kutf/kutf_helpers.h
index 15e168c..858b9c3 100644
--- a/mali_kbase/tests/include/kutf/kutf_helpers.h
+++ b/mali_kbase/tests/include/kutf/kutf_helpers.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017, 2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -34,6 +34,14 @@
#include <kutf/kutf_suite.h>
/**
+ * kutf_helper_pending_input() - Check for any pending lines sent by user space
+ * @context: KUTF context
+ *
+ * Return: true if there are pending lines, otherwise false
+ */
+bool kutf_helper_pending_input(struct kutf_context *context);
+
+/**
* kutf_helper_input_dequeue() - Dequeue a line sent by user space
* @context: KUTF context
* @str_size: Pointer to an integer to receive the size of the string
diff --git a/mali_kbase/tests/kutf/kutf_helpers.c b/mali_kbase/tests/kutf/kutf_helpers.c
index cab5add..4463b04 100644
--- a/mali_kbase/tests/kutf/kutf_helpers.c
+++ b/mali_kbase/tests/kutf/kutf_helpers.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017, 2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -29,10 +29,11 @@
#include <linux/preempt.h>
#include <linux/wait.h>
#include <linux/uaccess.h>
+#include <linux/export.h>
static DEFINE_SPINLOCK(kutf_input_lock);
-static bool pending_input(struct kutf_context *context)
+bool kutf_helper_pending_input(struct kutf_context *context)
{
bool input_pending;
@@ -44,6 +45,7 @@ static bool pending_input(struct kutf_context *context)
return input_pending;
}
+EXPORT_SYMBOL(kutf_helper_pending_input);
char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size)
{
@@ -59,7 +61,7 @@ char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size)
spin_unlock(&kutf_input_lock);
err = wait_event_interruptible(context->userdata.input_waitq,
- pending_input(context));
+ kutf_helper_pending_input(context));
if (err)
return ERR_PTR(-EINTR);
diff --git a/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/Kbuild b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/Kbuild
new file mode 100644
index 0000000..f5565d3
--- /dev/null
+++ b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/Kbuild
@@ -0,0 +1,26 @@
+#
+# (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+ccflags-y += -I$(src)/../include -I$(src)/../../../ -I$(src)/../../ -I$(src)/../../backend/gpu -I$(srctree)/drivers/staging/android
+
+obj-$(CONFIG_MALI_CLK_RATE_TRACE_PORTAL) += mali_kutf_clk_rate_trace_test_portal.o
+
+mali_kutf_clk_rate_trace_test_portal-y := mali_kutf_clk_rate_trace_test.o
diff --git a/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/Kconfig b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/Kconfig
new file mode 100644
index 0000000..04b44cf
--- /dev/null
+++ b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/Kconfig
@@ -0,0 +1,30 @@
+#
+# (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+config MALI_CLK_RATE_TRACE_PORTAL
+ tristate "Mali GPU Clock Trace Test portal"
+ depends on MALI_MIDGARD && MALI_DEBUG && MALI_KUTF
+ default m
+ help
+ This option will build a test module mali_kutf_clk_rate_trace_test_portal
+ that can test the clocks integration into the platform and exercise some
+ basic trace test in the system. Choosing M here will generate a single
+ module called mali_kutf_clk_rate_trace_test_portal.
diff --git a/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/Makefile b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/Makefile
new file mode 100644
index 0000000..71c78b8
--- /dev/null
+++ b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/Makefile
@@ -0,0 +1,57 @@
+#
+# (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+ifneq ($(KERNELRELEASE),)
+
+ccflags-y := \
+ -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
+ -DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \
+ -I$(src)/../../include \
+ -I$(src)/../../../../../../../include \
+ -I$(src)/../../../../ \
+ -I$(src)/../../../ \
+ -I$(src)/../../../backend/gpu \
+ -I$(src)/../../../debug \
+ -I$(src)/../../../debug/backend \
+ -I$(src)/ \
+ -I$(srctree)/drivers/staging/android \
+ -I$(srctree)/include/linux
+
+obj-m := mali_kutf_clk_rate_trace_test_portal.o
+mali_kutf_clk_rate_trace_test_portal-y := mali_kutf_clk_rate_trace_test.o
+
+else
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target)
+endif
+
+all:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) KBUILD_EXTRA_SYMBOLS="$(CURDIR)/../../kutf/Module.symvers $(CURDIR)/../../../Module.symvers" modules
+
+clean:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
+endif
diff --git a/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/build.bp b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/build.bp
new file mode 100644
index 0000000..0cc2904
--- /dev/null
+++ b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/build.bp
@@ -0,0 +1,34 @@
+/*
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+bob_kernel_module {
+ name: "mali_kutf_clk_rate_trace_test_portal",
+ defaults: [
+ "mali_kbase_shared_config_defaults",
+ "kernel_test_includes",
+ ],
+ srcs: [
+ "../mali_kutf_clk_rate_trace_test.h",
+ "Makefile",
+ "mali_kutf_clk_rate_trace_test.c",
+ ],
+ extra_symbols: [
+ "mali_kbase",
+ "kutf",
+ ],
+ enabled: false,
+ base_build_kutf: {
+ enabled: true,
+ kbuild_options: ["CONFIG_MALI_CLK_RATE_TRACE_PORTAL=m"],
+ },
+}
diff --git a/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c
new file mode 100644
index 0000000..d466661
--- /dev/null
+++ b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c
@@ -0,0 +1,886 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/fdtable.h>
+#include <linux/module.h>
+
+#include <linux/delay.h>
+#include <linux/mutex.h>
+#include <linux/ktime.h>
+#include <linux/version.h>
+#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE)
+#include <linux/sched/task.h>
+#else
+#include <linux/sched.h>
+#endif
+#include "mali_kbase.h"
+#include "mali_kbase_irq_internal.h"
+#include "mali_kbase_pm_internal.h"
+#include "mali_kbase_clk_rate_trace_mgr.h"
+
+#include <kutf/kutf_suite.h>
+#include <kutf/kutf_utils.h>
+#include <kutf/kutf_helpers.h>
+#include <kutf/kutf_helpers_user.h>
+
+#include "../mali_kutf_clk_rate_trace_test.h"
+
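+/* A negative minor number passed to kbase_find_device() is used to select
+ * the first kbase device (hence the name of this constant).
+ */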
+#define MINOR_FOR_FIRST_KBASE_DEV (-1)
+
+/* KUTF test application pointer for this test */
+struct kutf_application *kutf_app;
+
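+/**
+ * enum portal_server_state - Operational states of the portal server.
+ * @PORTAL_STATE_NO_CLK:  No clocks are visible to the portal; incoming
+ *                        requests are NACKed and a fatal result is flagged.
+ * @PORTAL_STATE_LIVE:    Portal is live and servicing client requests.
+ * @PORTAL_STATE_CLOSING: Portal is closing down, either on a CLOSE_PORTAL
+ *                        request or after an error condition.
+ */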
+enum portal_server_state {
+ PORTAL_STATE_NO_CLK,
+ PORTAL_STATE_LIVE,
+ PORTAL_STATE_CLOSING,
+};
+
+/**
+ * struct clk_trace_snapshot - Trace info data on a clock.
+ * @previous_rate: Snapshot start point clock rate.
+ * @current_rate: End point clock rate. It becomes the start rate of the
+ * next trace snapshot.
+ * @rate_up_cnt: Number of trace writes in the snapshot duration where the
+ * new rate is higher than the previous one.
+ * @rate_down_cnt: Number of trace writes in the snapshot duration where the
+ * new rate is lower than the previous one.
+ */
+struct clk_trace_snapshot {
+ unsigned long previous_rate;
+ unsigned long current_rate;
+ u32 rate_up_cnt;
+ u32 rate_down_cnt;
+};
+
+/**
+ * struct kutf_clk_rate_trace_fixture_data - Fixture data for the test.
+ * @kbdev: kbase device for the GPU.
+ * @listener: Clock rate change listener structure.
+ * @invoke_notify: When true, invoke notify command is being executed.
+ * @snapshot: Clock trace update snapshot data array. A snapshot
+ * for each clock contains info accumulated between two
+ * GET_TRACE_SNAPSHOT requests.
+ * @nclks: Number of clocks visible to the trace portal.
+ * @pm_ctx_cnt: Net count of PM (Power Management) context INC/DEC
+ * PM_CTX_CNT requests made to the portal. On change from
+ * 0 to 1 (INC), or, 1 to 0 (DEC), a PM context action is
+ * triggered.
+ * @total_update_cnt: Total number of received trace write callbacks.
+ * @server_state: Portal server operational state.
+ * @result_msg: Message for the test result.
+ * @test_status: Portal test result status.
+ */
+struct kutf_clk_rate_trace_fixture_data {
+ struct kbase_device *kbdev;
+ struct kbase_clk_rate_listener listener;
+ bool invoke_notify;
+ struct clk_trace_snapshot snapshot[BASE_MAX_NR_CLOCKS_REGULATORS];
+ unsigned int nclks;
+ unsigned int pm_ctx_cnt;
+ unsigned int total_update_cnt;
+ enum portal_server_state server_state;
+ char const *result_msg;
+ enum kutf_result_status test_status;
+};
+
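+/**
+ * struct clk_trace_portal_input - Decoded portal input request.
+ * @cmd_input:     Named value received from the test client.
+ * @portal_cmd:    Decoded command, or PORTAL_CMD_INVALID if unrecognized.
+ * @named_val_err: Error code from receiving the named value.
+ */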
+struct clk_trace_portal_input {
+ struct kutf_helper_named_val cmd_input;
+ enum kbasep_clk_rate_trace_req portal_cmd;
+ int named_val_err;
+};
+
+struct kbasep_cmd_name_pair {
+ enum kbasep_clk_rate_trace_req cmd;
+ const char *name;
+};
+
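+/* Mapping of portal command identifiers to their request names as received
+ * from the user side. Dequeued requests are decoded by matching the named
+ * value's name against this table.
+ */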
+struct kbasep_cmd_name_pair kbasep_portal_cmd_name_map[] = {
+ {PORTAL_CMD_GET_CLK_RATE_MGR, GET_CLK_RATE_MGR},
+ {PORTAL_CMD_GET_CLK_RATE_TRACE, GET_CLK_RATE_TRACE},
+ {PORTAL_CMD_GET_TRACE_SNAPSHOT, GET_TRACE_SNAPSHOT},
+ {PORTAL_CMD_INC_PM_CTX_CNT, INC_PM_CTX_CNT},
+ {PORTAL_CMD_DEC_PM_CTX_CNT, DEC_PM_CTX_CNT},
+ {PORTAL_CMD_CLOSE_PORTAL, CLOSE_PORTAL},
+ {PORTAL_CMD_INVOKE_NOTIFY_42KHZ, INVOKE_NOTIFY_42KHZ},
+ };
+
+/* Global pointer for kutf_portal_trace_write() to use. While this pointer
+ * is engaged, new requests to create a fixture will fail, hence limiting
+ * the use of the portal to a single session at any time.
+ */
+struct kutf_clk_rate_trace_fixture_data *g_ptr_portal_data;
+
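+/* Shared reply buffer for portal responses. Its length leaves room for the
+ * reply name tag within a single KUTF line. Sharing one buffer is safe as
+ * the portal is restricted to a single session at a time.
+ */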
+#define PORTAL_MSG_LEN (KUTF_MAX_LINE_LENGTH - MAX_REPLY_NAME_LEN)
+static char portal_msg_buf[PORTAL_MSG_LEN];
+
+static void kutf_portal_trace_write(
+ struct kbase_clk_rate_listener *listener,
+ u32 index, u32 new_rate)
+{
+ struct clk_trace_snapshot *snapshot;
+ struct kutf_clk_rate_trace_fixture_data *data = container_of(
+ listener, struct kutf_clk_rate_trace_fixture_data, listener);
+
+ lockdep_assert_held(&data->kbdev->pm.clk_rtm.lock);
+
+ if (WARN_ON(g_ptr_portal_data == NULL))
+ return;
+ if (WARN_ON(index >= g_ptr_portal_data->nclks))
+ return;
+
+ /* This callback is triggered by invoke notify command, skipping */
+ if (data->invoke_notify)
+ return;
+
+ snapshot = &g_ptr_portal_data->snapshot[index];
+ if (new_rate > snapshot->current_rate)
+ snapshot->rate_up_cnt++;
+ else
+ snapshot->rate_down_cnt++;
+ snapshot->current_rate = new_rate;
+ g_ptr_portal_data->total_update_cnt++;
+}
+
+static void kutf_set_pm_ctx_active(struct kutf_context *context)
+{
+ struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
+
+ if (WARN_ON(data->pm_ctx_cnt != 1))
+ return;
+
+ kbase_pm_context_active(data->kbdev);
+ kbase_pm_wait_for_desired_state(data->kbdev);
+ kbase_pm_request_gpu_cycle_counter(data->kbdev);
+}
+
+static void kutf_set_pm_ctx_idle(struct kutf_context *context)
+{
+ struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
+
+ if (WARN_ON(data->pm_ctx_cnt > 0))
+ return;
+
+ kbase_pm_context_idle(data->kbdev);
+ kbase_pm_release_gpu_cycle_counter(data->kbdev);
+}
+
+static char const *kutf_clk_trace_do_change_pm_ctx(struct kutf_context *context,
+ struct clk_trace_portal_input *cmd)
+{
+ struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
+ int seq = cmd->cmd_input.u.val_u64 & 0xFF;
+ const unsigned int cnt = data->pm_ctx_cnt;
+ const enum kbasep_clk_rate_trace_req req = cmd->portal_cmd;
+ char const *errmsg = NULL;
+
+ WARN_ON(req != PORTAL_CMD_INC_PM_CTX_CNT &&
+ req != PORTAL_CMD_DEC_PM_CTX_CNT);
+
+ if (req == PORTAL_CMD_INC_PM_CTX_CNT && cnt < UINT_MAX) {
+ data->pm_ctx_cnt++;
+ if (data->pm_ctx_cnt == 1)
+ kutf_set_pm_ctx_active(context);
+ }
+
+ if (req == PORTAL_CMD_DEC_PM_CTX_CNT && cnt > 0) {
+ data->pm_ctx_cnt--;
+ if (data->pm_ctx_cnt == 0)
+ kutf_set_pm_ctx_idle(context);
+ }
+
+ /* Skip the length check, no chance of overflow for two ints */
+ snprintf(portal_msg_buf, PORTAL_MSG_LEN,
+ "{SEQ:%d, PM_CTX_CNT:%u}", seq, data->pm_ctx_cnt);
+
+ if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) {
+ pr_warn("Error in sending ack for adjusting pm_ctx_cnt\n");
+ errmsg = kutf_dsprintf(&context->fixture_pool,
+ "Error in sending ack for adjusting pm_ctx_cnt");
+ }
+
+ return errmsg;
+}
+
+static char const *kutf_clk_trace_do_get_rate(struct kutf_context *context,
+ struct clk_trace_portal_input *cmd)
+{
+ struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
+ struct kbase_device *kbdev = data->kbdev;
+ int seq = cmd->cmd_input.u.val_u64 & 0xFF;
+ unsigned long rate;
+ bool idle;
+ int ret;
+ int i;
+ char const *errmsg = NULL;
+
+ WARN_ON((cmd->portal_cmd != PORTAL_CMD_GET_CLK_RATE_MGR) &&
+ (cmd->portal_cmd != PORTAL_CMD_GET_CLK_RATE_TRACE));
+
+ ret = snprintf(portal_msg_buf, PORTAL_MSG_LEN,
+ "{SEQ:%d, RATE:[", seq);
+
+ for (i = 0; i < data->nclks; i++) {
+ spin_lock(&kbdev->pm.clk_rtm.lock);
+ if (cmd->portal_cmd == PORTAL_CMD_GET_CLK_RATE_MGR)
+ rate = kbdev->pm.clk_rtm.clks[i]->clock_val;
+ else
+ rate = data->snapshot[i].current_rate;
+ idle = kbdev->pm.clk_rtm.gpu_idle;
+ spin_unlock(&kbdev->pm.clk_rtm.lock);
+
+ if ((i + 1) == data->nclks)
+ ret += snprintf(portal_msg_buf + ret,
+ PORTAL_MSG_LEN - ret, "0x%lx], GPU_IDLE:%d}",
+ rate, idle);
+ else
+ ret += snprintf(portal_msg_buf + ret,
+ PORTAL_MSG_LEN - ret, "0x%lx, ", rate);
+
+ if (ret >= PORTAL_MSG_LEN) {
+ pr_warn("Message buf overflow with rate array data\n");
+ return kutf_dsprintf(&context->fixture_pool,
+ "Message buf overflow with rate array data");
+ }
+ }
+
+ if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) {
+ pr_warn("Error in sending back rate array\n");
+ errmsg = kutf_dsprintf(&context->fixture_pool,
+ "Error in sending rate array");
+ }
+
+ return errmsg;
+}
+
+/**
+ * kutf_clk_trace_do_get_snapshot() - Send back the current snapshot
+ * @context: KUTF context
+ * @cmd: The decoded portal input request
+ *
+ * The accumulated clock rate trace information is kept inside as a snapshot
+ * record. A user request of getting the snapshot marks the closure of the
+ * current snapshot record, and the start of the next one. The response
+ * message contains the current snapshot record, with each clock's
+ * data sequentially placed inside (array marker) [ ].
+ */
+static char const *kutf_clk_trace_do_get_snapshot(struct kutf_context *context,
+ struct clk_trace_portal_input *cmd)
+{
+ struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
+ struct clk_trace_snapshot snapshot;
+ int seq = cmd->cmd_input.u.val_u64 & 0xFF;
+ int ret;
+ int i;
+ char const *fmt;
+ char const *errmsg = NULL;
+
+ WARN_ON(cmd->portal_cmd != PORTAL_CMD_GET_TRACE_SNAPSHOT);
+
+ ret = snprintf(portal_msg_buf, PORTAL_MSG_LEN,
+ "{SEQ:%d, SNAPSHOT_ARRAY:[", seq);
+
+ for (i = 0; i < data->nclks; i++) {
+ spin_lock(&data->kbdev->pm.clk_rtm.lock);
+ /* copy out the snapshot of the clock */
+ snapshot = data->snapshot[i];
+ /* Set the next snapshot start condition */
+ data->snapshot[i].previous_rate = snapshot.current_rate;
+ data->snapshot[i].rate_up_cnt = 0;
+ data->snapshot[i].rate_down_cnt = 0;
+ spin_unlock(&data->kbdev->pm.clk_rtm.lock);
+
+ /* Check i corresponding to the last clock */
+ if ((i + 1) == data->nclks)
+ fmt = "(0x%lx, 0x%lx, %u, %u)]}";
+ else
+ fmt = "(0x%lx, 0x%lx, %u, %u), ";
+ ret += snprintf(portal_msg_buf + ret, PORTAL_MSG_LEN - ret,
+ fmt, snapshot.previous_rate, snapshot.current_rate,
+ snapshot.rate_up_cnt, snapshot.rate_down_cnt);
+ if (ret >= PORTAL_MSG_LEN) {
+ pr_warn("Message buf overflow with snapshot data\n");
+ return kutf_dsprintf(&context->fixture_pool,
+ "Message buf overflow with snapshot data");
+ }
+ }
+
+ if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) {
+ pr_warn("Error in sending back snapshot array\n");
+ errmsg = kutf_dsprintf(&context->fixture_pool,
+ "Error in sending snapshot array");
+ }
+
+ return errmsg;
+}
+
+/**
+ * kutf_clk_trace_do_invoke_notify_42k() - Invokes the stored notification callback
+ * @context: KUTF context
+ * @cmd: The decoded portal input request
+ *
+ * Invokes frequency change notification callbacks with a fake
+ * GPU frequency 42 kHz for the top clock domain.
+ */
+static char const *kutf_clk_trace_do_invoke_notify_42k(
+ struct kutf_context *context,
+ struct clk_trace_portal_input *cmd)
+{
+ struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
+ int seq = cmd->cmd_input.u.val_u64 & 0xFF;
+ const unsigned long new_rate_hz = 42000;
+ int ret;
+ char const *errmsg = NULL;
+ struct kbase_clk_rate_trace_manager *clk_rtm = &data->kbdev->pm.clk_rtm;
+
+ WARN_ON(cmd->portal_cmd != PORTAL_CMD_INVOKE_NOTIFY_42KHZ);
+
+ spin_lock(&clk_rtm->lock);
+
+ data->invoke_notify = true;
+ kbase_clk_rate_trace_manager_notify_all(
+ clk_rtm, 0, new_rate_hz);
+ data->invoke_notify = false;
+
+ spin_unlock(&clk_rtm->lock);
+
+ ret = snprintf(portal_msg_buf, PORTAL_MSG_LEN,
+ "{SEQ:%d, HZ:%lu}", seq, new_rate_hz);
+
+ if (ret >= PORTAL_MSG_LEN) {
+ pr_warn("Message buf overflow with invoked data\n");
+ return kutf_dsprintf(&context->fixture_pool,
+ "Message buf overflow with invoked data");
+ }
+
+ if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) {
+		pr_warn("Error in sending ack for " INVOKE_NOTIFY_42KHZ " request\n");
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+			"Error in sending ack for " INVOKE_NOTIFY_42KHZ " request");
+ }
+
+ return errmsg;
+}
+
+static char const *kutf_clk_trace_do_close_portal(struct kutf_context *context,
+ struct clk_trace_portal_input *cmd)
+{
+ struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
+ int seq = cmd->cmd_input.u.val_u64 & 0xFF;
+ char const *errmsg = NULL;
+
+ WARN_ON(cmd->portal_cmd != PORTAL_CMD_CLOSE_PORTAL);
+
+ data->server_state = PORTAL_STATE_CLOSING;
+
+ /* Skip the length check, no chance of overflow for two ints */
+ snprintf(portal_msg_buf, PORTAL_MSG_LEN,
+ "{SEQ:%d, PM_CTX_CNT:%u}", seq, data->pm_ctx_cnt);
+
+ if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) {
+		pr_warn("Error in sending ack for " CLOSE_PORTAL " request\n");
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+			"Error in sending ack for " CLOSE_PORTAL " request");
+ }
+
+ return errmsg;
+}
+
+static bool kutf_clk_trace_dequeue_portal_cmd(struct kutf_context *context,
+ struct clk_trace_portal_input *cmd)
+{
+ int i;
+ int err = kutf_helper_receive_named_val(context, &cmd->cmd_input);
+
+ cmd->named_val_err = err;
+ if (err == KUTF_HELPER_ERR_NONE &&
+ cmd->cmd_input.type == KUTF_HELPER_VALTYPE_U64) {
+ /* All portal request commands are of format (named u64):
+ * CMD_NAME=1234
+ * where, 1234 is a (variable) sequence number tag.
+ */
+ for (i = 0; i < PORTAL_TOTAL_CMDS; i++) {
+ if (strcmp(cmd->cmd_input.val_name,
+ kbasep_portal_cmd_name_map[i].name))
+ continue;
+
+ cmd->portal_cmd = kbasep_portal_cmd_name_map[i].cmd;
+ return true;
+ }
+ }
+
+ cmd->portal_cmd = PORTAL_CMD_INVALID;
+ return false;
+}
+
+static void kutf_clk_trace_flag_result(struct kutf_context *context,
+ enum kutf_result_status result, char const *msg)
+{
+ struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
+
+ if (result > data->test_status) {
+ data->test_status = result;
+ if (msg)
+ data->result_msg = msg;
+ if (data->server_state == PORTAL_STATE_LIVE &&
+ result > KUTF_RESULT_WARN) {
+ data->server_state = PORTAL_STATE_CLOSING;
+ }
+ }
+}
+
+static bool kutf_clk_trace_process_portal_cmd(struct kutf_context *context,
+ struct clk_trace_portal_input *cmd)
+{
+ char const *errmsg = NULL;
+
+ BUILD_BUG_ON(ARRAY_SIZE(kbasep_portal_cmd_name_map) !=
+ PORTAL_TOTAL_CMDS);
+ WARN_ON(cmd->portal_cmd == PORTAL_CMD_INVALID);
+
+ switch (cmd->portal_cmd) {
+ case PORTAL_CMD_GET_CLK_RATE_MGR:
+ /* Fall through */
+ case PORTAL_CMD_GET_CLK_RATE_TRACE:
+ errmsg = kutf_clk_trace_do_get_rate(context, cmd);
+ break;
+ case PORTAL_CMD_GET_TRACE_SNAPSHOT:
+ errmsg = kutf_clk_trace_do_get_snapshot(context, cmd);
+ break;
+ case PORTAL_CMD_INC_PM_CTX_CNT:
+ /* Fall through */
+ case PORTAL_CMD_DEC_PM_CTX_CNT:
+ errmsg = kutf_clk_trace_do_change_pm_ctx(context, cmd);
+ break;
+ case PORTAL_CMD_CLOSE_PORTAL:
+ errmsg = kutf_clk_trace_do_close_portal(context, cmd);
+ break;
+ case PORTAL_CMD_INVOKE_NOTIFY_42KHZ:
+ errmsg = kutf_clk_trace_do_invoke_notify_42k(context, cmd);
+ break;
+ default:
+ pr_warn("Don't know how to handle portal_cmd: %d, abort session.\n",
+ cmd->portal_cmd);
+ errmsg = kutf_dsprintf(&context->fixture_pool,
+ "Don't know how to handle portal_cmd: %d",
+ cmd->portal_cmd);
+ break;
+ }
+
+ if (errmsg)
+ kutf_clk_trace_flag_result(context, KUTF_RESULT_FAIL, errmsg);
+
+ return (errmsg == NULL);
+}
+
+/**
+ * kutf_clk_trace_do_nack_response() - respond a NACK to erroneous input
+ * @context: KUTF context
+ * @cmd: The erroneous input request
+ *
+ * This function deals with an erroneous input request and responds with
+ * a proper 'NACK' message.
+ */
+static int kutf_clk_trace_do_nack_response(struct kutf_context *context,
+ struct clk_trace_portal_input *cmd)
+{
+ int seq;
+ int err;
+ char const *errmsg = NULL;
+
+ WARN_ON(cmd->portal_cmd != PORTAL_CMD_INVALID);
+
+ if (cmd->named_val_err == KUTF_HELPER_ERR_NONE &&
+ cmd->cmd_input.type == KUTF_HELPER_VALTYPE_U64) {
+ /* Keep seq number as % 256 */
+ seq = cmd->cmd_input.u.val_u64 & 255;
+ snprintf(portal_msg_buf, PORTAL_MSG_LEN,
+ "{SEQ:%d, MSG: Unknown command '%s'.}", seq,
+ cmd->cmd_input.val_name);
+ err = kutf_helper_send_named_str(context, "NACK",
+ portal_msg_buf);
+ } else
+ err = kutf_helper_send_named_str(context, "NACK",
+ "Wrong portal cmd format (Ref example: CMD_NAME=0X16)");
+
+ if (err) {
+ errmsg = kutf_dsprintf(&context->fixture_pool,
+ "Failed to send portal NACK response");
+ kutf_clk_trace_flag_result(context, KUTF_RESULT_FAIL, errmsg);
+ }
+
+ return err;
+}
+
+/**
+ * kutf_clk_trace_barebone_check() - Sanity test on the clock tracing
+ * @context: KUTF context
+ *
+ * This function carries out some basic tests on the tracing operation:
+ * 1) On test start with the GPU idle, the trace rate should be 0 (low
+ *    power state).
+ * 2) Make sure the GPU is powered up; the trace rate should then match the
+ *    clock manager's internally recorded rate.
+ * 3) If a GPU active transition occurs following 2), there must be a
+ *    rate change event from tracing.
+ */
+void kutf_clk_trace_barebone_check(struct kutf_context *context)
+{
+ struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
+ struct kbase_device *kbdev = data->kbdev;
+ bool fail = false;
+ bool idle[2] = { false };
+ char const *msg = NULL;
+ int i;
+
+ /* Check consistency if gpu happens to be idle */
+ spin_lock(&kbdev->pm.clk_rtm.lock);
+ idle[0] = kbdev->pm.clk_rtm.gpu_idle;
+ if (kbdev->pm.clk_rtm.gpu_idle) {
+ for (i = 0; i < data->nclks; i++) {
+ if (data->snapshot[i].current_rate) {
+ /* Idle should have a rate 0 */
+ fail = true;
+ break;
+ }
+ }
+ }
+ spin_unlock(&kbdev->pm.clk_rtm.lock);
+ if (fail) {
+ msg = kutf_dsprintf(&context->fixture_pool,
+ "GPU Idle not yielding 0-rate");
+ pr_err("Trace did not see idle rate\n");
+ } else {
+ /* Make local PM active if not done so yet */
+ if (data->pm_ctx_cnt == 0) {
+ /* Ensure the GPU is powered */
+ data->pm_ctx_cnt++;
+ kutf_set_pm_ctx_active(context);
+ }
+ /* Checking the rate is consistent */
+ spin_lock(&kbdev->pm.clk_rtm.lock);
+ idle[1] = kbdev->pm.clk_rtm.gpu_idle;
+ for (i = 0; i < data->nclks; i++) {
+ /* Rate match between the manager and the trace */
+ if (kbdev->pm.clk_rtm.clks[i]->clock_val !=
+ data->snapshot[i].current_rate) {
+ fail = true;
+ break;
+ }
+ }
+ spin_unlock(&kbdev->pm.clk_rtm.lock);
+
+ if (idle[1]) {
+ msg = kutf_dsprintf(&context->fixture_pool,
+ "GPU still idle after set_pm_ctx_active");
+ pr_err("GPU still idle after set_pm_ctx_active\n");
+ }
+
+ if (!msg && fail) {
+ msg = kutf_dsprintf(&context->fixture_pool,
+ "Trace rate not matching Clk manager's read");
+ pr_err("Trace rate not matching Clk manager's read\n");
+ }
+ }
+
+ if (!msg && idle[0] && !idle[1] && !data->total_update_cnt) {
+ msg = kutf_dsprintf(&context->fixture_pool,
+ "Trace update did not occur");
+ pr_err("Trace update did not occur\n");
+ }
+ if (msg)
+ kutf_clk_trace_flag_result(context, KUTF_RESULT_FAIL, msg);
+ else if (!data->total_update_cnt) {
+ msg = kutf_dsprintf(&context->fixture_pool,
+ "No trace update seen during the test!");
+ kutf_clk_trace_flag_result(context, KUTF_RESULT_WARN, msg);
+ }
+}
+
+static bool kutf_clk_trace_end_of_stream(struct clk_trace_portal_input *cmd)
+{
+ return (cmd->named_val_err == -EBUSY);
+}
+
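+/**
+ * kutf_clk_trace_no_clks_dummy() - Dummy servicing when no clocks are visible
+ * @context: KUTF context
+ *
+ * NACKs any pending portal input for up to two seconds, then flags a fatal
+ * result, as the portal cannot operate without a visible clock.
+ */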
+void kutf_clk_trace_no_clks_dummy(struct kutf_context *context)
+{
+ struct clk_trace_portal_input cmd;
+ unsigned long timeout = jiffies + HZ * 2;
+ bool has_cmd;
+
+ while (time_before(jiffies, timeout)) {
+ if (kutf_helper_pending_input(context)) {
+ has_cmd = kutf_clk_trace_dequeue_portal_cmd(context,
+ &cmd);
+ if (!has_cmd && kutf_clk_trace_end_of_stream(&cmd))
+ break;
+
+ kutf_helper_send_named_str(context, "NACK",
+ "Fatal! No clocks visible, aborting");
+ }
+ msleep(20);
+ }
+
+ kutf_clk_trace_flag_result(context, KUTF_RESULT_FATAL,
+		"No clocks visible to the portal");
+}
+
+/**
+ * mali_kutf_clk_rate_trace_test_portal() - Service portal input
+ * @context: KUTF context
+ *
+ * The test portal operates on input requests. If the input request is one
+ * of the recognized portal commands, it handles it accordingly. Otherwise
+ * a negative response 'NACK' is returned. The portal service terminates
+ * when a 'CLOSE_PORTAL' request is received, or due to an internal error.
+ * Both cases result in the server_state transitioning to CLOSING.
+ *
+ * If the portal is closed on request, a sanity test on the clock rate
+ * trace operation is undertaken via function:
+ * kutf_clk_trace_barebone_check();
+ */
+static void mali_kutf_clk_rate_trace_test_portal(struct kutf_context *context)
+{
+ struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
+ struct clk_trace_portal_input new_cmd;
+
+ pr_debug("Test portal service start\n");
+
+ while (data->server_state == PORTAL_STATE_LIVE) {
+ if (kutf_clk_trace_dequeue_portal_cmd(context, &new_cmd))
+ kutf_clk_trace_process_portal_cmd(context, &new_cmd);
+ else if (kutf_clk_trace_end_of_stream(&new_cmd))
+ /* Dequeue on portal input, end of stream */
+ data->server_state = PORTAL_STATE_CLOSING;
+ else
+ kutf_clk_trace_do_nack_response(context, &new_cmd);
+ }
+
+ /* Closing, exhausting all the pending inputs with NACKs. */
+ if (data->server_state == PORTAL_STATE_CLOSING) {
+ while (kutf_helper_pending_input(context) &&
+ (kutf_clk_trace_dequeue_portal_cmd(context, &new_cmd) ||
+ !kutf_clk_trace_end_of_stream(&new_cmd))) {
+ kutf_helper_send_named_str(context, "NACK",
+ "Portal closing down");
+ }
+ }
+
+	/* If no portal error, do a barebone test here irrespective of
+	 * whatever the portal live session has been testing, which
+ * is entirely driven by the user-side via portal requests.
+ */
+ if (data->test_status <= KUTF_RESULT_WARN) {
+ if (data->server_state != PORTAL_STATE_NO_CLK)
+ kutf_clk_trace_barebone_check(context);
+ else {
+ /* No clocks case, NACK 2-sec for the fatal situation */
+ kutf_clk_trace_no_clks_dummy(context);
+ }
+ }
+
+ /* If we have changed pm_ctx count, drop it back */
+ if (data->pm_ctx_cnt) {
+ /* Although we count on portal requests, it only has material
+ * impact when from 0 -> 1. So the reverse is a simple one off.
+ */
+ data->pm_ctx_cnt = 0;
+ kutf_set_pm_ctx_idle(context);
+ }
+
+ /* Finally log the test result line */
+ if (data->test_status < KUTF_RESULT_WARN)
+ kutf_test_pass(context, data->result_msg);
+ else if (data->test_status == KUTF_RESULT_WARN)
+ kutf_test_warn(context, data->result_msg);
+ else if (data->test_status == KUTF_RESULT_FATAL)
+ kutf_test_fatal(context, data->result_msg);
+ else
+ kutf_test_fail(context, data->result_msg);
+
+ pr_debug("Test end\n");
+}
+
+/**
+ * mali_kutf_clk_rate_trace_create_fixture() - Creates the fixture data
+ * required for mali_kutf_clk_rate_trace_test_portal.
+ * @context: KUTF context.
+ *
+ * Return: Fixture data created on success or NULL on failure
+ */
+static void *mali_kutf_clk_rate_trace_create_fixture(
+ struct kutf_context *context)
+{
+ struct kutf_clk_rate_trace_fixture_data *data;
+ struct kbase_device *kbdev;
+ unsigned long rate;
+ int i;
+
+ /* Acquire the kbase device */
+ pr_debug("Finding device\n");
+ kbdev = kbase_find_device(MINOR_FOR_FIRST_KBASE_DEV);
+ if (kbdev == NULL) {
+ kutf_test_fail(context, "Failed to find kbase device");
+ return NULL;
+ }
+
+ pr_debug("Creating fixture\n");
+ data = kutf_mempool_alloc(&context->fixture_pool,
+ sizeof(struct kutf_clk_rate_trace_fixture_data));
+ if (!data)
+ return NULL;
+
+ *data = (const struct kutf_clk_rate_trace_fixture_data) { 0 };
+ pr_debug("Hooking up the test portal to kbdev clk rate trace\n");
+ spin_lock(&kbdev->pm.clk_rtm.lock);
+
+ if (g_ptr_portal_data != NULL) {
+ pr_warn("Test portal is already in use, run aborted\n");
+ kutf_test_fail(context, "Portal allows single session only");
+ spin_unlock(&kbdev->pm.clk_rtm.lock);
+ return NULL;
+ }
+
+ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
+ if (kbdev->pm.clk_rtm.clks[i]) {
+ data->nclks++;
+ if (kbdev->pm.clk_rtm.gpu_idle)
+ rate = 0;
+ else
+ rate = kbdev->pm.clk_rtm.clks[i]->clock_val;
+ data->snapshot[i].previous_rate = rate;
+ data->snapshot[i].current_rate = rate;
+ }
+ }
+
+ spin_unlock(&kbdev->pm.clk_rtm.lock);
+
+ if (data->nclks) {
+ /* Subscribe this test server portal */
+ data->listener.notify = kutf_portal_trace_write;
+ data->invoke_notify = false;
+
+ kbase_clk_rate_trace_manager_subscribe(
+ &kbdev->pm.clk_rtm, &data->listener);
+ /* Update the kutf_server_portal fixture_data pointer */
+ g_ptr_portal_data = data;
+ }
+
+ data->kbdev = kbdev;
+ data->result_msg = NULL;
+ data->test_status = KUTF_RESULT_PASS;
+
+ if (data->nclks == 0) {
+ data->server_state = PORTAL_STATE_NO_CLK;
+ pr_debug("Kbdev has no clocks for rate trace");
+ } else
+ data->server_state = PORTAL_STATE_LIVE;
+
+ pr_debug("Created fixture\n");
+
+ return data;
+}
+
+/**
+ * mali_kutf_clk_rate_trace_remove_fixture() - Destroy fixture data previously
+ * created by mali_kutf_clk_rate_trace_create_fixture.
+ * @context: KUTF context.
+ */
+static void mali_kutf_clk_rate_trace_remove_fixture(
+ struct kutf_context *context)
+{
+ struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
+ struct kbase_device *kbdev = data->kbdev;
+
+ if (data->nclks) {
+ /* Clean up the portal trace write arrangement */
+ g_ptr_portal_data = NULL;
+
+ kbase_clk_rate_trace_manager_unsubscribe(
+ &kbdev->pm.clk_rtm, &data->listener);
+ }
+ pr_debug("Destroying fixture\n");
+ kbase_release_device(kbdev);
+ pr_debug("Destroyed fixture\n");
+}
+
+/**
+ * mali_kutf_clk_rate_trace_test_module_init() - Entry point for test module.
+ */
+int mali_kutf_clk_rate_trace_test_module_init(void)
+{
+ struct kutf_suite *suite;
+ unsigned int filters;
+ union kutf_callback_data suite_data = { 0 };
+
+ pr_debug("Creating app\n");
+
+ g_ptr_portal_data = NULL;
+ kutf_app = kutf_create_application(CLK_RATE_TRACE_APP_NAME);
+
+ if (!kutf_app) {
+ pr_warn("Creation of app " CLK_RATE_TRACE_APP_NAME
+ " failed!\n");
+ return -ENOMEM;
+ }
+
+ pr_debug("Create suite %s\n", CLK_RATE_TRACE_SUITE_NAME);
+ suite = kutf_create_suite_with_filters_and_data(
+ kutf_app, CLK_RATE_TRACE_SUITE_NAME, 1,
+ mali_kutf_clk_rate_trace_create_fixture,
+ mali_kutf_clk_rate_trace_remove_fixture,
+ KUTF_F_TEST_GENERIC,
+ suite_data);
+
+ if (!suite) {
+ pr_warn("Creation of suite %s failed!\n",
+ CLK_RATE_TRACE_SUITE_NAME);
+ kutf_destroy_application(kutf_app);
+ return -ENOMEM;
+ }
+
+ filters = suite->suite_default_flags;
+ kutf_add_test_with_filters(
+ suite, 0x0, CLK_RATE_TRACE_PORTAL,
+ mali_kutf_clk_rate_trace_test_portal,
+ filters);
+
+ pr_debug("Init complete\n");
+ return 0;
+}
+
+/**
+ * mali_kutf_clk_rate_trace_test_module_exit() - Module exit point for this
+ * test.
+ */
+void mali_kutf_clk_rate_trace_test_module_exit(void)
+{
+ pr_debug("Exit start\n");
+ kutf_destroy_application(kutf_app);
+ pr_debug("Exit complete\n");
+}
+
+
+module_init(mali_kutf_clk_rate_trace_test_module_init);
+module_exit(mali_kutf_clk_rate_trace_test_module_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/mali_kbase/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h b/mali_kbase/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h
new file mode 100644
index 0000000..f46afd5
--- /dev/null
+++ b/mali_kbase/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h
@@ -0,0 +1,148 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KUTF_CLK_RATE_TRACE_TEST_H_
+#define _KUTF_CLK_RATE_TRACE_TEST_H_
+
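+/* Names of the KUTF application, suite and test under which this portal
+ * test is registered. The portal's "run" interface is exposed by the KUTF
+ * framework under the corresponding directory hierarchy (typically via
+ * debugfs; assumed layout of the KUTF runner).
+ */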
+#define CLK_RATE_TRACE_APP_NAME "clk_rate_trace"
+#define CLK_RATE_TRACE_SUITE_NAME "rate_trace"
+#define CLK_RATE_TRACE_PORTAL "portal"
+
+/**
+ * enum kbasep_clk_rate_trace_req - request command to the clock rate trace
+ * service portal.
+ *
+ * @PORTAL_CMD_GET_CLK_RATE_MGR: Request the clock trace manager internal
+ * data record. On a positive acknowledgement
+ * the prevailing clock rates and the GPU idle
+ * condition flag are returned.
+ * @PORTAL_CMD_GET_CLK_RATE_TRACE: Request the clock trace portal to return its
+ * data record. On a positive acknowledgement
+ * the last trace recorded clock rates and the
+ * GPU idle condition flag are returned.
+ * @PORTAL_CMD_GET_TRACE_SNAPSHOT: Request the clock trace portal to return its
+ * current snapshot data record. On a positive
+ * acknowledgement the snapshot array matching
+ * the number of clocks are returned. It also
+ * starts a fresh snapshot inside the clock
+ * trace portal.
+ * @PORTAL_CMD_INC_PM_CTX_CNT: Request the clock trace portal to increase
+ * its internal PM_CTX_COUNT. If this increase
+ * yielded a count of 0 -> 1 change, the portal
+ * will initiate a PM_CTX_ACTIVE call to the
+ * Kbase power management. Further increase
+ * requests will only affect the portal's
+ * internal count value.
+ * @PORTAL_CMD_DEC_PM_CTX_CNT: Request the clock trace portal to decrease
+ * its internal PM_CTX_COUNT. If this decrease
+ * yielded a count of 1 -> 0 change, the portal
+ * will initiate a PM_CTX_IDLE call to the
+ * Kbase power management.
+ * @PORTAL_CMD_CLOSE_PORTAL: Inform the clock trace portal service the
+ * client has completed its session. The portal
+ * will start the close down action. If no
+ * error has occurred during the dynamic
+ * interactive session, an inherent basic test
+ * carrying out some sanity check on the clock
+ * trace is undertaken.
+ * @PORTAL_CMD_INVOKE_NOTIFY_42KHZ: Invokes all clock rate trace manager callbacks
+ * for the top clock domain with a new GPU frequency
+ * set to 42 kHz.
+ * @PORTAL_CMD_INVALID: Valid commands termination marker. Must be
+ * the highest enumeration value, as it
+ * represents valid command array size.
+ * @PORTAL_TOTAL_CMDS: Alias of PORTAL_CMD_INVALID.
+ */
+/* PORTAL_CMD_INVALID must be the last one, serving the size */
+enum kbasep_clk_rate_trace_req {
+ PORTAL_CMD_GET_CLK_RATE_MGR,
+ PORTAL_CMD_GET_CLK_RATE_TRACE,
+ PORTAL_CMD_GET_TRACE_SNAPSHOT,
+ PORTAL_CMD_INC_PM_CTX_CNT,
+ PORTAL_CMD_DEC_PM_CTX_CNT,
+ PORTAL_CMD_CLOSE_PORTAL,
+ PORTAL_CMD_INVOKE_NOTIFY_42KHZ,
+ PORTAL_CMD_INVALID,
+ PORTAL_TOTAL_CMDS = PORTAL_CMD_INVALID,
+};
+
+/**
+ * Portal service request command names. A portal request consists of a KUTF
+ * named u64 value. For the PORTAL_CMD enumerations above, the names defined
+ * here are used as the value name, followed by a sequence number value.
+ * Example (manual script here for illustration):
+ * exec 5<>run # open the portal kutf run as fd-5
+ * echo GET_CLK_RATE_MGR=1 >&5 # send the cmd and sequence number 1
+ * head -n 1 <&5 # read back the 1-line server response
+ * ACK="{SEQ:1, RATE:[0x1ad27480], GPU_IDLE:1}" # response string
+ * echo GET_TRACE_SNAPSHOT=1 >&5 # send the cmd and sequence number 1
+ * head -n 1 <&5 # read back the 1-line server response
+ * ACK="{SEQ:1, SNAPSHOT_ARRAY:[(0x0, 0x1ad27480, 1, 0)]}"
+ * echo CLOSE_PORTAL=1 >&5 # close the portal
+ * cat <&5 # read back all the response lines
+ * ACK="{SEQ:1, PM_CTX_CNT:0}" # response to close command
+ * KUTF_RESULT_PASS:(explicit pass) # internal sanity test passed.
+ * exec 5>&- # close the service portal fd.
+ *
+ * Expected request command return format:
+ * GET_CLK_RATE_MGR: ACK="{SEQ:12, RATE:[1080, 1280], GPU_IDLE:1}"
+ * Note, the above contains 2-clock with rates in [], GPU idle
+ * GET_CLK_RATE_TRACE: ACK="{SEQ:6, RATE:[0x1ad27480], GPU_IDLE:0}"
+ * Note, 1-clock with rate in [], GPU not idle
+ * GET_TRACE_SNAPSHOT: ACK="{SEQ:8, SNAPSHOT_ARRAY:[(0x0, 0x1ad27480, 1, 0)]}"
+ * Note, 1-clock, (start_rate : 0, last_rate : 0x1ad27480,
+ * trace_rate_up_count: 1, trace_rate_down_count : 0)
+ * For the specific sample case here, there is a single rate_trace event
+ * that yielded a rate increase change. No rate drop event recorded in the
+ * reporting snapshot duration.
+ * INC_PM_CTX_CNT: ACK="{SEQ:1, PM_CTX_CNT:1}"
+ * Note, after the increment, PM_CTX_CNT is 1. (i.e. 0 -> 1)
+ * DEC_PM_CTX_CNT: ACK="{SEQ:3, PM_CTX_CNT:0}"
+ * Note, after the decrement, PM_CTX_CNT is 0. (i.e. 1 -> 0)
+ * CLOSE_PORTAL: ACK="{SEQ:1, PM_CTX_CNT:1}"
+ * Note, at the close, PM_CTX_CNT is 1. The PM_CTX_CNT will internally be
+ * dropped down to 0 as part of the portal close clean up.
+ */
+#define GET_CLK_RATE_MGR "GET_CLK_RATE_MGR"
+#define GET_CLK_RATE_TRACE "GET_CLK_RATE_TRACE"
+#define GET_TRACE_SNAPSHOT "GET_TRACE_SNAPSHOT"
+#define INC_PM_CTX_CNT "INC_PM_CTX_CNT"
+#define DEC_PM_CTX_CNT "DEC_PM_CTX_CNT"
+#define CLOSE_PORTAL "CLOSE_PORTAL"
+#define INVOKE_NOTIFY_42KHZ "INVOKE_NOTIFY_42KHZ"
+
+/**
+ * Portal service response tag names. The response consists of a kutf
+ * named string-value. In case of a 'NACK' (negative acknowledgement), it
+ * can be one of the two formats:
+ * 1. NACK="{SEQ:2, MSG:xyzed}" # NACK on command with sequence tag-2.
+ * Note, the portal has received a valid name and valid sequence number
+ * but cannot carry out the request; the reason is in the MSG field.
+ * 2. NACK="Failing-message"
+ * Note, unable to parse a valid name or valid sequence number,
+ * or some internal error condition. Reason in the quoted string.
+ */
+#define ACK "ACK"
+#define NACK "NACK"
+#define MAX_REPLY_NAME_LEN 32
+
+#endif /* _KUTF_CLK_RATE_TRACE_TEST_H_ */