author     Sidath Senanayake <sidaths@google.com>    2018-12-06 09:09:59 +0100
committer  Sidath Senanayake <sidaths@google.com>    2018-12-06 09:09:59 +0100
commit     a970431fa55f99aba31ea4263fdc8e70019a9ccd (patch)
tree       91bb7f49a4869c0385338fe144f53ac8b98468ea /mali_kbase
parent     f10b3de5283d0c196459f18160161e48cfadae81 (diff)
download   gpu-a970431fa55f99aba31ea4263fdc8e70019a9ccd.tar.gz
Mali Bifrost DDK r16p0 KMD
Provenance: aa8b3ff0f (collaborate/EAC/b_r16p0)
  BX304L01B-BU-00000-r16p0-01rel0
  BX304L06A-BU-00000-r16p0-01rel0
  BX304X07X-BU-00000-r16p0-01rel0

Signed-off-by: Sidath Senanayake <sidaths@google.com>
Change-Id: I96125862b7cf6596d1b7109853fb4ca39e851056
Diffstat (limited to 'mali_kbase')
-rw-r--r--  mali_kbase/Kbuild | 18
-rw-r--r--  mali_kbase/Kconfig | 10
-rw-r--r--  mali_kbase/Makefile | 4
-rw-r--r--  mali_kbase/Makefile.kbase | 4
-rw-r--r--  mali_kbase/Mconfig | 16
-rw-r--r--  mali_kbase/backend/gpu/Kbuild | 4
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_backend_config.h | 5
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_devfreq.c | 7
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_device_hw.c | 106
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_device_internal.h | 25
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_gpu.c | 16
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_instr_backend.c | 146
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_instr_defs.h | 8
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_instr_internal.h | 8
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_hw.c | 92
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_internal.h | 24
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_rb.c | 353
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_js_backend.c | 2
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_mmu_hw_direct.c | 10
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_backend.c | 190
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_ca.c | 22
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_ca.h | 2
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_defs.h | 244
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_demand.c | 68
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_demand.h | 69
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_driver.c | 1309
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_internal.h | 156
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_policy.c | 622
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_policy.h | 158
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_time.h | 57
-rw-r--r--  mali_kbase/build.bp | 15
-rw-r--r--  mali_kbase/ipa/mali_kbase_ipa.c | 46
-rw-r--r--  mali_kbase/ipa/mali_kbase_ipa.h | 33
-rw-r--r--  mali_kbase/ipa/mali_kbase_ipa_simple.c | 3
-rw-r--r--  mali_kbase/ipa/mali_kbase_ipa_vinstr_common.c | 126
-rw-r--r--  mali_kbase/ipa/mali_kbase_ipa_vinstr_common.h | 10
-rw-r--r--  mali_kbase/ipa/mali_kbase_ipa_vinstr_g7x.c | 65
-rw-r--r--  mali_kbase/mali_base_hwconfig_features.h | 22
-rw-r--r--  mali_kbase/mali_base_hwconfig_issues.h | 20
-rw-r--r--  mali_kbase/mali_base_kernel.h | 52
-rw-r--r--  mali_kbase/mali_base_vendor_specific_func.h | 29
-rw-r--r--  mali_kbase/mali_kbase.h | 18
-rw-r--r--  mali_kbase/mali_kbase_config_defaults.h | 5
-rw-r--r--  mali_kbase/mali_kbase_context.c | 5
-rw-r--r--  mali_kbase/mali_kbase_core_linux.c | 364
-rw-r--r--  mali_kbase/mali_kbase_debug_job_fault.c | 2
-rw-r--r--  mali_kbase/mali_kbase_defs.h | 131
-rw-r--r--  mali_kbase/mali_kbase_device.c | 2
-rw-r--r--  mali_kbase/mali_kbase_gator_api.c | 83
-rw-r--r--  mali_kbase/mali_kbase_gpu_id.h | 2
-rw-r--r--  mali_kbase/mali_kbase_hw.c | 42
-rw-r--r--  mali_kbase/mali_kbase_hwaccess_instr.h | 29
-rw-r--r--  mali_kbase/mali_kbase_hwaccess_jm.h | 15
-rw-r--r--  mali_kbase/mali_kbase_hwaccess_pm.h | 37
-rw-r--r--  mali_kbase/mali_kbase_hwaccess_time.h | 6
-rw-r--r--  mali_kbase/mali_kbase_hwcnt.c | 796
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_accumulator.h | 137
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_backend.h | 217
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_backend_gpu.c | 538
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_backend_gpu.h | 61
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_context.h | 119
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_gpu.c | 716
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_gpu.h | 249
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_legacy.c | 152
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_legacy.h | 94
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_types.c | 538
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_types.h | 1087
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_virtualizer.c | 688
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_virtualizer.h | 139
-rw-r--r--  mali_kbase/mali_kbase_ioctl.h | 32
-rw-r--r--  mali_kbase/mali_kbase_jd.c | 4
-rw-r--r--  mali_kbase/mali_kbase_jd_debugfs.c | 5
-rw-r--r--  mali_kbase/mali_kbase_jd_debugfs.h | 4
-rw-r--r--  mali_kbase/mali_kbase_js.c | 3
-rw-r--r--  mali_kbase/mali_kbase_js_ctx_attr.c | 25
-rw-r--r--  mali_kbase/mali_kbase_js_ctx_attr.h | 10
-rw-r--r--  mali_kbase/mali_kbase_js_defs.h | 19
-rw-r--r--  mali_kbase/mali_kbase_mem.c | 222
-rw-r--r--  mali_kbase/mali_kbase_mem.h | 50
-rw-r--r--  mali_kbase/mali_kbase_mem_linux.c | 205
-rw-r--r--  mali_kbase/mali_kbase_mem_linux.h | 16
-rw-r--r--  mali_kbase/mali_kbase_mmu.c | 24
-rw-r--r--  mali_kbase/mali_kbase_pm.c | 43
-rw-r--r--  mali_kbase/mali_kbase_softjobs.c | 5
-rw-r--r--  mali_kbase/mali_kbase_tlstream.c | 58
-rw-r--r--  mali_kbase/mali_kbase_tlstream.h | 30
-rw-r--r--  mali_kbase/mali_kbase_trace_defs.h | 2
-rw-r--r--  mali_kbase/mali_kbase_utility.c | 38
-rw-r--r--  mali_kbase/mali_kbase_utility.h | 11
-rw-r--r--  mali_kbase/mali_kbase_vinstr.c | 2598
-rw-r--r--  mali_kbase/mali_kbase_vinstr.h | 173
-rw-r--r--  mali_kbase/mali_linux_kbase_trace.h | 1
-rw-r--r--  mali_kbase/mali_malisw.h | 29
-rw-r--r--  mali_kbase/mali_midg_regmap.h | 18
-rw-r--r--  mali_kbase/mali_uk.h | 62
-rw-r--r--  mali_kbase/sconscript | 1
-rw-r--r--  mali_kbase/tests/Mconfig | 5
-rw-r--r--  mali_kbase/tests/mali_kutf_irq_test/build.bp | 1
-rw-r--r--  mali_kbase/tests/sconscript | 3
99 files changed, 8921 insertions, 5224 deletions
diff --git a/mali_kbase/Kbuild b/mali_kbase/Kbuild
index 9b3cb91..8e73e1f 100644
--- a/mali_kbase/Kbuild
+++ b/mali_kbase/Kbuild
@@ -21,7 +21,7 @@
# Driver version string which is returned to userspace via an ioctl
-MALI_RELEASE_NAME ?= "r15p0-01rel0"
+MALI_RELEASE_NAME ?= "r16p0-01rel0"
# Paths required for build
KBASE_PATH = $(src)
@@ -33,7 +33,6 @@ MALI_CUSTOMER_RELEASE ?= 1
MALI_USE_CSF ?= 0
MALI_UNIT_TEST ?= 0
MALI_KERNEL_TEST_API ?= 0
-MALI_MOCK_TEST ?= 0
MALI_COVERAGE ?= 0
CONFIG_MALI_PLATFORM_NAME ?= "devicetree"
@@ -43,7 +42,6 @@ DEFINES = \
-DMALI_USE_CSF=$(MALI_USE_CSF) \
-DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \
-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
- -DMALI_MOCK_TEST=$(MALI_MOCK_TEST) \
-DMALI_COVERAGE=$(MALI_COVERAGE) \
-DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\"
@@ -61,7 +59,7 @@ DEFINES += -DMALI_KBASE_BUILD
# Use our defines when compiling
ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
-subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(OSK_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
SRC := \
mali_kbase_device.c \
@@ -80,10 +78,15 @@ SRC := \
mali_kbase_pm.c \
mali_kbase_config.c \
mali_kbase_vinstr.c \
+ mali_kbase_hwcnt.c \
+ mali_kbase_hwcnt_backend_gpu.c \
+ mali_kbase_hwcnt_gpu.c \
+ mali_kbase_hwcnt_legacy.c \
+ mali_kbase_hwcnt_types.c \
+ mali_kbase_hwcnt_virtualizer.c \
mali_kbase_softjobs.c \
mali_kbase_10969_workaround.c \
mali_kbase_hw.c \
- mali_kbase_utility.c \
mali_kbase_debug.c \
mali_kbase_gpu_memory_debugfs.c \
mali_kbase_mem_linux.c \
@@ -154,11 +157,6 @@ mali_kbase-$(CONFIG_SYNC_FILE) += \
mali_kbase_sync_common.o \
mali_kbase_fence.o
-ifeq ($(MALI_MOCK_TEST),1)
-# Test functionality
-mali_kbase-y += tests/internal/src/mock/mali_kbase_pm_driver_mock.o
-endif
-
include $(src)/backend/gpu/Kbuild
mali_kbase-y += $(BACKEND:.c=.o)
diff --git a/mali_kbase/Kconfig b/mali_kbase/Kconfig
index af2a5aa..7c10016 100644
--- a/mali_kbase/Kconfig
+++ b/mali_kbase/Kconfig
@@ -31,14 +31,12 @@ menuconfig MALI_MIDGARD
this will generate a single module, called mali_kbase.
config MALI_GATOR_SUPPORT
- bool "Streamline support via Gator"
+ bool "Enable Streamline tracing support"
depends on MALI_MIDGARD
- default n
+ default y
help
- Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
- You will need the Gator device driver already loaded before loading this driver when enabling
- Streamline debug support.
- This is a legacy interface required by older versions of Streamline.
+ Enables kbase tracing used by the Arm Streamline Performance Analyzer.
+ The tracepoints are used to derive GPU activity charts in Streamline.
config MALI_MIDGARD_DVFS
bool "Enable legacy DVFS"
diff --git a/mali_kbase/Makefile b/mali_kbase/Makefile
index 13af9f4..08b2fa9 100644
--- a/mali_kbase/Makefile
+++ b/mali_kbase/Makefile
@@ -25,10 +25,6 @@ KDIR ?= /lib/modules/$(shell uname -r)/build
BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../..
KBASE_PATH_RELATIVE = $(CURDIR)
-ifeq ($(MALI_UNIT_TEST), 1)
- EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/tests/internal/src/kernel_assert_module/linux/Module.symvers
-endif
-
ifeq ($(CONFIG_MALI_FPGA_BUS_LOGGER),y)
#Add bus logger symbols
EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers
diff --git a/mali_kbase/Makefile.kbase b/mali_kbase/Makefile.kbase
index d7898cb..6b0f81e 100644
--- a/mali_kbase/Makefile.kbase
+++ b/mali_kbase/Makefile.kbase
@@ -1,5 +1,5 @@
#
-# (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2010, 2013, 2018 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -19,5 +19,5 @@
#
#
-EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(OSK_PATH)/src/linux/include -I$(KBASE_PATH)/platform_$(PLATFORM)
+EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(KBASE_PATH)/platform_$(PLATFORM)
diff --git a/mali_kbase/Mconfig b/mali_kbase/Mconfig
index 1b6bffc..46dca14 100644
--- a/mali_kbase/Mconfig
+++ b/mali_kbase/Mconfig
@@ -23,15 +23,12 @@ menuconfig MALI_MIDGARD
this will generate a single module, called mali_kbase.
config MALI_GATOR_SUPPORT
- bool "Streamline support via Gator"
+ bool "Enable Streamline tracing support"
depends on MALI_MIDGARD && !BACKEND_USER
- default y if INSTRUMENTATION_STREAMLINE_OLD
- default n
+ default y
help
- Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
- You will need the Gator device driver already loaded before loading this driver when enabling
- Streamline debug support.
- This is a legacy interface required by older versions of Streamline.
+ Enables kbase tracing used by the Arm Streamline Performance Analyzer.
+ The tracepoints are used to derive GPU activity charts in Streamline.
config MALI_MIDGARD_DVFS
bool "Enable legacy DVFS"
@@ -88,11 +85,6 @@ config MALI_PLATFORM_NAME
When PLATFORM_CUSTOM is set, this needs to be set manually to
pick up the desired platform files.
-config MALI_MOCK_TEST
- bool
- depends on MALI_MIDGARD && !RELEASE
- default y
-
# MALI_EXPERT configuration options
menuconfig MALI_EXPERT
diff --git a/mali_kbase/backend/gpu/Kbuild b/mali_kbase/backend/gpu/Kbuild
index dcd8ca4..2dc1455 100644
--- a/mali_kbase/backend/gpu/Kbuild
+++ b/mali_kbase/backend/gpu/Kbuild
@@ -38,14 +38,12 @@ BACKEND += \
backend/gpu/mali_kbase_pm_ca.c \
backend/gpu/mali_kbase_pm_always_on.c \
backend/gpu/mali_kbase_pm_coarse_demand.c \
- backend/gpu/mali_kbase_pm_demand.c \
backend/gpu/mali_kbase_pm_policy.c \
backend/gpu/mali_kbase_time.c
ifeq ($(MALI_CUSTOMER_RELEASE),0)
BACKEND += \
- backend/gpu/mali_kbase_pm_demand_always_powered.c \
- backend/gpu/mali_kbase_pm_fast_start.c
+ backend/gpu/mali_kbase_pm_always_on_demand.c
endif
ifeq ($(CONFIG_MALI_DEVFREQ),y)
diff --git a/mali_kbase/backend/gpu/mali_kbase_backend_config.h b/mali_kbase/backend/gpu/mali_kbase_backend_config.h
index 196a776..4a61f96 100644
--- a/mali_kbase/backend/gpu/mali_kbase_backend_config.h
+++ b/mali_kbase/backend/gpu/mali_kbase_backend_config.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -27,8 +27,5 @@
#ifndef _KBASE_BACKEND_CONFIG_H_
#define _KBASE_BACKEND_CONFIG_H_
-/* Enable GPU reset API */
-#define KBASE_GPU_RESET_EN 1
-
#endif /* _KBASE_BACKEND_CONFIG_H_ */
diff --git a/mali_kbase/backend/gpu/mali_kbase_devfreq.c b/mali_kbase/backend/gpu/mali_kbase_devfreq.c
index 683a24c..5ade012 100644
--- a/mali_kbase/backend/gpu/mali_kbase_devfreq.c
+++ b/mali_kbase/backend/gpu/mali_kbase_devfreq.c
@@ -283,8 +283,11 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev)
real_freq = opp_freq;
if (of_property_read_u64(node, "opp-core-mask", &core_mask))
core_mask = shader_present;
- if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11056) &&
- core_mask != shader_present) {
+ if (core_mask != shader_present &&
+ (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11056) ||
+ corestack_driver_control ||
+ platform_power_down_only)) {
+
dev_warn(kbdev->dev, "Ignoring OPP %llu - Dynamic Core Scaling not supported on this GPU\n",
opp_freq);
continue;
diff --git a/mali_kbase/backend/gpu/mali_kbase_device_hw.c b/mali_kbase/backend/gpu/mali_kbase_device_hw.c
index ebc3022..5dd059f 100644
--- a/mali_kbase/backend/gpu/mali_kbase_device_hw.c
+++ b/mali_kbase/backend/gpu/mali_kbase_device_hw.c
@@ -29,6 +29,7 @@
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <backend/gpu/mali_kbase_device_internal.h>
+#include <mali_kbase_config_defaults.h>
#if !defined(CONFIG_MALI_NO_MALI)
@@ -220,6 +221,84 @@ static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple)
dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n");
}
+void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev)
+{
+ u32 irq_mask;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ if (kbdev->cache_clean_in_progress) {
+ /* If this is called while another clean is in progress, we
+ * can't rely on the current one to flush any new changes in
+ * the cache. Instead, trigger another cache clean immediately
+ * after this one finishes.
+ */
+ kbdev->cache_clean_queued = true;
+ return;
+ }
+
+ /* Enable interrupt */
+ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+ irq_mask | CLEAN_CACHES_COMPLETED);
+
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+ GPU_COMMAND_CLEAN_INV_CACHES);
+
+ kbdev->cache_clean_in_progress = true;
+}
+
+void kbase_gpu_start_cache_clean(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbase_gpu_start_cache_clean_nolock(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+static void kbase_clean_caches_done(struct kbase_device *kbdev)
+{
+ u32 irq_mask;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ if (kbdev->cache_clean_queued) {
+ kbdev->cache_clean_queued = false;
+
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+ GPU_COMMAND_CLEAN_INV_CACHES);
+ } else {
+ /* Disable interrupt */
+ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+ irq_mask & ~CLEAN_CACHES_COMPLETED);
+
+ kbdev->cache_clean_in_progress = false;
+
+ wake_up(&kbdev->cache_clean_wait);
+ }
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ while (kbdev->cache_clean_in_progress) {
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ wait_event_interruptible(kbdev->cache_clean_wait,
+ !kbdev->cache_clean_in_progress);
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ }
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
{
KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, NULL, 0u, val);
@@ -232,18 +311,29 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
if (val & PRFCNT_SAMPLE_COMPLETED)
kbase_instr_hwcnt_sample_done(kbdev);
- if (val & CLEAN_CACHES_COMPLETED)
- kbase_clean_caches_done(kbdev);
-
KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val);
- /* kbase_pm_check_transitions must be called after the IRQ has been
- * cleared. This is because it might trigger further power transitions
- * and we don't want to miss the interrupt raised to notify us that
- * these further transitions have finished.
+ /* kbase_pm_check_transitions (called by kbase_pm_power_changed) must
+ * be called after the IRQ has been cleared. This is because it might
+ * trigger further power transitions and we don't want to miss the
+ * interrupt raised to notify us that these further transitions have
+ * finished. The same applies to kbase_clean_caches_done() - if another
+ * clean was queued, it might trigger another clean, which might
+ * generate another interrupt which shouldn't be missed.
+ */
+
+ if (val & CLEAN_CACHES_COMPLETED)
+ kbase_clean_caches_done(kbdev);
+
+ /* When 'platform_power_down_only' is enabled, the L2 cache is not
+ * powered down, but flushed before the GPU power down (which is done
+ * by the platform code). So the L2 state machine requests a cache
+ * flush. And when that flush completes, the L2 state machine needs to
+ * be re-invoked to proceed with the GPU power down.
*/
- if (val & POWER_CHANGED_ALL)
+ if (val & POWER_CHANGED_ALL ||
+ (platform_power_down_only && (val & CLEAN_CACHES_COMPLETED)))
kbase_pm_power_changed(kbdev);
KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, NULL, 0u, val);
diff --git a/mali_kbase/backend/gpu/mali_kbase_device_internal.h b/mali_kbase/backend/gpu/mali_kbase_device_internal.h
index 928efe9..7886e96 100644
--- a/mali_kbase/backend/gpu/mali_kbase_device_internal.h
+++ b/mali_kbase/backend/gpu/mali_kbase_device_internal.h
@@ -50,6 +50,31 @@ void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value);
*/
u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset);
+/**
+ * kbase_gpu_start_cache_clean - Start a cache clean
+ * @kbdev: Kbase device
+ *
+ * Issue a cache clean and invalidate command to hardware. This function will
+ * take hwaccess_lock.
+ */
+void kbase_gpu_start_cache_clean(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpu_start_cache_clean_nolock - Start a cache clean
+ * @kbdev: Kbase device
+ *
+ * Issue a cache clean and invalidate command to hardware. hwaccess_lock
+ * must be held by the caller.
+ */
+void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpu_wait_cache_clean - Wait for cache cleaning to finish
+ * @kbdev: Kbase device
+ *
+ * This function will take hwaccess_lock, and may sleep.
+ */
+void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev);
/**
* kbase_gpu_interrupt - GPU interrupt handler
diff --git a/mali_kbase/backend/gpu/mali_kbase_gpu.c b/mali_kbase/backend/gpu/mali_kbase_gpu.c
index 881d50c..995d34d 100644
--- a/mali_kbase/backend/gpu/mali_kbase_gpu.c
+++ b/mali_kbase/backend/gpu/mali_kbase_gpu.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -56,7 +56,7 @@ int kbase_backend_early_init(struct kbase_device *kbdev)
if (err)
goto fail_interrupts;
- err = kbase_hwaccess_pm_init(kbdev);
+ err = kbase_hwaccess_pm_early_init(kbdev);
if (err)
goto fail_pm;
@@ -74,7 +74,7 @@ fail_runtime_pm:
void kbase_backend_early_term(struct kbase_device *kbdev)
{
- kbase_hwaccess_pm_term(kbdev);
+ kbase_hwaccess_pm_early_term(kbdev);
kbase_release_interrupts(kbdev);
kbase_pm_runtime_term(kbdev);
kbasep_platform_device_term(kbdev);
@@ -84,10 +84,14 @@ int kbase_backend_late_init(struct kbase_device *kbdev)
{
int err;
- err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT);
+ err = kbase_hwaccess_pm_late_init(kbdev);
if (err)
return err;
+ err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT);
+ if (err)
+ goto fail_pm_powerup;
+
err = kbase_backend_timer_init(kbdev);
if (err)
goto fail_timer;
@@ -121,6 +125,8 @@ fail_interrupt_test:
kbase_backend_timer_term(kbdev);
fail_timer:
kbase_hwaccess_pm_halt(kbdev);
+fail_pm_powerup:
+ kbase_hwaccess_pm_late_term(kbdev);
return err;
}
@@ -131,5 +137,5 @@ void kbase_backend_late_term(struct kbase_device *kbdev)
kbase_job_slot_term(kbdev);
kbase_backend_timer_term(kbdev);
kbase_hwaccess_pm_halt(kbdev);
+ kbase_hwaccess_pm_late_term(kbdev);
}
-
diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
index 6c69132..79c04d9 100644
--- a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
@@ -33,49 +33,17 @@
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <backend/gpu/mali_kbase_instr_internal.h>
-/**
- * kbasep_instr_hwcnt_cacheclean - Issue Cache Clean & Invalidate command to
- * hardware
- *
- * @kbdev: Kbase device
- */
-static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev)
-{
- unsigned long flags;
- unsigned long pm_flags;
- u32 irq_mask;
-
- spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
- KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
- KBASE_INSTR_STATE_REQUEST_CLEAN);
-
- /* Enable interrupt */
- spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
- irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
- kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
- irq_mask | CLEAN_CACHES_COMPLETED);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
-
- /* clean&invalidate the caches so we're sure the mmu tables for the dump
- * buffer is valid */
- KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
- kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
- GPU_COMMAND_CLEAN_INV_CACHES);
- kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING;
-
- spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
-}
-
int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
struct kbase_context *kctx,
- struct kbase_ioctl_hwcnt_enable *enable)
+ struct kbase_instr_hwcnt_enable *enable)
{
- unsigned long flags, pm_flags;
+ unsigned long flags;
int err = -EINVAL;
u32 irq_mask;
- int ret;
u32 prfcnt_config;
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
/* alignment failure */
if ((enable->dump_buffer == 0ULL) || (enable->dump_buffer & (2048 - 1)))
goto out_err;
@@ -84,53 +52,30 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
*/
kbase_pm_ca_instr_enable(kbdev);
- /* Request the cores early on synchronously - we'll release them on any
- * errors (e.g. instrumentation already active) */
- kbase_pm_request_cores_sync(kbdev, true, true);
-
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
/* Instrumentation is already enabled */
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
- goto out_unrequest_cores;
+ goto out_err;
}
/* Enable interrupt */
- spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
PRFCNT_SAMPLE_COMPLETED);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
/* In use, this context is the owner */
kbdev->hwcnt.kctx = kctx;
/* Remember the dump address so we can reprogram it later */
kbdev->hwcnt.addr = enable->dump_buffer;
-
- /* Request the clean */
- kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
- kbdev->hwcnt.backend.triggered = 0;
- /* Clean&invalidate the caches so we're sure the mmu tables for the dump
- * buffer is valid */
- ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
- &kbdev->hwcnt.backend.cache_clean_work);
- KBASE_DEBUG_ASSERT(ret);
+ kbdev->hwcnt.addr_bytes = enable->dump_buffer_bytes;
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
- /* Wait for cacheclean to complete */
- wait_event(kbdev->hwcnt.backend.wait,
- kbdev->hwcnt.backend.triggered != 0);
-
- KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
- KBASE_INSTR_STATE_IDLE);
-
- kbase_pm_request_l2_caches(kbdev);
-
/* Configure */
prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
-#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+ if (enable->use_secondary)
{
u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID)
@@ -140,7 +85,6 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
if (arch_v6)
prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
}
-#endif
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
prfcnt_config | PRFCNT_CONFIG_MODE_OFF);
@@ -184,10 +128,6 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
return err;
- out_unrequest_cores:
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- kbase_pm_release_cores(kbdev, true, true);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
out_err:
return err;
}
@@ -200,17 +140,20 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
struct kbase_device *kbdev = kctx->kbdev;
while (1) {
+ spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) {
/* Instrumentation is not enabled */
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
goto out;
}
if (kbdev->hwcnt.kctx != kctx) {
/* Instrumentation has been setup for another context */
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
goto out;
}
@@ -218,6 +161,7 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
break;
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
/* Ongoing dump/setup - wait for its completion */
wait_event(kbdev->hwcnt.backend.wait,
@@ -228,7 +172,6 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
kbdev->hwcnt.backend.triggered = 0;
/* Disable interrupt */
- spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
irq_mask & ~PRFCNT_SAMPLE_COMPLETED);
@@ -238,15 +181,12 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
kbdev->hwcnt.kctx = NULL;
kbdev->hwcnt.addr = 0ULL;
+ kbdev->hwcnt.addr_bytes = 0ULL;
kbase_pm_ca_instr_disable(kbdev);
- kbase_pm_release_cores(kbdev, true, true);
-
- kbase_pm_release_l2_caches(kbdev);
-
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
kctx);
@@ -331,33 +271,34 @@ KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete);
void kbasep_cache_clean_worker(struct work_struct *data)
{
struct kbase_device *kbdev;
- unsigned long flags;
+ unsigned long flags, pm_flags;
kbdev = container_of(data, struct kbase_device,
hwcnt.backend.cache_clean_work);
- mutex_lock(&kbdev->cacheclean_lock);
- kbasep_instr_hwcnt_cacheclean(kbdev);
-
+ spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
- /* Wait for our condition, and any reset to complete */
- while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
- spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
- wait_event(kbdev->hwcnt.backend.cache_clean_wait,
- kbdev->hwcnt.backend.state !=
- KBASE_INSTR_STATE_CLEANING);
- spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
- }
+
+ /* Clean and invalidate the caches so we're sure the mmu tables for the
+ * dump buffer is valid.
+ */
KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
- KBASE_INSTR_STATE_CLEANED);
+ KBASE_INSTR_STATE_REQUEST_CLEAN);
+ kbase_gpu_start_cache_clean_nolock(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+
+ kbase_gpu_wait_cache_clean(kbdev);
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+ KBASE_INSTR_STATE_REQUEST_CLEAN);
/* All finished and idle */
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
kbdev->hwcnt.backend.triggered = 1;
wake_up(&kbdev->hwcnt.backend.wait);
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
- mutex_unlock(&kbdev->cacheclean_lock);
}
void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
@@ -389,40 +330,13 @@ void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
}
-void kbase_clean_caches_done(struct kbase_device *kbdev)
-{
- u32 irq_mask;
-
- if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
- unsigned long flags;
- unsigned long pm_flags;
-
- spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
- /* Disable interrupt */
- spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
- irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
- kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
- irq_mask & ~CLEAN_CACHES_COMPLETED);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
-
- /* Wakeup... */
- if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
- /* Only wake if we weren't resetting */
- kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
- wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
- }
-
- spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
- }
-}
-
int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
{
struct kbase_device *kbdev = kctx->kbdev;
unsigned long flags;
int err;
- /* Wait for dump & cacheclean to complete */
+ /* Wait for dump & cache clean to complete */
wait_event(kbdev->hwcnt.backend.wait,
kbdev->hwcnt.backend.triggered != 0);
@@ -477,7 +391,6 @@ int kbase_instr_backend_init(struct kbase_device *kbdev)
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
init_waitqueue_head(&kbdev->hwcnt.backend.wait);
- init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait);
INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work,
kbasep_cache_clean_worker);
kbdev->hwcnt.backend.triggered = 0;
@@ -494,4 +407,3 @@ void kbase_instr_backend_term(struct kbase_device *kbdev)
{
destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq);
}
-
diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_defs.h b/mali_kbase/backend/gpu/mali_kbase_instr_defs.h
index fb55d2d..c9fb759 100644
--- a/mali_kbase/backend/gpu/mali_kbase_instr_defs.h
+++ b/mali_kbase/backend/gpu/mali_kbase_instr_defs.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2016, 2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -39,11 +39,6 @@ enum kbase_instr_state {
KBASE_INSTR_STATE_DUMPING,
/* We've requested a clean to occur on a workqueue */
KBASE_INSTR_STATE_REQUEST_CLEAN,
- /* Hardware is currently cleaning and invalidating caches. */
- KBASE_INSTR_STATE_CLEANING,
- /* Cache clean completed, and either a) a dump is complete, or
- * b) instrumentation can now be setup. */
- KBASE_INSTR_STATE_CLEANED,
/* An error has occured during DUMPING (page fault). */
KBASE_INSTR_STATE_FAULT
};
@@ -54,7 +49,6 @@ struct kbase_instr_backend {
int triggered;
enum kbase_instr_state state;
- wait_queue_head_t cache_clean_wait;
struct workqueue_struct *cache_clean_wq;
struct work_struct cache_clean_work;
};
diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_internal.h b/mali_kbase/backend/gpu/mali_kbase_instr_internal.h
index 608379e..2254b9f 100644
--- a/mali_kbase/backend/gpu/mali_kbase_instr_internal.h
+++ b/mali_kbase/backend/gpu/mali_kbase_instr_internal.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -36,12 +36,6 @@
void kbasep_cache_clean_worker(struct work_struct *data);
/**
- * kbase_clean_caches_done() - Cache clean interrupt received
- * @kbdev: Kbase device
- */
-void kbase_clean_caches_done(struct kbase_device *kbdev);
-
-/**
* kbase_instr_hwcnt_sample_done() - Dump complete interrupt received
* @kbdev: Kbase device
*/
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
index fee19aa..acd4a5a 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
@@ -31,10 +31,10 @@
#include <mali_kbase_gator.h>
#endif
#include <mali_kbase_tlstream.h>
-#include <mali_kbase_vinstr.h>
#include <mali_kbase_hw.h>
#include <mali_kbase_hwaccess_jm.h>
#include <mali_kbase_ctx_sched.h>
+#include <mali_kbase_hwcnt_context.h>
#include <backend/gpu/mali_kbase_device_internal.h>
#include <backend/gpu/mali_kbase_irq_internal.h>
#include <backend/gpu/mali_kbase_jm_internal.h>
@@ -42,11 +42,9 @@
#define beenthere(kctx, f, a...) \
dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
-#if KBASE_GPU_RESET_EN
static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev);
static void kbasep_reset_timeout_worker(struct work_struct *data);
static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer);
-#endif /* KBASE_GPU_RESET_EN */
static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js,
struct kbase_context *kctx)
@@ -77,7 +75,7 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev,
struct mali_base_gpu_coherent_group_info *coherency_info =
&kbdev->gpu_props.props.coherency_info;
- affinity = kbase_pm_ca_get_core_mask(kbdev) &
+ affinity = kbdev->pm.backend.shaders_avail &
kbdev->pm.debug_core_mask[js];
/* JS2 on a dual core group system targets core group 1. All
@@ -89,7 +87,7 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev,
affinity &= coherency_info->group[0].core_mask;
} else {
/* Use all cores */
- affinity = kbase_pm_ca_get_core_mask(kbdev) &
+ affinity = kbdev->pm.backend.shaders_avail &
kbdev->pm.debug_core_mask[js];
}
@@ -141,6 +139,8 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END) &&
!(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET))
cfg |= JS_CONFIG_END_FLUSH_NO_ACTION;
+ else if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CLEAN_ONLY_SAFE))
+ cfg |= JS_CONFIG_END_FLUSH_CLEAN;
else
cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE;
@@ -465,7 +465,6 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
}
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-#if KBASE_GPU_RESET_EN
if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
KBASE_RESET_GPU_COMMITTED) {
/* If we're trying to reset the GPU then we might be able to do
@@ -474,7 +473,6 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
*/
kbasep_try_reset_gpu_early(kbdev);
}
-#endif /* KBASE_GPU_RESET_EN */
KBASE_TRACE_ADD(kbdev, JM_IRQ_END, NULL, NULL, 0, count);
}
KBASE_EXPORT_TEST_API(kbase_job_done);
@@ -800,7 +798,6 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
if (timeout != 0)
goto exit;
-#if KBASE_GPU_RESET_EN
if (kbase_prepare_to_reset_gpu(kbdev)) {
dev_err(kbdev->dev,
"Issueing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n",
@@ -812,12 +809,6 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
wait_event(kbdev->hwaccess.backend.reset_wait,
atomic_read(&kbdev->hwaccess.backend.reset_gpu)
== KBASE_RESET_GPU_NOT_PENDING);
-#else
- dev_warn(kbdev->dev,
- "Jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n",
- ZAP_TIMEOUT);
-
-#endif
exit:
dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx);
@@ -845,7 +836,6 @@ u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev)
int kbase_job_slot_init(struct kbase_device *kbdev)
{
-#if KBASE_GPU_RESET_EN
kbdev->hwaccess.backend.reset_workq = alloc_workqueue(
"Mali reset workqueue", 0, 1);
if (NULL == kbdev->hwaccess.backend.reset_workq)
@@ -858,7 +848,6 @@ int kbase_job_slot_init(struct kbase_device *kbdev)
HRTIMER_MODE_REL);
kbdev->hwaccess.backend.reset_timer.function =
kbasep_reset_timer_callback;
-#endif
return 0;
}
@@ -871,13 +860,10 @@ void kbase_job_slot_halt(struct kbase_device *kbdev)
void kbase_job_slot_term(struct kbase_device *kbdev)
{
-#if KBASE_GPU_RESET_EN
destroy_workqueue(kbdev->hwaccess.backend.reset_workq);
-#endif
}
KBASE_EXPORT_TEST_API(kbase_job_slot_term);
-#if KBASE_GPU_RESET_EN
/**
* kbasep_check_for_afbc_on_slot() - Check whether AFBC is in use on this slot
* @kbdev: kbase device pointer
@@ -935,7 +921,6 @@ static bool kbasep_check_for_afbc_on_slot(struct kbase_device *kbdev,
return ret;
}
-#endif /* KBASE_GPU_RESET_EN */
/**
* kbase_job_slot_softstop_swflags - Soft-stop a job with flags
@@ -992,7 +977,6 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
{
struct kbase_device *kbdev = kctx->kbdev;
bool stopped;
-#if KBASE_GPU_RESET_EN
/* We make the check for AFBC before evicting/stopping atoms. Note
* that no other thread can modify the slots whilst we have the
* hwaccess_lock. */
@@ -1000,12 +984,10 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3542)
&& kbasep_check_for_afbc_on_slot(kbdev, kctx, js,
target_katom);
-#endif
stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js,
target_katom,
JS_COMMAND_HARD_STOP);
-#if KBASE_GPU_RESET_EN
if (stopped && (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_8401) ||
kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_9510) ||
needs_workaround_for_afbc)) {
@@ -1020,7 +1002,6 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
kbase_reset_gpu_locked(kbdev);
}
}
-#endif
}
/**
@@ -1085,8 +1066,6 @@ void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
}
}
-
-#if KBASE_GPU_RESET_EN
static void kbase_debug_dump_registers(struct kbase_device *kbdev)
{
int i;
@@ -1129,7 +1108,6 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
struct kbase_device *kbdev;
ktime_t end_timestamp = ktime_get();
struct kbasep_js_device_data *js_devdata;
- bool try_schedule = false;
bool silent = false;
u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
@@ -1147,9 +1125,10 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0);
- /* Suspend vinstr.
- * This call will block until vinstr is suspended. */
- kbase_vinstr_suspend(kbdev->vinstr_ctx);
+ /* Disable GPU hardware counters.
+ * This call will block until counters are disabled.
+ */
+ kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
/* Make sure the timer has completed - this cannot be done from
* interrupt context, so this cannot be done within
@@ -1164,15 +1143,18 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
KBASE_RESET_GPU_NOT_PENDING);
kbase_disjoint_state_down(kbdev);
wake_up(&kbdev->hwaccess.backend.reset_wait);
- kbase_vinstr_resume(kbdev->vinstr_ctx);
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
return;
}
KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false);
- spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
- spin_lock(&kbdev->hwaccess_lock);
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
spin_lock(&kbdev->mmu_mask_change);
+ kbase_pm_reset_start_locked(kbdev);
+
/* We're about to flush out the IRQs and their bottom half's */
kbdev->irq_reset_flush = true;
@@ -1181,8 +1163,7 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
kbase_pm_disable_interrupts_nolock(kbdev);
spin_unlock(&kbdev->mmu_mask_change);
- spin_unlock(&kbdev->hwaccess_lock);
- spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
/* Ensure that any IRQ handlers have finished
* Must be done without any locks IRQ handlers will take */
@@ -1244,37 +1225,33 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
kbase_pm_enable_interrupts(kbdev);
- atomic_set(&kbdev->hwaccess.backend.reset_gpu,
- KBASE_RESET_GPU_NOT_PENDING);
-
kbase_disjoint_state_down(kbdev);
- wake_up(&kbdev->hwaccess.backend.reset_wait);
- if (!silent)
- dev_err(kbdev->dev, "Reset complete");
-
- if (js_devdata->nr_contexts_pullable > 0 && !kbdev->poweroff_pending)
- try_schedule = true;
-
mutex_unlock(&js_devdata->runpool_mutex);
mutex_lock(&kbdev->pm.lock);
+ kbase_pm_reset_complete(kbdev);
+
/* Find out what cores are required now */
kbase_pm_update_cores_state(kbdev);
/* Synchronously request and wait for those cores, because if
* instrumentation is enabled it would need them immediately. */
- kbase_pm_check_transitions_sync(kbdev);
+ kbase_pm_wait_for_desired_state(kbdev);
mutex_unlock(&kbdev->pm.lock);
+ atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+ KBASE_RESET_GPU_NOT_PENDING);
+
+ wake_up(&kbdev->hwaccess.backend.reset_wait);
+ if (!silent)
+ dev_err(kbdev->dev, "Reset complete");
+
/* Try submitting some jobs to restart processing */
- if (try_schedule) {
- KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u,
- 0);
- kbase_js_sched_all(kbdev);
- }
+ KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u, 0);
+ kbase_js_sched_all(kbdev);
/* Process any pending slot updates */
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -1283,8 +1260,10 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
kbase_pm_context_idle(kbdev);
- /* Release vinstr */
- kbase_vinstr_resume(kbdev->vinstr_ctx);
+ /* Re-enable GPU hardware counters */
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0);
}
@@ -1458,20 +1437,22 @@ void kbase_reset_gpu_locked(struct kbase_device *kbdev)
kbasep_try_reset_gpu_early_locked(kbdev);
}
-void kbase_reset_gpu_silent(struct kbase_device *kbdev)
+int kbase_reset_gpu_silent(struct kbase_device *kbdev)
{
if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
KBASE_RESET_GPU_NOT_PENDING,
KBASE_RESET_GPU_SILENT) !=
KBASE_RESET_GPU_NOT_PENDING) {
/* Some other thread is already resetting the GPU */
- return;
+ return -EAGAIN;
}
kbase_disjoint_state_up(kbdev);
queue_work(kbdev->hwaccess.backend.reset_workq,
&kbdev->hwaccess.backend.reset_work);
+
+ return 0;
}
bool kbase_reset_gpu_active(struct kbase_device *kbdev)
@@ -1482,4 +1463,3 @@ bool kbase_reset_gpu_active(struct kbase_device *kbdev)
return true;
}
-#endif /* KBASE_GPU_RESET_EN */
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h
index 831491e..452ddee 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h
@@ -159,31 +159,11 @@ void kbase_job_slot_halt(struct kbase_device *kbdev);
void kbase_job_slot_term(struct kbase_device *kbdev);
/**
- * kbase_gpu_cacheclean - Cause a GPU cache clean & flush
+ * kbase_gpu_cache_clean - Cause a GPU cache clean & flush
* @kbdev: Device pointer
*
* Caller must not be in IRQ context
*/
-void kbase_gpu_cacheclean(struct kbase_device *kbdev);
-
-static inline bool kbase_atom_needs_tiler(struct kbase_device *kbdev,
- base_jd_core_req core_req)
-{
- return core_req & BASE_JD_REQ_T;
-}
-
-static inline bool kbase_atom_needs_shaders(struct kbase_device *kbdev,
- base_jd_core_req core_req)
-{
- if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
- return true;
- if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
- BASE_JD_REQ_T) {
- /* Tiler only atom */
- return false;
- }
-
- return true;
-}
+void kbase_gpu_cache_clean(struct kbase_device *kbdev);
#endif /* _KBASE_JM_HWACCESS_H_ */
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
index bdb94be..c714582 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
@@ -30,6 +30,7 @@
#include <mali_kbase_jm.h>
#include <mali_kbase_js.h>
#include <mali_kbase_tlstream.h>
+#include <mali_kbase_hwcnt_context.h>
#include <mali_kbase_10969_workaround.h>
#include <backend/gpu/mali_kbase_cache_policy_backend.h>
#include <backend/gpu/mali_kbase_device_internal.h>
@@ -296,143 +297,14 @@ int kbase_backend_slot_free(struct kbase_device *kbdev, int js)
}
-static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
- struct kbase_jd_atom *katom);
-
-static bool kbasep_js_job_check_ref_cores(struct kbase_device *kbdev,
- int js,
- struct kbase_jd_atom *katom)
-{
- base_jd_core_req core_req = katom->core_req;
-
- /* NOTE: The following uses a number of FALLTHROUGHs to optimize the
- * calls to this function. Ending of the function is indicated by BREAK
- * OUT.
- */
- switch (katom->coreref_state) {
- /* State when job is first attempted to be run */
- case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
- /* Request the cores */
- kbase_pm_request_cores(kbdev,
- kbase_atom_needs_tiler(kbdev, core_req),
- kbase_atom_needs_shaders(kbdev, core_req));
-
- /* Proceed to next state */
- katom->coreref_state =
- KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
-
- /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
-
- case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
- {
- bool cores_ready;
-
- cores_ready = kbase_pm_cores_requested(kbdev,
- kbase_atom_needs_tiler(kbdev, core_req),
- kbase_atom_needs_shaders(kbdev, core_req));
-
- if (!cores_ready) {
- /* Stay in this state and return, to retry at
- * this state later.
- */
- KBASE_TRACE_ADD_SLOT_INFO(kbdev,
- JS_CORE_REF_REGISTER_INUSE_FAILED,
- katom->kctx, katom,
- katom->jc, js,
- (u32) 0);
- /* *** BREAK OUT: No state transition *** */
- break;
- }
- /* Proceed to next state */
- katom->coreref_state = KBASE_ATOM_COREREF_STATE_READY;
- /* *** BREAK OUT: Cores Ready *** */
- break;
- }
-
- default:
- KBASE_DEBUG_ASSERT_MSG(false,
- "Unhandled kbase_atom_coreref_state %d",
- katom->coreref_state);
- break;
- }
-
- return (katom->coreref_state == KBASE_ATOM_COREREF_STATE_READY);
-}
-
-static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
- struct kbase_jd_atom *katom)
-{
- base_jd_core_req core_req = katom->core_req;
-
- KBASE_DEBUG_ASSERT(kbdev != NULL);
- KBASE_DEBUG_ASSERT(katom != NULL);
-
- switch (katom->coreref_state) {
- case KBASE_ATOM_COREREF_STATE_READY:
- /* State where atom was submitted to the HW - just proceed to
- * power-down */
-
- /* *** FALLTHROUGH *** */
-
- case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
- /* State where cores were requested */
- kbase_pm_release_cores(kbdev,
- kbase_atom_needs_tiler(kbdev, core_req),
- kbase_atom_needs_shaders(kbdev, core_req));
- break;
-
- case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
- /* Initial state - nothing required */
- break;
-
- default:
- KBASE_DEBUG_ASSERT_MSG(false,
- "Unhandled coreref_state: %d",
- katom->coreref_state);
- break;
- }
-
- katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
-}
-
-static void kbasep_js_job_check_deref_cores_nokatom(struct kbase_device *kbdev,
- base_jd_core_req core_req,
- enum kbase_atom_coreref_state coreref_state)
-{
- KBASE_DEBUG_ASSERT(kbdev != NULL);
-
- switch (coreref_state) {
- case KBASE_ATOM_COREREF_STATE_READY:
- /* State where atom was submitted to the HW - just proceed to
- * power-down */
-
- /* *** FALLTHROUGH *** */
-
- case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
- /* State where cores were requested */
- kbase_pm_release_cores(kbdev,
- kbase_atom_needs_tiler(kbdev, core_req),
- kbase_atom_needs_shaders(kbdev, core_req));
- break;
-
- case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
- /* Initial state - nothing required */
- break;
-
- default:
- KBASE_DEBUG_ASSERT_MSG(false,
- "Unhandled coreref_state: %d",
- coreref_state);
- break;
- }
-}
-
static void kbase_gpu_release_atom(struct kbase_device *kbdev,
struct kbase_jd_atom *katom,
ktime_t *end_timestamp)
{
struct kbase_context *kctx = katom->kctx;
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
switch (katom->gpu_rb_state) {
case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB:
/* Should be impossible */
@@ -468,26 +340,47 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev,
break;
case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION:
+ if (kbase_jd_katom_is_protected(katom) &&
+ (katom->protected_state.enter !=
+ KBASE_ATOM_ENTER_PROTECTED_CHECK) &&
+ (katom->protected_state.enter !=
+ KBASE_ATOM_ENTER_PROTECTED_HWCNT))
+ kbase_pm_protected_override_disable(kbdev);
+ if (!kbase_jd_katom_is_protected(katom) &&
+ (katom->protected_state.exit !=
+ KBASE_ATOM_EXIT_PROTECTED_CHECK) &&
+ (katom->protected_state.exit !=
+ KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT))
+ kbase_pm_protected_override_disable(kbdev);
+
if (katom->protected_state.enter !=
KBASE_ATOM_ENTER_PROTECTED_CHECK ||
katom->protected_state.exit !=
KBASE_ATOM_EXIT_PROTECTED_CHECK)
kbdev->protected_mode_transition = false;
-
+ /* If the atom has suspended hwcnt but has not yet entered
+ * protected mode, then resume hwcnt now. If the GPU is now in
+ * protected mode then hwcnt will be resumed by GPU reset so
+ * don't resume it here.
+ */
if (kbase_jd_katom_is_protected(katom) &&
((katom->protected_state.enter ==
KBASE_ATOM_ENTER_PROTECTED_IDLE_L2) ||
(katom->protected_state.enter ==
- KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY) ||
- (katom->protected_state.enter ==
- KBASE_ATOM_ENTER_PROTECTED_FINISHED))) {
- kbase_vinstr_resume(kbdev->vinstr_ctx);
+ KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY))) {
+ WARN_ON(!kbdev->protected_mode_hwcnt_disabled);
+ kbdev->protected_mode_hwcnt_desired = true;
+ if (kbdev->protected_mode_hwcnt_disabled) {
+ kbase_hwcnt_context_enable(
+ kbdev->hwcnt_gpu_ctx);
+ kbdev->protected_mode_hwcnt_disabled = false;
+ }
}
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) {
if (katom->atom_flags &
KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) {
- kbdev->l2_users_count--;
+ kbase_pm_protected_l2_override(kbdev, false);
katom->atom_flags &=
~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT;
}
@@ -512,6 +405,8 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev,
static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev,
struct kbase_jd_atom *katom)
{
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
kbase_gpu_release_atom(kbdev, katom, NULL);
katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS;
}
@@ -630,9 +525,7 @@ static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev)
/* The protected mode disable callback will be called as part of reset
*/
- kbase_reset_gpu_silent(kbdev);
-
- return 0;
+ return kbase_reset_gpu_silent(kbdev);
}
static int kbase_jm_protected_entry(struct kbase_device *kbdev,
@@ -640,6 +533,8 @@ static int kbase_jm_protected_entry(struct kbase_device *kbdev,
{
int err = 0;
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
err = kbase_gpu_protected_mode_enter(kbdev);
/*
@@ -648,14 +543,23 @@ static int kbase_jm_protected_entry(struct kbase_device *kbdev,
*/
kbdev->protected_mode_transition = false;
+ kbase_pm_protected_override_disable(kbdev);
+ kbase_pm_update_cores_state_nolock(kbdev);
KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev);
if (err) {
/*
* Failed to switch into protected mode, resume
- * vinstr core and fail atom.
+ * GPU hwcnt and fail atom.
*/
- kbase_vinstr_resume(kbdev->vinstr_ctx);
+ WARN_ON(!kbdev->protected_mode_hwcnt_disabled);
+ kbdev->protected_mode_hwcnt_desired = true;
+ if (kbdev->protected_mode_hwcnt_disabled) {
+ kbase_hwcnt_context_enable(
+ kbdev->hwcnt_gpu_ctx);
+ kbdev->protected_mode_hwcnt_disabled = false;
+ }
+
katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
/*
@@ -692,6 +596,8 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev,
{
int err = 0;
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
switch (katom[idx]->protected_state.enter) {
case KBASE_ATOM_ENTER_PROTECTED_CHECK:
KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev);
@@ -700,25 +606,41 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev,
* there are no atoms currently on the GPU. */
WARN_ON(kbdev->protected_mode_transition);
WARN_ON(kbase_gpu_atoms_submitted_any(kbdev));
+ /* If hwcnt is disabled, it means we didn't clean up correctly
+ * during last exit from protected mode.
+ */
+ WARN_ON(kbdev->protected_mode_hwcnt_disabled);
- kbdev->protected_mode_transition = true;
katom[idx]->protected_state.enter =
- KBASE_ATOM_ENTER_PROTECTED_VINSTR;
+ KBASE_ATOM_ENTER_PROTECTED_HWCNT;
+
+ kbdev->protected_mode_transition = true;
/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
- case KBASE_ATOM_ENTER_PROTECTED_VINSTR:
- if (kbase_vinstr_try_suspend(kbdev->vinstr_ctx) < 0) {
- /*
- * We can't switch now because
- * the vinstr core state switch
- * is not done yet.
- */
+ case KBASE_ATOM_ENTER_PROTECTED_HWCNT:
+ /* See if we can get away with disabling hwcnt atomically */
+ kbdev->protected_mode_hwcnt_desired = false;
+ if (!kbdev->protected_mode_hwcnt_disabled) {
+ if (kbase_hwcnt_context_disable_atomic(
+ kbdev->hwcnt_gpu_ctx))
+ kbdev->protected_mode_hwcnt_disabled = true;
+ }
+
+ /* We couldn't disable atomically, so kick off a worker */
+ if (!kbdev->protected_mode_hwcnt_disabled) {
+#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE
+ queue_work(system_wq,
+ &kbdev->protected_mode_hwcnt_disable_work);
+#else
+ queue_work(system_highpri_wq,
+ &kbdev->protected_mode_hwcnt_disable_work);
+#endif
return -EAGAIN;
}
/* Once reaching this point GPU must be
- * switched to protected mode or vinstr
+ * switched to protected mode or hwcnt
* re-enabled. */
/*
@@ -729,6 +651,7 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev,
katom[idx]->protected_state.enter =
KBASE_ATOM_ENTER_PROTECTED_IDLE_L2;
+ kbase_pm_protected_override_enable(kbdev);
kbase_pm_update_cores_state_nolock(kbdev);
/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
@@ -764,7 +687,8 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev,
* Power on L2 caches; this will also result in the
* correct value written to coherency enable register.
*/
- kbase_pm_request_l2_caches_nolock(kbdev);
+ kbase_pm_protected_l2_override(kbdev, true);
+
/*
* Set the flag on the atom that additional
* L2 references are taken.
@@ -787,14 +711,15 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev,
* Check that L2 caches are powered and, if so,
* enter protected mode.
*/
- if (kbdev->pm.backend.l2_powered != 0) {
+ if (kbdev->pm.backend.l2_state == KBASE_L2_ON) {
/*
* Remove additional L2 reference and reset
* the atom flag which denotes it.
*/
if (katom[idx]->atom_flags &
KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) {
- kbdev->l2_users_count--;
+ kbase_pm_protected_l2_override(kbdev,
+ false);
katom[idx]->atom_flags &=
~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT;
}
@@ -825,6 +750,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
{
int err = 0;
+ lockdep_assert_held(&kbdev->hwaccess_lock);
switch (katom[idx]->protected_state.exit) {
case KBASE_ATOM_EXIT_PROTECTED_CHECK:
@@ -844,6 +770,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
KBASE_ATOM_EXIT_PROTECTED_IDLE_L2;
kbdev->protected_mode_transition = true;
+ kbase_pm_protected_override_enable(kbdev);
kbase_pm_update_cores_state_nolock(kbdev);
/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
@@ -865,8 +792,12 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
/* Issue the reset to the GPU */
err = kbase_gpu_protected_mode_reset(kbdev);
+ if (err == -EAGAIN)
+ return -EAGAIN;
+
if (err) {
kbdev->protected_mode_transition = false;
+ kbase_pm_protected_override_disable(kbdev);
/* Failed to exit protected mode, fail atom */
katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
@@ -880,7 +811,16 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
kbase_jm_return_atom_to_js(kbdev, katom[idx]);
}
- kbase_vinstr_resume(kbdev->vinstr_ctx);
+ /* If we're exiting from protected mode, hwcnt must have
+ * been disabled during entry.
+ */
+ WARN_ON(!kbdev->protected_mode_hwcnt_disabled);
+ kbdev->protected_mode_hwcnt_desired = true;
+ if (kbdev->protected_mode_hwcnt_disabled) {
+ kbase_hwcnt_context_enable(
+ kbdev->hwcnt_gpu_ctx);
+ kbdev->protected_mode_hwcnt_disabled = false;
+ }
return -EINVAL;
}
@@ -909,6 +849,9 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
lockdep_assert_held(&kbdev->hwaccess_lock);
+ if (kbase_reset_gpu_active(kbdev))
+ return;
+
for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
struct kbase_jd_atom *katom[2];
int idx;
@@ -1014,9 +957,8 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
break;
}
- cores_ready =
- kbasep_js_job_check_ref_cores(kbdev, js,
- katom[idx]);
+ cores_ready = kbase_pm_cores_requested(kbdev,
+ true);
if (katom[idx]->event_code ==
BASE_JD_EVENT_PM_EVENT) {
@@ -1204,19 +1146,11 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
* corruption we need to flush the cache manually before any
* affected memory gets reused. */
katom->need_cache_flush_cores_retained = true;
- kbase_pm_request_cores(kbdev,
- kbase_atom_needs_tiler(kbdev, katom->core_req),
- kbase_atom_needs_shaders(kbdev,
- katom->core_req));
} else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10676)) {
if (kbdev->gpu_props.num_core_groups > 1 &&
katom->device_nr >= 1) {
dev_info(kbdev->dev, "JD: Flushing cache due to PRLAM-10676\n");
katom->need_cache_flush_cores_retained = true;
- kbase_pm_request_cores(kbdev,
- kbase_atom_needs_tiler(kbdev, katom->core_req),
- kbase_atom_needs_shaders(kbdev,
- katom->core_req));
}
}
@@ -1408,10 +1342,6 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
break;
if (katom->protected_state.exit ==
KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) {
- KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(kbdev);
-
- kbase_vinstr_resume(kbdev->vinstr_ctx);
-
/* protected mode sanity checks */
KBASE_DEBUG_ASSERT_MSG(
kbase_jd_katom_is_protected(katom) == kbase_gpu_in_protected_mode(kbdev),
@@ -1434,8 +1364,6 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
* it will be processed again from the starting state.
*/
if (keep_in_jm_rb) {
- kbasep_js_job_check_deref_cores(kbdev, katom);
- katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
/* As the atom was not removed, increment the
* index so that we read the correct atom in the
@@ -1454,7 +1382,19 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
}
}
+ /* Re-enable GPU hardware counters if we're resetting from protected
+ * mode.
+ */
+ kbdev->protected_mode_hwcnt_desired = true;
+ if (kbdev->protected_mode_hwcnt_disabled) {
+ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+ kbdev->protected_mode_hwcnt_disabled = false;
+
+ KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(kbdev);
+ }
+
kbdev->protected_mode_transition = false;
+ kbase_pm_protected_override_disable(kbdev);
}
static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev,
@@ -1475,6 +1415,8 @@ static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev,
u32 action,
bool disjoint)
{
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
kbase_gpu_mark_atom_for_return(kbdev, katom);
katom->kctx->blocked_js[katom->slot_nr][katom->sched_priority] = true;
@@ -1698,52 +1640,13 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
return ret;
}
-void kbase_gpu_cacheclean(struct kbase_device *kbdev)
-{
- /* Limit the number of loops to avoid a hang if the interrupt is missed
- */
- u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
-
- mutex_lock(&kbdev->cacheclean_lock);
-
- /* use GPU_COMMAND completion solution */
- /* clean & invalidate the caches */
- KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
- kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
- GPU_COMMAND_CLEAN_INV_CACHES);
-
- /* wait for cache flush to complete before continuing */
- while (--max_loops &&
- (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) &
- CLEAN_CACHES_COMPLETED) == 0)
- ;
-
- /* clear the CLEAN_CACHES_COMPLETED irq */
- KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u,
- CLEAN_CACHES_COMPLETED);
- kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR),
- CLEAN_CACHES_COMPLETED);
- KBASE_DEBUG_ASSERT_MSG(kbdev->hwcnt.backend.state !=
- KBASE_INSTR_STATE_CLEANING,
- "Instrumentation code was cleaning caches, but Job Management code cleared their IRQ - Instrumentation code will now hang.");
-
- mutex_unlock(&kbdev->cacheclean_lock);
-}
-
-void kbase_backend_cacheclean(struct kbase_device *kbdev,
+void kbase_backend_cache_clean(struct kbase_device *kbdev,
struct kbase_jd_atom *katom)
{
if (katom->need_cache_flush_cores_retained) {
- unsigned long flags;
-
- kbase_gpu_cacheclean(kbdev);
+ kbase_gpu_start_cache_clean(kbdev);
+ kbase_gpu_wait_cache_clean(kbdev);
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- kbase_pm_release_cores(kbdev,
- kbase_atom_needs_tiler(kbdev, katom->core_req),
- kbase_atom_needs_shaders(kbdev,
- katom->core_req));
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
katom->need_cache_flush_cores_retained = false;
}
}
@@ -1755,7 +1658,7 @@ void kbase_backend_complete_wq(struct kbase_device *kbdev,
* If cache flush required due to HW workaround then perform the flush
* now
*/
- kbase_backend_cacheclean(kbdev, katom);
+ kbase_backend_cache_clean(kbdev, katom);
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10969) &&
(katom->core_req & BASE_JD_REQ_FS) &&
@@ -1774,24 +1677,11 @@ void kbase_backend_complete_wq(struct kbase_device *kbdev,
katom->atom_flags |= KBASE_KATOM_FLAGS_RERUN;
}
}
-
- /* Clear the coreref_state now - while check_deref_cores() may not have
- * been called yet, the caller will have taken a copy of this field. If
- * this is not done, then if the atom is re-scheduled (following a soft
- * stop) then the core reference would not be retaken. */
- katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
}
void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
- base_jd_core_req core_req,
- enum kbase_atom_coreref_state coreref_state)
+ base_jd_core_req core_req)
{
- unsigned long flags;
-
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- kbasep_js_job_check_deref_cores_nokatom(kbdev, core_req, coreref_state);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
if (!kbdev->pm.active_count) {
mutex_lock(&kbdev->js_data.runpool_mutex);
mutex_lock(&kbdev->pm.lock);
@@ -1830,6 +1720,3 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev)
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
-
-
-
diff --git a/mali_kbase/backend/gpu/mali_kbase_js_backend.c b/mali_kbase/backend/gpu/mali_kbase_js_backend.c
index 205a31d..7307be4 100644
--- a/mali_kbase/backend/gpu/mali_kbase_js_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_js_backend.c
@@ -250,14 +250,12 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
}
}
}
-#if KBASE_GPU_RESET_EN
if (reset_needed) {
dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issueing GPU soft-reset to resolve.");
if (kbase_prepare_to_reset_gpu_locked(kbdev))
kbase_reset_gpu_locked(kbdev);
}
-#endif /* KBASE_GPU_RESET_EN */
/* the timer is re-issued if there are contexts in the run-pool */
if (backend->timer_running)
diff --git a/mali_kbase/backend/gpu/mali_kbase_mmu_hw_direct.c b/mali_kbase/backend/gpu/mali_kbase_mmu_hw_direct.c
index f3487d9..ba5bf72 100644
--- a/mali_kbase/backend/gpu/mali_kbase_mmu_hw_direct.c
+++ b/mali_kbase/backend/gpu/mali_kbase_mmu_hw_direct.c
@@ -242,16 +242,20 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as)
{
struct kbase_mmu_setup *current_setup = &as->current_setup;
- u32 transcfg = 0;
+ u64 transcfg = 0;
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) {
- transcfg = current_setup->transcfg & 0xFFFFFFFFUL;
+ transcfg = current_setup->transcfg;
/* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK */
/* Clear PTW_MEMATTR bits */
transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK;
/* Enable correct PTW_MEMATTR bits */
transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK;
+ /* Ensure page-tables reads use read-allocate cache-policy in
+ * the L2
+ */
+ transcfg |= AS_TRANSCFG_R_ALLOCATE;
if (kbdev->system_coherency == COHERENCY_ACE) {
/* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) */
@@ -264,7 +268,7 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as)
kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO),
transcfg);
kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI),
- (current_setup->transcfg >> 32) & 0xFFFFFFFFUL);
+ (transcfg >> 32) & 0xFFFFFFFFUL);
} else {
if (kbdev->system_coherency == COHERENCY_ACE)
current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER;
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
index a448a3b..c19a0d1 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
@@ -31,11 +31,13 @@
#include <mali_kbase_pm.h>
#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_hwcnt_context.h>
#include <backend/gpu/mali_kbase_js_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <backend/gpu/mali_kbase_jm_internal.h>
static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data);
+static void kbase_pm_hwcnt_disable_worker(struct work_struct *data);
int kbase_pm_runtime_init(struct kbase_device *kbdev)
{
@@ -112,7 +114,7 @@ void kbase_pm_register_access_disable(struct kbase_device *kbdev)
kbdev->pm.backend.gpu_powered = false;
}
-int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
+int kbase_hwaccess_pm_early_init(struct kbase_device *kbdev)
{
int ret = 0;
@@ -128,12 +130,12 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work,
kbase_pm_gpu_poweroff_wait_wq);
+ kbdev->pm.backend.ca_cores_enabled = ~0ull;
kbdev->pm.backend.gpu_powered = false;
kbdev->pm.suspending = false;
#ifdef CONFIG_MALI_DEBUG
kbdev->pm.backend.driver_ready_for_irqs = false;
#endif /* CONFIG_MALI_DEBUG */
- kbdev->pm.backend.gpu_in_desired_state = true;
init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait);
/* Initialise the metrics subsystem */
@@ -141,9 +143,6 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
if (ret)
return ret;
- init_waitqueue_head(&kbdev->pm.backend.l2_powered_wait);
- kbdev->pm.backend.l2_powered = 0;
-
init_waitqueue_head(&kbdev->pm.backend.reset_done_wait);
kbdev->pm.backend.reset_done = false;
@@ -161,8 +160,13 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
if (kbase_pm_policy_init(kbdev) != 0)
goto pm_policy_fail;
+ if (kbase_pm_state_machine_init(kbdev) != 0)
+ goto pm_state_machine_fail;
+
return 0;
+pm_state_machine_fail:
+ kbase_pm_policy_term(kbdev);
pm_policy_fail:
kbase_pm_ca_term(kbdev);
workq_fail:
@@ -170,6 +174,19 @@ workq_fail:
return -EINVAL;
}
+int kbase_hwaccess_pm_late_init(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ kbdev->pm.backend.hwcnt_desired = false;
+ kbdev->pm.backend.hwcnt_disabled = true;
+ INIT_WORK(&kbdev->pm.backend.hwcnt_disable_work,
+ kbase_pm_hwcnt_disable_worker);
+ kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
+
+ return 0;
+}
+
void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume)
{
lockdep_assert_held(&kbdev->pm.lock);
@@ -178,6 +195,17 @@ void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume)
* kbase_pm_clock_off() */
kbase_pm_clock_on(kbdev, is_resume);
+ if (!is_resume) {
+ unsigned long flags;
+
+ /* Force update of L2 state - if we have abandoned a power off
+ * then this may be required to power the L2 back on.
+ */
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbase_pm_update_state(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ }
+
/* Update core status as required by the policy */
kbase_pm_update_cores_state(kbdev);
@@ -194,36 +222,24 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)
struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
unsigned long flags;
-#if !PLATFORM_POWER_DOWN_ONLY
- /* Wait for power transitions to complete. We do this with no locks held
- * so that we don't deadlock with any pending workqueues */
- kbase_pm_check_transitions_sync(kbdev);
-#endif /* !PLATFORM_POWER_DOWN_ONLY */
+ if (!platform_power_down_only)
+ /* Wait for power transitions to complete. We do this with no locks held
+ * so that we don't deadlock with any pending workqueues.
+ */
+ kbase_pm_wait_for_desired_state(kbdev);
mutex_lock(&js_devdata->runpool_mutex);
mutex_lock(&kbdev->pm.lock);
-#if PLATFORM_POWER_DOWN_ONLY
- if (kbdev->pm.backend.gpu_powered) {
- if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2)) {
- /* If L2 cache is powered then we must flush it before
- * we power off the GPU. Normally this would have been
- * handled when the L2 was powered off. */
- kbase_gpu_cacheclean(kbdev);
- }
- }
-#endif /* PLATFORM_POWER_DOWN_ONLY */
-
if (!backend->poweron_required) {
-#if !PLATFORM_POWER_DOWN_ONLY
- unsigned long flags;
+ if (!platform_power_down_only) {
+ unsigned long flags;
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- WARN_ON(kbdev->l2_available_bitmap ||
- kbdev->shader_available_bitmap ||
- kbdev->tiler_available_bitmap);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-#endif /* !PLATFORM_POWER_DOWN_ONLY */
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ WARN_ON(backend->shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF ||
+ backend->l2_state != KBASE_L2_OFF);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ }
/* Disable interrupts and turn the clock off */
if (!kbase_pm_clock_off(kbdev, backend->poweroff_is_suspend)) {
@@ -256,6 +272,8 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)
backend->poweroff_wait_in_progress = false;
if (backend->poweron_required) {
backend->poweron_required = false;
+ kbdev->pm.backend.l2_desired = true;
+ kbase_pm_update_state(kbdev);
kbase_pm_update_cores_state_nolock(kbdev);
kbase_backend_slot_update(kbdev);
}
@@ -267,6 +285,45 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)
wake_up(&kbdev->pm.backend.poweroff_wait);
}
+static void kbase_pm_hwcnt_disable_worker(struct work_struct *data)
+{
+ struct kbase_device *kbdev = container_of(data, struct kbase_device,
+ pm.backend.hwcnt_disable_work);
+ struct kbase_pm_device_data *pm = &kbdev->pm;
+ struct kbase_pm_backend_data *backend = &pm->backend;
+ unsigned long flags;
+
+ bool do_disable;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ do_disable = !backend->hwcnt_desired && !backend->hwcnt_disabled;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ if (!do_disable)
+ return;
+
+ kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ do_disable = !backend->hwcnt_desired && !backend->hwcnt_disabled;
+
+ if (do_disable) {
+ /* PM state did not change while we were doing the disable,
+ * so commit the work we just performed and continue the state
+ * machine.
+ */
+ backend->hwcnt_disabled = true;
+ kbase_pm_update_state(kbdev);
+ } else {
+ /* PM state was updated while we were doing the disable,
+ * so we need to undo the disable we just performed.
+ */
+ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+ }
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
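
    The worker above follows a check, act, re-check pattern: it samples the
    hwcnt_desired/hwcnt_disabled pair under the lock, performs the potentially
    slow disable with the lock dropped, and only commits the result if the PM
    state has not moved in the meantime, otherwise it undoes the disable. A
    minimal standalone sketch of the same pattern, assuming pthreads;
    counters_disable()/counters_enable() are placeholders, not kbase APIs:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;
    static bool hwcnt_desired;   /* protected by state_lock */
    static bool hwcnt_disabled;  /* protected by state_lock */

    static void counters_disable(void) { /* slow path in real life */ }
    static void counters_enable(void)  { }

    /* Worker body: disable the counters without holding the lock, then only
     * commit the result if nobody asked for them again in the meantime.
     */
    static void *disable_worker(void *arg)
    {
        bool do_disable;

        (void)arg;

        pthread_mutex_lock(&state_lock);
        do_disable = !hwcnt_desired && !hwcnt_disabled;
        pthread_mutex_unlock(&state_lock);

        if (!do_disable)
            return NULL;

        counters_disable();              /* slow work, lock not held */

        pthread_mutex_lock(&state_lock);
        if (!hwcnt_desired && !hwcnt_disabled)
            hwcnt_disabled = true;       /* commit; the real worker then
                                          * re-runs the PM state machine */
        else
            counters_enable();           /* state changed underneath us: undo */
        pthread_mutex_unlock(&state_lock);
        return NULL;
    }

    int main(void)
    {
        pthread_t t;

        pthread_create(&t, NULL, disable_worker, NULL);
        pthread_join(t, NULL);
        printf("hwcnt_disabled = %d\n", hwcnt_disabled);
        return 0;
    }
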
void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend)
{
unsigned long flags;
@@ -274,29 +331,36 @@ void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend)
lockdep_assert_held(&kbdev->pm.lock);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- if (!kbdev->pm.backend.poweroff_wait_in_progress) {
- /* Force all cores off */
- kbdev->pm.backend.desired_shader_state = 0;
- kbdev->pm.backend.desired_tiler_state = 0;
-
- /* Force all cores to be unavailable, in the situation where
- * transitions are in progress for some cores but not others,
- * and kbase_pm_check_transitions_nolock can not immediately
- * power off the cores */
- kbdev->shader_available_bitmap = 0;
- kbdev->tiler_available_bitmap = 0;
- kbdev->l2_available_bitmap = 0;
-
- kbdev->pm.backend.poweroff_wait_in_progress = true;
- kbdev->pm.backend.poweroff_is_suspend = is_suspend;
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
- /*Kick off wq here. Callers will have to wait*/
- queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq,
- &kbdev->pm.backend.gpu_poweroff_wait_work);
+ spin_lock(&kbdev->pm.backend.gpu_powered_lock);
+ if (!kbdev->pm.backend.gpu_powered) {
+ spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
+ goto unlock_hwaccess;
} else {
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
}
+
+ if (kbdev->pm.backend.poweroff_wait_in_progress)
+ goto unlock_hwaccess;
+
+ /* Force all cores off */
+ kbdev->pm.backend.shaders_desired = false;
+ kbdev->pm.backend.l2_desired = false;
+
+ kbdev->pm.backend.poweroff_wait_in_progress = true;
+ kbdev->pm.backend.poweroff_is_suspend = is_suspend;
+ kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off = true;
+
+ /* l2_desired being false should cause the state machine to
+ * start powering off the L2. When it actually is powered off,
+ * the interrupt handler will call kbase_pm_l2_update_state()
+ * again, which will trigger the kbase_pm_gpu_poweroff_wait_wq.
+ * Callers of this function will need to wait on poweroff_wait.
+ */
+ kbase_pm_update_state(kbdev);
+
+unlock_hwaccess:
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
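
    With this change the poweroff path is a hand-off rather than a direct
    power-down: the function above only drops l2_desired and sets
    invoke_poweroff_wait_wq_when_l2_off, and the L2 state machine
    (kbase_pm_l2_update_state(), later in this patch) queues the poweroff
    worker once it actually reaches KBASE_L2_OFF. A single-threaded toy model
    of that hand-off, with invented names (update_state(),
    poweroff_complete()):

    #include <stdbool.h>
    #include <stdio.h>

    enum l2_state_model { L2_OFF, L2_PEND_OFF, L2_ON };

    static enum l2_state_model state = L2_ON;
    static bool l2_desired = true;
    static bool invoke_poweroff_when_l2_off;

    static void poweroff_complete(void)
    {
        printf("GPU clock can now be turned off\n");
    }

    /* Stand-in for the L2 state machine: step towards the desired state and,
     * once OFF, fire the deferred poweroff exactly once.
     */
    static void update_state(void)
    {
        if (!l2_desired && state == L2_ON)
            state = L2_PEND_OFF;        /* PWROFF issued, waiting for HW */
        if (!l2_desired && state == L2_PEND_OFF)
            state = L2_OFF;             /* pretend the transition finished */

        if (state == L2_OFF && invoke_poweroff_when_l2_off) {
            invoke_poweroff_when_l2_off = false;
            poweroff_complete();        /* real driver queues a work item */
        }
    }

    static void do_poweroff(void)
    {
        l2_desired = false;
        invoke_poweroff_when_l2_off = true;
        update_state();                 /* kick the state machine */
    }

    int main(void)
    {
        do_poweroff();
        return 0;
    }
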
static bool is_poweroff_in_progress(struct kbase_device *kbdev)
@@ -341,8 +405,6 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
return ret;
}
- kbasep_pm_init_core_use_bitmaps(kbdev);
-
kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] =
kbdev->pm.debug_core_mask[1] =
kbdev->pm.debug_core_mask[2] =
@@ -385,20 +447,20 @@ void kbase_hwaccess_pm_halt(struct kbase_device *kbdev)
KBASE_DEBUG_ASSERT(kbdev != NULL);
mutex_lock(&kbdev->pm.lock);
- kbase_pm_cancel_deferred_poweroff(kbdev);
kbase_pm_do_poweroff(kbdev, false);
mutex_unlock(&kbdev->pm.lock);
}
KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt);
-void kbase_hwaccess_pm_term(struct kbase_device *kbdev)
+void kbase_hwaccess_pm_early_term(struct kbase_device *kbdev)
{
KBASE_DEBUG_ASSERT(kbdev != NULL);
KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0);
KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests == 0);
/* Free any resources the policy allocated */
+ kbase_pm_state_machine_term(kbdev);
kbase_pm_policy_term(kbdev);
kbase_pm_ca_term(kbdev);
@@ -408,16 +470,29 @@ void kbase_hwaccess_pm_term(struct kbase_device *kbdev)
destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq);
}
+void kbase_hwaccess_pm_late_term(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ cancel_work_sync(&kbdev->pm.backend.hwcnt_disable_work);
+
+ if (kbdev->pm.backend.hwcnt_disabled) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ }
+}
+
void kbase_pm_power_changed(struct kbase_device *kbdev)
{
- bool cores_are_available;
unsigned long flags;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+ kbase_pm_update_state(kbdev);
- if (cores_are_available)
- kbase_backend_slot_update(kbdev);
+ kbase_backend_slot_update(kbdev);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
@@ -455,7 +530,6 @@ void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
mutex_lock(&js_devdata->runpool_mutex);
mutex_lock(&kbdev->pm.lock);
- kbase_pm_cancel_deferred_poweroff(kbdev);
kbase_pm_do_poweroff(kbdev, true);
kbase_backend_timer_suspend(kbdev);
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c
index d4e8e42..2cb9452 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c
@@ -30,15 +30,15 @@
int kbase_pm_ca_init(struct kbase_device *kbdev)
{
- struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend;
#ifdef CONFIG_MALI_DEVFREQ
+ struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend;
+
if (kbdev->current_core_mask)
pm_backend->ca_cores_enabled = kbdev->current_core_mask;
else
pm_backend->ca_cores_enabled =
kbdev->gpu_props.props.raw_props.shader_present;
#endif
- pm_backend->ca_in_transition = false;
return 0;
}
@@ -55,10 +55,17 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ if (!(core_mask & kbdev->pm.debug_core_mask_all)) {
+ dev_err(kbdev->dev, "OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n",
+ core_mask, kbdev->pm.debug_core_mask_all);
+ goto unlock;
+ }
+
pm_backend->ca_cores_enabled = core_mask;
- kbase_pm_update_cores_state_nolock(kbdev);
+ kbase_pm_update_state(kbdev);
+unlock:
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX\n",
@@ -89,19 +96,12 @@ KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask);
void kbase_pm_ca_instr_enable(struct kbase_device *kbdev)
{
- unsigned long flags;
-
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ lockdep_assert_held(&kbdev->hwaccess_lock);
kbdev->pm.backend.instr_enabled = true;
-
- kbase_pm_update_cores_state_nolock(kbdev);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
void kbase_pm_ca_instr_disable(struct kbase_device *kbdev)
{
lockdep_assert_held(&kbdev->hwaccess_lock);
kbdev->pm.backend.instr_enabled = false;
-
- kbase_pm_update_cores_state_nolock(kbdev);
}
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_ca.h b/mali_kbase/backend/gpu/mali_kbase_pm_ca.h
index 2b005c9..274581d 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_ca.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_ca.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h
index 7fe8eb3..0cff22e 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h
@@ -29,10 +29,8 @@
#include "mali_kbase_pm_always_on.h"
#include "mali_kbase_pm_coarse_demand.h"
-#include "mali_kbase_pm_demand.h"
#if !MALI_CUSTOMER_RELEASE
-#include "mali_kbase_pm_demand_always_powered.h"
-#include "mali_kbase_pm_fast_start.h"
+#include "mali_kbase_pm_always_on_demand.h"
#endif
/* Forward definition - see mali_kbase.h */
@@ -65,6 +63,70 @@ enum kbase_pm_core_type {
};
/**
+ * enum kbase_l2_core_state - The states used for the L2 cache & tiler power
+ * state machine.
+ *
+ * @KBASE_L2_OFF: The L2 cache and tiler are off
+ * @KBASE_L2_PEND_ON: The L2 cache and tiler are powering on
+ * @KBASE_L2_ON_HWCNT_ENABLE: The L2 cache and tiler are on, and hwcnt is being
+ * enabled
+ * @KBASE_L2_ON: The L2 cache and tiler are on, and hwcnt is enabled
+ * @KBASE_L2_ON_HWCNT_DISABLE: The L2 cache and tiler are on, and hwcnt is being
+ * disabled
+ * @KBASE_L2_POWER_DOWN: The L2 cache and tiler are about to be powered off
+ * @KBASE_L2_PEND_OFF: The L2 cache and tiler are powering off
+ * @KBASE_L2_RESET_WAIT: The GPU is resetting, L2 cache and tiler power state
+ * are unknown
+ */
+enum kbase_l2_core_state {
+ KBASE_L2_OFF = 0,
+ KBASE_L2_PEND_ON,
+ KBASE_L2_ON_HWCNT_ENABLE,
+ KBASE_L2_ON,
+ KBASE_L2_ON_HWCNT_DISABLE,
+ KBASE_L2_POWER_DOWN,
+ KBASE_L2_PEND_OFF,
+ KBASE_L2_RESET_WAIT
+};
+
+/**
+ * enum kbase_shader_core_state - The states used for the shaders' state machine.
+ *
+ * @KBASE_SHADERS_OFF_CORESTACK_OFF: The shaders and core stacks are off
+ * @KBASE_SHADERS_OFF_CORESTACK_PEND_ON: The shaders are off, core stacks have
+ * been requested to power on
+ * @KBASE_SHADERS_PEND_ON_CORESTACK_ON: Core stacks are on, shaders have been
+ * requested to power on
+ * @KBASE_SHADERS_ON_CORESTACK_ON: The shaders and core stacks are on
+ * @KBASE_SHADERS_WAIT_OFF_CORESTACK_ON: The shaders have been requested to
+ * power off, but they remain on for the
+ * duration of the hysteresis timer
+ * @KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON: The hysteresis timer has expired
+ * @KBASE_SHADERS_PEND_OFF_CORESTACK_ON: The core stacks are on, and the shaders
+ * have been requested to power off
+ * @KBASE_SHADERS_OFF_CORESTACK_PEND_OFF: The shaders are off, and the core stacks
+ * have been requested to power off
+ * @KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF: Shaders and corestacks are
+ * off, but the tick timer
+ * cancellation is still
+ * pending.
+ * @KBASE_SHADERS_RESET_WAIT: The GPU is resetting, shader and core stack power
+ * states are unknown
+ */
+enum kbase_shader_core_state {
+ KBASE_SHADERS_OFF_CORESTACK_OFF = 0,
+ KBASE_SHADERS_OFF_CORESTACK_PEND_ON,
+ KBASE_SHADERS_PEND_ON_CORESTACK_ON,
+ KBASE_SHADERS_ON_CORESTACK_ON,
+ KBASE_SHADERS_WAIT_OFF_CORESTACK_ON,
+ KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON,
+ KBASE_SHADERS_PEND_OFF_CORESTACK_ON,
+ KBASE_SHADERS_OFF_CORESTACK_PEND_OFF,
+ KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF,
+ KBASE_SHADERS_RESET_WAIT
+};
+
+/**
* struct kbasep_pm_metrics - Metrics data collected for use by the power
* management framework.
*
@@ -128,13 +190,39 @@ struct kbasep_pm_metrics_state {
#endif
};
+/**
+ * struct kbasep_pm_tick_timer_state - State for the shader hysteresis timer
+ * @wq: Work queue to wait for the timer to stopped
+ * @work: Work item which cancels the timer
+ * @timer: Timer for powering off the shader cores
+ * @configured_interval: Period of GPU poweroff timer
+ * @configured_ticks: User-configured number of ticks to wait after the shader
+ * power down request is received before turning off the cores
+ * @remaining_ticks: Number of remaining timer ticks until shaders are powered off
+ * @cancel_queued: True if the cancellation work item has been queued. This is
+ * required to ensure that it is not queued twice, e.g. after
+ * a reset, which could cause the timer to be incorrectly
+ * cancelled later by a delayed workitem.
+ * @needed: Whether the timer should restart itself
+ */
+struct kbasep_pm_tick_timer_state {
+ struct workqueue_struct *wq;
+ struct work_struct work;
+ struct hrtimer timer;
+
+ ktime_t configured_interval;
+ unsigned int configured_ticks;
+ unsigned int remaining_ticks;
+
+ bool cancel_queued;
+ bool needed;
+};
+
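
    The fields above drive the shader power-off hysteresis: once the shaders
    go idle the timer fires every configured_interval, counting
    remaining_ticks down from configured_ticks, and the cores are only
    powered off when the countdown completes; new work abandons the countdown
    by clearing @needed. A rough userspace model of just the tick accounting
    (the hrtimer/workqueue plumbing is omitted; names are illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    struct tick_timer_model {
        unsigned int configured_ticks;  /* ticks to wait before power-off */
        unsigned int remaining_ticks;   /* counts down once shaders are idle */
        bool needed;                    /* should the timer keep re-arming? */
    };

    /* Called when the last job retires: start the hysteresis countdown. */
    static void shaders_idle(struct tick_timer_model *t)
    {
        t->remaining_ticks = t->configured_ticks;
        t->needed = true;
    }

    /* One timer expiry; returns true when the shaders may be powered off. */
    static bool tick(struct tick_timer_model *t)
    {
        if (!t->needed)
            return false;               /* countdown abandoned, keep cores on */
        if (t->remaining_ticks > 0)
            t->remaining_ticks--;
        return t->remaining_ticks == 0;
    }

    int main(void)
    {
        struct tick_timer_model t = { .configured_ticks = 2 };

        shaders_idle(&t);
        while (!tick(&t))
            ;
        printf("hysteresis expired: shaders can be powered off\n");
        return 0;
    }
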
union kbase_pm_policy_data {
struct kbasep_pm_policy_always_on always_on;
struct kbasep_pm_policy_coarse_demand coarse_demand;
- struct kbasep_pm_policy_demand demand;
#if !MALI_CUSTOMER_RELEASE
- struct kbasep_pm_policy_demand_always_powered demand_always_powered;
- struct kbasep_pm_policy_fast_start fast_start;
+ struct kbasep_pm_policy_always_on_demand always_on_demand;
#endif
};
@@ -147,39 +235,14 @@ union kbase_pm_policy_data {
* @pm_current_policy: The policy that is currently actively controlling the
* power state.
* @pm_policy_data: Private data for current PM policy
- * @ca_in_transition: Flag indicating when core availability policy is
- * transitioning cores. The core availability policy must
- * set this when a change in core availability is occurring.
- * power_change_lock must be held when accessing this.
* @reset_done: Flag when a reset is complete
* @reset_done_wait: Wait queue to wait for changes to @reset_done
- * @l2_powered_wait: Wait queue for whether the l2 cache has been powered as
- * requested
- * @l2_powered: State indicating whether all the l2 caches are powered.
- * Non-zero indicates they're *all* powered
- * Zero indicates that some (or all) are not powered
* @gpu_cycle_counter_requests: The reference count of active gpu cycle counter
* users
* @gpu_cycle_counter_requests_lock: Lock to protect @gpu_cycle_counter_requests
- * @desired_shader_state: A bit mask identifying the shader cores that the
- * power policy would like to be on. The current state
- * of the cores may be different, but there should be
- * transitions in progress that will eventually achieve
- * this state (assuming that the policy doesn't change
- * its mind in the mean time).
- * @powering_on_shader_state: A bit mask indicating which shader cores are
- * currently in a power-on transition
- * @desired_tiler_state: A bit mask identifying the tiler cores that the power
- * policy would like to be on. See @desired_shader_state
- * @powering_on_tiler_state: A bit mask indicating which tiler core are
- * currently in a power-on transition
- * @powering_on_l2_state: A bit mask indicating which l2-caches are currently
- * in a power-on transition
- * @powering_on_stack_state: A bit mask indicating which core stacks are
- * currently in a power-on transition
- * @gpu_in_desired_state: This flag is set if the GPU is powered as requested
- * by the desired_xxx_state variables
- * @gpu_in_desired_state_wait: Wait queue set when @gpu_in_desired_state != 0
+ * @gpu_in_desired_state_wait: Wait queue set when the GPU is in the desired
+ * state according to the L2 and shader power state
+ * machines
* @gpu_powered: Set to true when the GPU is powered and register
* accesses are possible, false otherwise
* @instr_enabled: Set to true when instrumentation is enabled,
@@ -192,26 +255,12 @@ union kbase_pm_policy_data {
* @gpu_powered_lock: Spinlock that must be held when writing @gpu_powered or
* accessing @driver_ready_for_irqs
* @metrics: Structure to hold metrics for the GPU
- * @gpu_poweroff_pending: number of poweroff timer ticks until the GPU is
- * powered off
- * @shader_poweroff_pending_time: number of poweroff timer ticks until shaders
- * and/or timers are powered off
- * @gpu_poweroff_timer: Timer for powering off GPU
- * @gpu_poweroff_wq: Workqueue to power off GPU on when timer fires
- * @gpu_poweroff_work: Workitem used on @gpu_poweroff_wq
- * @shader_poweroff_pending: Bit mask of shaders to be powered off on next
- * timer callback
- * @tiler_poweroff_pending: Bit mask of tilers to be powered off on next timer
- * callback
- * @poweroff_timer_needed: true if the poweroff timer is currently required,
- * false otherwise
- * @poweroff_timer_running: true if the poweroff timer is currently running,
- * false otherwise
- * power_change_lock should be held when accessing,
- * unless there is no way the timer can be running (eg
- * hrtimer_cancel() was called immediately before)
+ * @shader_tick_timer: Structure to hold the shader poweroff tick timer state
* @poweroff_wait_in_progress: true if a wait for GPU power off is in progress.
* hwaccess_lock must be held when accessing
+ * @invoke_poweroff_wait_wq_when_l2_off: flag indicating that the L2 power state
+ * machine should invoke the poweroff
+ * worker after the L2 has turned off.
* @poweron_required: true if a GPU power on is required. Should only be set
* when poweroff_wait_in_progress is true, and therefore the
* GPU can not immediately be powered on. pm.lock must be
@@ -236,35 +285,49 @@ union kbase_pm_policy_data {
* @callback_power_runtime_idle: Optional callback when the GPU may be idle. See
* &struct kbase_pm_callback_conf
* @ca_cores_enabled: Cores that are currently available
+ * @l2_state: The current state of the L2 cache state machine. See
+ * &enum kbase_l2_core_state
+ * @l2_desired: True if the L2 cache should be powered on by the L2 cache state
+ * machine
+ * @shaders_state: The current state of the shader state machine.
+ * @shaders_avail: This is updated by the state machine when it is in a state
+ * where it can handle changes to the core availability. This
+ * is internal to the shader state machine and should *not* be
+ * modified elsewhere.
+ * @shaders_desired: True if the PM active count or power policy requires the
+ * shader cores to be on. This is used as an input to the
+ * shader power state machine. The current state of the
+ * cores may be different, but there should be transitions in
+ * progress that will eventually achieve this state (assuming
+ * that the policy doesn't change its mind in the mean time).
+ * @in_reset: True if a GPU is resetting and normal power manager operation is
+ * suspended
+ * @protected_transition_override : True if a protected mode transition is in
+ * progress and is overriding power manager
+ * behaviour.
+ * @protected_l2_override : Non-zero if the L2 cache is required during a
+ * protected mode transition. Has no effect if not
+ * transitioning.
+ * @hwcnt_desired: True if we want GPU hardware counters to be enabled.
+ * @hwcnt_disabled: True if GPU hardware counters are not enabled.
+ * @hwcnt_disable_work: Work item to disable GPU hardware counters, used if
+ * atomic disable is not possible.
*
* Note:
* During an IRQ, @pm_current_policy can be NULL when the policy is being
* changed with kbase_pm_set_policy(). The change is protected under
- * kbase_device.pm.power_change_lock. Direct access to this from IRQ context
+ * kbase_device.pm.power_change_lock. Direct access to this from IRQ context
* must therefore check for NULL. If NULL, then kbase_pm_set_policy() will
* re-issue the policy functions that would have been done under IRQ.
*/
struct kbase_pm_backend_data {
const struct kbase_pm_policy *pm_current_policy;
union kbase_pm_policy_data pm_policy_data;
- bool ca_in_transition;
bool reset_done;
wait_queue_head_t reset_done_wait;
- wait_queue_head_t l2_powered_wait;
- int l2_powered;
int gpu_cycle_counter_requests;
spinlock_t gpu_cycle_counter_requests_lock;
- u64 desired_shader_state;
- u64 powering_on_shader_state;
- u64 desired_tiler_state;
- u64 powering_on_tiler_state;
- u64 powering_on_l2_state;
-#ifdef CONFIG_MALI_CORESTACK
- u64 powering_on_stack_state;
-#endif /* CONFIG_MALI_CORESTACK */
-
- bool gpu_in_desired_state;
wait_queue_head_t gpu_in_desired_state_wait;
bool gpu_powered;
@@ -279,23 +342,12 @@ struct kbase_pm_backend_data {
spinlock_t gpu_powered_lock;
-
struct kbasep_pm_metrics_state metrics;
- int gpu_poweroff_pending;
- int shader_poweroff_pending_time;
-
- struct hrtimer gpu_poweroff_timer;
- struct workqueue_struct *gpu_poweroff_wq;
- struct work_struct gpu_poweroff_work;
-
- u64 shader_poweroff_pending;
- u64 tiler_poweroff_pending;
-
- bool poweroff_timer_needed;
- bool poweroff_timer_running;
+ struct kbasep_pm_tick_timer_state shader_tick_timer;
bool poweroff_wait_in_progress;
+ bool invoke_poweroff_wait_wq_when_l2_off;
bool poweron_required;
bool poweroff_is_suspend;
@@ -312,25 +364,38 @@ struct kbase_pm_backend_data {
void (*callback_power_runtime_off)(struct kbase_device *kbdev);
int (*callback_power_runtime_idle)(struct kbase_device *kbdev);
-#ifdef CONFIG_MALI_DEVFREQ
u64 ca_cores_enabled;
-#endif
+
+ enum kbase_l2_core_state l2_state;
+ enum kbase_shader_core_state shaders_state;
+ u64 shaders_avail;
+ bool l2_desired;
+ bool shaders_desired;
+
+ bool in_reset;
+
+ bool protected_transition_override;
+ int protected_l2_override;
+
+ bool hwcnt_desired;
+ bool hwcnt_disabled;
+ struct work_struct hwcnt_disable_work;
};
/* List of policy IDs */
enum kbase_pm_policy_id {
- KBASE_PM_POLICY_ID_DEMAND = 1,
- KBASE_PM_POLICY_ID_ALWAYS_ON,
KBASE_PM_POLICY_ID_COARSE_DEMAND,
#if !MALI_CUSTOMER_RELEASE
- KBASE_PM_POLICY_ID_DEMAND_ALWAYS_POWERED,
- KBASE_PM_POLICY_ID_FAST_START
+ KBASE_PM_POLICY_ID_ALWAYS_ON_DEMAND,
#endif
+ KBASE_PM_POLICY_ID_ALWAYS_ON
};
typedef u32 kbase_pm_policy_flags;
+#define KBASE_PM_POLICY_FLAG_DISABLED_WITH_POWER_DOWN_ONLY (1u)
+
/**
* struct kbase_pm_policy - Power policy structure.
*
@@ -377,13 +442,8 @@ struct kbase_pm_policy {
/**
* Function called to find out if shader cores are needed
*
- * This needs to at least satisfy kbdev->shader_needed_cnt, and so must
- * never return false when kbdev->shader_needed_cnt > 0.
- *
- * Note that kbdev->pm.active_count being 0 is not a good indicator
- * that kbdev->shader_needed_cnt is also 0 - refer to the documentation
- * on the active_count member in struct kbase_pm_device_data and
- * kbase_pm_is_active().
+ * This needs to at least satisfy kbdev->pm.backend.shaders_desired,
+ * and so must never return false when shaders_desired is true.
*
* @kbdev: The kbase device structure for the device (must be a
* valid pointer)
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_demand.c b/mali_kbase/backend/gpu/mali_kbase_pm_demand.c
deleted file mode 100644
index 01727d6..0000000
--- a/mali_kbase/backend/gpu/mali_kbase_pm_demand.c
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- */
-
-
-
-/*
- * A simple demand based power management policy
- */
-
-#include <mali_kbase.h>
-#include <mali_kbase_pm.h>
-
-static bool demand_shaders_needed(struct kbase_device *kbdev)
-{
- return (kbdev->shader_needed_cnt > 0);
-}
-
-static bool demand_get_core_active(struct kbase_device *kbdev)
-{
- return kbase_pm_is_active(kbdev);
-}
-
-static void demand_init(struct kbase_device *kbdev)
-{
- CSTD_UNUSED(kbdev);
-}
-
-static void demand_term(struct kbase_device *kbdev)
-{
- CSTD_UNUSED(kbdev);
-}
-
-/*
- * The struct kbase_pm_policy structure for the demand power policy.
- *
- * This is the static structure that defines the demand power policy's callback
- * and name.
- */
-const struct kbase_pm_policy kbase_pm_demand_policy_ops = {
- "demand", /* name */
- demand_init, /* init */
- demand_term, /* term */
- demand_shaders_needed, /* shaders_needed */
- demand_get_core_active, /* get_core_active */
- 0u, /* flags */
- KBASE_PM_POLICY_ID_DEMAND, /* id */
-};
-
-KBASE_EXPORT_TEST_API(kbase_pm_demand_policy_ops);
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_demand.h b/mali_kbase/backend/gpu/mali_kbase_pm_demand.h
deleted file mode 100644
index 4b05e6d..0000000
--- a/mali_kbase/backend/gpu/mali_kbase_pm_demand.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2011-2015,2018 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- */
-
-
-
-/*
- * A simple demand based power management policy
- */
-
-#ifndef MALI_KBASE_PM_DEMAND_H
-#define MALI_KBASE_PM_DEMAND_H
-
-/**
- * DOC: Demand power management policy
- *
- * The demand power management policy has the following characteristics:
- * - When KBase indicates that the GPU will be powered up, but we don't yet
- * know which Job Chains are to be run:
- * - The Shader Cores are not powered up
- *
- * - When KBase indicates that Shader Cores are needed to submit the currently
- * queued Job Chains:
- * - Shader Cores are powered up
- *
- * - When KBase indicates that the GPU need not be powered:
- * - The Shader Cores are powered off, and the GPU itself is powered off too.
- *
- * Note:
- * - KBase indicates the GPU will be powered up when it has a User Process that
- * has just started to submit Job Chains.
- *
- * - KBase indicates the GPU need not be powered when all the Job Chains from
- * User Processes have finished, and it is waiting for a User Process to
- * submit some more Job Chains.
- */
-
-/**
- * struct kbasep_pm_policy_demand - Private structure for policy instance data
- *
- * @dummy: No state is needed, a dummy variable
- *
- * This contains data that is private to the demand power policy.
- */
-struct kbasep_pm_policy_demand {
- int dummy;
-};
-
-extern const struct kbase_pm_policy kbase_pm_demand_policy_ops;
-
-#endif /* MALI_KBASE_PM_DEMAND_H */
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
index cdd5cf7..2e6599a 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
@@ -29,15 +29,14 @@
#include <mali_kbase.h>
#include <mali_kbase_config_defaults.h>
#include <mali_midg_regmap.h>
-#if defined(CONFIG_MALI_GATOR_SUPPORT)
#include <mali_kbase_gator.h>
-#endif
#include <mali_kbase_tlstream.h>
#include <mali_kbase_pm.h>
#include <mali_kbase_config_defaults.h>
#include <mali_kbase_smc.h>
#include <mali_kbase_hwaccess_jm.h>
#include <mali_kbase_ctx_sched.h>
+#include <mali_kbase_hwcnt_context.h>
#include <backend/gpu/mali_kbase_cache_policy_backend.h>
#include <backend/gpu/mali_kbase_device_internal.h>
#include <backend/gpu/mali_kbase_irq_internal.h>
@@ -45,11 +44,23 @@
#include <linux/of.h>
-#if MALI_MOCK_TEST
-#define MOCKABLE(function) function##_original
+#ifdef CONFIG_MALI_CORESTACK
+bool corestack_driver_control = true;
#else
-#define MOCKABLE(function) function
-#endif /* MALI_MOCK_TEST */
+bool corestack_driver_control; /* Default value of 0/false */
+#endif
+module_param(corestack_driver_control, bool, 0000);
+MODULE_PARM_DESC(corestack_driver_control,
+ "Let the driver power on/off the GPU core stack independently "
+ "without involving the Power Domain Controller. This should "
+ "only be enabled on platforms for which integration of the PDC "
+ "to the Mali GPU is known to be problematic.");
+KBASE_EXPORT_TEST_API(corestack_driver_control);
+
+bool platform_power_down_only = PLATFORM_POWER_DOWN_ONLY;
+module_param(platform_power_down_only, bool, 0000);
+MODULE_PARM_DESC(platform_power_down_only,
+ "Disable power down of individual cores.");
/**
* enum kbasep_pm_action - Actions that can be performed on a core.
@@ -79,6 +90,47 @@ static u64 kbase_pm_get_state(
enum kbase_pm_core_type core_type,
enum kbasep_pm_action action);
+static bool kbase_pm_is_l2_desired(struct kbase_device *kbdev)
+{
+ if (kbdev->pm.backend.protected_transition_override &&
+ kbdev->pm.backend.protected_l2_override)
+ return true;
+
+ if (kbdev->pm.backend.protected_transition_override &&
+ !kbdev->pm.backend.shaders_desired)
+ return false;
+
+ return kbdev->pm.backend.l2_desired;
+}
+
+void kbase_pm_protected_override_enable(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ kbdev->pm.backend.protected_transition_override = true;
+}
+void kbase_pm_protected_override_disable(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ kbdev->pm.backend.protected_transition_override = false;
+}
+
+void kbase_pm_protected_l2_override(struct kbase_device *kbdev, bool override)
+{
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ if (override) {
+ kbdev->pm.backend.protected_l2_override++;
+ WARN_ON(kbdev->pm.backend.protected_l2_override <= 0);
+ } else {
+ kbdev->pm.backend.protected_l2_override--;
+ WARN_ON(kbdev->pm.backend.protected_l2_override < 0);
+ }
+
+ kbase_pm_update_state(kbdev);
+}
+
/**
* core_type_to_reg - Decode a core type and action to a register.
*
@@ -96,24 +148,24 @@ static u64 kbase_pm_get_state(
static u32 core_type_to_reg(enum kbase_pm_core_type core_type,
enum kbasep_pm_action action)
{
-#ifdef CONFIG_MALI_CORESTACK
- if (core_type == KBASE_PM_CORE_STACK) {
- switch (action) {
- case ACTION_PRESENT:
- return STACK_PRESENT_LO;
- case ACTION_READY:
- return STACK_READY_LO;
- case ACTION_PWRON:
- return STACK_PWRON_LO;
- case ACTION_PWROFF:
- return STACK_PWROFF_LO;
- case ACTION_PWRTRANS:
- return STACK_PWRTRANS_LO;
- default:
- BUG();
+ if (corestack_driver_control) {
+ if (core_type == KBASE_PM_CORE_STACK) {
+ switch (action) {
+ case ACTION_PRESENT:
+ return STACK_PRESENT_LO;
+ case ACTION_READY:
+ return STACK_READY_LO;
+ case ACTION_PWRON:
+ return STACK_PWRON_LO;
+ case ACTION_PWROFF:
+ return STACK_PWROFF_LO;
+ case ACTION_PWRTRANS:
+ return STACK_PWRTRANS_LO;
+ default:
+ WARN(1, "Invalid action for core type\n");
+ }
}
}
-#endif /* CONFIG_MALI_CORESTACK */
return (u32)core_type + (u32)action;
}
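
    Outside the core-stack special case the lookup is plain offset
    arithmetic: the addition at the end only works because the core-type and
    action enum values are chosen so that their sum is the address of the
    wanted *_LO register. Those enum values come from the GPU register map
    and are not part of this hunk, so the numbers below are invented purely
    to illustrate the scheme:

    #include <stdio.h>

    /* Hypothetical register addresses, for illustration only. */
    enum core_type {                 /* value == <type>_PRESENT_LO */
        CORE_SHADER = 0x100,
        CORE_TILER  = 0x110,
        CORE_L2     = 0x120,
    };

    enum pm_action {                 /* value == offset from <type>_PRESENT_LO */
        ACTION_PRESENT  = 0x00,
        ACTION_READY    = 0x40,
        ACTION_PWRON    = 0x80,
        ACTION_PWROFF   = 0xC0,
        ACTION_PWRTRANS = 0x100,
    };

    static unsigned int core_type_to_reg(enum core_type type,
                                         enum pm_action action)
    {
        /* e.g. CORE_TILER + ACTION_PWRON == TILER_PWRON_LO */
        return (unsigned int)type + (unsigned int)action;
    }

    int main(void)
    {
        printf("TILER_PWRON_LO = 0x%x\n",
               core_type_to_reg(CORE_TILER, ACTION_PWRON));
        return 0;
    }
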
@@ -170,6 +222,12 @@ static void kbase_pm_invoke(struct kbase_device *kbdev,
u32 lo = cores & 0xFFFFFFFF;
u32 hi = (cores >> 32) & 0xFFFFFFFF;
+ /* When 'platform_power_down_only' is enabled, no core type should be
+ * turned off individually.
+ */
+ KBASE_DEBUG_ASSERT(!(action == ACTION_PWROFF &&
+ platform_power_down_only));
+
lockdep_assert_held(&kbdev->hwaccess_lock);
reg = core_type_to_reg(core_type, action);
@@ -272,16 +330,6 @@ static u64 kbase_pm_get_state(struct kbase_device *kbdev,
return (((u64) hi) << 32) | ((u64) lo);
}
-void kbasep_pm_init_core_use_bitmaps(struct kbase_device *kbdev)
-{
- kbdev->shader_available_bitmap = 0;
- kbdev->tiler_available_bitmap = 0;
- kbdev->l2_users_count = 0;
- kbdev->l2_available_bitmap = 0;
- kbdev->tiler_needed_cnt = 0;
- kbdev->shader_needed_cnt = 0;
-}
-
/**
* kbase_pm_get_present_cores - Get the cores that are present
*
@@ -385,525 +433,776 @@ u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores);
-/**
- * kbase_pm_transition_core_type - Perform power transitions for a particular
- * core type.
- *
- * This function will perform any available power transitions to make the actual
- * hardware state closer to the desired state. If a core is currently
- * transitioning then changes to the power state of that call cannot be made
- * until the transition has finished. Cores which are not present in the
- * hardware are ignored if they are specified in the desired_state bitmask,
- * however the return value will always be 0 in this case.
- *
- * @kbdev: The kbase device
- * @type: The core type to perform transitions for
- * @desired_state: A bit mask of the desired state of the cores
- * @in_use: A bit mask of the cores that are currently running
- * jobs. These cores have to be kept powered up because
- * there are jobs running (or about to run) on them.
- * @available: Receives a bit mask of the cores that the job
- * scheduler can use to submit jobs to. May be NULL if
- * this is not needed.
- * @powering_on: Bit mask to update with cores that are
- * transitioning to a power-on state.
- *
- * Return: true if the desired state has been reached, false otherwise
- */
-static bool kbase_pm_transition_core_type(struct kbase_device *kbdev,
- enum kbase_pm_core_type type,
- u64 desired_state,
- u64 in_use,
- u64 * const available,
- u64 *powering_on)
+static u64 kbase_pm_l2_update_state(struct kbase_device *kbdev)
{
- u64 present;
- u64 ready;
- u64 trans;
- u64 powerup;
- u64 powerdown;
- u64 powering_on_trans;
- u64 desired_state_in_use;
+ struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
+ u64 l2_present = kbdev->gpu_props.props.raw_props.l2_present;
+ u64 tiler_present = kbdev->gpu_props.props.raw_props.tiler_present;
+ enum kbase_l2_core_state prev_state;
lockdep_assert_held(&kbdev->hwaccess_lock);
- /* Get current state */
- present = kbase_pm_get_present_cores(kbdev, type);
- trans = kbase_pm_get_trans_cores(kbdev, type);
- ready = kbase_pm_get_ready_cores(kbdev, type);
-
- /* mask off ready from trans in case transitions finished between the
- * register reads */
- trans &= ~ready;
+ do {
+ /* Get current state */
+ u64 l2_trans = kbase_pm_get_trans_cores(kbdev,
+ KBASE_PM_CORE_L2);
+ u64 l2_ready = kbase_pm_get_ready_cores(kbdev,
+ KBASE_PM_CORE_L2);
+ u64 tiler_trans = kbase_pm_get_trans_cores(kbdev,
+ KBASE_PM_CORE_TILER);
+ u64 tiler_ready = kbase_pm_get_ready_cores(kbdev,
+ KBASE_PM_CORE_TILER);
+
+ /* mask off ready from trans in case transitions finished
+ * between the register reads
+ */
+ l2_trans &= ~l2_ready;
+ tiler_trans &= ~tiler_ready;
+
+ prev_state = backend->l2_state;
+
+ switch (backend->l2_state) {
+ case KBASE_L2_OFF:
+ if (kbase_pm_is_l2_desired(kbdev)) {
+ /* L2 is required, power on. Powering on the
+ * tiler will also power the first L2 cache.
+ */
+ kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER,
+ tiler_present, ACTION_PWRON);
+
+ /* If we have more than one L2 cache then we
+ * must power them on explicitly.
+ */
+ if (l2_present != 1)
+ kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2,
+ l2_present & ~1,
+ ACTION_PWRON);
+ backend->l2_state = KBASE_L2_PEND_ON;
+ }
+ break;
- powering_on_trans = trans & *powering_on;
+ case KBASE_L2_PEND_ON:
+ if (!l2_trans && l2_ready == l2_present && !tiler_trans
+ && tiler_ready == tiler_present) {
+ KBASE_TRACE_ADD(kbdev,
+ PM_CORES_CHANGE_AVAILABLE_TILER,
+ NULL, NULL, 0u,
+ (u32)tiler_ready);
+ /*
+ * Ensure snoops are enabled after L2 is powered
+ * up. Note that kbase keeps track of the snoop
+ * state, so safe to repeatedly call.
+ */
+ kbase_pm_cache_snoop_enable(kbdev);
+
+ /* With the L2 enabled, we can now enable
+ * hardware counters.
+ */
+ backend->l2_state = KBASE_L2_ON_HWCNT_ENABLE;
+
+ /* Now that the L2 is on, the shaders can start
+ * powering on if they're required. The obvious
+ * way to do this would be to call
+ * kbase_pm_shaders_update_state() here.
+ * However, that would make the two state
+ * machines mutually recursive, as the opposite
+ * would be needed for powering down. Instead,
+ * callers of this function should use the
+ * kbase_pm_update_state() wrapper, which will
+ * call the shader state machine immediately
+ * after the L2 (for power up), or
+ * automatically re-invoke the L2 state machine
+ * when the shaders power down.
+ */
+ }
+ break;
+
+ case KBASE_L2_ON_HWCNT_ENABLE:
+ backend->hwcnt_desired = true;
+ if (backend->hwcnt_disabled) {
+ kbase_hwcnt_context_enable(
+ kbdev->hwcnt_gpu_ctx);
+ backend->hwcnt_disabled = false;
+ }
+ backend->l2_state = KBASE_L2_ON;
+ break;
+
+ case KBASE_L2_ON:
+ if (!kbase_pm_is_l2_desired(kbdev)) {
+ /* Do not power off L2 until the shaders and
+ * core stacks are off.
+ */
+ if (backend->shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF)
+ break;
+
+ /* We need to make sure hardware counters are
+ * disabled before powering down the L2, to
+ * prevent loss of data.
+ *
+ * We waited until after the cores were powered
+ * down to prevent ping-ponging between hwcnt
+ * enabled and disabled, which would have
+ * happened if userspace submitted more work
+ * while we were trying to power down.
+ */
+ backend->l2_state = KBASE_L2_ON_HWCNT_DISABLE;
+ }
+ break;
+
+ case KBASE_L2_ON_HWCNT_DISABLE:
+ /* If the L2 became desired while we were waiting on the
+ * worker to do the actual hwcnt disable (which might
+ * happen if some work was submitted immediately after
+ * the shaders powered off), then we need to early-out
+ * of this state and re-enable hwcnt.
+ *
+ * If we get lucky, the hwcnt disable might not have
+ * actually started yet, and the logic in the hwcnt
+ * enable state will prevent the worker from
+ * performing the disable entirely, preventing loss of
+ * any hardware counter data.
+ *
+ * If the hwcnt disable has started, then we'll lose
+ * a tiny amount of hardware counter data between the
+ * disable and the re-enable occurring.
+ *
+ * This loss of data is preferable to the alternative,
+ * which is to block the shader cores from doing any
+ * work until we're sure hwcnt has been re-enabled.
+ */
+ if (kbase_pm_is_l2_desired(kbdev)) {
+ backend->l2_state = KBASE_L2_ON_HWCNT_ENABLE;
+ break;
+ }
- if (available != NULL)
- *available = (ready | powering_on_trans) & desired_state;
+ /* See if we can get away with disabling hwcnt
+ * atomically, otherwise kick off a worker.
+ */
+ backend->hwcnt_desired = false;
+ if (!backend->hwcnt_disabled) {
+ if (kbase_hwcnt_context_disable_atomic(
+ kbdev->hwcnt_gpu_ctx))
+ backend->hwcnt_disabled = true;
+ else
+#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE
+ queue_work(system_wq,
+ &backend->hwcnt_disable_work);
+#else
+ queue_work(system_highpri_wq,
+ &backend->hwcnt_disable_work);
+#endif
+ }
- if (trans) /* Do not progress if any cores are transitioning */
- return false;
+ if (backend->hwcnt_disabled)
+ backend->l2_state = KBASE_L2_POWER_DOWN;
+ break;
+
+ case KBASE_L2_POWER_DOWN:
+ if (!platform_power_down_only)
+ /* Powering off the L2 will also power off the
+ * tiler.
+ */
+ kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2,
+ l2_present,
+ ACTION_PWROFF);
+ else
+ /* If L2 cache is powered then we must flush it
+ * before we power off the GPU. Normally this
+ * would have been handled when the L2 was
+ * powered off.
+ */
+ kbase_gpu_start_cache_clean_nolock(
+ kbdev);
- *powering_on = powering_on_trans;
+ KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
+ NULL, NULL, 0u, 0u);
+
+ backend->l2_state = KBASE_L2_PEND_OFF;
+ break;
+
+ case KBASE_L2_PEND_OFF:
+ if (!platform_power_down_only) {
+ /* We only need to check the L2 here - if the L2
+ * is off then the tiler is definitely also off.
+ */
+ if (!l2_trans && !l2_ready)
+ /* L2 is now powered off */
+ backend->l2_state = KBASE_L2_OFF;
+ } else {
+ if (!kbdev->cache_clean_in_progress)
+ backend->l2_state = KBASE_L2_OFF;
+ }
+ break;
- /* Update desired state to include the in-use cores. These have to be
- * kept powered up because there are jobs running or about to run on
- * these cores
- */
- desired_state_in_use = desired_state | in_use;
-
- /* Update state of whether l2 caches are powered */
- if (type == KBASE_PM_CORE_L2) {
- if ((ready == present) && (desired_state_in_use == ready) &&
- (trans == 0)) {
- /* All are ready, none will be turned off, and none are
- * transitioning */
- kbdev->pm.backend.l2_powered = 1;
- /*
- * Ensure snoops are enabled after L2 is powered up,
- * note that kbase keeps track of the snoop state, so
- * safe to repeatedly call.
- */
- kbase_pm_cache_snoop_enable(kbdev);
- if (kbdev->l2_users_count > 0) {
- /* Notify any registered l2 cache users
- * (optimized out when no users waiting) */
- wake_up(&kbdev->pm.backend.l2_powered_wait);
+ case KBASE_L2_RESET_WAIT:
+ if (!backend->in_reset) {
+ /* Reset complete */
+ backend->l2_state = KBASE_L2_OFF;
}
- } else
- kbdev->pm.backend.l2_powered = 0;
- }
+ break;
- if (desired_state == ready && (trans == 0))
- return true;
+ default:
+ WARN(1, "Invalid state in l2_state: %d",
+ backend->l2_state);
+ }
+ } while (backend->l2_state != prev_state);
- /* Restrict the cores to those that are actually present */
- powerup = desired_state_in_use & present;
- powerdown = (~desired_state_in_use) & present;
-
- /* Restrict to cores that are not already in the desired state */
- powerup &= ~ready;
- powerdown &= ready;
-
- /* Don't transition any cores that are already transitioning, except for
- * Mali cores that support the following case:
- *
- * If the SHADER_PWRON or TILER_PWRON registers are written to turn on
- * a core that is currently transitioning to power off, then this is
- * remembered and the shader core is automatically powered up again once
- * the original transition completes. Once the automatic power on is
- * complete any job scheduled on the shader core should start.
- */
- powerdown &= ~trans;
+ if (kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off &&
+ backend->l2_state == KBASE_L2_OFF) {
+ kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off = false;
+ queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq,
+ &kbdev->pm.backend.gpu_poweroff_wait_work);
+ }
+
+ if (backend->l2_state == KBASE_L2_ON)
+ return l2_present;
+ return 0;
+}
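
    As the KBASE_L2_PEND_ON comment above notes, this function never calls
    the shader state machine directly; a kbase_pm_update_state() wrapper (not
    shown in this hunk) is expected to step both machines one after the other
    until neither makes further progress. A minimal sketch of such a
    fixed-point driver, with stub step functions standing in for the real
    ones:

    #include <stdbool.h>
    #include <stdio.h>

    /* Each step function advances its state machine as far as it can and
     * returns true if the externally visible state changed.  These stubs
     * stand in for the L2 and shader update functions in the patch.
     */
    static bool l2_update_state(void)      { static int n; return n++ < 1; }
    static bool shaders_update_state(void) { static int n; return n++ < 1; }

    /* Run both machines to a fixed point: powering the L2 up lets the
     * shaders progress, and the shaders reaching OFF lets the L2 power
     * down, so keep stepping until a full pass changes nothing.
     */
    static void pm_update_state(void)
    {
        bool changed;

        do {
            changed = false;
            if (l2_update_state())
                changed = true;
            if (shaders_update_state())
                changed = true;
        } while (changed);
    }

    int main(void)
    {
        pm_update_state();
        printf("both state machines are at a fixed point\n");
        return 0;
    }
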
- if (kbase_hw_has_feature(kbdev,
- BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS))
- if (KBASE_PM_CORE_SHADER == type || KBASE_PM_CORE_TILER == type)
- trans = powering_on_trans; /* for exception cases, only
- * mask off cores in power on
- * transitions */
+static void shader_poweroff_timer_stop_callback(struct work_struct *data)
+{
+ unsigned long flags;
+ struct kbasep_pm_tick_timer_state *stt = container_of(data,
+ struct kbasep_pm_tick_timer_state, work);
+ struct kbase_device *kbdev = container_of(stt, struct kbase_device,
+ pm.backend.shader_tick_timer);
- powerup &= ~trans;
+ hrtimer_cancel(&stt->timer);
- /* Perform transitions if any */
- kbase_pm_invoke(kbdev, type, powerup, ACTION_PWRON);
-#if !PLATFORM_POWER_DOWN_ONLY
- kbase_pm_invoke(kbdev, type, powerdown, ACTION_PWROFF);
-#endif
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- /* Recalculate cores transitioning on, and re-evaluate our state */
- powering_on_trans |= powerup;
- *powering_on = powering_on_trans;
- if (available != NULL)
- *available = (ready | powering_on_trans) & desired_state;
+ stt->cancel_queued = false;
+ if (kbdev->pm.backend.gpu_powered)
+ kbase_pm_update_state(kbdev);
- return false;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
-KBASE_EXPORT_TEST_API(kbase_pm_transition_core_type);
-
/**
- * get_desired_cache_status - Determine which caches should be on for a
- * particular core state
+ * shader_poweroff_timer_queue_cancel - cancel the shader poweroff tick timer
+ * @kbdev: pointer to kbase device
*
- * This function takes a bit mask of the present caches and the cores (or
- * caches) that are attached to the caches that will be powered. It then
- * computes which caches should be turned on to allow the cores requested to be
- * powered up.
+ * Synchronization between the shader state machine and the timer thread is
+ * difficult. This is because situations may arise where the state machine
+ * wants to start the timer, but the callback is already running, and has
+ * already passed the point at which it checks whether it is required, and so
+ * cancels itself, even though the state machine may have just tried to call
+ * hrtimer_start.
*
- * @present: The bit mask of present caches
- * @cores_powered: A bit mask of cores (or L2 caches) that are desired to
- * be powered
- * @tilers_powered: The bit mask of tilers that are desired to be powered
+ * This cannot be stopped by holding hwaccess_lock in the timer thread,
+ * because there are still infinitesimally small sections at the start and end
+ * of the callback where the lock is not held.
*
- * Return: A bit mask of the caches that should be turned on
+ * Instead, a new state is added to the shader state machine,
+ * KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF. This is used to guarantee
+ * that when the shaders are switched off, the timer has definitely been
+ * cancelled. As a result, when KBASE_SHADERS_ON_CORESTACK_ON is left and the
+ * timer is started, it is guaranteed that either the timer is already running
+ * (from an availability change or cancelled timer), or hrtimer_start will
+ * succeed. It is critical to avoid ending up in
+ * KBASE_SHADERS_WAIT_OFF_CORESTACK_ON without the timer running, or it could
+ * hang there forever.
*/
-static u64 get_desired_cache_status(u64 present, u64 cores_powered,
- u64 tilers_powered)
+static void shader_poweroff_timer_queue_cancel(struct kbase_device *kbdev)
{
- u64 desired = 0;
+ struct kbasep_pm_tick_timer_state *stt =
+ &kbdev->pm.backend.shader_tick_timer;
- while (present) {
- /* Find out which is the highest set bit */
- u64 bit = fls64(present) - 1;
- u64 bit_mask = 1ull << bit;
- /* Create a mask which has all bits from 'bit' upwards set */
+ lockdep_assert_held(&kbdev->hwaccess_lock);
- u64 mask = ~(bit_mask - 1);
+ stt->needed = false;
- /* If there are any cores powered at this bit or above (that
- * haven't previously been processed) then we need this core on
- */
- if (cores_powered & mask)
- desired |= bit_mask;
-
- /* Remove bits from cores_powered and present */
- cores_powered &= ~mask;
- present &= ~bit_mask;
+ if (hrtimer_active(&stt->timer) && !stt->cancel_queued) {
+ stt->cancel_queued = true;
+ queue_work(stt->wq, &stt->work);
}
+}
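
As an aside on the comment above: the KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF state effectively waits for the condition sketched below before declaring the shaders fully off. The helper is purely illustrative (it does not exist in the driver) and simply restates the check made by that state, assuming the driver's struct kbasep_pm_tick_timer_state definition:

/* Hypothetical helper: the tick timer is only 'fully stopped' once it is
 * neither running nor waiting for a queued cancel work item. */
static bool example_timer_fully_stopped(struct kbasep_pm_tick_timer_state *stt)
{
	return !hrtimer_active(&stt->timer) && !stt->cancel_queued;
}

Only once this holds does the state machine fall back to KBASE_SHADERS_OFF_CORESTACK_OFF, which is what lets a later hrtimer_start() proceed without racing a callback that is about to cancel itself.
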
- /* Power up the required L2(s) for the tiler */
- if (tilers_powered)
- desired |= 1;
+static void kbase_pm_shaders_update_state(struct kbase_device *kbdev)
+{
+ struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
+ struct kbasep_pm_tick_timer_state *stt =
+ &kbdev->pm.backend.shader_tick_timer;
+ enum kbase_shader_core_state prev_state;
+ u64 stacks_avail = 0;
- return desired;
-}
+ lockdep_assert_held(&kbdev->hwaccess_lock);
-KBASE_EXPORT_TEST_API(get_desired_cache_status);
+ if (corestack_driver_control)
+ /* Always power on all the corestacks. Disabling certain
+ * corestacks when their respective shaders are not in the
+ * available bitmap is not currently supported.
+ */
+ stacks_avail = kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_STACK);
-#ifdef CONFIG_MALI_CORESTACK
-u64 kbase_pm_core_stack_mask(u64 cores)
-{
- u64 stack_mask = 0;
- size_t const MAX_CORE_ID = 31;
- size_t const NUM_CORES_PER_STACK = 4;
- size_t i;
-
- for (i = 0; i <= MAX_CORE_ID; ++i) {
- if (test_bit(i, (unsigned long *)&cores)) {
- /* Every core which ID >= 16 is filled to stacks 4-7
- * instead of 0-3 */
- size_t const stack_num = (i >= 16) ?
- (i % NUM_CORES_PER_STACK) + 4 :
- (i % NUM_CORES_PER_STACK);
- set_bit(stack_num, (unsigned long *)&stack_mask);
+ do {
+ u64 shaders_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_SHADER);
+ u64 shaders_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+ u64 stacks_trans = 0;
+ u64 stacks_ready = 0;
+
+ if (corestack_driver_control) {
+ stacks_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_STACK);
+ stacks_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_STACK);
}
- }
- return stack_mask;
-}
-#endif /* CONFIG_MALI_CORESTACK */
+ /* mask off ready from trans in case transitions finished
+ * between the register reads
+ */
+ shaders_trans &= ~shaders_ready;
+ stacks_trans &= ~stacks_ready;
-bool
-MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev)
-{
- bool cores_are_available = false;
- bool in_desired_state = true;
- u64 desired_l2_state;
-#ifdef CONFIG_MALI_CORESTACK
- u64 desired_stack_state;
- u64 stacks_powered;
-#endif /* CONFIG_MALI_CORESTACK */
- u64 cores_powered;
- u64 tilers_powered;
- u64 tiler_available_bitmap;
- u64 tiler_transitioning_bitmap;
- u64 shader_available_bitmap;
- u64 shader_ready_bitmap;
- u64 shader_transitioning_bitmap;
- u64 l2_available_bitmap;
- u64 prev_l2_available_bitmap;
- u64 l2_inuse_bitmap;
+ prev_state = backend->shaders_state;
- KBASE_DEBUG_ASSERT(NULL != kbdev);
- lockdep_assert_held(&kbdev->hwaccess_lock);
+ switch (backend->shaders_state) {
+ case KBASE_SHADERS_OFF_CORESTACK_OFF:
+ /* Ignore changes to the shader core availability
+ * except at certain points where we can handle it,
+ * i.e. off and SHADERS_ON_CORESTACK_ON.
+ */
+ backend->shaders_avail = kbase_pm_ca_get_core_mask(kbdev);
- spin_lock(&kbdev->pm.backend.gpu_powered_lock);
- if (kbdev->pm.backend.gpu_powered == false) {
- spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
- if (kbdev->pm.backend.desired_shader_state == 0 &&
- kbdev->pm.backend.desired_tiler_state == 0)
- return true;
- return false;
- }
+ if (backend->shaders_desired && backend->l2_state == KBASE_L2_ON) {
+ if (corestack_driver_control)
+ kbase_pm_invoke(kbdev, KBASE_PM_CORE_STACK,
+ stacks_avail, ACTION_PWRON);
- /* If any cores are already powered then, we must keep the caches on */
- shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
- KBASE_PM_CORE_SHADER);
- cores_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
- cores_powered |= kbdev->pm.backend.desired_shader_state;
+ backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_PEND_ON;
+ }
+ break;
-#ifdef CONFIG_MALI_CORESTACK
- /* Work out which core stacks want to be powered */
- desired_stack_state = kbase_pm_core_stack_mask(cores_powered);
- stacks_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_STACK) |
- desired_stack_state;
-#endif /* CONFIG_MALI_CORESTACK */
-
- /* Work out which tilers want to be powered */
- tiler_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
- KBASE_PM_CORE_TILER);
- tilers_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
- tilers_powered |= kbdev->pm.backend.desired_tiler_state;
-
- /* If there are l2 cache users registered, keep all l2s powered even if
- * all other cores are off. */
- if (kbdev->l2_users_count > 0)
- cores_powered |= kbdev->gpu_props.props.raw_props.l2_present;
-
- desired_l2_state = get_desired_cache_status(
- kbdev->gpu_props.props.raw_props.l2_present,
- cores_powered, tilers_powered);
-
- l2_inuse_bitmap = get_desired_cache_status(
- kbdev->gpu_props.props.raw_props.l2_present,
- cores_powered | shader_transitioning_bitmap,
- tilers_powered | tiler_transitioning_bitmap);
+ case KBASE_SHADERS_OFF_CORESTACK_PEND_ON:
+ if (!stacks_trans && stacks_ready == stacks_avail) {
+ kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER,
+ backend->shaders_avail, ACTION_PWRON);
-#ifdef CONFIG_MALI_CORESTACK
- if (stacks_powered)
- desired_l2_state |= 1;
-#endif /* CONFIG_MALI_CORESTACK */
+ backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON;
- /* If any l2 cache is on, then enable l2 #0, for use by job manager */
- if (0 != desired_l2_state)
- desired_l2_state |= 1;
+ }
+ break;
+
+ case KBASE_SHADERS_PEND_ON_CORESTACK_ON:
+ if (!shaders_trans && shaders_ready == backend->shaders_avail) {
+ KBASE_TRACE_ADD(kbdev,
+ PM_CORES_CHANGE_AVAILABLE,
+ NULL, NULL, 0u, (u32)shaders_ready);
+ backend->shaders_state = KBASE_SHADERS_ON_CORESTACK_ON;
+ }
+ break;
+
+ case KBASE_SHADERS_ON_CORESTACK_ON:
+ backend->shaders_avail = kbase_pm_ca_get_core_mask(kbdev);
+
+ if (!backend->shaders_desired) {
+ if (kbdev->pm.backend.protected_transition_override ||
+ !stt->configured_ticks ||
+ WARN_ON(stt->cancel_queued)) {
+ backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON;
+ } else {
+ stt->remaining_ticks = stt->configured_ticks;
+ stt->needed = true;
+
+					/* The shader hysteresis timer is not
+					 * implemented in the obvious way, which
+					 * would be to start an hrtimer when
+					 * shader power off is requested.
+					 * Instead, a 'tick' timer is used, and
+					 * the remaining number of ticks is set
+					 * on a power off request. This avoids
+					 * the latency of starting, then
+					 * immediately cancelling, an hrtimer
+					 * when the shaders are re-requested
+					 * before the timeout expires (see the
+					 * standalone sketch after this function). */
+ if (!hrtimer_active(&stt->timer))
+ hrtimer_start(&stt->timer,
+ stt->configured_interval,
+ HRTIMER_MODE_REL);
+
+ backend->shaders_state = KBASE_SHADERS_WAIT_OFF_CORESTACK_ON;
+ }
+ } else if (!platform_power_down_only) {
+ if (backend->shaders_avail & ~shaders_ready) {
+ backend->shaders_avail |= shaders_ready;
+
+ kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER,
+ backend->shaders_avail & ~shaders_ready,
+ ACTION_PWRON);
+ backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON;
+
+ }
+ }
+ break;
- prev_l2_available_bitmap = kbdev->l2_available_bitmap;
- in_desired_state &= kbase_pm_transition_core_type(kbdev,
- KBASE_PM_CORE_L2, desired_l2_state, l2_inuse_bitmap,
- &l2_available_bitmap,
- &kbdev->pm.backend.powering_on_l2_state);
+ case KBASE_SHADERS_WAIT_OFF_CORESTACK_ON:
+ if (WARN_ON(!hrtimer_active(&stt->timer))) {
+ stt->remaining_ticks = 0;
+ backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON;
+ }
- kbdev->l2_available_bitmap = l2_available_bitmap;
+ if (backend->shaders_desired) {
+ stt->remaining_ticks = 0;
+ backend->shaders_state = KBASE_SHADERS_ON_CORESTACK_ON;
+ } else if (stt->remaining_ticks == 0) {
+ backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON;
+ }
+ break;
+ case KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON:
+ shader_poweroff_timer_queue_cancel(kbdev);
-#ifdef CONFIG_MALI_CORESTACK
- if (in_desired_state) {
- in_desired_state &= kbase_pm_transition_core_type(kbdev,
- KBASE_PM_CORE_STACK, desired_stack_state, 0,
- &kbdev->stack_available_bitmap,
- &kbdev->pm.backend.powering_on_stack_state);
- }
-#endif /* CONFIG_MALI_CORESTACK */
-
- if (in_desired_state) {
- in_desired_state &= kbase_pm_transition_core_type(kbdev,
- KBASE_PM_CORE_TILER,
- kbdev->pm.backend.desired_tiler_state,
- 0, &tiler_available_bitmap,
- &kbdev->pm.backend.powering_on_tiler_state);
- in_desired_state &= kbase_pm_transition_core_type(kbdev,
- KBASE_PM_CORE_SHADER,
- kbdev->pm.backend.desired_shader_state,
- 0, &shader_available_bitmap,
- &kbdev->pm.backend.powering_on_shader_state);
-
- if (kbdev->shader_available_bitmap != shader_available_bitmap)
- KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
- NULL, 0u,
- (u32) shader_available_bitmap);
-
- kbdev->shader_available_bitmap = shader_available_bitmap;
-
- if (kbdev->tiler_available_bitmap != tiler_available_bitmap)
- KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
- NULL, NULL, 0u,
- (u32) tiler_available_bitmap);
+ if (!platform_power_down_only)
+ kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER,
+ shaders_ready, ACTION_PWROFF);
- kbdev->tiler_available_bitmap = tiler_available_bitmap;
+ KBASE_TRACE_ADD(kbdev,
+ PM_CORES_CHANGE_AVAILABLE,
+ NULL, NULL, 0u, 0u);
- } else if ((l2_available_bitmap &
- kbdev->gpu_props.props.raw_props.tiler_present) !=
- kbdev->gpu_props.props.raw_props.tiler_present) {
- tiler_available_bitmap = 0;
+ backend->shaders_state = KBASE_SHADERS_PEND_OFF_CORESTACK_ON;
+ break;
- kbdev->tiler_available_bitmap = tiler_available_bitmap;
- }
+ case KBASE_SHADERS_PEND_OFF_CORESTACK_ON:
+ if ((!shaders_trans && !shaders_ready) || platform_power_down_only) {
+ if (corestack_driver_control && !platform_power_down_only)
+ kbase_pm_invoke(kbdev, KBASE_PM_CORE_STACK,
+ stacks_avail, ACTION_PWROFF);
- /* State updated for slow-path waiters */
- kbdev->pm.backend.gpu_in_desired_state = in_desired_state;
-
- shader_ready_bitmap = kbase_pm_get_ready_cores(kbdev,
- KBASE_PM_CORE_SHADER);
- shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
- KBASE_PM_CORE_SHADER);
-
- /* Determine whether the cores are now available (even if the set of
- * available cores is empty). Note that they can be available even if
- * we've not finished transitioning to the desired state */
- if ((kbdev->shader_available_bitmap &
- kbdev->pm.backend.desired_shader_state)
- == kbdev->pm.backend.desired_shader_state &&
- (kbdev->tiler_available_bitmap &
- kbdev->pm.backend.desired_tiler_state)
- == kbdev->pm.backend.desired_tiler_state) {
- cores_are_available = true;
-
- KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE, NULL, NULL, 0u,
- (u32)(kbdev->shader_available_bitmap &
- kbdev->pm.backend.desired_shader_state));
- KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE_TILER, NULL, NULL, 0u,
- (u32)(kbdev->tiler_available_bitmap &
- kbdev->pm.backend.desired_tiler_state));
- }
+ backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_PEND_OFF;
+ }
+ break;
+
+ case KBASE_SHADERS_OFF_CORESTACK_PEND_OFF:
+ if ((!stacks_trans && !stacks_ready) || platform_power_down_only)
+ backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF;
+ break;
+
+ case KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF:
+ if (!hrtimer_active(&stt->timer) && !stt->cancel_queued)
+ backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF;
+ break;
+
+ case KBASE_SHADERS_RESET_WAIT:
+ /* Reset complete */
+ if (!backend->in_reset)
+ backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF;
+ break;
+ }
+ } while (backend->shaders_state != prev_state);
+}
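
The 'tick' timer approach described in the KBASE_SHADERS_ON_CORESTACK_ON case above is easier to see in isolation. The following is a minimal sketch of the pattern only: every name here (example_hysteresis, example_tick, and so on) is hypothetical, and the locking that the real driver performs under hwaccess_lock is omitted for brevity.

#include <linux/kernel.h>
#include <linux/hrtimer.h>
#include <linux/ktime.h>

struct example_hysteresis {
	struct hrtimer timer;           /* periodic 'tick' timer */
	ktime_t tick_period;            /* interval between ticks */
	unsigned int remaining_ticks;   /* armed by a power-off request */
	bool needed;                    /* keep the timer ticking at all? */
};

static enum hrtimer_restart example_tick(struct hrtimer *timer)
{
	struct example_hysteresis *h =
		container_of(timer, struct example_hysteresis, timer);

	/* Locking omitted: the real driver does all of this under
	 * hwaccess_lock. */
	if (h->remaining_ticks && --h->remaining_ticks == 0) {
		/* Countdown expired: the real driver would invoke the PM
		 * state machine here to power the shaders off. */
	}

	if (!h->needed)
		return HRTIMER_NORESTART;

	hrtimer_forward_now(timer, h->tick_period);
	return HRTIMER_RESTART;
}

static void example_hysteresis_init(struct example_hysteresis *h,
				    ktime_t period)
{
	hrtimer_init(&h->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	h->timer.function = example_tick;
	h->tick_period = period;
	h->remaining_ticks = 0;
	h->needed = false;
}

/* Power-off request: arm the countdown; only start the hrtimer if it is
 * not already ticking. */
static void example_request_poweroff(struct example_hysteresis *h,
				     unsigned int ticks)
{
	h->remaining_ticks = ticks;
	h->needed = true;
	if (!hrtimer_active(&h->timer))
		hrtimer_start(&h->timer, h->tick_period, HRTIMER_MODE_REL);
}

/* Shaders re-requested before expiry: just disarm the countdown; the
 * timer keeps ticking and no hrtimer_cancel() is needed. */
static void example_cancel_poweroff(struct example_hysteresis *h)
{
	h->remaining_ticks = 0;
}

In the driver itself the equivalent fields live in struct kbasep_pm_tick_timer_state, and the expiry action is the kbase_pm_update_state() call made from shader_tick_timer_callback() below.
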
- if (in_desired_state) {
- KBASE_DEBUG_ASSERT(cores_are_available);
+static bool kbase_pm_is_in_desired_state_nolock(struct kbase_device *kbdev)
+{
+ bool in_desired_state = true;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ if (kbase_pm_is_l2_desired(kbdev) &&
+ kbdev->pm.backend.l2_state != KBASE_L2_ON)
+ in_desired_state = false;
+ else if (!kbase_pm_is_l2_desired(kbdev) &&
+ kbdev->pm.backend.l2_state != KBASE_L2_OFF)
+ in_desired_state = false;
+
+ if (kbdev->pm.backend.shaders_desired &&
+ kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON)
+ in_desired_state = false;
+ else if (!kbdev->pm.backend.shaders_desired &&
+ kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF)
+ in_desired_state = false;
+
+ return in_desired_state;
+}
+
+static bool kbase_pm_is_in_desired_state(struct kbase_device *kbdev)
+{
+ bool in_desired_state;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ in_desired_state = kbase_pm_is_in_desired_state_nolock(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ return in_desired_state;
+}
+
+static bool kbase_pm_is_in_desired_state_with_l2_powered(
+ struct kbase_device *kbdev)
+{
+ bool in_desired_state = false;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ if (kbase_pm_is_in_desired_state_nolock(kbdev) &&
+ (kbdev->pm.backend.l2_state == KBASE_L2_ON))
+ in_desired_state = true;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ return in_desired_state;
+}
+
+static void kbase_pm_trace_power_state(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->hwaccess_lock);
#if defined(CONFIG_MALI_GATOR_SUPPORT)
- kbase_trace_mali_pm_status(KBASE_PM_CORE_L2,
- kbase_pm_get_ready_cores(kbdev,
- KBASE_PM_CORE_L2));
- kbase_trace_mali_pm_status(KBASE_PM_CORE_SHADER,
- kbase_pm_get_ready_cores(kbdev,
- KBASE_PM_CORE_SHADER));
- kbase_trace_mali_pm_status(KBASE_PM_CORE_TILER,
- kbase_pm_get_ready_cores(kbdev,
- KBASE_PM_CORE_TILER));
-#ifdef CONFIG_MALI_CORESTACK
+ kbase_trace_mali_pm_status(KBASE_PM_CORE_L2,
+ kbase_pm_get_ready_cores(kbdev,
+ KBASE_PM_CORE_L2));
+ kbase_trace_mali_pm_status(KBASE_PM_CORE_SHADER,
+ kbase_pm_get_ready_cores(kbdev,
+ KBASE_PM_CORE_SHADER));
+ kbase_trace_mali_pm_status(KBASE_PM_CORE_TILER,
+ kbase_pm_get_ready_cores(kbdev,
+ KBASE_PM_CORE_TILER));
+ if (corestack_driver_control)
kbase_trace_mali_pm_status(KBASE_PM_CORE_STACK,
kbase_pm_get_ready_cores(kbdev,
KBASE_PM_CORE_STACK));
-#endif /* CONFIG_MALI_CORESTACK */
#endif
- KBASE_TLSTREAM_AUX_PM_STATE(
- KBASE_PM_CORE_L2,
- kbase_pm_get_ready_cores(
- kbdev, KBASE_PM_CORE_L2));
- KBASE_TLSTREAM_AUX_PM_STATE(
- KBASE_PM_CORE_SHADER,
- kbase_pm_get_ready_cores(
- kbdev, KBASE_PM_CORE_SHADER));
- KBASE_TLSTREAM_AUX_PM_STATE(
- KBASE_PM_CORE_TILER,
- kbase_pm_get_ready_cores(
- kbdev,
- KBASE_PM_CORE_TILER));
-#ifdef CONFIG_MALI_CORESTACK
+ KBASE_TLSTREAM_AUX_PM_STATE(
+ KBASE_PM_CORE_L2,
+ kbase_pm_get_ready_cores(
+ kbdev, KBASE_PM_CORE_L2));
+ KBASE_TLSTREAM_AUX_PM_STATE(
+ KBASE_PM_CORE_SHADER,
+ kbase_pm_get_ready_cores(
+ kbdev, KBASE_PM_CORE_SHADER));
+ KBASE_TLSTREAM_AUX_PM_STATE(
+ KBASE_PM_CORE_TILER,
+ kbase_pm_get_ready_cores(
+ kbdev,
+ KBASE_PM_CORE_TILER));
+
+ if (corestack_driver_control)
KBASE_TLSTREAM_AUX_PM_STATE(
KBASE_PM_CORE_STACK,
kbase_pm_get_ready_cores(
kbdev,
KBASE_PM_CORE_STACK));
-#endif /* CONFIG_MALI_CORESTACK */
+}
+
+void kbase_pm_update_state(struct kbase_device *kbdev)
+{
+ enum kbase_shader_core_state prev_shaders_state =
+ kbdev->pm.backend.shaders_state;
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ if (!kbdev->pm.backend.gpu_powered)
+ return; /* Do nothing if the GPU is off */
+
+ kbase_pm_l2_update_state(kbdev);
+ kbase_pm_shaders_update_state(kbdev);
+
+ /* If the shaders just turned off, re-invoke the L2 state machine, in
+ * case it was waiting for the shaders to turn off before powering down
+ * the L2.
+ */
+ if (prev_shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF &&
+ kbdev->pm.backend.shaders_state == KBASE_SHADERS_OFF_CORESTACK_OFF)
+ kbase_pm_l2_update_state(kbdev);
+
+ if (kbase_pm_is_in_desired_state_nolock(kbdev)) {
KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL,
- kbdev->pm.backend.gpu_in_desired_state,
- (u32)kbdev->pm.backend.desired_shader_state);
- KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED_TILER, NULL, NULL, 0u,
- (u32)kbdev->pm.backend.desired_tiler_state);
+ true, kbdev->pm.backend.shaders_avail);
- /* Wake slow-path waiters. Job scheduler does not use this. */
- KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0);
+ kbase_pm_trace_power_state(kbdev);
+ KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0);
wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait);
}
+}
- spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
-
- kbdev->shader_ready_bitmap = shader_ready_bitmap;
- kbdev->shader_transitioning_bitmap = shader_transitioning_bitmap;
-
- /* The core availability policy is not allowed to keep core group 0
- * turned off (unless it was changing the l2 power state) */
- if (!((shader_ready_bitmap | shader_transitioning_bitmap) &
- kbdev->gpu_props.props.coherency_info.group[0].core_mask) &&
- (prev_l2_available_bitmap == desired_l2_state) &&
- !(kbase_pm_ca_get_core_mask(kbdev) &
- kbdev->gpu_props.props.coherency_info.group[0].core_mask))
- BUG();
-
- /* The core availability policy is allowed to keep core group 1 off,
- * but all jobs specifically targeting CG1 must fail */
- if (!((shader_ready_bitmap | shader_transitioning_bitmap) &
- kbdev->gpu_props.props.coherency_info.group[1].core_mask) &&
- !(kbase_pm_ca_get_core_mask(kbdev) &
- kbdev->gpu_props.props.coherency_info.group[1].core_mask))
- kbdev->pm.backend.cg1_disabled = true;
- else
- kbdev->pm.backend.cg1_disabled = false;
+static enum hrtimer_restart
+shader_tick_timer_callback(struct hrtimer *timer)
+{
+ struct kbasep_pm_tick_timer_state *stt = container_of(timer,
+ struct kbasep_pm_tick_timer_state, timer);
+ struct kbase_device *kbdev = container_of(stt, struct kbase_device,
+ pm.backend.shader_tick_timer);
+ struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
+ unsigned long flags;
+ enum hrtimer_restart restart = HRTIMER_NORESTART;
- return cores_are_available;
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ if (stt->remaining_ticks &&
+ backend->shaders_state == KBASE_SHADERS_WAIT_OFF_CORESTACK_ON) {
+ stt->remaining_ticks--;
+
+ /* If the remaining ticks just changed from 1 to 0, invoke the
+ * PM state machine to power off the shader cores.
+ */
+ if (!stt->remaining_ticks && !backend->shaders_desired)
+ kbase_pm_update_state(kbdev);
+ }
+
+ if (stt->needed) {
+ hrtimer_forward_now(timer, stt->configured_interval);
+ restart = HRTIMER_RESTART;
+ }
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ return restart;
}
-KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_nolock);
-/* Timeout for kbase_pm_check_transitions_sync when wait_event_killable has
+int kbase_pm_state_machine_init(struct kbase_device *kbdev)
+{
+ struct kbasep_pm_tick_timer_state *stt = &kbdev->pm.backend.shader_tick_timer;
+
+ stt->wq = alloc_workqueue("kbase_pm_shader_poweroff", WQ_HIGHPRI | WQ_UNBOUND, 1);
+ if (!stt->wq)
+ return -ENOMEM;
+
+ INIT_WORK(&stt->work, shader_poweroff_timer_stop_callback);
+
+ stt->needed = false;
+ hrtimer_init(&stt->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ stt->timer.function = shader_tick_timer_callback;
+ stt->configured_interval = HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS);
+ stt->configured_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER;
+
+ return 0;
+}
+
+void kbase_pm_state_machine_term(struct kbase_device *kbdev)
+{
+ hrtimer_cancel(&kbdev->pm.backend.shader_tick_timer.timer);
+ destroy_workqueue(kbdev->pm.backend.shader_tick_timer.wq);
+}
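
A minimal sketch of how an init path would pair these two calls; the surrounding function names are hypothetical and the real call sites are outside this hunk.

static int example_backend_init(struct kbase_device *kbdev)
{
	int err;

	/* Allocates the shader poweroff workqueue and sets up the tick timer */
	err = kbase_pm_state_machine_init(kbdev);
	if (err)
		return err;

	/* ... remaining backend initialisation ... */
	return 0;
}

static void example_backend_term(struct kbase_device *kbdev)
{
	/* Cancels the tick timer and destroys its workqueue */
	kbase_pm_state_machine_term(kbdev);
}
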
+
+void kbase_pm_reset_start_locked(struct kbase_device *kbdev)
+{
+ struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ backend->in_reset = true;
+ backend->l2_state = KBASE_L2_RESET_WAIT;
+ backend->shaders_state = KBASE_SHADERS_RESET_WAIT;
+
+ /* We're in a reset, so hwcnt will have been synchronously disabled by
+ * this function's caller as part of the reset process. We therefore
+ * know that any call to kbase_hwcnt_context_disable_atomic, if
+ * required to sync the hwcnt refcount with our internal state, is
+ * guaranteed to succeed.
+ */
+ backend->hwcnt_desired = false;
+ if (!backend->hwcnt_disabled) {
+ WARN_ON(!kbase_hwcnt_context_disable_atomic(
+ kbdev->hwcnt_gpu_ctx));
+ backend->hwcnt_disabled = true;
+ }
+
+ shader_poweroff_timer_queue_cancel(kbdev);
+}
+
+void kbase_pm_reset_complete(struct kbase_device *kbdev)
+{
+ struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ backend->in_reset = false;
+ kbase_pm_update_state(kbdev);
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+/* Timeout for kbase_pm_wait_for_desired_state when wait_event_killable has
* aborted due to a fatal signal. If the time spent waiting has exceeded this
* threshold then there is most likely a hardware issue. */
#define PM_TIMEOUT (5*HZ) /* 5s */
-void kbase_pm_check_transitions_sync(struct kbase_device *kbdev)
+static void kbase_pm_timed_out(struct kbase_device *kbdev)
+{
+ dev_err(kbdev->dev, "Power transition timed out unexpectedly\n");
+ dev_err(kbdev->dev, "Desired state :\n");
+ dev_err(kbdev->dev, "\tShader=%016llx\n",
+ kbdev->pm.backend.shaders_desired ? kbdev->pm.backend.shaders_avail : 0);
+ dev_err(kbdev->dev, "Current state :\n");
+ dev_err(kbdev->dev, "\tShader=%08x%08x\n",
+ kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(SHADER_READY_HI)),
+ kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(SHADER_READY_LO)));
+ dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
+ kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(TILER_READY_HI)),
+ kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(TILER_READY_LO)));
+ dev_err(kbdev->dev, "\tL2 =%08x%08x\n",
+ kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(L2_READY_HI)),
+ kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(L2_READY_LO)));
+ dev_err(kbdev->dev, "Cores transitioning :\n");
+ dev_err(kbdev->dev, "\tShader=%08x%08x\n",
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(
+ SHADER_PWRTRANS_HI)),
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(
+ SHADER_PWRTRANS_LO)));
+ dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(
+ TILER_PWRTRANS_HI)),
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(
+ TILER_PWRTRANS_LO)));
+ dev_err(kbdev->dev, "\tL2 =%08x%08x\n",
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(
+ L2_PWRTRANS_HI)),
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(
+ L2_PWRTRANS_LO)));
+
+ dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n");
+ if (kbase_prepare_to_reset_gpu(kbdev))
+ kbase_reset_gpu(kbdev);
+}
+
+void kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev)
{
unsigned long flags;
unsigned long timeout;
- bool cores_are_available;
- int ret;
+ int err;
- /* Force the transition to be checked and reported - the cores may be
- * 'available' (for job submission) but not fully powered up. */
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbase_pm_update_state(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ timeout = jiffies + PM_TIMEOUT;
- cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+ /* Wait for cores */
+ err = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait,
+ kbase_pm_is_in_desired_state_with_l2_powered(kbdev));
+
+ if (err < 0 && time_after(jiffies, timeout))
+ kbase_pm_timed_out(kbdev);
+}
- /* Don't need 'cores_are_available', because we don't return anything */
- CSTD_UNUSED(cores_are_available);
+void kbase_pm_wait_for_desired_state(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ unsigned long timeout;
+ int err;
+
+ /* Let the state machine latch the most recent desired state. */
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbase_pm_update_state(kbdev);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
timeout = jiffies + PM_TIMEOUT;
/* Wait for cores */
- ret = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait,
- kbdev->pm.backend.gpu_in_desired_state);
-
- if (ret < 0 && time_after(jiffies, timeout)) {
- dev_err(kbdev->dev, "Power transition timed out unexpectedly\n");
- dev_err(kbdev->dev, "Desired state :\n");
- dev_err(kbdev->dev, "\tShader=%016llx\n",
- kbdev->pm.backend.desired_shader_state);
- dev_err(kbdev->dev, "\tTiler =%016llx\n",
- kbdev->pm.backend.desired_tiler_state);
- dev_err(kbdev->dev, "Current state :\n");
- dev_err(kbdev->dev, "\tShader=%08x%08x\n",
- kbase_reg_read(kbdev,
- GPU_CONTROL_REG(SHADER_READY_HI)),
- kbase_reg_read(kbdev,
- GPU_CONTROL_REG(SHADER_READY_LO)));
- dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
- kbase_reg_read(kbdev,
- GPU_CONTROL_REG(TILER_READY_HI)),
- kbase_reg_read(kbdev,
- GPU_CONTROL_REG(TILER_READY_LO)));
- dev_err(kbdev->dev, "\tL2 =%08x%08x\n",
- kbase_reg_read(kbdev,
- GPU_CONTROL_REG(L2_READY_HI)),
- kbase_reg_read(kbdev,
- GPU_CONTROL_REG(L2_READY_LO)));
- dev_err(kbdev->dev, "Cores transitioning :\n");
- dev_err(kbdev->dev, "\tShader=%08x%08x\n",
- kbase_reg_read(kbdev, GPU_CONTROL_REG(
- SHADER_PWRTRANS_HI)),
- kbase_reg_read(kbdev, GPU_CONTROL_REG(
- SHADER_PWRTRANS_LO)));
- dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
- kbase_reg_read(kbdev, GPU_CONTROL_REG(
- TILER_PWRTRANS_HI)),
- kbase_reg_read(kbdev, GPU_CONTROL_REG(
- TILER_PWRTRANS_LO)));
- dev_err(kbdev->dev, "\tL2 =%08x%08x\n",
- kbase_reg_read(kbdev, GPU_CONTROL_REG(
- L2_PWRTRANS_HI)),
- kbase_reg_read(kbdev, GPU_CONTROL_REG(
- L2_PWRTRANS_LO)));
-#if KBASE_GPU_RESET_EN
- dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n");
- if (kbase_prepare_to_reset_gpu(kbdev))
- kbase_reset_gpu(kbdev);
-#endif /* KBASE_GPU_RESET_EN */
- }
+ err = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait,
+ kbase_pm_is_in_desired_state(kbdev));
+
+ if (err < 0 && time_after(jiffies, timeout))
+ kbase_pm_timed_out(kbdev);
}
-KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_sync);
+KBASE_EXPORT_TEST_API(kbase_pm_wait_for_desired_state);
void kbase_pm_enable_interrupts(struct kbase_device *kbdev)
{
@@ -957,7 +1256,6 @@ void kbase_pm_disable_interrupts(struct kbase_device *kbdev)
KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts);
-
/*
* pmu layout:
* 0x0000: PMU TAG (RO) (0xCAFECAFE)
@@ -990,12 +1288,7 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
kbdev->pm.backend.callback_power_resume(kbdev);
return;
} else if (kbdev->pm.backend.callback_power_on) {
- kbdev->pm.backend.callback_power_on(kbdev);
- /* If your platform properly keeps the GPU state you may use the
- * return value of the callback_power_on function to
- * conditionally reset the GPU on power up. Currently we are
- * conservative and always reset the GPU. */
- reset_required = true;
+ reset_required = kbdev->pm.backend.callback_power_on(kbdev);
}
spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
@@ -1014,8 +1307,14 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
mutex_unlock(&kbdev->mmu_hw_mutex);
- /* Lastly, enable the interrupts */
+ /* Enable the interrupts */
kbase_pm_enable_interrupts(kbdev);
+
+ /* Turn on the L2 caches */
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbdev->pm.backend.l2_desired = true;
+ kbase_pm_update_state(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
KBASE_EXPORT_TEST_API(kbase_pm_clock_on);
@@ -1028,7 +1327,7 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend)
lockdep_assert_held(&kbdev->pm.lock);
/* ASSERT that the cores should now be unavailable. No lock needed. */
- KBASE_DEBUG_ASSERT(kbdev->shader_available_bitmap == 0u);
+ WARN_ON(kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF);
kbdev->poweroff_pending = true;
@@ -1252,14 +1551,31 @@ static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_TLS_HASHING))
kbdev->hw_quirks_sc |= SC_TLS_HASH_ENABLE;
+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_IDVS_GROUP_SIZE)) {
+ int default_idvs_group_size = 0xF;
+ u32 tmp;
+
+ if (of_property_read_u32(kbdev->dev->of_node,
+ "idvs-group-size", &tmp))
+ tmp = default_idvs_group_size;
+
+ if (tmp > JM_MAX_IDVS_GROUP_SIZE) {
+ dev_err(kbdev->dev,
+ "idvs-group-size of %d is too large. Maximum value is %d",
+ tmp, JM_MAX_IDVS_GROUP_SIZE);
+ tmp = default_idvs_group_size;
+ }
+
+ kbdev->hw_quirks_jm |= tmp << JM_IDVS_GROUP_SIZE_SHIFT;
+ }
+
if (!kbdev->hw_quirks_jm)
kbdev->hw_quirks_jm = kbase_reg_read(kbdev,
GPU_CONTROL_REG(JM_CONFIG));
-#ifdef CONFIG_MALI_CORESTACK
#define MANUAL_POWER_CONTROL ((u32)(1 << 8))
- kbdev->hw_quirks_jm |= MANUAL_POWER_CONTROL;
-#endif /* CONFIG_MALI_CORESTACK */
+ if (corestack_driver_control)
+ kbdev->hw_quirks_jm |= MANUAL_POWER_CONTROL;
}
static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev)
@@ -1411,7 +1727,6 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
{
unsigned long irq_flags;
int err;
- bool resume_vinstr = false;
KBASE_DEBUG_ASSERT(NULL != kbdev);
lockdep_assert_held(&kbdev->pm.lock);
@@ -1438,15 +1753,9 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
/* The cores should be made unavailable due to the reset */
spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
- if (kbdev->shader_available_bitmap != 0u)
- KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
- NULL, 0u, (u32)0u);
- if (kbdev->tiler_available_bitmap != 0u)
- KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
- NULL, NULL, 0u, (u32)0u);
- kbdev->shader_available_bitmap = 0u;
- kbdev->tiler_available_bitmap = 0u;
- kbdev->l2_available_bitmap = 0u;
+ if (kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF)
+ KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
+ NULL, 0u, (u32)0u);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
/* Soft reset the GPU */
@@ -1457,10 +1766,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
err = kbase_pm_do_reset(kbdev);
spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
- if (kbdev->protected_mode)
- resume_vinstr = true;
kbdev->protected_mode = false;
-
spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
if (err)
@@ -1484,9 +1790,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
* false when called from kbase_pm_powerup */
if (kbdev->pm.backend.gpu_cycle_counter_requests &&
(flags & PM_ENABLE_IRQS)) {
- /* enable interrupts as the L2 may have to be powered on */
kbase_pm_enable_interrupts(kbdev);
- kbase_pm_request_l2_caches(kbdev);
/* Re-enable the counters if we need to */
spin_lock_irqsave(
@@ -1499,10 +1803,6 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
irq_flags);
- spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
- kbase_pm_release_l2_caches(kbdev);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
-
kbase_pm_disable_interrupts(kbdev);
}
@@ -1510,10 +1810,16 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
kbase_pm_enable_interrupts(kbdev);
exit:
- /* If GPU is leaving protected mode resume vinstr operation. */
- if (kbdev->vinstr_ctx && resume_vinstr)
- kbase_vinstr_resume(kbdev->vinstr_ctx);
-
+ /* Re-enable GPU hardware counters if we're resetting from protected
+ * mode.
+ */
+ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+ kbdev->protected_mode_hwcnt_desired = true;
+ if (kbdev->protected_mode_hwcnt_disabled) {
+ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+ kbdev->protected_mode_hwcnt_disabled = false;
+ }
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
return err;
}
@@ -1527,9 +1833,8 @@ exit:
* kbase_pm_request_gpu_cycle_counter() or
* kbase_pm_request_gpu_cycle_counter_l2_is_on() only
*
- * When this function is called the l2 cache must be on and the l2 cache users
- * count must have been incremented by a call to (
- * kbase_pm_request_l2_caches() or kbase_pm_request_l2_caches_l2_on() )
+ * When this function is called the l2 cache must be on - i.e., the GPU must be
+ * on.
*
* @kbdev: The kbase device structure of the device
*/
@@ -1561,8 +1866,6 @@ void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev)
KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
INT_MAX);
- kbase_pm_request_l2_caches(kbdev);
-
kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
}
@@ -1577,8 +1880,6 @@ void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev)
KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
INT_MAX);
- kbase_pm_request_l2_caches_l2_is_on(kbdev);
-
kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
}
@@ -1606,8 +1907,6 @@ void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev)
spin_unlock_irqrestore(
&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
flags);
-
- kbase_pm_release_l2_caches(kbdev);
}
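
With the L2 request/release calls removed, the cycle counter API reduces to the pairing below. This is an illustrative fragment only, assuming a powered GPU and a struct kbase_device *kbdev in scope, as the updated comment above requires.

kbase_pm_request_gpu_cycle_counter(kbdev);

/* ... read the cycle count / timestamp registers ... */

kbase_pm_release_gpu_cycle_counter(kbdev);
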
void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev)
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
index 0d3599a..e88b3a8 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
@@ -163,7 +163,7 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev);
* kbase_pm_disable_interrupts - Disable interrupts on the device.
*
* This prevents delivery of Power Management interrupts to the CPU so that
- * kbase_pm_check_transitions_nolock() will not be called from the IRQ handler
+ * kbase_pm_update_state() will not be called from the IRQ handler
* until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called.
*
* Interrupts are also disabled after a call to kbase_pm_clock_off().
@@ -206,58 +206,38 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags);
*/
void kbase_pm_reset_done(struct kbase_device *kbdev);
-
/**
- * kbase_pm_check_transitions_nolock - Check if there are any power transitions
- * to make, and if so start them.
- *
- * This function will check the desired_xx_state members of
- * struct kbase_pm_device_data and the actual status of the hardware to see if
- * any power transitions can be made at this time to make the hardware state
- * closer to the state desired by the power policy.
+ * kbase_pm_wait_for_desired_state - Wait for the desired power state to be
+ * reached
*
- * The return value can be used to check whether all the desired cores are
- * available, and so whether it's worth submitting a job (e.g. from a Power
- * Management IRQ).
+ * Wait for the L2 and shader power state machines to reach the states
+ * corresponding to the values of 'l2_desired' and 'shaders_desired'.
*
- * Note that this still returns true when desired_xx_state has no
- * cores. That is: of the no cores desired, none were *un*available. In
- * this case, the caller may still need to try submitting jobs. This is because
- * the Core Availability Policy might have taken us to an intermediate state
- * where no cores are powered, before powering on more cores (e.g. for core
- * rotation)
+ * The usual use-case for this is to ensure cores are 'READY' after performing
+ * a GPU Reset.
*
- * The caller must hold kbase_device.pm.power_change_lock
+ * Unlike kbase_pm_update_state(), the caller must not hold hwaccess_lock,
+ * because this function will take that lock itself.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
- *
- * Return: non-zero when all desired cores are available. That is,
- * it's worthwhile for the caller to submit a job.
- * false otherwise
*/
-bool kbase_pm_check_transitions_nolock(struct kbase_device *kbdev);
+void kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
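
An illustrative calling pattern, assuming a struct kbase_device *kbdev in scope; setting l2_desired directly here merely stands in for whatever normally updates the desired state:

unsigned long flags;

/* Latch a new desired state under hwaccess_lock ... */
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbdev->pm.backend.l2_desired = true;
kbase_pm_update_state(kbdev);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

/* ... then wait with the lock dropped (this call may sleep). */
kbase_pm_wait_for_desired_state(kbdev);
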
/**
- * kbase_pm_check_transitions_sync - Synchronous and locking variant of
- * kbase_pm_check_transitions_nolock()
- *
- * On returning, the desired state at the time of the call will have been met.
+ * kbase_pm_wait_for_l2_powered - Wait for the L2 cache to be powered on
*
- * There is nothing to stop the core being switched off by calls to
- * kbase_pm_release_cores() or kbase_pm_unrequest_cores(). Therefore, the
- * caller must have already made a call to
- * kbase_pm_request_cores()/kbase_pm_request_cores_sync() previously.
+ * Wait for the L2 to be powered on, and for the L2 and shader state machines to
+ * stabilise by reaching the states corresponding to the values of 'l2_desired'
+ * and 'shaders_desired'.
*
- * The usual use-case for this is to ensure cores are 'READY' after performing
- * a GPU Reset.
+ * kbdev->pm.active_count must be non-zero when calling this function.
*
- * Unlike kbase_pm_check_transitions_nolock(), the caller must not hold
- * kbase_device.pm.power_change_lock, because this function will take that
- * lock itself.
+ * Unlike kbase_pm_update_state(), the caller must not hold hwaccess_lock,
+ * because this function will take that lock itself.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
-void kbase_pm_check_transitions_sync(struct kbase_device *kbdev);
+void kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev);
/**
* kbase_pm_update_cores_state_nolock - Variant of kbase_pm_update_cores_state()
@@ -269,6 +249,25 @@ void kbase_pm_check_transitions_sync(struct kbase_device *kbdev);
void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev);
/**
+ * kbase_pm_update_state - Update the L2 and shader power state machines
+ * @kbdev: Device pointer
+ */
+void kbase_pm_update_state(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_state_machine_init - Initialize the state machines, primarily the
+ * shader poweroff timer
+ * @kbdev: Device pointer
+ */
+int kbase_pm_state_machine_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_state_machine_term - Clean up the PM state machines' data
+ * @kbdev: Device pointer
+ */
+void kbase_pm_state_machine_term(struct kbase_device *kbdev);
+
+/**
* kbase_pm_update_cores_state - Update the desired state of shader cores from
* the Power Policy, and begin any power
* transitions.
@@ -283,24 +282,6 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev);
void kbase_pm_update_cores_state(struct kbase_device *kbdev);
/**
- * kbase_pm_cancel_deferred_poweroff - Cancel any pending requests to power off
- * the GPU and/or shader cores.
- *
- * This should be called by any functions which directly power off the GPU.
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- */
-void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev);
-
-/**
- * kbasep_pm_init_core_use_bitmaps - Initialise data tracking the required
- * and used cores.
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- */
-void kbasep_pm_init_core_use_bitmaps(struct kbase_device *kbdev);
-
-/**
* kbasep_pm_metrics_init - Initialize the metrics gathering framework.
*
* This must be called before other metric gathering APIs are called.
@@ -577,4 +558,67 @@ void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev);
void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask);
#endif
+/**
+ * kbase_pm_reset_start_locked - Signal that GPU reset has started
+ * @kbdev: Device pointer
+ *
+ * Normal power management operation will be suspended until the reset has
+ * completed.
+ *
+ * Caller must hold hwaccess_lock.
+ */
+void kbase_pm_reset_start_locked(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_reset_complete - Signal that GPU reset has completed
+ * @kbdev: Device pointer
+ *
+ * Normal power management operation will be resumed. The power manager will
+ * re-evaluate what cores are needed and power on or off as required.
+ */
+void kbase_pm_reset_complete(struct kbase_device *kbdev);
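
Combined with kbase_pm_wait_for_desired_state(), a reset path takes roughly the shape below. This is only a sketch: perform_gpu_soft_reset() is a hypothetical placeholder for the actual reset sequence, and error handling is omitted.

unsigned long flags;

spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbase_pm_reset_start_locked(kbdev);     /* state machines enter *_RESET_WAIT */
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

perform_gpu_soft_reset(kbdev);          /* hypothetical: the reset itself */

kbase_pm_reset_complete(kbdev);         /* state machines re-evaluate */
kbase_pm_wait_for_desired_state(kbdev); /* ensure cores are READY again */
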
+
+/**
+ * kbase_pm_protected_override_enable - Enable the protected mode override
+ * @kbdev: Device pointer
+ *
+ * When the protected mode override is enabled, all shader cores are requested
+ * to power down, and the L2 power state can be controlled by
+ * kbase_pm_protected_l2_override().
+ *
+ * Caller must hold hwaccess_lock.
+ */
+void kbase_pm_protected_override_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_protected_override_disable - Disable the protected mode override
+ * @kbdev: Device pointer
+ *
+ * Caller must hold hwaccess_lock.
+ */
+void kbase_pm_protected_override_disable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_protected_l2_override - Control the protected mode L2 override
+ * @kbdev: Device pointer
+ * @override: true to enable the override, false to disable
+ *
+ * When the driver is transitioning in or out of protected mode, the L2 cache is
+ * forced to power off. This can be overridden to force the L2 cache to power
+ * on. This is required to change coherency settings on some GPUs.
+ */
+void kbase_pm_protected_l2_override(struct kbase_device *kbdev, bool override);
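
One plausible ordering, pieced together only from the comments above (the real protected-mode sequence lives in the job manager backend and is not part of this patch); hwaccess_lock is assumed held where those comments require it:

/* Entering protected mode: request shaders off so the L2 can power down. */
kbase_pm_protected_override_enable(kbdev);
kbase_pm_update_state(kbdev);

/* Once the L2 is down, force it back on to change coherency settings. */
kbase_pm_protected_l2_override(kbdev, true);

/* Coherency reprogrammed: drop the L2 override, then the main override. */
kbase_pm_protected_l2_override(kbdev, false);
kbase_pm_protected_override_disable(kbdev);
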
+
+/* If true, the driver should explicitly control corestack power management,
+ * instead of relying on the Power Domain Controller.
+ */
+extern bool corestack_driver_control;
+
+/* If true, disable powering-down of individual cores, and just power-down at
+ * the top-level using platform-specific code.
+ * If false, use the expected behaviour of controlling the individual cores
+ * from within the driver.
+ */
+extern bool platform_power_down_only;
+
#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c
index 6dd00a9..2f06a0a 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c
@@ -30,215 +30,51 @@
#include <mali_kbase_config_defaults.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
-static const struct kbase_pm_policy *const policy_list[] = {
+static const struct kbase_pm_policy *const all_policy_list[] = {
#ifdef CONFIG_MALI_NO_MALI
&kbase_pm_always_on_policy_ops,
- &kbase_pm_demand_policy_ops,
&kbase_pm_coarse_demand_policy_ops,
#if !MALI_CUSTOMER_RELEASE
- &kbase_pm_demand_always_powered_policy_ops,
- &kbase_pm_fast_start_policy_ops,
+ &kbase_pm_always_on_demand_policy_ops,
#endif
#else /* CONFIG_MALI_NO_MALI */
-#if !PLATFORM_POWER_DOWN_ONLY
- &kbase_pm_demand_policy_ops,
-#endif /* !PLATFORM_POWER_DOWN_ONLY */
&kbase_pm_coarse_demand_policy_ops,
- &kbase_pm_always_on_policy_ops,
#if !MALI_CUSTOMER_RELEASE
-#if !PLATFORM_POWER_DOWN_ONLY
- &kbase_pm_demand_always_powered_policy_ops,
- &kbase_pm_fast_start_policy_ops,
-#endif /* !PLATFORM_POWER_DOWN_ONLY */
+ &kbase_pm_always_on_demand_policy_ops,
#endif
+ &kbase_pm_always_on_policy_ops
#endif /* CONFIG_MALI_NO_MALI */
};
-/* The number of policies available in the system.
- * This is derived from the number of functions listed in policy_get_functions.
+/* A filtered list of policies available in the system, calculated by filtering
+ * all_policy_list based on the flags provided by each policy.
*/
-#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
-
-
-/* Function IDs for looking up Timeline Trace codes in
- * kbase_pm_change_state_trace_code */
-enum kbase_pm_func_id {
- KBASE_PM_FUNC_ID_REQUEST_CORES_START,
- KBASE_PM_FUNC_ID_REQUEST_CORES_END,
- KBASE_PM_FUNC_ID_RELEASE_CORES_START,
- KBASE_PM_FUNC_ID_RELEASE_CORES_END,
- /* Note: kbase_pm_unrequest_cores() is on the slow path, and we neither
- * expect to hit it nor tend to hit it very much anyway. We can detect
- * whether we need more instrumentation by a difference between
- * PM_CHECKTRANS events and PM_SEND/HANDLE_EVENT. */
-
- /* Must be the last */
- KBASE_PM_FUNC_ID_COUNT
-};
-
-
-/* State changes during request/unrequest/release-ing cores */
-enum {
- KBASE_PM_CHANGE_STATE_SHADER = (1u << 0),
- KBASE_PM_CHANGE_STATE_TILER = (1u << 1),
-
- /* These two must be last */
- KBASE_PM_CHANGE_STATE_MASK = (KBASE_PM_CHANGE_STATE_TILER |
- KBASE_PM_CHANGE_STATE_SHADER),
- KBASE_PM_CHANGE_STATE_COUNT = KBASE_PM_CHANGE_STATE_MASK + 1
-};
-typedef u32 kbase_pm_change_state;
+static const struct kbase_pm_policy *enabled_policy_list[ARRAY_SIZE(all_policy_list)];
+static size_t enabled_policy_count;
-/**
- * kbasep_pm_do_poweroff_cores - Process a poweroff request and power down any
- * requested shader cores
- * @kbdev: Device pointer
- */
-static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev)
+static void generate_filtered_policy_list(void)
{
- u64 prev_shader_state = kbdev->pm.backend.desired_shader_state;
- u64 prev_tiler_state = kbdev->pm.backend.desired_tiler_state;
-
- lockdep_assert_held(&kbdev->hwaccess_lock);
-
- kbdev->pm.backend.desired_shader_state &=
- ~kbdev->pm.backend.shader_poweroff_pending;
- kbdev->pm.backend.desired_tiler_state &=
- ~kbdev->pm.backend.tiler_poweroff_pending;
-
- kbdev->pm.backend.shader_poweroff_pending = 0;
- kbdev->pm.backend.tiler_poweroff_pending = 0;
-
- if (prev_shader_state != kbdev->pm.backend.desired_shader_state ||
- prev_tiler_state !=
- kbdev->pm.backend.desired_tiler_state ||
- kbdev->pm.backend.ca_in_transition) {
- bool cores_are_available;
-
- cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
-
- /* Don't need 'cores_are_available',
- * because we don't return anything */
- CSTD_UNUSED(cores_are_available);
- }
-}
-
-static enum hrtimer_restart
-kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer)
-{
- struct kbase_device *kbdev;
- unsigned long flags;
-
- kbdev = container_of(timer, struct kbase_device,
- pm.backend.gpu_poweroff_timer);
-
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-
- /* It is safe for this call to do nothing if the work item is already
- * queued. The worker function will read the must up-to-date state of
- * kbdev->pm.backend.gpu_poweroff_pending under lock.
- *
- * If a state change occurs while the worker function is processing,
- * this call will succeed as a work item can be requeued once it has
- * started processing.
- */
- if (kbdev->pm.backend.gpu_poweroff_pending)
- queue_work(kbdev->pm.backend.gpu_poweroff_wq,
- &kbdev->pm.backend.gpu_poweroff_work);
-
- if (kbdev->pm.backend.shader_poweroff_pending ||
- kbdev->pm.backend.tiler_poweroff_pending) {
- kbdev->pm.backend.shader_poweroff_pending_time--;
-
- KBASE_DEBUG_ASSERT(
- kbdev->pm.backend.shader_poweroff_pending_time
- >= 0);
-
- if (!kbdev->pm.backend.shader_poweroff_pending_time)
- kbasep_pm_do_poweroff_cores(kbdev);
- }
+ size_t i;
- if (kbdev->pm.backend.poweroff_timer_needed) {
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ for (i = 0; i < ARRAY_SIZE(all_policy_list); ++i) {
+ const struct kbase_pm_policy *pol = all_policy_list[i];
- hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time);
+ if (platform_power_down_only &&
+ (pol->flags & KBASE_PM_POLICY_FLAG_DISABLED_WITH_POWER_DOWN_ONLY))
+ continue;
- return HRTIMER_RESTART;
+ enabled_policy_list[enabled_policy_count++] = pol;
}
-
- kbdev->pm.backend.poweroff_timer_running = false;
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
- return HRTIMER_NORESTART;
-}
-
-static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data)
-{
- unsigned long flags;
- struct kbase_device *kbdev;
- bool do_poweroff = false;
-
- kbdev = container_of(data, struct kbase_device,
- pm.backend.gpu_poweroff_work);
-
- mutex_lock(&kbdev->pm.lock);
-
- if (kbdev->pm.backend.gpu_poweroff_pending == 0) {
- mutex_unlock(&kbdev->pm.lock);
- return;
- }
-
- kbdev->pm.backend.gpu_poweroff_pending--;
-
- if (kbdev->pm.backend.gpu_poweroff_pending > 0) {
- mutex_unlock(&kbdev->pm.lock);
- return;
- }
-
- KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_poweroff_pending == 0);
-
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-
- /* Only power off the GPU if a request is still pending */
- if (!kbdev->pm.backend.pm_current_policy->get_core_active(kbdev))
- do_poweroff = true;
-
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
- if (do_poweroff) {
- kbdev->pm.backend.poweroff_timer_needed = false;
- hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
- kbdev->pm.backend.poweroff_timer_running = false;
-
- /* Power off the GPU */
- kbase_pm_do_poweroff(kbdev, false);
- }
-
- mutex_unlock(&kbdev->pm.lock);
}
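
For illustration, code consuming the filtered list could look like the hypothetical helper below (the ->name field is the same one already used by the policy trace messages in this file):

/* Hypothetical debug helper: list the policies that survived filtering. */
static void example_print_policies(struct kbase_device *kbdev)
{
	size_t i;

	for (i = 0; i < enabled_policy_count; i++)
		dev_info(kbdev->dev, "pm policy %zu: %s\n",
			 i, enabled_policy_list[i]->name);
}
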
int kbase_pm_policy_init(struct kbase_device *kbdev)
{
- struct workqueue_struct *wq;
-
- wq = alloc_workqueue("kbase_pm_do_poweroff",
- WQ_HIGHPRI | WQ_UNBOUND, 1);
- if (!wq)
- return -ENOMEM;
-
- kbdev->pm.backend.gpu_poweroff_wq = wq;
- INIT_WORK(&kbdev->pm.backend.gpu_poweroff_work,
- kbasep_pm_do_gpu_poweroff_wq);
- hrtimer_init(&kbdev->pm.backend.gpu_poweroff_timer,
- CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- kbdev->pm.backend.gpu_poweroff_timer.function =
- kbasep_pm_do_gpu_poweroff_callback;
- kbdev->pm.backend.pm_current_policy = policy_list[0];
+ generate_filtered_policy_list();
+ if (enabled_policy_count == 0)
+ return -EINVAL;
+
+ kbdev->pm.backend.pm_current_policy = enabled_policy_list[0];
kbdev->pm.backend.pm_current_policy->init(kbdev);
- kbdev->pm.gpu_poweroff_time =
- HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS);
- kbdev->pm.poweroff_shader_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER;
- kbdev->pm.poweroff_gpu_ticks = DEFAULT_PM_POWEROFF_TICK_GPU;
return 0;
}
@@ -246,29 +82,6 @@ int kbase_pm_policy_init(struct kbase_device *kbdev)
void kbase_pm_policy_term(struct kbase_device *kbdev)
{
kbdev->pm.backend.pm_current_policy->term(kbdev);
- destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wq);
-}
-
-void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev)
-{
- unsigned long flags;
-
- lockdep_assert_held(&kbdev->pm.lock);
-
- kbdev->pm.backend.poweroff_timer_needed = false;
- hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- kbdev->pm.backend.poweroff_timer_running = false;
-
- /* If wq is already running but is held off by pm.lock, make sure it has
- * no effect */
- kbdev->pm.backend.gpu_poweroff_pending = 0;
-
- kbdev->pm.backend.shader_poweroff_pending = 0;
- kbdev->pm.backend.tiler_poweroff_pending = 0;
- kbdev->pm.backend.shader_poweroff_pending_time = 0;
-
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
void kbase_pm_update_active(struct kbase_device *kbdev)
@@ -291,35 +104,24 @@ void kbase_pm_update_active(struct kbase_device *kbdev)
kbdev->pm.backend.pm_current_policy->name);
if (active) {
- if (backend->gpu_poweroff_pending) {
- /* Cancel any pending power off request */
- backend->gpu_poweroff_pending = 0;
-
- /* If a request was pending then the GPU was still
- * powered, so no need to continue */
- if (!kbdev->poweroff_pending) {
- spin_unlock_irqrestore(&kbdev->hwaccess_lock,
- flags);
- return;
- }
- }
-
- if (!backend->poweroff_timer_running && !backend->gpu_powered &&
- (pm->poweroff_gpu_ticks ||
- pm->poweroff_shader_ticks)) {
- backend->poweroff_timer_needed = true;
- backend->poweroff_timer_running = true;
- hrtimer_start(&backend->gpu_poweroff_timer,
- pm->gpu_poweroff_time,
- HRTIMER_MODE_REL);
- }
-
/* Power on the GPU and any cores requested by the policy */
- if (pm->backend.poweroff_wait_in_progress) {
+ if (!pm->backend.invoke_poweroff_wait_wq_when_l2_off &&
+ pm->backend.poweroff_wait_in_progress) {
KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
pm->backend.poweron_required = true;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
} else {
+			/* Cancel the invocation of
+			 * kbase_pm_gpu_poweroff_wait_wq() from the L2 state
+			 * machine. This is safe - if
+			 * invoke_poweroff_wait_wq_when_l2_off is true, then
+ * the poweroff work hasn't even been queued yet,
+ * meaning we can go straight to powering on.
+ */
+ pm->backend.invoke_poweroff_wait_wq_when_l2_off = false;
+ pm->backend.poweroff_wait_in_progress = false;
+ pm->backend.l2_desired = true;
+
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
kbase_pm_do_poweron(kbdev, false);
}
@@ -328,89 +130,21 @@ void kbase_pm_update_active(struct kbase_device *kbdev)
* when there are contexts active */
KBASE_DEBUG_ASSERT(pm->active_count == 0);
- if (backend->shader_poweroff_pending ||
- backend->tiler_poweroff_pending) {
- backend->shader_poweroff_pending = 0;
- backend->tiler_poweroff_pending = 0;
- backend->shader_poweroff_pending_time = 0;
- }
-
/* Request power off */
if (pm->backend.gpu_powered) {
- if (pm->poweroff_gpu_ticks) {
- backend->gpu_poweroff_pending =
- pm->poweroff_gpu_ticks;
- backend->poweroff_timer_needed = true;
- if (!backend->poweroff_timer_running) {
- /* Start timer if not running (eg if
- * power policy has been changed from
- * always_on to something else). This
- * will ensure the GPU is actually
- * powered off */
- backend->poweroff_timer_running
- = true;
- hrtimer_start(
- &backend->gpu_poweroff_timer,
- pm->gpu_poweroff_time,
- HRTIMER_MODE_REL);
- }
- spin_unlock_irqrestore(&kbdev->hwaccess_lock,
- flags);
- } else {
- spin_unlock_irqrestore(&kbdev->hwaccess_lock,
- flags);
-
- /* Power off the GPU immediately */
- kbase_pm_do_poweroff(kbdev, false);
- }
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ /* Power off the GPU immediately */
+ kbase_pm_do_poweroff(kbdev, false);
} else {
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
}
}
-/**
- * get_desired_shader_bitmap - Get the desired shader bitmap, based on the
- * current power policy
- *
- * @kbdev: The kbase device structure for the device
- *
- * Queries the current power policy to determine if shader cores will be
- * required in the current state, and apply any HW workarounds.
- *
- * Return: bitmap of desired shader cores
- */
-
-static u64 get_desired_shader_bitmap(struct kbase_device *kbdev)
-{
- u64 desired_bitmap = 0u;
-
- lockdep_assert_held(&kbdev->hwaccess_lock);
-
- if (kbdev->pm.backend.pm_current_policy->shaders_needed(kbdev))
- desired_bitmap = kbase_pm_ca_get_core_mask(kbdev);
-
- WARN(!desired_bitmap && kbdev->shader_needed_cnt,
- "Shader cores are needed but policy '%s' did not make them needed",
- kbdev->pm.backend.pm_current_policy->name);
-
- if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) {
- /* Unless XAFFINITY is supported, enable core 0 if tiler
- * required, regardless of core availability
- */
- if (kbdev->tiler_needed_cnt > 0)
- desired_bitmap |= 1;
- }
-
- return desired_bitmap;
-}
-
void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
{
- u64 desired_bitmap;
- u64 desired_tiler_bitmap;
- bool cores_are_available;
- bool do_poweroff = false;
+ bool shaders_desired;
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -419,105 +153,20 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
if (kbdev->pm.backend.poweroff_wait_in_progress)
return;
- if (kbdev->protected_mode_transition && !kbdev->shader_needed_cnt &&
- !kbdev->tiler_needed_cnt) {
+ if (kbdev->pm.backend.protected_transition_override)
/* We are trying to change in/out of protected mode - force all
* cores off so that the L2 powers down */
- desired_bitmap = 0;
- desired_tiler_bitmap = 0;
- } else {
- desired_bitmap = get_desired_shader_bitmap(kbdev);
-
- if (kbdev->tiler_needed_cnt > 0)
- desired_tiler_bitmap = 1;
- else
- desired_tiler_bitmap = 0;
- }
+ shaders_desired = false;
+ else
+ shaders_desired = kbdev->pm.backend.pm_current_policy->shaders_needed(kbdev);
- if (kbdev->pm.backend.desired_shader_state != desired_bitmap)
+ if (kbdev->pm.backend.shaders_desired != shaders_desired) {
KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u,
- (u32)desired_bitmap);
- /* Are any cores being powered on? */
- if (~kbdev->pm.backend.desired_shader_state & desired_bitmap ||
- ~kbdev->pm.backend.desired_tiler_state & desired_tiler_bitmap ||
- kbdev->pm.backend.ca_in_transition) {
- /* Check if we are powering off any cores before updating shader
- * state */
- if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap ||
- kbdev->pm.backend.desired_tiler_state &
- ~desired_tiler_bitmap) {
- /* Start timer to power off cores */
- kbdev->pm.backend.shader_poweroff_pending |=
- (kbdev->pm.backend.desired_shader_state &
- ~desired_bitmap);
- kbdev->pm.backend.tiler_poweroff_pending |=
- (kbdev->pm.backend.desired_tiler_state &
- ~desired_tiler_bitmap);
-
- if (kbdev->pm.poweroff_shader_ticks &&
- !kbdev->protected_mode_transition)
- kbdev->pm.backend.shader_poweroff_pending_time =
- kbdev->pm.poweroff_shader_ticks;
- else
- do_poweroff = true;
- }
-
- kbdev->pm.backend.desired_shader_state = desired_bitmap;
- kbdev->pm.backend.desired_tiler_state = desired_tiler_bitmap;
-
- /* If any cores are being powered on, transition immediately */
- cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
- } else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap ||
- kbdev->pm.backend.desired_tiler_state &
- ~desired_tiler_bitmap) {
- /* Start timer to power off cores */
- kbdev->pm.backend.shader_poweroff_pending |=
- (kbdev->pm.backend.desired_shader_state &
- ~desired_bitmap);
- kbdev->pm.backend.tiler_poweroff_pending |=
- (kbdev->pm.backend.desired_tiler_state &
- ~desired_tiler_bitmap);
- if (kbdev->pm.poweroff_shader_ticks &&
- !kbdev->protected_mode_transition)
- kbdev->pm.backend.shader_poweroff_pending_time =
- kbdev->pm.poweroff_shader_ticks;
- else
- kbasep_pm_do_poweroff_cores(kbdev);
- } else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 &&
- desired_tiler_bitmap != 0 &&
- kbdev->pm.backend.poweroff_timer_needed) {
- /* If power policy is keeping cores on despite there being no
- * active contexts then disable poweroff timer as it isn't
- * required.
- * Only reset poweroff_timer_needed if we're not in the middle
- * of the power off callback */
- kbdev->pm.backend.poweroff_timer_needed = false;
- }
+ (u32)kbdev->pm.backend.shaders_desired);
- /* Ensure timer does not power off wanted cores and make sure to power
- * off unwanted cores */
- if (kbdev->pm.backend.shader_poweroff_pending ||
- kbdev->pm.backend.tiler_poweroff_pending) {
- kbdev->pm.backend.shader_poweroff_pending &=
- ~(kbdev->pm.backend.desired_shader_state &
- desired_bitmap);
- kbdev->pm.backend.tiler_poweroff_pending &=
- ~(kbdev->pm.backend.desired_tiler_state &
- desired_tiler_bitmap);
-
- if (!kbdev->pm.backend.shader_poweroff_pending &&
- !kbdev->pm.backend.tiler_poweroff_pending)
- kbdev->pm.backend.shader_poweroff_pending_time = 0;
+ kbdev->pm.backend.shaders_desired = shaders_desired;
+ kbase_pm_update_state(kbdev);
}
-
- /* Shader poweroff is deferred to the end of the function, to eliminate
- * issues caused by the core availability policy recursing into this
- * function */
- if (do_poweroff)
- kbasep_pm_do_poweroff_cores(kbdev);
-
- /* Don't need 'cores_are_available', because we don't return anything */
- CSTD_UNUSED(cores_are_available);
}
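
/* Editorial note: after this rework a power policy only has to answer a
 * boolean question; the per-core bitmaps, poweroff timers and pending masks
 * above are gone. The following is a minimal sketch (not part of this patch)
 * of what a shaders_needed() callback looks like under the new scheme - it
 * roughly mirrors the in-tree coarse_demand behaviour, but the exact policy
 * body here is illustrative only.
 */
static bool example_shaders_needed(struct kbase_device *kbdev)
{
	/* Keep shaders powered whenever a context holds a PM reference;
	 * kbase_pm_update_state() then drives the L2/shader state machines
	 * from backend.shaders_desired.
	 */
	return kbase_pm_is_active(kbdev);
}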
void kbase_pm_update_cores_state(struct kbase_device *kbdev)
@@ -533,12 +182,11 @@ void kbase_pm_update_cores_state(struct kbase_device *kbdev)
int kbase_pm_list_policies(const struct kbase_pm_policy * const **list)
{
- if (!list)
- return POLICY_COUNT;
-
- *list = policy_list;
+ WARN_ON(enabled_policy_count == 0);
+ if (list)
+ *list = enabled_policy_list;
- return POLICY_COUNT;
+ return enabled_policy_count;
}
KBASE_EXPORT_TEST_API(kbase_pm_list_policies);
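
/* Editorial note: the policy list and its length now come from the
 * enabled-policy table rather than a compile-time POLICY_COUNT, so callers
 * must iterate using the returned count. A hedged sketch of the lookup
 * pattern (the power_policy sysfs store handler does essentially this before
 * calling kbase_pm_set_policy(); the function name here is illustrative).
 */
static const struct kbase_pm_policy *example_find_policy(const char *name)
{
	const struct kbase_pm_policy *const *policies;
	int count = kbase_pm_list_policies(&policies);
	int i;

	for (i = 0; i < count; i++)
		if (!strcmp(policies[i]->name, name))
			return policies[i];

	return NULL;
}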
@@ -607,171 +255,3 @@ void kbase_pm_set_policy(struct kbase_device *kbdev,
}
KBASE_EXPORT_TEST_API(kbase_pm_set_policy);
-
-void kbase_pm_request_cores(struct kbase_device *kbdev,
- bool tiler_required, bool shader_required)
-{
- kbase_pm_change_state change_gpu_state = 0u;
-
- KBASE_DEBUG_ASSERT(kbdev != NULL);
-
- lockdep_assert_held(&kbdev->hwaccess_lock);
-
- if (shader_required) {
- int cnt = ++kbdev->shader_needed_cnt;
-
- if (cnt == 1)
- change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
-
- KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt != 0);
- }
-
- if (tiler_required) {
- int cnt = ++kbdev->tiler_needed_cnt;
-
- if (cnt == 1)
- change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
-
- KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0);
- }
-
- if (change_gpu_state) {
- KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL,
- NULL, 0u, kbdev->shader_needed_cnt);
- KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_TILER_NEEDED, NULL,
- NULL, 0u, kbdev->tiler_needed_cnt);
-
- kbase_pm_update_cores_state_nolock(kbdev);
- }
-}
-
-KBASE_EXPORT_TEST_API(kbase_pm_request_cores);
-
-void kbase_pm_release_cores(struct kbase_device *kbdev,
- bool tiler_required, bool shader_required)
-{
- kbase_pm_change_state change_gpu_state = 0u;
-
- KBASE_DEBUG_ASSERT(kbdev != NULL);
-
- lockdep_assert_held(&kbdev->hwaccess_lock);
-
- if (shader_required) {
- int cnt;
-
- KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt > 0);
-
- cnt = --kbdev->shader_needed_cnt;
-
- if (0 == cnt) {
- change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
- }
- }
-
- if (tiler_required) {
- int cnt;
-
- KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
-
- cnt = --kbdev->tiler_needed_cnt;
-
- if (0 == cnt)
- change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
- }
-
- if (change_gpu_state) {
- KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_NEEDED, NULL,
- NULL, 0u, kbdev->shader_needed_cnt);
- KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_TILER_NEEDED, NULL,
- NULL, 0u, kbdev->tiler_needed_cnt);
-
- kbase_pm_update_cores_state_nolock(kbdev);
- }
-}
-
-KBASE_EXPORT_TEST_API(kbase_pm_release_cores);
-
-void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
- bool tiler_required, bool shader_required)
-{
- unsigned long flags;
-
- kbase_pm_wait_for_poweroff_complete(kbdev);
-
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- kbase_pm_request_cores(kbdev, tiler_required, shader_required);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
- kbase_pm_check_transitions_sync(kbdev);
-}
-
-KBASE_EXPORT_TEST_API(kbase_pm_request_cores_sync);
-
-static void kbase_pm_l2_caches_ref(struct kbase_device *kbdev)
-{
- lockdep_assert_held(&kbdev->hwaccess_lock);
-
- kbdev->l2_users_count++;
-
- KBASE_DEBUG_ASSERT(kbdev->l2_users_count != 0);
-
- /* Check for the required L2 transitions.
- * Caller would block here for the L2 caches of all core groups to be
- * powered on, so need to inform the Hw to power up all the L2 caches.
- * Can't rely on the l2_users_count value being non-zero previously to
- * avoid checking for the transition, as the count could be non-zero
- * even if not all the instances of L2 cache are powered up since
- * currently the power status of L2 is not tracked separately for each
- * core group. Also if the GPU is reset while the L2 is on, L2 will be
- * off but the count will be non-zero.
- */
- kbase_pm_check_transitions_nolock(kbdev);
-}
-
-void kbase_pm_request_l2_caches(struct kbase_device *kbdev)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-
- /* Take the reference on l2_users_count and check core transitions.
- */
- kbase_pm_l2_caches_ref(kbdev);
-
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
- wait_event(kbdev->pm.backend.l2_powered_wait,
- kbdev->pm.backend.l2_powered == 1);
-}
-
-KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches);
-
-void kbase_pm_request_l2_caches_nolock(struct kbase_device *kbdev)
-{
- /* Take the reference on l2_users_count and check core transitions.
- */
- kbase_pm_l2_caches_ref(kbdev);
-}
-
-void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev)
-{
- lockdep_assert_held(&kbdev->hwaccess_lock);
-
- kbdev->l2_users_count++;
-}
-
-KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches_l2_is_on);
-
-void kbase_pm_release_l2_caches(struct kbase_device *kbdev)
-{
- lockdep_assert_held(&kbdev->hwaccess_lock);
-
- KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0);
-
- --kbdev->l2_users_count;
-
- if (!kbdev->l2_users_count)
- kbase_pm_check_transitions_nolock(kbdev);
-}
-
-KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches);
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_policy.h b/mali_kbase/backend/gpu/mali_kbase_pm_policy.h
index 2e86929..28d258f 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_policy.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_policy.h
@@ -64,61 +64,10 @@ void kbase_pm_update_active(struct kbase_device *kbdev);
*/
void kbase_pm_update_cores(struct kbase_device *kbdev);
-
-enum kbase_pm_cores_ready {
- KBASE_CORES_NOT_READY = 0,
- KBASE_NEW_AFFINITY = 1,
- KBASE_CORES_READY = 2
-};
-
-
-/**
- * kbase_pm_request_cores - Request the desired cores to be powered up.
- * @kbdev: Kbase device
- * @tiler_required: true if tiler is required
- * @shader_required: true if shaders are required
- *
- * Called by the scheduler to request power to the desired cores.
- *
- * There is no guarantee that the HW will be powered up on return. Use
- * kbase_pm_cores_requested()/kbase_pm_cores_ready() to verify that cores are
- * now powered, or instead call kbase_pm_request_cores_sync().
- */
-void kbase_pm_request_cores(struct kbase_device *kbdev, bool tiler_required,
- bool shader_required);
-
-/**
- * kbase_pm_request_cores_sync - Synchronous variant of kbase_pm_request_cores()
- * @kbdev: Kbase device
- * @tiler_required: true if tiler is required
- * @shader_required: true if shaders are required
- *
- * When this function returns, the @shader_cores will be in the READY state.
- *
- * This is safe variant of kbase_pm_check_transitions_sync(): it handles the
- * work of ensuring the requested cores will remain powered until a matching
- * call to kbase_pm_unrequest_cores()/kbase_pm_release_cores() (as appropriate)
- * is made.
- */
-void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
- bool tiler_required, bool shader_required);
-
-/**
- * kbase_pm_release_cores - Request the desired cores to be powered down.
- * @kbdev: Kbase device
- * @tiler_required: true if tiler is required
- * @shader_required: true if shaders are required
- *
- * Called by the scheduler to release its power reference on the desired cores.
- */
-void kbase_pm_release_cores(struct kbase_device *kbdev, bool tiler_required,
- bool shader_required);
-
/**
* kbase_pm_cores_requested - Check that a power request has been locked into
* the HW.
* @kbdev: Kbase device
- * @tiler_required: true if tiler is required
* @shader_required: true if shaders are required
*
* Called by the scheduler to check if a power on request has been locked into
@@ -136,112 +85,23 @@ void kbase_pm_release_cores(struct kbase_device *kbdev, bool tiler_required,
* request is still pending.
*/
static inline bool kbase_pm_cores_requested(struct kbase_device *kbdev,
- bool tiler_required, bool shader_required)
+ bool shader_required)
{
lockdep_assert_held(&kbdev->hwaccess_lock);
- if ((shader_required && !kbdev->shader_available_bitmap) ||
- (tiler_required && !kbdev->tiler_available_bitmap))
+ /* If the L2 & tiler are not on or pending, then the tiler is not yet
+ * available, and shaders are definitely not powered.
+ */
+ if (kbdev->pm.backend.l2_state != KBASE_L2_PEND_ON &&
+ kbdev->pm.backend.l2_state != KBASE_L2_ON)
return false;
- return true;
-}
-
-/**
- * kbase_pm_cores_ready - Check that the required cores have been powered on by
- * the HW.
- * @kbdev: Kbase device
- * @tiler_required: true if tiler is required
- * @shader_required: true if shaders are required
- *
- * Called by the scheduler to check if cores are ready.
- *
- * Note that the caller should ensure that they have first requested cores
- * before calling this function.
- *
- * Caller must hold the hwaccess_lock.
- *
- * Return: true if the cores are ready.
- */
-static inline bool kbase_pm_cores_ready(struct kbase_device *kbdev,
- bool tiler_required, bool shader_required)
-{
- lockdep_assert_held(&kbdev->hwaccess_lock);
-
- if ((shader_required && !kbdev->shader_ready_bitmap) ||
- (tiler_required && !kbdev->tiler_available_bitmap))
+ if (shader_required &&
+ kbdev->pm.backend.shaders_state != KBASE_SHADERS_PEND_ON_CORESTACK_ON &&
+ kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON)
return false;
return true;
}
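
/* Editorial note: a brief usage sketch of the rewritten helper, assuming a
 * caller that already holds hwaccess_lock. The real callers live in the job
 * ring-buffer backend; this wrapper and its name are illustrative only.
 */
static bool example_atom_cores_requested(struct kbase_device *kbdev,
		bool needs_shaders)
{
	lockdep_assert_held(&kbdev->hwaccess_lock);

	/* Only treat an atom as submittable once its power request has been
	 * locked into the L2/shader state machines.
	 */
	return kbase_pm_cores_requested(kbdev, needs_shaders);
}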
-/**
- * kbase_pm_request_l2_caches - Request l2 caches
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- *
- * Request the use of l2 caches for all core groups, power up, wait and prevent
- * the power manager from powering down the l2 caches.
- *
- * This tells the power management that the caches should be powered up, and
- * they should remain powered, irrespective of the usage of shader cores. This
- * does not return until the l2 caches are powered up.
- *
- * The caller must call kbase_pm_release_l2_caches() when they are finished
- * to allow normal power management of the l2 caches to resume.
- *
- * This should only be used when power management is active.
- */
-void kbase_pm_request_l2_caches(struct kbase_device *kbdev);
-
-/**
- * kbase_pm_request_l2_caches_nolock - Request l2 caches, nolock version
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- *
- * Request the use of l2 caches for all core groups and power up without
- * waiting for power manager to actually power up the cores. This is done
- * because the call to this function is done from within the atomic context
- * and the actual l2 caches being powered up is checked at a later stage.
- * The reference taken on l2 caches is removed when the protected mode atom
- * is released so there is no need to make a call to a matching
- * release_l2_caches().
- *
- * This function is used specifically for the case when l2 caches are
- * to be powered up as part of the sequence for entering protected mode.
- *
- * This should only be used when power management is active.
- */
-void kbase_pm_request_l2_caches_nolock(struct kbase_device *kbdev);
-
-/**
- * kbase_pm_request_l2_caches_l2_is_on - Request l2 caches but don't power on
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- *
- * Increment the count of l2 users but do not attempt to power on the l2
- *
- * It is the callers responsibility to ensure that the l2 is already powered up
- * and to eventually call kbase_pm_release_l2_caches()
- */
-void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev);
-
-/**
- * kbase_pm_release_l2_caches - Release l2 caches
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- *
- * Release the use of l2 caches for all core groups and allow the power manager
- * to power them down when necessary.
- *
- * This tells the power management that the caches can be powered down if
- * necessary, with respect to the usage of shader cores.
- *
- * The caller must have called kbase_pm_request_l2_caches() prior to a call
- * to this.
- *
- * This should only be used when power management is active.
- */
-void kbase_pm_release_l2_caches(struct kbase_device *kbdev);
-
#endif /* _KBASE_PM_POLICY_H_ */
diff --git a/mali_kbase/backend/gpu/mali_kbase_time.h b/mali_kbase/backend/gpu/mali_kbase_time.h
deleted file mode 100644
index ece7009..0000000
--- a/mali_kbase/backend/gpu/mali_kbase_time.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2014-2015,2018 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- */
-
-#ifndef _KBASE_BACKEND_TIME_H_
-#define _KBASE_BACKEND_TIME_H_
-
-/**
- * kbase_backend_get_gpu_time() - Get current GPU time
- * @kbdev: Device pointer
- * @cycle_counter: Pointer to u64 to store cycle counter in
- * @system_time: Pointer to u64 to store system time in
- * @ts: Pointer to struct timespec to store current monotonic
- * time in
- */
-void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
- u64 *system_time, struct timespec *ts);
-
-/**
- * kbase_wait_write_flush() - Wait for GPU write flush
- * @kbdev: Kbase device
- *
- * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
- * its write buffer.
- *
- * If GPU resets occur then the counters are reset to zero, the delay may not be
- * as expected.
- *
- * This function is only in use for BASE_HW_ISSUE_6367
- */
-#ifdef CONFIG_MALI_NO_MALI
-static inline void kbase_wait_write_flush(struct kbase_device *kbdev)
-{
-}
-#else
-void kbase_wait_write_flush(struct kbase_device *kbdev);
-#endif
-
-#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/mali_kbase/build.bp b/mali_kbase/build.bp
index ba3a25c..2cf685c 100644
--- a/mali_kbase/build.bp
+++ b/mali_kbase/build.bp
@@ -19,9 +19,6 @@ bob_defaults {
no_mali: {
kbuild_options: ["CONFIG_MALI_NO_MALI=y"],
},
- mali_corestack: {
- kbuild_options: ["CONFIG_MALI_CORESTACK=y"],
- },
mali_devfreq: {
kbuild_options: ["CONFIG_MALI_DEVFREQ=y"],
},
@@ -84,8 +81,15 @@ bob_kernel_module {
"CONFIG_MALI_MIDGARD=m",
"CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}",
"CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}",
- "MALI_MOCK_TEST={{.mali_mock_test}}",
],
+ mali_fpga_bus_logger: {
+ extra_symbols: [
+ "bus_logger",
+ ],
+ },
+ mali_corestack: {
+ kbuild_options: ["CONFIG_MALI_CORESTACK=y"],
+ },
mali_error_inject: {
kbuild_options: ["CONFIG_MALI_ERROR_INJECT=y"],
},
@@ -98,9 +102,6 @@ bob_kernel_module {
mali_2mb_alloc: {
kbuild_options: ["CONFIG_MALI_2MB_ALLOC=y"],
},
- mali_mock_test: {
- srcs: ["tests/internal/src/mock/mali_kbase_pm_driver_mock.c"],
- },
gpu_has_csf: {
srcs: [
"csf/*.c",
diff --git a/mali_kbase/ipa/mali_kbase_ipa.c b/mali_kbase/ipa/mali_kbase_ipa.c
index 520f8fc..9da2878 100644
--- a/mali_kbase/ipa/mali_kbase_ipa.c
+++ b/mali_kbase/ipa/mali_kbase_ipa.c
@@ -38,16 +38,15 @@
#endif
#define KBASE_IPA_FALLBACK_MODEL_NAME "mali-simple-power-model"
-#define KBASE_IPA_G71_MODEL_NAME "mali-g71-power-model"
-#define KBASE_IPA_G72_MODEL_NAME "mali-g72-power-model"
-#define KBASE_IPA_TNOX_MODEL_NAME "mali-tnox-power-model"
-#define KBASE_IPA_TGOX_R1_MODEL_NAME "mali-tgox_r1-power-model"
-static struct kbase_ipa_model_ops *kbase_ipa_all_model_ops[] = {
+static const struct kbase_ipa_model_ops *kbase_ipa_all_model_ops[] = {
&kbase_simple_ipa_model_ops,
&kbase_g71_ipa_model_ops,
&kbase_g72_ipa_model_ops,
- &kbase_tnox_ipa_model_ops
+ &kbase_g76_ipa_model_ops,
+ &kbase_g52_ipa_model_ops,
+ &kbase_g52_r1_ipa_model_ops,
+ &kbase_g51_ipa_model_ops
};
int kbase_ipa_model_recalculate(struct kbase_ipa_model *model)
@@ -68,13 +67,13 @@ int kbase_ipa_model_recalculate(struct kbase_ipa_model *model)
return err;
}
-static struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev,
+const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev,
const char *name)
{
int i;
for (i = 0; i < ARRAY_SIZE(kbase_ipa_all_model_ops); ++i) {
- struct kbase_ipa_model_ops *ops = kbase_ipa_all_model_ops[i];
+ const struct kbase_ipa_model_ops *ops = kbase_ipa_all_model_ops[i];
if (!strcmp(ops->name, name))
return ops;
@@ -84,6 +83,7 @@ static struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device
return NULL;
}
+KBASE_EXPORT_TEST_API(kbase_ipa_model_ops_find);
const char *kbase_ipa_model_name_from_id(u32 gpu_id)
{
@@ -93,18 +93,20 @@ const char *kbase_ipa_model_name_from_id(u32 gpu_id)
if (GPU_ID_IS_NEW_FORMAT(prod_id)) {
switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) {
case GPU_ID2_PRODUCT_TMIX:
- return KBASE_IPA_G71_MODEL_NAME;
+ return "mali-g71-power-model";
case GPU_ID2_PRODUCT_THEX:
- return KBASE_IPA_G72_MODEL_NAME;
+ return "mali-g72-power-model";
case GPU_ID2_PRODUCT_TNOX:
- return KBASE_IPA_TNOX_MODEL_NAME;
+ return "mali-g76-power-model";
+ case GPU_ID2_PRODUCT_TSIX:
+ return "mali-g51-power-model";
case GPU_ID2_PRODUCT_TGOX:
if ((gpu_id & GPU_ID2_VERSION_MAJOR) ==
(0 << GPU_ID2_VERSION_MAJOR_SHIFT))
- /* TGOX r0 shares a power model with TNOX */
- return KBASE_IPA_TNOX_MODEL_NAME;
+ /* g52 aliased to g76 power-model's ops */
+ return "mali-g52-power-model";
else
- return KBASE_IPA_TGOX_R1_MODEL_NAME;
+ return "mali-g52_r1-power-model";
default:
return KBASE_IPA_FALLBACK_MODEL_NAME;
}
@@ -112,6 +114,7 @@ const char *kbase_ipa_model_name_from_id(u32 gpu_id)
return KBASE_IPA_FALLBACK_MODEL_NAME;
}
+KBASE_EXPORT_TEST_API(kbase_ipa_model_name_from_id);
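
/* Editorial note: taken together, the two helpers exported above give the
 * probe-time selection flow: GPU ID -> model name -> ops -> initialised
 * model. This is a condensed sketch with error handling trimmed; the full
 * logic lives in kbase_ipa_init(), which additionally falls back to the
 * simple model when no counter-based model can be used.
 */
static struct kbase_ipa_model *example_pick_model(struct kbase_device *kbdev,
		u32 gpu_id)
{
	const char *name = kbase_ipa_model_name_from_id(gpu_id);
	const struct kbase_ipa_model_ops *ops =
		kbase_ipa_model_ops_find(kbdev, name);

	return ops ? kbase_ipa_init_model(kbdev, ops) : NULL;
}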
static struct device_node *get_model_dt_node(struct kbase_ipa_model *model)
{
@@ -244,7 +247,7 @@ void kbase_ipa_term_model(struct kbase_ipa_model *model)
KBASE_EXPORT_TEST_API(kbase_ipa_term_model);
struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev,
- struct kbase_ipa_model_ops *ops)
+ const struct kbase_ipa_model_ops *ops)
{
struct kbase_ipa_model *model;
int err;
@@ -298,7 +301,7 @@ int kbase_ipa_init(struct kbase_device *kbdev)
{
const char *model_name;
- struct kbase_ipa_model_ops *ops;
+ const struct kbase_ipa_model_ops *ops;
struct kbase_ipa_model *default_model = NULL;
int err;
@@ -371,6 +374,8 @@ void kbase_ipa_term(struct kbase_device *kbdev)
mutex_lock(&kbdev->ipa.lock);
kbase_ipa_term_locked(kbdev);
mutex_unlock(&kbdev->ipa.lock);
+
+ mutex_destroy(&kbdev->ipa.lock);
}
KBASE_EXPORT_TEST_API(kbase_ipa_term);
@@ -517,6 +522,9 @@ static unsigned long kbase_get_static_power(unsigned long voltage)
struct kbase_device *kbdev = kbase_find_device(-1);
#endif
+ if (!kbdev)
+ return 0ul;
+
mutex_lock(&kbdev->ipa.lock);
model = get_current_model(kbdev);
@@ -552,6 +560,9 @@ static unsigned long kbase_get_dynamic_power(unsigned long freq,
struct kbase_device *kbdev = kbase_find_device(-1);
#endif
+ if (!kbdev)
+ return 0ul;
+
mutex_lock(&kbdev->ipa.lock);
model = kbdev->ipa.fallback_model;
@@ -627,6 +638,9 @@ int kbase_get_real_power(struct devfreq *df, u32 *power,
int ret;
struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
+ if (!kbdev)
+ return -ENODEV;
+
mutex_lock(&kbdev->ipa.lock);
ret = kbase_get_real_power_locked(kbdev, power, freq, voltage);
mutex_unlock(&kbdev->ipa.lock);
diff --git a/mali_kbase/ipa/mali_kbase_ipa.h b/mali_kbase/ipa/mali_kbase_ipa.h
index 4656ded..7462048 100644
--- a/mali_kbase/ipa/mali_kbase_ipa.h
+++ b/mali_kbase/ipa/mali_kbase_ipa.h
@@ -40,7 +40,7 @@ struct devfreq;
struct kbase_ipa_model {
struct kbase_device *kbdev;
void *model_data;
- struct kbase_ipa_model_ops *ops;
+ const struct kbase_ipa_model_ops *ops;
struct list_head params;
bool missing_dt_node_warning;
};
@@ -154,6 +154,25 @@ void kbase_ipa_term(struct kbase_device *kbdev);
int kbase_ipa_model_recalculate(struct kbase_ipa_model *model);
/**
+ * kbase_ipa_model_ops_find - Lookup an IPA model using its name
+ * @kbdev: pointer to kbase device
+ * @name: name of model to lookup
+ *
+ * Return: Pointer to model's 'ops' structure, or NULL if the lookup failed.
+ */
+const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev,
+ const char *name);
+
+/**
+ * kbase_ipa_model_name_from_id - Find the best model for a given GPU ID
+ * @gpu_id: GPU ID of GPU the model will be used for
+ *
+ * Return: The name of the appropriate counter-based model, or the name of the
+ * fallback model if no counter model exists.
+ */
+const char *kbase_ipa_model_name_from_id(u32 gpu_id);
+
+/**
* kbase_ipa_init_model - Initialize the particular IPA model
* @kbdev: pointer to kbase device
* @ops: pointer to object containing model specific methods.
@@ -164,7 +183,7 @@ int kbase_ipa_model_recalculate(struct kbase_ipa_model *model);
* Return: pointer to kbase_ipa_model on success, NULL on error
*/
struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev,
- struct kbase_ipa_model_ops *ops);
+ const struct kbase_ipa_model_ops *ops);
/**
* kbase_ipa_term_model - Terminate the particular IPA model
* @model: pointer to the IPA model object, already initialized
@@ -183,10 +202,12 @@ void kbase_ipa_term_model(struct kbase_ipa_model *model);
*/
void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev);
-extern struct kbase_ipa_model_ops kbase_g71_ipa_model_ops;
-extern struct kbase_ipa_model_ops kbase_g72_ipa_model_ops;
-extern struct kbase_ipa_model_ops kbase_tnox_ipa_model_ops;
-extern struct kbase_ipa_model_ops kbase_tgox_r1_ipa_model_ops;
+extern const struct kbase_ipa_model_ops kbase_g71_ipa_model_ops;
+extern const struct kbase_ipa_model_ops kbase_g72_ipa_model_ops;
+extern const struct kbase_ipa_model_ops kbase_g76_ipa_model_ops;
+extern const struct kbase_ipa_model_ops kbase_g52_ipa_model_ops;
+extern const struct kbase_ipa_model_ops kbase_g52_r1_ipa_model_ops;
+extern const struct kbase_ipa_model_ops kbase_g51_ipa_model_ops;
/**
* kbase_get_real_power() - get the real power consumption of the GPU
diff --git a/mali_kbase/ipa/mali_kbase_ipa_simple.c b/mali_kbase/ipa/mali_kbase_ipa_simple.c
index e684df4..c8399ab 100644
--- a/mali_kbase/ipa/mali_kbase_ipa_simple.c
+++ b/mali_kbase/ipa/mali_kbase_ipa_simple.c
@@ -268,8 +268,9 @@ static int kbase_simple_power_model_init(struct kbase_ipa_model *model)
(void *) model_data,
"mali-simple-power-model-temp-poll");
if (IS_ERR(model_data->poll_temperature_thread)) {
+ err = PTR_ERR(model_data->poll_temperature_thread);
kfree(model_data);
- return PTR_ERR(model_data->poll_temperature_thread);
+ return err;
}
err = add_params(model);
diff --git a/mali_kbase/ipa/mali_kbase_ipa_vinstr_common.c b/mali_kbase/ipa/mali_kbase_ipa_vinstr_common.c
index 69c3230..1a6ba01 100644
--- a/mali_kbase/ipa/mali_kbase_ipa_vinstr_common.c
+++ b/mali_kbase/ipa/mali_kbase_ipa_vinstr_common.c
@@ -44,7 +44,7 @@ static inline u32 kbase_ipa_read_hwcnt(
struct kbase_ipa_model_vinstr_data *model_data,
u32 offset)
{
- u8 *p = model_data->vinstr_buffer;
+ u8 *p = (u8 *)model_data->dump_buf.dump_buf;
return *(u32 *)&p[offset];
}
@@ -118,125 +118,69 @@ s64 kbase_ipa_single_counter(
return counter_value * (s64) coeff;
}
-#ifndef CONFIG_MALI_NO_MALI
-/**
- * kbase_ipa_gpu_active - Inform IPA that GPU is now active
- * @model_data: Pointer to model data
- *
- * This function may cause vinstr to become active.
- */
-static void kbase_ipa_gpu_active(struct kbase_ipa_model_vinstr_data *model_data)
+int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data)
{
+ int errcode;
struct kbase_device *kbdev = model_data->kbdev;
+ struct kbase_hwcnt_virtualizer *hvirt = kbdev->hwcnt_gpu_virt;
+ struct kbase_hwcnt_enable_map enable_map;
+ const struct kbase_hwcnt_metadata *metadata =
+ kbase_hwcnt_virtualizer_metadata(hvirt);
- lockdep_assert_held(&kbdev->pm.lock);
+ if (!metadata)
+ return -1;
- if (!kbdev->ipa.vinstr_active) {
- kbdev->ipa.vinstr_active = true;
- kbase_vinstr_resume_client(model_data->vinstr_cli);
+ errcode = kbase_hwcnt_enable_map_alloc(metadata, &enable_map);
+ if (errcode) {
+ dev_err(kbdev->dev, "Failed to allocate IPA enable map");
+ return errcode;
}
-}
-/**
- * kbase_ipa_gpu_idle - Inform IPA that GPU is now idle
- * @model_data: Pointer to model data
- *
- * This function may cause vinstr to become idle.
- */
-static void kbase_ipa_gpu_idle(struct kbase_ipa_model_vinstr_data *model_data)
-{
- struct kbase_device *kbdev = model_data->kbdev;
+ kbase_hwcnt_enable_map_enable_all(&enable_map);
- lockdep_assert_held(&kbdev->pm.lock);
-
- if (kbdev->ipa.vinstr_active) {
- kbase_vinstr_suspend_client(model_data->vinstr_cli);
- kbdev->ipa.vinstr_active = false;
+ errcode = kbase_hwcnt_virtualizer_client_create(
+ hvirt, &enable_map, &model_data->hvirt_cli);
+ kbase_hwcnt_enable_map_free(&enable_map);
+ if (errcode) {
+ dev_err(kbdev->dev, "Failed to register IPA with virtualizer");
+ model_data->hvirt_cli = NULL;
+ return errcode;
}
-}
-#endif
-
-int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data)
-{
- struct kbase_device *kbdev = model_data->kbdev;
- struct kbase_ioctl_hwcnt_reader_setup setup;
- size_t dump_size;
- dump_size = kbase_vinstr_dump_size(kbdev);
- model_data->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL);
- if (!model_data->vinstr_buffer) {
+ errcode = kbase_hwcnt_dump_buffer_alloc(
+ metadata, &model_data->dump_buf);
+ if (errcode) {
dev_err(kbdev->dev, "Failed to allocate IPA dump buffer");
- return -1;
- }
-
- setup.jm_bm = ~0u;
- setup.shader_bm = ~0u;
- setup.tiler_bm = ~0u;
- setup.mmu_l2_bm = ~0u;
- model_data->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(kbdev->vinstr_ctx,
- &setup, model_data->vinstr_buffer);
- if (!model_data->vinstr_cli) {
- dev_err(kbdev->dev, "Failed to register IPA with vinstr core");
- kfree(model_data->vinstr_buffer);
- model_data->vinstr_buffer = NULL;
- return -1;
+ kbase_hwcnt_virtualizer_client_destroy(model_data->hvirt_cli);
+ model_data->hvirt_cli = NULL;
+ return errcode;
}
- kbase_vinstr_hwc_clear(model_data->vinstr_cli);
-
-#ifndef CONFIG_MALI_NO_MALI
- kbdev->ipa.gpu_active_callback = kbase_ipa_gpu_active;
- kbdev->ipa.gpu_idle_callback = kbase_ipa_gpu_idle;
- kbdev->ipa.model_data = model_data;
- kbdev->ipa.vinstr_active = false;
- /* Suspend vinstr, to ensure that the GPU is powered off until there is
- * something to execute.
- */
- kbase_vinstr_suspend_client(model_data->vinstr_cli);
-#else
- kbdev->ipa.gpu_active_callback = NULL;
- kbdev->ipa.gpu_idle_callback = NULL;
- kbdev->ipa.vinstr_active = true;
-#endif
-
return 0;
}
void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data)
{
- struct kbase_device *kbdev = model_data->kbdev;
-
- kbdev->ipa.gpu_active_callback = NULL;
- kbdev->ipa.gpu_idle_callback = NULL;
- kbdev->ipa.model_data = NULL;
- kbdev->ipa.vinstr_active = false;
-
- if (model_data->vinstr_cli)
- kbase_vinstr_detach_client(model_data->vinstr_cli);
-
- model_data->vinstr_cli = NULL;
- kfree(model_data->vinstr_buffer);
- model_data->vinstr_buffer = NULL;
+ if (model_data->hvirt_cli) {
+ kbase_hwcnt_virtualizer_client_destroy(model_data->hvirt_cli);
+ kbase_hwcnt_dump_buffer_free(&model_data->dump_buf);
+ model_data->hvirt_cli = NULL;
+ }
}
int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp)
{
struct kbase_ipa_model_vinstr_data *model_data =
(struct kbase_ipa_model_vinstr_data *)model->model_data;
- struct kbase_device *kbdev = model_data->kbdev;
s64 energy = 0;
size_t i;
u64 coeff = 0, coeff_mul = 0;
+ u64 start_ts_ns, end_ts_ns;
u32 active_cycles;
int err = 0;
- if (!kbdev->ipa.vinstr_active) {
- err = -ENODATA;
- goto err0; /* GPU powered off - no counters to collect */
- }
-
- err = kbase_vinstr_hwc_dump(model_data->vinstr_cli,
- BASE_HWCNT_READER_EVENT_MANUAL);
+ err = kbase_hwcnt_virtualizer_client_dump(model_data->hvirt_cli,
+ &start_ts_ns, &end_ts_ns, &model_data->dump_buf);
if (err)
goto err0;
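
/* Editorial note: the attach/dump/detach trio above replaces the old vinstr
 * client. A minimal lifecycle sketch under the new hwcnt virtualizer API
 * (illustrative only, not patch content): attach creates the virtualizer
 * client and dump buffer, each sample dumps into model_data->dump_buf, and
 * detach tears both down.
 */
static int example_ipa_sample_once(struct kbase_ipa_model_vinstr_data *model_data)
{
	u64 start_ts_ns, end_ts_ns;
	int err = kbase_ipa_attach_vinstr(model_data);

	if (err)
		return err;

	err = kbase_hwcnt_virtualizer_client_dump(model_data->hvirt_cli,
			&start_ts_ns, &end_ts_ns, &model_data->dump_buf);

	kbase_ipa_detach_vinstr(model_data);
	return err;
}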
diff --git a/mali_kbase/ipa/mali_kbase_ipa_vinstr_common.h b/mali_kbase/ipa/mali_kbase_ipa_vinstr_common.h
index 0deafae..46e3cd4 100644
--- a/mali_kbase/ipa/mali_kbase_ipa_vinstr_common.h
+++ b/mali_kbase/ipa/mali_kbase_ipa_vinstr_common.h
@@ -24,6 +24,8 @@
#define _KBASE_IPA_VINSTR_COMMON_H_
#include "mali_kbase.h"
+#include "mali_kbase_hwcnt_virtualizer.h"
+#include "mali_kbase_hwcnt_types.h"
/* Maximum number of IPA groups for an IPA model. */
#define KBASE_IPA_MAX_GROUP_DEF_NUM 16
@@ -49,8 +51,8 @@ typedef u32 (*kbase_ipa_get_active_cycles_callback)(struct kbase_ipa_model_vinst
* @groups_def_num: Number of elements in the array of IPA groups.
* @get_active_cycles: Callback to return number of active cycles during
* counter sample period
- * @vinstr_cli: vinstr client handle
- * @vinstr_buffer: buffer to dump hardware counters onto
+ * @hvirt_cli: hardware counter virtualizer client handle
+ * @dump_buf: buffer to dump hardware counters onto
* @reference_voltage: voltage, in mV, of the operating point used when
* deriving the power model coefficients. Range approx
* 0.1V - 5V (~= 8V): 2^7 <= reference_voltage <= 2^13
@@ -72,8 +74,8 @@ struct kbase_ipa_model_vinstr_data {
const struct kbase_ipa_group *groups_def;
size_t groups_def_num;
kbase_ipa_get_active_cycles_callback get_active_cycles;
- struct kbase_vinstr_client *vinstr_cli;
- void *vinstr_buffer;
+ struct kbase_hwcnt_virtualizer_client *hvirt_cli;
+ struct kbase_hwcnt_dump_buffer dump_buf;
s32 reference_voltage;
s32 scaling_factor;
s32 min_sample_cycles;
diff --git a/mali_kbase/ipa/mali_kbase_ipa_vinstr_g7x.c b/mali_kbase/ipa/mali_kbase_ipa_vinstr_g7x.c
index 8366033..6365d2f 100644
--- a/mali_kbase/ipa/mali_kbase_ipa_vinstr_g7x.c
+++ b/mali_kbase/ipa/mali_kbase_ipa_vinstr_g7x.c
@@ -248,7 +248,7 @@ static const struct kbase_ipa_group ipa_groups_def_g72[] = {
},
};
-static const struct kbase_ipa_group ipa_groups_def_tnox[] = {
+static const struct kbase_ipa_group ipa_groups_def_g76[] = {
{
.name = "gpu_active",
.default_value = 122000,
@@ -281,7 +281,7 @@ static const struct kbase_ipa_group ipa_groups_def_tnox[] = {
},
};
-static const struct kbase_ipa_group ipa_groups_def_tgox_r1[] = {
+static const struct kbase_ipa_group ipa_groups_def_g52_r1[] = {
{
.name = "gpu_active",
.default_value = 224200,
@@ -314,6 +314,48 @@ static const struct kbase_ipa_group ipa_groups_def_tgox_r1[] = {
},
};
+static const struct kbase_ipa_group ipa_groups_def_g51[] = {
+ {
+ .name = "gpu_active",
+ .default_value = 201400,
+ .op = kbase_g7x_jm_single_counter,
+ .counter_block_offset = JM_GPU_ACTIVE,
+ },
+ {
+ .name = "exec_instr_count",
+ .default_value = 392700,
+ .op = kbase_g7x_sum_all_shader_cores,
+ .counter_block_offset = SC_EXEC_INSTR_COUNT,
+ },
+ {
+ .name = "vary_instr",
+ .default_value = 274000,
+ .op = kbase_g7x_sum_all_shader_cores,
+ .counter_block_offset = SC_VARY_INSTR,
+ },
+ {
+ .name = "tex_tfch_num_operations",
+ .default_value = 528000,
+ .op = kbase_g7x_sum_all_shader_cores,
+ .counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS,
+ },
+ {
+ .name = "l2_access",
+ .default_value = 506400,
+ .op = kbase_g7x_sum_all_memsys_blocks,
+ .counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
+ },
+};
+
+#define IPA_POWER_MODEL_OPS(gpu, init_token) \
+ const struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \
+ .name = "mali-" #gpu "-power-model", \
+ .init = kbase_ ## init_token ## _power_model_init, \
+ .term = kbase_ipa_vinstr_common_model_term, \
+ .get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \
+ }; \
+ KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops)
+
#define STANDARD_POWER_MODEL(gpu, reference_voltage) \
static int kbase_ ## gpu ## _power_model_init(\
struct kbase_ipa_model *model) \
@@ -326,15 +368,16 @@ static const struct kbase_ipa_group ipa_groups_def_tgox_r1[] = {
kbase_g7x_get_active_cycles, \
(reference_voltage)); \
} \
- struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \
- .name = "mali-" #gpu "-power-model", \
- .init = kbase_ ## gpu ## _power_model_init, \
- .term = kbase_ipa_vinstr_common_model_term, \
- .get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \
- }; \
- KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops)
+ IPA_POWER_MODEL_OPS(gpu, gpu)
+
+#define ALIAS_POWER_MODEL(gpu, as_gpu) \
+ IPA_POWER_MODEL_OPS(gpu, as_gpu)
STANDARD_POWER_MODEL(g71, 800);
STANDARD_POWER_MODEL(g72, 800);
-STANDARD_POWER_MODEL(tnox, 800);
-STANDARD_POWER_MODEL(tgox_r1, 1000);
+STANDARD_POWER_MODEL(g76, 800);
+STANDARD_POWER_MODEL(g52_r1, 1000);
+STANDARD_POWER_MODEL(g51, 1000);
+
+/* g52 is an alias of g76 (TNOX) for IPA */
+ALIAS_POWER_MODEL(g52, g76);
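
/* Editorial note: for clarity, ALIAS_POWER_MODEL(g52, g76) expands via
 * IPA_POWER_MODEL_OPS to the following (reconstructed from the macro
 * definitions above), so g52 is selectable by its own model name while
 * reusing the g76 init routine.
 */
const struct kbase_ipa_model_ops kbase_g52_ipa_model_ops = {
	.name = "mali-g52-power-model",
	.init = kbase_g76_power_model_init,
	.term = kbase_ipa_vinstr_common_model_term,
	.get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff,
};
KBASE_EXPORT_TEST_API(kbase_g52_ipa_model_ops);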
diff --git a/mali_kbase/mali_base_hwconfig_features.h b/mali_kbase/mali_base_hwconfig_features.h
index 69c22f2..5571f84 100644
--- a/mali_kbase/mali_base_hwconfig_features.h
+++ b/mali_kbase/mali_base_hwconfig_features.h
@@ -59,6 +59,8 @@ enum base_hw_feature {
BASE_HW_FEATURE_TLS_HASHING,
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
+ BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+ BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_END
};
@@ -203,6 +205,7 @@ static const enum base_hw_feature base_hw_features_tMIx[] = {
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
+ BASE_HW_FEATURE_AARCH64_MMU,
BASE_HW_FEATURE_END
};
@@ -230,6 +233,7 @@ static const enum base_hw_feature base_hw_features_tHEx[] = {
BASE_HW_FEATURE_PROTECTED_MODE,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
+ BASE_HW_FEATURE_AARCH64_MMU,
BASE_HW_FEATURE_END
};
@@ -257,6 +261,7 @@ static const enum base_hw_feature base_hw_features_tSIx[] = {
BASE_HW_FEATURE_PROTECTED_MODE,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
+ BASE_HW_FEATURE_AARCH64_MMU,
BASE_HW_FEATURE_END
};
@@ -284,6 +289,7 @@ static const enum base_hw_feature base_hw_features_tDVx[] = {
BASE_HW_FEATURE_PROTECTED_MODE,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
+ BASE_HW_FEATURE_AARCH64_MMU,
BASE_HW_FEATURE_END
};
@@ -314,6 +320,7 @@ static const enum base_hw_feature base_hw_features_tNOx[] = {
BASE_HW_FEATURE_AARCH64_MMU,
BASE_HW_FEATURE_TLS_HASHING,
BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
+ BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_END
};
@@ -344,6 +351,7 @@ static const enum base_hw_feature base_hw_features_tGOx[] = {
BASE_HW_FEATURE_AARCH64_MMU,
BASE_HW_FEATURE_TLS_HASHING,
BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
+ BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_END
};
@@ -398,6 +406,8 @@ static const enum base_hw_feature base_hw_features_tTRx[] = {
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_AARCH64_MMU,
+ BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+ BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_END
};
@@ -425,10 +435,12 @@ static const enum base_hw_feature base_hw_features_tNAx[] = {
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_AARCH64_MMU,
+ BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+ BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_END
};
-static const enum base_hw_feature base_hw_features_tULx[] = {
+static const enum base_hw_feature base_hw_features_tBEx[] = {
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
BASE_HW_FEATURE_XAFFINITY,
@@ -452,10 +464,12 @@ static const enum base_hw_feature base_hw_features_tULx[] = {
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_AARCH64_MMU,
+ BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+ BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_END
};
-static const enum base_hw_feature base_hw_features_tDUx[] = {
+static const enum base_hw_feature base_hw_features_tULx[] = {
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
BASE_HW_FEATURE_XAFFINITY,
@@ -479,6 +493,7 @@ static const enum base_hw_feature base_hw_features_tDUx[] = {
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_AARCH64_MMU,
+ BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_END
};
@@ -506,6 +521,7 @@ static const enum base_hw_feature base_hw_features_tBOx[] = {
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_AARCH64_MMU,
+ BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_END
};
@@ -533,6 +549,7 @@ static const enum base_hw_feature base_hw_features_tIDx[] = {
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_AARCH64_MMU,
+ BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_END
};
@@ -560,6 +577,7 @@ static const enum base_hw_feature base_hw_features_tVAx[] = {
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_AARCH64_MMU,
+ BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_END
};
diff --git a/mali_kbase/mali_base_hwconfig_issues.h b/mali_kbase/mali_base_hwconfig_issues.h
index b8bd3d0..d7c40ef 100644
--- a/mali_kbase/mali_base_hwconfig_issues.h
+++ b/mali_kbase/mali_base_hwconfig_issues.h
@@ -1222,7 +1222,6 @@ static const enum base_hw_issue base_hw_issues_model_tKAx[] = {
static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TMIX_8133,
- BASE_HW_ISSUE_TSIX_1116,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_END
@@ -1232,7 +1231,6 @@ static const enum base_hw_issue base_hw_issues_model_tTRx[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TMIX_8133,
- BASE_HW_ISSUE_TSIX_1116,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_END
@@ -1241,7 +1239,6 @@ static const enum base_hw_issue base_hw_issues_model_tTRx[] = {
static const enum base_hw_issue base_hw_issues_tNAx_r0p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TMIX_8133,
- BASE_HW_ISSUE_TSIX_1116,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_END
@@ -1251,45 +1248,40 @@ static const enum base_hw_issue base_hw_issues_model_tNAx[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TMIX_8133,
- BASE_HW_ISSUE_TSIX_1116,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tULx_r0p0[] = {
+static const enum base_hw_issue base_hw_issues_tBEx_r0p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TMIX_8133,
- BASE_HW_ISSUE_TSIX_1116,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_model_tULx[] = {
+static const enum base_hw_issue base_hw_issues_model_tBEx[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TMIX_8133,
- BASE_HW_ISSUE_TSIX_1116,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tDUx_r0p0[] = {
+static const enum base_hw_issue base_hw_issues_tULx_r0p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TMIX_8133,
- BASE_HW_ISSUE_TSIX_1116,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_model_tDUx[] = {
+static const enum base_hw_issue base_hw_issues_model_tULx[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TMIX_8133,
- BASE_HW_ISSUE_TSIX_1116,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_END
@@ -1317,7 +1309,6 @@ static const enum base_hw_issue base_hw_issues_model_tBOx[] = {
static const enum base_hw_issue base_hw_issues_tIDx_r0p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TMIX_8133,
- BASE_HW_ISSUE_TSIX_1116,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_END
@@ -1327,7 +1318,6 @@ static const enum base_hw_issue base_hw_issues_model_tIDx[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TMIX_8133,
- BASE_HW_ISSUE_TSIX_1116,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_END
@@ -1336,7 +1326,6 @@ static const enum base_hw_issue base_hw_issues_model_tIDx[] = {
static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TMIX_8133,
- BASE_HW_ISSUE_TSIX_1116,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_END
@@ -1346,7 +1335,6 @@ static const enum base_hw_issue base_hw_issues_model_tVAx[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TMIX_8133,
- BASE_HW_ISSUE_TSIX_1116,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_END
diff --git a/mali_kbase/mali_base_kernel.h b/mali_kbase/mali_base_kernel.h
index 297df8b..70dc3c5 100644
--- a/mali_kbase/mali_base_kernel.h
+++ b/mali_kbase/mali_base_kernel.h
@@ -348,15 +348,6 @@ struct base_mem_import_user_buffer {
/**
- * @brief Result codes of changing the size of the backing store allocated to a tmem region
- */
-typedef enum base_backing_threshold_status {
- BASE_BACKING_THRESHOLD_OK = 0, /**< Resize successful */
- BASE_BACKING_THRESHOLD_ERROR_OOM = -2, /**< Increase failed due to an out-of-memory condition */
- BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS = -4 /**< Invalid arguments (not tmem, illegal size request, etc.) */
-} base_backing_threshold_status;
-
-/**
* @addtogroup base_user_api_memory_defered User-side Base Defered Memory Coherency APIs
* @{
*/
@@ -797,24 +788,6 @@ typedef u32 base_jd_core_req;
((core_req & BASE_JD_REQ_SOFT_JOB) || \
(core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP)
-/**
- * enum kbase_atom_coreref_state - States to model state machine processed by
- * kbasep_js_job_check_ref_cores(), which handles retaining cores for power
- * management.
- *
- * @KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED: Starting state: Cores must be
- * requested.
- * @KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES: Cores requested, but
- * waiting for them to be powered
- * @KBASE_ATOM_COREREF_STATE_READY: Cores are powered, atom can be submitted to
- * HW
- */
-enum kbase_atom_coreref_state {
- KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED,
- KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES,
- KBASE_ATOM_COREREF_STATE_READY
-};
-
/*
* Base Atom priority
*
@@ -822,15 +795,16 @@ enum kbase_atom_coreref_state {
* BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority
* level that is not one of those defined below.
*
- * Priority levels only affect scheduling between atoms of the same type within
- * a base context, and only after the atoms have had dependencies resolved.
- * Fragment atoms does not affect non-frament atoms with lower priorities, and
- * the other way around. For example, a low priority atom that has had its
- * dependencies resolved might run before a higher priority atom that has not
- * had its dependencies resolved.
+ * Priority levels only affect scheduling after the atoms have had dependencies
+ * resolved. For example, a low priority atom that has had its dependencies
+ * resolved might run before a higher priority atom that has not had its
+ * dependencies resolved.
*
- * The scheduling between base contexts/processes and between atoms from
- * different base contexts/processes is unaffected by atom priority.
+ * In general, fragment atoms do not affect non-fragment atoms with
+ * lower priorities, and vice versa. One exception is that there is only one
+ * priority value for each context. So, for example, a high-priority fragment atom
+ * could increase its context priority, causing its non-fragment atoms to also
+ * be scheduled sooner.
*
* The atoms are scheduled as follows with respect to their priorities:
* - Let atoms 'X' and 'Y' be for the same job slot who have dependencies
@@ -842,6 +816,14 @@ enum kbase_atom_coreref_state {
* - Any two atoms that have the same priority could run in any order with
* respect to each other. That is, there is no ordering constraint between
* atoms of the same priority.
+ *
+ * The sysfs file 'js_ctx_scheduling_mode' is used to control how atoms are
+ * scheduled between contexts. The default value, 0, will cause higher-priority
+ * atoms to be scheduled first, regardless of their context. The value 1 will
+ * use a round-robin algorithm when deciding which context's atoms to schedule
+ * next, so higher-priority atoms can only preempt lower priority atoms within
+ * the same context. See KBASE_JS_SYSTEM_PRIORITY_MODE and
+ * KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE for more details.
*/
typedef u8 base_jd_prio;
diff --git a/mali_kbase/mali_base_vendor_specific_func.h b/mali_kbase/mali_base_vendor_specific_func.h
deleted file mode 100644
index 5e8add8..0000000
--- a/mali_kbase/mali_base_vendor_specific_func.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2010, 2012-2013, 2015 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- */
-
-
-#ifndef _BASE_VENDOR_SPEC_FUNC_H_
-#define _BASE_VENDOR_SPEC_FUNC_H_
-
-int kbase_get_vendor_specific_cpu_clock_speed(u32 * const);
-
-#endif /*_BASE_VENDOR_SPEC_FUNC_H_*/
diff --git a/mali_kbase/mali_kbase.h b/mali_kbase/mali_kbase.h
index cdd9ecc..24a021d 100644
--- a/mali_kbase/mali_kbase.h
+++ b/mali_kbase/mali_kbase.h
@@ -68,7 +68,7 @@
#include "mali_kbase_jd_debugfs.h"
#include "mali_kbase_gpuprops.h"
#include "mali_kbase_jm.h"
-#include "mali_kbase_vinstr.h"
+#include "mali_kbase_ioctl.h"
#include "ipa/mali_kbase_ipa.h"
@@ -353,22 +353,13 @@ static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev)
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
- * This takes into account the following
- *
- * - whether there is an active context reference
- *
- * - whether any of the shader cores or the tiler are needed
- *
- * It should generally be preferred against checking just
- * kbdev->pm.active_count on its own, because some code paths drop their
- * reference on this whilst still having the shader cores/tiler in use.
+ * This takes into account whether there is an active context reference.
*
* Return: true if the GPU is active, false otherwise
*/
static inline bool kbase_pm_is_active(struct kbase_device *kbdev)
{
- return (kbdev->pm.active_count > 0 || kbdev->shader_needed_cnt ||
- kbdev->tiler_needed_cnt);
+ return kbdev->pm.active_count > 0;
}
/**
@@ -713,6 +704,3 @@ int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size);
#endif
-
-
-
diff --git a/mali_kbase/mali_kbase_config_defaults.h b/mali_kbase/mali_kbase_config_defaults.h
index 9d918a8..bb2ab53 100644
--- a/mali_kbase/mali_kbase_config_defaults.h
+++ b/mali_kbase/mali_kbase_config_defaults.h
@@ -171,11 +171,6 @@ enum {
#define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */
/**
- * Power Manager number of ticks before GPU is powered off
- */
-#define DEFAULT_PM_POWEROFF_TICK_GPU (2) /* 400-800us */
-
-/**
* Default scheduling tick granularity
*/
#define DEFAULT_JS_SCHEDULING_PERIOD_NS (100000000u) /* 100ms */
diff --git a/mali_kbase/mali_kbase_context.c b/mali_kbase/mali_kbase_context.c
index 628f89a..59609d7 100644
--- a/mali_kbase/mali_kbase_context.c
+++ b/mali_kbase/mali_kbase_context.c
@@ -149,7 +149,7 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat)
kctx->id = atomic_add_return(1, &(kbdev->ctx_num)) - 1;
- mutex_init(&kctx->vinstr_cli_lock);
+ mutex_init(&kctx->legacy_hwcnt_lock);
kbase_timer_setup(&kctx->soft_job_timeout,
kbasep_soft_job_timeout_worker);
@@ -325,9 +325,6 @@ int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags)
if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0)
kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED);
- /* Latch the initial attributes into the Job Scheduler */
- kbasep_js_ctx_attr_set_initial_attrs(kctx->kbdev, kctx);
-
spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags);
mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
out:
diff --git a/mali_kbase/mali_kbase_core_linux.c b/mali_kbase/mali_kbase_core_linux.c
index d101d97..382285f 100644
--- a/mali_kbase/mali_kbase_core_linux.c
+++ b/mali_kbase/mali_kbase_core_linux.c
@@ -48,7 +48,12 @@
#include <mali_kbase_hwaccess_jm.h>
#include <mali_kbase_ctx_sched.h>
#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
#include "mali_kbase_ioctl.h"
+#include "mali_kbase_hwcnt_context.h"
+#include "mali_kbase_hwcnt_virtualizer.h"
+#include "mali_kbase_hwcnt_legacy.h"
+#include "mali_kbase_vinstr.h"
#ifdef CONFIG_MALI_CINSTR_GWT
#include "mali_kbase_gwt.h"
@@ -161,22 +166,25 @@ enum {
#endif /* CONFIG_MALI_DEVFREQ */
inited_tlstream = (1u << 4),
inited_backend_early = (1u << 5),
- inited_backend_late = (1u << 6),
- inited_device = (1u << 7),
- inited_vinstr = (1u << 8),
- inited_job_fault = (1u << 10),
- inited_sysfs_group = (1u << 11),
- inited_misc_register = (1u << 12),
- inited_get_device = (1u << 13),
- inited_dev_list = (1u << 14),
- inited_debugfs = (1u << 15),
- inited_gpu_device = (1u << 16),
- inited_registers_map = (1u << 17),
- inited_io_history = (1u << 18),
- inited_power_control = (1u << 19),
- inited_buslogger = (1u << 20),
- inited_protected = (1u << 21),
- inited_ctx_sched = (1u << 22)
+ inited_hwcnt_gpu_iface = (1u << 6),
+ inited_hwcnt_gpu_ctx = (1u << 7),
+ inited_hwcnt_gpu_virt = (1u << 8),
+ inited_vinstr = (1u << 9),
+ inited_backend_late = (1u << 10),
+ inited_device = (1u << 11),
+ inited_job_fault = (1u << 13),
+ inited_sysfs_group = (1u << 14),
+ inited_misc_register = (1u << 15),
+ inited_get_device = (1u << 16),
+ inited_dev_list = (1u << 17),
+ inited_debugfs = (1u << 18),
+ inited_gpu_device = (1u << 19),
+ inited_registers_map = (1u << 20),
+ inited_io_history = (1u << 21),
+ inited_power_control = (1u << 22),
+ inited_buslogger = (1u << 23),
+ inited_protected = (1u << 24),
+ inited_ctx_sched = (1u << 25)
};
static struct kbase_device *to_kbase_device(struct device *dev)
@@ -494,17 +502,13 @@ static int kbase_release(struct inode *inode, struct file *filp)
filp->private_data = NULL;
- mutex_lock(&kctx->vinstr_cli_lock);
+ mutex_lock(&kctx->legacy_hwcnt_lock);
/* If this client was performing hwcnt dumping and did not explicitly
- * detach itself, remove it from the vinstr core now */
- if (kctx->vinstr_cli) {
- struct kbase_ioctl_hwcnt_enable enable;
-
- enable.dump_buffer = 0llu;
- kbase_vinstr_legacy_hwc_setup(
- kbdev->vinstr_ctx, &kctx->vinstr_cli, &enable);
- }
- mutex_unlock(&kctx->vinstr_cli_lock);
+ * detach itself, destroy it now
+ */
+ kbase_hwcnt_legacy_client_destroy(kctx->legacy_hwcnt_cli);
+ kctx->legacy_hwcnt_cli = NULL;
+ mutex_unlock(&kctx->legacy_hwcnt_lock);
kbase_destroy_context(kctx);
@@ -592,10 +596,15 @@ static int kbase_api_mem_alloc(struct kbase_context *kctx,
if (flags & BASE_MEM_FLAGS_KERNEL_ONLY)
return -ENOMEM;
+ /* Force SAME_VA if a 64-bit client.
+ * The only exception is GPU-executable memory if an EXEC_VA zone
+ * has been initialized. In that case, GPU-executable memory may
+ * or may not be SAME_VA.
+ */
if ((!kbase_ctx_flag(kctx, KCTX_COMPAT)) &&
kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA)) {
- /* force SAME_VA if a 64-bit client */
- flags |= BASE_MEM_SAME_VA;
+ if (!(flags & BASE_MEM_PROT_GPU_EX) || !kbase_has_exec_va_zone(kctx))
+ flags |= BASE_MEM_SAME_VA;
}
@@ -629,13 +638,7 @@ static int kbase_api_mem_free(struct kbase_context *kctx,
static int kbase_api_hwcnt_reader_setup(struct kbase_context *kctx,
struct kbase_ioctl_hwcnt_reader_setup *setup)
{
- int ret;
-
- mutex_lock(&kctx->vinstr_cli_lock);
- ret = kbase_vinstr_hwcnt_reader_setup(kctx->kbdev->vinstr_ctx, setup);
- mutex_unlock(&kctx->vinstr_cli_lock);
-
- return ret;
+ return kbase_vinstr_hwcnt_reader_setup(kctx->kbdev->vinstr_ctx, setup);
}
static int kbase_api_hwcnt_enable(struct kbase_context *kctx,
@@ -643,10 +646,31 @@ static int kbase_api_hwcnt_enable(struct kbase_context *kctx,
{
int ret;
- mutex_lock(&kctx->vinstr_cli_lock);
- ret = kbase_vinstr_legacy_hwc_setup(kctx->kbdev->vinstr_ctx,
- &kctx->vinstr_cli, enable);
- mutex_unlock(&kctx->vinstr_cli_lock);
+ mutex_lock(&kctx->legacy_hwcnt_lock);
+ if (enable->dump_buffer != 0) {
+ /* Non-zero dump buffer, so user wants to create the client */
+ if (kctx->legacy_hwcnt_cli == NULL) {
+ ret = kbase_hwcnt_legacy_client_create(
+ kctx->kbdev->hwcnt_gpu_virt,
+ enable,
+ &kctx->legacy_hwcnt_cli);
+ } else {
+ /* This context already has a client */
+ ret = -EBUSY;
+ }
+ } else {
+ /* Zero dump buffer, so user wants to destroy the client */
+ if (kctx->legacy_hwcnt_cli != NULL) {
+ kbase_hwcnt_legacy_client_destroy(
+ kctx->legacy_hwcnt_cli);
+ kctx->legacy_hwcnt_cli = NULL;
+ ret = 0;
+ } else {
+ /* This context has no client to destroy */
+ ret = -EINVAL;
+ }
+ }
+ mutex_unlock(&kctx->legacy_hwcnt_lock);
return ret;
}
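
The handler above keeps the legacy ioctl's zero/non-zero dump_buffer convention (the removed vinstr path also detached by passing a zero buffer) while routing the work through the new legacy hwcnt client layer: a non-zero address creates the single per-context client, a zero address destroys it. A minimal sketch of that convention, using hypothetical example_* names rather than the real kbase API:

#include <linux/errno.h>
#include <linux/types.h>

struct example_client;				/* hypothetical client handle */
struct example_ctx { struct example_client *client; };

/* Hypothetical helpers standing in for the legacy hwcnt client API. */
int example_client_create(struct example_ctx *ctx, u64 dump_buffer,
			  struct example_client **out_client);
void example_client_destroy(struct example_client *client);

static int example_hwcnt_enable_toggle(struct example_ctx *ctx, u64 dump_buffer)
{
	if (dump_buffer != 0) {
		if (ctx->client)
			return -EBUSY;		/* one client per context */
		return example_client_create(ctx, dump_buffer, &ctx->client);
	}
	if (!ctx->client)
		return -EINVAL;			/* nothing to destroy */
	example_client_destroy(ctx->client);
	ctx->client = NULL;
	return 0;
}

In the real handler the whole operation runs under kctx->legacy_hwcnt_lock, which the sketch omits.
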
@@ -655,10 +679,9 @@ static int kbase_api_hwcnt_dump(struct kbase_context *kctx)
{
int ret;
- mutex_lock(&kctx->vinstr_cli_lock);
- ret = kbase_vinstr_hwc_dump(kctx->vinstr_cli,
- BASE_HWCNT_READER_EVENT_MANUAL);
- mutex_unlock(&kctx->vinstr_cli_lock);
+ mutex_lock(&kctx->legacy_hwcnt_lock);
+ ret = kbase_hwcnt_legacy_client_dump(kctx->legacy_hwcnt_cli);
+ mutex_unlock(&kctx->legacy_hwcnt_lock);
return ret;
}
@@ -667,9 +690,9 @@ static int kbase_api_hwcnt_clear(struct kbase_context *kctx)
{
int ret;
- mutex_lock(&kctx->vinstr_cli_lock);
- ret = kbase_vinstr_hwc_clear(kctx->vinstr_cli);
- mutex_unlock(&kctx->vinstr_cli_lock);
+ mutex_lock(&kctx->legacy_hwcnt_lock);
+ ret = kbase_hwcnt_legacy_client_clear(kctx->legacy_hwcnt_cli);
+ mutex_unlock(&kctx->legacy_hwcnt_lock);
return ret;
}
@@ -749,6 +772,12 @@ static int kbase_api_mem_jit_init(struct kbase_context *kctx,
jit_init->max_allocations, jit_init->trim_level);
}
+static int kbase_api_mem_exec_init(struct kbase_context *kctx,
+ struct kbase_ioctl_mem_exec_init *exec_init)
+{
+ return kbase_region_tracker_init_exec(kctx, exec_init->va_pages);
+}
+
static int kbase_api_mem_sync(struct kbase_context *kctx,
struct kbase_ioctl_mem_sync *sync)
{
@@ -1169,6 +1198,11 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
kbase_api_mem_jit_init,
struct kbase_ioctl_mem_jit_init);
break;
+ case KBASE_IOCTL_MEM_EXEC_INIT:
+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_EXEC_INIT,
+ kbase_api_mem_exec_init,
+ struct kbase_ioctl_mem_exec_init);
+ break;
case KBASE_IOCTL_MEM_SYNC:
KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_SYNC,
kbase_api_mem_sync,
@@ -1550,7 +1584,10 @@ static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr,
{
struct kbase_device *kbdev;
u64 new_core_mask[3];
- int items;
+ int items, i;
+ ssize_t err = count;
+ unsigned long flags;
+ u64 shader_present, group0_core_mask;
kbdev = to_kbase_device(dev);
@@ -1561,50 +1598,59 @@ static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr,
&new_core_mask[0], &new_core_mask[1],
&new_core_mask[2]);
+ if (items != 1 && items != 3) {
+ dev_err(kbdev->dev, "Couldn't process core mask write operation.\n"
+ "Use format <core_mask>\n"
+ "or <core_mask_js0> <core_mask_js1> <core_mask_js2>\n");
+ err = -EINVAL;
+ goto end;
+ }
+
if (items == 1)
new_core_mask[1] = new_core_mask[2] = new_core_mask[0];
- if (items == 1 || items == 3) {
- u64 shader_present =
- kbdev->gpu_props.props.raw_props.shader_present;
- u64 group0_core_mask =
- kbdev->gpu_props.props.coherency_info.group[0].
- core_mask;
-
- if ((new_core_mask[0] & shader_present) != new_core_mask[0] ||
- !(new_core_mask[0] & group0_core_mask) ||
- (new_core_mask[1] & shader_present) !=
- new_core_mask[1] ||
- !(new_core_mask[1] & group0_core_mask) ||
- (new_core_mask[2] & shader_present) !=
- new_core_mask[2] ||
- !(new_core_mask[2] & group0_core_mask)) {
- dev_err(dev, "power_policy: invalid core specification\n");
- return -EINVAL;
- }
-
- if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] ||
- kbdev->pm.debug_core_mask[1] !=
- new_core_mask[1] ||
- kbdev->pm.debug_core_mask[2] !=
- new_core_mask[2]) {
- unsigned long flags;
-
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-
- kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0],
- new_core_mask[1], new_core_mask[2]);
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ shader_present = kbdev->gpu_props.props.raw_props.shader_present;
+ group0_core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+
+ for (i = 0; i < 3; ++i) {
+ if ((new_core_mask[i] & shader_present) != new_core_mask[i]) {
+			dev_err(dev, "Invalid core mask 0x%llX for JS %d: Includes non-existent cores (present = 0x%llX)\n",
+ new_core_mask[i], i, shader_present);
+ err = -EINVAL;
+ goto unlock;
+
+ } else if (!(new_core_mask[i] & shader_present & kbdev->pm.backend.ca_cores_enabled)) {
+			dev_err(dev, "Invalid core mask 0x%llX for JS %d: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX)\n",
+ new_core_mask[i], i,
+ kbdev->gpu_props.props.raw_props.shader_present,
+ kbdev->pm.backend.ca_cores_enabled);
+ err = -EINVAL;
+ goto unlock;
+
+ } else if (!(new_core_mask[i] & group0_core_mask)) {
+ dev_err(dev, "Invalid core mask 0x%llX for JS %d: No intersection with group 0 core mask 0x%llX\n",
+ new_core_mask[i], i, group0_core_mask);
+ err = -EINVAL;
+ goto unlock;
}
+ }
- return count;
+ if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] ||
+ kbdev->pm.debug_core_mask[1] !=
+ new_core_mask[1] ||
+ kbdev->pm.debug_core_mask[2] !=
+ new_core_mask[2]) {
+
+ kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0],
+ new_core_mask[1], new_core_mask[2]);
}
- dev_err(kbdev->dev, "Couldn't process set_core_mask write operation.\n"
- "Use format <core_mask>\n"
- "or <core_mask_js0> <core_mask_js1> <core_mask_js2>\n");
- return -EINVAL;
+unlock:
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+end:
+ return err;
}
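
The rewritten handler above validates each per-slot mask against three rules before applying it: the mask must be a subset of the cores physically present, it must intersect the cores currently allowed by core availability (ca_cores_enabled), and it must intersect coherency group 0. A condensed sketch of just the check, with hypothetical names (the real code additionally reports which rule failed and runs under hwaccess_lock):

#include <linux/types.h>

/* Hypothetical helper mirroring the three validity rules used above. */
static bool example_core_mask_valid(u64 mask, u64 shader_present,
				    u64 ca_enabled, u64 group0_mask)
{
	if ((mask & shader_present) != mask)
		return false;			/* includes non-existent cores */
	if (!(mask & shader_present & ca_enabled))
		return false;			/* no currently available core */
	if (!(mask & group0_mask))
		return false;			/* no core from coherency group 0 */
	return true;
}
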
/*
@@ -2438,9 +2484,11 @@ static ssize_t set_pm_poweroff(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct kbase_device *kbdev;
+ struct kbasep_pm_tick_timer_state *stt;
int items;
- s64 gpu_poweroff_time;
- int poweroff_shader_ticks, poweroff_gpu_ticks;
+ u64 gpu_poweroff_time;
+ unsigned int poweroff_shader_ticks, poweroff_gpu_ticks;
+ unsigned long flags;
kbdev = to_kbase_device(dev);
if (!kbdev)
@@ -2455,9 +2503,16 @@ static ssize_t set_pm_poweroff(struct device *dev,
return -EINVAL;
}
- kbdev->pm.gpu_poweroff_time = HR_TIMER_DELAY_NSEC(gpu_poweroff_time);
- kbdev->pm.poweroff_shader_ticks = poweroff_shader_ticks;
- kbdev->pm.poweroff_gpu_ticks = poweroff_gpu_ticks;
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ stt = &kbdev->pm.backend.shader_tick_timer;
+ stt->configured_interval = HR_TIMER_DELAY_NSEC(gpu_poweroff_time);
+ stt->configured_ticks = poweroff_shader_ticks;
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ if (poweroff_gpu_ticks != 0)
+ dev_warn(kbdev->dev, "Separate GPU poweroff delay no longer supported.\n");
return count;
}
@@ -2477,16 +2532,22 @@ static ssize_t show_pm_poweroff(struct device *dev,
struct device_attribute *attr, char * const buf)
{
struct kbase_device *kbdev;
+ struct kbasep_pm_tick_timer_state *stt;
ssize_t ret;
+ unsigned long flags;
kbdev = to_kbase_device(dev);
if (!kbdev)
return -ENODEV;
- ret = scnprintf(buf, PAGE_SIZE, "%llu %u %u\n",
- ktime_to_ns(kbdev->pm.gpu_poweroff_time),
- kbdev->pm.poweroff_shader_ticks,
- kbdev->pm.poweroff_gpu_ticks);
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ stt = &kbdev->pm.backend.shader_tick_timer;
+ ret = scnprintf(buf, PAGE_SIZE, "%llu %u 0\n",
+ ktime_to_ns(stt->configured_interval),
+ stt->configured_ticks);
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
return ret;
}
@@ -2958,6 +3019,45 @@ static const struct file_operations kbasep_serialize_jobs_debugfs_fops = {
#endif /* CONFIG_DEBUG_FS */
+static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data)
+{
+ struct kbase_device *kbdev = container_of(data, struct kbase_device,
+ protected_mode_hwcnt_disable_work);
+ unsigned long flags;
+
+ bool do_disable;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ do_disable = !kbdev->protected_mode_hwcnt_desired &&
+ !kbdev->protected_mode_hwcnt_disabled;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ if (!do_disable)
+ return;
+
+ kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ do_disable = !kbdev->protected_mode_hwcnt_desired &&
+ !kbdev->protected_mode_hwcnt_disabled;
+
+ if (do_disable) {
+ /* Protected mode state did not change while we were doing the
+ * disable, so commit the work we just performed and continue
+ * the state machine.
+ */
+ kbdev->protected_mode_hwcnt_disabled = true;
+ kbase_backend_slot_update(kbdev);
+ } else {
+ /* Protected mode state was updated while we were doing the
+ * disable, so we need to undo the disable we just performed.
+ */
+ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+ }
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
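
The worker above follows a check, act, re-check pattern: the desired/disabled flags are sampled under hwaccess_lock, the potentially sleeping counter disable is performed with the lock dropped, and the flags are re-checked under the lock before committing, with the disable undone if the desired state changed in the meantime (the real worker also kicks the job-slot state machine via kbase_backend_slot_update() after committing). Stripped of the kbase specifics, the shape is roughly as follows; all names here are hypothetical:

#include <linux/spinlock.h>

/* Hypothetical state; disable may sleep, enable must be atomic-safe here. */
struct example_hwcnt_state {
	spinlock_t lock;
	bool counters_desired;
	bool counters_disabled;
};

static void example_disable_worker(struct example_hwcnt_state *st,
				   void (*do_disable)(void),
				   void (*do_enable)(void))
{
	unsigned long flags;
	bool want_disable;

	spin_lock_irqsave(&st->lock, flags);
	want_disable = !st->counters_desired && !st->counters_disabled;
	spin_unlock_irqrestore(&st->lock, flags);

	if (!want_disable)
		return;

	do_disable();				/* may sleep; no lock held */

	spin_lock_irqsave(&st->lock, flags);
	if (!st->counters_desired && !st->counters_disabled)
		st->counters_disabled = true;	/* state unchanged: commit */
	else
		do_enable();			/* state changed: undo */
	spin_unlock_irqrestore(&st->lock, flags);
}
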
static int kbasep_protected_mode_init(struct kbase_device *kbdev)
{
#ifdef CONFIG_OF
@@ -2975,6 +3075,10 @@ static int kbasep_protected_mode_init(struct kbase_device *kbdev)
kbdev->protected_dev->data = kbdev;
kbdev->protected_ops = &kbase_native_protected_ops;
kbdev->protected_mode_support = true;
+ INIT_WORK(&kbdev->protected_mode_hwcnt_disable_work,
+ kbasep_protected_mode_hwcnt_disable_worker);
+ kbdev->protected_mode_hwcnt_desired = true;
+ kbdev->protected_mode_hwcnt_disabled = false;
return 0;
}
@@ -3024,8 +3128,10 @@ static int kbasep_protected_mode_init(struct kbase_device *kbdev)
static void kbasep_protected_mode_term(struct kbase_device *kbdev)
{
- if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE))
+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) {
+ cancel_work_sync(&kbdev->protected_mode_hwcnt_disable_work);
kfree(kbdev->protected_dev);
+ }
}
#ifdef CONFIG_MALI_NO_MALI
@@ -3211,7 +3317,6 @@ static void power_control_term(struct kbase_device *kbdev)
#ifdef MALI_KBASE_BUILD
#ifdef CONFIG_DEBUG_FS
-#if KBASE_GPU_RESET_EN
#include <mali_kbase_hwaccess_jm.h>
static void trigger_quirks_reload(struct kbase_device *kbdev)
@@ -3247,7 +3352,6 @@ MAKE_QUIRK_ACCESSORS(tiler);
MAKE_QUIRK_ACCESSORS(mmu);
MAKE_QUIRK_ACCESSORS(jm);
-#endif /* KBASE_GPU_RESET_EN */
/**
* debugfs_protected_debug_mode_read - "protected_debug_mode" debugfs read
@@ -3328,7 +3432,6 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev)
kbase_debug_job_fault_debugfs_init(kbdev);
kbasep_gpu_memory_debugfs_init(kbdev);
kbase_as_fault_debugfs_init(kbdev);
-#if KBASE_GPU_RESET_EN
/* fops_* variables created by invocations of macro
* MAKE_QUIRK_ACCESSORS() above. */
debugfs_create_file("quirks_sc", 0644,
@@ -3343,7 +3446,6 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev)
debugfs_create_file("quirks_jm", 0644,
kbdev->mali_debugfs_directory, kbdev,
&fops_jm_quirks);
-#endif /* KBASE_GPU_RESET_EN */
debugfs_create_bool("infinite_cache", 0644,
debugfs_ctx_defaults_directory,
@@ -3558,14 +3660,29 @@ static int kbase_platform_device_remove(struct platform_device *pdev)
#endif
+ if (kbdev->inited_subsys & inited_backend_late) {
+ kbase_backend_late_term(kbdev);
+ kbdev->inited_subsys &= ~inited_backend_late;
+ }
+
if (kbdev->inited_subsys & inited_vinstr) {
kbase_vinstr_term(kbdev->vinstr_ctx);
kbdev->inited_subsys &= ~inited_vinstr;
}
- if (kbdev->inited_subsys & inited_backend_late) {
- kbase_backend_late_term(kbdev);
- kbdev->inited_subsys &= ~inited_backend_late;
+ if (kbdev->inited_subsys & inited_hwcnt_gpu_virt) {
+ kbase_hwcnt_virtualizer_term(kbdev->hwcnt_gpu_virt);
+ kbdev->inited_subsys &= ~inited_hwcnt_gpu_virt;
+ }
+
+ if (kbdev->inited_subsys & inited_hwcnt_gpu_ctx) {
+ kbase_hwcnt_context_term(kbdev->hwcnt_gpu_ctx);
+ kbdev->inited_subsys &= ~inited_hwcnt_gpu_ctx;
+ }
+
+ if (kbdev->inited_subsys & inited_hwcnt_gpu_iface) {
+ kbase_hwcnt_backend_gpu_destroy(&kbdev->hwcnt_gpu_iface);
+ kbdev->inited_subsys &= ~inited_hwcnt_gpu_iface;
}
if (kbdev->inited_subsys & inited_tlstream) {
@@ -3790,20 +3907,40 @@ static int kbase_platform_device_probe(struct platform_device *pdev)
}
kbdev->inited_subsys |= inited_tlstream;
- err = kbase_backend_late_init(kbdev);
+ /* Initialize the kctx list. This is used by vinstr. */
+ mutex_init(&kbdev->kctx_list_lock);
+ INIT_LIST_HEAD(&kbdev->kctx_list);
+
+ err = kbase_hwcnt_backend_gpu_create(kbdev, &kbdev->hwcnt_gpu_iface);
if (err) {
- dev_err(kbdev->dev, "Late backend initialization failed\n");
+ dev_err(kbdev->dev, "GPU hwcnt backend creation failed\n");
kbase_platform_device_remove(pdev);
return err;
}
- kbdev->inited_subsys |= inited_backend_late;
+ kbdev->inited_subsys |= inited_hwcnt_gpu_iface;
- /* Initialize the kctx list. This is used by vinstr. */
- mutex_init(&kbdev->kctx_list_lock);
- INIT_LIST_HEAD(&kbdev->kctx_list);
+ err = kbase_hwcnt_context_init(&kbdev->hwcnt_gpu_iface,
+ &kbdev->hwcnt_gpu_ctx);
+ if (err) {
+ dev_err(kbdev->dev,
+ "GPU hwcnt context initialization failed\n");
+ kbase_platform_device_remove(pdev);
+ return err;
+ }
+ kbdev->inited_subsys |= inited_hwcnt_gpu_ctx;
- kbdev->vinstr_ctx = kbase_vinstr_init(kbdev);
- if (!kbdev->vinstr_ctx) {
+ err = kbase_hwcnt_virtualizer_init(
+ kbdev->hwcnt_gpu_ctx, &kbdev->hwcnt_gpu_virt);
+ if (err) {
+ dev_err(kbdev->dev,
+ "GPU hwcnt virtualizer initialization failed\n");
+ kbase_platform_device_remove(pdev);
+ return err;
+ }
+ kbdev->inited_subsys |= inited_hwcnt_gpu_virt;
+
+ err = kbase_vinstr_init(kbdev->hwcnt_gpu_virt, &kbdev->vinstr_ctx);
+ if (err) {
dev_err(kbdev->dev,
"Virtual instrumentation initialization failed\n");
kbase_platform_device_remove(pdev);
@@ -3811,9 +3948,18 @@ static int kbase_platform_device_probe(struct platform_device *pdev)
}
kbdev->inited_subsys |= inited_vinstr;
+ err = kbase_backend_late_init(kbdev);
+ if (err) {
+ dev_err(kbdev->dev, "Late backend initialization failed\n");
+ kbase_platform_device_remove(pdev);
+ return err;
+ }
+ kbdev->inited_subsys |= inited_backend_late;
+
+
#ifdef CONFIG_MALI_DEVFREQ
- /* Devfreq uses vinstr, so must be initialized after it. */
+	/* Devfreq uses hardware counters, so must be initialized after them. */
err = kbase_devfreq_init(kbdev);
if (!err)
kbdev->inited_subsys |= inited_devfreq;
diff --git a/mali_kbase/mali_kbase_debug_job_fault.c b/mali_kbase/mali_kbase_debug_job_fault.c
index 0029fe3..88bb0d3 100644
--- a/mali_kbase/mali_kbase_debug_job_fault.c
+++ b/mali_kbase/mali_kbase_debug_job_fault.c
@@ -364,7 +364,7 @@ static void *debug_job_fault_start(struct seq_file *m, loff_t *pos)
* job done but we delayed it. Now we should clean cache
* earlier. Then the GPU memory dump should be correct.
*/
- kbase_backend_cacheclean(kbdev, event->katom);
+ kbase_backend_cache_clean(kbdev, event->katom);
} else
return NULL;
diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h
index 07ef140..a135742 100644
--- a/mali_kbase/mali_kbase_defs.h
+++ b/mali_kbase/mali_kbase_defs.h
@@ -40,6 +40,7 @@
#include <mali_kbase_instr_defs.h>
#include <mali_kbase_pm.h>
#include <mali_kbase_gpuprops_types.h>
+#include <mali_kbase_hwcnt_backend_gpu.h>
#include <protected_mode_switcher.h>
@@ -143,8 +144,6 @@
#define BASE_MAX_NR_AS 16
/* mmu */
-#define MIDGARD_MMU_VA_BITS 48
-
#define MIDGARD_MMU_LEVEL(x) (x)
#define MIDGARD_MMU_TOPLEVEL MIDGARD_MMU_LEVEL(0)
@@ -425,8 +424,8 @@ enum kbase_atom_gpu_rb_state {
* @KBASE_ATOM_ENTER_PROTECTED_CHECK: Starting state. Check if there are any atoms
* currently submitted to GPU and protected mode transition is
* not already in progress.
- * @KBASE_ATOM_ENTER_PROTECTED_VINSTR: Wait for vinstr to suspend before entry into
- * protected mode.
+ * @KBASE_ATOM_ENTER_PROTECTED_HWCNT: Wait for hardware counter context to
+ * become disabled before entry into protected mode.
* @KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: Wait for the L2 to become idle in preparation
* for the coherency change. L2 shall be powered down and GPU shall
* come out of fully coherent mode before entering protected mode.
@@ -442,7 +441,7 @@ enum kbase_atom_enter_protected_state {
* NOTE: The integer value of this must match KBASE_ATOM_EXIT_PROTECTED_CHECK.
*/
KBASE_ATOM_ENTER_PROTECTED_CHECK = 0,
- KBASE_ATOM_ENTER_PROTECTED_VINSTR,
+ KBASE_ATOM_ENTER_PROTECTED_HWCNT,
KBASE_ATOM_ENTER_PROTECTED_IDLE_L2,
KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY,
KBASE_ATOM_ENTER_PROTECTED_FINISHED,
@@ -513,8 +512,6 @@ struct kbase_ext_res {
* @jc: GPU address of the job-chain.
* @softjob_data: Copy of data read from the user space buffer that @jc
* points to.
- * @coreref_state: state of the atom with respect to retention of shader
- * cores for affinity & power management.
* @fence: Stores either an input or output sync fence, depending
* on soft-job type
* @sync_waiter: Pointer to the sync fence waiter structure passed to the
@@ -607,7 +604,6 @@ struct kbase_jd_atom {
u32 device_nr;
u64 jc;
void *softjob_data;
- enum kbase_atom_coreref_state coreref_state;
#if defined(CONFIG_SYNC)
struct sync_fence *fence;
struct sync_fence_waiter sync_waiter;
@@ -1073,15 +1069,6 @@ struct kbase_pm_device_data {
/* Time in milliseconds between each dvfs sample */
u32 dvfs_period;
- /* Period of GPU poweroff timer */
- ktime_t gpu_poweroff_time;
-
- /* Number of ticks of GPU poweroff timer before shader is powered off */
- int poweroff_shader_ticks;
-
- /* Number of ticks of GPU poweroff timer before GPU is powered off */
- int poweroff_gpu_ticks;
-
struct kbase_pm_backend_data backend;
};
@@ -1254,34 +1241,19 @@ struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void);
* configuration/properties of GPU HW device in use.
* @hw_issues_mask: List of SW workarounds for HW issues
* @hw_features_mask: List of available HW features.
- * @shader_needed_cnt: Count for the 64 shader cores, incremented when
- * shaders are requested for use and decremented later
- * when they are no longer required.
- * @tiler_needed_cnt: Count for the Tiler block shader cores, incremented
- * when Tiler is requested for use and decremented
- * later when the Tiler is no longer required.
* @disjoint_event: struct for keeping track of the disjoint information,
* that whether the GPU is in a disjoint state and the
* number of disjoint events that have occurred on GPU.
- * @l2_users_count: Refcount for tracking users of the l2 cache, e.g.
- * when using hardware counter instrumentation.
- * @shader_available_bitmap: Bitmap of shader cores that are currently available,
- * powered up and the power policy is happy for jobs
- * to be submitted to these cores. These are updated
- * by the power management code. The job scheduler
- * should avoid submitting new jobs to any cores
- * that are not marked as available.
- * @tiler_available_bitmap: Bitmap of tiler units that are currently available.
- * @l2_available_bitmap: Bitmap of the currently available Level 2 caches.
- * @stack_available_bitmap: Bitmap of the currently available Core stacks.
- * @shader_ready_bitmap: Bitmap of shader cores that are ready (powered on)
- * @shader_transitioning_bitmap: Bitmap of shader cores that are currently changing
- * power state.
* @nr_hw_address_spaces: Number of address spaces actually available in the
* GPU, remains constant after driver initialisation.
* @nr_user_address_spaces: Number of address spaces available to user contexts
* @hwcnt: Structure used for instrumentation and HW counters
* dumping
+ * @hwcnt_gpu_iface: Backend interface for GPU hardware counter access.
+ * @hwcnt_gpu_ctx: Context for GPU hardware counter access.
+ * @hwaccess_lock must be held when calling
+ * kbase_hwcnt_context_enable() with @hwcnt_gpu_ctx.
+ * @hwcnt_gpu_virt: Virtualizer for GPU hardware counters.
* @vinstr_ctx: vinstr context created per device
* @trace_lock: Lock to serialize the access to trace buffer.
* @trace_first_out: Index/offset in the trace buffer at which the first
@@ -1294,8 +1266,14 @@ struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void);
* @reset_timeout_ms: Number of milliseconds to wait for the soft stop to
* complete for the GPU jobs before proceeding with the
* GPU reset.
- * @cacheclean_lock: Lock to serialize the clean & invalidation of GPU caches,
- * between Job Manager backend & Instrumentation code.
+ * @cache_clean_in_progress: Set when a cache clean has been started, and
+ * cleared when it has finished. This prevents multiple
+ * cache cleans being done simultaneously.
+ * @cache_clean_queued: Set if a cache clean is invoked while another is in
+ * progress. If this happens, another cache clean needs
+ * to be triggered immediately after completion of the
+ * current one.
+ * @cache_clean_wait: Signalled when a cache clean has finished.
* @platform_context: Platform specific private data to be accessed by
* platform specific config files only.
* @kctx_list: List of kbase_contexts created for the device, including
@@ -1398,6 +1376,13 @@ struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void);
* @protected_mode: set to TRUE when GPU is put into protected mode
* @protected_mode_transition: set to TRUE when GPU is transitioning into or
* out of protected mode.
+ * @protected_mode_hwcnt_desired: True if we want GPU hardware counters to be
+ * enabled. Counters must be disabled before transition
+ * into protected mode.
+ * @protected_mode_hwcnt_disabled: True if GPU hardware counters are not
+ * enabled.
+ * @protected_mode_hwcnt_disable_work: Work item to disable GPU hardware
+ * counters, used if atomic disable is not possible.
* @protected_mode_support: set to true if protected mode is supported.
* @buslogger: Pointer to the structure required for interfacing
* with the bus logger module to set the size of buffer
@@ -1471,24 +1456,11 @@ struct kbase_device {
unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
- u32 tiler_needed_cnt;
- u32 shader_needed_cnt;
-
struct {
atomic_t count;
atomic_t state;
} disjoint_event;
- u32 l2_users_count;
-
- u64 shader_available_bitmap;
- u64 tiler_available_bitmap;
- u64 l2_available_bitmap;
- u64 stack_available_bitmap;
-
- u64 shader_ready_bitmap;
- u64 shader_transitioning_bitmap;
-
s8 nr_hw_address_spaces;
s8 nr_user_address_spaces;
@@ -1498,10 +1470,14 @@ struct kbase_device {
struct kbase_context *kctx;
u64 addr;
+ u64 addr_bytes;
struct kbase_instr_backend backend;
} hwcnt;
+ struct kbase_hwcnt_backend_interface hwcnt_gpu_iface;
+ struct kbase_hwcnt_context *hwcnt_gpu_ctx;
+ struct kbase_hwcnt_virtualizer *hwcnt_gpu_virt;
struct kbase_vinstr_context *vinstr_ctx;
#if KBASE_TRACE_ENABLE
@@ -1513,7 +1489,9 @@ struct kbase_device {
u32 reset_timeout_ms;
- struct mutex cacheclean_lock;
+ bool cache_clean_in_progress;
+ bool cache_clean_queued;
+ wait_queue_head_t cache_clean_wait;
void *platform_context;
@@ -1548,27 +1526,9 @@ struct kbase_device {
* the difference between last_metrics and the current values.
*/
struct kbasep_pm_metrics last_metrics;
-
- /*
- * gpu_active_callback - Inform IPA that GPU is now active
- * @model_data: Pointer to model data
- */
- void (*gpu_active_callback)(
- struct kbase_ipa_model_vinstr_data *model_data);
-
- /*
- * gpu_idle_callback - Inform IPA that GPU is now idle
- * @model_data: Pointer to model data
- */
- void (*gpu_idle_callback)(
- struct kbase_ipa_model_vinstr_data *model_data);
-
/* Model data to pass to ipa_gpu_active/idle() */
struct kbase_ipa_model_vinstr_data *model_data;
- /* true if IPA is currently using vinstr */
- bool vinstr_active;
-
/* true if use of fallback model has been forced by the User */
bool force_fallback_model;
} ipa;
@@ -1642,6 +1602,12 @@ struct kbase_device {
bool protected_mode_transition;
+ bool protected_mode_hwcnt_desired;
+
+ bool protected_mode_hwcnt_disabled;
+
+ struct work_struct protected_mode_hwcnt_disable_work;
+
bool protected_mode_support;
#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
@@ -1824,6 +1790,9 @@ struct kbase_sub_alloc {
* having the same value for GPU & CPU virtual address.
* @reg_rbtree_custom: RB tree of the memory regions allocated from the CUSTOM_VA
* zone of the GPU virtual address space.
+ * @reg_rbtree_exec: RB tree of the memory regions allocated from the EXEC_VA
+ * zone of the GPU virtual address space. Used for GPU-executable
+ * allocations which don't need the SAME_VA property.
 * @cookies: Bitmask containing BITS_PER_LONG bits, used mainly for
* SAME_VA allocations to defer the reservation of memory region
* (from the GPU virtual address space) from base_mem_alloc
@@ -1896,6 +1865,10 @@ struct kbase_sub_alloc {
* pages used for GPU allocations, done for the context,
* to the memory consumed by the process.
* @same_va_end: End address of the SAME_VA zone (in 4KB page units)
+ * @exec_va_start: Start address of the EXEC_VA zone (in 4KB page units)
+ * or U64_MAX if the EXEC_VA zone is uninitialized.
+ * @gpu_va_end: End address of the GPU va space (in 4KB page units)
+ * @jit_va: Indicates if a JIT_VA zone has been created.
* @timeline: Object tracking the number of atoms currently in flight for
* the context and thread group id of the process, i.e. @tgid.
* @mem_profile_data: Buffer containing the profiling information provided by
@@ -1930,9 +1903,11 @@ struct kbase_sub_alloc {
* @slots_pullable: Bitmask of slots, indicating the slots for which the
* context has pullable atoms in the runnable tree.
* @work: Work structure used for deferred ASID assignment.
- * @vinstr_cli: Pointer to the legacy userspace vinstr client, there can
- * be only such client per kbase context.
- * @vinstr_cli_lock: Lock used for the vinstr ioctl calls made for @vinstr_cli.
+ * @legacy_hwcnt_cli: Pointer to the legacy userspace hardware counters
+ *                    client; there can be only one such client per kbase
+ *                    context.
+ * @legacy_hwcnt_lock: Lock used to prevent concurrent access to
+ * @legacy_hwcnt_cli.
* @completed_jobs: List containing completed atoms for which base_jd_event is
* to be posted.
* @work_count: Number of work items, corresponding to atoms, currently
@@ -2017,6 +1992,7 @@ struct kbase_context {
struct mutex reg_lock;
struct rb_root reg_rbtree_same;
struct rb_root reg_rbtree_custom;
+ struct rb_root reg_rbtree_exec;
unsigned long cookies;
@@ -2060,6 +2036,9 @@ struct kbase_context {
spinlock_t mm_update_lock;
struct mm_struct __rcu *process_mm;
u64 same_va_end;
+ u64 exec_va_start;
+ u64 gpu_va_end;
+ bool jit_va;
#ifdef CONFIG_DEBUG_FS
char *mem_profile_data;
@@ -2087,8 +2066,8 @@ struct kbase_context {
struct work_struct work;
- struct kbase_vinstr_client *vinstr_cli;
- struct mutex vinstr_cli_lock;
+ struct kbase_hwcnt_legacy_client *legacy_hwcnt_cli;
+ struct mutex legacy_hwcnt_lock;
struct list_head completed_jobs;
atomic_t work_count;
diff --git a/mali_kbase/mali_kbase_device.c b/mali_kbase/mali_kbase_device.c
index 44d16a7..530bb45 100644
--- a/mali_kbase/mali_kbase_device.c
+++ b/mali_kbase/mali_kbase_device.c
@@ -222,7 +222,7 @@ int kbase_device_init(struct kbase_device * const kbdev)
if (err)
goto term_as;
- mutex_init(&kbdev->cacheclean_lock);
+ init_waitqueue_head(&kbdev->cache_clean_wait);
kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev);
diff --git a/mali_kbase/mali_kbase_gator_api.c b/mali_kbase/mali_kbase_gator_api.c
index 7077c3a..1719edf 100644
--- a/mali_kbase/mali_kbase_gator_api.c
+++ b/mali_kbase/mali_kbase_gator_api.c
@@ -25,6 +25,9 @@
#include "mali_kbase_mem_linux.h"
#include "mali_kbase_gator_api.h"
#include "mali_kbase_gator_hwcnt_names.h"
+#include "mali_kbase_hwcnt_types.h"
+#include "mali_kbase_hwcnt_gpu.h"
+#include "mali_kbase_hwcnt_virtualizer.h"
#define MALI_MAX_CORES_PER_GROUP 4
#define MALI_MAX_NUM_BLOCKS_PER_GROUP 8
@@ -33,8 +36,9 @@
struct kbase_gator_hwcnt_handles {
struct kbase_device *kbdev;
- struct kbase_vinstr_client *vinstr_cli;
- void *vinstr_buffer;
+ struct kbase_hwcnt_virtualizer_client *hvcli;
+ struct kbase_hwcnt_enable_map enable_map;
+ struct kbase_hwcnt_dump_buffer dump_buf;
struct work_struct dump_work;
int dump_complete;
spinlock_t dump_lock;
@@ -173,8 +177,10 @@ KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term_names);
struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info)
{
+ int errcode;
struct kbase_gator_hwcnt_handles *hand;
- struct kbase_ioctl_hwcnt_reader_setup setup;
+ const struct kbase_hwcnt_metadata *metadata;
+ struct kbase_hwcnt_physical_enable_map phys_map;
uint32_t dump_size = 0, i = 0;
if (!in_out_info)
@@ -192,11 +198,20 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn
if (!hand->kbdev)
goto free_hand;
- dump_size = kbase_vinstr_dump_size(hand->kbdev);
- hand->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL);
- if (!hand->vinstr_buffer)
+ metadata = kbase_hwcnt_virtualizer_metadata(
+ hand->kbdev->hwcnt_gpu_virt);
+ if (!metadata)
goto release_device;
- in_out_info->kernel_dump_buffer = hand->vinstr_buffer;
+
+ errcode = kbase_hwcnt_enable_map_alloc(metadata, &hand->enable_map);
+ if (errcode)
+ goto release_device;
+
+ errcode = kbase_hwcnt_dump_buffer_alloc(metadata, &hand->dump_buf);
+ if (errcode)
+ goto free_enable_map;
+
+ in_out_info->kernel_dump_buffer = hand->dump_buf.dump_buf;
in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores;
in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups;
@@ -213,7 +228,7 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn
in_out_info->nr_core_groups, GFP_KERNEL);
if (!in_out_info->hwc_layout)
- goto free_vinstr_buffer;
+ goto free_dump_buf;
dump_size = in_out_info->nr_core_groups *
MALI_MAX_NUM_BLOCKS_PER_GROUP *
@@ -256,7 +271,7 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn
in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc_bits + nr_l2), GFP_KERNEL);
if (!in_out_info->hwc_layout)
- goto free_vinstr_buffer;
+ goto free_dump_buf;
dump_size = (2 + nr_sc_bits + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER;
@@ -275,17 +290,23 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn
}
}
+ /* Calculated dump size must be the same as real dump size */
+ if (WARN_ON(dump_size != metadata->dump_buf_bytes))
+ goto free_layout;
+
in_out_info->nr_hwc_blocks = i;
in_out_info->size = dump_size;
- setup.jm_bm = in_out_info->bitmask[0];
- setup.tiler_bm = in_out_info->bitmask[1];
- setup.shader_bm = in_out_info->bitmask[2];
- setup.mmu_l2_bm = in_out_info->bitmask[3];
- hand->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(hand->kbdev->vinstr_ctx,
- &setup, hand->vinstr_buffer);
- if (!hand->vinstr_cli) {
- dev_err(hand->kbdev->dev, "Failed to register gator with vinstr core");
+ phys_map.jm_bm = in_out_info->bitmask[JM_BLOCK];
+ phys_map.tiler_bm = in_out_info->bitmask[TILER_BLOCK];
+ phys_map.shader_bm = in_out_info->bitmask[SHADER_BLOCK];
+ phys_map.mmu_l2_bm = in_out_info->bitmask[MMU_L2_BLOCK];
+ kbase_hwcnt_gpu_enable_map_from_physical(&hand->enable_map, &phys_map);
+ errcode = kbase_hwcnt_virtualizer_client_create(
+ hand->kbdev->hwcnt_gpu_virt, &hand->enable_map, &hand->hvcli);
+ if (errcode) {
+ dev_err(hand->kbdev->dev,
+ "Failed to register gator with hwcnt virtualizer core");
goto free_layout;
}
@@ -293,13 +314,12 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn
free_layout:
kfree(in_out_info->hwc_layout);
-
-free_vinstr_buffer:
- kfree(hand->vinstr_buffer);
-
+free_dump_buf:
+ kbase_hwcnt_dump_buffer_free(&hand->dump_buf);
+free_enable_map:
+ kbase_hwcnt_enable_map_free(&hand->enable_map);
release_device:
kbase_release_device(hand->kbdev);
-
free_hand:
kfree(hand);
return NULL;
@@ -313,8 +333,9 @@ void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct k
if (opaque_handles) {
cancel_work_sync(&opaque_handles->dump_work);
- kbase_vinstr_detach_client(opaque_handles->vinstr_cli);
- kfree(opaque_handles->vinstr_buffer);
+ kbase_hwcnt_virtualizer_client_destroy(opaque_handles->hvcli);
+ kbase_hwcnt_dump_buffer_free(&opaque_handles->dump_buf);
+ kbase_hwcnt_enable_map_free(&opaque_handles->enable_map);
kbase_release_device(opaque_handles->kbdev);
kfree(opaque_handles);
}
@@ -323,11 +344,21 @@ KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term);
static void dump_worker(struct work_struct *work)
{
+ int errcode;
+ u64 ts_start_ns;
+ u64 ts_end_ns;
struct kbase_gator_hwcnt_handles *hand;
hand = container_of(work, struct kbase_gator_hwcnt_handles, dump_work);
- if (!kbase_vinstr_hwc_dump(hand->vinstr_cli,
- BASE_HWCNT_READER_EVENT_MANUAL)) {
+ errcode = kbase_hwcnt_virtualizer_client_dump(
+ hand->hvcli, &ts_start_ns, &ts_end_ns, &hand->dump_buf);
+ if (!errcode) {
+		/* Patch the headers to hide other clients' counter choices */
+ kbase_hwcnt_gpu_patch_dump_headers(
+ &hand->dump_buf, &hand->enable_map);
+ /* Zero all non-enabled counters (currently undefined values) */
+ kbase_hwcnt_dump_buffer_zero_non_enabled(
+ &hand->dump_buf, &hand->enable_map);
spin_lock_bh(&hand->dump_lock);
hand->dump_complete = 1;
spin_unlock_bh(&hand->dump_lock);
diff --git a/mali_kbase/mali_kbase_gpu_id.h b/mali_kbase/mali_kbase_gpu_id.h
index 5f84ba9..d432f8e 100644
--- a/mali_kbase/mali_kbase_gpu_id.h
+++ b/mali_kbase/mali_kbase_gpu_id.h
@@ -114,8 +114,8 @@
#define GPU_ID2_PRODUCT_TEGX GPU_ID2_MODEL_MAKE(8, 3)
#define GPU_ID2_PRODUCT_TTRX GPU_ID2_MODEL_MAKE(9, 0)
#define GPU_ID2_PRODUCT_TNAX GPU_ID2_MODEL_MAKE(9, 1)
+#define GPU_ID2_PRODUCT_TBEX GPU_ID2_MODEL_MAKE(9, 2)
#define GPU_ID2_PRODUCT_TULX GPU_ID2_MODEL_MAKE(10, 0)
-#define GPU_ID2_PRODUCT_TDUX GPU_ID2_MODEL_MAKE(10, 1)
#define GPU_ID2_PRODUCT_TIDX GPU_ID2_MODEL_MAKE(10, 3)
#define GPU_ID2_PRODUCT_TVAX GPU_ID2_MODEL_MAKE(10, 4)
diff --git a/mali_kbase/mali_kbase_hw.c b/mali_kbase/mali_kbase_hw.c
index fc6b644..450926c 100644
--- a/mali_kbase/mali_kbase_hw.c
+++ b/mali_kbase/mali_kbase_hw.c
@@ -74,12 +74,12 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev)
case GPU_ID2_PRODUCT_TNAX:
features = base_hw_features_tNAx;
break;
+ case GPU_ID2_PRODUCT_TBEX:
+ features = base_hw_features_tBEx;
+ break;
case GPU_ID2_PRODUCT_TULX:
features = base_hw_features_tULx;
break;
- case GPU_ID2_PRODUCT_TDUX:
- features = base_hw_features_tDUx;
- break;
case GPU_ID2_PRODUCT_TBOX:
features = base_hw_features_tBOx;
break;
@@ -213,12 +213,12 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
{{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNAx_r0p0},
{U32_MAX, NULL} } },
- {GPU_ID2_PRODUCT_TULX,
- {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tULx_r0p0},
+ {GPU_ID2_PRODUCT_TBEX,
+ {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBEx_r0p0},
{U32_MAX, NULL} } },
- {GPU_ID2_PRODUCT_TDUX,
- {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDUx_r0p0},
+ {GPU_ID2_PRODUCT_TULX,
+ {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tULx_r0p0},
{U32_MAX, NULL} } },
{GPU_ID2_PRODUCT_TBOX,
@@ -250,10 +250,8 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
if (product != NULL) {
/* Found a matching product. */
const u32 version = gpu_id & GPU_ID2_VERSION;
-#if !MALI_CUSTOMER_RELEASE
u32 fallback_version = 0;
const enum base_hw_issue *fallback_issues = NULL;
-#endif
size_t v;
/* Stop when we reach the end of the map. */
@@ -265,25 +263,34 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
break;
}
-#if !MALI_CUSTOMER_RELEASE
/* Check whether this is a candidate for most recent
known version not later than the actual
version. */
if ((version > product->map[v].version) &&
(product->map[v].version >= fallback_version)) {
- fallback_version = product->map[v].version;
- fallback_issues = product->map[v].issues;
- }
+#if MALI_CUSTOMER_RELEASE
+ /* Match on version's major and minor fields */
+ if (((version ^ product->map[v].version) >>
+ GPU_ID2_VERSION_MINOR_SHIFT) == 0)
#endif
+ {
+ fallback_version = product->map[v].version;
+ fallback_issues = product->map[v].issues;
+ }
+ }
}
-#if !MALI_CUSTOMER_RELEASE
if ((issues == NULL) && (fallback_issues != NULL)) {
/* Fall back to the issue set of the most recent known
version not later than the actual version. */
issues = fallback_issues;
+#if MALI_CUSTOMER_RELEASE
+ dev_warn(kbdev->dev,
+ "GPU hardware issue table may need updating:\n"
+#else
dev_info(kbdev->dev,
+#endif
"r%dp%d status %d is unknown; treating as r%dp%d status %d",
(gpu_id & GPU_ID2_VERSION_MAJOR) >>
GPU_ID2_VERSION_MAJOR_SHIFT,
@@ -305,7 +312,6 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
kbase_gpuprops_update_core_props_gpu_id(
&kbdev->gpu_props.props);
}
-#endif
}
return issues;
}
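
The customer-release fallback above only accepts a table entry whose version differs from the live GPU in the status field alone: with the status bits occupying the low end of the version word, XOR-ing the two versions and shifting right by GPU_ID2_VERSION_MINOR_SHIFT leaves zero exactly when the major and minor fields both match, which is what the "Match on version's major and minor fields" comment describes. A standalone illustration (field layout assumed, names hypothetical):

#include <linux/types.h>

/*
 * Hypothetical illustration of the version comparison above: any bits
 * below minor_shift (the status field) are discarded, so the test passes
 * exactly when the remaining (minor and major) fields are identical.
 */
static bool example_same_major_minor(u32 a, u32 b, unsigned int minor_shift)
{
	return ((a ^ b) >> minor_shift) == 0;
}
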
@@ -467,12 +473,12 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
case GPU_ID2_PRODUCT_TNAX:
issues = base_hw_issues_model_tNAx;
break;
+ case GPU_ID2_PRODUCT_TBEX:
+ issues = base_hw_issues_model_tBEx;
+ break;
case GPU_ID2_PRODUCT_TULX:
issues = base_hw_issues_model_tULx;
break;
- case GPU_ID2_PRODUCT_TDUX:
- issues = base_hw_issues_model_tDUx;
- break;
case GPU_ID2_PRODUCT_TBOX:
issues = base_hw_issues_model_tBOx;
break;
diff --git a/mali_kbase/mali_kbase_hwaccess_instr.h b/mali_kbase/mali_kbase_hwaccess_instr.h
index 0c5ceff..d5b9099 100644
--- a/mali_kbase/mali_kbase_hwaccess_instr.h
+++ b/mali_kbase/mali_kbase_hwaccess_instr.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014-2015, 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2017-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -32,7 +32,28 @@
#include <mali_kbase_instr_defs.h>
/**
- * kbase_instr_hwcnt_enable_internal - Enable HW counters collection
+ * struct kbase_instr_hwcnt_enable - Enable hardware counter collection.
+ * @dump_buffer: GPU address to write counters to.
+ * @dump_buffer_bytes: Size in bytes of the buffer pointed to by dump_buffer.
+ * @jm_bm: counters selection bitmask (JM).
+ * @shader_bm: counters selection bitmask (Shader).
+ * @tiler_bm: counters selection bitmask (Tiler).
+ * @mmu_l2_bm: counters selection bitmask (MMU_L2).
+ * @use_secondary: use secondary performance counters set for applicable
+ * counter blocks.
+ */
+struct kbase_instr_hwcnt_enable {
+ u64 dump_buffer;
+ u64 dump_buffer_bytes;
+ u32 jm_bm;
+ u32 shader_bm;
+ u32 tiler_bm;
+ u32 mmu_l2_bm;
+ bool use_secondary;
+};
+
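
As a rough illustration of how this structure feeds kbase_instr_hwcnt_enable_internal() declared below: a caller fills in the GPU dump buffer address and size plus the per-block selection bitmasks, then requests collection. The buffer values and all-ones bitmasks here are placeholders, and the power-management and locking context required by the real call is not shown:

/* Sketch only: placeholder values, real calling context omitted. */
static int example_enable_hwcnt(struct kbase_device *kbdev,
				struct kbase_context *kctx,
				u64 gpu_dump_va, u64 dump_bytes)
{
	struct kbase_instr_hwcnt_enable enable = {
		.dump_buffer = gpu_dump_va,
		.dump_buffer_bytes = dump_bytes,
		.jm_bm = ~0u,			/* all JM counters */
		.shader_bm = ~0u,		/* all shader counters */
		.tiler_bm = ~0u,		/* all tiler counters */
		.mmu_l2_bm = ~0u,		/* all MMU/L2 counters */
		.use_secondary = false,
	};

	return kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable);
}
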
+/**
+ * kbase_instr_hwcnt_enable_internal() - Enable HW counters collection
* @kbdev: Kbase device
* @kctx: Kbase context
* @enable: HW counter setup parameters
@@ -43,10 +64,10 @@
*/
int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
struct kbase_context *kctx,
- struct kbase_ioctl_hwcnt_enable *enable);
+ struct kbase_instr_hwcnt_enable *enable);
/**
- * kbase_instr_hwcnt_disable_internal - Disable HW counters collection
+ * kbase_instr_hwcnt_disable_internal() - Disable HW counters collection
* @kctx: Kbase context
*
* Context: might sleep, waiting for an ongoing dump to complete
diff --git a/mali_kbase/mali_kbase_hwaccess_jm.h b/mali_kbase/mali_kbase_hwaccess_jm.h
index 580ac98..e2798eb 100644
--- a/mali_kbase/mali_kbase_hwaccess_jm.h
+++ b/mali_kbase/mali_kbase_hwaccess_jm.h
@@ -128,7 +128,7 @@ void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
struct kbase_context *kctx);
/**
- * kbase_backend_cacheclean - Perform a cache clean if the given atom requires
+ * kbase_backend_cache_clean - Perform a cache clean if the given atom requires
* one
* @kbdev: Device pointer
* @katom: Pointer to the failed atom
@@ -136,7 +136,7 @@ void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
* On some GPUs, the GPU cache must be cleaned following a failed atom. This
* function performs a clean if it is required by @katom.
*/
-void kbase_backend_cacheclean(struct kbase_device *kbdev,
+void kbase_backend_cache_clean(struct kbase_device *kbdev,
struct kbase_jd_atom *katom);
@@ -160,14 +160,12 @@ void kbase_backend_complete_wq(struct kbase_device *kbdev,
* any scheduling has taken place.
* @kbdev: Device pointer
* @core_req: Core requirements of atom
- * @coreref_state: Coreref state of atom
*
* This function should only be called from kbase_jd_done_worker() or
* js_return_worker().
*/
void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
- base_jd_core_req core_req,
- enum kbase_atom_coreref_state coreref_state);
+ base_jd_core_req core_req);
/**
* kbase_backend_reset() - The GPU is being reset. Cancel all jobs on the GPU
@@ -277,7 +275,6 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx);
*/
u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev);
-#if KBASE_GPU_RESET_EN
/**
* kbase_prepare_to_reset_gpu - Prepare for resetting the GPU.
* @kbdev: Device pointer
@@ -345,8 +342,11 @@ void kbase_reset_gpu_locked(struct kbase_device *kbdev);
* of the GPU as part of normal processing (e.g. exiting protected mode) where
* the driver will have ensured the scheduler has been idled and all other
* users of the GPU (e.g. instrumentation) have been suspended.
+ *
+ * Return: 0 if the reset was started successfully
+ * -EAGAIN if another reset is currently in progress
*/
-void kbase_reset_gpu_silent(struct kbase_device *kbdev);
+int kbase_reset_gpu_silent(struct kbase_device *kbdev);
/**
* kbase_reset_gpu_active - Reports if the GPU is being reset
@@ -355,7 +355,6 @@ void kbase_reset_gpu_silent(struct kbase_device *kbdev);
* Return: True if the GPU is in the process of being reset.
*/
bool kbase_reset_gpu_active(struct kbase_device *kbdev);
-#endif
/**
* kbase_job_slot_hardstop - Hard-stop the specified job slot
diff --git a/mali_kbase/mali_kbase_hwaccess_pm.h b/mali_kbase/mali_kbase_hwaccess_pm.h
index 4598d80..5bb3887 100644
--- a/mali_kbase/mali_kbase_hwaccess_pm.h
+++ b/mali_kbase/mali_kbase_hwaccess_pm.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -44,13 +44,23 @@ struct kbase_device;
*
* Must be called before any other power management function
*
- * @param kbdev The kbase device structure for the device (must be a valid
- * pointer)
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 if the power management framework was successfully initialized.
+ */
+int kbase_hwaccess_pm_early_init(struct kbase_device *kbdev);
+
+/**
+ * Initialize the power management framework.
+ *
+ * Must be called before any other power management function (except
+ * @ref kbase_hwaccess_pm_early_init)
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
*
- * @return 0 if the power management framework was successfully
- * initialized.
+ * Return: 0 if the power management framework was successfully initialized.
*/
-int kbase_hwaccess_pm_init(struct kbase_device *kbdev);
+int kbase_hwaccess_pm_late_init(struct kbase_device *kbdev);
/**
* Terminate the power management framework.
@@ -58,10 +68,19 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev);
* No power management functions may be called after this (except
* @ref kbase_pm_init)
*
- * @param kbdev The kbase device structure for the device (must be a valid
- * pointer)
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_hwaccess_pm_early_term(struct kbase_device *kbdev);
+
+/**
+ * Terminate the power management framework.
+ *
+ * No power management functions may be called after this (except
+ * @ref kbase_hwaccess_pm_early_term or @ref kbase_hwaccess_pm_late_init)
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
-void kbase_hwaccess_pm_term(struct kbase_device *kbdev);
+void kbase_hwaccess_pm_late_term(struct kbase_device *kbdev);
/**
* kbase_hwaccess_pm_powerup - Power up the GPU.
diff --git a/mali_kbase/mali_kbase_hwaccess_time.h b/mali_kbase/mali_kbase_hwaccess_time.h
index 9b86b51..f7539f5 100644
--- a/mali_kbase/mali_kbase_hwaccess_time.h
+++ b/mali_kbase/mali_kbase_hwaccess_time.h
@@ -51,7 +51,11 @@ void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
*
* This function is only in use for BASE_HW_ISSUE_6367
*/
-#ifndef CONFIG_MALI_NO_MALI
+#ifdef CONFIG_MALI_NO_MALI
+static inline void kbase_wait_write_flush(struct kbase_device *kbdev)
+{
+}
+#else
void kbase_wait_write_flush(struct kbase_device *kbdev);
#endif
diff --git a/mali_kbase/mali_kbase_hwcnt.c b/mali_kbase/mali_kbase_hwcnt.c
new file mode 100644
index 0000000..efbac6f
--- /dev/null
+++ b/mali_kbase/mali_kbase_hwcnt.c
@@ -0,0 +1,796 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Implementation of hardware counter context and accumulator APIs.
+ */
+
+#include "mali_kbase_hwcnt_context.h"
+#include "mali_kbase_hwcnt_accumulator.h"
+#include "mali_kbase_hwcnt_backend.h"
+#include "mali_kbase_hwcnt_types.h"
+#include "mali_malisw.h"
+#include "mali_kbase_debug.h"
+#include "mali_kbase_linux.h"
+
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+
+/**
+ * enum kbase_hwcnt_accum_state - Hardware counter accumulator states.
+ * @ACCUM_STATE_ERROR: Error state, where all accumulator operations fail.
+ * @ACCUM_STATE_DISABLED: Disabled state, where dumping is always disabled.
+ * @ACCUM_STATE_ENABLED: Enabled state, where dumping is enabled if there are
+ * any enabled counters.
+ */
+enum kbase_hwcnt_accum_state {
+ ACCUM_STATE_ERROR,
+ ACCUM_STATE_DISABLED,
+ ACCUM_STATE_ENABLED
+};
+
+/**
+ * struct kbase_hwcnt_accumulator - Hardware counter accumulator structure.
+ * @backend: Pointer to created counter backend.
+ * @state: The current state of the accumulator.
+ * - State transition from disabled->enabled or
+ * disabled->error requires state_lock.
+ * - State transition from enabled->disabled or
+ * enabled->error requires both accum_lock and
+ * state_lock.
+ * - Error state persists until next disable.
+ * @enable_map: The current set of enabled counters.
+ * - Must only be modified while holding both
+ * accum_lock and state_lock.
+ * - Can be read while holding either lock.
+ * - Must stay in sync with enable_map_any_enabled.
+ * @enable_map_any_enabled: True if any counters in the map are enabled, else
+ * false. If true, and state is ACCUM_STATE_ENABLED,
+ * then the counter backend will be enabled.
+ * - Must only be modified while holding both
+ * accum_lock and state_lock.
+ * - Can be read while holding either lock.
+ * - Must stay in sync with enable_map.
+ * @scratch_map: Scratch enable map, used as temporary enable map
+ * storage during dumps.
+ * - Must only be read or modified while holding
+ * accum_lock.
+ * @accum_buf: Accumulation buffer, where dumps will be accumulated
+ * into on transition to a disable state.
+ * - Must only be read or modified while holding
+ * accum_lock.
+ * @accumulated: True if the accumulation buffer has been accumulated
+ * into and not subsequently read from yet, else false.
+ * - Must only be read or modified while holding
+ * accum_lock.
+ * @ts_last_dump_ns: Timestamp (ns) of the end time of the most recent
+ * dump that was requested by the user.
+ * - Must only be read or modified while holding
+ * accum_lock.
+ */
+struct kbase_hwcnt_accumulator {
+ struct kbase_hwcnt_backend *backend;
+ enum kbase_hwcnt_accum_state state;
+ struct kbase_hwcnt_enable_map enable_map;
+ bool enable_map_any_enabled;
+ struct kbase_hwcnt_enable_map scratch_map;
+ struct kbase_hwcnt_dump_buffer accum_buf;
+ bool accumulated;
+ u64 ts_last_dump_ns;
+};
+
+/**
+ * struct kbase_hwcnt_context - Hardware counter context structure.
+ * @iface: Pointer to hardware counter backend interface.
+ * @state_lock: Spinlock protecting state.
+ * @disable_count: Disable count of the context. Initialised to 1.
+ * Decremented when the accumulator is acquired, and incremented
+ * on release. Incremented on calls to
+ * kbase_hwcnt_context_disable[_atomic], and decremented on
+ * calls to kbase_hwcnt_context_enable.
+ * - Must only be read or modified while holding state_lock.
+ * @accum_lock: Mutex protecting accumulator.
+ * @accum_inited: Flag to prevent concurrent accumulator initialisation and/or
+ * termination. Set to true before accumulator initialisation,
+ * and false after accumulator termination.
+ * - Must only be modified while holding both accum_lock and
+ * state_lock.
+ * - Can be read while holding either lock.
+ * @accum: Hardware counter accumulator structure.
+ */
+struct kbase_hwcnt_context {
+ const struct kbase_hwcnt_backend_interface *iface;
+ spinlock_t state_lock;
+ size_t disable_count;
+ struct mutex accum_lock;
+ bool accum_inited;
+ struct kbase_hwcnt_accumulator accum;
+};
+
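
Per the @disable_count description above, the count starts at 1, is decremented when the accumulator is acquired and incremented when it is released, and every kbase_hwcnt_context_disable[_atomic]() call must eventually be balanced by a kbase_hwcnt_context_enable() call. A minimal balanced-usage sketch, mirroring how the protected-mode path elsewhere in this patch brackets work that must run with counters quiesced (error handling omitted; the work callback is hypothetical):

/*
 * Sketch only: bracket a section that must run with GPU counters
 * quiesced. hctx is assumed to be a previously initialised context.
 */
static void example_with_hwcnt_quiesced(struct kbase_hwcnt_context *hctx,
					void (*work)(void))
{
	kbase_hwcnt_context_disable(hctx);	/* non-atomic variant; may sleep */
	work();					/* counters disabled here */
	kbase_hwcnt_context_enable(hctx);	/* balance the disable */
}
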
+int kbase_hwcnt_context_init(
+ const struct kbase_hwcnt_backend_interface *iface,
+ struct kbase_hwcnt_context **out_hctx)
+{
+ struct kbase_hwcnt_context *hctx = NULL;
+
+ if (!iface || !out_hctx)
+ return -EINVAL;
+
+ hctx = kzalloc(sizeof(*hctx), GFP_KERNEL);
+ if (!hctx)
+ return -ENOMEM;
+
+ hctx->iface = iface;
+ spin_lock_init(&hctx->state_lock);
+ hctx->disable_count = 1;
+ mutex_init(&hctx->accum_lock);
+ hctx->accum_inited = false;
+
+ *out_hctx = hctx;
+
+ return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_context_init);
+
+void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx)
+{
+ if (!hctx)
+ return;
+
+ /* Make sure we didn't leak the accumulator */
+ WARN_ON(hctx->accum_inited);
+ kfree(hctx);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_context_term);
+
+/**
+ * kbasep_hwcnt_accumulator_term() - Terminate the accumulator for the context.
+ * @hctx: Non-NULL pointer to hardware counter context.
+ */
+static void kbasep_hwcnt_accumulator_term(struct kbase_hwcnt_context *hctx)
+{
+ WARN_ON(!hctx);
+ WARN_ON(!hctx->accum_inited);
+
+ kbase_hwcnt_enable_map_free(&hctx->accum.scratch_map);
+ kbase_hwcnt_dump_buffer_free(&hctx->accum.accum_buf);
+ kbase_hwcnt_enable_map_free(&hctx->accum.enable_map);
+ hctx->iface->term(hctx->accum.backend);
+ memset(&hctx->accum, 0, sizeof(hctx->accum));
+}
+
+/**
+ * kbasep_hwcnt_accumulator_init() - Initialise the accumulator for the context.
+ * @hctx: Non-NULL pointer to hardware counter context.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_accumulator_init(struct kbase_hwcnt_context *hctx)
+{
+ int errcode;
+
+ WARN_ON(!hctx);
+ WARN_ON(!hctx->accum_inited);
+
+ errcode = hctx->iface->init(
+ hctx->iface->info, &hctx->accum.backend);
+ if (errcode)
+ goto error;
+
+ hctx->accum.state = ACCUM_STATE_ERROR;
+
+ errcode = kbase_hwcnt_enable_map_alloc(
+ hctx->iface->metadata, &hctx->accum.enable_map);
+ if (errcode)
+ goto error;
+
+ hctx->accum.enable_map_any_enabled = false;
+
+ errcode = kbase_hwcnt_dump_buffer_alloc(
+ hctx->iface->metadata, &hctx->accum.accum_buf);
+ if (errcode)
+ goto error;
+
+ errcode = kbase_hwcnt_enable_map_alloc(
+ hctx->iface->metadata, &hctx->accum.scratch_map);
+ if (errcode)
+ goto error;
+
+ hctx->accum.accumulated = false;
+
+ hctx->accum.ts_last_dump_ns =
+ hctx->iface->timestamp_ns(hctx->accum.backend);
+
+ return 0;
+
+error:
+ kbasep_hwcnt_accumulator_term(hctx);
+ return errcode;
+}
+
+/**
+ * kbasep_hwcnt_accumulator_disable() - Transition the accumulator into the
+ * disabled state, from the enabled or
+ * error states.
+ * @hctx: Non-NULL pointer to hardware counter context.
+ * @accumulate: True if we should accumulate before disabling, else false.
+ */
+static void kbasep_hwcnt_accumulator_disable(
+ struct kbase_hwcnt_context *hctx, bool accumulate)
+{
+ int errcode = 0;
+ bool backend_enabled = false;
+ struct kbase_hwcnt_accumulator *accum;
+ unsigned long flags;
+
+ WARN_ON(!hctx);
+ lockdep_assert_held(&hctx->accum_lock);
+ WARN_ON(!hctx->accum_inited);
+
+ accum = &hctx->accum;
+
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ WARN_ON(hctx->disable_count != 0);
+ WARN_ON(hctx->accum.state == ACCUM_STATE_DISABLED);
+
+ if ((hctx->accum.state == ACCUM_STATE_ENABLED) &&
+ (accum->enable_map_any_enabled))
+ backend_enabled = true;
+
+ if (!backend_enabled)
+ hctx->accum.state = ACCUM_STATE_DISABLED;
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+
+ /* Early out if the backend is not already enabled */
+ if (!backend_enabled)
+ return;
+
+ if (!accumulate)
+ goto disable;
+
+ /* Try and accumulate before disabling */
+ errcode = hctx->iface->dump_request(accum->backend);
+ if (errcode)
+ goto disable;
+
+ errcode = hctx->iface->dump_wait(accum->backend);
+ if (errcode)
+ goto disable;
+
+ errcode = hctx->iface->dump_get(accum->backend,
+ &accum->accum_buf, &accum->enable_map, accum->accumulated);
+ if (errcode)
+ goto disable;
+
+ accum->accumulated = true;
+
+disable:
+ hctx->iface->dump_disable(accum->backend);
+
+ /* Regardless of any errors during the accumulate, put the accumulator
+ * in the disabled state.
+ */
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ hctx->accum.state = ACCUM_STATE_DISABLED;
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+}
+
+/**
+ * kbasep_hwcnt_accumulator_enable() - Transition the accumulator into the
+ * enabled state, from the disabled state.
+ * @hctx: Non-NULL pointer to hardware counter context.
+ */
+static void kbasep_hwcnt_accumulator_enable(struct kbase_hwcnt_context *hctx)
+{
+ int errcode = 0;
+ struct kbase_hwcnt_accumulator *accum;
+
+ WARN_ON(!hctx);
+ lockdep_assert_held(&hctx->state_lock);
+ WARN_ON(!hctx->accum_inited);
+ WARN_ON(hctx->accum.state != ACCUM_STATE_DISABLED);
+
+ accum = &hctx->accum;
+
+ /* The backend only needs enabling if any counters are enabled */
+ if (accum->enable_map_any_enabled)
+ errcode = hctx->iface->dump_enable_nolock(
+ accum->backend, &accum->enable_map);
+
+ if (!errcode)
+ accum->state = ACCUM_STATE_ENABLED;
+ else
+ accum->state = ACCUM_STATE_ERROR;
+}
+
+/**
+ * kbasep_hwcnt_accumulator_dump() - Perform a dump with the most up-to-date
+ * values of enabled counters possible, and
+ * optionally update the set of enabled
+ * counters.
+ * @hctx: Non-NULL pointer to the hardware counter context
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ * be written out to on success
+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will
+ * be written out to on success
+ * @dump_buf: Pointer to the buffer where the dump will be written out to on
+ * success. If non-NULL, must have the same metadata as the
+ * accumulator. If NULL, the dump will be discarded.
+ * @new_map: Pointer to the new counter enable map. If non-NULL, must have
+ * the same metadata as the accumulator. If NULL, the set of
+ * enabled counters will be unchanged.
+ */
+static int kbasep_hwcnt_accumulator_dump(
+ struct kbase_hwcnt_context *hctx,
+ u64 *ts_start_ns,
+ u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf,
+ const struct kbase_hwcnt_enable_map *new_map)
+{
+ int errcode = 0;
+ unsigned long flags;
+ enum kbase_hwcnt_accum_state state;
+ bool dump_requested = false;
+ bool dump_written = false;
+ bool cur_map_any_enabled;
+ struct kbase_hwcnt_enable_map *cur_map;
+ bool new_map_any_enabled = false;
+ u64 dump_time_ns;
+ struct kbase_hwcnt_accumulator *accum;
+
+ WARN_ON(!hctx);
+ WARN_ON(!ts_start_ns);
+ WARN_ON(!ts_end_ns);
+ WARN_ON(dump_buf && (dump_buf->metadata != hctx->iface->metadata));
+ WARN_ON(new_map && (new_map->metadata != hctx->iface->metadata));
+ WARN_ON(!hctx->accum_inited);
+ lockdep_assert_held(&hctx->accum_lock);
+
+ accum = &hctx->accum;
+ cur_map = &accum->scratch_map;
+
+ /* Save out info about the current enable map */
+ cur_map_any_enabled = accum->enable_map_any_enabled;
+ kbase_hwcnt_enable_map_copy(cur_map, &accum->enable_map);
+
+ if (new_map)
+ new_map_any_enabled =
+ kbase_hwcnt_enable_map_any_enabled(new_map);
+
+ /*
+ * We're holding accum_lock, so the accumulator state might transition
+ * from disabled to enabled during this function (as enabling is lock
+ * free), but it will never disable (as disabling needs to hold the
+ * accum_lock), nor will it ever transition from enabled to error (as
+ * an enable while we're already enabled is impossible).
+ *
+ * If we're already disabled, we'll only look at the accumulation buffer
+ * rather than do a real dump, so a concurrent enable does not affect
+ * us.
+ *
+ * If a concurrent enable fails, we might transition to the error
+ * state, but again, as we're only looking at the accumulation buffer,
+ * it's not an issue.
+ */
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ state = accum->state;
+
+ /*
+ * Update the new map now, such that if an enable occurs during this
+ * dump then that enable will set the new map. If we're already enabled,
+ * then we'll do it ourselves after the dump.
+ */
+ if (new_map) {
+ kbase_hwcnt_enable_map_copy(
+ &accum->enable_map, new_map);
+ accum->enable_map_any_enabled = new_map_any_enabled;
+ }
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+
+ /* Error state, so early out. No need to roll back any map updates */
+ if (state == ACCUM_STATE_ERROR)
+ return -EIO;
+
+ /* Initiate the dump if the backend is enabled. */
+ if ((state == ACCUM_STATE_ENABLED) && cur_map_any_enabled) {
+ /* Disable pre-emption, to make the timestamp as accurate as
+ * possible.
+ */
+ preempt_disable();
+ {
+ dump_time_ns = hctx->iface->timestamp_ns(
+ accum->backend);
+ if (dump_buf) {
+ errcode = hctx->iface->dump_request(
+ accum->backend);
+ dump_requested = true;
+ } else {
+ errcode = hctx->iface->dump_clear(
+ accum->backend);
+ }
+ }
+ preempt_enable();
+ if (errcode)
+ goto error;
+ } else {
+ dump_time_ns = hctx->iface->timestamp_ns(accum->backend);
+ }
+
+ /* Copy any accumulation into the dest buffer */
+ if (accum->accumulated && dump_buf) {
+ kbase_hwcnt_dump_buffer_copy(
+ dump_buf, &accum->accum_buf, cur_map);
+ dump_written = true;
+ }
+
+ /* Wait for any requested dumps to complete */
+ if (dump_requested) {
+ WARN_ON(state != ACCUM_STATE_ENABLED);
+ errcode = hctx->iface->dump_wait(accum->backend);
+ if (errcode)
+ goto error;
+ }
+
+ /* If we're enabled and there's a new enable map, change the enabled set
+ * as soon after the dump has completed as possible.
+ */
+ if ((state == ACCUM_STATE_ENABLED) && new_map) {
+ /* Backend is only enabled if there were any enabled counters */
+ if (cur_map_any_enabled)
+ hctx->iface->dump_disable(accum->backend);
+
+ /* (Re-)enable the backend if the new map has enabled counters.
+ * No need to acquire the spinlock, as concurrent enable while
+ * we're already enabled and holding accum_lock is impossible.
+ */
+ if (new_map_any_enabled) {
+ errcode = hctx->iface->dump_enable(
+ accum->backend, new_map);
+ if (errcode)
+ goto error;
+ }
+ }
+
+ /* Copy, accumulate, or zero into the dest buffer to finish */
+ if (dump_buf) {
+ /* If we dumped, copy or accumulate it into the destination */
+ if (dump_requested) {
+ WARN_ON(state != ACCUM_STATE_ENABLED);
+ errcode = hctx->iface->dump_get(
+ accum->backend,
+ dump_buf,
+ cur_map,
+ dump_written);
+ if (errcode)
+ goto error;
+ dump_written = true;
+ }
+
+ /* If we've not written anything into the dump buffer so far, it
+ * means there was nothing to write. Zero any enabled counters.
+ */
+ if (!dump_written)
+ kbase_hwcnt_dump_buffer_zero(dump_buf, cur_map);
+ }
+
+ /* Write out timestamps */
+ *ts_start_ns = accum->ts_last_dump_ns;
+ *ts_end_ns = dump_time_ns;
+
+ accum->accumulated = false;
+ accum->ts_last_dump_ns = dump_time_ns;
+
+ return 0;
+error:
+ /* An error was only physically possible if the backend was enabled */
+ WARN_ON(state != ACCUM_STATE_ENABLED);
+
+ /* Disable the backend, and transition to the error state */
+ hctx->iface->dump_disable(accum->backend);
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ accum->state = ACCUM_STATE_ERROR;
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+
+ return errcode;
+}
+
+/**
+ * kbasep_hwcnt_context_disable() - Increment the disable count of the context.
+ * @hctx: Non-NULL pointer to hardware counter context.
+ * @accumulate: True if we should accumulate before disabling, else false.
+ */
+static void kbasep_hwcnt_context_disable(
+ struct kbase_hwcnt_context *hctx, bool accumulate)
+{
+ unsigned long flags;
+
+ WARN_ON(!hctx);
+ lockdep_assert_held(&hctx->accum_lock);
+
+ if (!kbase_hwcnt_context_disable_atomic(hctx)) {
+ kbasep_hwcnt_accumulator_disable(hctx, accumulate);
+
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ /* Atomic disable failed and we're holding the mutex, so current
+ * disable count must be 0.
+ */
+ WARN_ON(hctx->disable_count != 0);
+ hctx->disable_count++;
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+ }
+}
+
+int kbase_hwcnt_accumulator_acquire(
+ struct kbase_hwcnt_context *hctx,
+ struct kbase_hwcnt_accumulator **accum)
+{
+ int errcode = 0;
+ unsigned long flags;
+
+ if (!hctx || !accum)
+ return -EINVAL;
+
+ mutex_lock(&hctx->accum_lock);
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ if (!hctx->accum_inited)
+ /* Set accum initing now to prevent concurrent init */
+ hctx->accum_inited = true;
+ else
+ /* Already have an accum, or already being inited */
+ errcode = -EBUSY;
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+ mutex_unlock(&hctx->accum_lock);
+
+ if (errcode)
+ return errcode;
+
+ errcode = kbasep_hwcnt_accumulator_init(hctx);
+
+ if (errcode) {
+ mutex_lock(&hctx->accum_lock);
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ hctx->accum_inited = false;
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+ mutex_unlock(&hctx->accum_lock);
+
+ return errcode;
+ }
+
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ WARN_ON(hctx->disable_count == 0);
+ WARN_ON(hctx->accum.enable_map_any_enabled);
+
+ /* Decrement the disable count to allow the accumulator to be accessible
+ * now that it's fully constructed.
+ */
+ hctx->disable_count--;
+
+ /*
+ * Make sure the accumulator is initialised to the correct state.
+ * Regardless of initial state, counters don't need to be enabled via
+ * the backend, as the initial enable map has no enabled counters.
+ */
+ hctx->accum.state = (hctx->disable_count == 0) ?
+ ACCUM_STATE_ENABLED :
+ ACCUM_STATE_DISABLED;
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+
+ *accum = &hctx->accum;
+
+ return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_acquire);
+
+void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum)
+{
+ unsigned long flags;
+ struct kbase_hwcnt_context *hctx;
+
+ if (!accum)
+ return;
+
+ hctx = container_of(accum, struct kbase_hwcnt_context, accum);
+
+ mutex_lock(&hctx->accum_lock);
+
+ /* Double release is a programming error */
+ WARN_ON(!hctx->accum_inited);
+
+	/* Disable the context to ensure the accumulator is inaccessible while
+ * we're destroying it. This performs the corresponding disable count
+ * increment to the decrement done during acquisition.
+ */
+ kbasep_hwcnt_context_disable(hctx, false);
+
+ mutex_unlock(&hctx->accum_lock);
+
+ kbasep_hwcnt_accumulator_term(hctx);
+
+ mutex_lock(&hctx->accum_lock);
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ hctx->accum_inited = false;
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+ mutex_unlock(&hctx->accum_lock);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_release);
+
+void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx)
+{
+ if (WARN_ON(!hctx))
+ return;
+
+ /* Try and atomically disable first, so we can avoid locking the mutex
+ * if we don't need to.
+ */
+ if (kbase_hwcnt_context_disable_atomic(hctx))
+ return;
+
+ mutex_lock(&hctx->accum_lock);
+
+ kbasep_hwcnt_context_disable(hctx, true);
+
+ mutex_unlock(&hctx->accum_lock);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_context_disable);
+
+bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx)
+{
+ unsigned long flags;
+ bool atomic_disabled = false;
+
+ if (WARN_ON(!hctx))
+ return false;
+
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ if (!WARN_ON(hctx->disable_count == SIZE_MAX)) {
+ /*
+ * If disable count is non-zero or no counters are enabled, we
+ * can just bump the disable count.
+ *
+ * Otherwise, we can't disable in an atomic context.
+ */
+ if (hctx->disable_count != 0) {
+ hctx->disable_count++;
+ atomic_disabled = true;
+ } else {
+ WARN_ON(!hctx->accum_inited);
+ if (!hctx->accum.enable_map_any_enabled) {
+ hctx->disable_count++;
+ hctx->accum.state = ACCUM_STATE_DISABLED;
+ atomic_disabled = true;
+ }
+ }
+ }
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+
+ return atomic_disabled;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_context_disable_atomic);
+
+void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx)
+{
+ unsigned long flags;
+
+ if (WARN_ON(!hctx))
+ return;
+
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ if (!WARN_ON(hctx->disable_count == 0)) {
+ if (hctx->disable_count == 1)
+ kbasep_hwcnt_accumulator_enable(hctx);
+
+ hctx->disable_count--;
+ }
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_context_enable);
+
+const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(
+ struct kbase_hwcnt_context *hctx)
+{
+ if (!hctx)
+ return NULL;
+
+ return hctx->iface->metadata;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_context_metadata);
+
+int kbase_hwcnt_accumulator_set_counters(
+ struct kbase_hwcnt_accumulator *accum,
+ const struct kbase_hwcnt_enable_map *new_map,
+ u64 *ts_start_ns,
+ u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+ int errcode;
+ struct kbase_hwcnt_context *hctx;
+
+ if (!accum || !new_map || !ts_start_ns || !ts_end_ns)
+ return -EINVAL;
+
+ hctx = container_of(accum, struct kbase_hwcnt_context, accum);
+
+ if ((new_map->metadata != hctx->iface->metadata) ||
+ (dump_buf && (dump_buf->metadata != hctx->iface->metadata)))
+ return -EINVAL;
+
+ mutex_lock(&hctx->accum_lock);
+
+ errcode = kbasep_hwcnt_accumulator_dump(
+ hctx, ts_start_ns, ts_end_ns, dump_buf, new_map);
+
+ mutex_unlock(&hctx->accum_lock);
+
+ return errcode;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_set_counters);
+
+int kbase_hwcnt_accumulator_dump(
+ struct kbase_hwcnt_accumulator *accum,
+ u64 *ts_start_ns,
+ u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+ int errcode;
+ struct kbase_hwcnt_context *hctx;
+
+ if (!accum || !ts_start_ns || !ts_end_ns)
+ return -EINVAL;
+
+ hctx = container_of(accum, struct kbase_hwcnt_context, accum);
+
+ if (dump_buf && (dump_buf->metadata != hctx->iface->metadata))
+ return -EINVAL;
+
+ mutex_lock(&hctx->accum_lock);
+
+ errcode = kbasep_hwcnt_accumulator_dump(
+ hctx, ts_start_ns, ts_end_ns, dump_buf, NULL);
+
+ mutex_unlock(&hctx->accum_lock);
+
+ return errcode;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_dump);
diff --git a/mali_kbase/mali_kbase_hwcnt_accumulator.h b/mali_kbase/mali_kbase_hwcnt_accumulator.h
new file mode 100644
index 0000000..fc45743
--- /dev/null
+++ b/mali_kbase/mali_kbase_hwcnt_accumulator.h
@@ -0,0 +1,137 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Hardware counter accumulator API.
+ */
+
+#ifndef _KBASE_HWCNT_ACCUMULATOR_H_
+#define _KBASE_HWCNT_ACCUMULATOR_H_
+
+#include <linux/types.h>
+
+struct kbase_hwcnt_context;
+struct kbase_hwcnt_accumulator;
+struct kbase_hwcnt_enable_map;
+struct kbase_hwcnt_dump_buffer;
+
+/**
+ * kbase_hwcnt_accumulator_acquire() - Acquire the hardware counter accumulator
+ * for a hardware counter context.
+ * @hctx: Non-NULL pointer to a hardware counter context.
+ * @accum: Non-NULL pointer to where the pointer to the created accumulator
+ * will be stored on success.
+ *
+ * There can exist at most one instance of the hardware counter accumulator per
+ * context at a time.
+ *
+ * If multiple clients need access to the hardware counters at the same time,
+ * then an abstraction built on top of the single instance to the hardware
+ * counter accumulator is required.
+ *
+ * No counters will be enabled with the returned accumulator. A subsequent call
+ * to kbase_hwcnt_accumulator_set_counters must be used to turn them on.
+ *
+ * There are four components to a hardware counter dump:
+ * - A set of enabled counters
+ * - A start time
+ * - An end time
+ * - A dump buffer containing the accumulated counter values for all enabled
+ * counters between the start and end times.
+ *
+ * For each dump, it is guaranteed that all enabled counters were active for the
+ * entirety of the period between the start and end times.
+ *
+ * It is also guaranteed that the start time of dump "n" is always equal to the
+ * end time of dump "n - 1".
+ *
+ * For all dumps, the values of any counters that were not enabled are undefined.
+ *
+ * Return: 0 on success or error code.
+ */
+int kbase_hwcnt_accumulator_acquire(
+ struct kbase_hwcnt_context *hctx,
+ struct kbase_hwcnt_accumulator **accum);
+
+/**
+ * kbase_hwcnt_accumulator_release() - Release a hardware counter accumulator.
+ * @accum: Non-NULL pointer to the hardware counter accumulator.
+ *
+ * The accumulator must be released before the context the accumulator was
+ * created from is terminated.
+ */
+void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum);
+
+/**
+ * kbase_hwcnt_accumulator_set_counters() - Perform a dump of the currently
+ * enabled counters, and enable a new
+ * set of counters that will be used
+ * for subsequent dumps.
+ * @accum: Non-NULL pointer to the hardware counter accumulator.
+ * @new_map: Non-NULL pointer to the new counter enable map. Must have the
+ * same metadata as the accumulator.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ * be written out to on success.
+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will
+ * be written out to on success.
+ * @dump_buf: Pointer to the buffer where the dump will be written out to on
+ * success. If non-NULL, must have the same metadata as the
+ * accumulator. If NULL, the dump will be discarded.
+ *
+ * If this function fails for some unexpected reason (i.e. anything other than
+ * invalid args), then the accumulator will be put into the error state until
+ * the parent context is next disabled.
+ *
+ * Return: 0 on success or error code.
+ */
+int kbase_hwcnt_accumulator_set_counters(
+ struct kbase_hwcnt_accumulator *accum,
+ const struct kbase_hwcnt_enable_map *new_map,
+ u64 *ts_start_ns,
+ u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf);
+
+/**
+ * kbase_hwcnt_accumulator_dump() - Perform a dump of the currently enabled
+ * counters.
+ * @accum: Non-NULL pointer to the hardware counter accumulator.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ * be written out to on success.
+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will
+ * be written out to on success.
+ * @dump_buf: Pointer to the buffer where the dump will be written out to on
+ * success. If non-NULL, must have the same metadata as the
+ * accumulator. If NULL, the dump will be discarded.
+ *
+ * If this function fails for some unexpected reason (i.e. anything other than
+ * invalid args), then the accumulator will be put into the error state until
+ * the parent context is next disabled.
+ *
+ * Return: 0 on success or error code.
+ */
+int kbase_hwcnt_accumulator_dump(
+ struct kbase_hwcnt_accumulator *accum,
+ u64 *ts_start_ns,
+ u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf);
+
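+/*
+ * example_hwcnt_periodic_dump() - Minimal usage sketch of the accumulator API.
+ * @hctx:       Non-NULL pointer to an initialised hardware counter context.
+ * @enable_map: Enable map built by the caller from the context's metadata.
+ * @dump_buf:   Dump buffer created by the caller with the same metadata.
+ *
+ * Illustrative sketch only: the function name is not part of the API, and it
+ * assumes @enable_map and @dump_buf have already been created via the hardware
+ * counter types API. It shows the acquire -> set_counters -> dump -> release
+ * sequence described above.
+ *
+ * Return: 0 on success, else error code.
+ */
+static inline int example_hwcnt_periodic_dump(
+	struct kbase_hwcnt_context *hctx,
+	const struct kbase_hwcnt_enable_map *enable_map,
+	struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	struct kbase_hwcnt_accumulator *accum;
+	u64 ts_start_ns;
+	u64 ts_end_ns;
+	int errcode;
+
+	errcode = kbase_hwcnt_accumulator_acquire(hctx, &accum);
+	if (errcode)
+		return errcode;
+
+	/* No counters are enabled on acquisition, so enable the requested set
+	 * first. The dump produced by this call is discarded (NULL buffer).
+	 */
+	errcode = kbase_hwcnt_accumulator_set_counters(
+		accum, enable_map, &ts_start_ns, &ts_end_ns, NULL);
+
+	/* Dump the counters accumulated since they were enabled. */
+	if (!errcode)
+		errcode = kbase_hwcnt_accumulator_dump(
+			accum, &ts_start_ns, &ts_end_ns, dump_buf);
+
+	kbase_hwcnt_accumulator_release(accum);
+	return errcode;
+}
+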
+#endif /* _KBASE_HWCNT_ACCUMULATOR_H_ */
diff --git a/mali_kbase/mali_kbase_hwcnt_backend.h b/mali_kbase/mali_kbase_hwcnt_backend.h
new file mode 100644
index 0000000..b7aa0e1
--- /dev/null
+++ b/mali_kbase/mali_kbase_hwcnt_backend.h
@@ -0,0 +1,217 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Virtual interface for hardware counter backends.
+ */
+
+#ifndef _KBASE_HWCNT_BACKEND_H_
+#define _KBASE_HWCNT_BACKEND_H_
+
+#include <linux/types.h>
+
+struct kbase_hwcnt_metadata;
+struct kbase_hwcnt_enable_map;
+struct kbase_hwcnt_dump_buffer;
+
+/*
+ * struct kbase_hwcnt_backend_info - Opaque pointer to information used to
+ * create an instance of a hardware counter
+ * backend.
+ */
+struct kbase_hwcnt_backend_info;
+
+/*
+ * struct kbase_hwcnt_backend - Opaque pointer to a hardware counter
+ * backend, used to perform dumps.
+ */
+struct kbase_hwcnt_backend;
+
+/**
+ * typedef kbase_hwcnt_backend_init_fn - Initialise a counter backend.
+ * @info: Non-NULL pointer to backend info.
+ * @out_backend: Non-NULL pointer to where backend is stored on success.
+ *
+ * All uses of the created hardware counter backend must be externally
+ * synchronised.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int (*kbase_hwcnt_backend_init_fn)(
+ const struct kbase_hwcnt_backend_info *info,
+ struct kbase_hwcnt_backend **out_backend);
+
+/**
+ * typedef kbase_hwcnt_backend_term_fn - Terminate a counter backend.
+ * @backend: Pointer to backend to be terminated.
+ */
+typedef void (*kbase_hwcnt_backend_term_fn)(
+ struct kbase_hwcnt_backend *backend);
+
+/**
+ * typedef kbase_hwcnt_backend_timestamp_ns_fn - Get the current backend
+ * timestamp.
+ * @backend: Non-NULL pointer to backend.
+ *
+ * Return: Backend timestamp in nanoseconds.
+ */
+typedef u64 (*kbase_hwcnt_backend_timestamp_ns_fn)(
+ struct kbase_hwcnt_backend *backend);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_enable_fn - Start counter dumping with the
+ * backend.
+ * @backend: Non-NULL pointer to backend.
+ * @enable_map: Non-NULL pointer to enable map specifying enabled counters.
+ *
+ * The enable_map must have been created using the interface's metadata.
+ * If the backend has already been enabled, an error is returned.
+ *
+ * May be called in an atomic context.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int (*kbase_hwcnt_backend_dump_enable_fn)(
+ struct kbase_hwcnt_backend *backend,
+ const struct kbase_hwcnt_enable_map *enable_map);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_enable_nolock_fn - Start counter dumping
+ * with the backend.
+ * @backend: Non-NULL pointer to backend.
+ * @enable_map: Non-NULL pointer to enable map specifying enabled counters.
+ *
+ * Exactly the same as kbase_hwcnt_backend_dump_enable_fn(), except must be
+ * called in an atomic context with the spinlock documented by the specific
+ * backend interface held.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int (*kbase_hwcnt_backend_dump_enable_nolock_fn)(
+ struct kbase_hwcnt_backend *backend,
+ const struct kbase_hwcnt_enable_map *enable_map);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_disable_fn - Disable counter dumping with
+ * the backend.
+ * @backend: Non-NULL pointer to backend.
+ *
+ * If the backend is already disabled, does nothing.
+ * Any undumped counter values since the last dump get will be lost.
+ */
+typedef void (*kbase_hwcnt_backend_dump_disable_fn)(
+ struct kbase_hwcnt_backend *backend);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_clear_fn - Reset all the current undumped
+ * counters.
+ * @backend: Non-NULL pointer to backend.
+ *
+ * If the backend is not enabled, returns an error.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int (*kbase_hwcnt_backend_dump_clear_fn)(
+ struct kbase_hwcnt_backend *backend);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_request_fn - Request an asynchronous counter
+ * dump.
+ * @backend: Non-NULL pointer to backend.
+ *
+ * If the backend is not enabled or another dump is already in progress,
+ * returns an error.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int (*kbase_hwcnt_backend_dump_request_fn)(
+ struct kbase_hwcnt_backend *backend);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_wait_fn - Wait until the last requested
+ * counter dump has completed.
+ * @backend: Non-NULL pointer to backend.
+ *
+ * If the backend is not enabled, returns an error.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int (*kbase_hwcnt_backend_dump_wait_fn)(
+ struct kbase_hwcnt_backend *backend);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_get_fn - Copy or accumulate the counters
+ *                                           dumped after the last dump
+ *                                           request into the dump buffer.
+ * @backend: Non-NULL pointer to backend.
+ * @dump_buffer: Non-NULL pointer to destination dump buffer.
+ * @enable_map: Non-NULL pointer to enable map specifying enabled values.
+ * @accumulate: True if counters should be accumulated into dump_buffer, rather
+ * than copied.
+ *
+ * If the backend is not enabled, returns an error.
+ * If a dump is in progress (i.e. dump_wait has not yet returned successfully)
+ * then the resultant contents of the dump buffer will be undefined.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int (*kbase_hwcnt_backend_dump_get_fn)(
+ struct kbase_hwcnt_backend *backend,
+ struct kbase_hwcnt_dump_buffer *dump_buffer,
+ const struct kbase_hwcnt_enable_map *enable_map,
+ bool accumulate);
+
+/**
+ * struct kbase_hwcnt_backend_interface - Hardware counter backend virtual
+ * interface.
+ * @metadata: Immutable hardware counter metadata.
+ * @info: Immutable info used to initialise an instance of the
+ * backend.
+ * @init: Function ptr to initialise an instance of the backend.
+ * @term: Function ptr to terminate an instance of the backend.
+ * @timestamp_ns: Function ptr to get the current backend timestamp.
+ * @dump_enable: Function ptr to enable dumping.
+ * @dump_enable_nolock: Function ptr to enable dumping while the
+ * backend-specific spinlock is already held.
+ * @dump_disable: Function ptr to disable dumping.
+ * @dump_clear: Function ptr to clear counters.
+ * @dump_request: Function ptr to request a dump.
+ * @dump_wait:          Function ptr to wait until a dump has completed.
+ * @dump_get: Function ptr to copy or accumulate dump into a dump
+ * buffer.
+ */
+struct kbase_hwcnt_backend_interface {
+ const struct kbase_hwcnt_metadata *metadata;
+ const struct kbase_hwcnt_backend_info *info;
+ kbase_hwcnt_backend_init_fn init;
+ kbase_hwcnt_backend_term_fn term;
+ kbase_hwcnt_backend_timestamp_ns_fn timestamp_ns;
+ kbase_hwcnt_backend_dump_enable_fn dump_enable;
+ kbase_hwcnt_backend_dump_enable_nolock_fn dump_enable_nolock;
+ kbase_hwcnt_backend_dump_disable_fn dump_disable;
+ kbase_hwcnt_backend_dump_clear_fn dump_clear;
+ kbase_hwcnt_backend_dump_request_fn dump_request;
+ kbase_hwcnt_backend_dump_wait_fn dump_wait;
+ kbase_hwcnt_backend_dump_get_fn dump_get;
+};
+
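+/*
+ * example_hwcnt_backend_dump_cycle() - Minimal sketch of how a client of this
+ *                                      interface might drive one dump cycle.
+ * @iface:      Non-NULL pointer to a backend interface.
+ * @enable_map: Enable map built by the caller from iface->metadata.
+ * @dump_buf:   Dump buffer built by the caller from iface->metadata.
+ *
+ * Illustrative sketch only: the function name is hypothetical and error
+ * handling is collapsed. In the driver it is the accumulator that drives a
+ * backend through these function pointers.
+ *
+ * Return: 0 on success, else error code.
+ */
+static inline int example_hwcnt_backend_dump_cycle(
+	const struct kbase_hwcnt_backend_interface *iface,
+	const struct kbase_hwcnt_enable_map *enable_map,
+	struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	struct kbase_hwcnt_backend *backend;
+	int errcode;
+
+	errcode = iface->init(iface->info, &backend);
+	if (errcode)
+		return errcode;
+
+	errcode = iface->dump_enable(backend, enable_map);
+	if (!errcode)
+		errcode = iface->dump_request(backend);
+	if (!errcode)
+		errcode = iface->dump_wait(backend);
+	if (!errcode)
+		/* Copy (rather than accumulate) into the dump buffer. */
+		errcode = iface->dump_get(
+			backend, dump_buf, enable_map, false);
+
+	/* Safe even if the enable above failed: dump_disable is a no-op on an
+	 * already disabled backend.
+	 */
+	iface->dump_disable(backend);
+	iface->term(backend);
+	return errcode;
+}
+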
+#endif /* _KBASE_HWCNT_BACKEND_H_ */
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_gpu.c b/mali_kbase/mali_kbase_hwcnt_backend_gpu.c
new file mode 100644
index 0000000..4bc8916
--- /dev/null
+++ b/mali_kbase/mali_kbase_hwcnt_backend_gpu.c
@@ -0,0 +1,538 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_hwcnt_backend_gpu.h"
+#include "mali_kbase_hwcnt_gpu.h"
+#include "mali_kbase_hwcnt_types.h"
+#include "mali_kbase.h"
+#include "mali_kbase_pm_policy.h"
+#include "mali_kbase_hwaccess_instr.h"
+#include "mali_kbase_tlstream.h"
+#ifdef CONFIG_MALI_NO_MALI
+#include "backend/gpu/mali_kbase_model_dummy.h"
+#endif
+
+/**
+ * struct kbase_hwcnt_backend_gpu_info - Information used to create an instance
+ * of a GPU hardware counter backend.
+ * @kbdev: KBase device.
+ * @use_secondary: True if secondary performance counters should be used,
+ * else false. Ignored if secondary counters are not supported.
+ * @metadata: Hardware counter metadata.
+ * @dump_bytes: Bytes of GPU memory required to perform a
+ * hardware counter dump.
+ */
+struct kbase_hwcnt_backend_gpu_info {
+ struct kbase_device *kbdev;
+ bool use_secondary;
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t dump_bytes;
+};
+
+/**
+ * struct kbase_hwcnt_backend_gpu - Instance of a GPU hardware counter backend.
+ * @info: Info used to create the backend.
+ * @kctx: KBase context used for GPU memory allocation and
+ * counter dumping.
+ * @kctx_element: List element used to add kctx to device context list.
+ * @gpu_dump_va: GPU hardware counter dump buffer virtual address.
+ * @cpu_dump_va: CPU mapping of gpu_dump_va.
+ * @vmap: Dump buffer vmap.
+ * @enabled: True if dumping has been enabled, else false.
+ */
+struct kbase_hwcnt_backend_gpu {
+ const struct kbase_hwcnt_backend_gpu_info *info;
+ struct kbase_context *kctx;
+ struct kbasep_kctx_list_element *kctx_element;
+ u64 gpu_dump_va;
+ void *cpu_dump_va;
+ struct kbase_vmap_struct *vmap;
+ bool enabled;
+};
+
+/* GPU backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */
+static u64 kbasep_hwcnt_backend_gpu_timestamp_ns(
+ struct kbase_hwcnt_backend *backend)
+{
+ struct timespec ts;
+
+ (void)backend;
+ getrawmonotonic(&ts);
+ return (u64)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
+}
+
+/* GPU backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */
+static int kbasep_hwcnt_backend_gpu_dump_enable_nolock(
+ struct kbase_hwcnt_backend *backend,
+ const struct kbase_hwcnt_enable_map *enable_map)
+{
+ int errcode;
+ struct kbase_hwcnt_backend_gpu *backend_gpu =
+ (struct kbase_hwcnt_backend_gpu *)backend;
+ struct kbase_context *kctx;
+ struct kbase_device *kbdev;
+ struct kbase_hwcnt_physical_enable_map phys;
+ struct kbase_instr_hwcnt_enable enable;
+
+ if (!backend_gpu || !enable_map || backend_gpu->enabled ||
+ (enable_map->metadata != backend_gpu->info->metadata))
+ return -EINVAL;
+
+ kctx = backend_gpu->kctx;
+ kbdev = backend_gpu->kctx->kbdev;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ kbase_hwcnt_gpu_enable_map_to_physical(&phys, enable_map);
+
+ enable.jm_bm = phys.jm_bm;
+ enable.shader_bm = phys.shader_bm;
+ enable.tiler_bm = phys.tiler_bm;
+ enable.mmu_l2_bm = phys.mmu_l2_bm;
+ enable.use_secondary = backend_gpu->info->use_secondary;
+ enable.dump_buffer = backend_gpu->gpu_dump_va;
+ enable.dump_buffer_bytes = backend_gpu->info->dump_bytes;
+
+ errcode = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable);
+ if (errcode)
+ goto error;
+
+ backend_gpu->enabled = true;
+
+ return 0;
+error:
+ return errcode;
+}
+
+/* GPU backend implementation of kbase_hwcnt_backend_dump_enable_fn */
+static int kbasep_hwcnt_backend_gpu_dump_enable(
+ struct kbase_hwcnt_backend *backend,
+ const struct kbase_hwcnt_enable_map *enable_map)
+{
+ unsigned long flags;
+ int errcode;
+ struct kbase_hwcnt_backend_gpu *backend_gpu =
+ (struct kbase_hwcnt_backend_gpu *)backend;
+ struct kbase_device *kbdev;
+
+ if (!backend_gpu)
+ return -EINVAL;
+
+ kbdev = backend_gpu->kctx->kbdev;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ errcode = kbasep_hwcnt_backend_gpu_dump_enable_nolock(
+ backend, enable_map);
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ return errcode;
+}
+
+/* GPU backend implementation of kbase_hwcnt_backend_dump_disable_fn */
+static void kbasep_hwcnt_backend_gpu_dump_disable(
+ struct kbase_hwcnt_backend *backend)
+{
+ int errcode;
+ struct kbase_hwcnt_backend_gpu *backend_gpu =
+ (struct kbase_hwcnt_backend_gpu *)backend;
+
+ if (WARN_ON(!backend_gpu) || !backend_gpu->enabled)
+ return;
+
+ errcode = kbase_instr_hwcnt_disable_internal(backend_gpu->kctx);
+ WARN_ON(errcode);
+
+ backend_gpu->enabled = false;
+}
+
+/* GPU backend implementation of kbase_hwcnt_backend_dump_clear_fn */
+static int kbasep_hwcnt_backend_gpu_dump_clear(
+ struct kbase_hwcnt_backend *backend)
+{
+ struct kbase_hwcnt_backend_gpu *backend_gpu =
+ (struct kbase_hwcnt_backend_gpu *)backend;
+
+ if (!backend_gpu || !backend_gpu->enabled)
+ return -EINVAL;
+
+ return kbase_instr_hwcnt_clear(backend_gpu->kctx);
+}
+
+/* GPU backend implementation of kbase_hwcnt_backend_dump_request_fn */
+static int kbasep_hwcnt_backend_gpu_dump_request(
+ struct kbase_hwcnt_backend *backend)
+{
+ struct kbase_hwcnt_backend_gpu *backend_gpu =
+ (struct kbase_hwcnt_backend_gpu *)backend;
+
+ if (!backend_gpu || !backend_gpu->enabled)
+ return -EINVAL;
+
+ return kbase_instr_hwcnt_request_dump(backend_gpu->kctx);
+}
+
+/* GPU backend implementation of kbase_hwcnt_backend_dump_wait_fn */
+static int kbasep_hwcnt_backend_gpu_dump_wait(
+ struct kbase_hwcnt_backend *backend)
+{
+ struct kbase_hwcnt_backend_gpu *backend_gpu =
+ (struct kbase_hwcnt_backend_gpu *)backend;
+
+ if (!backend_gpu || !backend_gpu->enabled)
+ return -EINVAL;
+
+ return kbase_instr_hwcnt_wait_for_dump(backend_gpu->kctx);
+}
+
+/* GPU backend implementation of kbase_hwcnt_backend_dump_get_fn */
+static int kbasep_hwcnt_backend_gpu_dump_get(
+ struct kbase_hwcnt_backend *backend,
+ struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_enable_map *dst_enable_map,
+ bool accumulate)
+{
+ struct kbase_hwcnt_backend_gpu *backend_gpu =
+ (struct kbase_hwcnt_backend_gpu *)backend;
+
+ if (!backend_gpu || !dst || !dst_enable_map ||
+ (backend_gpu->info->metadata != dst->metadata) ||
+ (dst_enable_map->metadata != dst->metadata))
+ return -EINVAL;
+
+ /* Invalidate the kernel buffer before reading from it. */
+ kbase_sync_mem_regions(
+ backend_gpu->kctx, backend_gpu->vmap, KBASE_SYNC_TO_CPU);
+
+ return kbase_hwcnt_gpu_dump_get(
+ dst, backend_gpu->cpu_dump_va, dst_enable_map, accumulate);
+}
+
+/**
+ * kbasep_hwcnt_backend_gpu_dump_alloc() - Allocate a GPU dump buffer.
+ * @info: Non-NULL pointer to GPU backend info.
+ * @kctx: Non-NULL pointer to kbase context.
+ * @gpu_dump_va: Non-NULL pointer to where GPU dump buffer virtual address
+ * is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_backend_gpu_dump_alloc(
+ const struct kbase_hwcnt_backend_gpu_info *info,
+ struct kbase_context *kctx,
+ u64 *gpu_dump_va)
+{
+ struct kbase_va_region *reg;
+ u64 flags;
+ u64 nr_pages;
+
+ WARN_ON(!info);
+ WARN_ON(!kctx);
+ WARN_ON(!gpu_dump_va);
+
+ flags = BASE_MEM_PROT_CPU_RD |
+ BASE_MEM_PROT_GPU_WR |
+ BASE_MEM_PERMANENT_KERNEL_MAPPING |
+ BASE_MEM_CACHED_CPU;
+
+ if (kctx->kbdev->mmu_mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE)
+ flags |= BASE_MEM_UNCACHED_GPU;
+
+ nr_pages = PFN_UP(info->dump_bytes);
+
+ reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va);
+
+ if (!reg)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/**
+ * kbasep_hwcnt_backend_gpu_dump_free() - Free an allocated GPU dump buffer.
+ * @kctx: Non-NULL pointer to kbase context.
+ * @gpu_dump_va: GPU dump buffer virtual address.
+ */
+static void kbasep_hwcnt_backend_gpu_dump_free(
+ struct kbase_context *kctx,
+ u64 gpu_dump_va)
+{
+ WARN_ON(!kctx);
+ if (gpu_dump_va)
+ kbase_mem_free(kctx, gpu_dump_va);
+}
+
+/**
+ * kbasep_hwcnt_backend_gpu_destroy() - Destroy a GPU backend.
+ * @backend: Pointer to GPU backend to destroy.
+ *
+ * Can be safely called on a backend in any state of partial construction.
+ */
+static void kbasep_hwcnt_backend_gpu_destroy(
+ struct kbase_hwcnt_backend_gpu *backend)
+{
+ if (!backend)
+ return;
+
+ if (backend->kctx) {
+ struct kbase_context *kctx = backend->kctx;
+ struct kbase_device *kbdev = kctx->kbdev;
+
+ if (backend->cpu_dump_va)
+ kbase_phy_alloc_mapping_put(kctx, backend->vmap);
+
+ if (backend->gpu_dump_va)
+ kbasep_hwcnt_backend_gpu_dump_free(
+ kctx, backend->gpu_dump_va);
+
+ if (backend->kctx_element) {
+ mutex_lock(&kbdev->kctx_list_lock);
+
+ KBASE_TLSTREAM_TL_DEL_CTX(kctx);
+ list_del(&backend->kctx_element->link);
+
+ mutex_unlock(&kbdev->kctx_list_lock);
+ kfree(backend->kctx_element);
+ }
+
+ kbasep_js_release_privileged_ctx(kbdev, kctx);
+ kbase_destroy_context(kctx);
+ }
+
+ kfree(backend);
+}
+
+/**
+ * kbasep_hwcnt_backend_gpu_create() - Create a GPU backend.
+ * @info: Non-NULL pointer to backend info.
+ * @out_backend: Non-NULL pointer to where backend is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_backend_gpu_create(
+ const struct kbase_hwcnt_backend_gpu_info *info,
+ struct kbase_hwcnt_backend_gpu **out_backend)
+{
+ int errcode;
+ struct kbase_device *kbdev;
+ struct kbase_hwcnt_backend_gpu *backend = NULL;
+
+ WARN_ON(!info);
+ WARN_ON(!out_backend);
+
+ kbdev = info->kbdev;
+
+ backend = kzalloc(sizeof(*backend), GFP_KERNEL);
+ if (!backend)
+ goto alloc_error;
+
+ backend->info = info;
+
+ backend->kctx = kbase_create_context(kbdev, true);
+ if (!backend->kctx)
+ goto alloc_error;
+
+ kbasep_js_schedule_privileged_ctx(kbdev, backend->kctx);
+
+ backend->kctx_element = kzalloc(
+ sizeof(*backend->kctx_element), GFP_KERNEL);
+ if (!backend->kctx_element)
+ goto alloc_error;
+
+ backend->kctx_element->kctx = backend->kctx;
+
+ /* Add kernel context to list of contexts associated with device. */
+ mutex_lock(&kbdev->kctx_list_lock);
+
+ list_add(&backend->kctx_element->link, &kbdev->kctx_list);
+	/* Fire the tracepoint while the lock is held, to ensure the tracepoint
+	 * is not created in both the body and summary streams.
+	 */
+ KBASE_TLSTREAM_TL_NEW_CTX(
+ backend->kctx, backend->kctx->id, (u32)(backend->kctx->tgid));
+
+ mutex_unlock(&kbdev->kctx_list_lock);
+
+ errcode = kbasep_hwcnt_backend_gpu_dump_alloc(
+ info, backend->kctx, &backend->gpu_dump_va);
+ if (errcode)
+ goto error;
+
+ backend->cpu_dump_va = kbase_phy_alloc_mapping_get(backend->kctx,
+ backend->gpu_dump_va, &backend->vmap);
+ if (!backend->cpu_dump_va)
+ goto alloc_error;
+
+#ifdef CONFIG_MALI_NO_MALI
+ /* The dummy model needs the CPU mapping. */
+ gpu_model_set_dummy_prfcnt_base_cpu(backend->cpu_dump_va);
+#endif
+
+ *out_backend = backend;
+ return 0;
+
+alloc_error:
+ errcode = -ENOMEM;
+error:
+ kbasep_hwcnt_backend_gpu_destroy(backend);
+ return errcode;
+}
+
+/* GPU backend implementation of kbase_hwcnt_backend_init_fn */
+static int kbasep_hwcnt_backend_gpu_init(
+ const struct kbase_hwcnt_backend_info *info,
+ struct kbase_hwcnt_backend **out_backend)
+{
+ int errcode;
+ struct kbase_hwcnt_backend_gpu *backend = NULL;
+
+ if (!info || !out_backend)
+ return -EINVAL;
+
+ errcode = kbasep_hwcnt_backend_gpu_create(
+ (const struct kbase_hwcnt_backend_gpu_info *) info, &backend);
+ if (errcode)
+ return errcode;
+
+ *out_backend = (struct kbase_hwcnt_backend *)backend;
+
+ return 0;
+}
+
+/* GPU backend implementation of kbase_hwcnt_backend_term_fn */
+static void kbasep_hwcnt_backend_gpu_term(struct kbase_hwcnt_backend *backend)
+{
+ if (!backend)
+ return;
+
+ kbasep_hwcnt_backend_gpu_dump_disable(backend);
+ kbasep_hwcnt_backend_gpu_destroy(
+ (struct kbase_hwcnt_backend_gpu *)backend);
+}
+
+/**
+ * kbasep_hwcnt_backend_gpu_info_destroy() - Destroy a GPU backend info.
+ * @info: Pointer to info to destroy.
+ *
+ * Can be safely called on a backend info in any state of partial construction.
+ */
+static void kbasep_hwcnt_backend_gpu_info_destroy(
+ const struct kbase_hwcnt_backend_gpu_info *info)
+{
+ if (!info)
+ return;
+
+ kbase_hwcnt_gpu_metadata_destroy(info->metadata);
+ kfree(info);
+}
+
+/**
+ * kbasep_hwcnt_backend_gpu_info_create() - Create a GPU backend info.
+ * @kbdev: Non-NULL pointer to kbase device.
+ * @out_info: Non-NULL pointer to where info is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_backend_gpu_info_create(
+ struct kbase_device *kbdev,
+ const struct kbase_hwcnt_backend_gpu_info **out_info)
+{
+ int errcode = -ENOMEM;
+ struct kbase_hwcnt_gpu_info hwcnt_gpu_info;
+ struct kbase_hwcnt_backend_gpu_info *info = NULL;
+
+ WARN_ON(!kbdev);
+ WARN_ON(!out_info);
+
+ errcode = kbase_hwcnt_gpu_info_init(kbdev, &hwcnt_gpu_info);
+ if (errcode)
+ return errcode;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ goto error;
+
+ info->kbdev = kbdev;
+
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+ info->use_secondary = true;
+#else
+ info->use_secondary = false;
+#endif
+
+ errcode = kbase_hwcnt_gpu_metadata_create(
+ &hwcnt_gpu_info, info->use_secondary,
+ &info->metadata,
+ &info->dump_bytes);
+ if (errcode)
+ goto error;
+
+ *out_info = info;
+
+ return 0;
+error:
+ kbasep_hwcnt_backend_gpu_info_destroy(info);
+ return errcode;
+}
+
+int kbase_hwcnt_backend_gpu_create(
+ struct kbase_device *kbdev,
+ struct kbase_hwcnt_backend_interface *iface)
+{
+ int errcode;
+ const struct kbase_hwcnt_backend_gpu_info *info = NULL;
+
+ if (!kbdev || !iface)
+ return -EINVAL;
+
+ errcode = kbasep_hwcnt_backend_gpu_info_create(kbdev, &info);
+
+ if (errcode)
+ return errcode;
+
+ iface->metadata = info->metadata;
+ iface->info = (struct kbase_hwcnt_backend_info *)info;
+ iface->init = kbasep_hwcnt_backend_gpu_init;
+ iface->term = kbasep_hwcnt_backend_gpu_term;
+ iface->timestamp_ns = kbasep_hwcnt_backend_gpu_timestamp_ns;
+ iface->dump_enable = kbasep_hwcnt_backend_gpu_dump_enable;
+ iface->dump_enable_nolock = kbasep_hwcnt_backend_gpu_dump_enable_nolock;
+ iface->dump_disable = kbasep_hwcnt_backend_gpu_dump_disable;
+ iface->dump_clear = kbasep_hwcnt_backend_gpu_dump_clear;
+ iface->dump_request = kbasep_hwcnt_backend_gpu_dump_request;
+ iface->dump_wait = kbasep_hwcnt_backend_gpu_dump_wait;
+ iface->dump_get = kbasep_hwcnt_backend_gpu_dump_get;
+
+ return 0;
+}
+
+void kbase_hwcnt_backend_gpu_destroy(
+ struct kbase_hwcnt_backend_interface *iface)
+{
+ if (!iface)
+ return;
+
+ kbasep_hwcnt_backend_gpu_info_destroy(
+ (const struct kbase_hwcnt_backend_gpu_info *)iface->info);
+ memset(iface, 0, sizeof(*iface));
+}
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_gpu.h b/mali_kbase/mali_kbase_hwcnt_backend_gpu.h
new file mode 100644
index 0000000..7712f14
--- /dev/null
+++ b/mali_kbase/mali_kbase_hwcnt_backend_gpu.h
@@ -0,0 +1,61 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Concrete implementation of mali_kbase_hwcnt_backend interface for GPU
+ * backend.
+ */
+
+#ifndef _KBASE_HWCNT_BACKEND_GPU_H_
+#define _KBASE_HWCNT_BACKEND_GPU_H_
+
+#include "mali_kbase_hwcnt_backend.h"
+
+struct kbase_device;
+
+/**
+ * kbase_hwcnt_backend_gpu_create() - Create a GPU hardware counter backend
+ * interface.
+ * @kbdev: Non-NULL pointer to kbase device.
+ * @iface: Non-NULL pointer to backend interface structure that is filled in
+ * on creation success.
+ *
+ * Calls to iface->dump_enable_nolock() require kbdev->hwaccess_lock to be held.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_backend_gpu_create(
+ struct kbase_device *kbdev,
+ struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_gpu_destroy() - Destroy a GPU hardware counter backend
+ * interface.
+ * @iface: Pointer to interface to destroy.
+ *
+ * Can be safely called on an all-zeroed interface, or on an already destroyed
+ * interface.
+ */
+void kbase_hwcnt_backend_gpu_destroy(
+ struct kbase_hwcnt_backend_interface *iface);
+
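+/*
+ * Illustrative lifecycle sketch (hypothetical helper, not part of the
+ * interface): the interface created here is normally passed to
+ * kbase_hwcnt_context_init() (see mali_kbase_hwcnt_context.h) and destroyed
+ * again once that context has been terminated.
+ */
+static inline int example_hwcnt_backend_gpu_lifecycle(
+	struct kbase_device *kbdev)
+{
+	struct kbase_hwcnt_backend_interface iface;
+	int errcode;
+
+	errcode = kbase_hwcnt_backend_gpu_create(kbdev, &iface);
+	if (errcode)
+		return errcode;
+
+	/* ... hand &iface to kbase_hwcnt_context_init(), use the context,
+	 * then terminate the context ...
+	 */
+
+	kbase_hwcnt_backend_gpu_destroy(&iface);
+	return 0;
+}
+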
+#endif /* _KBASE_HWCNT_BACKEND_GPU_H_ */
diff --git a/mali_kbase/mali_kbase_hwcnt_context.h b/mali_kbase/mali_kbase_hwcnt_context.h
new file mode 100644
index 0000000..bc50ad1
--- /dev/null
+++ b/mali_kbase/mali_kbase_hwcnt_context.h
@@ -0,0 +1,119 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Hardware counter context API.
+ */
+
+#ifndef _KBASE_HWCNT_CONTEXT_H_
+#define _KBASE_HWCNT_CONTEXT_H_
+
+#include <linux/types.h>
+
+struct kbase_hwcnt_backend_interface;
+struct kbase_hwcnt_context;
+
+/**
+ * kbase_hwcnt_context_init() - Initialise a hardware counter context.
+ * @iface: Non-NULL pointer to a hardware counter backend interface.
+ * @out_hctx: Non-NULL pointer to where the pointer to the created context will
+ * be stored on success.
+ *
+ * On creation, the disable count of the context will be 0.
+ * A hardware counter accumulator can be acquired using a created context.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_context_init(
+ const struct kbase_hwcnt_backend_interface *iface,
+ struct kbase_hwcnt_context **out_hctx);
+
+/**
+ * kbase_hwcnt_context_term() - Terminate a hardware counter context.
+ * @hctx: Pointer to context to be terminated.
+ */
+void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx);
+
+/**
+ * kbase_hwcnt_context_metadata() - Get the hardware counter metadata used by
+ * the context, so related counter data
+ * structures can be created.
+ * @hctx: Non-NULL pointer to the hardware counter context.
+ *
+ * Return: Non-NULL pointer to metadata, or NULL on error.
+ */
+const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(
+ struct kbase_hwcnt_context *hctx);
+
+/**
+ * kbase_hwcnt_context_disable() - Increment the disable count of the context.
+ * @hctx: Pointer to the hardware counter context.
+ *
+ * If a call to this function increments the disable count from 0 to 1, and
+ * an accumulator has been acquired, then a counter dump will be performed
+ * before counters are disabled via the backend interface.
+ *
+ * Subsequent dumps via the accumulator while counters are disabled will first
+ * return the accumulated dump, then will return dumps with zeroed counters.
+ *
+ * After this function call returns, it is guaranteed that counters will not be
+ * enabled via the backend interface.
+ */
+void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx);
+
+/**
+ * kbase_hwcnt_context_disable_atomic() - Increment the disable count of the
+ * context if possible in an atomic
+ * context.
+ * @hctx: Pointer to the hardware counter context.
+ *
+ * This function will only succeed if hardware counters are effectively already
+ * disabled, i.e. there is no accumulator, the disable count is already
+ * non-zero, or the accumulator has no counters set.
+ *
+ * After this function call returns true, it is guaranteed that counters will
+ * not be enabled via the backend interface.
+ *
+ * Return: True if the disable count was incremented, else False.
+ */
+bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx);
+
+/**
+ * kbase_hwcnt_context_enable() - Decrement the disable count of the context.
+ * @hctx: Pointer to the hardware counter context.
+ *
+ * If a call to this function decrements the disable count from 1 to 0, and
+ * an accumulator has been acquired, then counters will be re-enabled via the
+ * backend interface.
+ *
+ * If an accumulator has been acquired and enabling counters fails for some
+ * reason, the accumulator will be placed into an error state.
+ *
+ * It is only valid to call this function once for each prior call to
+ * kbase_hwcnt_context_disable() that has returned.
+ *
+ * The spinlock documented in the backend interface that was passed in to
+ * kbase_hwcnt_context_init() must be held before calling this function.
+ */
+void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx);
+
+#endif /* _KBASE_HWCNT_CONTEXT_H_ */
diff --git a/mali_kbase/mali_kbase_hwcnt_gpu.c b/mali_kbase/mali_kbase_hwcnt_gpu.c
new file mode 100644
index 0000000..647d3ec
--- /dev/null
+++ b/mali_kbase/mali_kbase_hwcnt_gpu.c
@@ -0,0 +1,716 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_hwcnt_gpu.h"
+#include "mali_kbase_hwcnt_types.h"
+#include "mali_kbase.h"
+#ifdef CONFIG_MALI_NO_MALI
+#include "backend/gpu/mali_kbase_model_dummy.h"
+#endif
+
+#define KBASE_HWCNT_V4_BLOCKS_PER_GROUP 8
+#define KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP 4
+#define KBASE_HWCNT_V4_MAX_GROUPS \
+ (KBASE_HWCNT_AVAIL_MASK_BITS / KBASE_HWCNT_V4_BLOCKS_PER_GROUP)
+#define KBASE_HWCNT_V4_HEADERS_PER_BLOCK 4
+#define KBASE_HWCNT_V4_COUNTERS_PER_BLOCK 60
+#define KBASE_HWCNT_V4_VALUES_PER_BLOCK \
+ (KBASE_HWCNT_V4_HEADERS_PER_BLOCK + KBASE_HWCNT_V4_COUNTERS_PER_BLOCK)
+/* Index of the PRFCNT_EN header into a V4 counter block */
+#define KBASE_HWCNT_V4_PRFCNT_EN_HEADER 2
+
+#define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 4
+#define KBASE_HWCNT_V5_HEADERS_PER_BLOCK 4
+#define KBASE_HWCNT_V5_COUNTERS_PER_BLOCK 60
+#define KBASE_HWCNT_V5_VALUES_PER_BLOCK \
+ (KBASE_HWCNT_V5_HEADERS_PER_BLOCK + KBASE_HWCNT_V5_COUNTERS_PER_BLOCK)
+/* Index of the PRFCNT_EN header into a V5 counter block */
+#define KBASE_HWCNT_V5_PRFCNT_EN_HEADER 2
+
+/**
+ * kbasep_hwcnt_backend_gpu_metadata_v4_create() - Create hardware counter
+ * metadata for a v4 GPU.
+ * @v4_info: Non-NULL pointer to hwcnt info for a v4 GPU.
+ * @metadata: Non-NULL pointer to where created metadata is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_backend_gpu_metadata_v4_create(
+ const struct kbase_hwcnt_gpu_v4_info *v4_info,
+ const struct kbase_hwcnt_metadata **metadata)
+{
+ size_t grp;
+ int errcode = -ENOMEM;
+ struct kbase_hwcnt_description desc;
+ struct kbase_hwcnt_group_description *grps;
+ size_t avail_mask_bit;
+
+ WARN_ON(!v4_info);
+ WARN_ON(!metadata);
+
+ /* Check if there are enough bits in the availability mask to represent
+ * all the hardware counter blocks in the system.
+ */
+ if (v4_info->cg_count > KBASE_HWCNT_V4_MAX_GROUPS)
+ return -EINVAL;
+
+ grps = kcalloc(v4_info->cg_count, sizeof(*grps), GFP_KERNEL);
+ if (!grps)
+ goto clean_up;
+
+ desc.grp_cnt = v4_info->cg_count;
+ desc.grps = grps;
+
+ for (grp = 0; grp < v4_info->cg_count; grp++) {
+ size_t blk;
+ size_t sc;
+ const u64 core_mask = v4_info->cgs[grp].core_mask;
+ struct kbase_hwcnt_block_description *blks = kcalloc(
+ KBASE_HWCNT_V4_BLOCKS_PER_GROUP,
+ sizeof(*blks),
+ GFP_KERNEL);
+
+ if (!blks)
+ goto clean_up;
+
+ grps[grp].type = KBASE_HWCNT_GPU_GROUP_TYPE_V4;
+ grps[grp].blk_cnt = KBASE_HWCNT_V4_BLOCKS_PER_GROUP;
+ grps[grp].blks = blks;
+
+ for (blk = 0; blk < KBASE_HWCNT_V4_BLOCKS_PER_GROUP; blk++) {
+ blks[blk].inst_cnt = 1;
+ blks[blk].hdr_cnt =
+ KBASE_HWCNT_V4_HEADERS_PER_BLOCK;
+ blks[blk].ctr_cnt =
+ KBASE_HWCNT_V4_COUNTERS_PER_BLOCK;
+ }
+
+ for (sc = 0; sc < KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP; sc++) {
+ blks[sc].type = core_mask & (1ull << sc) ?
+ KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER :
+ KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED;
+ }
+
+ blks[4].type = KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER;
+ blks[5].type = KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2;
+ blks[6].type = KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED;
+ blks[7].type = (grp == 0) ?
+ KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM :
+ KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED;
+
+ WARN_ON(KBASE_HWCNT_V4_BLOCKS_PER_GROUP != 8);
+ }
+
+ /* Initialise the availability mask */
+ desc.avail_mask = 0;
+ avail_mask_bit = 0;
+
+ for (grp = 0; grp < desc.grp_cnt; grp++) {
+ size_t blk;
+ const struct kbase_hwcnt_block_description *blks =
+ desc.grps[grp].blks;
+ for (blk = 0; blk < desc.grps[grp].blk_cnt; blk++) {
+ WARN_ON(blks[blk].inst_cnt != 1);
+ if (blks[blk].type !=
+ KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED)
+ desc.avail_mask |= (1ull << avail_mask_bit);
+
+ avail_mask_bit++;
+ }
+ }
+
+ errcode = kbase_hwcnt_metadata_create(&desc, metadata);
+
+ /* Always clean up, as metadata will make a copy of the input args */
+clean_up:
+ if (grps) {
+ for (grp = 0; grp < v4_info->cg_count; grp++)
+ kfree(grps[grp].blks);
+ kfree(grps);
+ }
+ return errcode;
+}
+
+/**
+ * kbasep_hwcnt_backend_gpu_v4_dump_bytes() - Get the raw dump buffer size for a
+ * V4 GPU.
+ * @v4_info: Non-NULL pointer to hwcnt info for a v4 GPU.
+ *
+ * Return: Size of buffer the V4 GPU needs to perform a counter dump.
+ */
+static size_t kbasep_hwcnt_backend_gpu_v4_dump_bytes(
+ const struct kbase_hwcnt_gpu_v4_info *v4_info)
+{
+ return v4_info->cg_count *
+ KBASE_HWCNT_V4_BLOCKS_PER_GROUP *
+ KBASE_HWCNT_V4_VALUES_PER_BLOCK *
+ KBASE_HWCNT_VALUE_BYTES;
+}
+
+/**
+ * kbasep_hwcnt_backend_gpu_metadata_v5_create() - Create hardware counter
+ * metadata for a v5 GPU.
+ * @v5_info: Non-NULL pointer to hwcnt info for a v5 GPU.
+ * @use_secondary: True if secondary performance counters should be used, else
+ * false. Ignored if secondary counters are not supported.
+ * @metadata: Non-NULL pointer to where created metadata is stored
+ * on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_backend_gpu_metadata_v5_create(
+ const struct kbase_hwcnt_gpu_v5_info *v5_info,
+ bool use_secondary,
+ const struct kbase_hwcnt_metadata **metadata)
+{
+ struct kbase_hwcnt_description desc;
+ struct kbase_hwcnt_group_description group;
+ struct kbase_hwcnt_block_description
+ blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT];
+ size_t non_sc_block_count;
+ size_t sc_block_count;
+
+ WARN_ON(!v5_info);
+ WARN_ON(!metadata);
+
+ /* Calculate number of block instances that aren't shader cores */
+ non_sc_block_count = 2 + v5_info->l2_count;
+ /* Calculate number of block instances that are shader cores */
+ sc_block_count = fls64(v5_info->core_mask);
+
+ /*
+ * A system can have up to 64 shader cores, but the 64-bit
+ * availability mask can't physically represent that many cores as well
+ * as the other hardware blocks.
+ * Error out if there are more blocks than our implementation can
+ * support.
+ */
+ if ((sc_block_count + non_sc_block_count) > KBASE_HWCNT_AVAIL_MASK_BITS)
+ return -EINVAL;
+
+ /* One Job Manager block */
+ blks[0].type = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM;
+ blks[0].inst_cnt = 1;
+ blks[0].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+ blks[0].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK;
+
+ /* One Tiler block */
+ blks[1].type = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER;
+ blks[1].inst_cnt = 1;
+ blks[1].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+ blks[1].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK;
+
+ /* l2_count memsys blks */
+ blks[2].type = use_secondary ?
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2 :
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS;
+ blks[2].inst_cnt = v5_info->l2_count;
+ blks[2].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+ blks[2].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK;
+
+ /*
+ * There are as many shader cores in the system as there are bits set in
+ * the core mask. However, the dump buffer memory requirements need to
+ * take into account the fact that the core mask may be non-contiguous.
+ *
+ * For example, a system with a core mask of 0b1011 has the same dump
+ * buffer memory requirements as a system with 0b1111, but requires more
+ * memory than a system with 0b0111. However, core 2 of the system with
+ * 0b1011 doesn't physically exist, and the dump buffer memory that
+ * accounts for that core will never be written to when we do a counter
+ * dump.
+ *
+ * We find the core mask's last set bit to determine the memory
+ * requirements, and embed the core mask into the availability mask so
+ * we can determine later which shader cores physically exist.
+ */
+ blks[3].type = use_secondary ?
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2 :
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC;
+ blks[3].inst_cnt = sc_block_count;
+ blks[3].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+ blks[3].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK;
+
+ WARN_ON(KBASE_HWCNT_V5_BLOCK_TYPE_COUNT != 4);
+
+ group.type = KBASE_HWCNT_GPU_GROUP_TYPE_V5;
+ group.blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT;
+ group.blks = blks;
+
+ desc.grp_cnt = 1;
+ desc.grps = &group;
+
+ /* The JM, Tiler, and L2s are always available, and are before cores */
+ desc.avail_mask = (1ull << non_sc_block_count) - 1;
+ /* Embed the core mask directly in the availability mask */
+ desc.avail_mask |= (v5_info->core_mask << non_sc_block_count);
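+	/* For example, with two L2 slices (non_sc_block_count == 4) and a core
+	 * mask of 0b1011, the availability mask works out as 0b10111111: JM,
+	 * Tiler and both memsys instances present, plus shader cores 0, 1 and
+	 * 3, but not the non-existent core 2.
+	 */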
+
+ return kbase_hwcnt_metadata_create(&desc, metadata);
+}
+
+/**
+ * kbasep_hwcnt_backend_gpu_v5_dump_bytes() - Get the raw dump buffer size for a
+ * V5 GPU.
+ * @v5_info: Non-NULL pointer to hwcnt info for a v5 GPU.
+ *
+ * Return: Size of buffer the V5 GPU needs to perform a counter dump.
+ */
+static size_t kbasep_hwcnt_backend_gpu_v5_dump_bytes(
+ const struct kbase_hwcnt_gpu_v5_info *v5_info)
+{
+ WARN_ON(!v5_info);
+ return (2 + v5_info->l2_count + fls64(v5_info->core_mask)) *
+ KBASE_HWCNT_V5_VALUES_PER_BLOCK *
+ KBASE_HWCNT_VALUE_BYTES;
+}
+
+int kbase_hwcnt_gpu_info_init(
+ struct kbase_device *kbdev,
+ struct kbase_hwcnt_gpu_info *info)
+{
+ if (!kbdev || !info)
+ return -EINVAL;
+
+#ifdef CONFIG_MALI_NO_MALI
+ /* NO_MALI uses V5 layout, regardless of the underlying platform. */
+ info->type = KBASE_HWCNT_GPU_GROUP_TYPE_V5;
+ info->v5.l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS;
+ info->v5.core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1;
+#else
+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) {
+ info->type = KBASE_HWCNT_GPU_GROUP_TYPE_V4;
+ info->v4.cg_count = kbdev->gpu_props.num_core_groups;
+ info->v4.cgs = kbdev->gpu_props.props.coherency_info.group;
+ } else {
+ const struct base_gpu_props *props = &kbdev->gpu_props.props;
+ const size_t l2_count = props->l2_props.num_l2_slices;
+ const size_t core_mask =
+ props->coherency_info.group[0].core_mask;
+
+ info->type = KBASE_HWCNT_GPU_GROUP_TYPE_V5;
+ info->v5.l2_count = l2_count;
+ info->v5.core_mask = core_mask;
+ }
+#endif
+ return 0;
+}
+
+int kbase_hwcnt_gpu_metadata_create(
+ const struct kbase_hwcnt_gpu_info *info,
+ bool use_secondary,
+ const struct kbase_hwcnt_metadata **out_metadata,
+ size_t *out_dump_bytes)
+{
+ int errcode;
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t dump_bytes;
+
+ if (!info || !out_metadata || !out_dump_bytes)
+ return -EINVAL;
+
+ switch (info->type) {
+ case KBASE_HWCNT_GPU_GROUP_TYPE_V4:
+ dump_bytes = kbasep_hwcnt_backend_gpu_v4_dump_bytes(&info->v4);
+ errcode = kbasep_hwcnt_backend_gpu_metadata_v4_create(
+ &info->v4, &metadata);
+ break;
+ case KBASE_HWCNT_GPU_GROUP_TYPE_V5:
+ dump_bytes = kbasep_hwcnt_backend_gpu_v5_dump_bytes(&info->v5);
+ errcode = kbasep_hwcnt_backend_gpu_metadata_v5_create(
+ &info->v5, use_secondary, &metadata);
+ break;
+ default:
+ return -EINVAL;
+ }
+ if (errcode)
+ return errcode;
+
+	/*
+	 * For backwards compatibility, the dump buffer abstraction must have
+	 * exactly the same size and layout as the physical dump buffer.
+	 */
+ WARN_ON(dump_bytes != metadata->dump_buf_bytes);
+
+ *out_metadata = metadata;
+ *out_dump_bytes = dump_bytes;
+
+ return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_metadata_create);
+
+void kbase_hwcnt_gpu_metadata_destroy(
+ const struct kbase_hwcnt_metadata *metadata)
+{
+ if (!metadata)
+ return;
+
+ kbase_hwcnt_metadata_destroy(metadata);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_metadata_destroy);
+
+int kbase_hwcnt_gpu_dump_get(
+ struct kbase_hwcnt_dump_buffer *dst,
+ void *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map,
+ bool accumulate)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+ const u32 *dump_src;
+ size_t src_offset, grp, blk, blk_inst;
+
+ if (!dst || !src || !dst_enable_map ||
+ (dst_enable_map->metadata != dst->metadata))
+ return -EINVAL;
+
+ metadata = dst->metadata;
+ dump_src = (const u32 *)src;
+ src_offset = 0;
+
+ kbase_hwcnt_metadata_for_each_block(
+ metadata, grp, blk, blk_inst) {
+ const size_t hdr_cnt =
+ kbase_hwcnt_metadata_block_headers_count(
+ metadata, grp, blk);
+ const size_t ctr_cnt =
+ kbase_hwcnt_metadata_block_counters_count(
+ metadata, grp, blk);
+
+		/* Skip the block if no values in the dest block are enabled */
+ if (kbase_hwcnt_enable_map_block_enabled(
+ dst_enable_map, grp, blk, blk_inst)) {
+ u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
+ dst, grp, blk, blk_inst);
+ const u32 *src_blk = dump_src + src_offset;
+
+ if (accumulate) {
+ kbase_hwcnt_dump_buffer_block_accumulate(
+ dst_blk, src_blk, hdr_cnt, ctr_cnt);
+ } else {
+ kbase_hwcnt_dump_buffer_block_copy(
+ dst_blk, src_blk, (hdr_cnt + ctr_cnt));
+ }
+ }
+
+ src_offset += (hdr_cnt + ctr_cnt);
+ }
+
+ return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_dump_get);
+
+/**
+ * kbasep_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block
+ * enable map abstraction to
+ * a physical block enable
+ * map.
+ * @lo: Low 64 bits of block enable map abstraction.
+ * @hi: High 64 bits of block enable map abstraction.
+ *
+ * The abstraction uses 128 bits to enable 128 block values, whereas the
+ * physical uses just 32 bits, as bit n enables values [n*4, n*4+3].
+ * Therefore, this conversion is lossy.
+ *
+ * Return: 32-bit physical block enable map.
+ */
+static inline u32 kbasep_hwcnt_backend_gpu_block_map_to_physical(
+ u64 lo,
+ u64 hi)
+{
+ u32 phys = 0;
+ u64 dwords[2] = {lo, hi};
+ size_t dword_idx;
+
+ for (dword_idx = 0; dword_idx < 2; dword_idx++) {
+ const u64 dword = dwords[dword_idx];
+ u16 packed = 0;
+
+ size_t hword_bit;
+
+ for (hword_bit = 0; hword_bit < 16; hword_bit++) {
+ const size_t dword_bit = hword_bit * 4;
+ const u16 mask =
+ ((dword >> (dword_bit + 0)) & 0x1) |
+ ((dword >> (dword_bit + 1)) & 0x1) |
+ ((dword >> (dword_bit + 2)) & 0x1) |
+ ((dword >> (dword_bit + 3)) & 0x1);
+ packed |= (mask << hword_bit);
+ }
+ phys |= ((u32)packed) << (16 * dword_idx);
+ }
+ return phys;
+}
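+
+/*
+ * Worked example (illustrative only): for lo = 0x13 (abstraction bits 0, 1
+ * and 4 set) and hi = 0, bits 0-3 collapse onto physical bit 0 and bits 4-7
+ * collapse onto physical bit 1, so the function returns 0x3.
+ */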
+
+/**
+ * kbasep_hwcnt_backend_gpu_block_map_from_physical() - Convert from a physical
+ * block enable map to a
+ * block enable map
+ * abstraction.
+ * @phys: Physical 32-bit block enable map
+ * @lo: Non-NULL pointer to where low 64 bits of block enable map abstraction
+ * will be stored.
+ * @hi: Non-NULL pointer to where high 64 bits of block enable map abstraction
+ * will be stored.
+ */
+static inline void kbasep_hwcnt_backend_gpu_block_map_from_physical(
+ u32 phys,
+ u64 *lo,
+ u64 *hi)
+{
+ u64 dwords[2] = {0, 0};
+
+ size_t dword_idx;
+
+ for (dword_idx = 0; dword_idx < 2; dword_idx++) {
+ const u16 packed = phys >> (16 * dword_idx);
+ u64 dword = 0;
+
+ size_t hword_bit;
+
+ for (hword_bit = 0; hword_bit < 16; hword_bit++) {
+ const size_t dword_bit = hword_bit * 4;
+ const u64 mask = (packed >> (hword_bit)) & 0x1;
+
+ dword |= mask << (dword_bit + 0);
+ dword |= mask << (dword_bit + 1);
+ dword |= mask << (dword_bit + 2);
+ dword |= mask << (dword_bit + 3);
+ }
+ dwords[dword_idx] = dword;
+ }
+ *lo = dwords[0];
+ *hi = dwords[1];
+}
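+
+/*
+ * Worked example (illustrative only): phys = 0x3 expands to lo = 0xff,
+ * hi = 0, while phys = 0x80000000 expands to lo = 0,
+ * hi = 0xf000000000000000. Converting the 0x3 result of the example above
+ * therefore yields 0xff rather than the original 0x13, as the to_physical
+ * conversion is lossy.
+ */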
+
+void kbase_hwcnt_gpu_enable_map_to_physical(
+ struct kbase_hwcnt_physical_enable_map *dst,
+ const struct kbase_hwcnt_enable_map *src)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+
+ u64 jm_bm = 0;
+ u64 shader_bm = 0;
+ u64 tiler_bm = 0;
+ u64 mmu_l2_bm = 0;
+
+ size_t grp, blk, blk_inst;
+
+ if (WARN_ON(!src) || WARN_ON(!dst))
+ return;
+
+ metadata = src->metadata;
+
+ kbase_hwcnt_metadata_for_each_block(
+ metadata, grp, blk, blk_inst) {
+ const u64 grp_type = kbase_hwcnt_metadata_group_type(
+ metadata, grp);
+ const u64 blk_type = kbase_hwcnt_metadata_block_type(
+ metadata, grp, blk);
+ const size_t blk_val_cnt =
+ kbase_hwcnt_metadata_block_values_count(
+ metadata, grp, blk);
+ const u64 *blk_map = kbase_hwcnt_enable_map_block_instance(
+ src, grp, blk, blk_inst);
+
+ switch ((enum kbase_hwcnt_gpu_group_type)grp_type) {
+ case KBASE_HWCNT_GPU_GROUP_TYPE_V4:
+ WARN_ON(blk_val_cnt != KBASE_HWCNT_V4_VALUES_PER_BLOCK);
+ switch ((enum kbase_hwcnt_gpu_v4_block_type)blk_type) {
+ case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER:
+ shader_bm |= *blk_map;
+ break;
+ case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER:
+ tiler_bm |= *blk_map;
+ break;
+ case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2:
+ mmu_l2_bm |= *blk_map;
+ break;
+ case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM:
+ jm_bm |= *blk_map;
+ break;
+ case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED:
+ break;
+ default:
+ WARN_ON(true);
+ }
+ break;
+ case KBASE_HWCNT_GPU_GROUP_TYPE_V5:
+ WARN_ON(blk_val_cnt != KBASE_HWCNT_V5_VALUES_PER_BLOCK);
+ switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) {
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM:
+ jm_bm |= *blk_map;
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
+ tiler_bm |= *blk_map;
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:
+ shader_bm |= *blk_map;
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2:
+ mmu_l2_bm |= *blk_map;
+ break;
+ default:
+ WARN_ON(true);
+ }
+ break;
+ default:
+ WARN_ON(true);
+ }
+ }
+
+ dst->jm_bm =
+ kbasep_hwcnt_backend_gpu_block_map_to_physical(jm_bm, 0);
+ dst->shader_bm =
+ kbasep_hwcnt_backend_gpu_block_map_to_physical(shader_bm, 0);
+ dst->tiler_bm =
+ kbasep_hwcnt_backend_gpu_block_map_to_physical(tiler_bm, 0);
+ dst->mmu_l2_bm =
+ kbasep_hwcnt_backend_gpu_block_map_to_physical(mmu_l2_bm, 0);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_enable_map_to_physical);
+
+void kbase_hwcnt_gpu_enable_map_from_physical(
+ struct kbase_hwcnt_enable_map *dst,
+ const struct kbase_hwcnt_physical_enable_map *src)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+
+ u64 ignored_hi;
+ u64 jm_bm;
+ u64 shader_bm;
+ u64 tiler_bm;
+ u64 mmu_l2_bm;
+ size_t grp, blk, blk_inst;
+
+ if (WARN_ON(!src) || WARN_ON(!dst))
+ return;
+
+ metadata = dst->metadata;
+
+ kbasep_hwcnt_backend_gpu_block_map_from_physical(
+ src->jm_bm, &jm_bm, &ignored_hi);
+ kbasep_hwcnt_backend_gpu_block_map_from_physical(
+ src->shader_bm, &shader_bm, &ignored_hi);
+ kbasep_hwcnt_backend_gpu_block_map_from_physical(
+ src->tiler_bm, &tiler_bm, &ignored_hi);
+ kbasep_hwcnt_backend_gpu_block_map_from_physical(
+ src->mmu_l2_bm, &mmu_l2_bm, &ignored_hi);
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
+ const u64 grp_type = kbase_hwcnt_metadata_group_type(
+ metadata, grp);
+ const u64 blk_type = kbase_hwcnt_metadata_block_type(
+ metadata, grp, blk);
+ const size_t blk_val_cnt =
+ kbase_hwcnt_metadata_block_values_count(
+ metadata, grp, blk);
+ u64 *blk_map = kbase_hwcnt_enable_map_block_instance(
+ dst, grp, blk, blk_inst);
+
+ switch ((enum kbase_hwcnt_gpu_group_type)grp_type) {
+ case KBASE_HWCNT_GPU_GROUP_TYPE_V4:
+ WARN_ON(blk_val_cnt != KBASE_HWCNT_V4_VALUES_PER_BLOCK);
+ switch ((enum kbase_hwcnt_gpu_v4_block_type)blk_type) {
+ case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER:
+ *blk_map = shader_bm;
+ break;
+ case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER:
+ *blk_map = tiler_bm;
+ break;
+ case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2:
+ *blk_map = mmu_l2_bm;
+ break;
+ case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM:
+ *blk_map = jm_bm;
+ break;
+ case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED:
+ break;
+ default:
+ WARN_ON(true);
+ }
+ break;
+ case KBASE_HWCNT_GPU_GROUP_TYPE_V5:
+ WARN_ON(blk_val_cnt != KBASE_HWCNT_V5_VALUES_PER_BLOCK);
+ switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) {
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM:
+ *blk_map = jm_bm;
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
+ *blk_map = tiler_bm;
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:
+ *blk_map = shader_bm;
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2:
+ *blk_map = mmu_l2_bm;
+ break;
+ default:
+ WARN_ON(true);
+ }
+ break;
+ default:
+ WARN_ON(true);
+ }
+ }
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_enable_map_from_physical);
+
+void kbase_hwcnt_gpu_patch_dump_headers(
+ struct kbase_hwcnt_dump_buffer *buf,
+ const struct kbase_hwcnt_enable_map *enable_map)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t grp, blk, blk_inst;
+
+ if (WARN_ON(!buf) || WARN_ON(!enable_map) ||
+ WARN_ON(buf->metadata != enable_map->metadata))
+ return;
+
+ metadata = buf->metadata;
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
+ const u64 grp_type =
+ kbase_hwcnt_metadata_group_type(metadata, grp);
+ u32 *buf_blk = kbase_hwcnt_dump_buffer_block_instance(
+ buf, grp, blk, blk_inst);
+ const u64 *blk_map = kbase_hwcnt_enable_map_block_instance(
+ enable_map, grp, blk, blk_inst);
+ const u32 prfcnt_en =
+ kbasep_hwcnt_backend_gpu_block_map_to_physical(
+ blk_map[0], 0);
+
+ switch ((enum kbase_hwcnt_gpu_group_type)grp_type) {
+ case KBASE_HWCNT_GPU_GROUP_TYPE_V4:
+ buf_blk[KBASE_HWCNT_V4_PRFCNT_EN_HEADER] = prfcnt_en;
+ break;
+ case KBASE_HWCNT_GPU_GROUP_TYPE_V5:
+ buf_blk[KBASE_HWCNT_V5_PRFCNT_EN_HEADER] = prfcnt_en;
+ break;
+ default:
+ WARN_ON(true);
+ }
+ }
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_patch_dump_headers);
diff --git a/mali_kbase/mali_kbase_hwcnt_gpu.h b/mali_kbase/mali_kbase_hwcnt_gpu.h
new file mode 100644
index 0000000..509608a
--- /dev/null
+++ b/mali_kbase/mali_kbase_hwcnt_gpu.h
@@ -0,0 +1,249 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_HWCNT_GPU_H_
+#define _KBASE_HWCNT_GPU_H_
+
+#include <linux/types.h>
+
+struct kbase_device;
+struct kbase_hwcnt_metadata;
+struct kbase_hwcnt_enable_map;
+struct kbase_hwcnt_dump_buffer;
+
+/**
+ * enum kbase_hwcnt_gpu_group_type - GPU hardware counter group types, used to
+ * identify metadata groups.
+ * @KBASE_HWCNT_GPU_GROUP_TYPE_V4: GPU V4 group type.
+ * @KBASE_HWCNT_GPU_GROUP_TYPE_V5: GPU V5 group type.
+ */
+enum kbase_hwcnt_gpu_group_type {
+ KBASE_HWCNT_GPU_GROUP_TYPE_V4 = 0x10,
+ KBASE_HWCNT_GPU_GROUP_TYPE_V5,
+};
+
+/**
+ * enum kbase_hwcnt_gpu_v4_block_type - GPU V4 hardware counter block types,
+ * used to identify metadata blocks.
+ * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER: Shader block.
+ * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER: Tiler block.
+ * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2: MMU/L2 block.
+ * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM: Job Manager block.
+ * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED: Reserved block.
+ */
+enum kbase_hwcnt_gpu_v4_block_type {
+ KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER = 0x20,
+ KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER,
+ KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2,
+ KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM,
+ KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED,
+};
+
+/**
+ * enum kbase_hwcnt_gpu_v5_block_type - GPU V5 hardware counter block types,
+ * used to identify metadata blocks.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM: Job Manager block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: Tiler block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: Shader Core block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: Secondary Shader Core block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: Memsys block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: Secondary Memsys block.
+ */
+enum kbase_hwcnt_gpu_v5_block_type {
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM = 0x40,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2,
+};
+
+/**
+ * struct kbase_hwcnt_physical_enable_map - Representation of enable map
+ * directly used by GPU.
+ * @jm_bm: Job Manager counters selection bitmask.
+ * @shader_bm: Shader counters selection bitmask.
+ * @tiler_bm: Tiler counters selection bitmask.
+ * @mmu_l2_bm: MMU_L2 counters selection bitmask.
+ */
+struct kbase_hwcnt_physical_enable_map {
+ u32 jm_bm;
+ u32 shader_bm;
+ u32 tiler_bm;
+ u32 mmu_l2_bm;
+};
+
+/**
+ * struct kbase_hwcnt_gpu_v4_info - Information about hwcnt blocks on v4 GPUs.
+ * @cg_count: Core group count.
+ * @cgs: Non-NULL pointer to array of cg_count coherent group structures.
+ *
+ * V4 devices are Mali-T6xx or Mali-T72x, and have one or more core groups,
+ * where each core group may have a physically different layout.
+ */
+struct kbase_hwcnt_gpu_v4_info {
+ size_t cg_count;
+ const struct mali_base_gpu_coherent_group *cgs;
+};
+
+/**
+ * struct kbase_hwcnt_gpu_v5_info - Information about hwcnt blocks on v5 GPUs.
+ * @l2_count: L2 cache count.
+ * @core_mask: Shader core mask. May be sparse.
+ */
+struct kbase_hwcnt_gpu_v5_info {
+ size_t l2_count;
+ u64 core_mask;
+};
+
+/**
+ * struct kbase_hwcnt_gpu_info - Tagged union with information about the current
+ * GPU's hwcnt blocks.
+ * @type: GPU type.
+ * @v4: Info filled in if a v4 GPU.
+ * @v5: Info filled in if a v5 GPU.
+ */
+struct kbase_hwcnt_gpu_info {
+ enum kbase_hwcnt_gpu_group_type type;
+ union {
+ struct kbase_hwcnt_gpu_v4_info v4;
+ struct kbase_hwcnt_gpu_v5_info v5;
+ };
+};
+
+/**
+ * kbase_hwcnt_gpu_info_init() - Initialise an info structure used to create the
+ * hwcnt metadata.
+ * @kbdev: Non-NULL pointer to kbase device.
+ * @info: Non-NULL pointer to data structure to be filled in.
+ *
+ * The initialised info struct will only be valid for use while kbdev is valid.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_gpu_info_init(
+ struct kbase_device *kbdev,
+ struct kbase_hwcnt_gpu_info *info);
+
+/**
+ * kbase_hwcnt_gpu_metadata_create() - Create hardware counter metadata for the
+ * current GPU.
+ * @info: Non-NULL pointer to info struct initialised by
+ * kbase_hwcnt_gpu_info_init.
+ * @use_secondary: True if secondary performance counters should be used, else
+ * false. Ignored if secondary counters are not supported.
+ * @out_metadata: Non-NULL pointer to where created metadata is stored on
+ * success.
+ * @out_dump_bytes: Non-NULL pointer to where the size of the GPU counter dump
+ * buffer is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_gpu_metadata_create(
+ const struct kbase_hwcnt_gpu_info *info,
+ bool use_secondary,
+ const struct kbase_hwcnt_metadata **out_metadata,
+ size_t *out_dump_bytes);
+
+/**
+ * kbase_hwcnt_gpu_metadata_destroy() - Destroy GPU hardware counter metadata.
+ * @metadata: Pointer to metadata to destroy.
+ */
+void kbase_hwcnt_gpu_metadata_destroy(
+ const struct kbase_hwcnt_metadata *metadata);
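+
+/*
+ * Illustrative call sequence (a sketch only; kbdev is assumed to be a valid
+ * kbase device pointer obtained elsewhere):
+ *
+ *   struct kbase_hwcnt_gpu_info info;
+ *   const struct kbase_hwcnt_metadata *md;
+ *   size_t dump_bytes;
+ *
+ *   if (!kbase_hwcnt_gpu_info_init(kbdev, &info) &&
+ *       !kbase_hwcnt_gpu_metadata_create(&info, false, &md, &dump_bytes)) {
+ *           ... use md and dump_bytes ...
+ *           kbase_hwcnt_gpu_metadata_destroy(md);
+ *   }
+ */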
+
+/**
+ * kbase_hwcnt_gpu_dump_get() - Copy or accumulate enabled counters from the raw
+ * dump buffer in src into the dump buffer
+ * abstraction in dst.
+ * @dst: Non-NULL pointer to dst dump buffer.
+ * @src: Non-NULL pointer to src raw dump buffer, of same length
+ * as returned in out_dump_bytes parameter of
+ * kbase_hwcnt_gpu_metadata_create.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ * @accumulate: True if counters in src should be accumulated into dst,
+ * rather than copied.
+ *
+ * The dst and dst_enable_map MUST have been created from the same metadata as
+ * returned from the call to kbase_hwcnt_gpu_metadata_create as was used to get
+ * the length of src.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_gpu_dump_get(
+ struct kbase_hwcnt_dump_buffer *dst,
+ void *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map,
+ bool accumulate);
+
+/**
+ * kbase_hwcnt_gpu_enable_map_to_physical() - Convert an enable map abstraction
+ * into a physical enable map.
+ * @dst: Non-NULL pointer to dst physical enable map.
+ * @src: Non-NULL pointer to src enable map abstraction.
+ *
+ * The src must have been created from a metadata returned from a call to
+ * kbase_hwcnt_gpu_metadata_create.
+ *
+ * This is a lossy conversion, as the enable map abstraction has one bit per
+ * individual counter block value, but the physical enable map uses 1 bit for
+ * every 4 counters, shared over all instances of a block.
+ */
+void kbase_hwcnt_gpu_enable_map_to_physical(
+ struct kbase_hwcnt_physical_enable_map *dst,
+ const struct kbase_hwcnt_enable_map *src);
+
+/**
+ * kbase_hwcnt_gpu_enable_map_from_physical() - Convert a physical enable map to
+ * an enable map abstraction.
+ * @dst: Non-NULL pointer to dst enable map abstraction.
+ * @src: Non-NULL pointer to src physical enable map.
+ *
+ * The dst must have been created from a metadata returned from a call to
+ * kbase_hwcnt_gpu_metadata_create.
+ *
+ * This is a lossy conversion, as the physical enable map can technically
+ * support counter blocks with 128 counters each, but no hardware actually uses
+ * more than 64, so the enable map abstraction has nowhere to store the enable
+ * information for the 64 non-existent counters.
+ */
+void kbase_hwcnt_gpu_enable_map_from_physical(
+ struct kbase_hwcnt_enable_map *dst,
+ const struct kbase_hwcnt_physical_enable_map *src);
+
+/**
+ * kbase_hwcnt_gpu_patch_dump_headers() - Patch all the performance counter
+ * enable headers in a dump buffer to
+ * reflect the specified enable map.
+ * @buf: Non-NULL pointer to dump buffer to patch.
+ * @enable_map: Non-NULL pointer to enable map.
+ *
+ * The buf and enable_map must have been created from a metadata returned from
+ * a call to kbase_hwcnt_gpu_metadata_create.
+ *
+ * This function should be used before handing off a dump buffer over the
+ * kernel-user boundary, to ensure the header is accurate for the enable map
+ * used by the user.
+ */
+void kbase_hwcnt_gpu_patch_dump_headers(
+ struct kbase_hwcnt_dump_buffer *buf,
+ const struct kbase_hwcnt_enable_map *enable_map);
+
+#endif /* _KBASE_HWCNT_GPU_H_ */
diff --git a/mali_kbase/mali_kbase_hwcnt_legacy.c b/mali_kbase/mali_kbase_hwcnt_legacy.c
new file mode 100644
index 0000000..b0e6aee
--- /dev/null
+++ b/mali_kbase/mali_kbase_hwcnt_legacy.c
@@ -0,0 +1,152 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_hwcnt_legacy.h"
+#include "mali_kbase_hwcnt_virtualizer.h"
+#include "mali_kbase_hwcnt_types.h"
+#include "mali_kbase_hwcnt_gpu.h"
+#include "mali_kbase_ioctl.h"
+
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+/**
+ * struct kbase_hwcnt_legacy_client - Legacy hardware counter client.
+ * @user_dump_buf: Non-NULL pointer to the user buffer where dumps are
+ *                 returned.
+ * @enable_map:    Counter enable map.
+ * @dump_buf:      Dump buffer used to manipulate dumps before they are
+ *                 copied to user space.
+ * @hvcli: Hardware counter virtualizer client.
+ */
+struct kbase_hwcnt_legacy_client {
+ void __user *user_dump_buf;
+ struct kbase_hwcnt_enable_map enable_map;
+ struct kbase_hwcnt_dump_buffer dump_buf;
+ struct kbase_hwcnt_virtualizer_client *hvcli;
+};
+
+int kbase_hwcnt_legacy_client_create(
+ struct kbase_hwcnt_virtualizer *hvirt,
+ struct kbase_ioctl_hwcnt_enable *enable,
+ struct kbase_hwcnt_legacy_client **out_hlcli)
+{
+ int errcode;
+ struct kbase_hwcnt_legacy_client *hlcli;
+ const struct kbase_hwcnt_metadata *metadata;
+ struct kbase_hwcnt_physical_enable_map phys_em;
+
+ if (!hvirt || !enable || !enable->dump_buffer || !out_hlcli)
+ return -EINVAL;
+
+ metadata = kbase_hwcnt_virtualizer_metadata(hvirt);
+
+ hlcli = kzalloc(sizeof(*hlcli), GFP_KERNEL);
+ if (!hlcli)
+ return -ENOMEM;
+
+ hlcli->user_dump_buf = (void __user *)(uintptr_t)enable->dump_buffer;
+
+ errcode = kbase_hwcnt_enable_map_alloc(metadata, &hlcli->enable_map);
+ if (errcode)
+ goto error;
+
+ /* Translate from the ioctl enable map to the internal one */
+ phys_em.jm_bm = enable->jm_bm;
+ phys_em.shader_bm = enable->shader_bm;
+ phys_em.tiler_bm = enable->tiler_bm;
+ phys_em.mmu_l2_bm = enable->mmu_l2_bm;
+ kbase_hwcnt_gpu_enable_map_from_physical(&hlcli->enable_map, &phys_em);
+
+ errcode = kbase_hwcnt_dump_buffer_alloc(metadata, &hlcli->dump_buf);
+ if (errcode)
+ goto error;
+
+ errcode = kbase_hwcnt_virtualizer_client_create(
+ hvirt, &hlcli->enable_map, &hlcli->hvcli);
+ if (errcode)
+ goto error;
+
+ *out_hlcli = hlcli;
+ return 0;
+
+error:
+ kbase_hwcnt_legacy_client_destroy(hlcli);
+ return errcode;
+}
+
+void kbase_hwcnt_legacy_client_destroy(struct kbase_hwcnt_legacy_client *hlcli)
+{
+ if (!hlcli)
+ return;
+
+ kbase_hwcnt_virtualizer_client_destroy(hlcli->hvcli);
+ kbase_hwcnt_dump_buffer_free(&hlcli->dump_buf);
+ kbase_hwcnt_enable_map_free(&hlcli->enable_map);
+ kfree(hlcli);
+}
+
+int kbase_hwcnt_legacy_client_dump(struct kbase_hwcnt_legacy_client *hlcli)
+{
+ int errcode;
+ u64 ts_start_ns;
+ u64 ts_end_ns;
+
+ if (!hlcli)
+ return -EINVAL;
+
+ /* Dump into the kernel buffer */
+ errcode = kbase_hwcnt_virtualizer_client_dump(hlcli->hvcli,
+ &ts_start_ns, &ts_end_ns, &hlcli->dump_buf);
+ if (errcode)
+ return errcode;
+
+ /* Patch the dump buf headers, to hide the counters that other hwcnt
+ * clients are using.
+ */
+ kbase_hwcnt_gpu_patch_dump_headers(
+ &hlcli->dump_buf, &hlcli->enable_map);
+
+ /* Zero all non-enabled counters (current values are undefined) */
+ kbase_hwcnt_dump_buffer_zero_non_enabled(
+ &hlcli->dump_buf, &hlcli->enable_map);
+
+ /* Copy into the user's buffer */
+ errcode = copy_to_user(hlcli->user_dump_buf, hlcli->dump_buf.dump_buf,
+ hlcli->dump_buf.metadata->dump_buf_bytes);
+ /* Non-zero errcode implies user buf was invalid or too small */
+ if (errcode)
+ return -EFAULT;
+
+ return 0;
+}
+
+int kbase_hwcnt_legacy_client_clear(struct kbase_hwcnt_legacy_client *hlcli)
+{
+ u64 ts_start_ns;
+ u64 ts_end_ns;
+
+ if (!hlcli)
+ return -EINVAL;
+
+ /* Dump with a NULL buffer to clear this client's counters */
+ return kbase_hwcnt_virtualizer_client_dump(hlcli->hvcli,
+ &ts_start_ns, &ts_end_ns, NULL);
+}
diff --git a/mali_kbase/mali_kbase_hwcnt_legacy.h b/mali_kbase/mali_kbase_hwcnt_legacy.h
new file mode 100644
index 0000000..7a610ae
--- /dev/null
+++ b/mali_kbase/mali_kbase_hwcnt_legacy.h
@@ -0,0 +1,94 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Legacy hardware counter interface, giving userspace clients simple,
+ * synchronous access to hardware counters.
+ *
+ * Any functions operating on a single legacy hardware counter client instance
+ * must be externally synchronised.
+ * Different clients may safely be used concurrently.
+ */
+
+#ifndef _KBASE_HWCNT_LEGACY_H_
+#define _KBASE_HWCNT_LEGACY_H_
+
+struct kbase_hwcnt_legacy_client;
+struct kbase_ioctl_hwcnt_enable;
+struct kbase_hwcnt_virtualizer;
+
+/**
+ * kbase_hwcnt_legacy_client_create() - Create a legacy hardware counter client.
+ * @hvirt: Non-NULL pointer to hardware counter virtualizer the client
+ * should be attached to.
+ * @enable: Non-NULL pointer to hwcnt_enable structure, containing a valid
+ * pointer to a user dump buffer large enough to hold a dump, and
+ * the counters that should be enabled.
+ * @out_hlcli: Non-NULL pointer to where the pointer to the created client will
+ * be stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_legacy_client_create(
+ struct kbase_hwcnt_virtualizer *hvirt,
+ struct kbase_ioctl_hwcnt_enable *enable,
+ struct kbase_hwcnt_legacy_client **out_hlcli);
+
+/**
+ * kbase_hwcnt_legacy_client_destroy() - Destroy a legacy hardware counter
+ * client.
+ * @hlcli: Pointer to the legacy hardware counter client.
+ *
+ * Will safely destroy a client in any partial state of construction.
+ */
+void kbase_hwcnt_legacy_client_destroy(struct kbase_hwcnt_legacy_client *hlcli);
+
+/**
+ * kbase_hwcnt_legacy_client_dump() - Perform a hardware counter dump into the
+ * client's user buffer.
+ * @hlcli: Non-NULL pointer to the legacy hardware counter client.
+ *
+ * This function will synchronously dump hardware counters into the user buffer
+ * specified on client creation, with the counters specified on client creation.
+ *
+ * The counters are automatically cleared after each dump, such that the next
+ * dump performed will return the counter values accumulated between the time of
+ * this function call and the next dump.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_legacy_client_dump(struct kbase_hwcnt_legacy_client *hlcli);
+
+/**
+ * kbase_hwcnt_legacy_client_clear() - Perform and discard a hardware counter
+ * dump.
+ * @hlcli: Non-NULL pointer to the legacy hardware counter client.
+ *
+ * This function will synchronously clear the hardware counters, such that the
+ * next dump performed will return the counter values accumulated between the
+ * time of this function call and the next dump.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_legacy_client_clear(struct kbase_hwcnt_legacy_client *hlcli);
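+
+/*
+ * Illustrative usage (a sketch only; hvirt and user_buf stand for a valid
+ * virtualizer and user-space dump buffer obtained elsewhere):
+ *
+ *   struct kbase_hwcnt_legacy_client *hlcli;
+ *   struct kbase_ioctl_hwcnt_enable enable = {
+ *           .dump_buffer = (u64)(uintptr_t)user_buf,
+ *           .jm_bm = ~0u, .shader_bm = ~0u,
+ *           .tiler_bm = ~0u, .mmu_l2_bm = ~0u,
+ *   };
+ *
+ *   if (!kbase_hwcnt_legacy_client_create(hvirt, &enable, &hlcli)) {
+ *           kbase_hwcnt_legacy_client_dump(hlcli);
+ *           kbase_hwcnt_legacy_client_destroy(hlcli);
+ *   }
+ */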
+
+#endif /* _KBASE_HWCNT_LEGACY_H_ */
diff --git a/mali_kbase/mali_kbase_hwcnt_types.c b/mali_kbase/mali_kbase_hwcnt_types.c
new file mode 100644
index 0000000..1e9efde
--- /dev/null
+++ b/mali_kbase/mali_kbase_hwcnt_types.c
@@ -0,0 +1,538 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_hwcnt_types.h"
+#include "mali_kbase.h"
+
+/* Minimum alignment of each block of hardware counters */
+#define KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT \
+ (KBASE_HWCNT_BITFIELD_BITS * KBASE_HWCNT_VALUE_BYTES)
+
+/**
+ * KBASE_HWCNT_ALIGN_UPWARDS() - Align a value to an alignment.
+ * @value: The value to align upwards.
+ * @alignment: The alignment.
+ *
+ * Return: A number greater than or equal to value that is aligned to alignment.
+ */
+#define KBASE_HWCNT_ALIGN_UPWARDS(value, alignment) \
+ (value + ((alignment - (value % alignment)) % alignment))
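+
+/*
+ * For example, KBASE_HWCNT_ALIGN_UPWARDS(60, 16) evaluates to
+ * 60 + ((16 - (60 % 16)) % 16) = 64, while an already aligned value such as
+ * KBASE_HWCNT_ALIGN_UPWARDS(64, 16) is returned unchanged.
+ */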
+
+int kbase_hwcnt_metadata_create(
+ const struct kbase_hwcnt_description *desc,
+ const struct kbase_hwcnt_metadata **out_metadata)
+{
+ char *buf;
+ struct kbase_hwcnt_metadata *metadata;
+ struct kbase_hwcnt_group_metadata *grp_mds;
+ size_t grp;
+ size_t enable_map_count; /* Number of u64 bitfields (inc padding) */
+ size_t dump_buf_count; /* Number of u32 values (inc padding) */
+ size_t avail_mask_bits; /* Number of availability mask bits */
+
+ size_t size;
+ size_t offset;
+
+ if (!desc || !out_metadata)
+ return -EINVAL;
+
+ /* Calculate the bytes needed to tightly pack the metadata */
+
+ /* Top level metadata */
+ size = 0;
+ size += sizeof(struct kbase_hwcnt_metadata);
+
+ /* Group metadata */
+ size += sizeof(struct kbase_hwcnt_group_metadata) * desc->grp_cnt;
+
+ /* Block metadata */
+ for (grp = 0; grp < desc->grp_cnt; grp++) {
+ size += sizeof(struct kbase_hwcnt_block_metadata) *
+ desc->grps[grp].blk_cnt;
+ }
+
+ /* Single allocation for the entire metadata */
+ buf = kmalloc(size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ /* Use the allocated memory for the metadata and its members */
+
+ /* Bump allocate the top level metadata */
+ offset = 0;
+ metadata = (struct kbase_hwcnt_metadata *)(buf + offset);
+ offset += sizeof(struct kbase_hwcnt_metadata);
+
+ /* Bump allocate the group metadata */
+ grp_mds = (struct kbase_hwcnt_group_metadata *)(buf + offset);
+ offset += sizeof(struct kbase_hwcnt_group_metadata) * desc->grp_cnt;
+
+ enable_map_count = 0;
+ dump_buf_count = 0;
+ avail_mask_bits = 0;
+
+ for (grp = 0; grp < desc->grp_cnt; grp++) {
+ size_t blk;
+
+ const struct kbase_hwcnt_group_description *grp_desc =
+ desc->grps + grp;
+ struct kbase_hwcnt_group_metadata *grp_md = grp_mds + grp;
+
+ size_t group_enable_map_count = 0;
+ size_t group_dump_buffer_count = 0;
+ size_t group_avail_mask_bits = 0;
+
+ /* Bump allocate this group's block metadata */
+ struct kbase_hwcnt_block_metadata *blk_mds =
+ (struct kbase_hwcnt_block_metadata *)(buf + offset);
+ offset += sizeof(struct kbase_hwcnt_block_metadata) *
+ grp_desc->blk_cnt;
+
+		/* Fill in the information for each block in the group */
+ for (blk = 0; blk < grp_desc->blk_cnt; blk++) {
+ const struct kbase_hwcnt_block_description *blk_desc =
+ grp_desc->blks + blk;
+ struct kbase_hwcnt_block_metadata *blk_md =
+ blk_mds + blk;
+ const size_t n_values =
+ blk_desc->hdr_cnt + blk_desc->ctr_cnt;
+
+ blk_md->type = blk_desc->type;
+ blk_md->inst_cnt = blk_desc->inst_cnt;
+ blk_md->hdr_cnt = blk_desc->hdr_cnt;
+ blk_md->ctr_cnt = blk_desc->ctr_cnt;
+ blk_md->enable_map_index = group_enable_map_count;
+ blk_md->enable_map_stride =
+ kbase_hwcnt_bitfield_count(n_values);
+ blk_md->dump_buf_index = group_dump_buffer_count;
+ blk_md->dump_buf_stride =
+ KBASE_HWCNT_ALIGN_UPWARDS(
+ n_values,
+ (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
+ KBASE_HWCNT_VALUE_BYTES));
+ blk_md->avail_mask_index = group_avail_mask_bits;
+
+ group_enable_map_count +=
+ blk_md->enable_map_stride * blk_md->inst_cnt;
+ group_dump_buffer_count +=
+ blk_md->dump_buf_stride * blk_md->inst_cnt;
+ group_avail_mask_bits += blk_md->inst_cnt;
+ }
+
+ /* Fill in the group's information */
+ grp_md->type = grp_desc->type;
+ grp_md->blk_cnt = grp_desc->blk_cnt;
+ grp_md->blk_metadata = blk_mds;
+ grp_md->enable_map_index = enable_map_count;
+ grp_md->dump_buf_index = dump_buf_count;
+ grp_md->avail_mask_index = avail_mask_bits;
+
+ enable_map_count += group_enable_map_count;
+ dump_buf_count += group_dump_buffer_count;
+ avail_mask_bits += group_avail_mask_bits;
+ }
+
+ /* Fill in the top level metadata's information */
+ metadata->grp_cnt = desc->grp_cnt;
+ metadata->grp_metadata = grp_mds;
+ metadata->enable_map_bytes =
+ enable_map_count * KBASE_HWCNT_BITFIELD_BYTES;
+ metadata->dump_buf_bytes = dump_buf_count * KBASE_HWCNT_VALUE_BYTES;
+ metadata->avail_mask = desc->avail_mask;
+
+ WARN_ON(size != offset);
+ /* Due to the block alignment, there should be exactly one enable map
+ * bit per 4 bytes in the dump buffer.
+ */
+ WARN_ON(metadata->dump_buf_bytes !=
+ (metadata->enable_map_bytes *
+ BITS_PER_BYTE * KBASE_HWCNT_VALUE_BYTES));
+
+ *out_metadata = metadata;
+ return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_metadata_create);
+
+void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata)
+{
+ kfree(metadata);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_metadata_destroy);
+
+int kbase_hwcnt_enable_map_alloc(
+ const struct kbase_hwcnt_metadata *metadata,
+ struct kbase_hwcnt_enable_map *enable_map)
+{
+ u64 *enable_map_buf;
+
+ if (!metadata || !enable_map)
+ return -EINVAL;
+
+ enable_map_buf = kzalloc(metadata->enable_map_bytes, GFP_KERNEL);
+ if (!enable_map_buf)
+ return -ENOMEM;
+
+ enable_map->metadata = metadata;
+ enable_map->enable_map = enable_map_buf;
+ return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_enable_map_alloc);
+
+void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map)
+{
+ if (!enable_map)
+ return;
+
+ kfree(enable_map->enable_map);
+ enable_map->enable_map = NULL;
+ enable_map->metadata = NULL;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_enable_map_free);
+
+int kbase_hwcnt_dump_buffer_alloc(
+ const struct kbase_hwcnt_metadata *metadata,
+ struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+ u32 *buf;
+
+ if (!metadata || !dump_buf)
+ return -EINVAL;
+
+ buf = kmalloc(metadata->dump_buf_bytes, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ dump_buf->metadata = metadata;
+ dump_buf->dump_buf = buf;
+ return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_alloc);
+
+void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+ if (!dump_buf)
+ return;
+
+ kfree(dump_buf->dump_buf);
+ memset(dump_buf, 0, sizeof(*dump_buf));
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_free);
+
+int kbase_hwcnt_dump_buffer_array_alloc(
+ const struct kbase_hwcnt_metadata *metadata,
+ size_t n,
+ struct kbase_hwcnt_dump_buffer_array *dump_bufs)
+{
+ struct kbase_hwcnt_dump_buffer *buffers;
+ size_t buf_idx;
+ unsigned int order;
+ unsigned long addr;
+
+ if (!metadata || !dump_bufs)
+ return -EINVAL;
+
+ /* Allocate memory for the dump buffer struct array */
+ buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL);
+ if (!buffers)
+ return -ENOMEM;
+
+ /* Allocate pages for the actual dump buffers, as they tend to be fairly
+ * large.
+ */
+ order = get_order(metadata->dump_buf_bytes * n);
+ addr = __get_free_pages(GFP_KERNEL, order);
+
+ if (!addr) {
+ kfree(buffers);
+ return -ENOMEM;
+ }
+
+ dump_bufs->page_addr = addr;
+ dump_bufs->page_order = order;
+ dump_bufs->buf_cnt = n;
+ dump_bufs->bufs = buffers;
+
+ /* Set the buffer of each dump buf */
+ for (buf_idx = 0; buf_idx < n; buf_idx++) {
+ const size_t offset = metadata->dump_buf_bytes * buf_idx;
+
+ buffers[buf_idx].metadata = metadata;
+ buffers[buf_idx].dump_buf = (u32 *)(addr + offset);
+ }
+
+ return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_array_alloc);
+
+void kbase_hwcnt_dump_buffer_array_free(
+ struct kbase_hwcnt_dump_buffer_array *dump_bufs)
+{
+ if (!dump_bufs)
+ return;
+
+ kfree(dump_bufs->bufs);
+ free_pages(dump_bufs->page_addr, dump_bufs->page_order);
+ memset(dump_bufs, 0, sizeof(*dump_bufs));
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_array_free);
+
+void kbase_hwcnt_dump_buffer_zero(
+ struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t grp, blk, blk_inst;
+
+ if (WARN_ON(!dst) ||
+ WARN_ON(!dst_enable_map) ||
+ WARN_ON(dst->metadata != dst_enable_map->metadata))
+ return;
+
+ metadata = dst->metadata;
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
+ u32 *dst_blk;
+ size_t val_cnt;
+
+ if (!kbase_hwcnt_enable_map_block_enabled(
+ dst_enable_map, grp, blk, blk_inst))
+ continue;
+
+ dst_blk = kbase_hwcnt_dump_buffer_block_instance(
+ dst, grp, blk, blk_inst);
+ val_cnt = kbase_hwcnt_metadata_block_values_count(
+ metadata, grp, blk);
+
+ kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt);
+ }
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero);
+
+void kbase_hwcnt_dump_buffer_zero_strict(
+ struct kbase_hwcnt_dump_buffer *dst)
+{
+ if (WARN_ON(!dst))
+ return;
+
+ memset(dst->dump_buf, 0, dst->metadata->dump_buf_bytes);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero_strict);
+
+void kbase_hwcnt_dump_buffer_zero_non_enabled(
+ struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t grp, blk, blk_inst;
+
+ if (WARN_ON(!dst) ||
+ WARN_ON(!dst_enable_map) ||
+ WARN_ON(dst->metadata != dst_enable_map->metadata))
+ return;
+
+ metadata = dst->metadata;
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
+ u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
+ dst, grp, blk, blk_inst);
+ const u64 *blk_em = kbase_hwcnt_enable_map_block_instance(
+ dst_enable_map, grp, blk, blk_inst);
+ size_t val_cnt = kbase_hwcnt_metadata_block_values_count(
+ metadata, grp, blk);
+
+ /* Align upwards to include padding bytes */
+ val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(val_cnt,
+ (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
+ KBASE_HWCNT_VALUE_BYTES));
+
+ if (kbase_hwcnt_metadata_block_instance_avail(
+ metadata, grp, blk, blk_inst)) {
+ /* Block available, so only zero non-enabled values */
+ kbase_hwcnt_dump_buffer_block_zero_non_enabled(
+ dst_blk, blk_em, val_cnt);
+ } else {
+ /* Block not available, so zero the entire thing */
+ kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt);
+ }
+ }
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero_non_enabled);
+
+void kbase_hwcnt_dump_buffer_copy(
+ struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t grp, blk, blk_inst;
+
+ if (WARN_ON(!dst) ||
+ WARN_ON(!src) ||
+ WARN_ON(!dst_enable_map) ||
+ WARN_ON(dst == src) ||
+ WARN_ON(dst->metadata != src->metadata) ||
+ WARN_ON(dst->metadata != dst_enable_map->metadata))
+ return;
+
+ metadata = dst->metadata;
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
+ u32 *dst_blk;
+ const u32 *src_blk;
+ size_t val_cnt;
+
+ if (!kbase_hwcnt_enable_map_block_enabled(
+ dst_enable_map, grp, blk, blk_inst))
+ continue;
+
+ dst_blk = kbase_hwcnt_dump_buffer_block_instance(
+ dst, grp, blk, blk_inst);
+ src_blk = kbase_hwcnt_dump_buffer_block_instance(
+ src, grp, blk, blk_inst);
+ val_cnt = kbase_hwcnt_metadata_block_values_count(
+ metadata, grp, blk);
+
+ kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, val_cnt);
+ }
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_copy);
+
+void kbase_hwcnt_dump_buffer_copy_strict(
+ struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t grp, blk, blk_inst;
+
+ if (WARN_ON(!dst) ||
+ WARN_ON(!src) ||
+ WARN_ON(!dst_enable_map) ||
+ WARN_ON(dst == src) ||
+ WARN_ON(dst->metadata != src->metadata) ||
+ WARN_ON(dst->metadata != dst_enable_map->metadata))
+ return;
+
+ metadata = dst->metadata;
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
+ u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
+ dst, grp, blk, blk_inst);
+ const u32 *src_blk = kbase_hwcnt_dump_buffer_block_instance(
+ src, grp, blk, blk_inst);
+ const u64 *blk_em = kbase_hwcnt_enable_map_block_instance(
+ dst_enable_map, grp, blk, blk_inst);
+ size_t val_cnt = kbase_hwcnt_metadata_block_values_count(
+ metadata, grp, blk);
+ /* Align upwards to include padding bytes */
+ val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(val_cnt,
+ (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
+ KBASE_HWCNT_VALUE_BYTES));
+
+ kbase_hwcnt_dump_buffer_block_copy_strict(
+ dst_blk, src_blk, blk_em, val_cnt);
+ }
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_copy_strict);
+
+void kbase_hwcnt_dump_buffer_accumulate(
+ struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t grp, blk, blk_inst;
+
+ if (WARN_ON(!dst) ||
+ WARN_ON(!src) ||
+ WARN_ON(!dst_enable_map) ||
+ WARN_ON(dst == src) ||
+ WARN_ON(dst->metadata != src->metadata) ||
+ WARN_ON(dst->metadata != dst_enable_map->metadata))
+ return;
+
+ metadata = dst->metadata;
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
+ u32 *dst_blk;
+ const u32 *src_blk;
+ size_t hdr_cnt;
+ size_t ctr_cnt;
+
+ if (!kbase_hwcnt_enable_map_block_enabled(
+ dst_enable_map, grp, blk, blk_inst))
+ continue;
+
+ dst_blk = kbase_hwcnt_dump_buffer_block_instance(
+ dst, grp, blk, blk_inst);
+ src_blk = kbase_hwcnt_dump_buffer_block_instance(
+ src, grp, blk, blk_inst);
+ hdr_cnt = kbase_hwcnt_metadata_block_headers_count(
+ metadata, grp, blk);
+ ctr_cnt = kbase_hwcnt_metadata_block_counters_count(
+ metadata, grp, blk);
+
+ kbase_hwcnt_dump_buffer_block_accumulate(
+ dst_blk, src_blk, hdr_cnt, ctr_cnt);
+ }
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_accumulate);
+
+void kbase_hwcnt_dump_buffer_accumulate_strict(
+ struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t grp, blk, blk_inst;
+
+ if (WARN_ON(!dst) ||
+ WARN_ON(!src) ||
+ WARN_ON(!dst_enable_map) ||
+ WARN_ON(dst == src) ||
+ WARN_ON(dst->metadata != src->metadata) ||
+ WARN_ON(dst->metadata != dst_enable_map->metadata))
+ return;
+
+ metadata = dst->metadata;
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
+ u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
+ dst, grp, blk, blk_inst);
+ const u32 *src_blk = kbase_hwcnt_dump_buffer_block_instance(
+ src, grp, blk, blk_inst);
+ const u64 *blk_em = kbase_hwcnt_enable_map_block_instance(
+ dst_enable_map, grp, blk, blk_inst);
+ size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(
+ metadata, grp, blk);
+ size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count(
+ metadata, grp, blk);
+ /* Align upwards to include padding bytes */
+ ctr_cnt = KBASE_HWCNT_ALIGN_UPWARDS(hdr_cnt + ctr_cnt,
+ (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
+ KBASE_HWCNT_VALUE_BYTES) - hdr_cnt);
+
+ kbase_hwcnt_dump_buffer_block_accumulate_strict(
+ dst_blk, src_blk, blk_em, hdr_cnt, ctr_cnt);
+ }
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_accumulate_strict);
diff --git a/mali_kbase/mali_kbase_hwcnt_types.h b/mali_kbase/mali_kbase_hwcnt_types.h
new file mode 100644
index 0000000..4d78c84
--- /dev/null
+++ b/mali_kbase/mali_kbase_hwcnt_types.h
@@ -0,0 +1,1087 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Hardware counter types.
+ * Contains structures for describing the physical layout of hardware counter
+ * dump buffers and enable maps within a system.
+ *
+ * Also contains helper functions for manipulation of these dump buffers and
+ * enable maps.
+ *
+ * Through use of these structures and functions, hardware counters can be
+ * enabled, copied, accumulated, and generally manipulated in a generic way,
+ * regardless of the physical counter dump layout.
+ *
+ * Terminology:
+ *
+ * Hardware Counter System:
+ * A collection of hardware counter groups, making a full hardware counter
+ * system.
+ * Hardware Counter Group:
+ * A group of Hardware Counter Blocks (e.g. a t62x might have more than one
+ *    core group, so it has one counter group per core group, where each group
+ * may have a different number and layout of counter blocks).
+ * Hardware Counter Block:
+ * A block of hardware counters (e.g. shader block, tiler block).
+ * Hardware Counter Block Instance:
+ * An instance of a Hardware Counter Block (e.g. an MP4 GPU might have
+ * 4 shader block instances).
+ *
+ * Block Header:
+ * A header value inside a counter block. Headers don't count anything,
+ * so it is only valid to copy or zero them. Headers are always the first
+ * values in the block.
+ * Block Counter:
+ * A counter value inside a counter block. Counters can be zeroed, copied,
+ * or accumulated. Counters are always immediately after the headers in the
+ * block.
+ * Block Value:
+ * A catch-all term for block headers and block counters.
+ *
+ * Enable Map:
+ * An array of u64 bitfields, where each bit either enables exactly one
+ * block value, or is unused (padding).
+ * Dump Buffer:
+ * An array of u32 values, where each u32 corresponds either to one block
+ * value, or is unused (padding).
+ * Availability Mask:
+ * A bitfield, where each bit corresponds to whether a block instance is
+ * physically available (e.g. an MP3 GPU may have a sparse core mask of
+ * 0b1011, meaning it only has 3 cores but for hardware counter dumps has the
+ * same dump buffer layout as an MP4 GPU with a core mask of 0b1111. In this
+ * case, the availability mask might be 0b1011111 (the exact layout will
+ * depend on the specific hardware architecture), with the 3 extra early bits
+ * corresponding to other block instances in the hardware counter system).
+ * Metadata:
+ * Structure describing the physical layout of the enable map and dump buffers
+ * for a specific hardware counter system.
+ *
+ */
+
+#ifndef _KBASE_HWCNT_TYPES_H_
+#define _KBASE_HWCNT_TYPES_H_
+
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include "mali_malisw.h"
+
+/* Number of bytes in each bitfield */
+#define KBASE_HWCNT_BITFIELD_BYTES (sizeof(u64))
+
+/* Number of bits in each bitfield */
+#define KBASE_HWCNT_BITFIELD_BITS (KBASE_HWCNT_BITFIELD_BYTES * BITS_PER_BYTE)
+
+/* Number of bytes for each counter value */
+#define KBASE_HWCNT_VALUE_BYTES (sizeof(u32))
+
+/* Number of bits in an availability mask (i.e. max total number of block
+ * instances supported in a Hardware Counter System)
+ */
+#define KBASE_HWCNT_AVAIL_MASK_BITS (sizeof(u64) * BITS_PER_BYTE)
+
+/**
+ * struct kbase_hwcnt_block_description - Description of one or more identical,
+ *                                        contiguous Hardware Counter Blocks.
+ * @type: The arbitrary identifier used to identify the type of the block.
+ * @inst_cnt: The number of Instances of the block.
+ * @hdr_cnt: The number of 32-bit Block Headers in the block.
+ * @ctr_cnt: The number of 32-bit Block Counters in the block.
+ */
+struct kbase_hwcnt_block_description {
+ u64 type;
+ size_t inst_cnt;
+ size_t hdr_cnt;
+ size_t ctr_cnt;
+};
+
+/**
+ * struct kbase_hwcnt_group_description - Description of one or more identical,
+ * contiguous Hardware Counter Groups.
+ * @type: The arbitrary identifier used to identify the type of the group.
+ * @blk_cnt: The number of types of Hardware Counter Block in the group.
+ * @blks: Non-NULL pointer to an array of blk_cnt block descriptions,
+ * describing each type of Hardware Counter Block in the group.
+ */
+struct kbase_hwcnt_group_description {
+ u64 type;
+ size_t blk_cnt;
+ const struct kbase_hwcnt_block_description *blks;
+};
+
+/**
+ * struct kbase_hwcnt_description - Description of a Hardware Counter System.
+ * @grp_cnt: The number of Hardware Counter Groups.
+ * @grps: Non-NULL pointer to an array of grp_cnt group descriptions,
+ * describing each Hardware Counter Group in the system.
+ * @avail_mask: Flat Availability Mask for all block instances in the system.
+ */
+struct kbase_hwcnt_description {
+ size_t grp_cnt;
+ const struct kbase_hwcnt_group_description *grps;
+ u64 avail_mask;
+};
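+
+/*
+ * Illustrative description (a sketch, not a real GPU layout): one group
+ * holding a single block type with 4 instances, each of 4 headers and
+ * 60 counters, with all 4 instances marked as available:
+ *
+ *   static const struct kbase_hwcnt_block_description blk = {
+ *           .type = 0, .inst_cnt = 4, .hdr_cnt = 4, .ctr_cnt = 60,
+ *   };
+ *   static const struct kbase_hwcnt_group_description grp = {
+ *           .type = 0, .blk_cnt = 1, .blks = &blk,
+ *   };
+ *   static const struct kbase_hwcnt_description desc = {
+ *           .grp_cnt = 1, .grps = &grp, .avail_mask = 0xf,
+ *   };
+ */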
+
+/**
+ * struct kbase_hwcnt_block_metadata - Metadata describing the physical layout
+ * of a block in a Hardware Counter System's
+ * Dump Buffers and Enable Maps.
+ * @type: The arbitrary identifier used to identify the type of the
+ * block.
+ * @inst_cnt: The number of Instances of the block.
+ * @hdr_cnt: The number of 32-bit Block Headers in the block.
+ * @ctr_cnt: The number of 32-bit Block Counters in the block.
+ * @enable_map_index: Index in u64s into the parent's Enable Map where the
+ * Enable Map bitfields of the Block Instances described by
+ * this metadata start.
+ * @enable_map_stride: Stride in u64s between the Enable Maps of each of the
+ * Block Instances described by this metadata.
+ * @dump_buf_index: Index in u32s into the parent's Dump Buffer where the
+ * Dump Buffers of the Block Instances described by this
+ * metadata start.
+ * @dump_buf_stride: Stride in u32s between the Dump Buffers of each of the
+ * Block Instances described by this metadata.
+ * @avail_mask_index: Index in bits into the parent's Availability Mask where
+ * the Availability Masks of the Block Instances described
+ * by this metadata start.
+ */
+struct kbase_hwcnt_block_metadata {
+ u64 type;
+ size_t inst_cnt;
+ size_t hdr_cnt;
+ size_t ctr_cnt;
+ size_t enable_map_index;
+ size_t enable_map_stride;
+ size_t dump_buf_index;
+ size_t dump_buf_stride;
+ size_t avail_mask_index;
+};
+
+/**
+ * struct kbase_hwcnt_group_metadata - Metadata describing the physical layout
+ * of a group of blocks in a Hardware
+ * Counter System's Dump Buffers and Enable
+ * Maps.
+ * @type: The arbitrary identifier used to identify the type of the
+ * group.
+ * @blk_cnt: The number of types of Hardware Counter Block in the
+ * group.
+ * @blk_metadata: Non-NULL pointer to an array of blk_cnt block metadata,
+ * describing the physical layout of each type of Hardware
+ * Counter Block in the group.
+ * @enable_map_index: Index in u64s into the parent's Enable Map where the
+ * Enable Maps of the blocks within the group described by
+ * this metadata start.
+ * @dump_buf_index: Index in u32s into the parent's Dump Buffer where the
+ * Dump Buffers of the blocks within the group described by
+ *                    this metadata start.
+ * @avail_mask_index: Index in bits into the parent's Availability Mask where
+ * the Availability Masks of the blocks within the group
+ * described by this metadata start.
+ */
+struct kbase_hwcnt_group_metadata {
+ u64 type;
+ size_t blk_cnt;
+ const struct kbase_hwcnt_block_metadata *blk_metadata;
+ size_t enable_map_index;
+ size_t dump_buf_index;
+ size_t avail_mask_index;
+};
+
+/**
+ * struct kbase_hwcnt_metadata - Metadata describing the physical layout
+ * of Dump Buffers and Enable Maps within a
+ * Hardware Counter System.
+ * @grp_cnt: The number of Hardware Counter Groups.
+ * @grp_metadata: Non-NULL pointer to an array of grp_cnt group metadata,
+ * describing the physical layout of each Hardware Counter
+ * Group in the system.
+ * @enable_map_bytes: The size in bytes of an Enable Map needed for the system.
+ * @dump_buf_bytes: The size in bytes of a Dump Buffer needed for the system.
+ * @avail_mask: The Availability Mask for the system.
+ */
+struct kbase_hwcnt_metadata {
+ size_t grp_cnt;
+ const struct kbase_hwcnt_group_metadata *grp_metadata;
+ size_t enable_map_bytes;
+ size_t dump_buf_bytes;
+ u64 avail_mask;
+};
+
+/**
+ * struct kbase_hwcnt_enable_map - Hardware Counter Enable Map. Array of u64
+ * bitfields.
+ * @metadata: Non-NULL pointer to metadata used to identify, and to describe
+ * the layout of the enable map.
+ * @enable_map: Non-NULL pointer of size metadata->enable_map_bytes to an array
+ * of u64 bitfields, each bit of which enables one hardware
+ * counter.
+ */
+struct kbase_hwcnt_enable_map {
+ const struct kbase_hwcnt_metadata *metadata;
+ u64 *enable_map;
+};
+
+/**
+ * struct kbase_hwcnt_dump_buffer - Hardware Counter Dump Buffer. Array of u32
+ * values.
+ * @metadata: Non-NULL pointer to metadata used to identify, and to describe
+ * the layout of the Dump Buffer.
+ * @dump_buf: Non-NULL pointer of size metadata->dump_buf_bytes to an array
+ * of u32 values.
+ */
+struct kbase_hwcnt_dump_buffer {
+ const struct kbase_hwcnt_metadata *metadata;
+ u32 *dump_buf;
+};
+
+/**
+ * struct kbase_hwcnt_dump_buffer_array - Hardware Counter Dump Buffer array.
+ * @page_addr: Address of allocated pages. A single allocation is used for all
+ * Dump Buffers in the array.
+ * @page_order: The allocation order of the pages.
+ * @buf_cnt: The number of allocated Dump Buffers.
+ * @bufs: Non-NULL pointer to the array of Dump Buffers.
+ */
+struct kbase_hwcnt_dump_buffer_array {
+ unsigned long page_addr;
+ unsigned int page_order;
+ size_t buf_cnt;
+ struct kbase_hwcnt_dump_buffer *bufs;
+};
+
+/**
+ * kbase_hwcnt_metadata_create() - Create a hardware counter metadata object
+ * from a description.
+ * @desc: Non-NULL pointer to a hardware counter description.
+ * @metadata: Non-NULL pointer to where created metadata will be stored on
+ * success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_metadata_create(
+ const struct kbase_hwcnt_description *desc,
+ const struct kbase_hwcnt_metadata **metadata);
+
+/**
+ * kbase_hwcnt_metadata_destroy() - Destroy a hardware counter metadata object.
+ * @metadata: Pointer to hardware counter metadata.
+ */
+void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata);
+
+/**
+ * kbase_hwcnt_metadata_group_count() - Get the number of groups.
+ * @metadata: Non-NULL pointer to metadata.
+ *
+ * Return: Number of hardware counter groups described by metadata.
+ */
+#define kbase_hwcnt_metadata_group_count(metadata) \
+ ((metadata)->grp_cnt)
+
+/**
+ * kbase_hwcnt_metadata_group_type() - Get the arbitrary type of a group.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp: Index of the group in the metadata.
+ *
+ * Return: Type of the group grp.
+ */
+#define kbase_hwcnt_metadata_group_type(metadata, grp) \
+ ((metadata)->grp_metadata[(grp)].type)
+
+/**
+ * kbase_hwcnt_metadata_block_count() - Get the number of blocks in a group.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp: Index of the group in the metadata.
+ *
+ * Return: Number of blocks in group grp.
+ */
+#define kbase_hwcnt_metadata_block_count(metadata, grp) \
+ ((metadata)->grp_metadata[(grp)].blk_cnt)
+
+/**
+ * kbase_hwcnt_metadata_block_type() - Get the arbitrary type of a block.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: Type of the block blk in group grp.
+ */
+#define kbase_hwcnt_metadata_block_type(metadata, grp, blk) \
+ ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].type)
+
+/**
+ * kbase_hwcnt_metadata_block_instance_count() - Get the number of instances of
+ * a block.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: Number of instances of block blk in group grp.
+ */
+#define kbase_hwcnt_metadata_block_instance_count(metadata, grp, blk) \
+ ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].inst_cnt)
+
+/**
+ * kbase_hwcnt_metadata_block_headers_count() - Get the number of counter
+ * headers.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: Number of u32 counter headers in each instance of block blk in
+ * group grp.
+ */
+#define kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk) \
+ ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].hdr_cnt)
+
+/**
+ * kbase_hwcnt_metadata_block_counters_count() - Get the number of counters.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: Number of u32 counters in each instance of block blk in group
+ * grp.
+ */
+#define kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk) \
+ ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].ctr_cnt)
+
+/**
+ * kbase_hwcnt_metadata_block_values_count() - Get the number of values.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: Number of u32 headers plus counters in each instance of block blk
+ * in group grp.
+ */
+#define kbase_hwcnt_metadata_block_values_count(metadata, grp, blk) \
+ (kbase_hwcnt_metadata_block_counters_count((metadata), (grp), (blk)) \
+ + kbase_hwcnt_metadata_block_headers_count((metadata), (grp), (blk)))
+
+/**
+ * kbase_hwcnt_metadata_for_each_block() - Iterate over each block instance in
+ * the metadata.
+ * @md: Non-NULL pointer to metadata.
+ * @grp: size_t variable used as group iterator.
+ * @blk: size_t variable used as block iterator.
+ * @blk_inst: size_t variable used as block instance iterator.
+ *
+ * Iteration order is group, then block, then block instance (i.e. linearly
+ * through memory).
+ */
+#define kbase_hwcnt_metadata_for_each_block(md, grp, blk, blk_inst) \
+ for ((grp) = 0; (grp) < kbase_hwcnt_metadata_group_count((md)); (grp)++) \
+ for ((blk) = 0; (blk) < kbase_hwcnt_metadata_block_count((md), (grp)); (blk)++) \
+ for ((blk_inst) = 0; (blk_inst) < kbase_hwcnt_metadata_block_instance_count((md), (grp), (blk)); (blk_inst)++)
+
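+/*
+ * Example (illustrative sketch): walk every available block instance and
+ * total the number of u32 values it contributes to a dump buffer. The
+ * metadata pointer md is assumed to come from kbase_hwcnt_metadata_create().
+ *
+ *    size_t grp, blk, blk_inst;
+ *    size_t total = 0;
+ *
+ *    kbase_hwcnt_metadata_for_each_block(md, grp, blk, blk_inst) {
+ *        if (kbase_hwcnt_metadata_block_instance_avail(
+ *                md, grp, blk, blk_inst))
+ *            total += kbase_hwcnt_metadata_block_values_count(
+ *                md, grp, blk);
+ *    }
+ */
+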
+/**
+ * kbase_hwcnt_metadata_block_avail_bit() - Get the bit index into the avail
+ * mask corresponding to the block.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: The bit index into the avail mask for the block.
+ */
+static inline size_t kbase_hwcnt_metadata_block_avail_bit(
+ const struct kbase_hwcnt_metadata *metadata,
+ size_t grp,
+ size_t blk)
+{
+ const size_t bit =
+ metadata->grp_metadata[grp].avail_mask_index +
+ metadata->grp_metadata[grp].blk_metadata[blk].avail_mask_index;
+
+ return bit;
+}
+
+/**
+ * kbase_hwcnt_metadata_block_instance_avail() - Check if a block instance is
+ * available.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ *
+ * Return: true if the block instance is available, else false.
+ */
+static inline bool kbase_hwcnt_metadata_block_instance_avail(
+ const struct kbase_hwcnt_metadata *metadata,
+ size_t grp,
+ size_t blk,
+ size_t blk_inst)
+{
+ const size_t bit = kbase_hwcnt_metadata_block_avail_bit(
+ metadata, grp, blk) + blk_inst;
+ const u64 mask = 1ull << bit;
+
+ return (metadata->avail_mask & mask) != 0;
+}
+
+/**
+ * kbase_hwcnt_enable_map_alloc() - Allocate an enable map.
+ * @metadata: Non-NULL pointer to metadata describing the system.
+ * @enable_map: Non-NULL pointer to enable map to be initialised. Will be
+ * initialised to all zeroes (i.e. all counters disabled).
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_enable_map_alloc(
+ const struct kbase_hwcnt_metadata *metadata,
+ struct kbase_hwcnt_enable_map *enable_map);
+
+/**
+ * kbase_hwcnt_enable_map_free() - Free an enable map.
+ * @enable_map: Enable map to be freed.
+ *
+ * Can be safely called on an all-zeroed enable map structure, or on an already
+ * freed enable map.
+ */
+void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map);
+
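+/*
+ * Example (illustrative sketch): allocate an enable map against an existing
+ * metadata object, enable everything, then free it. Error handling is
+ * reduced to a single check.
+ *
+ *    struct kbase_hwcnt_enable_map map = { 0 };
+ *
+ *    if (!kbase_hwcnt_enable_map_alloc(metadata, &map)) {
+ *        kbase_hwcnt_enable_map_enable_all(&map);
+ *        ... pass &map to a dump or virtualizer API ...
+ *        kbase_hwcnt_enable_map_free(&map);
+ *    }
+ */
+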
+/**
+ * kbase_hwcnt_enable_map_block_instance() - Get the pointer to a block
+ * instance's enable map.
+ * @map: Non-NULL pointer to (const) enable map.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ *
+ * Return: (const) u64* to the bitfield(s) used as the enable map for the
+ * block instance.
+ */
+#define kbase_hwcnt_enable_map_block_instance(map, grp, blk, blk_inst) \
+ ((map)->enable_map + \
+ (map)->metadata->grp_metadata[(grp)].enable_map_index + \
+ (map)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].enable_map_index + \
+ (map)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].enable_map_stride * (blk_inst))
+
+/**
+ * kbase_hwcnt_bitfield_count() - Calculate the number of u64 bitfields
+ *                                required to hold at least one bit per value.
+ * @val_cnt: Number of values.
+ *
+ * Return: Number of required bitfields.
+ */
+static inline size_t kbase_hwcnt_bitfield_count(size_t val_cnt)
+{
+ return (val_cnt + KBASE_HWCNT_BITFIELD_BITS - 1) /
+ KBASE_HWCNT_BITFIELD_BITS;
+}
+
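+/*
+ * Worked example (illustrative): with KBASE_HWCNT_BITFIELD_BITS equal to 64
+ * (one bit per bit of a u64 bitfield), a block with 64 values needs a single
+ * bitfield, while a block with 65 values needs two:
+ * (65 + 64 - 1) / 64 == 2.
+ */
+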
+/**
+ * kbase_hwcnt_enable_map_block_disable_all() - Disable all values in a block.
+ * @dst: Non-NULL pointer to enable map.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ */
+static inline void kbase_hwcnt_enable_map_block_disable_all(
+ struct kbase_hwcnt_enable_map *dst,
+ size_t grp,
+ size_t blk,
+ size_t blk_inst)
+{
+ const size_t val_cnt = kbase_hwcnt_metadata_block_values_count(
+ dst->metadata, grp, blk);
+ const size_t bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt);
+ u64 *block_enable_map = kbase_hwcnt_enable_map_block_instance(
+ dst, grp, blk, blk_inst);
+
+ memset(block_enable_map, 0, bitfld_cnt * KBASE_HWCNT_BITFIELD_BYTES);
+}
+
+/**
+ * kbase_hwcnt_enable_map_disable_all() - Disable all values in the enable map.
+ * @dst: Non-NULL pointer to enable map to zero.
+ */
+static inline void kbase_hwcnt_enable_map_disable_all(
+ struct kbase_hwcnt_enable_map *dst)
+{
+ memset(dst->enable_map, 0, dst->metadata->enable_map_bytes);
+}
+
+/**
+ * kbase_hwcnt_enable_map_block_enable_all() - Enable all values in a block.
+ * @dst: Non-NULL pointer to enable map.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ */
+static inline void kbase_hwcnt_enable_map_block_enable_all(
+ struct kbase_hwcnt_enable_map *dst,
+ size_t grp,
+ size_t blk,
+ size_t blk_inst)
+{
+ const size_t val_cnt = kbase_hwcnt_metadata_block_values_count(
+ dst->metadata, grp, blk);
+ const size_t bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt);
+ u64 *block_enable_map = kbase_hwcnt_enable_map_block_instance(
+ dst, grp, blk, blk_inst);
+
+ size_t bitfld_idx;
+
+ for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) {
+ const u64 remaining_values = val_cnt -
+ (bitfld_idx * KBASE_HWCNT_BITFIELD_BITS);
+ u64 block_enable_map_mask = U64_MAX;
+
+ if (remaining_values < KBASE_HWCNT_BITFIELD_BITS)
+ block_enable_map_mask = (1ull << remaining_values) - 1;
+
+ block_enable_map[bitfld_idx] = block_enable_map_mask;
+ }
+}
+
+/**
+ * kbase_hwcnt_enable_map_enable_all() - Enable all values in an enable map.
+ * @dst: Non-NULL pointer to enable map.
+ */
+static inline void kbase_hwcnt_enable_map_enable_all(
+ struct kbase_hwcnt_enable_map *dst)
+{
+ size_t grp, blk, blk_inst;
+
+ kbase_hwcnt_metadata_for_each_block(dst->metadata, grp, blk, blk_inst)
+ kbase_hwcnt_enable_map_block_enable_all(
+ dst, grp, blk, blk_inst);
+}
+
+/**
+ * kbase_hwcnt_enable_map_copy() - Copy an enable map to another.
+ * @dst: Non-NULL pointer to destination enable map.
+ * @src: Non-NULL pointer to source enable map.
+ *
+ * The dst and src MUST have been created from the same metadata.
+ */
+static inline void kbase_hwcnt_enable_map_copy(
+ struct kbase_hwcnt_enable_map *dst,
+ const struct kbase_hwcnt_enable_map *src)
+{
+ memcpy(dst->enable_map,
+ src->enable_map,
+ dst->metadata->enable_map_bytes);
+}
+
+/**
+ * kbase_hwcnt_enable_map_union() - Union dst and src enable maps into dst.
+ * @dst: Non-NULL pointer to destination enable map.
+ * @src: Non-NULL pointer to source enable map.
+ *
+ * The dst and src MUST have been created from the same metadata.
+ */
+static inline void kbase_hwcnt_enable_map_union(
+ struct kbase_hwcnt_enable_map *dst,
+ const struct kbase_hwcnt_enable_map *src)
+{
+ const size_t bitfld_count =
+ dst->metadata->enable_map_bytes / KBASE_HWCNT_BITFIELD_BYTES;
+ size_t i;
+
+ for (i = 0; i < bitfld_count; i++)
+ dst->enable_map[i] |= src->enable_map[i];
+}
+
+/**
+ * kbase_hwcnt_enable_map_block_enabled() - Check if any values in a block
+ * instance are enabled.
+ * @enable_map: Non-NULL pointer to enable map.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ *
+ * Return: true if any values in the block are enabled, else false.
+ */
+static inline bool kbase_hwcnt_enable_map_block_enabled(
+ const struct kbase_hwcnt_enable_map *enable_map,
+ size_t grp,
+ size_t blk,
+ size_t blk_inst)
+{
+ bool any_enabled = false;
+ const size_t val_cnt = kbase_hwcnt_metadata_block_values_count(
+ enable_map->metadata, grp, blk);
+ const size_t bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt);
+ const u64 *block_enable_map = kbase_hwcnt_enable_map_block_instance(
+ enable_map, grp, blk, blk_inst);
+
+ size_t bitfld_idx;
+
+ for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) {
+ const u64 remaining_values = val_cnt -
+ (bitfld_idx * KBASE_HWCNT_BITFIELD_BITS);
+ u64 block_enable_map_mask = U64_MAX;
+
+ if (remaining_values < KBASE_HWCNT_BITFIELD_BITS)
+ block_enable_map_mask = (1ull << remaining_values) - 1;
+
+ any_enabled = any_enabled ||
+ (block_enable_map[bitfld_idx] & block_enable_map_mask);
+ }
+
+ return any_enabled;
+}
+
+/**
+ * kbase_hwcnt_enable_map_any_enabled() - Check if any values are enabled.
+ * @enable_map: Non-NULL pointer to enable map.
+ *
+ * Return: true if any values are enabled, else false.
+ */
+static inline bool kbase_hwcnt_enable_map_any_enabled(
+ const struct kbase_hwcnt_enable_map *enable_map)
+{
+ size_t grp, blk, blk_inst;
+
+ kbase_hwcnt_metadata_for_each_block(
+ enable_map->metadata, grp, blk, blk_inst) {
+ if (kbase_hwcnt_enable_map_block_enabled(
+ enable_map, grp, blk, blk_inst))
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * kbase_hwcnt_enable_map_block_value_enabled() - Check if a value in a block
+ * instance is enabled.
+ * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to
+ * kbase_hwcnt_enable_map_block_instance.
+ * @val_idx: Index of the value to check in the block instance.
+ *
+ * Return: true if the value was enabled, else false.
+ */
+static inline bool kbase_hwcnt_enable_map_block_value_enabled(
+ const u64 *bitfld,
+ size_t val_idx)
+{
+ const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS;
+ const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS;
+ const u64 mask = 1ull << bit;
+
+ return (bitfld[idx] & mask) != 0;
+}
+
+/**
+ * kbase_hwcnt_enable_map_block_enable_value() - Enable a value in a block
+ * instance.
+ * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to
+ * kbase_hwcnt_enable_map_block_instance.
+ * @val_idx: Index of the value to enable in the block instance.
+ */
+static inline void kbase_hwcnt_enable_map_block_enable_value(
+ u64 *bitfld,
+ size_t val_idx)
+{
+ const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS;
+ const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS;
+ const u64 mask = 1ull << bit;
+
+ bitfld[idx] |= mask;
+}
+
+/**
+ * kbase_hwcnt_enable_map_block_disable_value() - Disable a value in a block
+ * instance.
+ * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to
+ * kbase_hwcnt_enable_map_block_instance.
+ * @val_idx: Index of the value to disable in the block instance.
+ */
+static inline void kbase_hwcnt_enable_map_block_disable_value(
+ u64 *bitfld,
+ size_t val_idx)
+{
+ const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS;
+ const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS;
+ const u64 mask = 1ull << bit;
+
+ bitfld[idx] &= ~mask;
+}
+
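+/*
+ * Example (illustrative sketch): enable only value 5 of block instance
+ * (group 0, block 0, instance 0), leaving everything else disabled. The
+ * enable map "map" is assumed to have been allocated already.
+ *
+ *    u64 *blk_em;
+ *
+ *    kbase_hwcnt_enable_map_disable_all(&map);
+ *    blk_em = kbase_hwcnt_enable_map_block_instance(&map, 0, 0, 0);
+ *    kbase_hwcnt_enable_map_block_enable_value(blk_em, 5);
+ */
+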
+/**
+ * kbase_hwcnt_dump_buffer_alloc() - Allocate a dump buffer.
+ * @metadata: Non-NULL pointer to metadata describing the system.
+ * @dump_buf: Non-NULL pointer to dump buffer to be initialised. Will be
+ * initialised to undefined values, so must be used as a copy dest,
+ * or cleared before use.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_dump_buffer_alloc(
+ const struct kbase_hwcnt_metadata *metadata,
+ struct kbase_hwcnt_dump_buffer *dump_buf);
+
+/**
+ * kbase_hwcnt_dump_buffer_free() - Free a dump buffer.
+ * @dump_buf: Dump buffer to be freed.
+ *
+ * Can be safely called on an all-zeroed dump buffer structure, or on an already
+ * freed dump buffer.
+ */
+void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf);
+
+/**
+ * kbase_hwcnt_dump_buffer_array_alloc() - Allocate an array of dump buffers.
+ * @metadata: Non-NULL pointer to metadata describing the system.
+ * @n: Number of dump buffers to allocate
+ * @dump_bufs: Non-NULL pointer to dump buffer array to be initialised. Each
+ * dump buffer in the array will be initialised to undefined values,
+ * so must be used as a copy dest, or cleared before use.
+ *
+ * A single contiguous page allocation will be used for all of the buffers
+ * inside the array, where:
+ * dump_bufs[n].dump_buf == page_addr + n * metadata.dump_buf_bytes
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_dump_buffer_array_alloc(
+ const struct kbase_hwcnt_metadata *metadata,
+ size_t n,
+ struct kbase_hwcnt_dump_buffer_array *dump_bufs);
+
+/**
+ * kbase_hwcnt_dump_buffer_array_free() - Free a dump buffer array.
+ * @dump_bufs: Dump buffer array to be freed.
+ *
+ * Can be safely called on an all-zeroed dump buffer array structure, or on an
+ * already freed dump buffer array.
+ */
+void kbase_hwcnt_dump_buffer_array_free(
+ struct kbase_hwcnt_dump_buffer_array *dump_bufs);
+
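+/*
+ * Example (illustrative sketch): allocate four dump buffers backed by a
+ * single page allocation, use the second one, then free the whole array.
+ * The metadata pointer is assumed valid.
+ *
+ *    struct kbase_hwcnt_dump_buffer_array arr = { 0 };
+ *
+ *    if (!kbase_hwcnt_dump_buffer_array_alloc(metadata, 4, &arr)) {
+ *        struct kbase_hwcnt_dump_buffer *buf = &arr.bufs[1];
+ *
+ *        ... dump into buf ...
+ *
+ *        kbase_hwcnt_dump_buffer_array_free(&arr);
+ *    }
+ */
+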
+/**
+ * kbase_hwcnt_dump_buffer_block_instance() - Get the pointer to a block
+ * instance's dump buffer.
+ * @buf: Non-NULL pointer to (const) dump buffer.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ *
+ * Return: (const) u32* to the dump buffer for the block instance.
+ */
+#define kbase_hwcnt_dump_buffer_block_instance(buf, grp, blk, blk_inst) \
+ ((buf)->dump_buf + \
+ (buf)->metadata->grp_metadata[(grp)].dump_buf_index + \
+ (buf)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].dump_buf_index + \
+ (buf)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].dump_buf_stride * (blk_inst))
+
+/**
+ * kbase_hwcnt_dump_buffer_zero() - Zero all enabled values in dst.
+ * After the operation, all non-enabled values
+ * will be undefined.
+ * @dst: Non-NULL pointer to dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * The dst and dst_enable_map MUST have been created from the same metadata.
+ */
+void kbase_hwcnt_dump_buffer_zero(
+ struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_enable_map *dst_enable_map);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_zero() - Zero all values in a block.
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @val_cnt: Number of values in the block.
+ */
+static inline void kbase_hwcnt_dump_buffer_block_zero(
+ u32 *dst_blk,
+ size_t val_cnt)
+{
+ memset(dst_blk, 0, (val_cnt * KBASE_HWCNT_VALUE_BYTES));
+}
+
+/**
+ * kbase_hwcnt_dump_buffer_zero_strict() - Zero all values in dst.
+ * After the operation, all values
+ * (including padding bytes) will be
+ * zero.
+ * Slower than the non-strict variant.
+ * @dst: Non-NULL pointer to dump buffer.
+ */
+void kbase_hwcnt_dump_buffer_zero_strict(
+ struct kbase_hwcnt_dump_buffer *dst);
+
+/**
+ * kbase_hwcnt_dump_buffer_zero_non_enabled() - Zero all non-enabled values in
+ * dst (including padding bytes and
+ * unavailable blocks).
+ * After the operation, all enabled
+ * values will be unchanged.
+ * @dst: Non-NULL pointer to dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * The dst and dst_enable_map MUST have been created from the same metadata.
+ */
+void kbase_hwcnt_dump_buffer_zero_non_enabled(
+ struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_enable_map *dst_enable_map);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_zero_non_enabled() - Zero all non-enabled
+ * values in a block.
+ * After the operation, all
+ * enabled values will be
+ * unchanged.
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to
+ * kbase_hwcnt_enable_map_block_instance.
+ * @val_cnt: Number of values in the block.
+ */
+static inline void kbase_hwcnt_dump_buffer_block_zero_non_enabled(
+ u32 *dst_blk,
+ const u64 *blk_em,
+ size_t val_cnt)
+{
+ size_t val;
+
+ for (val = 0; val < val_cnt; val++) {
+ if (!kbase_hwcnt_enable_map_block_value_enabled(blk_em, val))
+ dst_blk[val] = 0;
+ }
+}
+
+/**
+ * kbase_hwcnt_dump_buffer_copy() - Copy all enabled values from src to dst.
+ * After the operation, all non-enabled values
+ * will be undefined.
+ * @dst: Non-NULL pointer to dst dump buffer.
+ * @src: Non-NULL pointer to src dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * The dst, src, and dst_enable_map MUST have been created from the same
+ * metadata.
+ */
+void kbase_hwcnt_dump_buffer_copy(
+ struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_copy() - Copy all block values from src to dst.
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @src_blk: Non-NULL pointer to src block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @val_cnt: Number of values in the block.
+ */
+static inline void kbase_hwcnt_dump_buffer_block_copy(
+ u32 *dst_blk,
+ const u32 *src_blk,
+ size_t val_cnt)
+{
+ /* Copy all the counters in the block instance.
+ * Values of non-enabled counters are undefined.
+ */
+ memcpy(dst_blk, src_blk, (val_cnt * KBASE_HWCNT_VALUE_BYTES));
+}
+
+/**
+ * kbase_hwcnt_dump_buffer_copy_strict() - Copy all enabled values from src to
+ * dst.
+ * After the operation, all non-enabled
+ * values (including padding bytes) will
+ * be zero.
+ * Slower than the non-strict variant.
+ * @dst: Non-NULL pointer to dst dump buffer.
+ * @src: Non-NULL pointer to src dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * The dst, src, and dst_enable_map MUST have been created from the same
+ * metadata.
+ */
+void kbase_hwcnt_dump_buffer_copy_strict(
+ struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_copy_strict() - Copy all enabled block values
+ * from src to dst.
+ * After the operation, all
+ * non-enabled values will be
+ * zero.
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @src_blk: Non-NULL pointer to src block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to
+ * kbase_hwcnt_enable_map_block_instance.
+ * @val_cnt: Number of values in the block.
+ */
+static inline void kbase_hwcnt_dump_buffer_block_copy_strict(
+ u32 *dst_blk,
+ const u32 *src_blk,
+ const u64 *blk_em,
+ size_t val_cnt)
+{
+ size_t val;
+
+ for (val = 0; val < val_cnt; val++) {
+ bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled(
+ blk_em, val);
+
+ dst_blk[val] = val_enabled ? src_blk[val] : 0;
+ }
+}
+
+/**
+ * kbase_hwcnt_dump_buffer_accumulate() - Copy all enabled headers and
+ * accumulate all enabled counters from
+ * src to dst.
+ * After the operation, all non-enabled
+ * values will be undefined.
+ * @dst: Non-NULL pointer to dst dump buffer.
+ * @src: Non-NULL pointer to src dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * The dst, src, and dst_enable_map MUST have been created from the same
+ * metadata.
+ */
+void kbase_hwcnt_dump_buffer_accumulate(
+ struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_accumulate() - Copy all block headers and
+ * accumulate all block counters
+ * from src to dst.
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @src_blk: Non-NULL pointer to src block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @hdr_cnt: Number of headers in the block.
+ * @ctr_cnt: Number of counters in the block.
+ */
+static inline void kbase_hwcnt_dump_buffer_block_accumulate(
+ u32 *dst_blk,
+ const u32 *src_blk,
+ size_t hdr_cnt,
+ size_t ctr_cnt)
+{
+ size_t ctr;
+ /* Copy all the headers in the block instance.
+ * Values of non-enabled headers are undefined.
+ */
+ memcpy(dst_blk, src_blk, hdr_cnt * KBASE_HWCNT_VALUE_BYTES);
+
+ /* Accumulate all the counters in the block instance.
+ * Values of non-enabled counters are undefined.
+ */
+ for (ctr = hdr_cnt; ctr < ctr_cnt + hdr_cnt; ctr++) {
+ u32 *dst_ctr = dst_blk + ctr;
+ const u32 *src_ctr = src_blk + ctr;
+
+ const u32 src_counter = *src_ctr;
+ const u32 dst_counter = *dst_ctr;
+
+ /* Saturating add */
+ u32 accumulated = src_counter + dst_counter;
+
+ if (accumulated < src_counter)
+ accumulated = U32_MAX;
+
+ *dst_ctr = accumulated;
+ }
+}
+
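+/*
+ * Worked example of the saturating add above (illustrative): if a dst
+ * counter holds 0xFFFFFFF0 and the corresponding src counter holds 0x20,
+ * the u32 sum wraps to 0x10, which is smaller than the src value, so the
+ * result is clamped to U32_MAX instead of wrapping.
+ */
+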
+/**
+ * kbase_hwcnt_dump_buffer_accumulate_strict() - Copy all enabled headers and
+ * accumulate all enabled counters
+ * from src to dst.
+ * After the operation, all
+ * non-enabled values (including
+ * padding bytes) will be zero.
+ * Slower than the non-strict
+ * variant.
+ * @dst: Non-NULL pointer to dst dump buffer.
+ * @src: Non-NULL pointer to src dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * The dst, src, and dst_enable_map MUST have been created from the same
+ * metadata.
+ */
+void kbase_hwcnt_dump_buffer_accumulate_strict(
+ struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_accumulate_strict() - Copy all enabled block
+ * headers and accumulate
+ * all block counters from
+ * src to dst.
+ * After the operation, all
+ * non-enabled values will
+ * be zero.
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @src_blk: Non-NULL pointer to src block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to
+ * kbase_hwcnt_enable_map_block_instance.
+ * @hdr_cnt: Number of headers in the block.
+ * @ctr_cnt: Number of counters in the block.
+ */
+static inline void kbase_hwcnt_dump_buffer_block_accumulate_strict(
+ u32 *dst_blk,
+ const u32 *src_blk,
+ const u64 *blk_em,
+ size_t hdr_cnt,
+ size_t ctr_cnt)
+{
+ size_t ctr;
+
+ kbase_hwcnt_dump_buffer_block_copy_strict(
+ dst_blk, src_blk, blk_em, hdr_cnt);
+
+ for (ctr = hdr_cnt; ctr < ctr_cnt + hdr_cnt; ctr++) {
+ bool ctr_enabled = kbase_hwcnt_enable_map_block_value_enabled(
+ blk_em, ctr);
+
+ u32 *dst_ctr = dst_blk + ctr;
+ const u32 *src_ctr = src_blk + ctr;
+
+ const u32 src_counter = *src_ctr;
+ const u32 dst_counter = *dst_ctr;
+
+ /* Saturating add */
+ u32 accumulated = src_counter + dst_counter;
+
+ if (accumulated < src_counter)
+ accumulated = U32_MAX;
+
+ *dst_ctr = ctr_enabled ? accumulated : 0;
+ }
+}
+
+#endif /* _KBASE_HWCNT_TYPES_H_ */
diff --git a/mali_kbase/mali_kbase_hwcnt_virtualizer.c b/mali_kbase/mali_kbase_hwcnt_virtualizer.c
new file mode 100644
index 0000000..26e9852
--- /dev/null
+++ b/mali_kbase/mali_kbase_hwcnt_virtualizer.c
@@ -0,0 +1,688 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_hwcnt_virtualizer.h"
+#include "mali_kbase_hwcnt_accumulator.h"
+#include "mali_kbase_hwcnt_context.h"
+#include "mali_kbase_hwcnt_types.h"
+#include "mali_malisw.h"
+#include "mali_kbase_debug.h"
+#include "mali_kbase_linux.h"
+
+#include <linux/mutex.h>
+#include <linux/slab.h>
+
+/**
+ * struct kbase_hwcnt_virtualizer - Hardware counter virtualizer structure.
+ * @hctx: Hardware counter context being virtualized.
+ * @metadata: Hardware counter metadata.
+ * @lock: Lock acquired at all entrypoints, to protect mutable state.
+ * @client_count: Current number of virtualizer clients.
+ * @clients: List of virtualizer clients.
+ * @accum: Hardware counter accumulator. NULL if no clients.
+ * @scratch_map: Enable map used as scratch space during counter changes.
+ * @scratch_buf: Dump buffer used as scratch space during dumps.
+ */
+struct kbase_hwcnt_virtualizer {
+ struct kbase_hwcnt_context *hctx;
+ const struct kbase_hwcnt_metadata *metadata;
+ struct mutex lock;
+ size_t client_count;
+ struct list_head clients;
+ struct kbase_hwcnt_accumulator *accum;
+ struct kbase_hwcnt_enable_map scratch_map;
+ struct kbase_hwcnt_dump_buffer scratch_buf;
+};
+
+/**
+ * struct kbase_hwcnt_virtualizer_client - Virtualizer client structure.
+ * @node: List node used for virtualizer client list.
+ * @hvirt: Hardware counter virtualizer.
+ * @enable_map: Enable map with client's current enabled counters.
+ * @accum_buf: Dump buffer with client's current accumulated counters.
+ * @has_accum: True if accum_buf contains any accumulated counters.
+ * @ts_start_ns: Counter collection start time of current dump.
+ */
+struct kbase_hwcnt_virtualizer_client {
+ struct list_head node;
+ struct kbase_hwcnt_virtualizer *hvirt;
+ struct kbase_hwcnt_enable_map enable_map;
+ struct kbase_hwcnt_dump_buffer accum_buf;
+ bool has_accum;
+ u64 ts_start_ns;
+};
+
+const struct kbase_hwcnt_metadata *kbase_hwcnt_virtualizer_metadata(
+ struct kbase_hwcnt_virtualizer *hvirt)
+{
+ if (!hvirt)
+ return NULL;
+
+ return hvirt->metadata;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_metadata);
+
+/**
+ * kbasep_hwcnt_virtualizer_client_free - Free a virtualizer client's memory.
+ * @hvcli: Pointer to virtualizer client.
+ *
+ * Will safely free a client in any partial state of construction.
+ */
+static void kbasep_hwcnt_virtualizer_client_free(
+ struct kbase_hwcnt_virtualizer_client *hvcli)
+{
+ if (!hvcli)
+ return;
+
+ kbase_hwcnt_dump_buffer_free(&hvcli->accum_buf);
+ kbase_hwcnt_enable_map_free(&hvcli->enable_map);
+ kfree(hvcli);
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_alloc - Allocate memory for a virtualizer
+ * client.
+ * @metadata: Non-NULL pointer to counter metadata.
+ * @out_hvcli: Non-NULL pointer to where created client will be stored on
+ * success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_virtualizer_client_alloc(
+ const struct kbase_hwcnt_metadata *metadata,
+ struct kbase_hwcnt_virtualizer_client **out_hvcli)
+{
+ int errcode;
+ struct kbase_hwcnt_virtualizer_client *hvcli = NULL;
+
+ WARN_ON(!metadata);
+ WARN_ON(!out_hvcli);
+
+ hvcli = kzalloc(sizeof(*hvcli), GFP_KERNEL);
+ if (!hvcli)
+ return -ENOMEM;
+
+ errcode = kbase_hwcnt_enable_map_alloc(metadata, &hvcli->enable_map);
+ if (errcode)
+ goto error;
+
+ errcode = kbase_hwcnt_dump_buffer_alloc(metadata, &hvcli->accum_buf);
+ if (errcode)
+ goto error;
+
+ *out_hvcli = hvcli;
+ return 0;
+error:
+ kbasep_hwcnt_virtualizer_client_free(hvcli);
+ return errcode;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_accumulate - Accumulate a dump buffer into a
+ * client's accumulation buffer.
+ * @hvcli: Non-NULL pointer to virtualizer client.
+ * @dump_buf: Non-NULL pointer to dump buffer to accumulate from.
+ */
+static void kbasep_hwcnt_virtualizer_client_accumulate(
+ struct kbase_hwcnt_virtualizer_client *hvcli,
+ const struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+ WARN_ON(!hvcli);
+ WARN_ON(!dump_buf);
+ lockdep_assert_held(&hvcli->hvirt->lock);
+
+ if (hvcli->has_accum) {
+ /* If already some accumulation, accumulate */
+ kbase_hwcnt_dump_buffer_accumulate(
+ &hvcli->accum_buf, dump_buf, &hvcli->enable_map);
+ } else {
+ /* If no accumulation, copy */
+ kbase_hwcnt_dump_buffer_copy(
+ &hvcli->accum_buf, dump_buf, &hvcli->enable_map);
+ }
+ hvcli->has_accum = true;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_accumulator_term - Terminate the hardware counter
+ * accumulator after final client
+ * removal.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ *
+ * Will safely terminate the accumulator in any partial state of initialisation.
+ */
+static void kbasep_hwcnt_virtualizer_accumulator_term(
+ struct kbase_hwcnt_virtualizer *hvirt)
+{
+ WARN_ON(!hvirt);
+ lockdep_assert_held(&hvirt->lock);
+ WARN_ON(hvirt->client_count);
+
+ kbase_hwcnt_dump_buffer_free(&hvirt->scratch_buf);
+ kbase_hwcnt_enable_map_free(&hvirt->scratch_map);
+ kbase_hwcnt_accumulator_release(hvirt->accum);
+ hvirt->accum = NULL;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_accumulator_init - Initialise the hardware counter
+ * accumulator before first client
+ * addition.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_virtualizer_accumulator_init(
+ struct kbase_hwcnt_virtualizer *hvirt)
+{
+ int errcode;
+
+ WARN_ON(!hvirt);
+ lockdep_assert_held(&hvirt->lock);
+ WARN_ON(hvirt->client_count);
+ WARN_ON(hvirt->accum);
+
+ errcode = kbase_hwcnt_accumulator_acquire(
+ hvirt->hctx, &hvirt->accum);
+ if (errcode)
+ goto error;
+
+ errcode = kbase_hwcnt_enable_map_alloc(
+ hvirt->metadata, &hvirt->scratch_map);
+ if (errcode)
+ goto error;
+
+ errcode = kbase_hwcnt_dump_buffer_alloc(
+ hvirt->metadata, &hvirt->scratch_buf);
+ if (errcode)
+ goto error;
+
+ return 0;
+error:
+ kbasep_hwcnt_virtualizer_accumulator_term(hvirt);
+ return errcode;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_add - Add a newly allocated client to the
+ * virtualizer.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ * @hvcli: Non-NULL pointer to the virtualizer client to add.
+ * @enable_map: Non-NULL pointer to client's initial enable map.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_virtualizer_client_add(
+ struct kbase_hwcnt_virtualizer *hvirt,
+ struct kbase_hwcnt_virtualizer_client *hvcli,
+ const struct kbase_hwcnt_enable_map *enable_map)
+{
+ int errcode = 0;
+ u64 ts_start_ns;
+ u64 ts_end_ns;
+
+ WARN_ON(!hvirt);
+ WARN_ON(!hvcli);
+ WARN_ON(!enable_map);
+ lockdep_assert_held(&hvirt->lock);
+
+ if (hvirt->client_count == 0)
+ /* First client added, so initialise the accumulator */
+ errcode = kbasep_hwcnt_virtualizer_accumulator_init(hvirt);
+ if (errcode)
+ return errcode;
+
+ hvirt->client_count += 1;
+
+ if (hvirt->client_count == 1) {
+ /* First client, so just pass the enable map onwards as is */
+ errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum,
+ enable_map, &ts_start_ns, &ts_end_ns, NULL);
+ } else {
+ struct kbase_hwcnt_virtualizer_client *pos;
+
+ /* Make the scratch enable map the union of all enable maps */
+ kbase_hwcnt_enable_map_copy(
+ &hvirt->scratch_map, enable_map);
+ list_for_each_entry(pos, &hvirt->clients, node)
+ kbase_hwcnt_enable_map_union(
+ &hvirt->scratch_map, &pos->enable_map);
+
+ /* Set the counters with the new union enable map */
+ errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum,
+ &hvirt->scratch_map,
+ &ts_start_ns, &ts_end_ns,
+ &hvirt->scratch_buf);
+ /* Accumulate into only existing clients' accumulation bufs */
+ if (!errcode)
+ list_for_each_entry(pos, &hvirt->clients, node)
+ kbasep_hwcnt_virtualizer_client_accumulate(
+ pos, &hvirt->scratch_buf);
+ }
+ if (errcode)
+ goto error;
+
+ list_add(&hvcli->node, &hvirt->clients);
+ hvcli->hvirt = hvirt;
+ kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map);
+ hvcli->has_accum = false;
+ hvcli->ts_start_ns = ts_end_ns;
+
+ return 0;
+error:
+ hvirt->client_count -= 1;
+ if (hvirt->client_count == 0)
+ kbasep_hwcnt_virtualizer_accumulator_term(hvirt);
+ return errcode;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_remove - Remove a client from the
+ * virtualizer.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ * @hvcli: Non-NULL pointer to the virtualizer client to remove.
+ */
+static void kbasep_hwcnt_virtualizer_client_remove(
+ struct kbase_hwcnt_virtualizer *hvirt,
+ struct kbase_hwcnt_virtualizer_client *hvcli)
+{
+ int errcode = 0;
+ u64 ts_start_ns;
+ u64 ts_end_ns;
+
+ WARN_ON(!hvirt);
+ WARN_ON(!hvcli);
+ lockdep_assert_held(&hvirt->lock);
+
+ list_del(&hvcli->node);
+ hvirt->client_count -= 1;
+
+ if (hvirt->client_count == 0) {
+ /* Last client removed, so terminate the accumulator */
+ kbasep_hwcnt_virtualizer_accumulator_term(hvirt);
+ } else {
+ struct kbase_hwcnt_virtualizer_client *pos;
+ /* Make the scratch enable map the union of all enable maps */
+ kbase_hwcnt_enable_map_disable_all(&hvirt->scratch_map);
+ list_for_each_entry(pos, &hvirt->clients, node)
+ kbase_hwcnt_enable_map_union(
+ &hvirt->scratch_map, &pos->enable_map);
+ /* Set the counters with the new union enable map */
+ errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum,
+ &hvirt->scratch_map,
+ &ts_start_ns, &ts_end_ns,
+ &hvirt->scratch_buf);
+ /* Accumulate into remaining clients' accumulation bufs */
+ if (!errcode)
+ list_for_each_entry(pos, &hvirt->clients, node)
+ kbasep_hwcnt_virtualizer_client_accumulate(
+ pos, &hvirt->scratch_buf);
+ }
+ WARN_ON(errcode);
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_set_counters - Perform a dump of the client's
+ * currently enabled counters,
+ * and enable a new set of
+ * counters that will be used for
+ * subsequent dumps.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ * @hvcli: Non-NULL pointer to the virtualizer client.
+ * @enable_map: Non-NULL pointer to the new counter enable map for the client.
+ * Must have the same metadata as the virtualizer.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ * be written out to on success.
+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will
+ * be written out to on success.
+ * @dump_buf: Pointer to the buffer where the dump will be written out to on
+ * success. If non-NULL, must have the same metadata as the
+ * accumulator. If NULL, the dump will be discarded.
+ *
+ * Return: 0 on success or error code.
+ */
+static int kbasep_hwcnt_virtualizer_client_set_counters(
+ struct kbase_hwcnt_virtualizer *hvirt,
+ struct kbase_hwcnt_virtualizer_client *hvcli,
+ const struct kbase_hwcnt_enable_map *enable_map,
+ u64 *ts_start_ns,
+ u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+ int errcode;
+ struct kbase_hwcnt_virtualizer_client *pos;
+
+ WARN_ON(!hvirt);
+ WARN_ON(!hvcli);
+ WARN_ON(!enable_map);
+ WARN_ON(!ts_start_ns);
+ WARN_ON(!ts_end_ns);
+ WARN_ON(enable_map->metadata != hvirt->metadata);
+ WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata));
+ lockdep_assert_held(&hvirt->lock);
+
+ /* Make the scratch enable map the union of all enable maps */
+ kbase_hwcnt_enable_map_copy(&hvirt->scratch_map, enable_map);
+ list_for_each_entry(pos, &hvirt->clients, node)
+ /* Ignore the enable map of the selected client */
+ if (pos != hvcli)
+ kbase_hwcnt_enable_map_union(
+ &hvirt->scratch_map, &pos->enable_map);
+
+ /* Set the counters with the new union enable map */
+ errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum,
+ &hvirt->scratch_map, ts_start_ns, ts_end_ns,
+ &hvirt->scratch_buf);
+ if (errcode)
+ return errcode;
+
+ /* Accumulate into all accumulation bufs except the selected client's */
+ list_for_each_entry(pos, &hvirt->clients, node)
+ if (pos != hvcli)
+ kbasep_hwcnt_virtualizer_client_accumulate(
+ pos, &hvirt->scratch_buf);
+
+ /* Finally, write into the dump buf */
+ if (dump_buf) {
+ const struct kbase_hwcnt_dump_buffer *src = &hvirt->scratch_buf;
+
+ if (hvcli->has_accum) {
+ kbase_hwcnt_dump_buffer_accumulate(
+ &hvcli->accum_buf, src, &hvcli->enable_map);
+ src = &hvcli->accum_buf;
+ }
+ kbase_hwcnt_dump_buffer_copy(dump_buf, src, &hvcli->enable_map);
+ }
+ hvcli->has_accum = false;
+
+ /* Update the selected client's enable map */
+ kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map);
+
+ /* Fix up the timestamps */
+ *ts_start_ns = hvcli->ts_start_ns;
+ hvcli->ts_start_ns = *ts_end_ns;
+
+ return errcode;
+}
+
+int kbase_hwcnt_virtualizer_client_set_counters(
+ struct kbase_hwcnt_virtualizer_client *hvcli,
+ const struct kbase_hwcnt_enable_map *enable_map,
+ u64 *ts_start_ns,
+ u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+ int errcode;
+ struct kbase_hwcnt_virtualizer *hvirt;
+
+ if (!hvcli || !enable_map || !ts_start_ns || !ts_end_ns)
+ return -EINVAL;
+
+ hvirt = hvcli->hvirt;
+
+ if ((enable_map->metadata != hvirt->metadata) ||
+ (dump_buf && (dump_buf->metadata != hvirt->metadata)))
+ return -EINVAL;
+
+ mutex_lock(&hvirt->lock);
+
+ if ((hvirt->client_count == 1) && (!hvcli->has_accum)) {
+ /*
+ * If there's only one client with no prior accumulation, we can
+		 * completely skip the virtualization and just pass the call through
+ * to the accumulator, saving a fair few copies and
+ * accumulations.
+ */
+ errcode = kbase_hwcnt_accumulator_set_counters(
+ hvirt->accum, enable_map,
+ ts_start_ns, ts_end_ns, dump_buf);
+
+ if (!errcode) {
+ /* Update the selected client's enable map */
+ kbase_hwcnt_enable_map_copy(
+ &hvcli->enable_map, enable_map);
+
+ /* Fix up the timestamps */
+ *ts_start_ns = hvcli->ts_start_ns;
+ hvcli->ts_start_ns = *ts_end_ns;
+ }
+ } else {
+ /* Otherwise, do the full virtualize */
+ errcode = kbasep_hwcnt_virtualizer_client_set_counters(
+ hvirt, hvcli, enable_map,
+ ts_start_ns, ts_end_ns, dump_buf);
+ }
+
+ mutex_unlock(&hvirt->lock);
+
+ return errcode;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_set_counters);
+
+/**
+ * kbasep_hwcnt_virtualizer_client_dump - Perform a dump of the client's
+ * currently enabled counters.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ * @hvcli: Non-NULL pointer to the virtualizer client.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ * be written out to on success.
+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will
+ * be written out to on success.
+ * @dump_buf: Pointer to the buffer where the dump will be written out to on
+ * success. If non-NULL, must have the same metadata as the
+ * accumulator. If NULL, the dump will be discarded.
+ *
+ * Return: 0 on success or error code.
+ */
+static int kbasep_hwcnt_virtualizer_client_dump(
+ struct kbase_hwcnt_virtualizer *hvirt,
+ struct kbase_hwcnt_virtualizer_client *hvcli,
+ u64 *ts_start_ns,
+ u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+ int errcode;
+ struct kbase_hwcnt_virtualizer_client *pos;
+
+ WARN_ON(!hvirt);
+ WARN_ON(!hvcli);
+ WARN_ON(!ts_start_ns);
+ WARN_ON(!ts_end_ns);
+ WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata));
+ lockdep_assert_held(&hvirt->lock);
+
+ /* Perform the dump */
+ errcode = kbase_hwcnt_accumulator_dump(hvirt->accum,
+ ts_start_ns, ts_end_ns, &hvirt->scratch_buf);
+ if (errcode)
+ return errcode;
+
+ /* Accumulate into all accumulation bufs except the selected client's */
+ list_for_each_entry(pos, &hvirt->clients, node)
+ if (pos != hvcli)
+ kbasep_hwcnt_virtualizer_client_accumulate(
+ pos, &hvirt->scratch_buf);
+
+ /* Finally, write into the dump buf */
+ if (dump_buf) {
+ const struct kbase_hwcnt_dump_buffer *src = &hvirt->scratch_buf;
+
+ if (hvcli->has_accum) {
+ kbase_hwcnt_dump_buffer_accumulate(
+ &hvcli->accum_buf, src, &hvcli->enable_map);
+ src = &hvcli->accum_buf;
+ }
+ kbase_hwcnt_dump_buffer_copy(dump_buf, src, &hvcli->enable_map);
+ }
+ hvcli->has_accum = false;
+
+ /* Fix up the timestamps */
+ *ts_start_ns = hvcli->ts_start_ns;
+ hvcli->ts_start_ns = *ts_end_ns;
+
+ return errcode;
+}
+
+int kbase_hwcnt_virtualizer_client_dump(
+ struct kbase_hwcnt_virtualizer_client *hvcli,
+ u64 *ts_start_ns,
+ u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+ int errcode;
+ struct kbase_hwcnt_virtualizer *hvirt;
+
+ if (!hvcli || !ts_start_ns || !ts_end_ns)
+ return -EINVAL;
+
+ hvirt = hvcli->hvirt;
+
+ if (dump_buf && (dump_buf->metadata != hvirt->metadata))
+ return -EINVAL;
+
+ mutex_lock(&hvirt->lock);
+
+ if ((hvirt->client_count == 1) && (!hvcli->has_accum)) {
+ /*
+ * If there's only one client with no prior accumulation, we can
+		 * completely skip the virtualization and just pass the call through
+ * to the accumulator, saving a fair few copies and
+ * accumulations.
+ */
+ errcode = kbase_hwcnt_accumulator_dump(
+ hvirt->accum, ts_start_ns, ts_end_ns, dump_buf);
+
+ if (!errcode) {
+ /* Fix up the timestamps */
+ *ts_start_ns = hvcli->ts_start_ns;
+ hvcli->ts_start_ns = *ts_end_ns;
+ }
+ } else {
+ /* Otherwise, do the full virtualize */
+ errcode = kbasep_hwcnt_virtualizer_client_dump(
+ hvirt, hvcli, ts_start_ns, ts_end_ns, dump_buf);
+ }
+
+ mutex_unlock(&hvirt->lock);
+
+ return errcode;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_dump);
+
+int kbase_hwcnt_virtualizer_client_create(
+ struct kbase_hwcnt_virtualizer *hvirt,
+ const struct kbase_hwcnt_enable_map *enable_map,
+ struct kbase_hwcnt_virtualizer_client **out_hvcli)
+{
+ int errcode;
+ struct kbase_hwcnt_virtualizer_client *hvcli;
+
+ if (!hvirt || !enable_map || !out_hvcli ||
+ (enable_map->metadata != hvirt->metadata))
+ return -EINVAL;
+
+ errcode = kbasep_hwcnt_virtualizer_client_alloc(
+ hvirt->metadata, &hvcli);
+ if (errcode)
+ return errcode;
+
+ mutex_lock(&hvirt->lock);
+
+ errcode = kbasep_hwcnt_virtualizer_client_add(hvirt, hvcli, enable_map);
+
+ mutex_unlock(&hvirt->lock);
+
+ if (errcode) {
+ kbasep_hwcnt_virtualizer_client_free(hvcli);
+ return errcode;
+ }
+
+ *out_hvcli = hvcli;
+ return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_create);
+
+void kbase_hwcnt_virtualizer_client_destroy(
+ struct kbase_hwcnt_virtualizer_client *hvcli)
+{
+ if (!hvcli)
+ return;
+
+ mutex_lock(&hvcli->hvirt->lock);
+
+ kbasep_hwcnt_virtualizer_client_remove(hvcli->hvirt, hvcli);
+
+ mutex_unlock(&hvcli->hvirt->lock);
+
+ kbasep_hwcnt_virtualizer_client_free(hvcli);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_destroy);
+
+int kbase_hwcnt_virtualizer_init(
+ struct kbase_hwcnt_context *hctx,
+ struct kbase_hwcnt_virtualizer **out_hvirt)
+{
+ struct kbase_hwcnt_virtualizer *virt;
+ const struct kbase_hwcnt_metadata *metadata;
+
+ if (!hctx || !out_hvirt)
+ return -EINVAL;
+
+ metadata = kbase_hwcnt_context_metadata(hctx);
+ if (!metadata)
+ return -EINVAL;
+
+ virt = kzalloc(sizeof(*virt), GFP_KERNEL);
+ if (!virt)
+ return -ENOMEM;
+
+ virt->hctx = hctx;
+ virt->metadata = metadata;
+
+ mutex_init(&virt->lock);
+ INIT_LIST_HEAD(&virt->clients);
+
+ *out_hvirt = virt;
+ return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_init);
+
+void kbase_hwcnt_virtualizer_term(
+ struct kbase_hwcnt_virtualizer *hvirt)
+{
+ if (!hvirt)
+ return;
+
+ /* Non-zero client count implies client leak */
+ if (WARN_ON(hvirt->client_count != 0)) {
+ struct kbase_hwcnt_virtualizer_client *pos, *n;
+
+ list_for_each_entry_safe(pos, n, &hvirt->clients, node)
+ kbase_hwcnt_virtualizer_client_destroy(pos);
+ }
+
+ WARN_ON(hvirt->client_count != 0);
+ WARN_ON(hvirt->accum);
+
+ kfree(hvirt);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_term);
diff --git a/mali_kbase/mali_kbase_hwcnt_virtualizer.h b/mali_kbase/mali_kbase_hwcnt_virtualizer.h
new file mode 100644
index 0000000..1efa81d
--- /dev/null
+++ b/mali_kbase/mali_kbase_hwcnt_virtualizer.h
@@ -0,0 +1,139 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Hardware counter virtualizer API.
+ *
+ * Virtualizes a hardware counter context, so multiple clients can access
+ * a single hardware counter resource as though each was the exclusive user.
+ */
+
+#ifndef _KBASE_HWCNT_VIRTUALIZER_H_
+#define _KBASE_HWCNT_VIRTUALIZER_H_
+
+#include <linux/types.h>
+
+struct kbase_hwcnt_context;
+struct kbase_hwcnt_virtualizer;
+struct kbase_hwcnt_virtualizer_client;
+struct kbase_hwcnt_enable_map;
+struct kbase_hwcnt_dump_buffer;
+
+/**
+ * kbase_hwcnt_virtualizer_init - Initialise a hardware counter virtualizer.
+ * @hctx: Non-NULL pointer to the hardware counter context to virtualize.
+ * @out_hvirt: Non-NULL pointer to where the pointer to the created virtualizer
+ * will be stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_virtualizer_init(
+ struct kbase_hwcnt_context *hctx,
+ struct kbase_hwcnt_virtualizer **out_hvirt);
+
+/**
+ * kbase_hwcnt_virtualizer_term - Terminate a hardware counter virtualizer.
+ * @hvirt: Pointer to virtualizer to be terminated.
+ */
+void kbase_hwcnt_virtualizer_term(
+ struct kbase_hwcnt_virtualizer *hvirt);
+
+/**
+ * kbase_hwcnt_virtualizer_metadata - Get the hardware counter metadata used by
+ * the virtualizer, so related counter data
+ * structures can be created.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ *
+ * Return: Non-NULL pointer to metadata, or NULL on error.
+ */
+const struct kbase_hwcnt_metadata *kbase_hwcnt_virtualizer_metadata(
+ struct kbase_hwcnt_virtualizer *hvirt);
+
+/**
+ * kbase_hwcnt_virtualizer_client_create - Create a new virtualizer client.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ * @enable_map: Non-NULL pointer to the enable map for the client. Must have the
+ * same metadata as the virtualizer.
+ * @out_hvcli: Non-NULL pointer to where the pointer to the created client will
+ * be stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_virtualizer_client_create(
+ struct kbase_hwcnt_virtualizer *hvirt,
+ const struct kbase_hwcnt_enable_map *enable_map,
+ struct kbase_hwcnt_virtualizer_client **out_hvcli);
+
+/**
+ * kbase_hwcnt_virtualizer_client_destroy() - Destroy a virtualizer client.
+ * @hvcli: Pointer to the hardware counter client.
+ */
+void kbase_hwcnt_virtualizer_client_destroy(
+ struct kbase_hwcnt_virtualizer_client *hvcli);
+
+/**
+ * kbase_hwcnt_virtualizer_client_set_counters - Perform a dump of the client's
+ * currently enabled counters, and
+ * enable a new set of counters
+ * that will be used for
+ * subsequent dumps.
+ * @hvcli: Non-NULL pointer to the virtualizer client.
+ * @enable_map: Non-NULL pointer to the new counter enable map for the client.
+ * Must have the same metadata as the virtualizer.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ * be written out to on success.
+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will
+ * be written out to on success.
+ * @dump_buf: Pointer to the buffer where the dump will be written out to on
+ * success. If non-NULL, must have the same metadata as the
+ * accumulator. If NULL, the dump will be discarded.
+ *
+ * Return: 0 on success or error code.
+ */
+int kbase_hwcnt_virtualizer_client_set_counters(
+ struct kbase_hwcnt_virtualizer_client *hvcli,
+ const struct kbase_hwcnt_enable_map *enable_map,
+ u64 *ts_start_ns,
+ u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf);
+
+/**
+ * kbase_hwcnt_virtualizer_client_dump - Perform a dump of the client's
+ * currently enabled counters.
+ * @hvcli: Non-NULL pointer to the virtualizer client.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ * be written out to on success.
+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will
+ * be written out to on success.
+ * @dump_buf: Pointer to the buffer where the dump will be written out to on
+ * success. If non-NULL, must have the same metadata as the
+ * accumulator. If NULL, the dump will be discarded.
+ *
+ * Return: 0 on success or error code.
+ */
+int kbase_hwcnt_virtualizer_client_dump(
+ struct kbase_hwcnt_virtualizer_client *hvcli,
+ u64 *ts_start_ns,
+ u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf);
+
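+/*
+ * Typical client lifecycle (illustrative sketch): the hardware counter
+ * context hctx is assumed to have been created elsewhere, the enable map
+ * and dump buffer helpers come from mali_kbase_hwcnt_types.h, and error
+ * handling is omitted for brevity.
+ *
+ *    struct kbase_hwcnt_virtualizer *hvirt;
+ *    struct kbase_hwcnt_virtualizer_client *hvcli;
+ *    struct kbase_hwcnt_enable_map map = { 0 };
+ *    struct kbase_hwcnt_dump_buffer buf = { 0 };
+ *    u64 ts_start_ns, ts_end_ns;
+ *
+ *    kbase_hwcnt_virtualizer_init(hctx, &hvirt);
+ *    kbase_hwcnt_enable_map_alloc(
+ *        kbase_hwcnt_virtualizer_metadata(hvirt), &map);
+ *    kbase_hwcnt_enable_map_enable_all(&map);
+ *    kbase_hwcnt_dump_buffer_alloc(
+ *        kbase_hwcnt_virtualizer_metadata(hvirt), &buf);
+ *
+ *    kbase_hwcnt_virtualizer_client_create(hvirt, &map, &hvcli);
+ *    kbase_hwcnt_virtualizer_client_dump(hvcli, &ts_start_ns, &ts_end_ns,
+ *        &buf);
+ *    kbase_hwcnt_virtualizer_client_destroy(hvcli);
+ *
+ *    kbase_hwcnt_dump_buffer_free(&buf);
+ *    kbase_hwcnt_enable_map_free(&map);
+ *    kbase_hwcnt_virtualizer_term(hvirt);
+ */
+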
+#endif /* _KBASE_HWCNT_VIRTUALIZER_H_ */
diff --git a/mali_kbase/mali_kbase_ioctl.h b/mali_kbase/mali_kbase_ioctl.h
index ffc30d8..ccf67df 100644
--- a/mali_kbase/mali_kbase_ioctl.h
+++ b/mali_kbase/mali_kbase_ioctl.h
@@ -64,9 +64,11 @@ extern "C" {
* - Added BASE_MEM_GPU_VA_SAME_4GB_PAGE under base_mem_alloc_flags
* 11.12:
* - Removed ioctl: KBASE_IOCTL_GET_PROFILING_CONTROLS
+ * 11.13:
+ * - New ioctl: KBASE_IOCTL_MEM_EXEC_INIT
*/
#define BASE_UK_VERSION_MAJOR 11
-#define BASE_UK_VERSION_MINOR 12
+#define BASE_UK_VERSION_MINOR 13
/**
* struct kbase_ioctl_version_check - Check version compatibility with kernel
@@ -673,6 +675,19 @@ union kbase_ioctl_cinstr_gwt_dump {
_IOWR(KBASE_IOCTL_TYPE, 35, union kbase_ioctl_cinstr_gwt_dump)
+/**
+ * struct kbase_ioctl_mem_exec_init - Initialise the EXEC_VA memory zone
+ *
+ * @va_pages: Number of VA pages to reserve for EXEC_VA
+ */
+struct kbase_ioctl_mem_exec_init {
+ __u64 va_pages;
+};
+
+#define KBASE_IOCTL_MEM_EXEC_INIT \
+ _IOW(KBASE_IOCTL_TYPE, 38, struct kbase_ioctl_mem_exec_init)
+
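+/* Example (illustrative, userspace): reserve 512 pages of EXEC_VA via the
+ * new ioctl. fd is assumed to be an open file descriptor on the kbase
+ * device node (e.g. /dev/mali0).
+ *
+ *    struct kbase_ioctl_mem_exec_init init = { .va_pages = 512 };
+ *
+ *    if (ioctl(fd, KBASE_IOCTL_MEM_EXEC_INIT, &init))
+ *        perror("KBASE_IOCTL_MEM_EXEC_INIT");
+ */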
+
/***************
* test ioctls *
***************/
@@ -747,6 +762,21 @@ union kbase_ioctl_cs_event_memory_read {
#endif
+/* Customer extension range */
+#define KBASE_IOCTL_EXTRA_TYPE (KBASE_IOCTL_TYPE + 2)
+
+/* If the integration needs extra ioctls, add them here
+ * like this:
+ *
+ * struct my_ioctl_args {
+ * ....
+ * }
+ *
+ * #define KBASE_IOCTL_MY_IOCTL \
+ * _IOWR(KBASE_IOCTL_EXTRA_TYPE, 0, struct my_ioctl_args)
+ */
+
+
/**********************************
* Definitions for GPU properties *
**********************************/
diff --git a/mali_kbase/mali_kbase_jd.c b/mali_kbase/mali_kbase_jd.c
index 29cf193..97d7b43 100644
--- a/mali_kbase/mali_kbase_jd.c
+++ b/mali_kbase/mali_kbase_jd.c
@@ -804,7 +804,6 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us
katom->extres = NULL;
katom->device_nr = user_atom->device_nr;
katom->jc = user_atom->jc;
- katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
katom->core_req = user_atom->core_req;
katom->atom_flags = 0;
katom->retry_count = 0;
@@ -1219,7 +1218,6 @@ void kbase_jd_done_worker(struct work_struct *data)
struct kbasep_js_atom_retained_state katom_retained_state;
bool context_idle;
base_jd_core_req core_req = katom->core_req;
- enum kbase_atom_coreref_state coreref_state = katom->coreref_state;
/* Soft jobs should never reach this function */
KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
@@ -1365,7 +1363,7 @@ void kbase_jd_done_worker(struct work_struct *data)
mutex_unlock(&jctx->lock);
}
- kbase_backend_complete_wq_post_sched(kbdev, core_req, coreref_state);
+ kbase_backend_complete_wq_post_sched(kbdev, core_req);
if (context_idle)
kbase_pm_context_idle(kbdev);
diff --git a/mali_kbase/mali_kbase_jd_debugfs.c b/mali_kbase/mali_kbase_jd_debugfs.c
index 271daef..7b15d8a 100644
--- a/mali_kbase/mali_kbase_jd_debugfs.c
+++ b/mali_kbase/mali_kbase_jd_debugfs.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -190,9 +190,8 @@ static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data)
kbasep_jd_debugfs_atom_deps(deps, atom);
seq_printf(sfile,
- "%3u, %8x, %2u, %2u, %c%3u %c%3u, %20lld, ",
+ "%3u, %8x, %2u, %c%3u %c%3u, %20lld, ",
i, atom->core_req, atom->status,
- atom->coreref_state,
deps[0].type, deps[0].id,
deps[1].type, deps[1].id,
start_timestamp);
diff --git a/mali_kbase/mali_kbase_jd_debugfs.h b/mali_kbase/mali_kbase_jd_debugfs.h
index ce0cb61..697bdef 100644
--- a/mali_kbase/mali_kbase_jd_debugfs.h
+++ b/mali_kbase/mali_kbase_jd_debugfs.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -30,7 +30,7 @@
#include <linux/debugfs.h>
-#define MALI_JD_DEBUGFS_VERSION 2
+#define MALI_JD_DEBUGFS_VERSION 3
/* Forward declarations */
struct kbase_context;
diff --git a/mali_kbase/mali_kbase_js.c b/mali_kbase/mali_kbase_js.c
index 58a1b4b..80b6d77 100644
--- a/mali_kbase/mali_kbase_js.c
+++ b/mali_kbase/mali_kbase_js.c
@@ -2259,7 +2259,6 @@ static void js_return_worker(struct work_struct *data)
bool context_idle = false;
unsigned long flags;
base_jd_core_req core_req = katom->core_req;
- enum kbase_atom_coreref_state coreref_state = katom->coreref_state;
KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(katom);
@@ -2349,7 +2348,7 @@ static void js_return_worker(struct work_struct *data)
kbase_js_sched_all(kbdev);
- kbase_backend_complete_wq_post_sched(kbdev, core_req, coreref_state);
+ kbase_backend_complete_wq_post_sched(kbdev, core_req);
}
void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
diff --git a/mali_kbase/mali_kbase_js_ctx_attr.c b/mali_kbase/mali_kbase_js_ctx_attr.c
index 6fd908a..1ff230c 100644
--- a/mali_kbase/mali_kbase_js_ctx_attr.c
+++ b/mali_kbase/mali_kbase_js_ctx_attr.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016, 2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -198,29 +198,6 @@ static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, stru
* More commonly used public functions
*/
-void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx)
-{
- bool runpool_state_changed = false;
-
- KBASE_DEBUG_ASSERT(kbdev != NULL);
- KBASE_DEBUG_ASSERT(kctx != NULL);
-
- if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
- /* This context never submits, so don't track any scheduling attributes */
- return;
- }
-
- /* Transfer attributes held in the context flags for contexts that have submit enabled */
-
- /* ... More attributes can be added here ... */
-
- /* The context should not have been scheduled yet, so ASSERT if this caused
- * runpool state changes (note that other threads *can't* affect the value
- * of runpool_state_changed, due to how it's calculated) */
- KBASE_DEBUG_ASSERT(runpool_state_changed == false);
- CSTD_UNUSED(runpool_state_changed);
-}
-
void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
{
bool runpool_state_changed;
diff --git a/mali_kbase/mali_kbase_js_ctx_attr.h b/mali_kbase/mali_kbase_js_ctx_attr.h
index be781e6..25fd397 100644
--- a/mali_kbase/mali_kbase_js_ctx_attr.h
+++ b/mali_kbase/mali_kbase_js_ctx_attr.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2015, 2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -46,14 +46,6 @@
*/
/**
- * Set the initial attributes of a context (when context create flags are set)
- *
- * Requires:
- * - Hold the jsctx_mutex
- */
-void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx);
-
-/**
* Retain all attributes of a context
*
* This occurs on scheduling in the context on the runpool (but after
diff --git a/mali_kbase/mali_kbase_js_defs.h b/mali_kbase/mali_kbase_js_defs.h
index 7385daa..052a0b3 100644
--- a/mali_kbase/mali_kbase_js_defs.h
+++ b/mali_kbase/mali_kbase_js_defs.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -151,18 +151,19 @@ typedef u32 kbasep_js_atom_done_code;
*/
enum {
/*
- * In this mode, the context containing higher priority atoms will be
- * scheduled first and also the new runnable higher priority atoms can
- * preempt lower priority atoms currently running on the GPU, even if
- * they belong to a different context.
+ * In this mode, higher priority atoms will be scheduled first,
+ * regardless of the context they belong to. Newly-runnable higher
+ * priority atoms can preempt lower priority atoms currently running on
+ * the GPU, even if they belong to a different context.
*/
KBASE_JS_SYSTEM_PRIORITY_MODE = 0,
/*
- * In this mode, the contexts are scheduled in round-robin fashion and
- * the new runnable higher priority atoms can preempt the lower priority
- * atoms currently running on the GPU, only if they belong to the same
- * context.
+ * In this mode, the highest-priority atom will be chosen from each
+ * context in turn using a round-robin algorithm, so priority only has
+ * an effect within the context an atom belongs to. Newly-runnable
+ * higher priority atoms can preempt the lower priority atoms currently
+ * running on the GPU, but only if they belong to the same context.
*/
KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE,
diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c
index 3940024..3d0de90 100644
--- a/mali_kbase/mali_kbase_mem.c
+++ b/mali_kbase/mali_kbase_mem.c
@@ -79,21 +79,28 @@ static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx,
{
struct rb_root *rbtree = NULL;
+	/* The gpu_pfn can only be greater than the starting pfn of the EXEC_VA
+	 * zone if the zone has been initialized.
+	 */
+ if (gpu_pfn >= kctx->exec_va_start)
+ rbtree = &kctx->reg_rbtree_exec;
+ else {
+ u64 same_va_end;
+
#ifdef CONFIG_64BIT
- if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+ if (kbase_ctx_flag(kctx, KCTX_COMPAT))
#endif /* CONFIG_64BIT */
- if (gpu_pfn >= KBASE_REG_ZONE_CUSTOM_VA_BASE)
- rbtree = &kctx->reg_rbtree_custom;
- else
- rbtree = &kctx->reg_rbtree_same;
+ same_va_end = KBASE_REG_ZONE_CUSTOM_VA_BASE;
#ifdef CONFIG_64BIT
- } else {
- if (gpu_pfn >= kctx->same_va_end)
+ else
+ same_va_end = kctx->same_va_end;
+#endif /* CONFIG_64BIT */
+
+ if (gpu_pfn >= same_va_end)
rbtree = &kctx->reg_rbtree_custom;
else
rbtree = &kctx->reg_rbtree_same;
}
-#endif /* CONFIG_64BIT */
return rbtree;
}
@@ -224,7 +231,6 @@ struct kbase_va_region *kbase_find_region_base_address(
rbnode = rbnode->rb_right;
else
return reg;
-
}
return NULL;
@@ -615,11 +621,15 @@ static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
kctx->reg_rbtree_same = RB_ROOT;
kbase_region_tracker_insert(same_va_reg);
- /* Although custom_va_reg doesn't always exist,
+ /* Although custom_va_reg and exec_va_reg don't always exist,
* initialize unconditionally because of the mem_view debugfs
- * implementation which relies on this being empty.
+ * implementation which relies on them being empty.
+ *
+ * The difference between the two is that the EXEC_VA region
+ * is never initialized at this stage.
*/
kctx->reg_rbtree_custom = RB_ROOT;
+ kctx->reg_rbtree_exec = RB_ROOT;
if (custom_va_reg)
kbase_region_tracker_insert(custom_va_reg);
@@ -644,6 +654,7 @@ void kbase_region_tracker_term(struct kbase_context *kctx)
{
kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same);
kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom);
+ kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec);
}
void kbase_region_tracker_term_rbtree(struct rb_root *rbtree)
@@ -657,9 +668,6 @@ static size_t kbase_get_same_va_bits(struct kbase_context *kctx)
(size_t) kctx->kbdev->gpu_props.mmu.va_bits);
}
-/**
- * Initialize the region tracker data structure.
- */
int kbase_region_tracker_init(struct kbase_context *kctx)
{
struct kbase_va_region *same_va_reg;
@@ -709,12 +717,17 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
goto fail_free_same_va;
}
#ifdef CONFIG_64BIT
+ } else {
+ custom_va_size = 0;
}
#endif
kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg);
kctx->same_va_end = same_va_pages + 1;
+ kctx->gpu_va_end = kctx->same_va_end + custom_va_size;
+ kctx->exec_va_start = U64_MAX;
+ kctx->jit_va = false;
kbase_gpu_vm_unlock(kctx);
@@ -735,11 +748,12 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
struct kbase_va_region *custom_va_reg;
u64 same_va_bits = kbase_get_same_va_bits(kctx);
u64 total_va_size;
- int err;
total_va_size = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
- kbase_gpu_vm_lock(kctx);
+ /* First verify that a JIT_VA zone has not been created already. */
+ if (kctx->jit_va)
+ return -EINVAL;
/*
* Modify the same VA free region after creation. Be careful to ensure
@@ -748,23 +762,11 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
*/
same_va = kbase_region_tracker_find_region_base_address(kctx,
PAGE_SIZE);
- if (!same_va) {
- err = -ENOMEM;
- goto fail_unlock;
- }
-
- /* The region flag or region size has changed since creation so bail. */
- if ((!(same_va->flags & KBASE_REG_FREE)) ||
- (same_va->nr_pages != total_va_size)) {
- err = -ENOMEM;
- goto fail_unlock;
- }
+ if (!same_va)
+ return -ENOMEM;
- if (same_va->nr_pages < jit_va_pages ||
- kctx->same_va_end < jit_va_pages) {
- err = -ENOMEM;
- goto fail_unlock;
- }
+ if (same_va->nr_pages < jit_va_pages || kctx->same_va_end < jit_va_pages)
+ return -ENOMEM;
/* It's safe to adjust the same VA zone now */
same_va->nr_pages -= jit_va_pages;
@@ -779,44 +781,121 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
jit_va_pages,
KBASE_REG_ZONE_CUSTOM_VA);
- if (!custom_va_reg) {
- /*
- * The context will be destroyed if we fail here so no point
- * reverting the change we made to same_va.
- */
- err = -ENOMEM;
- goto fail_unlock;
- }
+ /*
+ * The context will be destroyed if we fail here so no point
+ * reverting the change we made to same_va.
+ */
+ if (!custom_va_reg)
+ return -ENOMEM;
kbase_region_tracker_insert(custom_va_reg);
-
- kbase_gpu_vm_unlock(kctx);
return 0;
-
-fail_unlock:
- kbase_gpu_vm_unlock(kctx);
- return err;
}
#endif
int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
u8 max_allocations, u8 trim_level)
{
+ int err = 0;
+
if (trim_level > 100)
return -EINVAL;
- kctx->jit_max_allocations = max_allocations;
- kctx->trim_level = trim_level;
+ kbase_gpu_vm_lock(kctx);
#ifdef CONFIG_64BIT
if (!kbase_ctx_flag(kctx, KCTX_COMPAT))
- return kbase_region_tracker_init_jit_64(kctx, jit_va_pages);
+ err = kbase_region_tracker_init_jit_64(kctx, jit_va_pages);
#endif
/*
* Nothing to do for 32-bit clients, JIT uses the existing
* custom VA zone.
*/
- return 0;
+
+ if (!err) {
+ kctx->jit_max_allocations = max_allocations;
+ kctx->trim_level = trim_level;
+ kctx->jit_va = true;
+ }
+
+ kbase_gpu_vm_unlock(kctx);
+
+ return err;
+}
+
+int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages)
+{
+ struct kbase_va_region *shrinking_va_reg;
+ struct kbase_va_region *exec_va_reg;
+ u64 exec_va_start, exec_va_base_addr;
+ int err;
+
+	/* The EXEC_VA zone is created by carving out space at the end of the
+	 * address space. First verify that the number of EXEC_VA pages
+	 * requested by the client is reasonable, and that it is not greater
+	 * than the address space itself, before calculating the base address
+	 * of the new zone.
+	 */
+ if (exec_va_pages == 0 || exec_va_pages > KBASE_REG_ZONE_EXEC_VA_MAX_PAGES)
+ return -EINVAL;
+
+ kbase_gpu_vm_lock(kctx);
+
+ /* First verify that a JIT_VA zone has not been created already. */
+ if (kctx->jit_va) {
+ err = -EPERM;
+ goto exit_unlock;
+ }
+
+ if (exec_va_pages > kctx->gpu_va_end) {
+ err = -ENOMEM;
+ goto exit_unlock;
+ }
+
+ exec_va_start = kctx->gpu_va_end - exec_va_pages;
+ exec_va_base_addr = exec_va_start << PAGE_SHIFT;
+
+ shrinking_va_reg = kbase_region_tracker_find_region_enclosing_address(kctx,
+ exec_va_base_addr);
+ if (!shrinking_va_reg) {
+ err = -ENOMEM;
+ goto exit_unlock;
+ }
+
+ /* Make sure that the EXEC_VA region is still uninitialized */
+ if ((shrinking_va_reg->flags & KBASE_REG_ZONE_MASK) ==
+ KBASE_REG_ZONE_EXEC_VA) {
+ err = -EPERM;
+ goto exit_unlock;
+ }
+
+ if (shrinking_va_reg->nr_pages <= exec_va_pages) {
+ err = -ENOMEM;
+ goto exit_unlock;
+ }
+
+ exec_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_exec,
+ exec_va_start,
+ exec_va_pages,
+ KBASE_REG_ZONE_EXEC_VA);
+ if (!exec_va_reg) {
+ err = -ENOMEM;
+ goto exit_unlock;
+ }
+
+ shrinking_va_reg->nr_pages -= exec_va_pages;
+#ifdef CONFIG_64BIT
+ if (!kbase_ctx_flag(kctx, KCTX_COMPAT))
+ kctx->same_va_end -= exec_va_pages;
+#endif
+ kctx->exec_va_start = exec_va_start;
+
+ kbase_region_tracker_insert(exec_va_reg);
+ err = 0;
+
+exit_unlock:
+ kbase_gpu_vm_unlock(kctx);
+ return err;
}
@@ -938,6 +1017,10 @@ static struct kbase_context *kbase_reg_flags_to_kctx(
kctx = container_of(rbtree, struct kbase_context,
reg_rbtree_same);
break;
+ case KBASE_REG_ZONE_EXEC_VA:
+ kctx = container_of(rbtree, struct kbase_context,
+ reg_rbtree_exec);
+ break;
default:
WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags);
break;
@@ -2917,6 +3000,30 @@ update_failed_unlocked:
return ret;
}
+static void trace_jit_stats(struct kbase_context *kctx,
+ u32 bin_id, u32 max_allocations)
+{
+ const u32 alloc_count =
+ kctx->jit_current_allocations_per_bin[bin_id];
+
+ struct kbase_va_region *walker;
+ u32 va_pages = 0;
+ u32 ph_pages = 0;
+
+ mutex_lock(&kctx->jit_evict_lock);
+ list_for_each_entry(walker, &kctx->jit_active_head, jit_node) {
+ if (walker->jit_bin_id != bin_id)
+ continue;
+
+ va_pages += walker->nr_pages;
+ ph_pages += walker->gpu_alloc->nents;
+ }
+ mutex_unlock(&kctx->jit_evict_lock);
+
+ KBASE_TLSTREAM_AUX_JIT_STATS(kctx->id, bin_id, max_allocations,
+ alloc_count, va_pages, ph_pages);
+}
+
struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
struct base_jit_alloc_info *info)
{
@@ -3069,6 +3176,8 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
kctx->jit_current_allocations++;
kctx->jit_current_allocations_per_bin[info->bin_id]++;
+ trace_jit_stats(kctx, info->bin_id, info->max_allocations);
+
reg->jit_usage_id = info->usage_id;
reg->jit_bin_id = info->bin_id;
@@ -3112,6 +3221,8 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
kctx->jit_current_allocations--;
kctx->jit_current_allocations_per_bin[reg->jit_bin_id]--;
+ trace_jit_stats(kctx, reg->jit_bin_id, UINT_MAX);
+
kbase_mem_evictable_mark_reclaim(reg->gpu_alloc);
kbase_gpu_vm_lock(kctx);
@@ -3225,6 +3336,17 @@ void kbase_jit_term(struct kbase_context *kctx)
cancel_work_sync(&kctx->jit_work);
}
+bool kbase_has_exec_va_zone(struct kbase_context *kctx)
+{
+ bool has_exec_va_zone;
+
+ kbase_gpu_vm_lock(kctx);
+ has_exec_va_zone = (kctx->exec_va_start != U64_MAX);
+ kbase_gpu_vm_unlock(kctx);
+
+ return has_exec_va_zone;
+}
+
static int kbase_jd_user_buf_map(struct kbase_context *kctx,
struct kbase_va_region *reg)
{
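
The new EXEC_VA zone is carved from the top of the GPU address space: kbase_region_tracker_init_exec() places it at gpu_va_end - exec_va_pages, and kbase_gpu_va_to_rbtree() then routes any PFN at or above exec_va_start to reg_rbtree_exec. A minimal standalone sketch of that arithmetic follows; the page size, VA width and requested zone size are illustrative assumptions, not values from the patch:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12	/* assume 4 KiB pages */

int main(void)
{
	/* Hypothetical context layout, in pages. */
	uint64_t gpu_va_end = 1ULL << (48 - PAGE_SHIFT);	/* 48-bit GPU VA */
	uint64_t exec_va_pages = 4096;				/* 16 MiB requested */

	/* Mirrors kbase_region_tracker_init_exec(): the zone sits at the
	 * very end of the GPU address space.
	 */
	uint64_t exec_va_start = gpu_va_end - exec_va_pages;
	uint64_t exec_va_base_addr = exec_va_start << PAGE_SHIFT;

	printf("EXEC_VA starts at pfn %llu (GPU VA 0x%llx)\n",
	       (unsigned long long)exec_va_start,
	       (unsigned long long)exec_va_base_addr);

	/* Mirrors the lookup in kbase_gpu_va_to_rbtree(): PFNs at or above
	 * exec_va_start fall in the EXEC_VA rbtree, everything else goes to
	 * the SAME_VA or CUSTOM_VA trees.
	 */
	uint64_t gpu_pfn = exec_va_start + 1;
	printf("pfn %llu -> %s\n", (unsigned long long)gpu_pfn,
	       gpu_pfn >= exec_va_start ? "reg_rbtree_exec" : "other zone");
	return 0;
}
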
diff --git a/mali_kbase/mali_kbase_mem.h b/mali_kbase/mali_kbase_mem.h
index 5958cf4..a873bb1 100644
--- a/mali_kbase/mali_kbase_mem.h
+++ b/mali_kbase/mali_kbase_mem.h
@@ -328,6 +328,13 @@ struct kbase_va_region {
#define KBASE_REG_ZONE_CUSTOM_VA_SIZE (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE)
/* end 32-bit clients only */
+/* The starting address and size of the GPU-executable zone are dynamic
+ * and depend on the platform and the number of pages requested by the
+ * user process, with an upper limit of 4 GB.
+ */
+#define KBASE_REG_ZONE_EXEC_VA KBASE_REG_ZONE(2)
+#define KBASE_REG_ZONE_EXEC_VA_MAX_PAGES ((1ULL << 32) >> PAGE_SHIFT) /* 4 GB */
+
unsigned long flags;
@@ -792,9 +799,40 @@ void kbase_mem_pool_mark_dying(struct kbase_mem_pool *pool);
*/
struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool);
+/**
+ * kbase_region_tracker_init - Initialize the region tracker data structure
+ * @kctx: kbase context
+ *
+ * Return: 0 if success, negative error code otherwise.
+ */
int kbase_region_tracker_init(struct kbase_context *kctx);
+
+/**
+ * kbase_region_tracker_init_jit - Initialize the JIT region
+ * @kctx: kbase context
+ * @jit_va_pages: Size of the JIT region in pages
+ * @max_allocations: Maximum number of allocations allowed for the JIT region
+ * @trim_level: Trim level for the JIT region
+ *
+ * Return: 0 if success, negative error code otherwise.
+ */
int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
u8 max_allocations, u8 trim_level);
+
+/**
+ * kbase_region_tracker_init_exec - Initialize the EXEC_VA region
+ * @kctx: kbase context
+ * @exec_va_pages: Size of the EXEC_VA region in pages.
+ *                 It must not be greater than 4 GB.
+ *
+ * Return: 0 if success, negative error code otherwise.
+ */
+int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages);
+
+/**
+ * kbase_region_tracker_term - Terminate the region tracker
+ * @kctx: kbase context
+ */
void kbase_region_tracker_term(struct kbase_context *kctx);
/**
@@ -1349,6 +1387,18 @@ bool kbase_jit_evict(struct kbase_context *kctx);
void kbase_jit_term(struct kbase_context *kctx);
/**
+ * kbase_has_exec_va_zone - EXEC_VA zone predicate
+ * @kctx: kbase context
+ *
+ * Determine whether an EXEC_VA zone has been created for the GPU address space
+ * of the given kbase context.
+ *
+ * Return: True if the kbase context has an EXEC_VA zone.
+ */
+bool kbase_has_exec_va_zone(struct kbase_context *kctx);
+
+/**
* kbase_map_external_resource - Map an external resource to the GPU.
* @kctx: kbase context.
* @reg: The region to map.
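
KBASE_REG_ZONE_EXEC_VA_MAX_PAGES caps the new zone at 4 GB of GPU VA. A quick standalone check of that arithmetic, assuming the common 4 KiB page size (in the kernel PAGE_SHIFT comes from the architecture):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12	/* assumed 4 KiB pages */
#define KBASE_REG_ZONE_EXEC_VA_MAX_PAGES ((1ULL << 32) >> PAGE_SHIFT)

int main(void)
{
	/* (1ULL << 32) >> 12 = 1,048,576 pages, i.e. 4 GiB of GPU VA. */
	printf("EXEC_VA limit: %llu pages (%llu GiB)\n",
	       (unsigned long long)KBASE_REG_ZONE_EXEC_VA_MAX_PAGES,
	       (unsigned long long)((KBASE_REG_ZONE_EXEC_VA_MAX_PAGES
				     << PAGE_SHIFT) >> 30));
	return 0;
}
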
diff --git a/mali_kbase/mali_kbase_mem_linux.c b/mali_kbase/mali_kbase_mem_linux.c
index 1299353..c70112d 100644
--- a/mali_kbase/mali_kbase_mem_linux.c
+++ b/mali_kbase/mali_kbase_mem_linux.c
@@ -250,6 +250,16 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
goto bad_flags;
}
+#ifdef CONFIG_DEBUG_FS
+ if (unlikely(kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE))) {
+ /* Mask coherency flags if infinite cache is enabled to prevent
+		 * the skipping of syncs from the BASE side.
+ */
+ *flags &= ~(BASE_MEM_COHERENT_SYSTEM_REQUIRED |
+ BASE_MEM_COHERENT_SYSTEM);
+ }
+#endif
+
if ((*flags & BASE_MEM_UNCACHED_GPU) != 0 &&
(*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0) {
/* Remove COHERENT_SYSTEM_REQUIRED flag if uncached GPU mapping is requested */
@@ -273,6 +283,9 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
if (*flags & BASE_MEM_SAME_VA) {
rbtree = &kctx->reg_rbtree_same;
zone = KBASE_REG_ZONE_SAME_VA;
+ } else if ((*flags & BASE_MEM_PROT_GPU_EX) && kbase_has_exec_va_zone(kctx)) {
+ rbtree = &kctx->reg_rbtree_exec;
+ zone = KBASE_REG_ZONE_EXEC_VA;
} else {
rbtree = &kctx->reg_rbtree_custom;
zone = KBASE_REG_ZONE_CUSTOM_VA;
@@ -914,6 +927,9 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx,
if (!reg)
goto no_region;
+ if (kbase_update_region_flags(kctx, reg, *flags) != 0)
+ goto invalid_flags;
+
reg->gpu_alloc = kbase_alloc_create(kctx, *va_pages,
KBASE_MEM_TYPE_IMPORTED_UMM);
if (IS_ERR_OR_NULL(reg->gpu_alloc))
@@ -924,9 +940,6 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx,
/* No pages to map yet */
reg->gpu_alloc->nents = 0;
- if (kbase_update_region_flags(kctx, reg, *flags) != 0)
- goto invalid_flags;
-
reg->flags &= ~KBASE_REG_FREE;
reg->flags |= KBASE_REG_GPU_NX; /* UMM is always No eXecute */
reg->flags &= ~KBASE_REG_GROWABLE; /* UMM cannot be grown */
@@ -946,10 +959,8 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx,
return reg;
-invalid_flags:
- kbase_mem_phy_alloc_put(reg->gpu_alloc);
- kbase_mem_phy_alloc_put(reg->cpu_alloc);
no_alloc_obj:
+invalid_flags:
kfree(reg);
no_region:
bad_size:
@@ -1186,7 +1197,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
/* mask to only allowed flags */
*flags &= (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL |
- BASE_MEM_COHERENT_SYSTEM_REQUIRED);
+ BASE_MEM_PROT_CPU_RD | BASE_MEM_COHERENT_SYSTEM_REQUIRED);
if (!(*flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR))) {
dev_warn(kctx->kbdev->dev,
@@ -1787,6 +1798,7 @@ static int kbase_cpu_mmap(struct kbase_context *kctx,
struct tagged_addr *page_array;
int err = 0;
int i;
+ u64 start_off;
map = kzalloc(sizeof(*map), GFP_KERNEL);
@@ -1819,6 +1831,38 @@ static int kbase_cpu_mmap(struct kbase_context *kctx,
vma->vm_private_data = map;
page_array = kbase_get_cpu_phy_pages(reg);
+ start_off = vma->vm_pgoff - reg->start_pfn +
+ (aligned_offset >> PAGE_SHIFT);
+ if (reg->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS && nr_pages) {
+ struct kbase_aliased *aliased =
+ reg->cpu_alloc->imported.alias.aliased;
+
+ if (!reg->cpu_alloc->imported.alias.stride ||
+ reg->nr_pages < (start_off + nr_pages)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ while (start_off >= reg->cpu_alloc->imported.alias.stride) {
+ aliased++;
+ start_off -= reg->cpu_alloc->imported.alias.stride;
+ }
+
+ if (!aliased->alloc) {
+ /* sink page not available for dumping map */
+ err = -EINVAL;
+ goto out;
+ }
+
+ if ((start_off + nr_pages) > aliased->length) {
+ /* not fully backed by physical pages */
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* ready the pages for dumping map */
+ page_array = aliased->alloc->pages + aliased->offset;
+ }
if (!(reg->flags & KBASE_REG_CPU_CACHED) &&
(reg->flags & (KBASE_REG_CPU_WR|KBASE_REG_CPU_RD))) {
@@ -1833,8 +1877,6 @@ static int kbase_cpu_mmap(struct kbase_context *kctx,
if (!kaddr) {
unsigned long addr = vma->vm_start + aligned_offset;
- u64 start_off = vma->vm_pgoff - reg->start_pfn +
- (aligned_offset>>PAGE_SHIFT);
vma->vm_flags |= VM_PFNMAP;
for (i = 0; i < nr_pages; i++) {
@@ -2127,8 +2169,19 @@ int kbase_mmap(struct file *file, struct vm_area_struct *vma)
}
#endif /* CONFIG_DMA_SHARED_BUFFER */
- /* limit what we map to the amount currently backed */
- if (reg->cpu_alloc->nents < (vma->vm_pgoff - reg->start_pfn + nr_pages)) {
+ if (reg->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+ /* initial params check for aliased dumping map */
+ if (nr_pages > reg->gpu_alloc->imported.alias.stride ||
+ !reg->gpu_alloc->imported.alias.stride ||
+ !nr_pages) {
+ err = -EINVAL;
+ dev_warn(dev, "mmap aliased: invalid params!\n");
+ goto out_unlock;
+ }
+	} else if (reg->cpu_alloc->nents <
+ (vma->vm_pgoff - reg->start_pfn + nr_pages)) {
+ /* limit what we map to the amount currently backed */
if ((vma->vm_pgoff - reg->start_pfn) >= reg->cpu_alloc->nents)
nr_pages = 0;
else
@@ -2431,134 +2484,4 @@ static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_
return 0;
}
-void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle)
-{
- int res;
- void *va;
- dma_addr_t dma_pa;
- struct kbase_va_region *reg;
- struct tagged_addr *page_array;
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
- unsigned long attrs = DMA_ATTR_WRITE_COMBINE;
-#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
- DEFINE_DMA_ATTRS(attrs);
-#endif
-
- u32 pages = ((size - 1) >> PAGE_SHIFT) + 1;
- u32 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR |
- BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR;
- u32 i;
-
- KBASE_DEBUG_ASSERT(kctx != NULL);
- KBASE_DEBUG_ASSERT(0 != size);
- KBASE_DEBUG_ASSERT(0 != pages);
-
- if (size == 0)
- goto err;
-
- /* All the alloc calls return zeroed memory */
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
- va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL,
- attrs);
-#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
- dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
- va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL,
- &attrs);
-#else
- va = dma_alloc_writecombine(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL);
-#endif
- if (!va)
- goto err;
-
- /* Store the state so we can free it later. */
- handle->cpu_va = va;
- handle->dma_pa = dma_pa;
- handle->size = size;
-
-
- reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0, pages,
- KBASE_REG_ZONE_SAME_VA);
- if (!reg)
- goto no_reg;
-
- reg->flags &= ~KBASE_REG_FREE;
- if (kbase_update_region_flags(kctx, reg, flags) != 0)
- goto invalid_flags;
-
- reg->cpu_alloc = kbase_alloc_create(kctx, pages, KBASE_MEM_TYPE_RAW);
- if (IS_ERR_OR_NULL(reg->cpu_alloc))
- goto no_alloc;
-
- reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
-
- page_array = kbase_get_cpu_phy_pages(reg);
-
- for (i = 0; i < pages; i++)
- page_array[i] = as_tagged(dma_pa + ((dma_addr_t)i << PAGE_SHIFT));
-
- reg->cpu_alloc->nents = pages;
-
- kbase_gpu_vm_lock(kctx);
- res = kbase_gpu_mmap(kctx, reg, (uintptr_t) va, pages, 1);
- kbase_gpu_vm_unlock(kctx);
- if (res)
- goto no_mmap;
-
- return va;
-
-no_mmap:
- kbase_mem_phy_alloc_put(reg->cpu_alloc);
- kbase_mem_phy_alloc_put(reg->gpu_alloc);
-no_alloc:
-invalid_flags:
- kfree(reg);
-no_reg:
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
- dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, attrs);
-#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
- dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, &attrs);
-#else
- dma_free_writecombine(kctx->kbdev->dev, size, va, dma_pa);
-#endif
-err:
- return NULL;
-}
-KBASE_EXPORT_SYMBOL(kbase_va_alloc);
-
-void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle)
-{
- struct kbase_va_region *reg;
- int err;
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \
- (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
- DEFINE_DMA_ATTRS(attrs);
-#endif
-
- KBASE_DEBUG_ASSERT(kctx != NULL);
- KBASE_DEBUG_ASSERT(handle->cpu_va != NULL);
-
- kbase_gpu_vm_lock(kctx);
- reg = kbase_region_tracker_find_region_base_address(kctx, (uintptr_t)handle->cpu_va);
- KBASE_DEBUG_ASSERT(reg);
- err = kbase_gpu_munmap(kctx, reg);
- kbase_gpu_vm_unlock(kctx);
- KBASE_DEBUG_ASSERT(!err);
-
- kbase_mem_phy_alloc_put(reg->cpu_alloc);
- kbase_mem_phy_alloc_put(reg->gpu_alloc);
- kfree(reg);
-
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
- dma_free_attrs(kctx->kbdev->dev, handle->size,
- handle->cpu_va, handle->dma_pa, DMA_ATTR_WRITE_COMBINE);
-#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
- dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
- dma_free_attrs(kctx->kbdev->dev, handle->size,
- handle->cpu_va, handle->dma_pa, &attrs);
-#else
- dma_free_writecombine(kctx->kbdev->dev, handle->size,
- handle->cpu_va, handle->dma_pa);
-#endif
-}
-KBASE_EXPORT_SYMBOL(kbase_va_free);
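
The aliased-dumping path added to kbase_cpu_mmap() above locates the backing chunk by repeatedly subtracting the stride from start_off until the offset falls inside one kbase_aliased entry, then maps that entry's pages. A standalone sketch of the index/offset walk; the stride and chunk lengths are made-up illustrative values, not driver data:

#include <stdio.h>
#include <stdint.h>

/* Simplified stand-in for struct kbase_aliased: only the backed length
 * matters for this walk.
 */
struct fake_aliased {
	uint64_t length;	/* pages backed by this chunk */
};

int main(void)
{
	const uint64_t stride = 4;	/* pages reserved per chunk */
	const struct fake_aliased aliased[] = {
		{ .length = 4 }, { .length = 2 }, { .length = 4 },
	};
	uint64_t start_off = 9;		/* page offset into the alias */
	size_t idx = 0;

	/* Same loop shape as the new code in kbase_cpu_mmap(). */
	while (start_off >= stride) {
		idx++;
		start_off -= stride;
	}

	printf("offset 9, stride 4 -> chunk %zu, page %llu within it\n",
	       idx, (unsigned long long)start_off);

	/* The driver additionally rejects the mapping if the chunk has no
	 * backing allocation or if start_off + nr_pages exceeds its length.
	 */
	if (start_off >= aliased[idx].length)
		printf("would be rejected: not fully backed\n");
	return 0;
}
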
diff --git a/mali_kbase/mali_kbase_mem_linux.h b/mali_kbase/mali_kbase_mem_linux.h
index 0a03bee..5cb88d1 100644
--- a/mali_kbase/mali_kbase_mem_linux.h
+++ b/mali_kbase/mali_kbase_mem_linux.h
@@ -303,22 +303,6 @@ void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
*/
void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map);
-/** @brief Allocate memory from kernel space and map it onto the GPU
- *
- * @param kctx The context used for the allocation/mapping
- * @param size The size of the allocation in bytes
- * @param handle An opaque structure used to contain the state needed to free the memory
- * @return the VA for kernel space and GPU MMU
- */
-void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle);
-
-/** @brief Free/unmap memory allocated by kbase_va_alloc
- *
- * @param kctx The context used for the allocation/mapping
- * @param handle An opaque structure returned by the kbase_va_alloc function.
- */
-void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle);
-
extern const struct vm_operations_struct kbase_vm_ops;
/**
diff --git a/mali_kbase/mali_kbase_mmu.c b/mali_kbase/mali_kbase_mmu.c
index 5e6732a..84341ca 100644
--- a/mali_kbase/mali_kbase_mmu.c
+++ b/mali_kbase/mali_kbase_mmu.c
@@ -45,7 +45,7 @@
#include <mali_kbase_hw.h>
#include <mali_kbase_mmu_hw.h>
#include <mali_kbase_hwaccess_jm.h>
-#include <mali_kbase_time.h>
+#include <mali_kbase_hwaccess_time.h>
#include <mali_kbase_mem.h>
#define KBASE_MMU_PAGE_ENTRIES 512
@@ -1404,7 +1404,6 @@ static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx,
err = kbase_mmu_hw_do_operation(kbdev,
&kbdev->as[kctx->as_nr],
vpfn, nr, op, 0);
-#if KBASE_GPU_RESET_EN
if (err) {
/* Flush failed to complete, assume the
* GPU has hung and perform a reset to
@@ -1414,7 +1413,6 @@ static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx,
if (kbase_prepare_to_reset_gpu_locked(kbdev))
kbase_reset_gpu_locked(kbdev);
}
-#endif /* KBASE_GPU_RESET_EN */
#ifndef CONFIG_MALI_NO_MALI
/*
@@ -1454,7 +1452,6 @@ static void kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev,
err = kbase_mmu_hw_do_operation(kbdev,
as, vpfn, nr, op, 0);
-#if KBASE_GPU_RESET_EN
if (err) {
/* Flush failed to complete, assume the GPU has hung and
* perform a reset to recover
@@ -1464,7 +1461,6 @@ static void kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev,
if (kbase_prepare_to_reset_gpu(kbdev))
kbase_reset_gpu(kbdev);
}
-#endif /* KBASE_GPU_RESET_EN */
mutex_unlock(&kbdev->mmu_hw_mutex);
/* AS transaction end */
@@ -2054,9 +2050,7 @@ void bus_fault_worker(struct work_struct *data)
struct kbase_context *kctx;
struct kbase_device *kbdev;
struct kbase_fault *fault;
-#if KBASE_GPU_RESET_EN
bool reset_status = false;
-#endif /* KBASE_GPU_RESET_EN */
faulting_as = container_of(data, struct kbase_as, work_busfault);
fault = &faulting_as->bf_data;
@@ -2088,7 +2082,6 @@ void bus_fault_worker(struct work_struct *data)
}
-#if KBASE_GPU_RESET_EN
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
/* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
* We start the reset before switching to UNMAPPED to ensure that unrelated jobs
@@ -2097,7 +2090,6 @@ void bus_fault_worker(struct work_struct *data)
dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n");
reset_status = kbase_prepare_to_reset_gpu(kbdev);
}
-#endif /* KBASE_GPU_RESET_EN */
/* NOTE: If GPU already powered off for suspend, we don't need to switch to unmapped */
if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
unsigned long flags;
@@ -2122,10 +2114,8 @@ void bus_fault_worker(struct work_struct *data)
kbase_pm_context_idle(kbdev);
}
-#if KBASE_GPU_RESET_EN
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
kbase_reset_gpu(kbdev);
-#endif /* KBASE_GPU_RESET_EN */
kbasep_js_runpool_release_ctx(kbdev, kctx);
@@ -2336,9 +2326,7 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
struct kbase_device *kbdev;
struct kbasep_js_device_data *js_devdata;
-#if KBASE_GPU_RESET_EN
bool reset_status = false;
-#endif
as_no = as->number;
kbdev = kctx->kbdev;
@@ -2375,11 +2363,9 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) &&
(kbdev->hwcnt.backend.state ==
KBASE_INSTR_STATE_DUMPING)) {
- unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
-
if ((fault->addr >= kbdev->hwcnt.addr) &&
(fault->addr < (kbdev->hwcnt.addr +
- (num_core_groups * 2048))))
+ kbdev->hwcnt.addr_bytes)))
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT;
}
@@ -2394,7 +2380,6 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
kbase_backend_jm_kill_jobs_from_kctx(kctx);
/* AS transaction begin */
mutex_lock(&kbdev->mmu_hw_mutex);
-#if KBASE_GPU_RESET_EN
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
/* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
* We start the reset before switching to UNMAPPED to ensure that unrelated jobs
@@ -2403,7 +2388,6 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
dev_err(kbdev->dev, "Unhandled page fault. For this GPU version we now soft-reset the GPU as part of page fault recovery.");
reset_status = kbase_prepare_to_reset_gpu(kbdev);
}
-#endif /* KBASE_GPU_RESET_EN */
/* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbase_mmu_disable(kctx);
@@ -2417,10 +2401,8 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
kbase_mmu_hw_enable_fault(kbdev, as,
KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
-#if KBASE_GPU_RESET_EN
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
kbase_reset_gpu(kbdev);
-#endif /* KBASE_GPU_RESET_EN */
}
void kbasep_as_do_poke(struct work_struct *work)
@@ -2608,7 +2590,6 @@ void kbase_mmu_interrupt_process(struct kbase_device *kbdev,
KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
}
-#if KBASE_GPU_RESET_EN
if (kbase_as_has_bus_fault(as) &&
kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
bool reset_status;
@@ -2622,7 +2603,6 @@ void kbase_mmu_interrupt_process(struct kbase_device *kbdev,
if (reset_status)
kbase_reset_gpu_locked(kbdev);
}
-#endif /* KBASE_GPU_RESET_EN */
return;
}
diff --git a/mali_kbase/mali_kbase_pm.c b/mali_kbase/mali_kbase_pm.c
index d5b8c77..5699eb8 100644
--- a/mali_kbase/mali_kbase_pm.c
+++ b/mali_kbase/mali_kbase_pm.c
@@ -30,6 +30,7 @@
#include <mali_kbase.h>
#include <mali_midg_regmap.h>
#include <mali_kbase_vinstr.h>
+#include <mali_kbase_hwcnt_context.h>
#include <mali_kbase_pm.h>
@@ -83,10 +84,6 @@ int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbas
* the policy */
kbase_hwaccess_pm_gpu_active(kbdev);
}
-#if defined(CONFIG_DEVFREQ_THERMAL) && defined(CONFIG_MALI_DEVFREQ)
- if (kbdev->ipa.gpu_active_callback)
- kbdev->ipa.gpu_active_callback(kbdev->ipa.model_data);
-#endif
mutex_unlock(&kbdev->pm.lock);
mutex_unlock(&js_devdata->runpool_mutex);
@@ -118,25 +115,11 @@ void kbase_pm_context_idle(struct kbase_device *kbdev)
/* Wake up anyone waiting for this to become 0 (e.g. suspend). The
* waiters must synchronize with us by locking the pm.lock after
- * waiting */
+ * waiting.
+ */
wake_up(&kbdev->pm.zero_active_count_wait);
}
-#if defined(CONFIG_DEVFREQ_THERMAL) && defined(CONFIG_MALI_DEVFREQ)
- /* IPA may be using vinstr, in which case there may be one PM reference
- * still held when all other contexts have left the GPU. Inform IPA that
- * the GPU is now idle so that vinstr can drop it's reference.
- *
- * If the GPU was only briefly active then it might have gone idle
- * before vinstr has taken a PM reference, meaning that active_count is
- * zero. We still need to inform IPA in this case, so that vinstr can
- * drop the PM reference and avoid keeping the GPU powered
- * unnecessarily.
- */
- if (c <= 1 && kbdev->ipa.gpu_idle_callback)
- kbdev->ipa.gpu_idle_callback(kbdev->ipa.model_data);
-#endif
-
mutex_unlock(&kbdev->pm.lock);
mutex_unlock(&js_devdata->runpool_mutex);
}
@@ -147,10 +130,16 @@ void kbase_pm_suspend(struct kbase_device *kbdev)
{
KBASE_DEBUG_ASSERT(kbdev);
- /* Suspend vinstr.
- * This call will block until vinstr is suspended. */
+ /* Suspend vinstr. This blocks until the vinstr worker and timer are
+ * no longer running.
+ */
kbase_vinstr_suspend(kbdev->vinstr_ctx);
+ /* Disable GPU hardware counters.
+ * This call will block until counters are disabled.
+ */
+ kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
+
mutex_lock(&kbdev->pm.lock);
KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
kbdev->pm.suspending = true;
@@ -177,6 +166,8 @@ void kbase_pm_suspend(struct kbase_device *kbdev)
void kbase_pm_resume(struct kbase_device *kbdev)
{
+ unsigned long flags;
+
/* MUST happen before any pm_context_active calls occur */
kbase_hwaccess_pm_resume(kbdev);
@@ -195,7 +186,11 @@ void kbase_pm_resume(struct kbase_device *kbdev)
* need it and the policy doesn't want it on */
kbase_pm_context_idle(kbdev);
- /* Resume vinstr operation */
+ /* Re-enable GPU hardware counters */
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ /* Resume vinstr */
kbase_vinstr_resume(kbdev->vinstr_ctx);
}
-
diff --git a/mali_kbase/mali_kbase_softjobs.c b/mali_kbase/mali_kbase_softjobs.c
index a3090c1..e762af4 100644
--- a/mali_kbase/mali_kbase_softjobs.c
+++ b/mali_kbase/mali_kbase_softjobs.c
@@ -1129,8 +1129,9 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom)
reg = kctx->jit_alloc[info->id];
new_addr = reg->start_pfn << PAGE_SHIFT;
*ptr = new_addr;
- KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(
- katom, info->gpu_alloc_addr, new_addr);
+ KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(katom,
+ info->gpu_alloc_addr,
+ new_addr, info->va_pages);
kbase_vunmap(kctx, &mapping);
}
diff --git a/mali_kbase/mali_kbase_tlstream.c b/mali_kbase/mali_kbase_tlstream.c
index aaf5782..10e3889 100644
--- a/mali_kbase/mali_kbase_tlstream.c
+++ b/mali_kbase/mali_kbase_tlstream.c
@@ -170,7 +170,8 @@ enum tl_msg_id_aux {
KBASE_AUX_PROTECTED_ENTER_START,
KBASE_AUX_PROTECTED_ENTER_END,
KBASE_AUX_PROTECTED_LEAVE_START,
- KBASE_AUX_PROTECTED_LEAVE_END
+ KBASE_AUX_PROTECTED_LEAVE_END,
+ KBASE_AUX_JIT_STATS,
};
/*****************************************************************************/
@@ -448,8 +449,8 @@ static const struct tp_desc tp_desc_obj[] = {
KBASE_TL_ATTRIB_ATOM_JIT,
__stringify(KBASE_TL_ATTRIB_ATOM_JIT),
"jit done for atom",
- "@pLL",
- "atom,edit_addr,new_addr"
+ "@pLLL",
+ "atom,edit_addr,new_addr,va_pages"
},
{
KBASE_TL_ATTRIB_ATOM_JITALLOCINFO,
@@ -573,6 +574,13 @@ static const struct tp_desc tp_desc_aux[] = {
"leave protected mode end",
"@p",
"gpu"
+ },
+ {
+ KBASE_AUX_JIT_STATS,
+ __stringify(KBASE_AUX_JIT_STATS),
+ "per-bin JIT statistics",
+ "@IIIIII",
+ "ctx_nr,bid,max_allocs,allocs,va_pages,ph_pages"
}
};
@@ -2165,12 +2173,12 @@ void __kbase_tlstream_tl_attrib_atom_prioritized(void *atom)
}
void __kbase_tlstream_tl_attrib_atom_jit(
- void *atom, u64 edit_addr, u64 new_addr)
+ void *atom, u64 edit_addr, u64 new_addr, u64 va_pages)
{
const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JIT;
const size_t msg_size =
sizeof(msg_id) + sizeof(u64) + sizeof(atom)
- + sizeof(edit_addr) + sizeof(new_addr);
+ + sizeof(edit_addr) + sizeof(new_addr) + sizeof(va_pages);
unsigned long flags;
char *buffer;
size_t pos = 0;
@@ -2188,6 +2196,9 @@ void __kbase_tlstream_tl_attrib_atom_jit(
buffer, pos, &edit_addr, sizeof(edit_addr));
pos = kbasep_tlstream_write_bytes(
buffer, pos, &new_addr, sizeof(new_addr));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &va_pages, sizeof(va_pages));
+
KBASE_DEBUG_ASSERT(msg_size == pos);
kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
@@ -2624,3 +2635,40 @@ void __kbase_tlstream_aux_protected_leave_end(void *gpu)
kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
}
+
+void __kbase_tlstream_aux_jit_stats(u32 ctx_nr, u32 bid,
+ u32 max_allocs, u32 allocs,
+ u32 va_pages, u32 ph_pages)
+{
+ const u32 msg_id = KBASE_AUX_JIT_STATS;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) +
+ sizeof(ctx_nr) + sizeof(bid) +
+ sizeof(max_allocs) + sizeof(allocs) +
+ sizeof(va_pages) + sizeof(ph_pages);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_AUX,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &ctx_nr, sizeof(ctx_nr));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &bid, sizeof(bid));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &max_allocs, sizeof(max_allocs));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &allocs, sizeof(allocs));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &va_pages, sizeof(va_pages));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &ph_pages, sizeof(ph_pages));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
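
Each KBASE_AUX_JIT_STATS record is the 4-byte message id, an 8-byte timestamp (the sizeof(u64) term above, written by kbasep_tlstream_write_timestamp), and six u32 payload fields matching the "@IIIIII" descriptor. A quick standalone check of the expected message size:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* msg_id + timestamp + ctx_nr, bid, max_allocs, allocs,
	 * va_pages, ph_pages
	 */
	size_t msg_size = sizeof(uint32_t)	/* msg_id */
			+ sizeof(uint64_t)	/* timestamp */
			+ 6 * sizeof(uint32_t);	/* payload fields */

	printf("KBASE_AUX_JIT_STATS message size: %zu bytes\n", msg_size);
	return 0;	/* prints 36 */
}
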
diff --git a/mali_kbase/mali_kbase_tlstream.h b/mali_kbase/mali_kbase_tlstream.h
index 6f9656f..e2a3ea4 100644
--- a/mali_kbase/mali_kbase_tlstream.h
+++ b/mali_kbase/mali_kbase_tlstream.h
@@ -141,7 +141,7 @@ void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio);
void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state);
void __kbase_tlstream_tl_attrib_atom_prioritized(void *atom);
void __kbase_tlstream_tl_attrib_atom_jit(
- void *atom, u64 edit_addr, u64 new_addr);
+ void *atom, u64 edit_addr, u64 new_addr, u64 va_pages);
void __kbase_tlstream_tl_attrib_atom_jitallocinfo(
void *atom, u64 va_pages, u64 commit_pages, u64 extent,
u32 jit_id, u32 bin_id, u32 max_allocations, u32 flags,
@@ -163,6 +163,9 @@ void __kbase_tlstream_aux_protected_enter_start(void *gpu);
void __kbase_tlstream_aux_protected_enter_end(void *gpu);
void __kbase_tlstream_aux_protected_leave_start(void *gpu);
void __kbase_tlstream_aux_protected_leave_end(void *gpu);
+void __kbase_tlstream_aux_jit_stats(u32 ctx_nr, u32 bin_id,
+ u32 max_allocations, u32 allocations,
+ u32 va_pages_nr, u32 ph_pages_nr);
#define TLSTREAM_ENABLED (1 << 31)
@@ -472,9 +475,11 @@ extern atomic_t kbase_tlstream_enabled;
* @atom: atom identifier
* @edit_addr: address edited by jit
* @new_addr: address placed into the edited location
+ * @va_pages: maximum number of pages this jit can allocate
*/
-#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(atom, edit_addr, new_addr) \
- __TRACE_IF_ENABLED_JD(tl_attrib_atom_jit, atom, edit_addr, new_addr)
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(atom, edit_addr, new_addr, va_pages) \
+ __TRACE_IF_ENABLED_JD(tl_attrib_atom_jit, atom, edit_addr, \
+ new_addr, va_pages)
/**
* Information about the JIT allocation atom.
@@ -652,5 +657,24 @@ extern atomic_t kbase_tlstream_enabled;
#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(gpu) \
__TRACE_IF_ENABLED_LATENCY(aux_protected_leave_end, gpu)
+/**
+ * KBASE_TLSTREAM_AUX_JIT_STATS - JIT allocations per bin statistics
+ *
+ * @ctx_nr: kernel context number
+ * @bid: JIT bin id
+ * @max_allocs: maximum allocations allowed in this bin.
+ *              UINT_MAX is a special value meaning that the limit
+ *              has not changed since the previous event.
+ * @allocs: number of active allocations in this bin
+ * @va_pages: number of virtual pages allocated in this bin
+ * @ph_pages: number of physical pages allocated in this bin
+ *
+ * This function emits a timeline message indicating that the JIT
+ * statistics for a given bin have changed.
+ */
+#define KBASE_TLSTREAM_AUX_JIT_STATS(ctx_nr, bid, max_allocs, allocs, va_pages, ph_pages) \
+ __TRACE_IF_ENABLED(aux_jit_stats, ctx_nr, bid, \
+ max_allocs, allocs, \
+ va_pages, ph_pages)
#endif /* _KBASE_TLSTREAM_H */
diff --git a/mali_kbase/mali_kbase_trace_defs.h b/mali_kbase/mali_kbase_trace_defs.h
index d7364d5..77fb818 100644
--- a/mali_kbase/mali_kbase_trace_defs.h
+++ b/mali_kbase/mali_kbase_trace_defs.h
@@ -172,8 +172,6 @@ int dummy_array[] = {
KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_TRY_RUN_NEXT_JOB),
/* gpu_addr==value to write into JS_HEAD */
KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_RETRY_NEEDED),
- /* kctx is the one being evicted, info_val == kctx to put in */
- KBASE_TRACE_CODE_MAKE_CODE(JS_FAST_START_EVICTS_CTX),
KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_SUBMIT_TO_BLOCKED),
/* info_val == lower 32 bits of affinity */
KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_CURRENT),
diff --git a/mali_kbase/mali_kbase_utility.c b/mali_kbase/mali_kbase_utility.c
deleted file mode 100644
index 3ea234a..0000000
--- a/mali_kbase/mali_kbase_utility.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- */
-
-
-
-#include <mali_kbase.h>
-
-bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry)
-{
- struct list_head *pos = base->next;
-
- while (pos != base) {
- if (pos == entry)
- return true;
-
- pos = pos->next;
- }
- return false;
-}
diff --git a/mali_kbase/mali_kbase_utility.h b/mali_kbase/mali_kbase_utility.h
index f2e5a33..8d4f044 100644
--- a/mali_kbase/mali_kbase_utility.h
+++ b/mali_kbase/mali_kbase_utility.h
@@ -29,17 +29,6 @@
#error "Don't include this file directly, use mali_kbase.h instead"
#endif
-/** Test whether the given list entry is a member of the given list.
- *
- * @param base The head of the list to be tested
- * @param entry The list entry to be tested
- *
- * @return true if entry is a member of base
- * false otherwise
- */
-bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry);
-
-
static inline void kbase_timer_setup(struct timer_list *timer,
void (*callback)(struct timer_list *timer))
{
diff --git a/mali_kbase/mali_kbase_vinstr.c b/mali_kbase/mali_kbase_vinstr.c
index df936cf..51cb365 100644
--- a/mali_kbase/mali_kbase_vinstr.c
+++ b/mali_kbase/mali_kbase_vinstr.c
@@ -20,221 +20,109 @@
*
*/
+#include "mali_kbase_vinstr.h"
+#include "mali_kbase_hwcnt_virtualizer.h"
+#include "mali_kbase_hwcnt_types.h"
+#include "mali_kbase_hwcnt_reader.h"
+#include "mali_kbase_hwcnt_gpu.h"
+#include "mali_kbase_ioctl.h"
+#include "mali_malisw.h"
+#include "mali_kbase_debug.h"
+
#include <linux/anon_inodes.h>
-#include <linux/atomic.h>
+#include <linux/fcntl.h>
+#include <linux/fs.h>
#include <linux/hrtimer.h>
-#include <linux/jiffies.h>
-#include <linux/kthread.h>
-#include <linux/list.h>
#include <linux/mm.h>
+#include <linux/mutex.h>
#include <linux/poll.h>
-#include <linux/preempt.h>
#include <linux/slab.h>
-#include <linux/wait.h>
-
-#include <mali_kbase.h>
-#include <mali_kbase_hwaccess_instr.h>
-#include <mali_kbase_hwaccess_jm.h>
-#include <mali_kbase_hwcnt_reader.h>
-#include <mali_kbase_mem_linux.h>
-#include <mali_kbase_tlstream.h>
-#ifdef CONFIG_MALI_NO_MALI
-#include <backend/gpu/mali_kbase_model_dummy.h>
-#endif
-
-/*****************************************************************************/
+#include <linux/workqueue.h>
/* Hwcnt reader API version */
-#define HWCNT_READER_API 1
-
-/* The number of nanoseconds in a second. */
-#define NSECS_IN_SEC 1000000000ull /* ns */
-
-/* The time resolution of dumping service. */
-#define DUMPING_RESOLUTION 500000ull /* ns */
+#define HWCNT_READER_API 1
-/* The maximal supported number of dumping buffers. */
-#define MAX_BUFFER_COUNT 32
+/* The minimum allowed interval between dumps (equivalent to 10KHz) */
+#define DUMP_INTERVAL_MIN_NS (100 * NSEC_PER_USEC)
-/* Size and number of hw counters blocks. */
-#define NR_CNT_BLOCKS_PER_GROUP 8
-#define NR_CNT_PER_BLOCK 64
-#define NR_BYTES_PER_CNT 4
-#define NR_BYTES_PER_HDR 16
-#define PRFCNT_EN_MASK_OFFSET 0x8
-
-/*****************************************************************************/
-
-enum {
- SHADER_HWCNT_BM,
- TILER_HWCNT_BM,
- MMU_L2_HWCNT_BM,
- JM_HWCNT_BM
-};
-
-enum vinstr_state {
- VINSTR_IDLE,
- VINSTR_DUMPING,
- VINSTR_SUSPENDING,
- VINSTR_SUSPENDED,
- VINSTR_RESUMING
-};
+/* The maximum allowed buffers per client */
+#define MAX_BUFFER_COUNT 32
/**
- * struct kbase_vinstr_context - vinstr context per device
- * @lock: protects the entire vinstr context, but the list of
- * vinstr clients can be updated outside the lock using
- * @state_lock.
- * @kbdev: pointer to kbase device
- * @kctx: pointer to kbase context
- * @vmap: vinstr vmap for mapping hwcnt dump buffer
- * @gpu_va: GPU hwcnt dump buffer address
- * @cpu_va: the CPU side mapping of the hwcnt dump buffer
- * @dump_size: size of the dump buffer in bytes
- * @bitmap: current set of counters monitored, not always in sync
- * with hardware
- * @reprogram: when true, reprogram hwcnt block with the new set of
- * counters
- * @state: vinstr state
- * @state_lock: protects information about vinstr state and list of
- * clients.
- * @suspend_waitq: notification queue to trigger state re-validation
- * @suspend_cnt: reference counter of vinstr's suspend state
- * @suspend_work: worker to execute on entering suspended state
- * @resume_work: worker to execute on leaving suspended state
- * @nclients: number of attached clients, pending or idle
- * @nclients_suspended: number of attached but suspended clients
- * @waiting_clients: head of list of clients being periodically sampled
- * @idle_clients: head of list of clients being idle
- * @suspended_clients: head of list of clients being suspended
- * @thread: periodic sampling thread
- * @waitq: notification queue of sampling thread
- * @request_pending: request for action for sampling thread
- * @clients_present: when true, we have at least one client
- * Note: this variable is in sync. with nclients and is
- * present to preserve simplicity. Protected by state_lock.
- * @need_suspend: when true, a suspend has been requested while a resume is
- * in progress. Resume worker should queue a suspend.
- * @need_resume: when true, a resume has been requested while a suspend is
- * in progress. Suspend worker should queue a resume.
- * @forced_suspend: when true, the suspend of vinstr needs to take place
- * regardless of the kernel/user space clients attached
- * to it. In particular, this flag is set when the suspend
- * of vinstr is requested on entering protected mode or at
- * the time of device suspend.
+ * struct kbase_vinstr_context - IOCTL interface for userspace hardware
+ * counters.
+ * @hvirt: Hardware counter virtualizer used by vinstr.
+ * @metadata: Hardware counter metadata provided by virtualizer.
+ * @lock: Lock protecting all vinstr state.
+ * @suspend_count: Suspend reference count. If non-zero, timer and worker are
+ * prevented from being re-scheduled.
+ * @client_count: Number of vinstr clients.
+ * @clients: List of vinstr clients.
+ * @dump_timer: Timer that enqueues dump_work to a workqueue.
+ * @dump_work: Worker for performing periodic counter dumps.
*/
struct kbase_vinstr_context {
- struct mutex lock;
- struct kbase_device *kbdev;
- struct kbase_context *kctx;
-
- struct kbase_vmap_struct *vmap;
- u64 gpu_va;
- void *cpu_va;
- size_t dump_size;
- u32 bitmap[4];
- bool reprogram;
-
- enum vinstr_state state;
- struct spinlock state_lock;
- wait_queue_head_t suspend_waitq;
- unsigned int suspend_cnt;
- struct work_struct suspend_work;
- struct work_struct resume_work;
-
- u32 nclients;
- u32 nclients_suspended;
- struct list_head waiting_clients;
- struct list_head idle_clients;
- struct list_head suspended_clients;
-
- struct task_struct *thread;
- wait_queue_head_t waitq;
- atomic_t request_pending;
-
- bool clients_present;
-
- bool need_suspend;
- bool need_resume;
- bool forced_suspend;
+ struct kbase_hwcnt_virtualizer *hvirt;
+ const struct kbase_hwcnt_metadata *metadata;
+ struct mutex lock;
+ size_t suspend_count;
+ size_t client_count;
+ struct list_head clients;
+ struct hrtimer dump_timer;
+ struct work_struct dump_work;
};
/**
- * struct kbase_vinstr_client - a vinstr client attached to a vinstr context
- * @vinstr_ctx: vinstr context client is attached to
- * @list: node used to attach this client to list in vinstr context
- * @buffer_count: number of buffers this client is using
- * @event_mask: events this client reacts to
- * @dump_size: size of one dump buffer in bytes
- * @bitmap: bitmap request for JM, TILER, SHADER and MMU counters
- * @legacy_buffer: userspace hwcnt dump buffer (legacy interface)
- * @kernel_buffer: kernel hwcnt dump buffer (kernel client interface)
- * @accum_buffer: temporary accumulation buffer for preserving counters
- * @dump_time: next time this clients shall request hwcnt dump
- * @dump_interval: interval between periodic hwcnt dumps
- * @dump_buffers: kernel hwcnt dump buffers allocated by this client
- * @dump_buffers_meta: metadata of dump buffers
- * @meta_idx: index of metadata being accessed by userspace
- * @read_idx: index of buffer read by userspace
- * @write_idx: index of buffer being written by dumping service
- * @waitq: client's notification queue
- * @pending: when true, client has attached but hwcnt not yet updated
- * @suspended: when true, client is suspended
+ * struct kbase_vinstr_client - A vinstr client attached to a vinstr context.
+ * @vctx: Vinstr context client is attached to.
+ * @hvcli: Hardware counter virtualizer client.
+ * @node: Node used to attach this client to list in vinstr
+ * context.
+ * @dump_interval_ns: Interval between periodic dumps. If 0, not a periodic
+ * client.
+ * @next_dump_time_ns: Time in ns when this client's next periodic dump must
+ * occur. If 0, not a periodic client.
+ * @enable_map: Counters enable map.
+ * @dump_bufs: Array of dump buffers allocated by this client.
+ * @dump_bufs_meta: Metadata of dump buffers.
+ * @meta_idx: Index of metadata being accessed by userspace.
+ * @read_idx: Index of buffer read by userspace.
+ * @write_idx: Index of buffer being written by dump worker.
+ * @waitq: Client's notification queue.
*/
struct kbase_vinstr_client {
- struct kbase_vinstr_context *vinstr_ctx;
- struct list_head list;
- unsigned int buffer_count;
- u32 event_mask;
- size_t dump_size;
- u32 bitmap[4];
- void __user *legacy_buffer;
- void *kernel_buffer;
- void *accum_buffer;
- u64 dump_time;
- u32 dump_interval;
- char *dump_buffers;
- struct kbase_hwcnt_reader_metadata *dump_buffers_meta;
- atomic_t meta_idx;
- atomic_t read_idx;
- atomic_t write_idx;
- wait_queue_head_t waitq;
- bool pending;
- bool suspended;
-};
-
-/**
- * struct kbasep_vinstr_wake_up_timer - vinstr service thread wake up timer
- * @hrtimer: high resolution timer
- * @vinstr_ctx: vinstr context
- */
-struct kbasep_vinstr_wake_up_timer {
- struct hrtimer hrtimer;
- struct kbase_vinstr_context *vinstr_ctx;
+ struct kbase_vinstr_context *vctx;
+ struct kbase_hwcnt_virtualizer_client *hvcli;
+ struct list_head node;
+ u64 next_dump_time_ns;
+ u32 dump_interval_ns;
+ struct kbase_hwcnt_enable_map enable_map;
+ struct kbase_hwcnt_dump_buffer_array dump_bufs;
+ struct kbase_hwcnt_reader_metadata *dump_bufs_meta;
+ atomic_t meta_idx;
+ atomic_t read_idx;
+ atomic_t write_idx;
+ wait_queue_head_t waitq;
};
-/*****************************************************************************/
-
-static void kbase_vinstr_update_suspend(
- struct kbase_vinstr_context *vinstr_ctx);
-
-static int kbasep_vinstr_service_task(void *data);
-
static unsigned int kbasep_vinstr_hwcnt_reader_poll(
- struct file *filp,
- poll_table *wait);
+ struct file *filp,
+ poll_table *wait);
+
static long kbasep_vinstr_hwcnt_reader_ioctl(
- struct file *filp,
- unsigned int cmd,
- unsigned long arg);
+ struct file *filp,
+ unsigned int cmd,
+ unsigned long arg);
+
static int kbasep_vinstr_hwcnt_reader_mmap(
- struct file *filp,
- struct vm_area_struct *vma);
+ struct file *filp,
+ struct vm_area_struct *vma);
+
static int kbasep_vinstr_hwcnt_reader_release(
- struct inode *inode,
- struct file *filp);
+ struct inode *inode,
+ struct file *filp);
-/* The timeline stream file operations structure. */
+/* Vinstr client file operations */
static const struct file_operations vinstr_client_fops = {
.poll = kbasep_vinstr_hwcnt_reader_poll,
.unlocked_ioctl = kbasep_vinstr_hwcnt_reader_ioctl,
@@ -243,1211 +131,546 @@ static const struct file_operations vinstr_client_fops = {
.release = kbasep_vinstr_hwcnt_reader_release,
};
-/*****************************************************************************/
-
-static int enable_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+/**
+ * kbasep_vinstr_timestamp_ns() - Get the current time in nanoseconds.
+ *
+ * Return: Current time in nanoseconds.
+ */
+static u64 kbasep_vinstr_timestamp_ns(void)
{
- struct kbase_context *kctx = vinstr_ctx->kctx;
- struct kbase_device *kbdev = kctx->kbdev;
- struct kbase_ioctl_hwcnt_enable enable;
- int err;
-
- enable.dump_buffer = vinstr_ctx->gpu_va;
- enable.jm_bm = vinstr_ctx->bitmap[JM_HWCNT_BM];
- enable.tiler_bm = vinstr_ctx->bitmap[TILER_HWCNT_BM];
- enable.shader_bm = vinstr_ctx->bitmap[SHADER_HWCNT_BM];
- enable.mmu_l2_bm = vinstr_ctx->bitmap[MMU_L2_HWCNT_BM];
-
- /* Mark the context as active so the GPU is kept turned on */
- /* A suspend won't happen here, because we're in a syscall from a
- * userspace thread. */
- kbase_pm_context_active(kbdev);
-
- /* Schedule the context in */
- kbasep_js_schedule_privileged_ctx(kbdev, kctx);
- err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable);
- if (err) {
- /* Release the context. This had its own Power Manager Active
- * reference */
- kbasep_js_release_privileged_ctx(kbdev, kctx);
-
- /* Also release our Power Manager Active reference */
- kbase_pm_context_idle(kbdev);
- }
+ struct timespec ts;
- return err;
+ getrawmonotonic(&ts);
+ return (u64)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
}
-static void disable_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+/**
+ * kbasep_vinstr_next_dump_time_ns() - Calculate the next periodic dump time.
+ * @cur_ts_ns: Current time in nanoseconds.
+ * @interval: Interval between dumps in nanoseconds.
+ *
+ * Return: 0 if interval is 0 (i.e. a non-periodic client), or the next dump
+ * time that occurs after cur_ts_ns.
+ */
+static u64 kbasep_vinstr_next_dump_time_ns(u64 cur_ts_ns, u32 interval)
{
- struct kbase_context *kctx = vinstr_ctx->kctx;
- struct kbase_device *kbdev = kctx->kbdev;
- int err;
-
- err = kbase_instr_hwcnt_disable_internal(kctx);
- if (err) {
- dev_warn(kbdev->dev, "Failed to disable HW counters (ctx:%p)",
- kctx);
- return;
- }
-
- /* Release the context. This had its own Power Manager Active reference. */
- kbasep_js_release_privileged_ctx(kbdev, kctx);
+ /* Non-periodic client */
+ if (interval == 0)
+ return 0;
- /* Also release our Power Manager Active reference. */
- kbase_pm_context_idle(kbdev);
-
- dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", kctx);
+ /*
+ * Return the next interval after the current time relative to t=0.
+ * This means multiple clients with the same period will synchronise,
+ * regardless of when they were started, allowing the worker to be
+ * scheduled less frequently.
+ */
+ do_div(cur_ts_ns, interval);
+ return (cur_ts_ns + 1) * interval;
}
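/*
 * Editor's sketch (not driver code): the same "round up to the next multiple
 * of the interval" rule as above, shown with plain 64-bit division. Because
 * the result is aligned to t=0 rather than to each client's start time, two
 * clients sharing a period always land on the same dump instant.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t next_dump_time_ns(uint64_t cur_ts_ns, uint32_t interval)
{
	if (interval == 0)
		return 0;	/* non-periodic client */
	return (cur_ts_ns / interval + 1) * interval;
}

int main(void)
{
	/* Two 1 ms clients sampled at different moments... */
	printf("%llu\n", (unsigned long long)next_dump_time_ns(2500000, 1000000));
	printf("%llu\n", (unsigned long long)next_dump_time_ns(2999999, 1000000));
	/* ...both print 3000000, so one worker wakeup serves them both. */
	return 0;
}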
-static int reprogram_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
-{
- disable_hwcnt(vinstr_ctx);
- return enable_hwcnt(vinstr_ctx);
-}
+/**
+ * kbasep_vinstr_client_dump() - Perform a dump for a client.
+ * @vcli: Non-NULL pointer to a vinstr client.
+ * @event_id: Event type that triggered the dump.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_vinstr_client_dump(
+ struct kbase_vinstr_client *vcli,
+ enum base_hwcnt_reader_event event_id)
+{
+ int errcode;
+ u64 ts_start_ns;
+ u64 ts_end_ns;
+ unsigned int write_idx;
+ unsigned int read_idx;
+ struct kbase_hwcnt_dump_buffer *dump_buf;
+ struct kbase_hwcnt_reader_metadata *meta;
-static void hwcnt_bitmap_set(u32 dst[4], u32 src[4])
-{
- dst[JM_HWCNT_BM] = src[JM_HWCNT_BM];
- dst[TILER_HWCNT_BM] = src[TILER_HWCNT_BM];
- dst[SHADER_HWCNT_BM] = src[SHADER_HWCNT_BM];
- dst[MMU_L2_HWCNT_BM] = src[MMU_L2_HWCNT_BM];
-}
+ WARN_ON(!vcli);
+ lockdep_assert_held(&vcli->vctx->lock);
-static void hwcnt_bitmap_union(u32 dst[4], u32 src[4])
-{
- dst[JM_HWCNT_BM] |= src[JM_HWCNT_BM];
- dst[TILER_HWCNT_BM] |= src[TILER_HWCNT_BM];
- dst[SHADER_HWCNT_BM] |= src[SHADER_HWCNT_BM];
- dst[MMU_L2_HWCNT_BM] |= src[MMU_L2_HWCNT_BM];
-}
+ write_idx = atomic_read(&vcli->write_idx);
+ read_idx = atomic_read(&vcli->read_idx);
-size_t kbase_vinstr_dump_size(struct kbase_device *kbdev)
-{
- size_t dump_size;
-
-#ifndef CONFIG_MALI_NO_MALI
- if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) {
- u32 nr_cg;
-
- nr_cg = kbdev->gpu_props.num_core_groups;
- dump_size = nr_cg * NR_CNT_BLOCKS_PER_GROUP *
- NR_CNT_PER_BLOCK *
- NR_BYTES_PER_CNT;
- } else
-#endif /* CONFIG_MALI_NO_MALI */
- {
- /* assume v5 for now */
-#ifdef CONFIG_MALI_NO_MALI
- u32 nr_l2 = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS;
- u64 core_mask =
- (1ULL << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1;
-#else
- base_gpu_props *props = &kbdev->gpu_props.props;
- u32 nr_l2 = props->l2_props.num_l2_slices;
- u64 core_mask = props->coherency_info.group[0].core_mask;
-#endif
- u32 nr_blocks = fls64(core_mask);
+	/* Check whether there is a free slot to copy the HWC block into. */
+ if (write_idx - read_idx == vcli->dump_bufs.buf_cnt)
+ return -EBUSY;
+ write_idx %= vcli->dump_bufs.buf_cnt;
- /* JM and tiler counter blocks are always present */
- dump_size = (2 + nr_l2 + nr_blocks) *
- NR_CNT_PER_BLOCK *
- NR_BYTES_PER_CNT;
- }
- return dump_size;
-}
-KBASE_EXPORT_TEST_API(kbase_vinstr_dump_size);
+ dump_buf = &vcli->dump_bufs.bufs[write_idx];
+ meta = &vcli->dump_bufs_meta[write_idx];
-static size_t kbasep_vinstr_dump_size_ctx(
- struct kbase_vinstr_context *vinstr_ctx)
-{
- return kbase_vinstr_dump_size(vinstr_ctx->kctx->kbdev);
-}
+ errcode = kbase_hwcnt_virtualizer_client_dump(
+ vcli->hvcli, &ts_start_ns, &ts_end_ns, dump_buf);
+ if (errcode)
+ return errcode;
-static int kbasep_vinstr_map_kernel_dump_buffer(
- struct kbase_vinstr_context *vinstr_ctx)
-{
- struct kbase_va_region *reg;
- struct kbase_context *kctx = vinstr_ctx->kctx;
- u64 flags, nr_pages;
-
- flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR |
- BASE_MEM_PERMANENT_KERNEL_MAPPING | BASE_MEM_CACHED_CPU;
- if (kctx->kbdev->mmu_mode->flags &
- KBASE_MMU_MODE_HAS_NON_CACHEABLE)
- flags |= BASE_MEM_UNCACHED_GPU;
- vinstr_ctx->dump_size = kbasep_vinstr_dump_size_ctx(vinstr_ctx);
- nr_pages = PFN_UP(vinstr_ctx->dump_size);
-
- reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
- &vinstr_ctx->gpu_va);
- if (!reg)
- return -ENOMEM;
+	/* Patch the dump buffer headers to hide the counters that other hwcnt
+ * clients are using.
+ */
+ kbase_hwcnt_gpu_patch_dump_headers(dump_buf, &vcli->enable_map);
- vinstr_ctx->cpu_va = kbase_phy_alloc_mapping_get(kctx,
- vinstr_ctx->gpu_va, &vinstr_ctx->vmap);
+ /* Zero all non-enabled counters (current values are undefined) */
+ kbase_hwcnt_dump_buffer_zero_non_enabled(dump_buf, &vcli->enable_map);
- if (!vinstr_ctx->cpu_va) {
- kbase_mem_free(kctx, vinstr_ctx->gpu_va);
- return -ENOMEM;
- }
+ meta->timestamp = ts_end_ns;
+ meta->event_id = event_id;
+ meta->buffer_idx = write_idx;
+ /* Notify client. Make sure all changes to memory are visible. */
+ wmb();
+ atomic_inc(&vcli->write_idx);
+ wake_up_interruptible(&vcli->waitq);
return 0;
}
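/*
 * Editor's sketch (not driver code): the free-running index scheme used by
 * the dump path above. write_idx and read_idx only ever increase, so
 * unsigned subtraction gives the number of in-flight buffers even across
 * wraparound, and the slot to fill is the index taken modulo the buffer
 * count. The ring below is purely illustrative.
 */
#include <stdbool.h>
#include <stdio.h>

#define BUF_CNT 4u

static unsigned int write_idx, read_idx;

static bool ring_full(void)
{
	return write_idx - read_idx == BUF_CNT;
}

static unsigned int ring_claim_slot(void)
{
	return write_idx % BUF_CNT;	/* slot to fill before publishing */
}

int main(void)
{
	while (!ring_full()) {
		printf("filling slot %u\n", ring_claim_slot());
		write_idx++;	/* publish only after the data is written */
	}
	printf("full: the producer would return -EBUSY here\n");
	return 0;
}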
-static void kbasep_vinstr_unmap_kernel_dump_buffer(
- struct kbase_vinstr_context *vinstr_ctx)
+/**
+ * kbasep_vinstr_client_clear() - Reset all the client's counters to zero.
+ * @vcli: Non-NULL pointer to a vinstr client.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_vinstr_client_clear(struct kbase_vinstr_client *vcli)
{
- struct kbase_context *kctx = vinstr_ctx->kctx;
+ u64 ts_start_ns;
+ u64 ts_end_ns;
- kbase_phy_alloc_mapping_put(kctx, vinstr_ctx->vmap);
- kbase_mem_free(kctx, vinstr_ctx->gpu_va);
+ WARN_ON(!vcli);
+ lockdep_assert_held(&vcli->vctx->lock);
+
+ /* A virtualizer dump with a NULL buffer will just clear the virtualizer
+ * client's buffer.
+ */
+ return kbase_hwcnt_virtualizer_client_dump(
+ vcli->hvcli, &ts_start_ns, &ts_end_ns, NULL);
}
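/*
 * Editor's sketch (not driver code) of the "dump into NULL clears" convention
 * noted above: a dump always resets the accumulated counters; passing no
 * output buffer keeps the reset but discards the values. Names and values
 * are illustrative only.
 */
#include <stdio.h>

static unsigned int accum;

static void dump(unsigned int *out)
{
	if (out)
		*out = accum;	/* hand the accumulated value to the caller */
	accum = 0;		/* ...and always restart from zero */
}

int main(void)
{
	unsigned int val;

	accum = 42;
	dump(NULL);		/* clear only */
	accum = 7;
	dump(&val);
	printf("%u\n", val);	/* 7: the earlier 42 was discarded by the clear */
	return 0;
}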
/**
- * kbasep_vinstr_create_kctx - create kernel context for vinstr
- * @vinstr_ctx: vinstr context
- * Return: zero on success
+ * kbasep_vinstr_reschedule_worker() - Update next dump times for all periodic
+ * vinstr clients, then reschedule the dump
+ * worker appropriately.
+ * @vctx: Non-NULL pointer to the vinstr context.
+ *
+ * If there are no periodic clients, then the dump worker will not be
+ * rescheduled. Else, the dump worker will be rescheduled for the next periodic
+ * client dump.
*/
-static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx)
+static void kbasep_vinstr_reschedule_worker(struct kbase_vinstr_context *vctx)
{
- struct kbase_device *kbdev = vinstr_ctx->kbdev;
- struct kbasep_kctx_list_element *element = NULL;
- unsigned long flags;
- bool enable_backend = false;
- int err;
-
- vinstr_ctx->kctx = kbase_create_context(vinstr_ctx->kbdev, true);
- if (!vinstr_ctx->kctx)
- return -ENOMEM;
+ u64 cur_ts_ns;
+ u64 earliest_next_ns = U64_MAX;
+ struct kbase_vinstr_client *pos;
- /* Map the master kernel dump buffer. The HW dumps the counters
- * into this memory region. */
- err = kbasep_vinstr_map_kernel_dump_buffer(vinstr_ctx);
- if (err)
- goto failed_map;
-
- /* Add kernel context to list of contexts associated with device. */
- element = kzalloc(sizeof(*element), GFP_KERNEL);
- if (element) {
- element->kctx = vinstr_ctx->kctx;
- mutex_lock(&kbdev->kctx_list_lock);
- list_add(&element->link, &kbdev->kctx_list);
-
- /* Inform timeline client about new context.
- * Do this while holding the lock to avoid tracepoint
- * being created in both body and summary stream. */
- KBASE_TLSTREAM_TL_NEW_CTX(
- vinstr_ctx->kctx,
- vinstr_ctx->kctx->id,
- (u32)(vinstr_ctx->kctx->tgid));
-
- mutex_unlock(&kbdev->kctx_list_lock);
- } else {
- /* Don't treat this as a fail - just warn about it. */
- dev_warn(kbdev->dev,
- "couldn't add kctx to kctx_list\n");
- }
+ WARN_ON(!vctx);
+ lockdep_assert_held(&vctx->lock);
- /* Don't enable hardware counters if vinstr is suspended.
- * Note that vinstr resume code is run under vinstr context lock,
- * lower layer will be enabled as needed on resume. */
- spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
- if (VINSTR_IDLE == vinstr_ctx->state)
- enable_backend = true;
- vinstr_ctx->clients_present = true;
- spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
- if (enable_backend)
- err = enable_hwcnt(vinstr_ctx);
- if (err)
- goto failed_enable;
-
- vinstr_ctx->thread = kthread_run(
- kbasep_vinstr_service_task,
- vinstr_ctx,
- "mali_vinstr_service");
- if (IS_ERR(vinstr_ctx->thread)) {
- err = PTR_ERR(vinstr_ctx->thread);
- goto failed_kthread;
- }
+ cur_ts_ns = kbasep_vinstr_timestamp_ns();
- return 0;
+ /*
+ * Update each client's next dump time, and find the earliest next
+ * dump time if any of the clients have a non-zero interval.
+ */
+ list_for_each_entry(pos, &vctx->clients, node) {
+ const u64 cli_next_ns =
+ kbasep_vinstr_next_dump_time_ns(
+ cur_ts_ns, pos->dump_interval_ns);
-failed_kthread:
- disable_hwcnt(vinstr_ctx);
-failed_enable:
- spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
- vinstr_ctx->clients_present = false;
- spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
- kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
- if (element) {
- mutex_lock(&kbdev->kctx_list_lock);
- list_del(&element->link);
- kfree(element);
- mutex_unlock(&kbdev->kctx_list_lock);
- KBASE_TLSTREAM_TL_DEL_CTX(vinstr_ctx->kctx);
+ /* Non-zero next dump time implies a periodic client */
+ if ((cli_next_ns != 0) && (cli_next_ns < earliest_next_ns))
+ earliest_next_ns = cli_next_ns;
+
+ pos->next_dump_time_ns = cli_next_ns;
}
-failed_map:
- kbase_destroy_context(vinstr_ctx->kctx);
- vinstr_ctx->kctx = NULL;
- return err;
+
+ /* Cancel the timer if it is already pending */
+ hrtimer_cancel(&vctx->dump_timer);
+
+ /* Start the timer if there are periodic clients and vinstr is not
+ * suspended.
+ */
+ if ((earliest_next_ns != U64_MAX) &&
+ (vctx->suspend_count == 0) &&
+ !WARN_ON(earliest_next_ns < cur_ts_ns))
+ hrtimer_start(
+ &vctx->dump_timer,
+ ns_to_ktime(earliest_next_ns - cur_ts_ns),
+ HRTIMER_MODE_REL);
}
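/*
 * Editor's sketch (not driver code) of the scan above: find the earliest
 * non-zero next-dump time among the clients and derive the relative delay a
 * one-shot timer would be armed with. The client array and timestamps are
 * illustrative.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint64_t cur_ts_ns = 5000000;
	const uint64_t next_dump_ns[] = { 0, 8000000, 6000000 }; /* 0 = non-periodic */
	uint64_t earliest = UINT64_MAX;
	size_t i;

	for (i = 0; i < sizeof(next_dump_ns) / sizeof(next_dump_ns[0]); i++)
		if (next_dump_ns[i] != 0 && next_dump_ns[i] < earliest)
			earliest = next_dump_ns[i];

	if (earliest != UINT64_MAX)
		printf("arm one-shot timer in %llu ns\n",
		       (unsigned long long)(earliest - cur_ts_ns));
	else
		printf("no periodic clients: leave the timer idle\n");
	return 0;
}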
/**
- * kbasep_vinstr_destroy_kctx - destroy vinstr's kernel context
- * @vinstr_ctx: vinstr context
+ * kbasep_vinstr_dump_worker() - Dump worker that dumps all periodic clients
+ *                               that need to be dumped, then reschedules
+ *                               itself.
+ * @work: Work structure.
*/
-static void kbasep_vinstr_destroy_kctx(struct kbase_vinstr_context *vinstr_ctx)
+static void kbasep_vinstr_dump_worker(struct work_struct *work)
{
- struct kbase_device *kbdev = vinstr_ctx->kbdev;
- struct kbasep_kctx_list_element *element;
- struct kbasep_kctx_list_element *tmp;
- bool found = false;
- bool hwcnt_disabled = false;
- unsigned long flags;
-
- /* Release hw counters dumping resources. */
- vinstr_ctx->thread = NULL;
-
- /* Simplify state transitions by specifying that we have no clients. */
- spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
- vinstr_ctx->clients_present = false;
- if ((VINSTR_SUSPENDED == vinstr_ctx->state) || (VINSTR_RESUMING == vinstr_ctx->state))
- hwcnt_disabled = true;
- spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-
- if (!hwcnt_disabled)
- disable_hwcnt(vinstr_ctx);
-
- kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
-
- /* Remove kernel context from the device's contexts list. */
- mutex_lock(&kbdev->kctx_list_lock);
- list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) {
- if (element->kctx == vinstr_ctx->kctx) {
- list_del(&element->link);
- kfree(element);
- found = true;
- }
- }
- mutex_unlock(&kbdev->kctx_list_lock);
+ struct kbase_vinstr_context *vctx =
+ container_of(work, struct kbase_vinstr_context, dump_work);
+ struct kbase_vinstr_client *pos;
+ u64 cur_time_ns;
- if (!found)
- dev_warn(kbdev->dev, "kctx not in kctx_list\n");
+ mutex_lock(&vctx->lock);
- /* Destroy context. */
- kbase_destroy_context(vinstr_ctx->kctx);
+ cur_time_ns = kbasep_vinstr_timestamp_ns();
- /* Inform timeline client about context destruction. */
- KBASE_TLSTREAM_TL_DEL_CTX(vinstr_ctx->kctx);
+ /* Dump all periodic clients whose next dump time is before the current
+ * time.
+ */
+ list_for_each_entry(pos, &vctx->clients, node) {
+ if ((pos->next_dump_time_ns != 0) &&
+ (pos->next_dump_time_ns < cur_time_ns))
+ kbasep_vinstr_client_dump(
+ pos, BASE_HWCNT_READER_EVENT_PERIODIC);
+ }
- vinstr_ctx->kctx = NULL;
+ /* Update the next dump times of all periodic clients, then reschedule
+ * this worker at the earliest next dump time.
+ */
+ kbasep_vinstr_reschedule_worker(vctx);
+
+ mutex_unlock(&vctx->lock);
}
/**
- * kbasep_vinstr_attach_client - Attach a client to the vinstr core
- * @vinstr_ctx: vinstr context
- * @buffer_count: requested number of dump buffers
- * @bitmap: bitmaps describing which counters should be enabled
- * @argp: pointer where notification descriptor shall be stored
- * @kernel_buffer: pointer to kernel side buffer
- *
- * Return: vinstr opaque client handle or NULL on failure
+ * kbasep_vinstr_dump_timer() - Dump timer that schedules the dump worker for
+ * execution as soon as possible.
+ * @timer: Timer structure.
*/
-static struct kbase_vinstr_client *kbasep_vinstr_attach_client(
- struct kbase_vinstr_context *vinstr_ctx, u32 buffer_count,
- u32 bitmap[4], void *argp, void *kernel_buffer)
+static enum hrtimer_restart kbasep_vinstr_dump_timer(struct hrtimer *timer)
{
- struct task_struct *thread = NULL;
- struct kbase_vinstr_client *cli;
- unsigned long flags;
- bool clients_present = false;
-
- KBASE_DEBUG_ASSERT(vinstr_ctx);
+ struct kbase_vinstr_context *vctx =
+ container_of(timer, struct kbase_vinstr_context, dump_timer);
- if (buffer_count > MAX_BUFFER_COUNT
- || (buffer_count & (buffer_count - 1)))
- return NULL;
-
- cli = kzalloc(sizeof(*cli), GFP_KERNEL);
- if (!cli)
- return NULL;
-
- cli->vinstr_ctx = vinstr_ctx;
- cli->buffer_count = buffer_count;
- cli->event_mask =
- (1 << BASE_HWCNT_READER_EVENT_MANUAL) |
- (1 << BASE_HWCNT_READER_EVENT_PERIODIC);
- cli->pending = true;
+ /* We don't need to check vctx->suspend_count here, as the suspend
+ * function will ensure that any worker enqueued here is immediately
+ * cancelled, and the worker itself won't reschedule this timer if
+ * suspend_count != 0.
+ */
+#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE
+ queue_work(system_wq, &vctx->dump_work);
+#else
+ queue_work(system_highpri_wq, &vctx->dump_work);
+#endif
+ return HRTIMER_NORESTART;
+}
- hwcnt_bitmap_set(cli->bitmap, bitmap);
+/**
+ * kbasep_vinstr_client_destroy() - Destroy a vinstr client.
+ * @vcli: vinstr client. Must not be attached to a vinstr context.
+ */
+static void kbasep_vinstr_client_destroy(struct kbase_vinstr_client *vcli)
+{
+ if (!vcli)
+ return;
- mutex_lock(&vinstr_ctx->lock);
+ kbase_hwcnt_virtualizer_client_destroy(vcli->hvcli);
+ kfree(vcli->dump_bufs_meta);
+ kbase_hwcnt_dump_buffer_array_free(&vcli->dump_bufs);
+ kbase_hwcnt_enable_map_free(&vcli->enable_map);
+ kfree(vcli);
+}
- hwcnt_bitmap_union(vinstr_ctx->bitmap, cli->bitmap);
- vinstr_ctx->reprogram = true;
+/**
+ * kbasep_vinstr_client_create() - Create a vinstr client. Does not attach to
+ * the vinstr context.
+ * @vctx: Non-NULL pointer to vinstr context.
+ * @setup: Non-NULL pointer to hardware counter ioctl setup structure.
+ * setup->buffer_count must not be 0.
+ * @out_vcli: Non-NULL pointer to where created client will be stored on
+ * success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_vinstr_client_create(
+ struct kbase_vinstr_context *vctx,
+ struct kbase_ioctl_hwcnt_reader_setup *setup,
+ struct kbase_vinstr_client **out_vcli)
+{
+ int errcode;
+ struct kbase_vinstr_client *vcli;
+ struct kbase_hwcnt_physical_enable_map phys_em;
- spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
- clients_present = (vinstr_ctx->nclients || vinstr_ctx->nclients_suspended);
- spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+ WARN_ON(!vctx);
+ WARN_ON(!setup);
+ WARN_ON(setup->buffer_count == 0);
- /* If this is the first client, create the vinstr kbase
- * context. This context is permanently resident until the
- * last client exits. */
- if (!clients_present) {
- hwcnt_bitmap_set(vinstr_ctx->bitmap, cli->bitmap);
- if (kbasep_vinstr_create_kctx(vinstr_ctx) < 0)
- goto error;
+ vcli = kzalloc(sizeof(*vcli), GFP_KERNEL);
+ if (!vcli)
+ return -ENOMEM;
- vinstr_ctx->reprogram = false;
- cli->pending = false;
- }
+ vcli->vctx = vctx;
- /* The GPU resets the counter block every time there is a request
- * to dump it. We need a per client kernel buffer for accumulating
- * the counters. */
- cli->dump_size = kbasep_vinstr_dump_size_ctx(vinstr_ctx);
- cli->accum_buffer = kzalloc(cli->dump_size, GFP_KERNEL);
- if (!cli->accum_buffer)
+ errcode = kbase_hwcnt_enable_map_alloc(
+ vctx->metadata, &vcli->enable_map);
+ if (errcode)
goto error;
- /* Prepare buffers. */
- if (cli->buffer_count) {
- int *fd = (int *)argp;
- size_t tmp;
-
- /* Allocate area for buffers metadata storage. */
- tmp = sizeof(struct kbase_hwcnt_reader_metadata) *
- cli->buffer_count;
- cli->dump_buffers_meta = kmalloc(tmp, GFP_KERNEL);
- if (!cli->dump_buffers_meta)
- goto error;
-
- /* Allocate required number of dumping buffers. */
- cli->dump_buffers = (char *)__get_free_pages(
- GFP_KERNEL | __GFP_ZERO,
- get_order(cli->dump_size * cli->buffer_count));
- if (!cli->dump_buffers)
- goto error;
-
- /* Create descriptor for user-kernel data exchange. */
- *fd = anon_inode_getfd(
- "[mali_vinstr_desc]",
- &vinstr_client_fops,
- cli,
- O_RDONLY | O_CLOEXEC);
- if (0 > *fd)
- goto error;
- } else if (kernel_buffer) {
- cli->kernel_buffer = kernel_buffer;
- } else {
- cli->legacy_buffer = (void __user *)argp;
- }
+ phys_em.jm_bm = setup->jm_bm;
+ phys_em.shader_bm = setup->shader_bm;
+ phys_em.tiler_bm = setup->tiler_bm;
+ phys_em.mmu_l2_bm = setup->mmu_l2_bm;
+ kbase_hwcnt_gpu_enable_map_from_physical(&vcli->enable_map, &phys_em);
- atomic_set(&cli->read_idx, 0);
- atomic_set(&cli->meta_idx, 0);
- atomic_set(&cli->write_idx, 0);
- init_waitqueue_head(&cli->waitq);
+ errcode = kbase_hwcnt_dump_buffer_array_alloc(
+ vctx->metadata, setup->buffer_count, &vcli->dump_bufs);
+ if (errcode)
+ goto error;
- spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
- vinstr_ctx->nclients++;
- list_add(&cli->list, &vinstr_ctx->idle_clients);
- kbase_vinstr_update_suspend(vinstr_ctx);
- spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+ errcode = -ENOMEM;
+ vcli->dump_bufs_meta = kmalloc_array(
+ setup->buffer_count, sizeof(*vcli->dump_bufs_meta), GFP_KERNEL);
+ if (!vcli->dump_bufs_meta)
+ goto error;
- mutex_unlock(&vinstr_ctx->lock);
+ errcode = kbase_hwcnt_virtualizer_client_create(
+ vctx->hvirt, &vcli->enable_map, &vcli->hvcli);
+ if (errcode)
+ goto error;
- return cli;
+ init_waitqueue_head(&vcli->waitq);
+ *out_vcli = vcli;
+ return 0;
error:
- kfree(cli->dump_buffers_meta);
- if (cli->dump_buffers)
- free_pages(
- (unsigned long)cli->dump_buffers,
- get_order(cli->dump_size * cli->buffer_count));
- kfree(cli->accum_buffer);
- if (!clients_present && vinstr_ctx->kctx) {
- thread = vinstr_ctx->thread;
- kbasep_vinstr_destroy_kctx(vinstr_ctx);
- }
- kfree(cli);
-
- mutex_unlock(&vinstr_ctx->lock);
-
- /* Thread must be stopped after lock is released. */
- if (thread)
- kthread_stop(thread);
-
- return NULL;
+ kbasep_vinstr_client_destroy(vcli);
+ return errcode;
}
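/*
 * Editor's sketch (not driver code) of the cleanup pattern above: a single
 * error label hands the partially-built object to a destroy function that
 * tolerates NULL members, so each allocation site does not need its own
 * unwind path. All names here are illustrative.
 */
#include <stdio.h>
#include <stdlib.h>

struct widget { int *a; int *b; };

static void widget_destroy(struct widget *w)
{
	if (!w)
		return;
	free(w->b);	/* free(NULL) is a no-op, like the kfree-style helpers */
	free(w->a);
	free(w);
}

static int widget_create(struct widget **out)
{
	struct widget *w = calloc(1, sizeof(*w));

	if (!w)
		return -1;
	w->a = malloc(sizeof(*w->a));
	if (!w->a)
		goto error;
	w->b = malloc(sizeof(*w->b));
	if (!w->b)
		goto error;
	*out = w;
	return 0;
error:
	widget_destroy(w);	/* frees whatever was allocated so far */
	return -1;
}

int main(void)
{
	struct widget *w = NULL;

	if (widget_create(&w) == 0) {
		puts("created");
		widget_destroy(w);
	}
	return 0;
}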
-void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli)
+int kbase_vinstr_init(
+ struct kbase_hwcnt_virtualizer *hvirt,
+ struct kbase_vinstr_context **out_vctx)
{
- struct kbase_vinstr_context *vinstr_ctx;
- struct kbase_vinstr_client *iter, *tmp;
- struct task_struct *thread = NULL;
- u32 zerobitmap[4] = { 0 };
- int cli_found = 0;
- unsigned long flags;
- bool clients_present;
-
- KBASE_DEBUG_ASSERT(cli);
- vinstr_ctx = cli->vinstr_ctx;
- KBASE_DEBUG_ASSERT(vinstr_ctx);
-
- mutex_lock(&vinstr_ctx->lock);
- spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
-
- list_for_each_entry_safe(iter, tmp, &vinstr_ctx->idle_clients, list) {
- if (iter == cli) {
- cli_found = 1;
- break;
- }
- }
- if (!cli_found) {
- list_for_each_entry_safe(
- iter, tmp, &vinstr_ctx->waiting_clients, list) {
- if (iter == cli) {
- cli_found = 1;
- break;
- }
- }
- }
- if (!cli_found) {
- list_for_each_entry_safe(
- iter, tmp, &vinstr_ctx->suspended_clients, list) {
- if (iter == cli) {
- cli_found = 1;
- break;
- }
- }
- }
- KBASE_DEBUG_ASSERT(cli_found);
+ struct kbase_vinstr_context *vctx;
+ const struct kbase_hwcnt_metadata *metadata;
- if (cli_found) {
- vinstr_ctx->reprogram = true;
- list_del(&iter->list);
- }
-
- if (!cli->suspended)
- vinstr_ctx->nclients--;
- else
- vinstr_ctx->nclients_suspended--;
-
- kbase_vinstr_update_suspend(vinstr_ctx);
-
- clients_present = (vinstr_ctx->nclients || vinstr_ctx->nclients_suspended);
-
- /* Rebuild context bitmap now that the client has detached */
- hwcnt_bitmap_set(vinstr_ctx->bitmap, zerobitmap);
- list_for_each_entry(iter, &vinstr_ctx->idle_clients, list)
- hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
- list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list)
- hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
- list_for_each_entry(iter, &vinstr_ctx->suspended_clients, list)
- hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
+ if (!hvirt || !out_vctx)
+ return -EINVAL;
- spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+ metadata = kbase_hwcnt_virtualizer_metadata(hvirt);
+ if (!metadata)
+ return -EINVAL;
- kfree(cli->dump_buffers_meta);
- free_pages(
- (unsigned long)cli->dump_buffers,
- get_order(cli->dump_size * cli->buffer_count));
- kfree(cli->accum_buffer);
- kfree(cli);
+ vctx = kzalloc(sizeof(*vctx), GFP_KERNEL);
+ if (!vctx)
+ return -ENOMEM;
- if (!clients_present) {
- thread = vinstr_ctx->thread;
- kbasep_vinstr_destroy_kctx(vinstr_ctx);
- }
+ vctx->hvirt = hvirt;
+ vctx->metadata = metadata;
- mutex_unlock(&vinstr_ctx->lock);
+ mutex_init(&vctx->lock);
+ INIT_LIST_HEAD(&vctx->clients);
+ hrtimer_init(&vctx->dump_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ vctx->dump_timer.function = kbasep_vinstr_dump_timer;
+ INIT_WORK(&vctx->dump_work, kbasep_vinstr_dump_worker);
- /* Thread must be stopped after lock is released. */
- if (thread)
- kthread_stop(thread);
+ *out_vctx = vctx;
+ return 0;
}
-KBASE_EXPORT_TEST_API(kbase_vinstr_detach_client);
-/* Accumulate counters in the dump buffer */
-static void accum_dump_buffer(void *dst, void *src, size_t dump_size)
+void kbase_vinstr_term(struct kbase_vinstr_context *vctx)
{
- size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
- u32 *d = dst;
- u32 *s = src;
- size_t i, j;
-
- for (i = 0; i < dump_size; i += block_size) {
- /* skip over the header block */
- d += NR_BYTES_PER_HDR / sizeof(u32);
- s += NR_BYTES_PER_HDR / sizeof(u32);
- for (j = 0; j < (block_size - NR_BYTES_PER_HDR) / sizeof(u32); j++) {
- /* saturate result if addition would result in wraparound */
- if (U32_MAX - *d < *s)
- *d = U32_MAX;
- else
- *d += *s;
- d++;
- s++;
- }
- }
-}
+ if (!vctx)
+ return;
-/* This is the Midgard v4 patch function. It copies the headers for each
- * of the defined blocks from the master kernel buffer and then patches up
- * the performance counter enable mask for each of the blocks to exclude
- * counters that were not requested by the client. */
-static void patch_dump_buffer_hdr_v4(
- struct kbase_vinstr_context *vinstr_ctx,
- struct kbase_vinstr_client *cli)
-{
- u32 *mask;
- u8 *dst = cli->accum_buffer;
- u8 *src = vinstr_ctx->cpu_va;
- u32 nr_cg = vinstr_ctx->kctx->kbdev->gpu_props.num_core_groups;
- size_t i, group_size, group;
- enum {
- SC0_BASE = 0 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
- SC1_BASE = 1 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
- SC2_BASE = 2 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
- SC3_BASE = 3 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
- TILER_BASE = 4 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
- MMU_L2_BASE = 5 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
- JM_BASE = 7 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT
- };
-
- group_size = NR_CNT_BLOCKS_PER_GROUP *
- NR_CNT_PER_BLOCK *
- NR_BYTES_PER_CNT;
- for (i = 0; i < nr_cg; i++) {
- group = i * group_size;
- /* copy shader core headers */
- memcpy(&dst[group + SC0_BASE], &src[group + SC0_BASE],
- NR_BYTES_PER_HDR);
- memcpy(&dst[group + SC1_BASE], &src[group + SC1_BASE],
- NR_BYTES_PER_HDR);
- memcpy(&dst[group + SC2_BASE], &src[group + SC2_BASE],
- NR_BYTES_PER_HDR);
- memcpy(&dst[group + SC3_BASE], &src[group + SC3_BASE],
- NR_BYTES_PER_HDR);
-
- /* copy tiler header */
- memcpy(&dst[group + TILER_BASE], &src[group + TILER_BASE],
- NR_BYTES_PER_HDR);
-
- /* copy mmu header */
- memcpy(&dst[group + MMU_L2_BASE], &src[group + MMU_L2_BASE],
- NR_BYTES_PER_HDR);
-
- /* copy job manager header */
- memcpy(&dst[group + JM_BASE], &src[group + JM_BASE],
- NR_BYTES_PER_HDR);
-
- /* patch the shader core enable mask */
- mask = (u32 *)&dst[group + SC0_BASE + PRFCNT_EN_MASK_OFFSET];
- *mask &= cli->bitmap[SHADER_HWCNT_BM];
- mask = (u32 *)&dst[group + SC1_BASE + PRFCNT_EN_MASK_OFFSET];
- *mask &= cli->bitmap[SHADER_HWCNT_BM];
- mask = (u32 *)&dst[group + SC2_BASE + PRFCNT_EN_MASK_OFFSET];
- *mask &= cli->bitmap[SHADER_HWCNT_BM];
- mask = (u32 *)&dst[group + SC3_BASE + PRFCNT_EN_MASK_OFFSET];
- *mask &= cli->bitmap[SHADER_HWCNT_BM];
-
- /* patch the tiler core enable mask */
- mask = (u32 *)&dst[group + TILER_BASE + PRFCNT_EN_MASK_OFFSET];
- *mask &= cli->bitmap[TILER_HWCNT_BM];
-
- /* patch the mmu core enable mask */
- mask = (u32 *)&dst[group + MMU_L2_BASE + PRFCNT_EN_MASK_OFFSET];
- *mask &= cli->bitmap[MMU_L2_HWCNT_BM];
-
- /* patch the job manager enable mask */
- mask = (u32 *)&dst[group + JM_BASE + PRFCNT_EN_MASK_OFFSET];
- *mask &= cli->bitmap[JM_HWCNT_BM];
- }
-}
+ cancel_work_sync(&vctx->dump_work);
-/* This is the Midgard v5 patch function. It copies the headers for each
- * of the defined blocks from the master kernel buffer and then patches up
- * the performance counter enable mask for each of the blocks to exclude
- * counters that were not requested by the client. */
-static void patch_dump_buffer_hdr_v5(
- struct kbase_vinstr_context *vinstr_ctx,
- struct kbase_vinstr_client *cli)
-{
- struct kbase_device *kbdev = vinstr_ctx->kctx->kbdev;
- u32 i, nr_l2;
- u64 core_mask;
- u32 *mask;
- u8 *dst = cli->accum_buffer;
- u8 *src = vinstr_ctx->cpu_va;
- size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
-
- /* copy and patch job manager header */
- memcpy(dst, src, NR_BYTES_PER_HDR);
- mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
- *mask &= cli->bitmap[JM_HWCNT_BM];
- dst += block_size;
- src += block_size;
-
- /* copy and patch tiler header */
- memcpy(dst, src, NR_BYTES_PER_HDR);
- mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
- *mask &= cli->bitmap[TILER_HWCNT_BM];
- dst += block_size;
- src += block_size;
-
- /* copy and patch MMU/L2C headers */
- nr_l2 = kbdev->gpu_props.props.l2_props.num_l2_slices;
- for (i = 0; i < nr_l2; i++) {
- memcpy(dst, src, NR_BYTES_PER_HDR);
- mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
- *mask &= cli->bitmap[MMU_L2_HWCNT_BM];
- dst += block_size;
- src += block_size;
- }
+ /* Non-zero client count implies client leak */
+ if (WARN_ON(vctx->client_count != 0)) {
+ struct kbase_vinstr_client *pos, *n;
- /* copy and patch shader core headers */
- core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
- while (0ull != core_mask) {
- memcpy(dst, src, NR_BYTES_PER_HDR);
- if (0ull != (core_mask & 1ull)) {
- /* if block is not reserved update header */
- mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
- *mask &= cli->bitmap[SHADER_HWCNT_BM];
+ list_for_each_entry_safe(pos, n, &vctx->clients, node) {
+ list_del(&pos->node);
+ vctx->client_count--;
+ kbasep_vinstr_client_destroy(pos);
}
- dst += block_size;
- src += block_size;
-
- core_mask >>= 1;
}
+
+ WARN_ON(vctx->client_count != 0);
+ kfree(vctx);
}
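/*
 * Editor's sketch (illustrative, not the kernel list API): tearing down every
 * node while walking a list requires caching the next pointer before the
 * current node is freed, which is what the _safe iterator variant used above
 * provides.
 */
#include <stdio.h>
#include <stdlib.h>

struct node { struct node *next; };

static void destroy_all(struct node **head)
{
	struct node *pos = *head, *n;

	while (pos) {
		n = pos->next;	/* cache before freeing, like list_for_each_entry_safe() */
		free(pos);
		pos = n;
	}
	*head = NULL;
}

int main(void)
{
	struct node *head = NULL;
	int i;

	for (i = 0; i < 3; i++) {
		struct node *node = malloc(sizeof(*node));

		if (!node)
			break;
		node->next = head;
		head = node;
	}
	destroy_all(&head);
	puts("all clients destroyed");
	return 0;
}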
-/**
- * accum_clients - accumulate dumped hw counters for all known clients
- * @vinstr_ctx: vinstr context
- */
-static void accum_clients(struct kbase_vinstr_context *vinstr_ctx)
+void kbase_vinstr_suspend(struct kbase_vinstr_context *vctx)
{
- struct kbase_vinstr_client *iter;
- int v4 = 0;
+ if (WARN_ON(!vctx))
+ return;
-#ifndef CONFIG_MALI_NO_MALI
- v4 = kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4);
-#endif
+ mutex_lock(&vctx->lock);
- list_for_each_entry(iter, &vinstr_ctx->idle_clients, list) {
- /* Don't bother accumulating clients whose hwcnt requests
- * have not yet been honoured. */
- if (iter->pending)
- continue;
- if (v4)
- patch_dump_buffer_hdr_v4(vinstr_ctx, iter);
- else
- patch_dump_buffer_hdr_v5(vinstr_ctx, iter);
- accum_dump_buffer(
- iter->accum_buffer,
- vinstr_ctx->cpu_va,
- iter->dump_size);
- }
- list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list) {
- /* Don't bother accumulating clients whose hwcnt requests
- * have not yet been honoured. */
- if (iter->pending)
- continue;
- if (v4)
- patch_dump_buffer_hdr_v4(vinstr_ctx, iter);
- else
- patch_dump_buffer_hdr_v5(vinstr_ctx, iter);
- accum_dump_buffer(
- iter->accum_buffer,
- vinstr_ctx->cpu_va,
- iter->dump_size);
- }
-}
+ if (!WARN_ON(vctx->suspend_count == SIZE_MAX))
+ vctx->suspend_count++;
-/*****************************************************************************/
+ mutex_unlock(&vctx->lock);
-/**
- * kbasep_vinstr_get_timestamp - return timestamp
- *
- * Function returns timestamp value based on raw monotonic timer. Value will
- * wrap around zero in case of overflow.
- *
- * Return: timestamp value
- */
-static u64 kbasep_vinstr_get_timestamp(void)
-{
- struct timespec ts;
-
- getrawmonotonic(&ts);
- return (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec;
+ /* Always sync cancel the timer and then the worker, regardless of the
+ * new suspend count.
+ *
+ * This ensures concurrent calls to kbase_vinstr_suspend() always block
+ * until vinstr is fully suspended.
+ *
+ * The timer is cancelled before the worker, as the timer
+ * unconditionally re-enqueues the worker, but the worker checks the
+ * suspend_count that we just incremented before rescheduling the timer.
+ *
+ * Therefore if we cancel the worker first, the timer might re-enqueue
+ * the worker before we cancel the timer, but the opposite is not
+ * possible.
+ */
+ hrtimer_cancel(&vctx->dump_timer);
+ cancel_work_sync(&vctx->dump_work);
}
-/**
- * kbasep_vinstr_add_dump_request - register client's dumping request
- * @cli: requesting client
- * @waiting_clients: list of pending dumping requests
- */
-static void kbasep_vinstr_add_dump_request(
- struct kbase_vinstr_client *cli,
- struct list_head *waiting_clients)
+void kbase_vinstr_resume(struct kbase_vinstr_context *vctx)
{
- struct kbase_vinstr_client *tmp;
-
- if (list_empty(waiting_clients)) {
- list_add(&cli->list, waiting_clients);
+ if (WARN_ON(!vctx))
return;
- }
- list_for_each_entry(tmp, waiting_clients, list) {
- if (tmp->dump_time > cli->dump_time) {
- list_add_tail(&cli->list, &tmp->list);
- return;
- }
- }
- list_add_tail(&cli->list, waiting_clients);
-}
-/**
- * kbasep_vinstr_collect_and_accumulate - collect hw counters via low level
- * dump and accumulate them for known
- * clients
- * @vinstr_ctx: vinstr context
- * @timestamp: pointer where collection timestamp will be recorded
- *
- * Return: zero on success
- */
-static int kbasep_vinstr_collect_and_accumulate(
- struct kbase_vinstr_context *vinstr_ctx, u64 *timestamp)
-{
- unsigned long flags;
- int rcode;
+ mutex_lock(&vctx->lock);
-#ifdef CONFIG_MALI_NO_MALI
- /* The dummy model needs the CPU mapping. */
- gpu_model_set_dummy_prfcnt_base_cpu(vinstr_ctx->cpu_va);
-#endif
+ if (!WARN_ON(vctx->suspend_count == 0)) {
+ vctx->suspend_count--;
- spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
- if (VINSTR_IDLE != vinstr_ctx->state) {
- spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
- return -EAGAIN;
- } else {
- vinstr_ctx->state = VINSTR_DUMPING;
- }
- spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-
- /* Request HW counters dump.
- * Disable preemption to make dump timestamp more accurate. */
- preempt_disable();
- *timestamp = kbasep_vinstr_get_timestamp();
- rcode = kbase_instr_hwcnt_request_dump(vinstr_ctx->kctx);
- preempt_enable();
-
- if (!rcode)
- rcode = kbase_instr_hwcnt_wait_for_dump(vinstr_ctx->kctx);
- WARN_ON(rcode);
-
- if (!rcode) {
- /* Invalidate the kernel buffer before reading from it.
- * As the vinstr_ctx->lock is already held by the caller, the
- * unmap of kernel buffer cannot take place simultaneously.
+ /* Last resume, so re-enqueue the worker if we have any periodic
+ * clients.
*/
- lockdep_assert_held(&vinstr_ctx->lock);
- kbase_sync_mem_regions(vinstr_ctx->kctx, vinstr_ctx->vmap,
- KBASE_SYNC_TO_CPU);
- }
-
- spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
- switch (vinstr_ctx->state) {
- case VINSTR_SUSPENDING:
- schedule_work(&vinstr_ctx->suspend_work);
- break;
- case VINSTR_DUMPING:
- vinstr_ctx->state = VINSTR_IDLE;
- wake_up_all(&vinstr_ctx->suspend_waitq);
- break;
- default:
- break;
- }
+ if (vctx->suspend_count == 0) {
+ struct kbase_vinstr_client *pos;
+ bool has_periodic_clients = false;
- /* Accumulate values of collected counters. */
- if (!rcode)
- accum_clients(vinstr_ctx);
+ list_for_each_entry(pos, &vctx->clients, node) {
+ if (pos->dump_interval_ns != 0) {
+ has_periodic_clients = true;
+ break;
+ }
+ }
- spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+ if (has_periodic_clients)
+#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE
+ queue_work(system_wq, &vctx->dump_work);
+#else
+ queue_work(system_highpri_wq, &vctx->dump_work);
+#endif
+ }
+ }
- return rcode;
+ mutex_unlock(&vctx->lock);
}
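/*
 * Editor's sketch (not driver code) of the reference-counted suspend scheme
 * above: suspends nest, and only the transition back to a count of zero
 * restarts periodic dumping. Names are illustrative.
 */
#include <stdbool.h>
#include <stdio.h>

static unsigned int suspend_count;

static void vinstr_like_suspend(void)
{
	suspend_count++;
}

static bool vinstr_like_resume(void)
{
	if (suspend_count == 0)
		return false;		/* unbalanced resume */
	return --suspend_count == 0;	/* true: restart periodic work */
}

int main(void)
{
	vinstr_like_suspend();
	vinstr_like_suspend();
	printf("restart after first resume? %d\n", vinstr_like_resume());  /* 0 */
	printf("restart after second resume? %d\n", vinstr_like_resume()); /* 1 */
	return 0;
}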
-/**
- * kbasep_vinstr_fill_dump_buffer - copy accumulated counters to empty kernel
- * buffer
- * @cli: requesting client
- * @timestamp: timestamp when counters were collected
- * @event_id: id of event that caused triggered counters collection
- *
- * Return: zero on success
- */
-static int kbasep_vinstr_fill_dump_buffer(
- struct kbase_vinstr_client *cli, u64 timestamp,
- enum base_hwcnt_reader_event event_id)
+int kbase_vinstr_hwcnt_reader_setup(
+ struct kbase_vinstr_context *vctx,
+ struct kbase_ioctl_hwcnt_reader_setup *setup)
{
- unsigned int write_idx = atomic_read(&cli->write_idx);
- unsigned int read_idx = atomic_read(&cli->read_idx);
+ int errcode;
+ int fd;
+ struct kbase_vinstr_client *vcli = NULL;
- struct kbase_hwcnt_reader_metadata *meta;
- void *buffer;
+ if (!vctx || !setup ||
+ (setup->buffer_count == 0) ||
+ (setup->buffer_count > MAX_BUFFER_COUNT))
+ return -EINVAL;
- /* Check if there is a place to copy HWC block into. */
- if (write_idx - read_idx == cli->buffer_count)
- return -1;
- write_idx %= cli->buffer_count;
-
- /* Fill in dump buffer and its metadata. */
- buffer = &cli->dump_buffers[write_idx * cli->dump_size];
- meta = &cli->dump_buffers_meta[write_idx];
- meta->timestamp = timestamp;
- meta->event_id = event_id;
- meta->buffer_idx = write_idx;
- memcpy(buffer, cli->accum_buffer, cli->dump_size);
- return 0;
-}
+ errcode = kbasep_vinstr_client_create(vctx, setup, &vcli);
+ if (errcode)
+ goto error;
-/**
- * kbasep_vinstr_fill_dump_buffer_legacy - copy accumulated counters to buffer
- * allocated in userspace
- * @cli: requesting client
- *
- * Return: zero on success
- *
- * This is part of legacy ioctl interface.
- */
-static int kbasep_vinstr_fill_dump_buffer_legacy(
- struct kbase_vinstr_client *cli)
-{
- void __user *buffer = cli->legacy_buffer;
- int rcode;
+ errcode = anon_inode_getfd(
+ "[mali_vinstr_desc]",
+ &vinstr_client_fops,
+ vcli,
+ O_RDONLY | O_CLOEXEC);
+ if (errcode < 0)
+ goto error;
- /* Copy data to user buffer. */
- rcode = copy_to_user(buffer, cli->accum_buffer, cli->dump_size);
- if (rcode) {
- pr_warn("error while copying buffer to user\n");
- return -EFAULT;
- }
- return 0;
-}
+ fd = errcode;
-/**
- * kbasep_vinstr_fill_dump_buffer_kernel - copy accumulated counters to buffer
- * allocated in kernel space
- * @cli: requesting client
- *
- * Return: zero on success
- *
- * This is part of the kernel client interface.
- */
-static int kbasep_vinstr_fill_dump_buffer_kernel(
- struct kbase_vinstr_client *cli)
-{
- memcpy(cli->kernel_buffer, cli->accum_buffer, cli->dump_size);
+	/* Add the new client. No need to reschedule the dump worker, as a
+	 * newly created client is never periodic.
+	 */
+ mutex_lock(&vctx->lock);
- return 0;
-}
+ vctx->client_count++;
+ list_add(&vcli->node, &vctx->clients);
-/**
- * kbasep_vinstr_reprogram - reprogram hwcnt set collected by inst
- * @vinstr_ctx: vinstr context
- */
-static void kbasep_vinstr_reprogram(
- struct kbase_vinstr_context *vinstr_ctx)
-{
- unsigned long flags;
- bool suspended = false;
-
- /* Don't enable hardware counters if vinstr is suspended. */
- spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
- if (VINSTR_IDLE != vinstr_ctx->state)
- suspended = true;
- spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
- if (suspended)
- return;
+ mutex_unlock(&vctx->lock);
- /* Change to suspended state is done while holding vinstr context
- * lock. Below code will then no re-enable the instrumentation. */
-
- if (vinstr_ctx->reprogram) {
- struct kbase_vinstr_client *iter;
-
- if (!reprogram_hwcnt(vinstr_ctx)) {
- vinstr_ctx->reprogram = false;
- spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
- list_for_each_entry(
- iter,
- &vinstr_ctx->idle_clients,
- list)
- iter->pending = false;
- list_for_each_entry(
- iter,
- &vinstr_ctx->waiting_clients,
- list)
- iter->pending = false;
- spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
- }
- }
+ return fd;
+error:
+ kbasep_vinstr_client_destroy(vcli);
+ return errcode;
}
/**
- * kbasep_vinstr_update_client - copy accumulated counters to user readable
- * buffer and notify the user
- * @cli: requesting client
- * @timestamp: timestamp when counters were collected
- * @event_id: id of event that caused triggered counters collection
+ * kbasep_vinstr_hwcnt_reader_buffer_ready() - Check if client has ready
+ * buffers.
+ * @cli: Non-NULL pointer to vinstr client.
*
- * Return: zero on success
+ * Return: Non-zero if the client has at least one filled dump buffer that has
+ *         not yet been handed to userspace.
*/
-static int kbasep_vinstr_update_client(
- struct kbase_vinstr_client *cli, u64 timestamp,
- enum base_hwcnt_reader_event event_id)
+static int kbasep_vinstr_hwcnt_reader_buffer_ready(
+ struct kbase_vinstr_client *cli)
{
- int rcode = 0;
- unsigned long flags;
-
- /* Copy collected counters to user readable buffer. */
- if (cli->buffer_count)
- rcode = kbasep_vinstr_fill_dump_buffer(
- cli, timestamp, event_id);
- else if (cli->kernel_buffer)
- rcode = kbasep_vinstr_fill_dump_buffer_kernel(cli);
- else
- rcode = kbasep_vinstr_fill_dump_buffer_legacy(cli);
-
- /* Prepare for next request. */
- memset(cli->accum_buffer, 0, cli->dump_size);
-
- spin_lock_irqsave(&cli->vinstr_ctx->state_lock, flags);
- /* Check if client was put to suspend state while it was being updated */
- if (cli->suspended)
- rcode = -EINVAL;
- spin_unlock_irqrestore(&cli->vinstr_ctx->state_lock, flags);
-
- if (rcode)
- goto exit;
-
- /* Notify client. Make sure all changes to memory are visible. */
- wmb();
- atomic_inc(&cli->write_idx);
- wake_up_interruptible(&cli->waitq);
-
-exit:
- return rcode;
+ WARN_ON(!cli);
+ return atomic_read(&cli->write_idx) != atomic_read(&cli->meta_idx);
}
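/*
 * Editor's sketch (not driver code) of the three free-running indices used by
 * the reader: write_idx counts buffers published by the dumper, meta_idx
 * counts buffers handed out via GET_BUFFER, and read_idx counts buffers
 * returned via PUT_BUFFER. Data is ready whenever write_idx has moved past
 * meta_idx.
 */
#include <stdbool.h>
#include <stdio.h>

static unsigned int write_idx, meta_idx, read_idx;

static bool buffer_ready(void)
{
	return write_idx != meta_idx;
}

int main(void)
{
	write_idx++;				/* dumper publishes one buffer */
	printf("ready? %d\n", buffer_ready());	/* 1 */
	meta_idx++;				/* GET_BUFFER hands it to the user */
	printf("ready? %d\n", buffer_ready());	/* 0 */
	read_idx++;				/* PUT_BUFFER frees the slot */
	printf("returned %u buffer(s)\n", read_idx);
	return 0;
}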
/**
- * kbasep_vinstr_wake_up_callback - vinstr wake up timer wake up function
+ * kbasep_vinstr_hwcnt_reader_ioctl_dump() - Dump ioctl command.
+ * @cli: Non-NULL pointer to vinstr client.
*
- * @hrtimer: high resolution timer
- *
- * Return: High resolution timer restart enum.
+ * Return: 0 on success, else error code.
*/
-static enum hrtimer_restart kbasep_vinstr_wake_up_callback(
- struct hrtimer *hrtimer)
+static long kbasep_vinstr_hwcnt_reader_ioctl_dump(
+ struct kbase_vinstr_client *cli)
{
- struct kbasep_vinstr_wake_up_timer *timer =
- container_of(
- hrtimer,
- struct kbasep_vinstr_wake_up_timer,
- hrtimer);
+ int errcode;
- KBASE_DEBUG_ASSERT(timer);
+ mutex_lock(&cli->vctx->lock);
- atomic_set(&timer->vinstr_ctx->request_pending, 1);
- wake_up_all(&timer->vinstr_ctx->waitq);
+ errcode = kbasep_vinstr_client_dump(
+ cli, BASE_HWCNT_READER_EVENT_MANUAL);
- return HRTIMER_NORESTART;
+ mutex_unlock(&cli->vctx->lock);
+ return errcode;
}
/**
- * kbasep_vinstr_service_task - HWC dumping service thread
+ * kbasep_vinstr_hwcnt_reader_ioctl_clear() - Clear ioctl command.
+ * @cli: Non-NULL pointer to vinstr client.
*
- * @data: Pointer to vinstr context structure.
- *
- * Return: 0 on success; -ENOMEM if timer allocation fails
+ * Return: 0 on success, else error code.
*/
-static int kbasep_vinstr_service_task(void *data)
+static long kbasep_vinstr_hwcnt_reader_ioctl_clear(
+ struct kbase_vinstr_client *cli)
{
- struct kbase_vinstr_context *vinstr_ctx = data;
- struct kbasep_vinstr_wake_up_timer *timer;
-
- KBASE_DEBUG_ASSERT(vinstr_ctx);
+ int errcode;
- timer = kmalloc(sizeof(*timer), GFP_KERNEL);
+ mutex_lock(&cli->vctx->lock);
- if (!timer) {
- dev_warn(vinstr_ctx->kbdev->dev, "Timer allocation failed!\n");
- return -ENOMEM;
- }
-
- hrtimer_init(&timer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-
- timer->hrtimer.function = kbasep_vinstr_wake_up_callback;
- timer->vinstr_ctx = vinstr_ctx;
-
- while (!kthread_should_stop()) {
- struct kbase_vinstr_client *cli = NULL;
- struct kbase_vinstr_client *tmp;
- int rcode;
- unsigned long flags;
+ errcode = kbasep_vinstr_client_clear(cli);
- u64 timestamp = kbasep_vinstr_get_timestamp();
- u64 dump_time = 0;
- struct list_head expired_requests;
-
- /* Hold lock while performing operations on lists of clients. */
- mutex_lock(&vinstr_ctx->lock);
-
- /* Closing thread must not interact with client requests. */
- if (current == vinstr_ctx->thread) {
- atomic_set(&vinstr_ctx->request_pending, 0);
-
- spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
- if (!list_empty(&vinstr_ctx->waiting_clients)) {
- cli = list_first_entry(
- &vinstr_ctx->waiting_clients,
- struct kbase_vinstr_client,
- list);
- dump_time = cli->dump_time;
- }
- spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
- }
-
- if (!cli || ((s64)timestamp - (s64)dump_time < 0ll)) {
- mutex_unlock(&vinstr_ctx->lock);
-
- /* Sleep until next dumping event or service request. */
- if (cli) {
- u64 diff = dump_time - timestamp;
-
- hrtimer_start(
- &timer->hrtimer,
- ns_to_ktime(diff),
- HRTIMER_MODE_REL);
- }
- wait_event(
- vinstr_ctx->waitq,
- atomic_read(
- &vinstr_ctx->request_pending) ||
- kthread_should_stop());
- hrtimer_cancel(&timer->hrtimer);
- continue;
- }
-
- rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx,
- &timestamp);
-
- INIT_LIST_HEAD(&expired_requests);
-
- spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
- /* Find all expired requests. */
- list_for_each_entry_safe(
- cli,
- tmp,
- &vinstr_ctx->waiting_clients,
- list) {
- s64 tdiff =
- (s64)(timestamp + DUMPING_RESOLUTION) -
- (s64)cli->dump_time;
- if (tdiff >= 0ll) {
- list_del(&cli->list);
- list_add(&cli->list, &expired_requests);
- } else {
- break;
- }
- }
-
- /* Fill data for each request found. */
- while (!list_empty(&expired_requests)) {
- cli = list_first_entry(&expired_requests,
- struct kbase_vinstr_client, list);
-
- /* Ensure that legacy buffer will not be used from
- * this kthread context. */
- BUG_ON(0 == cli->buffer_count);
- /* Expect only periodically sampled clients. */
- BUG_ON(0 == cli->dump_interval);
-
- /* Release the spinlock, as filling the data in client's
- * userspace buffer could result in page faults. */
- spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
- if (!rcode)
- kbasep_vinstr_update_client(
- cli,
- timestamp,
- BASE_HWCNT_READER_EVENT_PERIODIC);
- spin_lock_irqsave(&cli->vinstr_ctx->state_lock, flags);
-
- /* This client got suspended, move to the next one. */
- if (cli->suspended)
- continue;
-
- /* Set new dumping time. Drop missed probing times. */
- do {
- cli->dump_time += cli->dump_interval;
- } while (cli->dump_time < timestamp);
-
- list_del(&cli->list);
- kbasep_vinstr_add_dump_request(
- cli,
- &vinstr_ctx->waiting_clients);
- }
- spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-
- /* Reprogram counters set if required. */
- kbasep_vinstr_reprogram(vinstr_ctx);
-
- mutex_unlock(&vinstr_ctx->lock);
- }
-
- kfree(timer);
-
- return 0;
+ mutex_unlock(&cli->vctx->lock);
+ return errcode;
}
-/*****************************************************************************/
-
/**
- * kbasep_vinstr_hwcnt_reader_buffer_ready - check if client has ready buffers
- * @cli: pointer to vinstr client structure
+ * kbasep_vinstr_hwcnt_reader_ioctl_get_buffer() - Get buffer ioctl command.
+ * @cli: Non-NULL pointer to vinstr client.
+ * @buffer: Non-NULL pointer to userspace buffer.
+ * @size: Size of buffer.
*
- * Return: non-zero if client has at least one dumping buffer filled that was
- * not notified to user yet
- */
-static int kbasep_vinstr_hwcnt_reader_buffer_ready(
- struct kbase_vinstr_client *cli)
-{
- KBASE_DEBUG_ASSERT(cli);
- return atomic_read(&cli->write_idx) != atomic_read(&cli->meta_idx);
-}
-
-/**
- * kbasep_vinstr_hwcnt_reader_ioctl_get_buffer - hwcnt reader's ioctl command
- * @cli: pointer to vinstr client structure
- * @buffer: pointer to userspace buffer
- * @size: size of buffer
- *
- * Return: zero on success
+ * Return: 0 on success, else error code.
*/
static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer(
- struct kbase_vinstr_client *cli, void __user *buffer,
- size_t size)
+ struct kbase_vinstr_client *cli,
+ void __user *buffer,
+ size_t size)
{
unsigned int meta_idx = atomic_read(&cli->meta_idx);
- unsigned int idx = meta_idx % cli->buffer_count;
+ unsigned int idx = meta_idx % cli->dump_bufs.buf_cnt;
- struct kbase_hwcnt_reader_metadata *meta = &cli->dump_buffers_meta[idx];
+ struct kbase_hwcnt_reader_metadata *meta = &cli->dump_bufs_meta[idx];
/* Metadata sanity check. */
- KBASE_DEBUG_ASSERT(idx == meta->buffer_idx);
+ WARN_ON(idx != meta->buffer_idx);
if (sizeof(struct kbase_hwcnt_reader_metadata) != size)
return -EINVAL;
@@ -1470,19 +693,20 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer(
}
/**
- * kbasep_vinstr_hwcnt_reader_ioctl_put_buffer - hwcnt reader's ioctl command
- * @cli: pointer to vinstr client structure
- * @buffer: pointer to userspace buffer
- * @size: size of buffer
+ * kbasep_vinstr_hwcnt_reader_ioctl_put_buffer() - Put buffer ioctl command.
+ * @cli: Non-NULL pointer to vinstr client.
+ * @buffer: Non-NULL pointer to userspace buffer.
+ * @size: Size of buffer.
*
- * Return: zero on success
+ * Return: 0 on success, else error code.
*/
static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer(
- struct kbase_vinstr_client *cli, void __user *buffer,
- size_t size)
+ struct kbase_vinstr_client *cli,
+ void __user *buffer,
+ size_t size)
{
unsigned int read_idx = atomic_read(&cli->read_idx);
- unsigned int idx = read_idx % cli->buffer_count;
+ unsigned int idx = read_idx % cli->dump_bufs.buf_cnt;
struct kbase_hwcnt_reader_metadata meta;
@@ -1505,182 +729,126 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer(
}
/**
- * kbasep_vinstr_hwcnt_reader_ioctl_set_interval - hwcnt reader's ioctl command
- * @cli: pointer to vinstr client structure
- * @interval: periodic dumping interval (disable periodic dumping if zero)
+ * kbasep_vinstr_hwcnt_reader_ioctl_set_interval() - Set interval ioctl command.
+ * @cli: Non-NULL pointer to vinstr client.
+ * @interval: Periodic dumping interval (disable periodic dumping if 0).
*
- * Return: zero on success
+ * Return: 0 always.
*/
static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval(
- struct kbase_vinstr_client *cli, u32 interval)
-{
- struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
- unsigned long flags;
-
- KBASE_DEBUG_ASSERT(vinstr_ctx);
-
- mutex_lock(&vinstr_ctx->lock);
- spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
-
- if (cli->suspended) {
- spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
- mutex_unlock(&vinstr_ctx->lock);
- return -ENOMEM;
- }
-
- list_del(&cli->list);
-
- cli->dump_interval = interval;
-
- /* If interval is non-zero, enable periodic dumping for this client. */
- if (cli->dump_interval) {
- if (DUMPING_RESOLUTION > cli->dump_interval)
- cli->dump_interval = DUMPING_RESOLUTION;
- cli->dump_time =
- kbasep_vinstr_get_timestamp() + cli->dump_interval;
-
- kbasep_vinstr_add_dump_request(
- cli, &vinstr_ctx->waiting_clients);
-
- atomic_set(&vinstr_ctx->request_pending, 1);
- wake_up_all(&vinstr_ctx->waitq);
- } else {
- list_add(&cli->list, &vinstr_ctx->idle_clients);
- }
+ struct kbase_vinstr_client *cli,
+ u32 interval)
+{
+ mutex_lock(&cli->vctx->lock);
+
+ if ((interval != 0) && (interval < DUMP_INTERVAL_MIN_NS))
+ interval = DUMP_INTERVAL_MIN_NS;
+ /* Update the interval, and put in a dummy next dump time */
+ cli->dump_interval_ns = interval;
+ cli->next_dump_time_ns = 0;
+
+ /*
+ * If it's a periodic client, kick off the worker early to do a proper
+ * timer reschedule. Return value is ignored, as we don't care if the
+ * worker is already queued.
+ */
+ if ((interval != 0) && (cli->vctx->suspend_count == 0))
+#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE
+ queue_work(system_wq, &cli->vctx->dump_work);
+#else
+ queue_work(system_highpri_wq, &cli->vctx->dump_work);
+#endif
- spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
- mutex_unlock(&vinstr_ctx->lock);
+ mutex_unlock(&cli->vctx->lock);
return 0;
}
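/*
 * Editor's sketch (not driver code) of the interval clamp above: zero
 * disables periodic dumping, anything else is raised to the minimum supported
 * period. The floor value here is illustrative, not the driver's constant.
 */
#include <stdint.h>
#include <stdio.h>

#define MIN_PERIOD_NS 200000u	/* illustrative floor */

static uint32_t clamp_interval(uint32_t interval_ns)
{
	if (interval_ns != 0 && interval_ns < MIN_PERIOD_NS)
		return MIN_PERIOD_NS;
	return interval_ns;
}

int main(void)
{
	printf("%u\n", clamp_interval(0));	/* 0: periodic dumping off */
	printf("%u\n", clamp_interval(1000));	/* clamped to the floor */
	printf("%u\n", clamp_interval(500000));	/* accepted unchanged */
	return 0;
}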
/**
- * kbasep_vinstr_hwcnt_reader_event_mask - return event mask for event id
- * @event_id: id of event
- * Return: event_mask or zero if event is not supported or maskable
- */
-static u32 kbasep_vinstr_hwcnt_reader_event_mask(
- enum base_hwcnt_reader_event event_id)
-{
- u32 event_mask = 0;
-
- switch (event_id) {
- case BASE_HWCNT_READER_EVENT_PREJOB:
- case BASE_HWCNT_READER_EVENT_POSTJOB:
- /* These event are maskable. */
- event_mask = (1 << event_id);
- break;
-
- case BASE_HWCNT_READER_EVENT_MANUAL:
- case BASE_HWCNT_READER_EVENT_PERIODIC:
- /* These event are non-maskable. */
- default:
- /* These event are not supported. */
- break;
- }
-
- return event_mask;
-}
-
-/**
- * kbasep_vinstr_hwcnt_reader_ioctl_enable_event - hwcnt reader's ioctl command
- * @cli: pointer to vinstr client structure
- * @event_id: id of event to enable
+ * kbasep_vinstr_hwcnt_reader_ioctl_enable_event() - Enable event ioctl command.
+ * @cli: Non-NULL pointer to vinstr client.
+ * @event_id: ID of event to enable.
*
- * Return: zero on success
+ * Return: 0 always.
*/
static long kbasep_vinstr_hwcnt_reader_ioctl_enable_event(
struct kbase_vinstr_client *cli,
enum base_hwcnt_reader_event event_id)
{
- struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
- u32 event_mask;
-
- KBASE_DEBUG_ASSERT(vinstr_ctx);
-
- event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id);
- if (!event_mask)
- return -EINVAL;
-
- mutex_lock(&vinstr_ctx->lock);
- cli->event_mask |= event_mask;
- mutex_unlock(&vinstr_ctx->lock);
-
+ /* No-op, as events aren't supported */
return 0;
}
/**
- * kbasep_vinstr_hwcnt_reader_ioctl_disable_event - hwcnt reader's ioctl command
- * @cli: pointer to vinstr client structure
- * @event_id: id of event to disable
+ * kbasep_vinstr_hwcnt_reader_ioctl_disable_event() - Disable event ioctl
+ * command.
+ * @cli: Non-NULL pointer to vinstr client.
+ * @event_id: ID of event to disable.
*
- * Return: zero on success
+ * Return: 0 always.
*/
static long kbasep_vinstr_hwcnt_reader_ioctl_disable_event(
- struct kbase_vinstr_client *cli,
- enum base_hwcnt_reader_event event_id)
+ struct kbase_vinstr_client *cli,
+ enum base_hwcnt_reader_event event_id)
{
- struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
- u32 event_mask;
-
- KBASE_DEBUG_ASSERT(vinstr_ctx);
-
- event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id);
- if (!event_mask)
- return -EINVAL;
-
- mutex_lock(&vinstr_ctx->lock);
- cli->event_mask &= ~event_mask;
- mutex_unlock(&vinstr_ctx->lock);
-
+ /* No-op, as events aren't supported */
return 0;
}
/**
- * kbasep_vinstr_hwcnt_reader_ioctl_get_hwver - hwcnt reader's ioctl command
- * @cli: pointer to vinstr client structure
- * @hwver: pointer to user buffer where hw version will be stored
+ * kbasep_vinstr_hwcnt_reader_ioctl_get_hwver() - Get HW version ioctl command.
+ * @cli: Non-NULL pointer to vinstr client.
+ * @hwver: Non-NULL pointer to user buffer where HW version will be stored.
*
- * Return: zero on success
+ * Return: 0 on success, else error code.
*/
static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver(
- struct kbase_vinstr_client *cli, u32 __user *hwver)
+ struct kbase_vinstr_client *cli,
+ u32 __user *hwver)
{
-#ifndef CONFIG_MALI_NO_MALI
- struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
-#endif
-
- u32 ver = 5;
+ u32 ver = 0;
+ const enum kbase_hwcnt_gpu_group_type type =
+ kbase_hwcnt_metadata_group_type(cli->vctx->metadata, 0);
-#ifndef CONFIG_MALI_NO_MALI
- KBASE_DEBUG_ASSERT(vinstr_ctx);
- if (kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4))
+ switch (type) {
+ case KBASE_HWCNT_GPU_GROUP_TYPE_V4:
ver = 4;
-#endif
+ break;
+ case KBASE_HWCNT_GPU_GROUP_TYPE_V5:
+ ver = 5;
+ break;
+ default:
+ WARN_ON(true);
+ }
- return put_user(ver, hwver);
+	if (ver == 0)
+		return -EINVAL;
+
+	return put_user(ver, hwver);
}
/**
- * kbasep_vinstr_hwcnt_reader_ioctl - hwcnt reader's ioctl
- * @filp: pointer to file structure
- * @cmd: user command
- * @arg: command's argument
+ * kbasep_vinstr_hwcnt_reader_ioctl() - hwcnt reader's ioctl.
+ * @filp: Non-NULL pointer to file structure.
+ * @cmd: User command.
+ * @arg: Command's argument.
*
- * Return: zero on success
+ * Return: 0 on success, else error code.
*/
-static long kbasep_vinstr_hwcnt_reader_ioctl(struct file *filp,
- unsigned int cmd, unsigned long arg)
+static long kbasep_vinstr_hwcnt_reader_ioctl(
+ struct file *filp,
+ unsigned int cmd,
+ unsigned long arg)
{
- long rcode = 0;
+ long rcode;
struct kbase_vinstr_client *cli;
- KBASE_DEBUG_ASSERT(filp);
+ if (!filp || (_IOC_TYPE(cmd) != KBASE_HWCNT_READER))
+ return -EINVAL;
cli = filp->private_data;
- KBASE_DEBUG_ASSERT(cli);
-
- if (unlikely(KBASE_HWCNT_READER != _IOC_TYPE(cmd)))
+ if (!cli)
return -EINVAL;
switch (cmd) {
@@ -1689,42 +857,41 @@ static long kbasep_vinstr_hwcnt_reader_ioctl(struct file *filp,
break;
case KBASE_HWCNT_READER_GET_HWVER:
rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_hwver(
- cli, (u32 __user *)arg);
+ cli, (u32 __user *)arg);
break;
case KBASE_HWCNT_READER_GET_BUFFER_SIZE:
- KBASE_DEBUG_ASSERT(cli->vinstr_ctx);
rcode = put_user(
- (u32)cli->vinstr_ctx->dump_size,
- (u32 __user *)arg);
+ (u32)cli->vctx->metadata->dump_buf_bytes,
+ (u32 __user *)arg);
break;
case KBASE_HWCNT_READER_DUMP:
- rcode = kbase_vinstr_hwc_dump(
- cli, BASE_HWCNT_READER_EVENT_MANUAL);
+ rcode = kbasep_vinstr_hwcnt_reader_ioctl_dump(cli);
break;
case KBASE_HWCNT_READER_CLEAR:
- rcode = kbase_vinstr_hwc_clear(cli);
+ rcode = kbasep_vinstr_hwcnt_reader_ioctl_clear(cli);
break;
case KBASE_HWCNT_READER_GET_BUFFER:
rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_buffer(
- cli, (void __user *)arg, _IOC_SIZE(cmd));
+ cli, (void __user *)arg, _IOC_SIZE(cmd));
break;
case KBASE_HWCNT_READER_PUT_BUFFER:
rcode = kbasep_vinstr_hwcnt_reader_ioctl_put_buffer(
- cli, (void __user *)arg, _IOC_SIZE(cmd));
+ cli, (void __user *)arg, _IOC_SIZE(cmd));
break;
case KBASE_HWCNT_READER_SET_INTERVAL:
rcode = kbasep_vinstr_hwcnt_reader_ioctl_set_interval(
- cli, (u32)arg);
+ cli, (u32)arg);
break;
case KBASE_HWCNT_READER_ENABLE_EVENT:
rcode = kbasep_vinstr_hwcnt_reader_ioctl_enable_event(
- cli, (enum base_hwcnt_reader_event)arg);
+ cli, (enum base_hwcnt_reader_event)arg);
break;
case KBASE_HWCNT_READER_DISABLE_EVENT:
rcode = kbasep_vinstr_hwcnt_reader_ioctl_disable_event(
- cli, (enum base_hwcnt_reader_event)arg);
+ cli, (enum base_hwcnt_reader_event)arg);
break;
default:
+ WARN_ON(true);
rcode = -EINVAL;
break;
}
@@ -1733,21 +900,25 @@ static long kbasep_vinstr_hwcnt_reader_ioctl(struct file *filp,
}
/**
- * kbasep_vinstr_hwcnt_reader_poll - hwcnt reader's poll
- * @filp: pointer to file structure
- * @wait: pointer to poll table
- * Return: POLLIN if data can be read without blocking, otherwise zero
+ * kbasep_vinstr_hwcnt_reader_poll() - hwcnt reader's poll.
+ * @filp: Non-NULL pointer to file structure.
+ * @wait: Non-NULL pointer to poll table.
+ *
+ * Return: POLLIN if data can be read without blocking, 0 if data cannot be
+ *         read without blocking, else error code.
*/
-static unsigned int kbasep_vinstr_hwcnt_reader_poll(struct file *filp,
- poll_table *wait)
+static unsigned int kbasep_vinstr_hwcnt_reader_poll(
+ struct file *filp,
+ poll_table *wait)
{
struct kbase_vinstr_client *cli;
- KBASE_DEBUG_ASSERT(filp);
- KBASE_DEBUG_ASSERT(wait);
+ if (!filp || !wait)
+ return -EINVAL;
cli = filp->private_data;
- KBASE_DEBUG_ASSERT(cli);
+ if (!cli)
+ return -EINVAL;
poll_wait(filp, &cli->waitq, wait);
if (kbasep_vinstr_hwcnt_reader_buffer_ready(cli))
@@ -1756,25 +927,28 @@ static unsigned int kbasep_vinstr_hwcnt_reader_poll(struct file *filp,
}
/**
- * kbasep_vinstr_hwcnt_reader_mmap - hwcnt reader's mmap
- * @filp: pointer to file structure
- * @vma: pointer to vma structure
- * Return: zero on success
+ * kbasep_vinstr_hwcnt_reader_mmap() - hwcnt reader's mmap.
+ * @filp: Non-NULL pointer to file structure.
+ * @vma: Non-NULL pointer to vma structure.
+ *
+ * Return: 0 on success, else error code.
*/
-static int kbasep_vinstr_hwcnt_reader_mmap(struct file *filp,
- struct vm_area_struct *vma)
+static int kbasep_vinstr_hwcnt_reader_mmap(
+ struct file *filp,
+ struct vm_area_struct *vma)
{
struct kbase_vinstr_client *cli;
- unsigned long size, addr, pfn, offset;
- unsigned long vm_size = vma->vm_end - vma->vm_start;
+ unsigned long vm_size, size, addr, pfn, offset;
- KBASE_DEBUG_ASSERT(filp);
- KBASE_DEBUG_ASSERT(vma);
+ if (!filp || !vma)
+ return -EINVAL;
cli = filp->private_data;
- KBASE_DEBUG_ASSERT(cli);
+ if (!cli)
+ return -EINVAL;
- size = cli->buffer_count * cli->dump_size;
+ vm_size = vma->vm_end - vma->vm_start;
+ size = cli->dump_bufs.buf_cnt * cli->vctx->metadata->dump_buf_bytes;
if (vma->vm_pgoff > (size >> PAGE_SHIFT))
return -EINVAL;
@@ -1783,579 +957,33 @@ static int kbasep_vinstr_hwcnt_reader_mmap(struct file *filp,
if (vm_size > size - offset)
return -EINVAL;
- addr = __pa((unsigned long)cli->dump_buffers + offset);
+ addr = __pa(cli->dump_bufs.page_addr + offset);
pfn = addr >> PAGE_SHIFT;
return remap_pfn_range(
- vma,
- vma->vm_start,
- pfn,
- vm_size,
- vma->vm_page_prot);
+ vma, vma->vm_start, pfn, vm_size, vma->vm_page_prot);
}
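
The consumer side of this mapping can look like the following sketch (hypothetical userspace helper; it assumes struct kbase_hwcnt_reader_metadata and the GET/PUT_BUFFER commands from mali_kbase_hwcnt_reader.h, that buf_cnt is the buffer_count passed at setup time, and that buf_size came from GET_BUFFER_SIZE as above):

    #include <poll.h>
    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include "mali_kbase_hwcnt_reader.h"  /* assumed location of the reader ABI */

    /* Map all dump buffers, wait for one to become ready, then hand it back. */
    static int demo_read_one_dump(int reader_fd, uint32_t buf_cnt, uint32_t buf_size)
    {
            struct kbase_hwcnt_reader_metadata meta;
            struct pollfd pfd = { .fd = reader_fd, .events = POLLIN };
            size_t map_size = (size_t)buf_cnt * buf_size;
            uint8_t *bufs;
            int ret = -1;

            bufs = mmap(NULL, map_size, PROT_READ, MAP_PRIVATE, reader_fd, 0);
            if (bufs == MAP_FAILED)
                    return -1;

            /* Wait until the poll handler above reports a ready buffer. */
            if (poll(&pfd, 1, -1) > 0 &&
                !ioctl(reader_fd, KBASE_HWCNT_READER_GET_BUFFER, &meta)) {
                    /* Counters for this dump start at bufs + meta.buffer_idx * buf_size. */
                    ret = ioctl(reader_fd, KBASE_HWCNT_READER_PUT_BUFFER, &meta);
            }

            munmap(bufs, map_size);
            return ret;
    }
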
/**
- * kbasep_vinstr_hwcnt_reader_release - hwcnt reader's release
- * @inode: pointer to inode structure
- * @filp: pointer to file structure
- * Return always return zero
+ * kbasep_vinstr_hwcnt_reader_release() - hwcnt reader's release.
+ * @inode: Non-NULL pointer to inode structure.
+ * @filp: Non-NULL pointer to file structure.
+ *
+ * Return: 0 always.
*/
static int kbasep_vinstr_hwcnt_reader_release(struct inode *inode,
- struct file *filp)
-{
- struct kbase_vinstr_client *cli;
-
- KBASE_DEBUG_ASSERT(inode);
- KBASE_DEBUG_ASSERT(filp);
-
- cli = filp->private_data;
- KBASE_DEBUG_ASSERT(cli);
-
- kbase_vinstr_detach_client(cli);
- return 0;
-}
-
-/*****************************************************************************/
-
-/**
- * kbasep_vinstr_kick_scheduler - trigger scheduler cycle
- * @kbdev: pointer to kbase device structure
- */
-static void kbasep_vinstr_kick_scheduler(struct kbase_device *kbdev)
-{
- struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
- unsigned long flags;
-
- down(&js_devdata->schedule_sem);
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- kbase_backend_slot_update(kbdev);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
- up(&js_devdata->schedule_sem);
-}
-
-/**
- * kbasep_vinstr_suspend_worker - worker suspending vinstr module
- * @data: pointer to work structure
- */
-static void kbasep_vinstr_suspend_worker(struct work_struct *data)
-{
- struct kbase_vinstr_context *vinstr_ctx;
- unsigned long flags;
-
- vinstr_ctx = container_of(data, struct kbase_vinstr_context,
- suspend_work);
-
- mutex_lock(&vinstr_ctx->lock);
-
- if (vinstr_ctx->kctx)
- disable_hwcnt(vinstr_ctx);
-
- spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
- vinstr_ctx->state = VINSTR_SUSPENDED;
- wake_up_all(&vinstr_ctx->suspend_waitq);
-
- if (vinstr_ctx->need_resume) {
- vinstr_ctx->need_resume = false;
- vinstr_ctx->state = VINSTR_RESUMING;
- schedule_work(&vinstr_ctx->resume_work);
-
- spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-
- mutex_unlock(&vinstr_ctx->lock);
- } else {
- spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-
- mutex_unlock(&vinstr_ctx->lock);
-
- /* Kick GPU scheduler to allow entering protected mode.
- * This must happen after vinstr was suspended.
- */
- kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev);
- }
-}
-
-/**
- * kbasep_vinstr_resume_worker - worker resuming vinstr module
- * @data: pointer to work structure
- */
-static void kbasep_vinstr_resume_worker(struct work_struct *data)
+ struct file *filp)
{
- struct kbase_vinstr_context *vinstr_ctx;
- unsigned long flags;
-
- vinstr_ctx = container_of(data, struct kbase_vinstr_context,
- resume_work);
-
- mutex_lock(&vinstr_ctx->lock);
-
- if (vinstr_ctx->kctx)
- enable_hwcnt(vinstr_ctx);
+ struct kbase_vinstr_client *vcli = filp->private_data;
- spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
- vinstr_ctx->state = VINSTR_IDLE;
- wake_up_all(&vinstr_ctx->suspend_waitq);
+ mutex_lock(&vcli->vctx->lock);
- if (vinstr_ctx->need_suspend) {
- vinstr_ctx->need_suspend = false;
- vinstr_ctx->state = VINSTR_SUSPENDING;
- schedule_work(&vinstr_ctx->suspend_work);
+ vcli->vctx->client_count--;
+ list_del(&vcli->node);
- spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+ mutex_unlock(&vcli->vctx->lock);
- mutex_unlock(&vinstr_ctx->lock);
- } else {
- spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-
- mutex_unlock(&vinstr_ctx->lock);
-
- /* Kick GPU scheduler to allow entering protected mode.
- * Note that scheduler state machine might requested re-entry to
- * protected mode before vinstr was resumed.
- * This must happen after vinstr was release.
- */
- kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev);
- }
-}
-
-/*****************************************************************************/
-
-struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev)
-{
- struct kbase_vinstr_context *vinstr_ctx;
-
- vinstr_ctx = kzalloc(sizeof(*vinstr_ctx), GFP_KERNEL);
- if (!vinstr_ctx)
- return NULL;
-
- INIT_LIST_HEAD(&vinstr_ctx->idle_clients);
- INIT_LIST_HEAD(&vinstr_ctx->waiting_clients);
- INIT_LIST_HEAD(&vinstr_ctx->suspended_clients);
- mutex_init(&vinstr_ctx->lock);
- spin_lock_init(&vinstr_ctx->state_lock);
- vinstr_ctx->kbdev = kbdev;
- vinstr_ctx->thread = NULL;
- vinstr_ctx->state = VINSTR_IDLE;
- vinstr_ctx->suspend_cnt = 0;
- INIT_WORK(&vinstr_ctx->suspend_work, kbasep_vinstr_suspend_worker);
- INIT_WORK(&vinstr_ctx->resume_work, kbasep_vinstr_resume_worker);
- init_waitqueue_head(&vinstr_ctx->suspend_waitq);
-
- atomic_set(&vinstr_ctx->request_pending, 0);
- init_waitqueue_head(&vinstr_ctx->waitq);
-
- return vinstr_ctx;
-}
-
-void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx)
-{
- struct kbase_vinstr_client *cli;
-
- /* Stop service thread first. */
- if (vinstr_ctx->thread)
- kthread_stop(vinstr_ctx->thread);
-
- /* Wait for workers. */
- flush_work(&vinstr_ctx->suspend_work);
- flush_work(&vinstr_ctx->resume_work);
-
- while (1) {
- struct list_head *list = &vinstr_ctx->idle_clients;
-
- if (list_empty(list)) {
- list = &vinstr_ctx->waiting_clients;
- if (list_empty(list)) {
- list = &vinstr_ctx->suspended_clients;
- if (list_empty(list))
- break;
- }
- }
-
- cli = list_first_entry(list, struct kbase_vinstr_client, list);
- list_del(&cli->list);
- if (!cli->suspended)
- vinstr_ctx->nclients--;
- else
- vinstr_ctx->nclients_suspended--;
- kfree(cli->accum_buffer);
- kfree(cli);
- }
- KBASE_DEBUG_ASSERT(!vinstr_ctx->nclients);
- KBASE_DEBUG_ASSERT(!vinstr_ctx->nclients_suspended);
- if (vinstr_ctx->kctx)
- kbasep_vinstr_destroy_kctx(vinstr_ctx);
- kfree(vinstr_ctx);
-}
-
-int kbase_vinstr_hwcnt_reader_setup(struct kbase_vinstr_context *vinstr_ctx,
- struct kbase_ioctl_hwcnt_reader_setup *setup)
-{
- struct kbase_vinstr_client *cli;
- u32 bitmap[4];
- int fd;
-
- KBASE_DEBUG_ASSERT(vinstr_ctx);
- KBASE_DEBUG_ASSERT(setup);
- KBASE_DEBUG_ASSERT(setup->buffer_count);
-
- bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
- bitmap[TILER_HWCNT_BM] = setup->tiler_bm;
- bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
- bitmap[JM_HWCNT_BM] = setup->jm_bm;
-
- cli = kbasep_vinstr_attach_client(
- vinstr_ctx,
- setup->buffer_count,
- bitmap,
- &fd,
- NULL);
-
- if (!cli)
- return -ENOMEM;
-
- kbase_vinstr_wait_for_ready(vinstr_ctx);
- return fd;
-}
-
-int kbase_vinstr_legacy_hwc_setup(
- struct kbase_vinstr_context *vinstr_ctx,
- struct kbase_vinstr_client **cli,
- struct kbase_ioctl_hwcnt_enable *enable)
-{
- KBASE_DEBUG_ASSERT(vinstr_ctx);
- KBASE_DEBUG_ASSERT(enable);
- KBASE_DEBUG_ASSERT(cli);
-
- if (enable->dump_buffer) {
- u32 bitmap[4];
-
- bitmap[SHADER_HWCNT_BM] = enable->shader_bm;
- bitmap[TILER_HWCNT_BM] = enable->tiler_bm;
- bitmap[MMU_L2_HWCNT_BM] = enable->mmu_l2_bm;
- bitmap[JM_HWCNT_BM] = enable->jm_bm;
-
- if (*cli)
- return -EBUSY;
-
- *cli = kbasep_vinstr_attach_client(
- vinstr_ctx,
- 0,
- bitmap,
- (void *)(uintptr_t)enable->dump_buffer,
- NULL);
-
- if (!(*cli))
- return -ENOMEM;
-
- kbase_vinstr_wait_for_ready(vinstr_ctx);
- } else {
- if (!*cli)
- return -EINVAL;
-
- kbase_vinstr_detach_client(*cli);
- *cli = NULL;
- }
+ kbasep_vinstr_client_destroy(vcli);
return 0;
}
-
-struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup(
- struct kbase_vinstr_context *vinstr_ctx,
- struct kbase_ioctl_hwcnt_reader_setup *setup,
- void *kernel_buffer)
-{
- struct kbase_vinstr_client *kernel_client;
- u32 bitmap[4];
-
- if (!vinstr_ctx || !setup || !kernel_buffer)
- return NULL;
-
- bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
- bitmap[TILER_HWCNT_BM] = setup->tiler_bm;
- bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
- bitmap[JM_HWCNT_BM] = setup->jm_bm;
-
- kernel_client = kbasep_vinstr_attach_client(
- vinstr_ctx,
- 0,
- bitmap,
- NULL,
- kernel_buffer);
-
- if (kernel_client)
- kbase_vinstr_wait_for_ready(vinstr_ctx);
-
- return kernel_client;
-}
-KBASE_EXPORT_TEST_API(kbase_vinstr_hwcnt_kernel_setup);
-
-int kbase_vinstr_hwc_dump(struct kbase_vinstr_client *cli,
- enum base_hwcnt_reader_event event_id)
-{
- int rcode = 0;
- struct kbase_vinstr_context *vinstr_ctx;
- u64 timestamp;
- u32 event_mask;
-
- if (!cli)
- return -EINVAL;
-
- vinstr_ctx = cli->vinstr_ctx;
- KBASE_DEBUG_ASSERT(vinstr_ctx);
-
- KBASE_DEBUG_ASSERT(event_id < BASE_HWCNT_READER_EVENT_COUNT);
- event_mask = 1 << event_id;
-
- mutex_lock(&vinstr_ctx->lock);
-
- if (event_mask & cli->event_mask) {
- rcode = kbasep_vinstr_collect_and_accumulate(
- vinstr_ctx,
- &timestamp);
- if (rcode)
- goto exit;
-
- rcode = kbasep_vinstr_update_client(cli, timestamp, event_id);
- if (rcode)
- goto exit;
-
- kbasep_vinstr_reprogram(vinstr_ctx);
- }
-
-exit:
- mutex_unlock(&vinstr_ctx->lock);
-
- return rcode;
-}
-KBASE_EXPORT_TEST_API(kbase_vinstr_hwc_dump);
-
-int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli)
-{
- struct kbase_vinstr_context *vinstr_ctx;
- int rcode;
- u64 unused;
-
- if (!cli)
- return -EINVAL;
-
- vinstr_ctx = cli->vinstr_ctx;
- KBASE_DEBUG_ASSERT(vinstr_ctx);
-
- mutex_lock(&vinstr_ctx->lock);
-
- rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &unused);
- if (rcode)
- goto exit;
- rcode = kbase_instr_hwcnt_clear(vinstr_ctx->kctx);
- if (rcode)
- goto exit;
- memset(cli->accum_buffer, 0, cli->dump_size);
-
- kbasep_vinstr_reprogram(vinstr_ctx);
-
-exit:
- mutex_unlock(&vinstr_ctx->lock);
-
- return rcode;
-}
-
-int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx)
-{
- unsigned long flags;
- int ret = -EAGAIN;
-
- KBASE_DEBUG_ASSERT(vinstr_ctx);
-
- spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
- vinstr_ctx->forced_suspend = true;
- switch (vinstr_ctx->state) {
- case VINSTR_SUSPENDED:
- vinstr_ctx->suspend_cnt++;
- /* overflow shall not happen */
- BUG_ON(0 == vinstr_ctx->suspend_cnt);
- ret = 0;
- break;
-
- case VINSTR_IDLE:
- if (vinstr_ctx->clients_present) {
- vinstr_ctx->state = VINSTR_SUSPENDING;
- schedule_work(&vinstr_ctx->suspend_work);
- } else {
- vinstr_ctx->state = VINSTR_SUSPENDED;
-
- vinstr_ctx->suspend_cnt++;
- /* overflow shall not happen */
- WARN_ON(0 == vinstr_ctx->suspend_cnt);
- ret = 0;
- }
- break;
-
- case VINSTR_DUMPING:
- vinstr_ctx->state = VINSTR_SUSPENDING;
- break;
-
- case VINSTR_RESUMING:
- vinstr_ctx->need_suspend = true;
- break;
-
- case VINSTR_SUSPENDING:
- break;
-
- default:
- KBASE_DEBUG_ASSERT(0);
- break;
- }
- spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-
- return ret;
-}
-
-static int kbase_vinstr_is_ready(struct kbase_vinstr_context *vinstr_ctx)
-{
- unsigned long flags;
- int ret = -EAGAIN;
-
- KBASE_DEBUG_ASSERT(vinstr_ctx);
-
- spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
- switch (vinstr_ctx->state) {
- case VINSTR_SUSPENDED:
- case VINSTR_RESUMING:
- case VINSTR_SUSPENDING:
- break;
-
- case VINSTR_IDLE:
- case VINSTR_DUMPING:
- ret = 0;
- break;
- default:
- KBASE_DEBUG_ASSERT(0);
- break;
- }
- spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-
- return ret;
-}
-
-void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx)
-{
- wait_event(vinstr_ctx->suspend_waitq,
- (0 == kbase_vinstr_try_suspend(vinstr_ctx)));
-}
-
-void kbase_vinstr_wait_for_ready(struct kbase_vinstr_context *vinstr_ctx)
-{
- wait_event(vinstr_ctx->suspend_waitq,
- (0 == kbase_vinstr_is_ready(vinstr_ctx)));
-}
-KBASE_EXPORT_TEST_API(kbase_vinstr_wait_for_ready);
-
-/**
- * kbase_vinstr_update_suspend - Update vinstr suspend/resume status depending
- * on nclients
- * @vinstr_ctx: vinstr context pointer
- *
- * This function should be called whenever vinstr_ctx->nclients changes. This
- * may cause vinstr to be suspended or resumed, depending on the number of
- * clients and whether IPA is suspended or not.
- */
-static void kbase_vinstr_update_suspend(struct kbase_vinstr_context *vinstr_ctx)
-{
- lockdep_assert_held(&vinstr_ctx->state_lock);
-
- switch (vinstr_ctx->state) {
- case VINSTR_SUSPENDED:
- if ((vinstr_ctx->nclients) && (0 == vinstr_ctx->suspend_cnt)) {
- vinstr_ctx->state = VINSTR_RESUMING;
- schedule_work(&vinstr_ctx->resume_work);
- }
- break;
-
- case VINSTR_SUSPENDING:
- if ((vinstr_ctx->nclients) && (!vinstr_ctx->forced_suspend))
- vinstr_ctx->need_resume = true;
- break;
-
- case VINSTR_IDLE:
- if (!vinstr_ctx->nclients) {
- vinstr_ctx->state = VINSTR_SUSPENDING;
- schedule_work(&vinstr_ctx->suspend_work);
- }
- break;
-
- case VINSTR_DUMPING:
- if (!vinstr_ctx->nclients)
- vinstr_ctx->state = VINSTR_SUSPENDING;
- break;
-
- case VINSTR_RESUMING:
- if (!vinstr_ctx->nclients)
- vinstr_ctx->need_suspend = true;
- break;
- }
-}
-
-void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx)
-{
- unsigned long flags;
-
- KBASE_DEBUG_ASSERT(vinstr_ctx);
-
- spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
- BUG_ON(VINSTR_SUSPENDING == vinstr_ctx->state);
- if (VINSTR_SUSPENDED == vinstr_ctx->state) {
- BUG_ON(0 == vinstr_ctx->suspend_cnt);
- vinstr_ctx->suspend_cnt--;
- if (0 == vinstr_ctx->suspend_cnt) {
- vinstr_ctx->forced_suspend = false;
- if (vinstr_ctx->clients_present) {
- vinstr_ctx->state = VINSTR_RESUMING;
- schedule_work(&vinstr_ctx->resume_work);
- } else {
- vinstr_ctx->state = VINSTR_IDLE;
- }
- }
- }
- spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-}
-
-void kbase_vinstr_suspend_client(struct kbase_vinstr_client *client)
-{
- struct kbase_vinstr_context *vinstr_ctx = client->vinstr_ctx;
- unsigned long flags;
-
- spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
-
- if (!client->suspended) {
- list_del(&client->list);
- list_add(&client->list, &vinstr_ctx->suspended_clients);
-
- vinstr_ctx->nclients--;
- vinstr_ctx->nclients_suspended++;
- kbase_vinstr_update_suspend(vinstr_ctx);
-
- client->suspended = true;
- }
-
- spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-}
-
-void kbase_vinstr_resume_client(struct kbase_vinstr_client *client)
-{
- struct kbase_vinstr_context *vinstr_ctx = client->vinstr_ctx;
- unsigned long flags;
-
- spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
-
- if (client->suspended) {
- list_del(&client->list);
- list_add(&client->list, &vinstr_ctx->idle_clients);
-
- vinstr_ctx->nclients++;
- vinstr_ctx->nclients_suspended--;
- kbase_vinstr_update_suspend(vinstr_ctx);
-
- client->suspended = false;
- }
-
- spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-}
diff --git a/mali_kbase/mali_kbase_vinstr.h b/mali_kbase/mali_kbase_vinstr.h
index d32799f..81d315f 100644
--- a/mali_kbase/mali_kbase_vinstr.h
+++ b/mali_kbase/mali_kbase_vinstr.h
@@ -20,163 +20,72 @@
*
*/
+/*
+ * Vinstr, used to provide an ioctl for userspace access to periodic hardware
+ * counters.
+ */
+
#ifndef _KBASE_VINSTR_H_
#define _KBASE_VINSTR_H_
-#include <mali_kbase_hwcnt_reader.h>
-#include <mali_kbase_ioctl.h>
-
-/*****************************************************************************/
-
struct kbase_vinstr_context;
-struct kbase_vinstr_client;
-
-/*****************************************************************************/
-
-/**
- * kbase_vinstr_init() - initialize the vinstr core
- * @kbdev: kbase device
- *
- * Return: pointer to the vinstr context on success or NULL on failure
- */
-struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev);
-
-/**
- * kbase_vinstr_term() - terminate the vinstr core
- * @vinstr_ctx: vinstr context
- */
-void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx);
-
-/**
- * kbase_vinstr_hwcnt_reader_setup - configure hw counters reader
- * @vinstr_ctx: vinstr context
- * @setup: reader's configuration
- *
- * Return: file descriptor on success and a (negative) error code otherwise
- */
-int kbase_vinstr_hwcnt_reader_setup(
- struct kbase_vinstr_context *vinstr_ctx,
- struct kbase_ioctl_hwcnt_reader_setup *setup);
+struct kbase_hwcnt_virtualizer;
+struct kbase_ioctl_hwcnt_reader_setup;
/**
- * kbase_vinstr_legacy_hwc_setup - configure hw counters for dumping
- * @vinstr_ctx: vinstr context
- * @cli: pointer where to store pointer to new vinstr client structure
- * @enable: hwc configuration
+ * kbase_vinstr_init() - Initialise a vinstr context.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ * @out_vctx: Non-NULL pointer to where the pointer to the created vinstr
+ * context will be stored on success.
*
- * Return: zero on success
- */
-int kbase_vinstr_legacy_hwc_setup(
- struct kbase_vinstr_context *vinstr_ctx,
- struct kbase_vinstr_client **cli,
- struct kbase_ioctl_hwcnt_enable *enable);
-
-/**
- * kbase_vinstr_hwcnt_kernel_setup - configure hw counters for kernel side
- * client
- * @vinstr_ctx: vinstr context
- * @setup: reader's configuration
- * @kernel_buffer: pointer to dump buffer
+ * On creation, the suspend count of the context will be 0.
*
- * setup->buffer_count is not used for kernel side clients.
- *
- * Return: pointer to client structure, or NULL on failure
+ * Return: 0 on success, else error code.
*/
-struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup(
- struct kbase_vinstr_context *vinstr_ctx,
- struct kbase_ioctl_hwcnt_reader_setup *setup,
- void *kernel_buffer);
+int kbase_vinstr_init(
+ struct kbase_hwcnt_virtualizer *hvirt,
+ struct kbase_vinstr_context **out_vctx);
/**
- * kbase_vinstr_hwc_dump - issue counter dump for vinstr client
- * @cli: pointer to vinstr client
- * @event_id: id of event that triggered hwcnt dump
- *
- * Return: zero on success
+ * kbase_vinstr_term() - Terminate a vinstr context.
+ * @vctx: Pointer to the vinstr context to be terminated.
*/
-int kbase_vinstr_hwc_dump(
- struct kbase_vinstr_client *cli,
- enum base_hwcnt_reader_event event_id);
+void kbase_vinstr_term(struct kbase_vinstr_context *vctx);
/**
- * kbase_vinstr_hwc_clear - performs a reset of the hardware counters for
- * a given kbase context
- * @cli: pointer to vinstr client
+ * kbase_vinstr_suspend() - Increment the suspend count of the context.
+ * @vctx: Non-NULL pointer to the vinstr context to be suspended.
*
- * Return: zero on success
+ * After this function call returns, it is guaranteed that all timers and
+ * workers in vinstr will be cancelled, and will not be re-triggered until
+ * after the context has been resumed. In effect, this means no new counter
+ * dumps will occur for any existing or subsequently added periodic clients.
*/
-int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli);
+void kbase_vinstr_suspend(struct kbase_vinstr_context *vctx);
/**
- * kbase_vinstr_try_suspend - try suspending operation of a given vinstr context
- * @vinstr_ctx: vinstr context
- *
- * Return: 0 on success, or negative if state change is in progress
+ * kbase_vinstr_resume() - Decrement the suspend count of the context.
+ * @vctx: Non-NULL pointer to the vinstr context to be resumed.
*
- * Warning: This API call is non-generic. It is meant to be used only by
- * job scheduler state machine.
+ * If a call to this function decrements the suspend count from 1 to 0, then
+ * normal operation of vinstr will be resumed (i.e. counter dumps will once
+ * again be automatically triggered for all periodic clients).
*
- * Function initiates vinstr switch to suspended state. Once it was called
- * vinstr enters suspending state. If function return non-zero value, it
- * indicates that state switch is not complete and function must be called
- * again. On state switch vinstr will trigger job scheduler state machine
- * cycle.
- */
-int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx);
-
-/**
- * kbase_vinstr_suspend - suspends operation of a given vinstr context
- * @vinstr_ctx: vinstr context
- *
- * Function initiates vinstr switch to suspended state. Then it blocks until
- * operation is completed.
- */
-void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx);
-
-/**
- * kbase_vinstr_wait_for_ready - waits for the vinstr context to get ready
- * @vinstr_ctx: vinstr context
- *
- * Function waits for the vinstr to become ready for dumping. It can be in the
- * resuming state after the client was attached but the client currently expects
- * that vinstr is ready for dumping immediately post attach.
- */
-void kbase_vinstr_wait_for_ready(struct kbase_vinstr_context *vinstr_ctx);
-
-/**
- * kbase_vinstr_resume - resumes operation of a given vinstr context
- * @vinstr_ctx: vinstr context
- *
- * Function can be called only if it was preceded by a successful call
+ * It is only valid to call this function one time for each prior returned call
* to kbase_vinstr_suspend.
*/
-void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx);
+void kbase_vinstr_resume(struct kbase_vinstr_context *vctx);
/**
- * kbase_vinstr_dump_size - Return required size of dump buffer
- * @kbdev: device pointer
+ * kbase_vinstr_hwcnt_reader_setup() - Set up a new hardware counter reader
+ * client.
+ * @vinstr_ctx: Non-NULL pointer to the vinstr context.
+ * @setup: Non-NULL pointer to the hwcnt reader configuration.
*
- * Return : buffer size in bytes
+ * Return: file descriptor on success, else a (negative) error code.
*/
-size_t kbase_vinstr_dump_size(struct kbase_device *kbdev);
-
-/**
- * kbase_vinstr_detach_client - Detach a client from the vinstr core
- * @cli: pointer to vinstr client
- */
-void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli);
-
-/**
- * kbase_vinstr_suspend_client - Suspend vinstr client
- * @client: pointer to vinstr client
- */
-void kbase_vinstr_suspend_client(struct kbase_vinstr_client *client);
-
-/**
- * kbase_vinstr_resume_client - Resume vinstr client
- * @client: pointer to vinstr client
- */
-void kbase_vinstr_resume_client(struct kbase_vinstr_client *client);
+int kbase_vinstr_hwcnt_reader_setup(
+ struct kbase_vinstr_context *vinstr_ctx,
+ struct kbase_ioctl_hwcnt_reader_setup *setup);
#endif /* _KBASE_VINSTR_H_ */
-
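
A hedged sketch of the expected call pattern against this reworked interface (it drops into a kbase translation unit; field names such as kbdev->hwcnt_gpu_virt and kbdev->vinstr_ctx are assumptions about the surrounding device structure, and error handling is trimmed):

    /* Hypothetical wiring of the vinstr lifecycle around the declarations above. */
    static int example_vinstr_lifecycle(struct kbase_device *kbdev,
            struct kbase_ioctl_hwcnt_reader_setup *setup)
    {
            int err;
            int fd;

            /* Probe: create the vinstr context on top of the hwcnt virtualizer. */
            err = kbase_vinstr_init(kbdev->hwcnt_gpu_virt, &kbdev->vinstr_ctx);
            if (err)
                    return err;

            /* ioctl path: hand a hwcnt reader fd back to userspace. */
            fd = kbase_vinstr_hwcnt_reader_setup(kbdev->vinstr_ctx, setup);

            /* Around a reset or power transition: the suspend count nests, and
             * each suspend must be balanced by exactly one resume.
             */
            kbase_vinstr_suspend(kbdev->vinstr_ctx);
            kbase_vinstr_resume(kbdev->vinstr_ctx);

            /* Remove: tear the context down once all readers are released. */
            kbase_vinstr_term(kbdev->vinstr_ctx);
            return fd;
    }
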
diff --git a/mali_kbase/mali_linux_kbase_trace.h b/mali_kbase/mali_linux_kbase_trace.h
index 920562e..6c6a8c6 100644
--- a/mali_kbase/mali_linux_kbase_trace.h
+++ b/mali_kbase/mali_linux_kbase_trace.h
@@ -154,7 +154,6 @@ DEFINE_MALI_ADD_EVENT(JM_ZAP_SCHEDULED);
DEFINE_MALI_ADD_EVENT(JM_ZAP_DONE);
DEFINE_MALI_ADD_EVENT(JM_SUBMIT_AFTER_RESET);
DEFINE_MALI_ADD_EVENT(JM_JOB_COMPLETE);
-DEFINE_MALI_ADD_EVENT(JS_FAST_START_EVICTS_CTX);
DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_RUNPOOL);
DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_RUNPOOL);
DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_CTX);
diff --git a/mali_kbase/mali_malisw.h b/mali_kbase/mali_malisw.h
index f17bd5e..3a4db10 100644
--- a/mali_kbase/mali_malisw.h
+++ b/mali_kbase/mali_malisw.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -83,15 +83,6 @@
#define CSTD_NOP(...) ((void)#__VA_ARGS__)
/**
- * Function-like macro for converting a pointer in to a u64 for storing into
- * an external data structure. This is commonly used when pairing a 32-bit
- * CPU with a 64-bit peripheral, such as a Midgard GPU. C's type promotion
- * is complex and a straight cast does not work reliably as pointers are
- * often considered as signed.
- */
-#define PTR_TO_U64(x) ((uint64_t)((uintptr_t)(x)))
-
-/**
* @hideinitializer
* Function-like macro for stringizing a single level macro.
* @code
@@ -115,22 +106,4 @@
*/
#define CSTD_STR2(x) CSTD_STR1(x)
-/**
- * Specify an assertion value which is evaluated at compile time. Recommended
- * usage is specification of a @c static @c INLINE function containing all of
- * the assertions thus:
- *
- * @code
- * static INLINE [module]_compile_time_assertions( void )
- * {
- * COMPILE_TIME_ASSERT( sizeof(uintptr_t) == sizeof(intptr_t) );
- * }
- * @endcode
- *
- * @note Use @c static not @c STATIC. We never want to turn off this @c static
- * specification for testing purposes.
- */
-#define CSTD_COMPILE_TIME_ASSERT(expr) \
- do { switch (0) { case 0: case (expr):; } } while (false)
-
#endif /* _MALISW_H_ */
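
For reference, the compile-time assertion macro removed above maps directly onto the standard kernel helper; a minimal sketch of the equivalent check, assuming any remaining user is converted to BUILD_BUG_ON:

    #include <linux/bug.h>
    #include <linux/types.h>

    static inline void example_compile_time_checks(void)
    {
            /* Rejected at build time, with no runtime footprint, if false. */
            BUILD_BUG_ON(sizeof(uintptr_t) != sizeof(void *));
    }
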
diff --git a/mali_kbase/mali_midg_regmap.h b/mali_kbase/mali_midg_regmap.h
index 8d9f7b6..0f03e8d 100644
--- a/mali_kbase/mali_midg_regmap.h
+++ b/mali_kbase/mali_midg_regmap.h
@@ -217,7 +217,7 @@
#define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. */
/* JOB IRQ flags */
-#define JOB_IRQ_GLOBAL_IF (1 << 18) /* Global interface interrupt received */
+#define JOB_IRQ_GLOBAL_IF (1 << 31) /* Global interface interrupt received */
#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */
#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */
@@ -381,14 +381,14 @@
/*
* Begin TRANSCFG register values
*/
-#define AS_TRANSCFG_PTW_MEMATTR_MASK (3 << 24)
-#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1 << 24)
-#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2 << 24)
-
-#define AS_TRANSCFG_PTW_SH_MASK ((3 << 28))
-#define AS_TRANSCFG_PTW_SH_OS (2 << 28)
-#define AS_TRANSCFG_PTW_SH_IS (3 << 28)
-
+#define AS_TRANSCFG_PTW_MEMATTR_MASK (3ull << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1ull << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2ull << 24)
+
+#define AS_TRANSCFG_PTW_SH_MASK ((3ull << 28))
+#define AS_TRANSCFG_PTW_SH_OS (2ull << 28)
+#define AS_TRANSCFG_PTW_SH_IS (3ull << 28)
+#define AS_TRANSCFG_R_ALLOCATE (1ull << 30)
/*
* Begin Command Values
*/
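
The move to ull-suffixed constants matters because AS_TRANSCFG is a 64-bit register value: with plain int constants, shifts that reach bit 31 and complements of the masks pass through 32-bit signed arithmetic. A small illustrative sketch of composing the page-table-walk attributes with the new definitions (the driver's MMU configuration code is the real consumer; this helper is hypothetical):

    #include <linux/types.h>

    /* With the ull forms every sub-expression stays unsigned 64-bit, so the
     * mask complements and field updates never rely on signed promotion.
     */
    static u64 example_transcfg_set_ptw_cacheable_is(u64 transcfg)
    {
            transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK;      /* 64-bit clear */
            transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK; /* cacheable walks */

            transcfg &= ~AS_TRANSCFG_PTW_SH_MASK;
            transcfg |= AS_TRANSCFG_PTW_SH_IS;              /* inner shareable */

            return transcfg;
    }
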
diff --git a/mali_kbase/mali_uk.h b/mali_kbase/mali_uk.h
index c81f404..701f390 100644
--- a/mali_kbase/mali_uk.h
+++ b/mali_kbase/mali_uk.h
@@ -74,68 +74,6 @@ enum uk_client_id {
UK_CLIENT_COUNT
};
-/**
- * Each function callable through the UK interface has a unique number.
- * Functions provided by UK clients start from number UK_FUNC_ID.
- * Numbers below UK_FUNC_ID are used for internal UK functions.
- */
-enum uk_func {
- UKP_FUNC_ID_CHECK_VERSION, /**< UKK Core internal function */
- /**
- * Each UK client numbers the functions they provide starting from
- * number UK_FUNC_ID. This number is then eventually assigned to the
- * id field of the union uk_header structure when preparing to make a
- * UK call. See your UK client for a list of their function numbers.
- */
- UK_FUNC_ID = 512
-};
-
-/**
- * Arguments for a UK call are stored in a structure. This structure consists
- * of a fixed size header and a payload. The header carries a 32-bit number
- * identifying the UK function to be called (see uk_func). When the UKK client
- * receives this header and executed the requested UK function, it will use
- * the same header to store the result of the function in the form of a
- * int return code. The size of this structure is such that the
- * first member of the payload following the header can be accessed efficiently
- * on a 32 and 64-bit kernel and the structure has the same size regardless
- * of a 32 or 64-bit kernel. The uk_kernel_size_type type should be defined
- * accordingly in the OS specific mali_uk_os.h header file.
- */
-union uk_header {
- /**
- * 32-bit number identifying the UK function to be called.
- * Also see uk_func.
- */
- u32 id;
- /**
- * The int return code returned by the called UK function.
- * See the specification of the particular UK function you are
- * calling for the meaning of the error codes returned. All
- * UK functions return 0 on success.
- */
- u32 ret;
- /*
- * Used to ensure 64-bit alignment of this union. Do not remove.
- * This field is used for padding and does not need to be initialized.
- */
- u64 sizer;
-};
-
-/**
- * This structure carries a 16-bit major and minor number and is sent along with an internal UK call
- * used during uku_open to identify the versions of the UK module in use by the user-side and kernel-side.
- */
-struct uku_version_check_args {
- union uk_header header;
- /**< UK call header */
- u16 major;
- /**< This field carries the user-side major version on input and the kernel-side major version on output */
- u16 minor;
- /**< This field carries the user-side minor version on input and the kernel-side minor version on output. */
- u8 padding[4];
-};
-
/** @} end group uk_api */
/** @} *//* end group base_api */
diff --git a/mali_kbase/sconscript b/mali_kbase/sconscript
index 01c7589..f9d9c1b 100644
--- a/mali_kbase/sconscript
+++ b/mali_kbase/sconscript
@@ -50,7 +50,6 @@ make_args = env.kernel_get_config_defines(ret_list = True) + [
'MALI_KERNEL_TEST_API=%s' % env['debug'],
'MALI_UNIT_TEST=%s' % env['unit'],
'MALI_RELEASE_NAME=%s' % env['mali_release_name'],
- 'MALI_MOCK_TEST=%s' % mock_test,
'MALI_CUSTOMER_RELEASE=%s' % env['release'],
'MALI_USE_CSF=%s' % env['csf'],
'MALI_COVERAGE=%s' % env['coverage'],
diff --git a/mali_kbase/tests/Mconfig b/mali_kbase/tests/Mconfig
index ddd7630..af4e383 100644
--- a/mali_kbase/tests/Mconfig
+++ b/mali_kbase/tests/Mconfig
@@ -21,6 +21,11 @@ config BUILD_IPA_TESTS
default y if UNIT_TEST_KERNEL_MODULES && MALI_DEVFREQ
default n
+config BUILD_IPA_UNIT_TESTS
+ bool
+ default y if NO_MALI && BUILD_IPA_TESTS
+ default n
+
config BUILD_CSF_TESTS
bool
default y if UNIT_TEST_KERNEL_MODULES && GPU_HAS_CSF
diff --git a/mali_kbase/tests/mali_kutf_irq_test/build.bp b/mali_kbase/tests/mali_kutf_irq_test/build.bp
index a6669af..66f4eb3 100644
--- a/mali_kbase/tests/mali_kutf_irq_test/build.bp
+++ b/mali_kbase/tests/mali_kutf_irq_test/build.bp
@@ -21,7 +21,6 @@ bob_kernel_module {
"mali_kbase",
"kutf",
],
- install_group: "IG_tests",
enabled: false,
base_build_kutf: {
enabled: true,
diff --git a/mali_kbase/tests/sconscript b/mali_kbase/tests/sconscript
index 0bd24a5..ca64e83 100644
--- a/mali_kbase/tests/sconscript
+++ b/mali_kbase/tests/sconscript
@@ -1,5 +1,5 @@
#
-# (C) COPYRIGHT 2010-2011, 2013, 2017 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2010-2011, 2013, 2017-2018 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -39,6 +39,5 @@ if kutf_env['debug'] == '1':
SConscript('kutf_test_runner/sconscript')
if env['unit'] == '1':
- SConscript('mali_kutf_ipa_test/sconscript')
SConscript('mali_kutf_ipa_unit_test/sconscript')
SConscript('mali_kutf_vinstr_test/sconscript')