path: root/mali_kbase/backend
author    Kevin DuBois <kevindubois@google.com>  2022-11-02 21:39:17 +0000
committer Kevin DuBois <kevindubois@google.com>  2022-11-02 22:39:21 +0000
commit    34e635317dc2a91076ac341df3867ac3bdb31ef1 (patch)
tree      cf1c0e597ce1e7dcd9b276ff4d51be60c7fdca58 /mali_kbase/backend
parent    6dcd9736cbf84712dd7073dab4aea256e30517c2 (diff)
download  gpu-34e635317dc2a91076ac341df3867ac3bdb31ef1.tar.gz
Revert "Merge r38p1 from upstream into partner/android13-gs-pixel-5.10-tm-qpr2"
This reverts commit 6dcd9736cbf84712dd7073dab4aea256e30517c2.

Reason for revert: UMD taking too long to merge

Bug: 228779790
Change-Id: I08b861ba3cfc8b025f653ef86b0a5ec643e5b13d
Diffstat (limited to 'mali_kbase/backend')
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c  |  45
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h  |  16
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_devfreq.c              |  73
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c    |  21
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_instr_backend.c       |  69
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_hw.c               |  67
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_internal.h         |   9
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_rb.c               | 102
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_dummy.c         | 319
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_dummy.h         |  61
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_backend.c          |   9
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_ca.c               |  29
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_defs.h             |  10
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_driver.c           | 271
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_internal.h         |  33
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_metrics.c          | 124
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_policy.c           |   4
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_time.c                |  56
18 files changed, 521 insertions(+), 797 deletions(-)
diff --git a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c
index ddd03ca..d6b9750 100644
--- a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c
+++ b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -72,6 +72,49 @@ get_clk_rate_trace_callbacks(__maybe_unused struct kbase_device *kbdev)
return callbacks;
}
+int kbase_lowest_gpu_freq_init(struct kbase_device *kbdev)
+{
+ /* Uses default reference frequency defined in below macro */
+ u64 lowest_freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ;
+
+ /* Only check lowest frequency in cases when OPPs are used and
+ * present in the device tree.
+ */
+#ifdef CONFIG_PM_OPP
+ struct dev_pm_opp *opp_ptr;
+ unsigned long found_freq = 0;
+
+ /* find lowest frequency OPP */
+ opp_ptr = dev_pm_opp_find_freq_ceil(kbdev->dev, &found_freq);
+ if (IS_ERR(opp_ptr)) {
+ dev_err(kbdev->dev,
+ "No OPPs found in device tree! Scaling timeouts using %llu kHz",
+ (unsigned long long)lowest_freq_khz);
+ } else {
+#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
+ dev_pm_opp_put(opp_ptr); /* decrease OPP refcount */
+#endif
+ /* convert found frequency to KHz */
+ found_freq /= 1000;
+
+ /* If lowest frequency in OPP table is still higher
+ * than the reference, then keep the reference frequency
+ * as the one to use for scaling .
+ */
+ if (found_freq < lowest_freq_khz)
+ lowest_freq_khz = found_freq;
+ }
+#else
+ dev_err(kbdev->dev,
+ "No operating-points-v2 node or operating-points property in DT");
+#endif
+
+ kbdev->lowest_gpu_freq_khz = lowest_freq_khz;
+ dev_dbg(kbdev->dev, "Lowest frequency identified is %llu kHz",
+ kbdev->lowest_gpu_freq_khz);
+ return 0;
+}
+
static int gpu_clk_rate_change_notifier(struct notifier_block *nb,
unsigned long event, void *data)
{
diff --git a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h
index 35b3b8d..a6ee959 100644
--- a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h
+++ b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -61,6 +61,20 @@ struct kbase_clk_data {
int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev);
/**
+ * kbase_init_lowest_gpu_freq() - Find the lowest frequency that the GPU can
+ * run as using the device tree, and save this
+ * within kbdev.
+ * @kbdev: Pointer to kbase device.
+ *
+ * This function could be called from kbase_clk_rate_trace_manager_init,
+ * but is left separate as it can be called as soon as
+ * dev_pm_opp_of_add_table() has been called to initialize the OPP table.
+ *
+ * Return: 0 in any case.
+ */
+int kbase_lowest_gpu_freq_init(struct kbase_device *kbdev);
+
+/**
* kbase_clk_rate_trace_manager_term - Terminate GPU clock rate trace manager.
*
* @kbdev: Device pointer
diff --git a/mali_kbase/backend/gpu/mali_kbase_devfreq.c b/mali_kbase/backend/gpu/mali_kbase_devfreq.c
index 09c1863..00b32b9 100644
--- a/mali_kbase/backend/gpu/mali_kbase_devfreq.c
+++ b/mali_kbase/backend/gpu/mali_kbase_devfreq.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -57,7 +57,7 @@ static unsigned long get_voltage(struct kbase_device *kbdev, unsigned long freq)
opp = dev_pm_opp_find_freq_exact(kbdev->dev, freq, true);
if (IS_ERR_OR_NULL(opp))
- dev_err(kbdev->dev, "Failed to get opp (%d)\n", PTR_ERR_OR_ZERO(opp));
+ dev_err(kbdev->dev, "Failed to get opp (%ld)\n", PTR_ERR(opp));
else {
voltage = dev_pm_opp_get_voltage(opp);
#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
@@ -133,8 +133,8 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
rcu_read_unlock();
#endif
if (IS_ERR_OR_NULL(opp)) {
- dev_err(dev, "Failed to get opp (%d)\n", PTR_ERR_OR_ZERO(opp));
- return IS_ERR(opp) ? PTR_ERR(opp) : -ENODEV;
+ dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp));
+ return PTR_ERR(opp);
}
#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
dev_pm_opp_put(opp);
@@ -317,7 +317,6 @@ static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev,
dp->max_state = i;
-
/* Have the lowest clock as suspend clock.
* It may be overridden by 'opp-mali-errata-1485982'.
*/
@@ -637,7 +636,6 @@ int kbase_devfreq_init(struct kbase_device *kbdev)
struct devfreq_dev_profile *dp;
int err;
unsigned int i;
- bool free_devfreq_freq_table = true;
if (kbdev->nr_clocks == 0) {
dev_err(kbdev->dev, "Clock not available for devfreq\n");
@@ -671,35 +669,32 @@ int kbase_devfreq_init(struct kbase_device *kbdev)
dp->freq_table[0] / 1000;
}
-#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL)
- err = kbase_ipa_init(kbdev);
+ err = kbase_devfreq_init_core_mask_table(kbdev);
if (err) {
- dev_err(kbdev->dev, "IPA initialization failed");
- goto ipa_init_failed;
+ kbase_devfreq_term_freq_table(kbdev);
+ return err;
}
-#endif
-
- err = kbase_devfreq_init_core_mask_table(kbdev);
- if (err)
- goto init_core_mask_table_failed;
kbdev->devfreq = devfreq_add_device(kbdev->dev, dp,
"simple_ondemand", NULL);
if (IS_ERR(kbdev->devfreq)) {
err = PTR_ERR(kbdev->devfreq);
kbdev->devfreq = NULL;
- dev_err(kbdev->dev, "Fail to add devfreq device(%d)", err);
- goto devfreq_add_dev_failed;
+ kbase_devfreq_term_core_mask_table(kbdev);
+ kbase_devfreq_term_freq_table(kbdev);
+ dev_err(kbdev->dev, "Fail to add devfreq device(%d)\n", err);
+ return err;
}
- /* Explicit free of freq table isn't needed after devfreq_add_device() */
- free_devfreq_freq_table = false;
-
/* Initialize devfreq suspend/resume workqueue */
err = kbase_devfreq_work_init(kbdev);
if (err) {
- dev_err(kbdev->dev, "Fail to init devfreq workqueue");
- goto devfreq_work_init_failed;
+ if (devfreq_remove_device(kbdev->devfreq))
+ dev_err(kbdev->dev, "Fail to rm devfreq\n");
+ kbdev->devfreq = NULL;
+ kbase_devfreq_term_core_mask_table(kbdev);
+ dev_err(kbdev->dev, "Fail to init devfreq workqueue\n");
+ return err;
}
/* devfreq_add_device only copies a few of kbdev->dev's fields, so
@@ -710,20 +705,26 @@ int kbase_devfreq_init(struct kbase_device *kbdev)
err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq);
if (err) {
dev_err(kbdev->dev,
- "Failed to register OPP notifier (%d)", err);
+ "Failed to register OPP notifier (%d)\n", err);
goto opp_notifier_failed;
}
#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL)
+ err = kbase_ipa_init(kbdev);
+ if (err) {
+ dev_err(kbdev->dev, "IPA initialization failed\n");
+ goto ipa_init_failed;
+ }
+
kbdev->devfreq_cooling = of_devfreq_cooling_register_power(
kbdev->dev->of_node,
kbdev->devfreq,
&kbase_ipa_power_model_ops);
if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) {
- err = PTR_ERR_OR_ZERO(kbdev->devfreq_cooling);
+ err = PTR_ERR(kbdev->devfreq_cooling);
dev_err(kbdev->dev,
- "Failed to register cooling device (%d)", err);
- err = err == 0 ? -ENODEV : err;
+ "Failed to register cooling device (%d)\n",
+ err);
goto cooling_reg_failed;
}
#endif
@@ -732,29 +733,21 @@ int kbase_devfreq_init(struct kbase_device *kbdev)
#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL)
cooling_reg_failed:
+ kbase_ipa_term(kbdev);
+ipa_init_failed:
devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
#endif /* CONFIG_DEVFREQ_THERMAL */
opp_notifier_failed:
kbase_devfreq_work_term(kbdev);
-devfreq_work_init_failed:
if (devfreq_remove_device(kbdev->devfreq))
- dev_err(kbdev->dev, "Failed to terminate devfreq (%d)", err);
+ dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
kbdev->devfreq = NULL;
-devfreq_add_dev_failed:
kbase_devfreq_term_core_mask_table(kbdev);
-init_core_mask_table_failed:
-#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL)
- kbase_ipa_term(kbdev);
-ipa_init_failed:
-#endif
- if (free_devfreq_freq_table)
- kbase_devfreq_term_freq_table(kbdev);
-
return err;
}
@@ -767,6 +760,8 @@ void kbase_devfreq_term(struct kbase_device *kbdev)
#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL)
if (kbdev->devfreq_cooling)
devfreq_cooling_unregister(kbdev->devfreq_cooling);
+
+ kbase_ipa_term(kbdev);
#endif
devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
@@ -780,8 +775,4 @@ void kbase_devfreq_term(struct kbase_device *kbdev)
kbdev->devfreq = NULL;
kbase_devfreq_term_core_mask_table(kbdev);
-
-#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL)
- kbase_ipa_term(kbdev);
-#endif
}
diff --git a/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c b/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c
index 10e92ec..0ea14bc 100644
--- a/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -40,7 +40,19 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev,
registers.l2_features = kbase_reg_read(kbdev,
GPU_CONTROL_REG(L2_FEATURES));
-
+ registers.core_features = 0;
+#if !MALI_USE_CSF
+ /* TGOx */
+ registers.core_features = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(CORE_FEATURES));
+#else /* !MALI_USE_CSF */
+ if (!(((registers.gpu_id & GPU_ID2_PRODUCT_MODEL) ==
+ GPU_ID2_PRODUCT_TDUX) ||
+ ((registers.gpu_id & GPU_ID2_PRODUCT_MODEL) ==
+ GPU_ID2_PRODUCT_TODX)))
+ registers.core_features =
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(CORE_FEATURES));
+#endif /* MALI_USE_CSF */
registers.tiler_features = kbase_reg_read(kbdev,
GPU_CONTROL_REG(TILER_FEATURES));
registers.mem_features = kbase_reg_read(kbdev,
@@ -158,11 +170,6 @@ int kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
regdump->coherency_features = coherency_features;
- if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CORE_FEATURES))
- regdump->core_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(CORE_FEATURES));
- else
- regdump->core_features = 0;
-
kbase_pm_register_access_disable(kbdev);
return error;
diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
index b89b917..0ece571 100644
--- a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -29,20 +29,6 @@
#include <device/mali_kbase_device.h>
#include <backend/gpu/mali_kbase_instr_internal.h>
-static int wait_prfcnt_ready(struct kbase_device *kbdev)
-{
- u32 loops;
-
- for (loops = 0; loops < KBASE_PRFCNT_ACTIVE_MAX_LOOPS; loops++) {
- const u32 prfcnt_active = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) &
- GPU_STATUS_PRFCNT_ACTIVE;
- if (!prfcnt_active)
- return 0;
- }
-
- dev_err(kbdev->dev, "PRFCNT_ACTIVE bit stuck\n");
- return -EBUSY;
-}
int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
struct kbase_context *kctx,
@@ -57,20 +43,20 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
/* alignment failure */
if ((enable->dump_buffer == 0ULL) || (enable->dump_buffer & (2048 - 1)))
- return err;
+ goto out_err;
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
/* Instrumentation is already enabled */
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
- return err;
+ goto out_err;
}
if (kbase_is_gpu_removed(kbdev)) {
/* GPU has been removed by Arbiter */
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
- return err;
+ goto out_err;
}
/* Enable interrupt */
@@ -95,19 +81,9 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
prfcnt_config |= enable->counter_set << PRFCNT_CONFIG_SETSELECT_SHIFT;
#endif
- /* Wait until prfcnt config register can be written */
- err = wait_prfcnt_ready(kbdev);
- if (err)
- return err;
-
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
prfcnt_config | PRFCNT_CONFIG_MODE_OFF);
- /* Wait until prfcnt is disabled before writing configuration registers */
- err = wait_prfcnt_ready(kbdev);
- if (err)
- return err;
-
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
enable->dump_buffer & 0xFFFFFFFF);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
@@ -135,8 +111,12 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ err = 0;
+
dev_dbg(kbdev->dev, "HW counters dumping set-up for context %pK", kctx);
- return 0;
+ return err;
+ out_err:
+ return err;
}
static void kbasep_instr_hwc_disable_hw_prfcnt(struct kbase_device *kbdev)
@@ -155,10 +135,7 @@ static void kbasep_instr_hwc_disable_hw_prfcnt(struct kbase_device *kbdev)
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask & ~PRFCNT_SAMPLE_COMPLETED);
- /* Wait until prfcnt config register can be written, then disable the counters.
- * Return value is ignored as we are disabling anyway.
- */
- wait_prfcnt_ready(kbdev);
+ /* Disable the counters */
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0);
kbdev->hwcnt.kctx = NULL;
@@ -169,6 +146,7 @@ static void kbasep_instr_hwc_disable_hw_prfcnt(struct kbase_device *kbdev)
int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
{
unsigned long flags, pm_flags;
+ int err = -EINVAL;
struct kbase_device *kbdev = kctx->kbdev;
while (1) {
@@ -189,14 +167,14 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
/* Instrumentation is not enabled */
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
- return -EINVAL;
+ return err;
}
if (kbdev->hwcnt.kctx != kctx) {
/* Instrumentation has been setup for another context */
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
- return -EINVAL;
+ return err;
}
if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE)
@@ -255,11 +233,6 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
*/
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING;
- /* Wait until prfcnt is ready to request dump */
- err = wait_prfcnt_ready(kbdev);
- if (err)
- goto unlock;
-
/* Reconfigure the dump address */
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
kbdev->hwcnt.addr & 0xFFFFFFFF);
@@ -275,8 +248,11 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
dev_dbg(kbdev->dev, "HW counters dumping done for context %pK", kctx);
+ err = 0;
+
unlock:
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
return err;
}
KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump);
@@ -370,24 +346,21 @@ int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
*/
if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state !=
KBASE_INSTR_STATE_IDLE)
- goto unlock;
+ goto out;
if (kbase_is_gpu_removed(kbdev)) {
/* GPU has been removed by Arbiter */
- goto unlock;
+ goto out;
}
- /* Wait until prfcnt is ready to clear */
- err = wait_prfcnt_ready(kbdev);
- if (err)
- goto unlock;
-
/* Clear the counters */
KBASE_KTRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, 0);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
GPU_COMMAND_PRFCNT_CLEAR);
-unlock:
+ err = 0;
+
+out:
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
return err;
}
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
index 20905f7..32bdf72 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
@@ -191,7 +191,9 @@ static u64 select_job_chain(struct kbase_jd_atom *katom)
return jc;
}
-int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js)
+void kbase_job_hw_submit(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom,
+ int js)
{
struct kbase_context *kctx;
u32 cfg;
@@ -200,13 +202,13 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
struct slot_rb *ptr_slot_rb = &kbdev->hwaccess.backend.slot_rb[js];
lockdep_assert_held(&kbdev->hwaccess_lock);
+ KBASE_DEBUG_ASSERT(kbdev);
+ KBASE_DEBUG_ASSERT(katom);
kctx = katom->kctx;
/* Command register must be available */
- if (WARN(!kbasep_jm_is_js_free(kbdev, js, kctx),
- "Attempting to assign to occupied slot %d in kctx %pK\n", js, (void *)kctx))
- return -EPERM;
+ KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx));
dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %pK\n",
jc_head, (void *)katom);
@@ -279,7 +281,7 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
/* Write an approximate start timestamp.
* It's approximate because there might be a job in the HEAD register.
*/
- katom->start_timestamp = ktime_get_raw();
+ katom->start_timestamp = ktime_get();
/* GO ! */
dev_dbg(kbdev->dev, "JS: Submitting atom %pK from ctx %pK to js[%d] with head=0x%llx",
@@ -327,8 +329,6 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
JS_COMMAND_START);
-
- return 0;
}
/**
@@ -393,9 +393,11 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
lockdep_assert_held(&kbdev->hwaccess_lock);
+ KBASE_DEBUG_ASSERT(kbdev);
+
KBASE_KTRACE_ADD_JM(kbdev, JM_IRQ, NULL, NULL, 0, done);
- end_timestamp = ktime_get_raw();
+ end_timestamp = ktime_get();
while (done) {
u32 failed = done >> 16;
@@ -407,8 +409,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
* numbered interrupts before the higher numbered ones.
*/
i = ffs(finished) - 1;
- if (WARN(i < 0, "%s: called without receiving any interrupts\n", __func__))
- break;
+ KBASE_DEBUG_ASSERT(i >= 0);
do {
int nr_done;
@@ -589,7 +590,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
failed = done >> 16;
finished = (done & 0xFFFF) | failed;
if (done)
- end_timestamp = ktime_get_raw();
+ end_timestamp = ktime_get();
} while (finished & (1 << i));
kbasep_job_slot_update_head_start_timestamp(kbdev, i,
@@ -618,7 +619,7 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
u64 job_in_head_before;
u32 status_reg_after;
- WARN_ON(action & (~JS_COMMAND_MASK));
+ KBASE_DEBUG_ASSERT(!(action & (~JS_COMMAND_MASK)));
/* Check the head pointer */
job_in_head_before = ((u64) kbase_reg_read(kbdev,
@@ -696,8 +697,7 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_1, head_kctx, head, head->jc, js);
break;
default:
- WARN(1, "Unknown action %d on atom %pK in kctx %pK\n", action,
- (void *)target_katom, (void *)target_katom->kctx);
+ BUG();
break;
}
} else {
@@ -726,8 +726,7 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL, 0, js);
break;
default:
- WARN(1, "Unknown action %d on atom %pK in kctx %pK\n", action,
- (void *)target_katom, (void *)target_katom->kctx);
+ BUG();
break;
}
}
@@ -753,7 +752,9 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
int i;
bool stop_sent = false;
+ KBASE_DEBUG_ASSERT(kctx != NULL);
kbdev = kctx->kbdev;
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -933,11 +934,7 @@ void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
dev_dbg(kbdev->dev, "Soft-stop atom %pK with flags 0x%x (s:%d)\n",
target_katom, sw_flags, js);
- if (sw_flags & JS_COMMAND_MASK) {
- WARN(true, "Atom %pK in kctx %pK received non-NOP flags %d\n", (void *)target_katom,
- target_katom ? (void *)target_katom->kctx : NULL, sw_flags);
- sw_flags &= ~((u32)JS_COMMAND_MASK);
- }
+ KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK));
kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom,
JS_COMMAND_SOFT_STOP | sw_flags);
}
@@ -1055,14 +1052,17 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
{
unsigned long flags;
struct kbase_device *kbdev;
- ktime_t end_timestamp = ktime_get_raw();
+ ktime_t end_timestamp = ktime_get();
struct kbasep_js_device_data *js_devdata;
bool silent = false;
u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+ KBASE_DEBUG_ASSERT(data);
+
kbdev = container_of(data, struct kbase_device,
hwaccess.backend.reset_work);
+ KBASE_DEBUG_ASSERT(kbdev);
js_devdata = &kbdev->js_data;
if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
@@ -1097,7 +1097,7 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
return;
}
- WARN(kbdev->irq_reset_flush, "%s: GPU reset already in flight\n", __func__);
+ KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
spin_lock(&kbdev->mmu_mask_change);
@@ -1138,8 +1138,7 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
mutex_lock(&kbdev->pm.lock);
/* We hold the pm lock, so there ought to be a current policy */
- if (unlikely(!kbdev->pm.backend.pm_current_policy))
- dev_warn(kbdev->dev, "No power policy set!");
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy);
/* All slot have been soft-stopped and we've waited
* SOFT_STOP_RESET_TIMEOUT for the slots to clear, at this point we
@@ -1236,6 +1235,8 @@ static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer)
struct kbase_device *kbdev = container_of(timer, struct kbase_device,
hwaccess.backend.reset_timer);
+ KBASE_DEBUG_ASSERT(kbdev);
+
/* Reset still pending? */
if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) ==
@@ -1256,6 +1257,8 @@ static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev)
int i;
int pending_jobs = 0;
+ KBASE_DEBUG_ASSERT(kbdev);
+
/* Count the number of jobs */
for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
pending_jobs += kbase_backend_nr_atoms_submitted(kbdev, i);
@@ -1313,6 +1316,8 @@ bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev,
{
int i;
+ KBASE_DEBUG_ASSERT(kbdev);
+
#ifdef CONFIG_MALI_ARBITER_SUPPORT
if (kbase_pm_is_gpu_lost(kbdev)) {
/* GPU access has been removed, reset will be done by
@@ -1366,11 +1371,13 @@ KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu);
*/
void kbase_reset_gpu(struct kbase_device *kbdev)
{
+ KBASE_DEBUG_ASSERT(kbdev);
+
/* Note this is an assert/atomic_set because it is a software issue for
* a race to be occurring here
*/
- if (WARN_ON(atomic_read(&kbdev->hwaccess.backend.reset_gpu) != KBASE_RESET_GPU_PREPARED))
- return;
+ KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+ KBASE_RESET_GPU_PREPARED);
atomic_set(&kbdev->hwaccess.backend.reset_gpu,
KBASE_RESET_GPU_COMMITTED);
@@ -1388,11 +1395,13 @@ KBASE_EXPORT_TEST_API(kbase_reset_gpu);
void kbase_reset_gpu_locked(struct kbase_device *kbdev)
{
+ KBASE_DEBUG_ASSERT(kbdev);
+
/* Note this is an assert/atomic_set because it is a software issue for
* a race to be occurring here
*/
- if (WARN_ON(atomic_read(&kbdev->hwaccess.backend.reset_gpu) != KBASE_RESET_GPU_PREPARED))
- return;
+ KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+ KBASE_RESET_GPU_PREPARED);
atomic_set(&kbdev->hwaccess.backend.reset_gpu,
KBASE_RESET_GPU_COMMITTED);
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h
index 1ebb843..1039e85 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2011-2016, 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016, 2018-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -76,6 +76,7 @@ static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js,
}
#endif
+
/**
* kbase_job_hw_submit() - Submit a job to the GPU
* @kbdev: Device pointer
@@ -87,10 +88,10 @@ static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js,
*
* The following locking conditions are made on the caller:
* - it must hold the hwaccess_lock
- *
- * Return: 0 if the job was successfully submitted to hardware, an error otherwise.
*/
-int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js);
+void kbase_job_hw_submit(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom,
+ int js);
#if !MALI_USE_CSF
/**
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
index 4fe8046..48d1de8 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
@@ -346,35 +346,16 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev,
katom->protected_state.exit !=
KBASE_ATOM_EXIT_PROTECTED_CHECK)
kbdev->protected_mode_transition = false;
-
- /* If the atom is at KBASE_ATOM_ENTER_PROTECTED_HWCNT state, it means
- * one of two events prevented it from progressing to the next state and
- * ultimately reach protected mode:
- * - hwcnts were enabled, and the atom had to schedule a worker to
- * disable them.
- * - the hwcnts were already disabled, but some other error occurred.
- * In the first case, if the worker has not yet completed
- * (kbdev->protected_mode_hwcnt_disabled == false), we need to re-enable
- * them and signal to the worker they have already been enabled
- */
- if (kbase_jd_katom_is_protected(katom) &&
- (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_HWCNT)) {
- kbdev->protected_mode_hwcnt_desired = true;
- if (kbdev->protected_mode_hwcnt_disabled) {
- kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
- kbdev->protected_mode_hwcnt_disabled = false;
- }
- }
-
/* If the atom has suspended hwcnt but has not yet entered
* protected mode, then resume hwcnt now. If the GPU is now in
* protected mode then hwcnt will be resumed by GPU reset so
* don't resume it here.
*/
if (kbase_jd_katom_is_protected(katom) &&
- ((katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_IDLE_L2) ||
- (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY) ||
- (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_FINISHED))) {
+ ((katom->protected_state.enter ==
+ KBASE_ATOM_ENTER_PROTECTED_IDLE_L2) ||
+ (katom->protected_state.enter ==
+ KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY))) {
WARN_ON(!kbdev->protected_mode_hwcnt_disabled);
kbdev->protected_mode_hwcnt_desired = true;
if (kbdev->protected_mode_hwcnt_disabled) {
@@ -525,14 +506,17 @@ static int kbase_jm_protected_entry(struct kbase_device *kbdev,
KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev, kbdev);
if (err) {
/*
- * Failed to switch into protected mode.
- *
- * At this point we expect:
- * katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION &&
- * katom->protected_state.enter = KBASE_ATOM_ENTER_PROTECTED_FINISHED
- * ==>
- * kbdev->protected_mode_hwcnt_disabled = false
+ * Failed to switch into protected mode, resume
+ * GPU hwcnt and fail atom.
*/
+ WARN_ON(!kbdev->protected_mode_hwcnt_disabled);
+ kbdev->protected_mode_hwcnt_desired = true;
+ if (kbdev->protected_mode_hwcnt_disabled) {
+ kbase_hwcnt_context_enable(
+ kbdev->hwcnt_gpu_ctx);
+ kbdev->protected_mode_hwcnt_disabled = false;
+ }
+
katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
/*
@@ -552,9 +536,12 @@ static int kbase_jm_protected_entry(struct kbase_device *kbdev,
/*
* Protected mode sanity checks.
*/
- WARN(kbase_jd_katom_is_protected(katom[idx]) != kbase_gpu_in_protected_mode(kbdev),
- "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
- kbase_jd_katom_is_protected(katom[idx]), kbase_gpu_in_protected_mode(kbdev));
+ KBASE_DEBUG_ASSERT_MSG(
+ kbase_jd_katom_is_protected(katom[idx]) ==
+ kbase_gpu_in_protected_mode(kbdev),
+ "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
+ kbase_jd_katom_is_protected(katom[idx]),
+ kbase_gpu_in_protected_mode(kbdev));
katom[idx]->gpu_rb_state =
KBASE_ATOM_GPU_RB_READY;
@@ -964,6 +951,18 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
cores_ready = kbase_pm_cores_requested(kbdev,
true);
+ if (katom[idx]->event_code ==
+ BASE_JD_EVENT_PM_EVENT) {
+ KBASE_KTRACE_ADD_JM_SLOT_INFO(
+ kbdev, JM_MARK_FOR_RETURN_TO_JS,
+ katom[idx]->kctx, katom[idx],
+ katom[idx]->jc, js,
+ katom[idx]->event_code);
+ katom[idx]->gpu_rb_state =
+ KBASE_ATOM_GPU_RB_RETURN_TO_JS;
+ break;
+ }
+
if (!cores_ready)
break;
@@ -1012,10 +1011,9 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
kbase_pm_request_gpu_cycle_counter_l2_is_on(
kbdev);
- if (!kbase_job_hw_submit(kbdev, katom[idx], js))
- katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_SUBMITTED;
- else
- break;
+ kbase_job_hw_submit(kbdev, katom[idx], js);
+ katom[idx]->gpu_rb_state =
+ KBASE_ATOM_GPU_RB_SUBMITTED;
kbasep_platform_event_work_begin(katom[idx]);
@@ -1348,9 +1346,11 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
} else {
char js_string[16];
- trace_gpu_sched_switch(kbasep_make_job_slot_string(js, js_string,
- sizeof(js_string)),
- ktime_to_ns(ktime_get_raw()), 0, 0, 0);
+ trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
+ js_string,
+ sizeof(js_string)),
+ ktime_to_ns(ktime_get()), 0, 0,
+ 0);
}
}
#endif
@@ -1406,14 +1406,14 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
if (katom->protected_state.exit ==
KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) {
/* protected mode sanity checks */
- WARN(kbase_jd_katom_is_protected(katom) !=
- kbase_gpu_in_protected_mode(kbdev),
- "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
- kbase_jd_katom_is_protected(katom),
- kbase_gpu_in_protected_mode(kbdev));
- WARN(!(kbase_jd_katom_is_protected(katom) && js == 0) &&
- kbase_jd_katom_is_protected(katom),
- "Protected atom on JS%d not supported", js);
+ KBASE_DEBUG_ASSERT_MSG(
+ kbase_jd_katom_is_protected(katom) == kbase_gpu_in_protected_mode(kbdev),
+ "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
+ kbase_jd_katom_is_protected(katom), kbase_gpu_in_protected_mode(kbdev));
+ KBASE_DEBUG_ASSERT_MSG(
+ (kbase_jd_katom_is_protected(katom) && js == 0) ||
+ !kbase_jd_katom_is_protected(katom),
+ "Protected atom on JS%d not supported", js);
}
if ((katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) &&
!kbase_ctx_flag(katom->kctx, KCTX_DYING))
@@ -1804,9 +1804,11 @@ void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
base_jd_core_req core_req)
{
if (!kbdev->pm.active_count) {
- kbase_pm_lock(kbdev);
+ mutex_lock(&kbdev->js_data.runpool_mutex);
+ mutex_lock(&kbdev->pm.lock);
kbase_pm_update_active(kbdev);
- kbase_pm_unlock(kbdev);
+ mutex_unlock(&kbdev->pm.lock);
+ mutex_unlock(&kbdev->js_data.runpool_mutex);
}
}
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
index 961a951..603ffcf 100644
--- a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
+++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -80,7 +80,6 @@ static bool ipa_control_timer_enabled;
#endif
#define LO_MASK(M) ((M) & 0xFFFFFFFF)
-#define HI_MASK(M) ((M) & 0xFFFFFFFF00000000)
static u32 get_implementation_register(u32 reg)
{
@@ -105,15 +104,20 @@ static u32 get_implementation_register(u32 reg)
}
struct {
- spinlock_t access_lock;
-#if !MALI_USE_CSF
unsigned long prfcnt_base;
-#endif /* !MALI_USE_CSF */
u32 *prfcnt_base_cpu;
+ struct kbase_device *kbdev;
+ struct tagged_addr *pages;
+ size_t page_count;
u32 time;
- struct gpu_model_prfcnt_en prfcnt_en;
+ struct {
+ u32 jm;
+ u32 tiler;
+ u32 l2;
+ u32 shader;
+ } prfcnt_en;
u64 l2_present;
u64 shader_present;
@@ -177,9 +181,7 @@ struct control_reg_values_t {
struct dummy_model_t {
int reset_completed;
int reset_completed_mask;
-#if !MALI_USE_CSF
int prfcnt_sample_completed;
-#endif /* !MALI_USE_CSF */
int power_changed_mask; /* 2bits: _ALL,_SINGLE */
int power_changed; /* 1bit */
bool clean_caches_completed;
@@ -462,7 +464,6 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type,
u32 event_index;
u64 value = 0;
u32 core;
- unsigned long flags;
if (WARN_ON(core_type >= KBASE_IPA_CORE_TYPE_NUM))
return 0;
@@ -486,8 +487,6 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type,
event_index -= 4;
- spin_lock_irqsave(&performance_counters.access_lock, flags);
-
switch (core_type) {
case KBASE_IPA_CORE_TYPE_CSHW:
core_count = 1;
@@ -515,46 +514,28 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type,
event_index += KBASE_DUMMY_MODEL_COUNTER_PER_CORE;
}
- spin_unlock_irqrestore(&performance_counters.access_lock, flags);
-
if (is_low_word)
return (value & U32_MAX);
else
return (value >> 32);
}
-#endif /* MALI_USE_CSF */
-/**
- * gpu_model_clear_prfcnt_values_nolock - Clear performance counter values
- *
- * Sets all performance counter values to zero. The performance counter access
- * lock must be held when calling this function.
- */
-static void gpu_model_clear_prfcnt_values_nolock(void)
-{
- lockdep_assert_held(&performance_counters.access_lock);
-#if !MALI_USE_CSF
- memset(performance_counters.jm_counters, 0, sizeof(performance_counters.jm_counters));
-#else
- memset(performance_counters.cshw_counters, 0, sizeof(performance_counters.cshw_counters));
-#endif /* !MALI_USE_CSF */
- memset(performance_counters.tiler_counters, 0, sizeof(performance_counters.tiler_counters));
- memset(performance_counters.l2_counters, 0, sizeof(performance_counters.l2_counters));
- memset(performance_counters.shader_counters, 0,
- sizeof(performance_counters.shader_counters));
-}
-
-#if MALI_USE_CSF
void gpu_model_clear_prfcnt_values(void)
{
- unsigned long flags;
+ memset(performance_counters.cshw_counters, 0,
+ sizeof(performance_counters.cshw_counters));
- spin_lock_irqsave(&performance_counters.access_lock, flags);
- gpu_model_clear_prfcnt_values_nolock();
- spin_unlock_irqrestore(&performance_counters.access_lock, flags);
+ memset(performance_counters.tiler_counters, 0,
+ sizeof(performance_counters.tiler_counters));
+
+ memset(performance_counters.l2_counters, 0,
+ sizeof(performance_counters.l2_counters));
+
+ memset(performance_counters.shader_counters, 0,
+ sizeof(performance_counters.shader_counters));
}
KBASE_EXPORT_TEST_API(gpu_model_clear_prfcnt_values);
-#endif /* MALI_USE_CSF */
+#endif
/**
* gpu_model_dump_prfcnt_blocks() - Dump performance counter values to buffer
@@ -564,20 +545,17 @@ KBASE_EXPORT_TEST_API(gpu_model_clear_prfcnt_values);
* @block_count: Number of blocks to dump
* @prfcnt_enable_mask: Counter enable mask
* @blocks_present: Available blocks bit mask
- *
- * The performance counter access lock must be held before calling this
- * function.
*/
-static void gpu_model_dump_prfcnt_blocks(u64 *values, u32 *out_index, u32 block_count,
- u32 prfcnt_enable_mask, u64 blocks_present)
+static void gpu_model_dump_prfcnt_blocks(u64 *values, u32 *out_index,
+ u32 block_count,
+ u32 prfcnt_enable_mask,
+ u64 blocks_present)
{
u32 block_idx, counter;
u32 counter_value = 0;
u32 *prfcnt_base;
u32 index = 0;
- lockdep_assert_held(&performance_counters.access_lock);
-
prfcnt_base = performance_counters.prfcnt_base_cpu;
for (block_idx = 0; block_idx < block_count; block_idx++) {
@@ -616,18 +594,35 @@ static void gpu_model_dump_prfcnt_blocks(u64 *values, u32 *out_index, u32 block_
}
}
-static void gpu_model_dump_nolock(void)
+/**
+ * gpu_model_sync_dummy_prfcnt() - Synchronize dumped performance counter values
+ *
+ * Used to ensure counter values are not lost if cache invalidation is performed
+ * prior to reading.
+ */
+static void gpu_model_sync_dummy_prfcnt(void)
{
- u32 index = 0;
+ int i;
+ struct page *pg;
+
+ for (i = 0; i < performance_counters.page_count; i++) {
+ pg = as_page(performance_counters.pages[i]);
+ kbase_sync_single_for_device(performance_counters.kbdev,
+ kbase_dma_addr(pg), PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ }
+}
- lockdep_assert_held(&performance_counters.access_lock);
+static void midgard_model_dump_prfcnt(void)
+{
+ u32 index = 0;
#if !MALI_USE_CSF
- gpu_model_dump_prfcnt_blocks(performance_counters.jm_counters, &index, 1,
- performance_counters.prfcnt_en.fe, 0x1);
+ gpu_model_dump_prfcnt_blocks(performance_counters.jm_counters, &index,
+ 1, 0xffffffff, 0x1);
#else
- gpu_model_dump_prfcnt_blocks(performance_counters.cshw_counters, &index, 1,
- performance_counters.prfcnt_en.fe, 0x1);
+ gpu_model_dump_prfcnt_blocks(performance_counters.cshw_counters, &index,
+ 1, 0xffffffff, 0x1);
#endif /* !MALI_USE_CSF */
gpu_model_dump_prfcnt_blocks(performance_counters.tiler_counters,
&index, 1,
@@ -642,48 +637,12 @@ static void gpu_model_dump_nolock(void)
performance_counters.prfcnt_en.shader,
performance_counters.shader_present);
- /* Counter values are cleared after each dump */
- gpu_model_clear_prfcnt_values_nolock();
+ gpu_model_sync_dummy_prfcnt();
/* simulate a 'long' time between samples */
performance_counters.time += 10;
}
-#if !MALI_USE_CSF
-static void midgard_model_dump_prfcnt(void)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&performance_counters.access_lock, flags);
- gpu_model_dump_nolock();
- spin_unlock_irqrestore(&performance_counters.access_lock, flags);
-}
-#else
-void gpu_model_prfcnt_dump_request(u32 *sample_buf, struct gpu_model_prfcnt_en enable_maps)
-{
- unsigned long flags;
-
- if (WARN_ON(!sample_buf))
- return;
-
- spin_lock_irqsave(&performance_counters.access_lock, flags);
- performance_counters.prfcnt_base_cpu = sample_buf;
- performance_counters.prfcnt_en = enable_maps;
- gpu_model_dump_nolock();
- spin_unlock_irqrestore(&performance_counters.access_lock, flags);
-}
-
-void gpu_model_glb_request_job_irq(void *model)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&hw_error_status.access_lock, flags);
- hw_error_status.job_irq_status |= JOB_IRQ_GLOBAL_IF;
- spin_unlock_irqrestore(&hw_error_status.access_lock, flags);
- gpu_device_raise_irq(model, GPU_DUMMY_JOB_IRQ);
-}
-#endif /* !MALI_USE_CSF */
-
static void init_register_statuses(struct dummy_model_t *dummy)
{
int i;
@@ -714,8 +673,6 @@ static void init_register_statuses(struct dummy_model_t *dummy)
static void update_register_statuses(struct dummy_model_t *dummy, int job_slot)
{
- lockdep_assert_held(&hw_error_status.access_lock);
-
if (hw_error_status.errors_mask & IS_A_JOB_ERROR) {
if (job_slot == hw_error_status.current_job_slot) {
#if !MALI_USE_CSF
@@ -965,7 +922,6 @@ static void update_job_irq_js_state(struct dummy_model_t *dummy, int mask)
{
int i;
- lockdep_assert_held(&hw_error_status.access_lock);
pr_debug("%s", "Updating the JS_ACTIVE register");
for (i = 0; i < NUM_SLOTS; i++) {
@@ -1034,9 +990,6 @@ void *midgard_model_create(const void *config)
{
struct dummy_model_t *dummy = NULL;
- spin_lock_init(&hw_error_status.access_lock);
- spin_lock_init(&performance_counters.access_lock);
-
dummy = kzalloc(sizeof(*dummy), GFP_KERNEL);
if (dummy) {
@@ -1056,18 +1009,14 @@ static void midgard_model_get_outputs(void *h)
{
struct dummy_model_t *dummy = (struct dummy_model_t *)h;
- lockdep_assert_held(&hw_error_status.access_lock);
-
if (hw_error_status.job_irq_status)
gpu_device_raise_irq(dummy, GPU_DUMMY_JOB_IRQ);
if ((dummy->power_changed && dummy->power_changed_mask) ||
(dummy->reset_completed & dummy->reset_completed_mask) ||
hw_error_status.gpu_error_irq ||
-#if !MALI_USE_CSF
- dummy->prfcnt_sample_completed ||
-#endif
- (dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled))
+ (dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled) ||
+ dummy->prfcnt_sample_completed)
gpu_device_raise_irq(dummy, GPU_DUMMY_GPU_IRQ);
if (hw_error_status.mmu_irq_rawstat & hw_error_status.mmu_irq_mask)
@@ -1079,8 +1028,6 @@ static void midgard_model_update(void *h)
struct dummy_model_t *dummy = (struct dummy_model_t *)h;
int i;
- lockdep_assert_held(&hw_error_status.access_lock);
-
for (i = 0; i < NUM_SLOTS; i++) {
if (!dummy->slots[i].job_active)
continue;
@@ -1127,8 +1074,6 @@ static void invalidate_active_jobs(struct dummy_model_t *dummy)
{
int i;
- lockdep_assert_held(&hw_error_status.access_lock);
-
for (i = 0; i < NUM_SLOTS; i++) {
if (dummy->slots[i].job_active) {
hw_error_status.job_irq_rawstat |= (1 << (16 + i));
@@ -1140,11 +1085,7 @@ static void invalidate_active_jobs(struct dummy_model_t *dummy)
u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
{
- unsigned long flags;
struct dummy_model_t *dummy = (struct dummy_model_t *)h;
-
- spin_lock_irqsave(&hw_error_status.access_lock, flags);
-
#if !MALI_USE_CSF
if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) &&
(addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) {
@@ -1247,10 +1188,9 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
if (value & (1 << 17))
dummy->clean_caches_completed = false;
-#if !MALI_USE_CSF
- if (value & PRFCNT_SAMPLE_COMPLETED)
+ if (value & (1 << 16))
dummy->prfcnt_sample_completed = 0;
-#endif /* !MALI_USE_CSF */
+
/*update error status */
hw_error_status.gpu_error_irq &= ~(value);
} else if (addr == GPU_CONTROL_REG(GPU_COMMAND)) {
@@ -1274,11 +1214,9 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
pr_debug("clean caches requested");
dummy->clean_caches_completed = true;
break;
-#if !MALI_USE_CSF
case GPU_COMMAND_PRFCNT_SAMPLE:
midgard_model_dump_prfcnt();
dummy->prfcnt_sample_completed = 1;
-#endif /* !MALI_USE_CSF */
default:
break;
}
@@ -1408,24 +1346,20 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
mem_addr_space, addr, value);
break;
}
- } else {
+ } else if (addr >= GPU_CONTROL_REG(PRFCNT_BASE_LO) &&
+ addr <= GPU_CONTROL_REG(PRFCNT_MMU_L2_EN)) {
switch (addr) {
-#if !MALI_USE_CSF
case PRFCNT_BASE_LO:
- performance_counters.prfcnt_base =
- HI_MASK(performance_counters.prfcnt_base) | value;
- performance_counters.prfcnt_base_cpu =
- (u32 *)(uintptr_t)performance_counters.prfcnt_base;
+ performance_counters.prfcnt_base |= value;
break;
case PRFCNT_BASE_HI:
- performance_counters.prfcnt_base =
- LO_MASK(performance_counters.prfcnt_base) | (((u64)value) << 32);
- performance_counters.prfcnt_base_cpu =
- (u32 *)(uintptr_t)performance_counters.prfcnt_base;
+ performance_counters.prfcnt_base |= ((u64) value) << 32;
break;
+#if !MALI_USE_CSF
case PRFCNT_JM_EN:
- performance_counters.prfcnt_en.fe = value;
+ performance_counters.prfcnt_en.jm = value;
break;
+#endif /* !MALI_USE_CSF */
case PRFCNT_SHADER_EN:
performance_counters.prfcnt_en.shader = value;
break;
@@ -1435,7 +1369,9 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
case PRFCNT_MMU_L2_EN:
performance_counters.prfcnt_en.l2 = value;
break;
-#endif /* !MALI_USE_CSF */
+ }
+ } else {
+ switch (addr) {
case TILER_PWRON_LO:
dummy->power_on |= (value & 1) << 1;
/* Also ensure L2 is powered on */
@@ -1480,7 +1416,6 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
case PWR_OVERRIDE0:
#if !MALI_USE_CSF
case JM_CONFIG:
- case PRFCNT_CONFIG:
#else /* !MALI_USE_CSF */
case CSF_CONFIG:
#endif /* !MALI_USE_CSF */
@@ -1499,18 +1434,13 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
midgard_model_update(dummy);
midgard_model_get_outputs(dummy);
- spin_unlock_irqrestore(&hw_error_status.access_lock, flags);
return 1;
}
u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
{
- unsigned long flags;
struct dummy_model_t *dummy = (struct dummy_model_t *)h;
-
- spin_lock_irqsave(&hw_error_status.access_lock, flags);
-
*value = 0; /* 0 by default */
#if !MALI_USE_CSF
if (addr == JOB_CONTROL_REG(JOB_IRQ_JS_STATE)) {
@@ -1545,31 +1475,24 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
#endif /* !MALI_USE_CSF */
else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) {
*value = (dummy->reset_completed_mask << 8) |
- ((dummy->clean_caches_completed_irq_enabled ? 1u : 0u) << 17) |
- (dummy->power_changed_mask << 9) | (1 << 7) | 1;
+ (dummy->power_changed_mask << 9) | (1 << 7) | 1;
pr_debug("GPU_IRQ_MASK read %x", *value);
} else if (addr == GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) {
*value = (dummy->power_changed << 9) | (dummy->power_changed << 10) |
(dummy->reset_completed << 8) |
-#if !MALI_USE_CSF
- (dummy->prfcnt_sample_completed ? PRFCNT_SAMPLE_COMPLETED : 0) |
-#endif /* !MALI_USE_CSF */
((dummy->clean_caches_completed ? 1u : 0u) << 17) |
- hw_error_status.gpu_error_irq;
+ (dummy->prfcnt_sample_completed << 16) | hw_error_status.gpu_error_irq;
pr_debug("GPU_IRQ_RAWSTAT read %x", *value);
} else if (addr == GPU_CONTROL_REG(GPU_IRQ_STATUS)) {
*value = ((dummy->power_changed && (dummy->power_changed_mask & 0x1)) << 9) |
((dummy->power_changed && (dummy->power_changed_mask & 0x2)) << 10) |
((dummy->reset_completed & dummy->reset_completed_mask) << 8) |
-#if !MALI_USE_CSF
- (dummy->prfcnt_sample_completed ? PRFCNT_SAMPLE_COMPLETED : 0) |
-#endif /* !MALI_USE_CSF */
(((dummy->clean_caches_completed &&
dummy->clean_caches_completed_irq_enabled) ?
1u :
0u)
<< 17) |
- hw_error_status.gpu_error_irq;
+ (dummy->prfcnt_sample_completed << 16) | hw_error_status.gpu_error_irq;
pr_debug("GPU_IRQ_STAT read %x", *value);
} else if (addr == GPU_CONTROL_REG(GPU_STATUS)) {
*value = 0;
@@ -1904,8 +1827,6 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
*value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_SHADER,
counter_index, is_low_word);
- } else if (addr == USER_REG(LATEST_FLUSH)) {
- *value = 0;
}
#endif
else if (addr == GPU_CONTROL_REG(GPU_FEATURES_LO)) {
@@ -1919,20 +1840,18 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
*value = 0;
}
- spin_unlock_irqrestore(&hw_error_status.access_lock, flags);
CSTD_UNUSED(dummy);
return 1;
}
-static u32 set_user_sample_core_type(u64 *counters, u32 *usr_data_start, u32 usr_data_offset,
- u32 usr_data_size, u32 core_count)
+static u32 set_user_sample_core_type(u64 *counters,
+ u32 *usr_data_start, u32 usr_data_offset,
+ u32 usr_data_size, u32 core_count)
{
u32 sample_size;
u32 *usr_data = NULL;
- lockdep_assert_held(&performance_counters.access_lock);
-
sample_size =
core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u32);
@@ -1947,7 +1866,11 @@ static u32 set_user_sample_core_type(u64 *counters, u32 *usr_data_start, u32 usr
u32 i;
for (i = 0; i < loop_cnt; i++) {
- counters[i] = usr_data[i];
+ if (copy_from_user(&counters[i], &usr_data[i],
+ sizeof(u32))) {
+ model_error_log(KBASE_CORE, "Unable to set counter sample 2");
+ break;
+ }
}
}
@@ -1961,8 +1884,6 @@ static u32 set_kernel_sample_core_type(u64 *counters,
u32 sample_size;
u64 *usr_data = NULL;
- lockdep_assert_held(&performance_counters.access_lock);
-
sample_size =
core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u64);
@@ -1979,70 +1900,49 @@ static u32 set_kernel_sample_core_type(u64 *counters,
}
/* Counter values injected through ioctl are of 32 bits */
-int gpu_model_set_dummy_prfcnt_user_sample(u32 __user *data, u32 size)
+void gpu_model_set_dummy_prfcnt_sample(u32 *usr_data, u32 usr_data_size)
{
- unsigned long flags;
- u32 *user_data;
u32 offset = 0;
- if (data == NULL || size == 0 || size > KBASE_DUMMY_MODEL_COUNTER_TOTAL * sizeof(u32))
- return -EINVAL;
-
- /* copy_from_user might sleep so can't be called from inside a spinlock
- * allocate a temporary buffer for user data and copy to that before taking
- * the lock
- */
- user_data = kmalloc(size, GFP_KERNEL);
- if (!user_data)
- return -ENOMEM;
-
- if (copy_from_user(user_data, data, size)) {
- model_error_log(KBASE_CORE, "Unable to copy prfcnt data from userspace");
- kfree(user_data);
- return -EINVAL;
- }
-
- spin_lock_irqsave(&performance_counters.access_lock, flags);
#if !MALI_USE_CSF
- offset = set_user_sample_core_type(performance_counters.jm_counters, user_data, offset,
- size, 1);
+ offset = set_user_sample_core_type(performance_counters.jm_counters,
+ usr_data, offset, usr_data_size, 1);
#else
- offset = set_user_sample_core_type(performance_counters.cshw_counters, user_data, offset,
- size, 1);
+ offset = set_user_sample_core_type(performance_counters.cshw_counters,
+ usr_data, offset, usr_data_size, 1);
#endif /* !MALI_USE_CSF */
- offset = set_user_sample_core_type(performance_counters.tiler_counters, user_data, offset,
- size, hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT));
- offset = set_user_sample_core_type(performance_counters.l2_counters, user_data, offset,
- size, KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS);
- offset = set_user_sample_core_type(performance_counters.shader_counters, user_data, offset,
- size, KBASE_DUMMY_MODEL_MAX_SHADER_CORES);
- spin_unlock_irqrestore(&performance_counters.access_lock, flags);
-
- kfree(user_data);
- return 0;
+ offset = set_user_sample_core_type(performance_counters.tiler_counters,
+ usr_data, offset, usr_data_size,
+ hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT));
+ offset = set_user_sample_core_type(performance_counters.l2_counters,
+ usr_data, offset, usr_data_size,
+ KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS);
+ offset = set_user_sample_core_type(performance_counters.shader_counters,
+ usr_data, offset, usr_data_size,
+ KBASE_DUMMY_MODEL_MAX_SHADER_CORES);
}
/* Counter values injected through kutf are of 64 bits */
-void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *data, u32 size)
+void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *usr_data, u32 usr_data_size)
{
- unsigned long flags;
u32 offset = 0;
- spin_lock_irqsave(&performance_counters.access_lock, flags);
#if !MALI_USE_CSF
- offset = set_kernel_sample_core_type(performance_counters.jm_counters, data, offset, size,
- 1);
+ offset = set_kernel_sample_core_type(performance_counters.jm_counters,
+ usr_data, offset, usr_data_size, 1);
#else
- offset = set_kernel_sample_core_type(performance_counters.cshw_counters, data, offset, size,
- 1);
+ offset = set_kernel_sample_core_type(performance_counters.cshw_counters,
+ usr_data, offset, usr_data_size, 1);
#endif /* !MALI_USE_CSF */
- offset = set_kernel_sample_core_type(performance_counters.tiler_counters, data, offset,
- size, hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT));
- offset = set_kernel_sample_core_type(performance_counters.l2_counters, data, offset, size,
- hweight64(performance_counters.l2_present));
- offset = set_kernel_sample_core_type(performance_counters.shader_counters, data, offset,
- size, hweight64(performance_counters.shader_present));
- spin_unlock_irqrestore(&performance_counters.access_lock, flags);
+ offset = set_kernel_sample_core_type(performance_counters.tiler_counters,
+ usr_data, offset, usr_data_size,
+ hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT));
+ offset = set_kernel_sample_core_type(performance_counters.l2_counters,
+ usr_data, offset, usr_data_size,
+ hweight64(performance_counters.l2_present));
+ offset = set_kernel_sample_core_type(performance_counters.shader_counters,
+ usr_data, offset, usr_data_size,
+ hweight64(performance_counters.shader_present));
}
KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_kernel_sample);
@@ -2077,12 +1977,21 @@ void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev,
}
KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_cores);
+void gpu_model_set_dummy_prfcnt_base_cpu(u32 *base, struct kbase_device *kbdev,
+ struct tagged_addr *pages,
+ size_t page_count)
+{
+ performance_counters.prfcnt_base_cpu = base;
+ performance_counters.kbdev = kbdev;
+ performance_counters.pages = pages;
+ performance_counters.page_count = page_count;
+}
+
int gpu_model_control(void *model,
struct kbase_model_control_params *params)
{
struct dummy_model_t *dummy = (struct dummy_model_t *)model;
int i;
- unsigned long flags;
if (params->command == KBASE_MC_DISABLE_JOBS) {
for (i = 0; i < NUM_SLOTS; i++)
@@ -2091,10 +2000,8 @@ int gpu_model_control(void *model,
return -EINVAL;
}
- spin_lock_irqsave(&hw_error_status.access_lock, flags);
midgard_model_update(dummy);
midgard_model_get_outputs(dummy);
- spin_unlock_irqrestore(&hw_error_status.access_lock, flags);
return 0;
}
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.h b/mali_kbase/backend/gpu/mali_kbase_model_dummy.h
index 8eaf1b0..87690f4 100644
--- a/mali_kbase/backend/gpu/mali_kbase_model_dummy.h
+++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.h
@@ -116,8 +116,6 @@ struct kbase_error_atom {
/*struct to track the system error state*/
struct error_status_t {
- spinlock_t access_lock;
-
u32 errors_mask;
u32 mmu_table_level;
int faulty_mmu_as;
@@ -140,20 +138,6 @@ struct error_status_t {
u64 as_transtab[NUM_MMU_AS];
};
-/**
- * struct gpu_model_prfcnt_en - Performance counter enable masks
- * @fe: Enable mask for front-end block
- * @tiler: Enable mask for tiler block
- * @l2: Enable mask for L2/Memory system blocks
- * @shader: Enable mask for shader core blocks
- */
-struct gpu_model_prfcnt_en {
- u32 fe;
- u32 tiler;
- u32 l2;
- u32 shader;
-};
-
void *midgard_model_create(const void *config);
void midgard_model_destroy(void *h);
u8 midgard_model_write_reg(void *h, u32 addr, u32 value);
@@ -164,53 +148,18 @@ int job_atom_inject_error(struct kbase_error_params *params);
int gpu_model_control(void *h,
struct kbase_model_control_params *params);
-/**
- * gpu_model_set_dummy_prfcnt_user_sample() - Set performance counter values
- * @data: Userspace pointer to array of counter values
- * @size: Size of counter value array
- *
- * Counter values set by this function will be used for one sample dump only
- * after which counters will be cleared back to zero.
- *
- * Return: 0 on success, else error code.
- */
-int gpu_model_set_dummy_prfcnt_user_sample(u32 __user *data, u32 size);
-
-/**
- * gpu_model_set_dummy_prfcnt_kernel_sample() - Set performance counter values
- * @data: Pointer to array of counter values
- * @size: Size of counter value array
- *
- * Counter values set by this function will be used for one sample dump only
- * after which counters will be cleared back to zero.
- */
-void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *data, u32 size);
-
+void gpu_model_set_dummy_prfcnt_sample(u32 *usr_data, u32 usr_data_size);
+void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *usr_data, u32 usr_data_size);
void gpu_model_get_dummy_prfcnt_cores(struct kbase_device *kbdev,
u64 *l2_present, u64 *shader_present);
void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev,
u64 l2_present, u64 shader_present);
-
+void gpu_model_set_dummy_prfcnt_base_cpu(u32 *base, struct kbase_device *kbdev,
+ struct tagged_addr *pages,
+ size_t page_count);
/* Clear the counter values array maintained by the dummy model */
void gpu_model_clear_prfcnt_values(void);
-#if MALI_USE_CSF
-/**
- * gpu_model_prfcnt_dump_request() - Request performance counter sample dump.
- * @sample_buf: Pointer to KBASE_DUMMY_MODEL_MAX_VALUES_PER_SAMPLE sized array
- * in which to store dumped performance counter values.
- * @enable_maps: Physical enable maps for performance counter blocks.
- */
-void gpu_model_prfcnt_dump_request(uint32_t *sample_buf, struct gpu_model_prfcnt_en enable_maps);
-
-/**
- * gpu_model_glb_request_job_irq() - Trigger job interrupt with global request
- * flag set.
- * @model: Model pointer returned by midgard_model_create().
- */
-void gpu_model_glb_request_job_irq(void *model);
-#endif /* MALI_USE_CSF */
-
enum gpu_dummy_irq {
GPU_DUMMY_JOB_IRQ,
GPU_DUMMY_GPU_IRQ,
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
index fcf98b0..3d92251 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
@@ -437,7 +437,8 @@ static void kbase_pm_l2_clock_slow(struct kbase_device *kbdev)
return;
/* Stop the metrics gathering framework */
- kbase_pm_metrics_stop(kbdev);
+ if (kbase_pm_metrics_is_active(kbdev))
+ kbase_pm_metrics_stop(kbdev);
/* Keep the current freq to restore it upon resume */
kbdev->previous_frequency = clk_get_rate(clk);
@@ -879,7 +880,7 @@ void kbase_pm_power_changed(struct kbase_device *kbdev)
kbase_pm_update_state(kbdev);
#if !MALI_USE_CSF
- kbase_backend_slot_update(kbdev);
+ kbase_backend_slot_update(kbdev);
#endif /* !MALI_USE_CSF */
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@@ -989,7 +990,7 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev)
{
unsigned long flags;
- ktime_t end_timestamp = ktime_get_raw();
+ ktime_t end_timestamp = ktime_get();
struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
if (!kbdev->arb.arb_if)
@@ -1064,7 +1065,6 @@ static int pm_handle_mcu_sleep_on_runtime_suspend(struct kbase_device *kbdev)
lockdep_assert_held(&kbdev->csf.scheduler.lock);
lockdep_assert_held(&kbdev->pm.lock);
-#ifdef CONFIG_MALI_DEBUG
/* In case of no active CSG on slot, powering up L2 could be skipped and
* proceed directly to suspend GPU.
* ToDo: firmware has to be reloaded after wake-up as no halt command
@@ -1074,7 +1074,6 @@ static int pm_handle_mcu_sleep_on_runtime_suspend(struct kbase_device *kbdev)
dev_info(
kbdev->dev,
"No active CSGs. Can skip the power up of L2 and go for suspension directly");
-#endif
ret = kbase_pm_force_mcu_wakeup_after_sleep(kbdev);
if (ret) {
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c
index a4d7168..7d14be9 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -92,10 +92,29 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
* for those cores to get powered down
*/
if ((core_mask & old_core_mask) != old_core_mask) {
- if (kbase_pm_wait_for_cores_down_scale(kbdev)) {
- dev_warn(kbdev->dev,
- "Wait for update of core_mask from %llx to %llx failed",
- old_core_mask, core_mask);
+ bool can_wait;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ can_wait = kbdev->pm.backend.gpu_ready && kbase_pm_is_mcu_desired(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ /* This check is ideally not required, the wait function can
+ * deal with the GPU power down. But it has been added to
+ * address the scenario where down-scaling request comes from
+ * the platform specific code soon after the GPU power down
+ * and at the same time an application thread tries to
+ * power up the GPU (on the flush of GPU queue).
+ * The platform specific @ref callback_power_on that gets
+ * invoked on power up does not return until down-scaling
+ * request is complete. The check mitigates the race caused by
+ * the problem in platform specific code.
+ */
+ if (likely(can_wait)) {
+ if (kbase_pm_wait_for_desired_state(kbdev)) {
+ dev_warn(kbdev->dev,
+ "Wait for update of core_mask from %llx to %llx failed",
+ old_core_mask, core_mask);
+ }
}
}
#endif
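The (core_mask & old_core_mask) != old_core_mask guard a few lines up is what selects the new wait path: it fires only when the new mask drops at least one core that was previously enabled. A self-contained sketch of that check with made-up masks (not driver code):

#include <stdbool.h>
#include <stdint.h>

/* e.g. old = 0xF (cores 0-3), new = 0x7 (cores 0-2):
 * (0x7 & 0xF) == 0x7 != 0xF, so a core is being powered down and the
 * driver must wait for the transition to complete before returning.
 */
static bool is_core_downscale(uint64_t old_core_mask, uint64_t new_core_mask)
{
	return (new_core_mask & old_core_mask) != old_core_mask;
}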
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h
index 66ca0b6..a249b1e 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -136,7 +136,7 @@ struct kbasep_pm_metrics {
* or removed from a GPU slot.
* @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device.
* @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot.
- * @lock: spinlock protecting the kbasep_pm_metrics_state structure
+ * @lock: spinlock protecting the kbasep_pm_metrics_data structure
* @platform_data: pointer to data controlled by platform specific code
* @kbdev: pointer to kbase device for which metrics are collected
* @values: The current values of the power management metrics. The
@@ -145,7 +145,7 @@ struct kbasep_pm_metrics {
* @initialized: tracks whether metrics_state has been initialized or not.
* @timer: timer to regularly make DVFS decisions based on the power
* management metrics.
- * @timer_state: atomic indicating current @timer state, on, off, or stopped.
+ * @timer_active: boolean indicating @timer is running
* @dvfs_last: values of the PM metrics from the last DVFS tick
 * @dvfs_diff: difference between the current and previous PM metrics.
*/
@@ -169,7 +169,7 @@ struct kbasep_pm_metrics_state {
#ifdef CONFIG_MALI_MIDGARD_DVFS
bool initialized;
struct hrtimer timer;
- atomic_t timer_state;
+ bool timer_active;
struct kbasep_pm_metrics dvfs_last;
struct kbasep_pm_metrics dvfs_diff;
#endif
@@ -572,7 +572,7 @@ struct kbase_pm_backend_data {
};
#if MALI_USE_CSF
-/* CSF PM flag, signaling that the MCU shader Core should be kept on */
+/* CSF PM flag, signaling that the MCU core should be kept on */
#define CSF_DYNAMIC_PM_CORE_KEEP_ON (1 << 0)
/* CSF PM flag, signaling no scheduler suspension on idle groups */
#define CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE (1 << 1)
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
index aab07c9..52e228c 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -656,38 +656,6 @@ static void kbase_pm_enable_mcu_db_notification(struct kbase_device *kbdev)
val &= ~MCU_CNTRL_DOORBELL_DISABLE_MASK;
kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), val);
}
-
-/**
- * wait_mcu_as_inactive - Wait for AS used by MCU FW to get configured
- *
- * @kbdev: Pointer to the device.
- *
- * This function is called to wait for the AS used by MCU FW to get configured
- * before DB notification on MCU is enabled, as a workaround for HW issue.
- */
-static void wait_mcu_as_inactive(struct kbase_device *kbdev)
-{
- unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
-
- lockdep_assert_held(&kbdev->hwaccess_lock);
-
- if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TURSEHW_2716))
- return;
-
- /* Wait for the AS_ACTIVE_INT bit to become 0 for the AS used by MCU FW */
- while (--max_loops &&
- kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)) &
- AS_STATUS_AS_ACTIVE_INT)
- ;
-
- if (!WARN_ON_ONCE(max_loops == 0))
- return;
-
- dev_err(kbdev->dev, "AS_ACTIVE_INT bit stuck for AS %d used by MCU FW", MCU_AS_NR);
-
- if (kbase_prepare_to_reset_gpu(kbdev, 0))
- kbase_reset_gpu(kbdev);
-}
#endif
/**
@@ -697,10 +665,10 @@ static void wait_mcu_as_inactive(struct kbase_device *kbdev)
* @kbdev: Pointer to the device
* @enable: boolean indicating to enable interrupts or not
*
- * The POWER_CHANGED_ALL interrupt can be disabled after L2 has been turned on
- * when FW is controlling the power for the shader cores. Correspondingly, the
- * interrupts can be re-enabled after the MCU has been disabled before the
- * power down of L2.
+ * The POWER_CHANGED_ALL and POWER_CHANGED_SINGLE interrupts can be disabled
+ * after L2 has been turned on when FW is controlling the power for the shader
+ * cores. Correspondingly, the interrupts can be re-enabled after the MCU has
+ * been disabled before the power down of L2.
*/
static void kbasep_pm_toggle_power_interrupt(struct kbase_device *kbdev, bool enable)
{
@@ -711,15 +679,15 @@ static void kbasep_pm_toggle_power_interrupt(struct kbase_device *kbdev, bool en
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
+ (void)enable;
/* For IFPO, we require the POWER_CHANGED_ALL interrupt to be always on */
- enable = true;
-#endif
- if (enable) {
- irq_mask |= POWER_CHANGED_ALL;
- kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), POWER_CHANGED_ALL);
- } else {
- irq_mask &= ~POWER_CHANGED_ALL;
- }
+ irq_mask |= POWER_CHANGED_ALL | POWER_CHANGED_SINGLE;
+#else
+ if (enable)
+ irq_mask |= POWER_CHANGED_ALL | POWER_CHANGED_SINGLE;
+ else
+ irq_mask &= ~(POWER_CHANGED_ALL | POWER_CHANGED_SINGLE);
+#endif /* CONFIG_MALI_HOST_CONTROLS_SC_RAILS */
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask);
}
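Ignoring the register access and the IFPO special case, the restored function is a plain read-modify-write of the interrupt mask. A sketch of just the mask arithmetic, with hypothetical bit positions standing in for the real register definitions:

#include <stdbool.h>
#include <stdint.h>

#define POWER_CHANGED_SINGLE (1u << 9)  /* hypothetical bit position */
#define POWER_CHANGED_ALL (1u << 10)    /* hypothetical bit position */

static uint32_t toggle_power_irq_bits(uint32_t irq_mask, bool enable)
{
	if (enable)
		return irq_mask | (POWER_CHANGED_ALL | POWER_CHANGED_SINGLE);
	return irq_mask & ~(POWER_CHANGED_ALL | POWER_CHANGED_SINGLE);
}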
@@ -953,7 +921,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
case KBASE_MCU_ON_PEND_HALT:
if (kbase_csf_firmware_mcu_halted(kbdev)) {
- KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_MCU_HALTED, NULL,
+ KBASE_KTRACE_ADD(kbdev, MCU_HALTED, NULL,
kbase_csf_ktrace_gpu_cycle_cnt(kbdev));
if (kbdev->csf.firmware_hctl_core_pwr)
backend->mcu_state =
@@ -1000,7 +968,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
case KBASE_MCU_ON_PEND_SLEEP:
if (kbase_csf_firmware_is_mcu_in_sleep(kbdev)) {
- KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_MCU_SLEEP, NULL,
+ KBASE_KTRACE_ADD(kbdev, MCU_IN_SLEEP, NULL,
kbase_csf_ktrace_gpu_cycle_cnt(kbdev));
backend->mcu_state = KBASE_MCU_IN_SLEEP;
kbase_pm_enable_db_mirror_interrupt(kbdev);
@@ -1016,7 +984,6 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
case KBASE_MCU_IN_SLEEP:
if (kbase_pm_is_mcu_desired(kbdev) &&
backend->l2_state == KBASE_L2_ON) {
- wait_mcu_as_inactive(kbdev);
KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP(
kbdev, kbase_backend_get_cycle_cnt(kbdev));
kbase_pm_enable_mcu_db_notification(kbdev);
@@ -1027,7 +994,6 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
if (!kbdev->csf.firmware_hctl_core_pwr)
kbasep_pm_toggle_power_interrupt(kbdev, false);
backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE;
- kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
}
break;
#endif
@@ -1154,24 +1120,13 @@ static bool can_power_down_l2(struct kbase_device *kbdev)
#endif
}
-static bool need_tiler_control(struct kbase_device *kbdev)
-{
-#if MALI_USE_CSF
- if (kbase_pm_no_mcu_core_pwroff(kbdev))
- return true;
- else
- return false;
-#else
- return true;
-#endif
-}
-
static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
{
struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
u64 l2_present = kbdev->gpu_props.curr_config.l2_present;
+#if !MALI_USE_CSF
u64 tiler_present = kbdev->gpu_props.props.raw_props.tiler_present;
- bool l2_power_up_done;
+#endif
enum kbase_l2_core_state prev_state;
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -1182,18 +1137,24 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
KBASE_PM_CORE_L2);
u64 l2_ready = kbase_pm_get_ready_cores(kbdev,
KBASE_PM_CORE_L2);
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
- u64 tiler_trans = kbase_pm_get_trans_cores(
- kbdev, KBASE_PM_CORE_TILER);
- u64 tiler_ready = kbase_pm_get_ready_cores(
- kbdev, KBASE_PM_CORE_TILER);
+
+#if !MALI_USE_CSF
+ u64 tiler_trans = kbase_pm_get_trans_cores(kbdev,
+ KBASE_PM_CORE_TILER);
+ u64 tiler_ready = kbase_pm_get_ready_cores(kbdev,
+ KBASE_PM_CORE_TILER);
+#endif
/*
* kbase_pm_get_ready_cores and kbase_pm_get_trans_cores
* are vulnerable to corruption if gpu is lost
*/
if (kbase_is_gpu_removed(kbdev)
+#ifdef CONFIG_MALI_ARBITER_SUPPORT
|| kbase_pm_is_gpu_lost(kbdev)) {
+#else
+ ) {
+#endif
backend->shaders_state =
KBASE_SHADERS_OFF_CORESTACK_OFF;
backend->hwcnt_desired = false;
@@ -1216,45 +1177,32 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
}
break;
}
-#endif /* CONFIG_MALI_ARBITER_SUPPORT */
/* mask off ready from trans in case transitions finished
* between the register reads
*/
l2_trans &= ~l2_ready;
-
+#if !MALI_USE_CSF
+ tiler_trans &= ~tiler_ready;
+#endif
prev_state = backend->l2_state;
switch (backend->l2_state) {
case KBASE_L2_OFF:
if (kbase_pm_is_l2_desired(kbdev)) {
-#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
- /* Enable HW timer of IPA control before
- * L2 cache is powered-up.
- */
- kbase_ipa_control_handle_gpu_sleep_exit(kbdev);
-#endif
/*
* Set the desired config for L2 before
* powering it on
*/
kbase_pm_l2_config_override(kbdev);
kbase_pbha_write_settings(kbdev);
-
- /* If Host is controlling the power for shader
- * cores, then it also needs to control the
- * power for Tiler.
- * Powering on the tiler will also power the
- * L2 cache.
- */
- if (need_tiler_control(kbdev)) {
- kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, tiler_present,
- ACTION_PWRON);
- } else {
- kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, l2_present,
- ACTION_PWRON);
- }
#if !MALI_USE_CSF
+ /* L2 is required, power on. Powering on the
+ * tiler will also power the first L2 cache.
+ */
+ kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER,
+ tiler_present, ACTION_PWRON);
+
/* If we have more than one L2 cache then we
* must power them on explicitly.
*/
@@ -1264,36 +1212,30 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
ACTION_PWRON);
/* Clear backend slot submission kctx */
kbase_pm_l2_clear_backend_slot_submit_kctx(kbdev);
+#else
+ /* With CSF firmware, the host driver doesn't need to
+ * handle power management of the shader and tiler cores.
+ * The CSF firmware will power up the cores appropriately.
+ * So only power the L2 cache explicitly.
+ */
+ kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2,
+ l2_present, ACTION_PWRON);
#endif
backend->l2_state = KBASE_L2_PEND_ON;
}
break;
case KBASE_L2_PEND_ON:
- l2_power_up_done = false;
+#if !MALI_USE_CSF
+ if (!l2_trans && l2_ready == l2_present && !tiler_trans
+ && tiler_ready == tiler_present) {
+ KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL,
+ tiler_ready);
+#else
if (!l2_trans && l2_ready == l2_present) {
- if (need_tiler_control(kbdev)) {
-#ifndef CONFIG_MALI_ARBITER_SUPPORT
- u64 tiler_trans = kbase_pm_get_trans_cores(
- kbdev, KBASE_PM_CORE_TILER);
- u64 tiler_ready = kbase_pm_get_ready_cores(
- kbdev, KBASE_PM_CORE_TILER);
-#endif
-
- tiler_trans &= ~tiler_ready;
- if (!tiler_trans && tiler_ready == tiler_present) {
- KBASE_KTRACE_ADD(kbdev,
- PM_CORES_CHANGE_AVAILABLE_TILER,
- NULL, tiler_ready);
- l2_power_up_done = true;
- }
- } else {
- KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL,
- l2_ready);
- l2_power_up_done = true;
- }
- }
- if (l2_power_up_done) {
+ KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL,
+ l2_ready);
+#endif
/*
* Ensure snoops are enabled after L2 is powered
* up. Note that kbase keeps track of the snoop
@@ -1489,26 +1431,12 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
/* We only need to check the L2 here - if the L2
* is off then the tiler is definitely also off.
*/
- if (!l2_trans && !l2_ready) {
-#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
- /* Allow clock gating within the GPU and prevent it
- * from being seen as active during sleep.
- */
- kbase_ipa_control_handle_gpu_sleep_enter(kbdev);
-#endif
+ if (!l2_trans && !l2_ready)
/* L2 is now powered off */
backend->l2_state = KBASE_L2_OFF;
- }
} else {
- if (!kbdev->cache_clean_in_progress) {
-#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
- /* Allow clock gating within the GPU and prevent it
- * from being seen as active during sleep.
- */
- kbase_ipa_control_handle_gpu_sleep_enter(kbdev);
-#endif
+ if (!kbdev->cache_clean_in_progress)
backend->l2_state = KBASE_L2_OFF;
- }
}
break;
@@ -2365,14 +2293,12 @@ int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev)
/* Wait for cores */
#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE
- remaining = wait_event_killable_timeout(kbdev->pm.backend.gpu_in_desired_state_wait,
- kbase_pm_is_in_desired_state_with_l2_powered(kbdev),
- timeout);
+ remaining = wait_event_killable_timeout(
#else
remaining = wait_event_timeout(
+#endif
kbdev->pm.backend.gpu_in_desired_state_wait,
kbase_pm_is_in_desired_state_with_l2_powered(kbdev), timeout);
-#endif
if (!remaining) {
kbase_pm_timed_out(kbdev);
@@ -2427,66 +2353,6 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev)
}
KBASE_EXPORT_TEST_API(kbase_pm_wait_for_desired_state);
-#if MALI_USE_CSF
-/**
- * core_mask_update_done - Check if downscaling of shader cores is done
- *
- * @kbdev: The kbase device structure for the device.
- *
- * This function checks if the downscaling of cores is effectively complete.
- *
- * Return: true if the downscale is done.
- */
-static bool core_mask_update_done(struct kbase_device *kbdev)
-{
- bool update_done = false;
- unsigned long flags;
-
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- /* If MCU is in stable ON state then it implies that the downscale
- * request had completed.
- * If MCU is not active then it implies all cores are off, so can
- * consider the downscale request as complete.
- */
- if ((kbdev->pm.backend.mcu_state == KBASE_MCU_ON) ||
- kbase_pm_is_mcu_inactive(kbdev, kbdev->pm.backend.mcu_state))
- update_done = true;
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
- return update_done;
-}
-
-int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev)
-{
- long timeout = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT));
- long remaining;
- int err = 0;
-
- /* Wait for core mask update to complete */
-#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE
- remaining = wait_event_killable_timeout(
- kbdev->pm.backend.gpu_in_desired_state_wait,
- core_mask_update_done(kbdev), timeout);
-#else
- remaining = wait_event_timeout(
- kbdev->pm.backend.gpu_in_desired_state_wait,
- core_mask_update_done(kbdev), timeout);
-#endif
-
- if (!remaining) {
- kbase_pm_timed_out(kbdev);
- err = -ETIMEDOUT;
- } else if (remaining < 0) {
- dev_info(
- kbdev->dev,
- "Wait for cores down scaling got interrupted");
- err = (int)remaining;
- }
-
- return err;
-}
-#endif
-
void kbase_pm_enable_interrupts(struct kbase_device *kbdev)
{
unsigned long flags;
@@ -2550,21 +2416,14 @@ static void update_user_reg_page_mapping(struct kbase_device *kbdev)
lockdep_assert_held(&kbdev->pm.lock);
mutex_lock(&kbdev->csf.reg_lock);
-
- /* Only if the mappings for USER page exist, update all PTEs associated to it */
- if (kbdev->csf.nr_user_page_mapped > 0) {
- if (likely(kbdev->csf.mali_file_inode)) {
- /* This would zap the pte corresponding to the mapping of User
- * register page for all the Kbase contexts.
- */
- unmap_mapping_range(kbdev->csf.mali_file_inode->i_mapping,
- BASEP_MEM_CSF_USER_REG_PAGE_HANDLE, PAGE_SIZE, 1);
- } else {
- dev_err(kbdev->dev,
- "Device file inode not exist even if USER page previously mapped");
- }
+ if (kbdev->csf.mali_file_inode) {
+ /* This would zap the pte corresponding to the mapping of User
+ * register page for all the Kbase contexts.
+ */
+ unmap_mapping_range(kbdev->csf.mali_file_inode->i_mapping,
+ BASEP_MEM_CSF_USER_REG_PAGE_HANDLE,
+ PAGE_SIZE, 1);
}
-
mutex_unlock(&kbdev->csf.reg_lock);
}
#endif
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
index cd5a6a3..68ded7d 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
@@ -269,37 +269,6 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
*/
int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev);
-#if MALI_USE_CSF
-/**
- * kbase_pm_wait_for_cores_down_scale - Wait for the downscaling of shader cores
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- *
- * This function can be called to ensure that the downscaling of cores is
- * effectively complete and it would be safe to lower the voltage.
- * The function assumes that caller had exercised the MCU state machine for the
- * downscale request through the kbase_pm_update_state() function.
- *
- * This function needs to be used by the caller to safely wait for the completion
- * of downscale request, instead of kbase_pm_wait_for_desired_state().
- * The downscale request would trigger a state change in MCU state machine
- * and so when MCU reaches the stable ON state, it can be inferred that
- * downscaling is complete. But it has been observed that the wake up of the
- * waiting thread can get delayed by few milli seconds and by the time the
- * thread wakes up the power down transition could have started (after the
- * completion of downscale request).
- * On the completion of power down transition another wake up signal would be
- * sent, but again by the time thread wakes up the power up transition can begin.
- * And the power up transition could then get blocked inside the platform specific
- * callback_power_on() function due to the thread that called into Kbase (from the
- * platform specific code) to perform the downscaling and then ended up waiting
- * for the completion of downscale request.
- *
- * Return: 0 on success, error code on error or remaining jiffies on timeout.
- */
-int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev);
-#endif
-
/**
* kbase_pm_update_dynamic_cores_onoff - Update the L2 and shader power state
* machines after changing shader core
@@ -831,7 +800,7 @@ bool kbase_pm_no_runnables_sched_suspendable(struct kbase_device *kbdev)
/**
* kbase_pm_no_mcu_core_pwroff - Check whether the PM is required to keep the
- * MCU shader Core powered in accordance to the active
+ * MCU core powered in accordance with the active
* power management policy
*
* @kbdev: Device pointer
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c
index 2df6804..f85b466 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -24,7 +24,6 @@
*/
#include <mali_kbase.h>
-#include <mali_kbase_config_defaults.h>
#include <mali_kbase_pm.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
@@ -49,51 +48,27 @@
#define GPU_ACTIVE_SCALING_FACTOR ((u64)1E9)
#endif
-/*
- * Possible state transitions
- * ON -> ON | OFF | STOPPED
- * STOPPED -> ON | OFF
- * OFF -> ON
- *
- *
- * ┌─e─┐┌────────────f─────────────┐
- * │ v│ v
- * └───ON ──a──> STOPPED ──b──> OFF
- * ^^ │ │
- * │└──────c─────┘ │
- * │ │
- * └─────────────d─────────────┘
- *
- * Transition effects:
- * a. None
- * b. Timer expires without restart
- * c. Timer is not stopped, timer period is unaffected
- * d. Timer must be restarted
- * e. Callback is executed and the timer is restarted
- * f. Timer is cancelled, or the callback is waited on if currently executing. This is called during
- * tear-down and should not be subject to a race from an OFF->ON transition
- */
-enum dvfs_metric_timer_state { TIMER_OFF, TIMER_STOPPED, TIMER_ON };
-
#ifdef CONFIG_MALI_MIDGARD_DVFS
static enum hrtimer_restart dvfs_callback(struct hrtimer *timer)
{
+ unsigned long flags;
struct kbasep_pm_metrics_state *metrics;
- if (WARN_ON(!timer))
- return HRTIMER_NORESTART;
+ KBASE_DEBUG_ASSERT(timer != NULL);
metrics = container_of(timer, struct kbasep_pm_metrics_state, timer);
+ kbase_pm_get_dvfs_action(metrics->kbdev);
- /* Transition (b) to fully off if timer was stopped, don't restart the timer in this case */
- if (atomic_cmpxchg(&metrics->timer_state, TIMER_STOPPED, TIMER_OFF) != TIMER_ON)
- return HRTIMER_NORESTART;
+ spin_lock_irqsave(&metrics->lock, flags);
- kbase_pm_get_dvfs_action(metrics->kbdev);
+ if (metrics->timer_active)
+ hrtimer_start(timer,
+ HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period),
+ HRTIMER_MODE_REL);
+
+ spin_unlock_irqrestore(&metrics->lock, flags);
- /* Set the new expiration time and restart (transition e) */
- hrtimer_forward_now(timer, HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period));
- return HRTIMER_RESTART;
+ return HRTIMER_NORESTART;
}
#endif /* CONFIG_MALI_MIDGARD_DVFS */
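The restored callback re-arms itself with hrtimer_start() while timer_active is set and always returns HRTIMER_NORESTART, unlike the removed hrtimer_forward_now()/HRTIMER_RESTART variant. A minimal standalone module sketch of that pattern, assuming a 5.10-era kernel tree; all names below are invented, not kbase API:

#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/types.h>

static struct hrtimer demo_timer;
static DEFINE_SPINLOCK(demo_lock);
static bool demo_active;
static const unsigned int demo_period_ms = 100;

static enum hrtimer_restart demo_cb(struct hrtimer *timer)
{
	unsigned long flags;

	/* periodic work would go here (kbase calls kbase_pm_get_dvfs_action()) */

	spin_lock_irqsave(&demo_lock, flags);
	if (demo_active)
		hrtimer_start(timer, ms_to_ktime(demo_period_ms),
			      HRTIMER_MODE_REL);
	spin_unlock_irqrestore(&demo_lock, flags);

	/* the restart was issued explicitly above, so never let the core do it */
	return HRTIMER_NORESTART;
}

static int __init demo_init(void)
{
	hrtimer_init(&demo_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	demo_timer.function = demo_cb;
	demo_active = true;
	hrtimer_start(&demo_timer, ms_to_ktime(demo_period_ms), HRTIMER_MODE_REL);
	return 0;
}

static void __exit demo_exit(void)
{
	unsigned long flags;

	/* stop re-arming first, then wait for a running callback to finish */
	spin_lock_irqsave(&demo_lock, flags);
	demo_active = false;
	spin_unlock_irqrestore(&demo_lock, flags);
	hrtimer_cancel(&demo_timer);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");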
@@ -108,7 +83,7 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev)
KBASE_DEBUG_ASSERT(kbdev != NULL);
kbdev->pm.backend.metrics.kbdev = kbdev;
- kbdev->pm.backend.metrics.time_period_start = ktime_get_raw();
+ kbdev->pm.backend.metrics.time_period_start = ktime_get();
kbdev->pm.backend.metrics.values.time_busy = 0;
kbdev->pm.backend.metrics.values.time_idle = 0;
kbdev->pm.backend.metrics.values.time_in_protm = 0;
@@ -136,7 +111,7 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev)
#else
KBASE_DEBUG_ASSERT(kbdev != NULL);
kbdev->pm.backend.metrics.kbdev = kbdev;
- kbdev->pm.backend.metrics.time_period_start = ktime_get_raw();
+ kbdev->pm.backend.metrics.time_period_start = ktime_get();
kbdev->pm.backend.metrics.gpu_active = false;
kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
@@ -159,7 +134,6 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev)
HRTIMER_MODE_REL);
kbdev->pm.backend.metrics.timer.function = dvfs_callback;
kbdev->pm.backend.metrics.initialized = true;
- atomic_set(&kbdev->pm.backend.metrics.timer_state, TIMER_OFF);
kbase_pm_metrics_start(kbdev);
#endif /* CONFIG_MALI_MIDGARD_DVFS */
@@ -178,12 +152,16 @@ KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init);
void kbasep_pm_metrics_term(struct kbase_device *kbdev)
{
#ifdef CONFIG_MALI_MIDGARD_DVFS
+ unsigned long flags;
+
KBASE_DEBUG_ASSERT(kbdev != NULL);
- /* Cancel the timer, and block if the callback is currently executing (transition f) */
- kbdev->pm.backend.metrics.initialized = false;
- atomic_set(&kbdev->pm.backend.metrics.timer_state, TIMER_OFF);
+ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+ kbdev->pm.backend.metrics.timer_active = false;
+ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
+ kbdev->pm.backend.metrics.initialized = false;
#endif /* CONFIG_MALI_MIDGARD_DVFS */
#if MALI_USE_CSF
@@ -221,7 +199,7 @@ static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev)
* elapsed time. The lock taken inside kbase_ipa_control_query()
 * function can cause a lot of variation.
*/
- now = ktime_get_raw();
+ now = ktime_get();
if (err) {
dev_err(kbdev->dev,
@@ -253,14 +231,12 @@ static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev)
* time.
*/
if (!kbdev->pm.backend.metrics.skip_gpu_active_sanity_check) {
- /* The margin is scaled to allow for the worst-case
- * scenario where the samples are maximally separated,
- * plus a small offset for sampling errors.
+ /* Use a margin of diff_ns / 64, roughly 1.5% of the time
+ * difference.
*/
- u64 const MARGIN_NS =
- IPA_CONTROL_TIMER_DEFAULT_VALUE_MS * NSEC_PER_MSEC * 3 / 2;
+ u64 margin_ns = diff_ns >> 6;
- if (gpu_active_counter > (diff_ns + MARGIN_NS)) {
+ if (gpu_active_counter > (diff_ns + margin_ns)) {
dev_info(
kbdev->dev,
"GPU activity takes longer than time interval: %llu ns > %llu ns",
@@ -355,7 +331,7 @@ void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev,
#if MALI_USE_CSF
kbase_pm_get_dvfs_utilisation_calc(kbdev);
#else
- kbase_pm_get_dvfs_utilisation_calc(kbdev, ktime_get_raw());
+ kbase_pm_get_dvfs_utilisation_calc(kbdev, ktime_get());
#endif
memset(diff, 0, sizeof(*diff));
@@ -420,33 +396,57 @@ void kbase_pm_get_dvfs_action(struct kbase_device *kbdev)
bool kbase_pm_metrics_is_active(struct kbase_device *kbdev)
{
+ bool isactive;
+ unsigned long flags;
+
KBASE_DEBUG_ASSERT(kbdev != NULL);
- return atomic_read(&kbdev->pm.backend.metrics.timer_state) == TIMER_ON;
+ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+ isactive = kbdev->pm.backend.metrics.timer_active;
+ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
+ return isactive;
}
KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active);
void kbase_pm_metrics_start(struct kbase_device *kbdev)
{
- struct kbasep_pm_metrics_state *metrics = &kbdev->pm.backend.metrics;
+ unsigned long flags;
+ bool update = true;
- if (unlikely(!metrics->initialized))
+ if (unlikely(!kbdev->pm.backend.metrics.initialized))
return;
- /* Transition to ON, from a stopped state (transition c) */
- if (atomic_xchg(&metrics->timer_state, TIMER_ON) == TIMER_OFF)
- /* Start the timer only if it's been fully stopped (transition d)*/
- hrtimer_start(&metrics->timer, HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period),
- HRTIMER_MODE_REL);
+ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+ if (!kbdev->pm.backend.metrics.timer_active)
+ kbdev->pm.backend.metrics.timer_active = true;
+ else
+ update = false;
+ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
+ if (update)
+ hrtimer_start(&kbdev->pm.backend.metrics.timer,
+ HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period),
+ HRTIMER_MODE_REL);
}
void kbase_pm_metrics_stop(struct kbase_device *kbdev)
{
+ unsigned long flags;
+ bool update = true;
+
if (unlikely(!kbdev->pm.backend.metrics.initialized))
return;
- /* Timer is Stopped if its currently on (transition a) */
- atomic_cmpxchg(&kbdev->pm.backend.metrics.timer_state, TIMER_ON, TIMER_STOPPED);
+ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+ if (kbdev->pm.backend.metrics.timer_active)
+ kbdev->pm.backend.metrics.timer_active = false;
+ else
+ update = false;
+ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
+ if (update)
+ hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
}
@@ -512,7 +512,7 @@ void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp)
spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
if (!timestamp) {
- now = ktime_get_raw();
+ now = ktime_get();
timestamp = &now;
}
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c
index deeb1b5..5f16434 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c
@@ -310,7 +310,7 @@ void kbase_pm_set_policy(struct kbase_device *kbdev,
mutex_lock(&kbdev->pm.backend.policy_change_lock);
if (kbase_reset_gpu_prevent_and_wait(kbdev)) {
- dev_warn(kbdev->dev, "Set PM policy failing to prevent gpu reset");
+ dev_warn(kbdev->dev, "Set PM policy failed to prevent gpu reset");
reset_op_prevented = false;
}
@@ -332,7 +332,7 @@ void kbase_pm_set_policy(struct kbase_device *kbdev,
* the always_on policy, reflected by the CSF_DYNAMIC_PM_CORE_KEEP_ON
* flag bit.
*/
- sched_suspend = reset_op_prevented &&
+ sched_suspend = kbdev->csf.firmware_inited && reset_op_prevented &&
(CSF_DYNAMIC_PM_CORE_KEEP_ON &
(new_policy_csf_pm_sched_flags | kbdev->pm.backend.csf_pm_sched_flags));
diff --git a/mali_kbase/backend/gpu/mali_kbase_time.c b/mali_kbase/backend/gpu/mali_kbase_time.c
index 5110e3d..a83206a 100644
--- a/mali_kbase/backend/gpu/mali_kbase_time.c
+++ b/mali_kbase/backend/gpu/mali_kbase_time.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016, 2018-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -21,9 +21,6 @@
#include <mali_kbase.h>
#include <mali_kbase_hwaccess_time.h>
-#if MALI_USE_CSF
-#include <csf/mali_kbase_csf_timeout.h>
-#endif
#include <device/mali_kbase_device.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <mali_kbase_config_defaults.h>
@@ -116,17 +113,13 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev,
*/
u64 timeout, nr_cycles = 0;
- u64 freq_khz;
-
+ /* Default value to mean 'no cap' */
+ u64 timeout_cap = U64_MAX;
+ u64 freq_khz = kbdev->lowest_gpu_freq_khz;
/* Only for debug messages, safe default in case it's mis-maintained */
const char *selector_str = "(unknown)";
- if (WARN(!kbdev->lowest_gpu_freq_khz,
- "Lowest frequency uninitialized! Using reference frequency for scaling")) {
- freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ;
- } else {
- freq_khz = kbdev->lowest_gpu_freq_khz;
- }
+ WARN_ON(!freq_khz);
switch (selector) {
case KBASE_TIMEOUT_SELECTOR_COUNT:
@@ -142,15 +135,16 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev,
fallthrough;
case CSF_FIRMWARE_TIMEOUT:
selector_str = "CSF_FIRMWARE_TIMEOUT";
- /* Any FW timeout cannot be longer than the FW ping interval, after which
- * the firmware_aliveness_monitor will be triggered and may restart
- * the GPU if the FW is unresponsive.
+ nr_cycles = CSF_FIRMWARE_TIMEOUT_CYCLES;
+ /* Set up a cap on the CSF FW timeout at FIRMWARE_PING_INTERVAL_MS,
+ * in case the calculated timeout exceeds it. This should be adapted to
+ * a direct timeout comparison once the
+ * FIRMWARE_PING_INTERVAL_MS option is added to this timeout
+ * function. A compile-time check such as BUILD_BUG_ON can also
+ * be done once the firmware ping interval in cycles becomes
+ * available as a macro.
*/
- nr_cycles = min(CSF_FIRMWARE_PING_TIMEOUT_CYCLES, CSF_FIRMWARE_TIMEOUT_CYCLES);
-
- if (nr_cycles == CSF_FIRMWARE_PING_TIMEOUT_CYCLES)
- dev_warn(kbdev->dev, "Capping %s to CSF_FIRMWARE_PING_TIMEOUT\n",
- selector_str);
+ timeout_cap = FIRMWARE_PING_INTERVAL_MS;
break;
case CSF_PM_TIMEOUT:
selector_str = "CSF_PM_TIMEOUT";
@@ -160,33 +154,21 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev,
selector_str = "CSF_GPU_RESET_TIMEOUT";
nr_cycles = CSF_GPU_RESET_TIMEOUT_CYCLES;
break;
- case CSF_CSG_SUSPEND_TIMEOUT:
- selector_str = "CSF_CSG_SUSPEND_TIMEOUT";
- nr_cycles = CSF_CSG_SUSPEND_TIMEOUT_CYCLES;
- break;
- case CSF_FIRMWARE_BOOT_TIMEOUT:
- selector_str = "CSF_FIRMWARE_BOOT_TIMEOUT";
- nr_cycles = CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES;
- break;
- case CSF_FIRMWARE_PING_TIMEOUT:
- selector_str = "CSF_FIRMWARE_PING_TIMEOUT";
- nr_cycles = CSF_FIRMWARE_PING_TIMEOUT_CYCLES;
- break;
- case CSF_SCHED_PROTM_PROGRESS_TIMEOUT:
- selector_str = "CSF_SCHED_PROTM_PROGRESS_TIMEOUT";
- nr_cycles = kbase_csf_timeout_get(kbdev);
- break;
#endif
}
timeout = div_u64(nr_cycles, freq_khz);
+ if (timeout > timeout_cap) {
+ dev_dbg(kbdev->dev, "Capped %s %llu to %llu", selector_str,
+ (unsigned long long)timeout, (unsigned long long)timeout_cap);
+ timeout = timeout_cap;
+ }
if (WARN(timeout > UINT_MAX,
"Capping excessive timeout %llums for %s at freq %llukHz to UINT_MAX ms",
(unsigned long long)timeout, selector_str, (unsigned long long)freq_khz))
timeout = UINT_MAX;
return (unsigned int)timeout;
}
-KBASE_EXPORT_TEST_API(kbase_get_timeout_ms);
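Because freq_khz is cycles per millisecond, the div_u64() above turns a cycle budget directly into milliseconds. A worked example with invented numbers (the real budgets come from the *_CYCLES macros):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t nr_cycles = 100000000ULL; /* hypothetical 100M-cycle budget */
	uint64_t freq_khz = 100000ULL;     /* 100 MHz == 100000 cycles per ms */
	uint64_t timeout_ms = nr_cycles / freq_khz;

	/* prints "timeout = 1000 ms" */
	printf("timeout = %llu ms\n", (unsigned long long)timeout_ms);
	return 0;
}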
u64 kbase_backend_get_cycle_cnt(struct kbase_device *kbdev)
{