summaryrefslogtreecommitdiff
path: root/mali_kbase/backend
diff options
context:
space:
mode:
author: Varad Gautam <varadgautam@google.com> 2023-01-09 13:11:50 +0000
committer: Varad Gautam <varadgautam@google.com> 2023-01-18 10:26:45 +0000
commit: 5833a0028c701290fb26566c2615af25f20556c9 (patch)
tree: b890da2dc899156e164e179e54358fd57877f49c /mali_kbase/backend
parent: d820707eee5c927e8d68ae37835c848f1023fb47 (diff)
download: gpu-5833a0028c701290fb26566c2615af25f20556c9.tar.gz
kbase: Powercycle mali to recover from a PM timeout
The existing reset flow (kbase_pm_do_reset()) is:
1. Write to SOFT_RESET and wait for the irq until timeout.
2. If the RESET_COMPLETED irq timed out, write to HARD_RESET and wait for the irq until timeout.
3. If the RESET_COMPLETED irq timed out again, powercycle the GPU via kbase_pm_hw_reset().

If a power transition timed out (i.e., we reached kbase_pm_timed_out()), writing to the SOFT/HARD_RESET regs is unreliable: it can send the GPU into an undefined state (e.g., when writing to the SOFT/HARD_RESET regs while L2 is transitioning) and prevent recovery.

Introduce a RESET_FLAGS_FORCE_PM_HW_RESET flag to allow resetting the GPU via powercycle, which currently only happens when soft and hard reset both fail, and use only this method to reset the GPU from kbase_pm_timed_out().

kbase upstreaming: Powercycle-to-reset is pixel-only at the moment (df58a621a).
kbase upstreaming: WIP: b/243522189#comment23

Change-Id: I7b0f594cb5592a175f18af6b76230de3c83ce420
Signed-off-by: Varad Gautam <varadgautam@google.com>
Bug: 241217496
Test: SST ~2500h (b/265003962)
Diffstat (limited to 'mali_kbase/backend')
-rw-r--r-- mali_kbase/backend/gpu/mali_kbase_pm_driver.c 24
1 files changed, 23 insertions, 1 deletions
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
index 2ef22d9..1864240 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
@@ -2392,8 +2392,22 @@ static void kbase_pm_timed_out(struct kbase_device *kbdev)
L2_PWRTRANS_LO)));
dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n");
+
+ /* pixel: If either:
+ * 1. L2/MCU power transition timed out, or,
+ * 2. kbase state machine fell out of sync with the hw state,
+ * a soft/hard reset (ie writing to SOFT/HARD_RESET regs) is insufficient to resume
+ * operation.
+ *
+ * Besides, Odin TRM advises against touching SOFT/HARD_RESET
+ * regs if L2_PWRTRANS is 1 to avoid undefined state.
+ *
+ * We have already lost work if we end up here, so send a powercycle to reset the hw,
+ * which is more reliable.
+ */
if (kbase_prepare_to_reset_gpu(kbdev,
- RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
+ RESET_FLAGS_HWC_UNRECOVERABLE_ERROR |
+ RESET_FLAGS_FORCE_PM_HW_RESET))
kbase_reset_gpu(kbdev);
}
@@ -3154,6 +3168,14 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev)
KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev, kbdev);
+#if MALI_USE_CSF
+ if (kbdev->csf.reset.force_pm_hw_reset) {
+ dev_err(kbdev->dev, "Power Cycle reset mali");
+ kbdev->csf.reset.force_pm_hw_reset = false;
+ return kbase_pm_hw_reset(kbdev);
+ }
+#endif
+
if (kbdev->pm.backend.callback_soft_reset) {
ret = kbdev->pm.backend.callback_soft_reset(kbdev);
if (ret < 0)