summary | refs | log | tree | commit | diff
path: root/mali_kbase/backend
diff options
context:
space:
mode:
author Robin Peng <robinpeng@google.com> 2023-03-13 05:02:39 +0000
committer Robin Peng <robinpeng@google.com> 2023-03-14 03:36:42 +0000
commit 2ad96fc3373735858ac2251d10d71555313854d5 (patch)
tree 024c82365e4b548c818e7864d27208d9ba62093b /mali_kbase/backend
parent 361910a924d6d5302f223b8482ae051cd2c64d61 (diff)
parent 3de0f21de599057191564844ca1a748fdc962d46 (diff)
download gpu-2ad96fc3373735858ac2251d10d71555313854d5.tar.gz
Merge android13-gs-pixel-5.10-tm-qpr3 into android13-gs-pixel-5.10-udc
Bug: 255246572
Change-Id: I4154a51b5f02391cda656a64dc03db3df7671ed9
Signed-off-by: Robin Peng <robinpeng@google.com>
Diffstat (limited to 'mali_kbase/backend')
-rw-r--r-- mali_kbase/backend/gpu/mali_kbase_pm_backend.c 34
-rw-r--r-- mali_kbase/backend/gpu/mali_kbase_pm_driver.c 80
2 files changed, 72 insertions, 42 deletions
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
index ca9a106..047b83e 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
@@ -693,8 +693,40 @@ static bool is_poweroff_in_progress(struct kbase_device *kbdev)
void kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev)
{
- wait_event_killable(kbdev->pm.backend.poweroff_wait,
+#define POWEROFF_TIMEOUT_MSEC 200
+ long remaining = msecs_to_jiffies(POWEROFF_TIMEOUT_MSEC);
+ remaining = wait_event_killable_timeout(kbdev->pm.backend.poweroff_wait,
+ is_poweroff_in_progress(kbdev), remaining);
+ if (!remaining) {
+ unsigned long flags;
+ kbasep_platform_event_core_dump(kbdev, "poweroff work timeout");
+ dev_err(kbdev->dev, "failed to wait for poweroff worker after %ims",
+ POWEROFF_TIMEOUT_MSEC);
+ kbase_gpu_timeout_debug_message(kbdev);
+ dev_err(kbdev->dev, "gpu_poweroff_wait_work pending %d",
+ work_pending(&kbdev->pm.backend.gpu_poweroff_wait_work));
+#if MALI_USE_CSF
+ //csf.scheduler.state should be accessed with scheduler lock!
+ //callchains go through this function though holding that lock
+ //so just print without locking.
+ dev_err(kbdev->dev, "scheduler.state %d", kbdev->csf.scheduler.state);
+ dev_err(kbdev->dev, "Firmware ping %d", kbase_csf_firmware_ping_wait(kbdev));
+#endif
+ //Attempt another state machine transition prompt.
+ dev_err(kbdev->dev, "Attempt to prompt state machine");
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbase_pm_update_state(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ dev_err(kbdev->dev, "GPU state after re-prompt of state machine");
+ kbase_gpu_timeout_debug_message(kbdev);
+
+ dev_err(kbdev->dev, "retrying wait, this is likely to still hang. %d",
is_poweroff_in_progress(kbdev));
+ wait_event_killable(kbdev->pm.backend.poweroff_wait,
+ is_poweroff_in_progress(kbdev));
+ }
+#undef POWEROFF_TIMEOUT_MSEC
}
KBASE_EXPORT_TEST_API(kbase_pm_wait_for_poweroff_work_complete);
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
index 803becc..b046903 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
@@ -1585,6 +1585,12 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
kbase_l2_core_state_to_string(prev_state),
kbase_l2_core_state_to_string(
backend->l2_state));
+#if IS_ENABLED(CONFIG_SOC_GS201)
+ if (!kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off &&
+ backend->l2_state == KBASE_L2_OFF) {
+ dev_warn(kbdev->dev, "transition to l2 off without waking waiter");
+ }
+#endif
}
} while (backend->l2_state != prev_state);
@@ -1594,6 +1600,10 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off = false;
queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq,
&kbdev->pm.backend.gpu_poweroff_wait_work);
+#if IS_ENABLED(CONFIG_SOC_GS201)
+ } else if (backend->l2_state == KBASE_L2_OFF) {
+ dev_warn(kbdev->dev, "l2 off - skipped queue_work for waking up potential waiters");
+#endif
}
return 0;
@@ -2336,21 +2346,26 @@ void kbase_pm_reset_complete(struct kbase_device *kbdev)
#define PM_TIMEOUT_MS (5000) /* 5s */
#endif
-static void kbase_pm_timed_out(struct kbase_device *kbdev)
-{
+void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev) {
unsigned long flags;
-
- dev_err(kbdev->dev, "Power transition timed out unexpectedly\n");
#if !MALI_USE_CSF
CSTD_UNUSED(flags);
dev_err(kbdev->dev, "Desired state :\n");
- dev_err(kbdev->dev, "\tShader=%016llx\n",
+ dev_err(kbdev->dev, " Shader=%016llx\n",
kbdev->pm.backend.shaders_desired ? kbdev->pm.backend.shaders_avail : 0);
#else
+ dev_err(kbdev->dev, "GPU pm state :\n");
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- dev_err(kbdev->dev, "\tMCU desired = %d\n",
+ dev_err(kbdev->dev, " scheduler.pm_active_count = %d", kbdev->csf.scheduler.pm_active_count);
+ dev_err(kbdev->dev, " poweron_required %d pm.active_count %d invoke_poweroff_wait_wq_when_l2_off %d",
+ kbdev->pm.backend.poweron_required,
+ kbdev->pm.active_count,
+ kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off);
+ dev_err(kbdev->dev, " gpu_poweroff_wait_work pending %d",
+ work_pending(&kbdev->pm.backend.gpu_poweroff_wait_work));
+ dev_err(kbdev->dev, " MCU desired = %d\n",
kbase_pm_is_mcu_desired(kbdev));
- dev_err(kbdev->dev, "\tMCU sw state = %d\n",
+ dev_err(kbdev->dev, " MCU sw state = %d\n",
kbdev->pm.backend.mcu_state);
dev_err(kbdev->dev, "\tL2 desired = %d (locked_off: %d)\n",
kbase_pm_is_l2_desired(kbdev), kbdev->pm.backend.policy_change_clamp_state_to_off);
@@ -2363,60 +2378,51 @@ static void kbase_pm_timed_out(struct kbase_device *kbdev)
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
#endif
dev_err(kbdev->dev, "Current state :\n");
- dev_err(kbdev->dev, "\tShader=%08x%08x\n",
+ dev_err(kbdev->dev, " Shader=%08x%08x\n",
kbase_reg_read(kbdev,
GPU_CONTROL_REG(SHADER_READY_HI)),
kbase_reg_read(kbdev,
GPU_CONTROL_REG(SHADER_READY_LO)));
- dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
+ dev_err(kbdev->dev, " Tiler =%08x%08x\n",
kbase_reg_read(kbdev,
GPU_CONTROL_REG(TILER_READY_HI)),
kbase_reg_read(kbdev,
GPU_CONTROL_REG(TILER_READY_LO)));
- dev_err(kbdev->dev, "\tL2 =%08x%08x\n",
+ dev_err(kbdev->dev, " L2 =%08x%08x\n",
kbase_reg_read(kbdev,
GPU_CONTROL_REG(L2_READY_HI)),
kbase_reg_read(kbdev,
GPU_CONTROL_REG(L2_READY_LO)));
#if MALI_USE_CSF
- dev_err(kbdev->dev, "\tMCU status = %d\n",
- kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS)));
+ kbase_csf_debug_dump_registers(kbdev);
#endif
dev_err(kbdev->dev, "Cores transitioning :\n");
- dev_err(kbdev->dev, "\tShader=%08x%08x\n",
+ dev_err(kbdev->dev, " Shader=%08x%08x\n",
kbase_reg_read(kbdev, GPU_CONTROL_REG(
SHADER_PWRTRANS_HI)),
kbase_reg_read(kbdev, GPU_CONTROL_REG(
SHADER_PWRTRANS_LO)));
- dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
+ dev_err(kbdev->dev, " Tiler =%08x%08x\n",
kbase_reg_read(kbdev, GPU_CONTROL_REG(
TILER_PWRTRANS_HI)),
kbase_reg_read(kbdev, GPU_CONTROL_REG(
TILER_PWRTRANS_LO)));
- dev_err(kbdev->dev, "\tL2 =%08x%08x\n",
+ dev_err(kbdev->dev, " L2 =%08x%08x\n",
kbase_reg_read(kbdev, GPU_CONTROL_REG(
L2_PWRTRANS_HI)),
kbase_reg_read(kbdev, GPU_CONTROL_REG(
L2_PWRTRANS_LO)));
dump_stack();
- dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n");
+}
- /* pixel: If either:
- * 1. L2/MCU power transition timed out, or,
- * 2. kbase state machine fell out of sync with the hw state,
- * a soft/hard reset (ie writing to SOFT/HARD_RESET regs) is insufficient to resume
- * operation.
- *
- * Besides, Odin TRM advises against touching SOFT/HARD_RESET
- * regs if L2_PWRTRANS is 1 to avoid undefined state.
- *
- * We have already lost work if we end up here, so send a powercycle to reset the hw,
- * which is more reliable.
- */
+static void kbase_pm_timed_out(struct kbase_device *kbdev)
+{
+ dev_err(kbdev->dev, "Power transition timed out unexpectedly\n");
+ kbase_gpu_timeout_debug_message(kbdev);
+ dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n");
if (kbase_prepare_to_reset_gpu(kbdev,
- RESET_FLAGS_HWC_UNRECOVERABLE_ERROR |
- RESET_FLAGS_FORCE_PM_HW_RESET))
+ RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
kbase_reset_gpu(kbdev);
}
@@ -3177,17 +3183,6 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev)
KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev, kbdev);
-#if MALI_USE_CSF
- if (kbdev->csf.reset.force_pm_hw_reset && kbdev->pm.backend.callback_hardware_reset) {
- dev_err(kbdev->dev, "Power Cycle reset mali");
- kbdev->csf.reset.force_pm_hw_reset = false;
- return kbase_pm_hw_reset(kbdev);
- }
-#endif
-
- /* Unmask the reset complete interrupt only */
- kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED);
-
if (kbdev->pm.backend.callback_soft_reset) {
ret = kbdev->pm.backend.callback_soft_reset(kbdev);
if (ret < 0)
@@ -3199,6 +3194,9 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev)
GPU_COMMAND_SOFT_RESET);
}
+ /* Unmask the reset complete interrupt only */
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED);
+
/* Initialize a structure for tracking the status of the reset */
rtdata.kbdev = kbdev;
rtdata.timed_out = false;