summaryrefslogtreecommitdiff
path: root/mali_kbase/backend
diff options
context:
space:
mode:
author: Kevin DuBois <kevindubois@google.com> 2023-02-08 23:53:14 +0000
committer: Sean Callanan <spyffe@google.com> 2023-02-14 22:04:29 +0000
commit: 722a340cbba39bbae2099dfe79f9a21ff2977d76 (patch)
tree: 1aa5ac88b96f44f0dd8b78c7aa209b90650b1ece /mali_kbase/backend
parent: 2e6974379ba5a7f083c086c04a3dbaba5ba824cd (diff)
download: gpu-722a340cbba39bbae2099dfe79f9a21ff2977d76.tar.gz
mali_kbase: add log around hang location
Adds logging and a state machine prompt at the place where we observe a hang during system suspend:
- Arm's requested output in kbase_pm_wait_for_poweroff_work_complete
- Our own internal logging in kbase_pm_wait_for_poweroff_work_complete and other places
- Firmware ping in kbase_pm_wait_for_poweroff_work_complete (type B)
- Firmware ping in suspend timeout reporting (type A)
Bug: 252072919
Test: boot
Change-Id: I1672c5143afbca8862a1be02a3f29ca17f9839fd
Diffstat (limited to 'mali_kbase/backend')
-rw-r--r-- mali_kbase/backend/gpu/mali_kbase_pm_backend.c 34
-rw-r--r-- mali_kbase/backend/gpu/mali_kbase_pm_driver.c 50
2 files changed, 68 insertions, 16 deletions
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
index fcf98b0..65741cc 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
@@ -693,8 +693,40 @@ static bool is_poweroff_in_progress(struct kbase_device *kbdev)
void kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev)
{
- wait_event_killable(kbdev->pm.backend.poweroff_wait,
+#define POWEROFF_TIMEOUT_MSEC 200
+ long remaining = msecs_to_jiffies(POWEROFF_TIMEOUT_MSEC);
+ remaining = wait_event_killable_timeout(kbdev->pm.backend.poweroff_wait,
+ is_poweroff_in_progress(kbdev), remaining);
+ if (!remaining) {
+ unsigned long flags;
+ kbasep_platform_event_core_dump(kbdev, "poweroff work timeout");
+ dev_err(kbdev->dev, "failed to wait for poweroff worker after %ims",
+ POWEROFF_TIMEOUT_MSEC);
+ kbase_gpu_timeout_debug_message(kbdev);
+ dev_err(kbdev->dev, "gpu_poweroff_wait_work pending %d",
+ work_pending(&kbdev->pm.backend.gpu_poweroff_wait_work));
+#if MALI_USE_CSF
+ //csf.scheduler.state should be accessed with scheduler lock!
+ //callchains go through this function though holding that lock
+ //so just print without locking.
+ dev_err(kbdev->dev, "scheduler.state %d", kbdev->csf.scheduler.state);
+ dev_err(kbdev->dev, "Firmware ping %d", kbase_csf_firmware_ping_wait(kbdev));
+#endif
+ //Attempt another state machine transition prompt.
+ dev_err(kbdev->dev, "Attempt to prompt state machine");
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbase_pm_update_state(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ dev_err(kbdev->dev, "GPU state after re-prompt of state machine");
+ kbase_gpu_timeout_debug_message(kbdev);
+
+ dev_err(kbdev->dev, "retrying wait, this is likely to still hang. %d",
is_poweroff_in_progress(kbdev));
+ wait_event_killable(kbdev->pm.backend.poweroff_wait,
+ is_poweroff_in_progress(kbdev));
+ }
+#undef POWEROFF_TIMEOUT_MSEC
}
KBASE_EXPORT_TEST_API(kbase_pm_wait_for_poweroff_work_complete);
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
index d998ae5..e73c19b 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
@@ -1585,6 +1585,10 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
kbase_l2_core_state_to_string(prev_state),
kbase_l2_core_state_to_string(
backend->l2_state));
+ if (!kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off &&
+ backend->l2_state == KBASE_L2_OFF) {
+ dev_warn(kbdev->dev, "transition to l2 off without waking waiter");
+ }
}
} while (backend->l2_state != prev_state);
@@ -1594,6 +1598,8 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off = false;
queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq,
&kbdev->pm.backend.gpu_poweroff_wait_work);
+ } else if (backend->l2_state == KBASE_L2_OFF) {
+ dev_warn(kbdev->dev, "l2 off - skipped queue_work for waking up potential waiters");
}
return 0;
@@ -2336,61 +2342,75 @@ void kbase_pm_reset_complete(struct kbase_device *kbdev)
#define PM_TIMEOUT_MS (5000) /* 5s */
#endif
-static void kbase_pm_timed_out(struct kbase_device *kbdev)
-{
+void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev) {
unsigned long flags;
-
- dev_err(kbdev->dev, "Power transition timed out unexpectedly\n");
#if !MALI_USE_CSF
CSTD_UNUSED(flags);
dev_err(kbdev->dev, "Desired state :\n");
- dev_err(kbdev->dev, "\tShader=%016llx\n",
+ dev_err(kbdev->dev, " Shader=%016llx\n",
kbdev->pm.backend.shaders_desired ? kbdev->pm.backend.shaders_avail : 0);
#else
+ dev_err(kbdev->dev, "GPU pm state :\n");
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- dev_err(kbdev->dev, "\tMCU desired = %d\n",
+ dev_err(kbdev->dev, " scheduler.pm_active_count = %d", kbdev->csf.scheduler.pm_active_count);
+ dev_err(kbdev->dev, " poweron_required %d pm.active_count %d invoke_poweroff_wait_wq_when_l2_off %d",
+ kbdev->pm.backend.poweron_required,
+ kbdev->pm.active_count,
+ kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off);
+ dev_err(kbdev->dev, " gpu_poweroff_wait_work pending %d",
+ work_pending(&kbdev->pm.backend.gpu_poweroff_wait_work));
+ dev_err(kbdev->dev, " MCU desired = %d\n",
kbase_pm_is_mcu_desired(kbdev));
- dev_err(kbdev->dev, "\tMCU sw state = %d\n",
+ dev_err(kbdev->dev, " MCU sw state = %d\n",
kbdev->pm.backend.mcu_state);
+ dev_err(kbdev->dev, " L2 desired = %d (locked_off: %d)\n",
+ kbase_pm_is_l2_desired(kbdev),
+ kbdev->pm.backend.policy_change_clamp_state_to_off);
+ dev_err(kbdev->dev, " L2 sw state = %d\n",
+ kbdev->pm.backend.l2_state);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
#endif
dev_err(kbdev->dev, "Current state :\n");
- dev_err(kbdev->dev, "\tShader=%08x%08x\n",
+ dev_err(kbdev->dev, " Shader=%08x%08x\n",
kbase_reg_read(kbdev,
GPU_CONTROL_REG(SHADER_READY_HI)),
kbase_reg_read(kbdev,
GPU_CONTROL_REG(SHADER_READY_LO)));
- dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
+ dev_err(kbdev->dev, " Tiler =%08x%08x\n",
kbase_reg_read(kbdev,
GPU_CONTROL_REG(TILER_READY_HI)),
kbase_reg_read(kbdev,
GPU_CONTROL_REG(TILER_READY_LO)));
- dev_err(kbdev->dev, "\tL2 =%08x%08x\n",
+ dev_err(kbdev->dev, " L2 =%08x%08x\n",
kbase_reg_read(kbdev,
GPU_CONTROL_REG(L2_READY_HI)),
kbase_reg_read(kbdev,
GPU_CONTROL_REG(L2_READY_LO)));
#if MALI_USE_CSF
- dev_err(kbdev->dev, "\tMCU status = %d\n",
- kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS)));
+ kbase_csf_debug_dump_registers(kbdev);
#endif
dev_err(kbdev->dev, "Cores transitioning :\n");
- dev_err(kbdev->dev, "\tShader=%08x%08x\n",
+ dev_err(kbdev->dev, " Shader=%08x%08x\n",
kbase_reg_read(kbdev, GPU_CONTROL_REG(
SHADER_PWRTRANS_HI)),
kbase_reg_read(kbdev, GPU_CONTROL_REG(
SHADER_PWRTRANS_LO)));
- dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
+ dev_err(kbdev->dev, " Tiler =%08x%08x\n",
kbase_reg_read(kbdev, GPU_CONTROL_REG(
TILER_PWRTRANS_HI)),
kbase_reg_read(kbdev, GPU_CONTROL_REG(
TILER_PWRTRANS_LO)));
- dev_err(kbdev->dev, "\tL2 =%08x%08x\n",
+ dev_err(kbdev->dev, " L2 =%08x%08x\n",
kbase_reg_read(kbdev, GPU_CONTROL_REG(
L2_PWRTRANS_HI)),
kbase_reg_read(kbdev, GPU_CONTROL_REG(
L2_PWRTRANS_LO)));
+}
+static void kbase_pm_timed_out(struct kbase_device *kbdev)
+{
+ dev_err(kbdev->dev, "Power transition timed out unexpectedly\n");
+ kbase_gpu_timeout_debug_message(kbdev);
dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n");
/* pixel: If either: