diff options
author | Kevin DuBois <kevindubois@google.com> | 2023-02-08 23:53:14 +0000 |
---|---|---|
committer | Sean Callanan <spyffe@google.com> | 2023-02-14 22:04:29 +0000 |
commit | 722a340cbba39bbae2099dfe79f9a21ff2977d76 (patch) | |
tree | 1aa5ac88b96f44f0dd8b78c7aa209b90650b1ece /mali_kbase/backend | |
parent | 2e6974379ba5a7f083c086c04a3dbaba5ba824cd (diff) | |
download | gpu-722a340cbba39bbae2099dfe79f9a21ff2977d76.tar.gz |
mali_kbase: add log around hang location
Adds logging and a state machine prompt at the place where we observe a hang
during system suspend:
- Arm's requested output in kbase_pm_wait_for_poweroff_work_complete
- Our own internal log output in kbase_pm_wait_for_poweroff_work_complete and
  other places
- Firmware ping in kbase_pm_wait_for_poweroff_work_complete (type B)
- Firmware ping in suspend timeout reporting (type A)
Bug: 252072919
Test: boot
Change-Id: I1672c5143afbca8862a1be02a3f29ca17f9839fd
Diffstat (limited to 'mali_kbase/backend')
-rw-r--r-- | mali_kbase/backend/gpu/mali_kbase_pm_backend.c | 34 | ||||
-rw-r--r-- | mali_kbase/backend/gpu/mali_kbase_pm_driver.c | 50 |
2 files changed, 68 insertions, 16 deletions
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c index fcf98b0..65741cc 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c @@ -693,8 +693,40 @@ static bool is_poweroff_in_progress(struct kbase_device *kbdev) void kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev) { - wait_event_killable(kbdev->pm.backend.poweroff_wait, +#define POWEROFF_TIMEOUT_MSEC 200 + long remaining = msecs_to_jiffies(POWEROFF_TIMEOUT_MSEC); + remaining = wait_event_killable_timeout(kbdev->pm.backend.poweroff_wait, + is_poweroff_in_progress(kbdev), remaining); + if (!remaining) { + unsigned long flags; + kbasep_platform_event_core_dump(kbdev, "poweroff work timeout"); + dev_err(kbdev->dev, "failed to wait for poweroff worker after %ims", + POWEROFF_TIMEOUT_MSEC); + kbase_gpu_timeout_debug_message(kbdev); + dev_err(kbdev->dev, "gpu_poweroff_wait_work pending %d", + work_pending(&kbdev->pm.backend.gpu_poweroff_wait_work)); +#if MALI_USE_CSF + //csf.scheduler.state should be accessed with scheduler lock! + //callchains go through this function though holding that lock + //so just print without locking. + dev_err(kbdev->dev, "scheduler.state %d", kbdev->csf.scheduler.state); + dev_err(kbdev->dev, "Firmware ping %d", kbase_csf_firmware_ping_wait(kbdev)); +#endif + //Attempt another state machine transition prompt. + dev_err(kbdev->dev, "Attempt to prompt state machine"); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + dev_err(kbdev->dev, "GPU state after re-prompt of state machine"); + kbase_gpu_timeout_debug_message(kbdev); + + dev_err(kbdev->dev, "retrying wait, this is likely to still hang. 
%d", is_poweroff_in_progress(kbdev)); + wait_event_killable(kbdev->pm.backend.poweroff_wait, + is_poweroff_in_progress(kbdev)); + } +#undef POWEROFF_TIMEOUT_MSEC } KBASE_EXPORT_TEST_API(kbase_pm_wait_for_poweroff_work_complete); diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c index d998ae5..e73c19b 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c @@ -1585,6 +1585,10 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) kbase_l2_core_state_to_string(prev_state), kbase_l2_core_state_to_string( backend->l2_state)); + if (!kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off && + backend->l2_state == KBASE_L2_OFF) { + dev_warn(kbdev->dev, "transition to l2 off without waking waiter"); + } } } while (backend->l2_state != prev_state); @@ -1594,6 +1598,8 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off = false; queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq, &kbdev->pm.backend.gpu_poweroff_wait_work); + } else if (backend->l2_state == KBASE_L2_OFF) { + dev_warn(kbdev->dev, "l2 off - skipped queue_work for waking up potential waiters"); } return 0; @@ -2336,61 +2342,75 @@ void kbase_pm_reset_complete(struct kbase_device *kbdev) #define PM_TIMEOUT_MS (5000) /* 5s */ #endif -static void kbase_pm_timed_out(struct kbase_device *kbdev) -{ +void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev) { unsigned long flags; - - dev_err(kbdev->dev, "Power transition timed out unexpectedly\n"); #if !MALI_USE_CSF CSTD_UNUSED(flags); dev_err(kbdev->dev, "Desired state :\n"); - dev_err(kbdev->dev, "\tShader=%016llx\n", + dev_err(kbdev->dev, " Shader=%016llx\n", kbdev->pm.backend.shaders_desired ? 
kbdev->pm.backend.shaders_avail : 0); #else + dev_err(kbdev->dev, "GPU pm state :\n"); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - dev_err(kbdev->dev, "\tMCU desired = %d\n", + dev_err(kbdev->dev, " scheduler.pm_active_count = %d", kbdev->csf.scheduler.pm_active_count); + dev_err(kbdev->dev, " poweron_required %d pm.active_count %d invoke_poweroff_wait_wq_when_l2_off %d", + kbdev->pm.backend.poweron_required, + kbdev->pm.active_count, + kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off); + dev_err(kbdev->dev, " gpu_poweroff_wait_work pending %d", + work_pending(&kbdev->pm.backend.gpu_poweroff_wait_work)); + dev_err(kbdev->dev, " MCU desired = %d\n", kbase_pm_is_mcu_desired(kbdev)); - dev_err(kbdev->dev, "\tMCU sw state = %d\n", + dev_err(kbdev->dev, " MCU sw state = %d\n", kbdev->pm.backend.mcu_state); + dev_err(kbdev->dev, " L2 desired = %d (locked_off: %d)\n", + kbase_pm_is_l2_desired(kbdev), + kbdev->pm.backend.policy_change_clamp_state_to_off); + dev_err(kbdev->dev, " L2 sw state = %d\n", + kbdev->pm.backend.l2_state); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); #endif dev_err(kbdev->dev, "Current state :\n"); - dev_err(kbdev->dev, "\tShader=%08x%08x\n", + dev_err(kbdev->dev, " Shader=%08x%08x\n", kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_READY_HI)), kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_READY_LO))); - dev_err(kbdev->dev, "\tTiler =%08x%08x\n", + dev_err(kbdev->dev, " Tiler =%08x%08x\n", kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_READY_HI)), kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_READY_LO))); - dev_err(kbdev->dev, "\tL2 =%08x%08x\n", + dev_err(kbdev->dev, " L2 =%08x%08x\n", kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_READY_HI)), kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_READY_LO))); #if MALI_USE_CSF - dev_err(kbdev->dev, "\tMCU status = %d\n", - kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS))); + kbase_csf_debug_dump_registers(kbdev); #endif dev_err(kbdev->dev, "Cores transitioning :\n"); - dev_err(kbdev->dev, 
"\tShader=%08x%08x\n", + dev_err(kbdev->dev, " Shader=%08x%08x\n", kbase_reg_read(kbdev, GPU_CONTROL_REG( SHADER_PWRTRANS_HI)), kbase_reg_read(kbdev, GPU_CONTROL_REG( SHADER_PWRTRANS_LO))); - dev_err(kbdev->dev, "\tTiler =%08x%08x\n", + dev_err(kbdev->dev, " Tiler =%08x%08x\n", kbase_reg_read(kbdev, GPU_CONTROL_REG( TILER_PWRTRANS_HI)), kbase_reg_read(kbdev, GPU_CONTROL_REG( TILER_PWRTRANS_LO))); - dev_err(kbdev->dev, "\tL2 =%08x%08x\n", + dev_err(kbdev->dev, " L2 =%08x%08x\n", kbase_reg_read(kbdev, GPU_CONTROL_REG( L2_PWRTRANS_HI)), kbase_reg_read(kbdev, GPU_CONTROL_REG( L2_PWRTRANS_LO))); +} +static void kbase_pm_timed_out(struct kbase_device *kbdev) +{ + dev_err(kbdev->dev, "Power transition timed out unexpectedly\n"); + kbase_gpu_timeout_debug_message(kbdev); dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n"); /* pixel: If either: |