diff options
author | Robin Peng <robinpeng@google.com> | 2023-03-13 05:02:39 +0000 |
---|---|---|
committer | Robin Peng <robinpeng@google.com> | 2023-03-14 03:36:42 +0000 |
commit | 2ad96fc3373735858ac2251d10d71555313854d5 (patch) | |
tree | 024c82365e4b548c818e7864d27208d9ba62093b /mali_kbase/backend | |
parent | 361910a924d6d5302f223b8482ae051cd2c64d61 (diff) | |
parent | 3de0f21de599057191564844ca1a748fdc962d46 (diff) | |
download | gpu-2ad96fc3373735858ac2251d10d71555313854d5.tar.gz |
Merge android13-gs-pixel-5.10-tm-qpr3 into android13-gs-pixel-5.10-udc
Bug: 255246572
Change-Id: I4154a51b5f02391cda656a64dc03db3df7671ed9
Signed-off-by: Robin Peng <robinpeng@google.com>
Diffstat (limited to 'mali_kbase/backend')
-rw-r--r-- | mali_kbase/backend/gpu/mali_kbase_pm_backend.c | 34 | ||||
-rw-r--r-- | mali_kbase/backend/gpu/mali_kbase_pm_driver.c | 80 |
2 files changed, 72 insertions, 42 deletions
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c index ca9a106..047b83e 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c @@ -693,8 +693,40 @@ static bool is_poweroff_in_progress(struct kbase_device *kbdev) void kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev) { - wait_event_killable(kbdev->pm.backend.poweroff_wait, +#define POWEROFF_TIMEOUT_MSEC 200 + long remaining = msecs_to_jiffies(POWEROFF_TIMEOUT_MSEC); + remaining = wait_event_killable_timeout(kbdev->pm.backend.poweroff_wait, + is_poweroff_in_progress(kbdev), remaining); + if (!remaining) { + unsigned long flags; + kbasep_platform_event_core_dump(kbdev, "poweroff work timeout"); + dev_err(kbdev->dev, "failed to wait for poweroff worker after %ims", + POWEROFF_TIMEOUT_MSEC); + kbase_gpu_timeout_debug_message(kbdev); + dev_err(kbdev->dev, "gpu_poweroff_wait_work pending %d", + work_pending(&kbdev->pm.backend.gpu_poweroff_wait_work)); +#if MALI_USE_CSF + //csf.scheduler.state should be accessed with scheduler lock! + //callchains go through this function though holding that lock + //so just print without locking. + dev_err(kbdev->dev, "scheduler.state %d", kbdev->csf.scheduler.state); + dev_err(kbdev->dev, "Firmware ping %d", kbase_csf_firmware_ping_wait(kbdev)); +#endif + //Attempt another state machine transition prompt. + dev_err(kbdev->dev, "Attempt to prompt state machine"); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + dev_err(kbdev->dev, "GPU state after re-prompt of state machine"); + kbase_gpu_timeout_debug_message(kbdev); + + dev_err(kbdev->dev, "retrying wait, this is likely to still hang. %d", is_poweroff_in_progress(kbdev)); + wait_event_killable(kbdev->pm.backend.poweroff_wait, + is_poweroff_in_progress(kbdev)); + } +#undef POWEROFF_TIMEOUT_MSEC } KBASE_EXPORT_TEST_API(kbase_pm_wait_for_poweroff_work_complete); diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c index 803becc..b046903 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c @@ -1585,6 +1585,12 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) kbase_l2_core_state_to_string(prev_state), kbase_l2_core_state_to_string( backend->l2_state)); +#if IS_ENABLED(CONFIG_SOC_GS201) + if (!kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off && + backend->l2_state == KBASE_L2_OFF) { + dev_warn(kbdev->dev, "transition to l2 off without waking waiter"); + } +#endif } } while (backend->l2_state != prev_state); @@ -1594,6 +1600,10 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off = false; queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq, &kbdev->pm.backend.gpu_poweroff_wait_work); +#if IS_ENABLED(CONFIG_SOC_GS201) + } else if (backend->l2_state == KBASE_L2_OFF) { + dev_warn(kbdev->dev, "l2 off - skipped queue_work for waking up potential waiters"); +#endif } return 0; @@ -2336,21 +2346,26 @@ void kbase_pm_reset_complete(struct kbase_device *kbdev) #define PM_TIMEOUT_MS (5000) /* 5s */ #endif -static void kbase_pm_timed_out(struct kbase_device *kbdev) -{ +void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev) { unsigned long flags; - - dev_err(kbdev->dev, "Power transition timed out unexpectedly\n"); #if !MALI_USE_CSF CSTD_UNUSED(flags); dev_err(kbdev->dev, "Desired state :\n"); - dev_err(kbdev->dev, "\tShader=%016llx\n", + dev_err(kbdev->dev, " Shader=%016llx\n", kbdev->pm.backend.shaders_desired ? kbdev->pm.backend.shaders_avail : 0); #else + dev_err(kbdev->dev, "GPU pm state :\n"); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - dev_err(kbdev->dev, "\tMCU desired = %d\n", + dev_err(kbdev->dev, " scheduler.pm_active_count = %d", kbdev->csf.scheduler.pm_active_count); + dev_err(kbdev->dev, " poweron_required %d pm.active_count %d invoke_poweroff_wait_wq_when_l2_off %d", + kbdev->pm.backend.poweron_required, + kbdev->pm.active_count, + kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off); + dev_err(kbdev->dev, " gpu_poweroff_wait_work pending %d", + work_pending(&kbdev->pm.backend.gpu_poweroff_wait_work)); + dev_err(kbdev->dev, " MCU desired = %d\n", kbase_pm_is_mcu_desired(kbdev)); - dev_err(kbdev->dev, "\tMCU sw state = %d\n", + dev_err(kbdev->dev, " MCU sw state = %d\n", kbdev->pm.backend.mcu_state); dev_err(kbdev->dev, "\tL2 desired = %d (locked_off: %d)\n", kbase_pm_is_l2_desired(kbdev), kbdev->pm.backend.policy_change_clamp_state_to_off); @@ -2363,60 +2378,51 @@ static void kbase_pm_timed_out(struct kbase_device *kbdev) spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); #endif dev_err(kbdev->dev, "Current state :\n"); - dev_err(kbdev->dev, "\tShader=%08x%08x\n", + dev_err(kbdev->dev, " Shader=%08x%08x\n", kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_READY_HI)), kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_READY_LO))); - dev_err(kbdev->dev, "\tTiler =%08x%08x\n", + dev_err(kbdev->dev, " Tiler =%08x%08x\n", kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_READY_HI)), kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_READY_LO))); - dev_err(kbdev->dev, "\tL2 =%08x%08x\n", + dev_err(kbdev->dev, " L2 =%08x%08x\n", kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_READY_HI)), kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_READY_LO))); #if MALI_USE_CSF - dev_err(kbdev->dev, "\tMCU status = %d\n", - kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS))); + kbase_csf_debug_dump_registers(kbdev); #endif dev_err(kbdev->dev, "Cores transitioning :\n"); - dev_err(kbdev->dev, "\tShader=%08x%08x\n", + dev_err(kbdev->dev, " Shader=%08x%08x\n", kbase_reg_read(kbdev, GPU_CONTROL_REG( SHADER_PWRTRANS_HI)), kbase_reg_read(kbdev, GPU_CONTROL_REG( SHADER_PWRTRANS_LO))); - dev_err(kbdev->dev, "\tTiler =%08x%08x\n", + dev_err(kbdev->dev, " Tiler =%08x%08x\n", kbase_reg_read(kbdev, GPU_CONTROL_REG( TILER_PWRTRANS_HI)), kbase_reg_read(kbdev, GPU_CONTROL_REG( TILER_PWRTRANS_LO))); - dev_err(kbdev->dev, "\tL2 =%08x%08x\n", + dev_err(kbdev->dev, " L2 =%08x%08x\n", kbase_reg_read(kbdev, GPU_CONTROL_REG( L2_PWRTRANS_HI)), kbase_reg_read(kbdev, GPU_CONTROL_REG( L2_PWRTRANS_LO))); dump_stack(); - dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n"); +} - /* pixel: If either: - * 1. L2/MCU power transition timed out, or, - * 2. kbase state machine fell out of sync with the hw state, - * a soft/hard reset (ie writing to SOFT/HARD_RESET regs) is insufficient to resume - * operation. - * - * Besides, Odin TRM advises against touching SOFT/HARD_RESET - * regs if L2_PWRTRANS is 1 to avoid undefined state. - * - * We have already lost work if we end up here, so send a powercycle to reset the hw, - * which is more reliable. - */ +static void kbase_pm_timed_out(struct kbase_device *kbdev) +{ + dev_err(kbdev->dev, "Power transition timed out unexpectedly\n"); + kbase_gpu_timeout_debug_message(kbdev); + dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n"); if (kbase_prepare_to_reset_gpu(kbdev, - RESET_FLAGS_HWC_UNRECOVERABLE_ERROR | - RESET_FLAGS_FORCE_PM_HW_RESET)) + RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); } @@ -3177,17 +3183,6 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev, kbdev); -#if MALI_USE_CSF - if (kbdev->csf.reset.force_pm_hw_reset && kbdev->pm.backend.callback_hardware_reset) { - dev_err(kbdev->dev, "Power Cycle reset mali"); - kbdev->csf.reset.force_pm_hw_reset = false; - return kbase_pm_hw_reset(kbdev); - } -#endif - - /* Unmask the reset complete interrupt only */ - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED); - if (kbdev->pm.backend.callback_soft_reset) { ret = kbdev->pm.backend.callback_soft_reset(kbdev); if (ret < 0) @@ -3199,6 +3194,9 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) GPU_COMMAND_SOFT_RESET); } + /* Unmask the reset complete interrupt only */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED); + /* Initialize a structure for tracking the status of the reset */ rtdata.kbdev = kbdev; rtdata.timed_out = false; |