diff options
author | Varad Gautam <varadgautam@google.com> | 2024-01-08 15:10:20 +0000 |
---|---|---|
committer | Treehugger Robot <android-test-infra-autosubmit@system.gserviceaccount.com> | 2024-01-09 21:35:44 +0000 |
commit | e3adb6f3566d67217b916f0ab1716dd55f858836 (patch) | |
tree | 76594bdc36df767d5638aea403379322c0611875 | |
parent | 38c1fd7b5833697c04ed8d836a73df6c0a4172e3 (diff) | |
download | gpu-e3adb6f3566d67217b916f0ab1716dd55f858836.tar.gz |
mali_kbase: Add more gpu error events
Bug: 312206733
Change-Id: I154b927152d768944e02fe6083e66bdbb0f47e3a
Signed-off-by: Varad Gautam <varadgautam@google.com>
-rw-r--r-- | mali_kbase/csf/mali_kbase_csf.c | 4 | ||||
-rw-r--r-- | mali_kbase/csf/mali_kbase_csf_firmware.c | 2 | ||||
-rw-r--r-- | mali_kbase/csf/mali_kbase_csf_scheduler.c | 7 | ||||
-rw-r--r-- | mali_kbase/mmu/mali_kbase_mmu_hw_direct.c | 2 | ||||
-rw-r--r-- | mali_kbase/platform/pixel/pixel_gpu_uevent.c | 12 | ||||
-rw-r--r-- | mali_kbase/platform/pixel/pixel_gpu_uevent.h | 8 |
6 files changed, 31 insertions, 4 deletions
diff --git a/mali_kbase/csf/mali_kbase_csf.c b/mali_kbase/csf/mali_kbase_csf.c index 94bd2d0..7fcd45e 100644 --- a/mali_kbase/csf/mali_kbase_csf.c +++ b/mali_kbase/csf/mali_kbase_csf.c @@ -40,6 +40,7 @@ #include <tl/mali_kbase_tracepoints.h> #include "mali_kbase_csf_mcu_shared_reg.h" #include <linux/version_compat_defs.h> +#include "mali_kbase_config_defaults.h" #define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK) #define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK) @@ -1922,6 +1923,7 @@ static void report_tiler_oom_error(struct kbase_queue_group *group) kbase_csf_event_add_error(group->kctx, &group->error_fatal, &error); kbase_event_wakeup_sync(group->kctx); + pixel_gpu_uevent_kmd_error_send(group->kctx->kbdev, GPU_UEVENT_INFO_TILER_OOM); } static void flush_gpu_cache_on_fatal_error(struct kbase_device *kbdev) @@ -2108,6 +2110,7 @@ static void timer_event_worker(struct work_struct *data) struct kbase_device *const kbdev = kctx->kbdev; bool reset_prevented = false; int err = kbase_reset_gpu_prevent_and_wait(kbdev); + pixel_gpu_uevent_kmd_error_send(kbdev, GPU_UEVENT_INFO_PROGRESS_TIMER); if (err) dev_warn( @@ -2366,6 +2369,7 @@ static void cs_error_worker(struct work_struct *const data) bool reset_prevented = false; int err; + pixel_gpu_uevent_kmd_error_send(kbdev, GPU_UEVENT_INFO_CS_ERROR); kbase_debug_csf_fault_wait_completion(kbdev); err = kbase_reset_gpu_prevent_and_wait(kbdev); diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.c b/mali_kbase/csf/mali_kbase_csf_firmware.c index 7b9aefd..d6d2456 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware.c @@ -31,6 +31,7 @@ #include "mali_kbase_reset_gpu.h" #include "mali_kbase_ctx_sched.h" #include "mali_kbase_csf_scheduler.h" +#include "mali_kbase_config_defaults.h" #include <mali_kbase_hwaccess_time.h> #include "device/mali_kbase_device.h" #include "backend/gpu/mali_kbase_pm_internal.h" @@ -1558,6 +1559,7 @@ static void firmware_error_worker(struct work_struct *const data) { struct kbase_device *const kbdev = container_of(data, struct kbase_device, csf.fw_error_work); + pixel_gpu_uevent_kmd_error_send(kbdev, GPU_UEVENT_INFO_FW_ERROR); handle_internal_firmware_fatal(kbdev); } diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.c b/mali_kbase/csf/mali_kbase_csf_scheduler.c index fcb972f..8d0fff6 100644 --- a/mali_kbase/csf/mali_kbase_csf_scheduler.c +++ b/mali_kbase/csf/mali_kbase_csf_scheduler.c @@ -1114,6 +1114,7 @@ static bool scheduler_protm_wait_quit(struct kbase_device *kbdev) dev_warn(kbdev->dev, "[%llu] Timeout (%d ms), protm_quit wait skipped", kbase_backend_get_cycle_cnt(kbdev), fw_timeout_ms); schedule_actions_trigger_df(kbdev, kctx, DF_PROTECTED_MODE_EXIT_TIMEOUT); + pixel_gpu_uevent_kmd_error_send(kbdev, GPU_UEVENT_INFO_PMODE_EXIT_TIMEOUT); success = false; } @@ -4504,10 +4505,12 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, err = kbase_csf_wait_protected_mode_enter(kbdev); up_write(&kbdev->csf.pmode_sync_sem); - if (err) + if (err) { + pixel_gpu_uevent_kmd_error_send(kbdev, GPU_UEVENT_INFO_PMODE_ENTRY_FAILURE); schedule_actions_trigger_df( kbdev, input_grp->kctx, DF_PROTECTED_MODE_ENTRY_FAILURE); + } scheduler->protm_enter_time = ktime_get_raw(); @@ -5944,7 +5947,7 @@ static void evict_lru_or_blocked_csg(struct kbase_device *kbdev) suspend_queue_group(lru_idle_group); if (wait_csg_slots_suspend(kbdev, &slot_mask)) { enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT; - + pixel_gpu_uevent_kmd_error_send(kbdev, GPU_UEVENT_INFO_CSG_SUSPEND); dev_warn( kbdev->dev, "[%llu] LRU idle group %d of context %d_%d failed to suspend on slot %d (timeout %d ms)", diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c index 4cfa496..bfe6527 100644 --- a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c +++ b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c @@ -28,6 +28,7 @@ #include <mmu/mali_kbase_mmu_hw.h> #include <tl/mali_kbase_tracepoints.h> #include <linux/delay.h> +#include "mali_kbase_config_defaults.h" #if MALI_USE_CSF /** @@ -179,6 +180,7 @@ static int wait_ready(struct kbase_device *kbdev, unsigned int as_nr) if (!err) return 0; + pixel_gpu_uevent_kmd_error_send(kbdev, GPU_UEVENT_INFO_MMU_AS_ACTIVE_STUCK); dev_err(kbdev->dev, "AS_ACTIVE bit stuck for as %u. Might be caused by unstable GPU clk/pwr or faulty system", as_nr); diff --git a/mali_kbase/platform/pixel/pixel_gpu_uevent.c b/mali_kbase/platform/pixel/pixel_gpu_uevent.c index 4a5c1d3..9f0589c 100644 --- a/mali_kbase/platform/pixel/pixel_gpu_uevent.c +++ b/mali_kbase/platform/pixel/pixel_gpu_uevent.c @@ -36,9 +36,17 @@ static bool gpu_uevent_check_valid(const struct gpu_uevent *evt) case GPU_UEVENT_INFO_CSG_SLOT_READY: case GPU_UEVENT_INFO_L2_PM_TIMEOUT: case GPU_UEVENT_INFO_PM_TIMEOUT: + case GPU_UEVENT_INFO_TILER_OOM: + case GPU_UEVENT_INFO_PROGRESS_TIMER: + case GPU_UEVENT_INFO_CS_ERROR: + case GPU_UEVENT_INFO_FW_ERROR: + case GPU_UEVENT_INFO_PMODE_EXIT_TIMEOUT: + case GPU_UEVENT_INFO_PMODE_ENTRY_FAILURE: + case GPU_UEVENT_INFO_GPU_PAGE_FAULT: + case GPU_UEVENT_INFO_MMU_AS_ACTIVE_STUCK: return true; default: - break; + return false; } case GPU_UEVENT_TYPE_GPU_RESET: switch (evt->info) { @@ -46,7 +54,7 @@ static bool gpu_uevent_check_valid(const struct gpu_uevent *evt) case GPU_UEVENT_INFO_CSF_RESET_FAILED: return true; default: - break; + return false; } default: break; diff --git a/mali_kbase/platform/pixel/pixel_gpu_uevent.h b/mali_kbase/platform/pixel/pixel_gpu_uevent.h index b6756ec..b477262 100644 --- a/mali_kbase/platform/pixel/pixel_gpu_uevent.h +++ b/mali_kbase/platform/pixel/pixel_gpu_uevent.h @@ -48,6 +48,14 @@ static inline const char *gpu_uevent_type_str(enum gpu_uevent_type type) { GPU_UEVENT_INFO(PM_TIMEOUT) \ GPU_UEVENT_INFO(CSF_RESET_OK) \ GPU_UEVENT_INFO(CSF_RESET_FAILED) \ + GPU_UEVENT_INFO(TILER_OOM) \ + GPU_UEVENT_INFO(PROGRESS_TIMER) \ + GPU_UEVENT_INFO(CS_ERROR) \ + GPU_UEVENT_INFO(FW_ERROR) \ + GPU_UEVENT_INFO(PMODE_EXIT_TIMEOUT) \ + GPU_UEVENT_INFO(PMODE_ENTRY_FAILURE) \ + GPU_UEVENT_INFO(GPU_PAGE_FAULT) \ + GPU_UEVENT_INFO(MMU_AS_ACTIVE_STUCK) \ GPU_UEVENT_INFO(MAX) #define GPU_UEVENT_INFO(info) GPU_UEVENT_INFO_##info, |