summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Varad Gautam <varadgautam@google.com> 2024-01-08 15:10:20 +0000
committer: Treehugger Robot <android-test-infra-autosubmit@system.gserviceaccount.com> 2024-01-09 21:35:44 +0000
commit: e3adb6f3566d67217b916f0ab1716dd55f858836 (patch)
tree: 76594bdc36df767d5638aea403379322c0611875
parent: 38c1fd7b5833697c04ed8d836a73df6c0a4172e3 (diff)
download: gpu-e3adb6f3566d67217b916f0ab1716dd55f858836.tar.gz
mali_kbase: Add more gpu error events
Bug: 312206733
Change-Id: I154b927152d768944e02fe6083e66bdbb0f47e3a
Signed-off-by: Varad Gautam <varadgautam@google.com>
-rw-r--r-- mali_kbase/csf/mali_kbase_csf.c | 4
-rw-r--r-- mali_kbase/csf/mali_kbase_csf_firmware.c | 2
-rw-r--r-- mali_kbase/csf/mali_kbase_csf_scheduler.c | 7
-rw-r--r-- mali_kbase/mmu/mali_kbase_mmu_hw_direct.c | 2
-rw-r--r-- mali_kbase/platform/pixel/pixel_gpu_uevent.c | 12
-rw-r--r-- mali_kbase/platform/pixel/pixel_gpu_uevent.h | 8
6 files changed, 31 insertions, 4 deletions
diff --git a/mali_kbase/csf/mali_kbase_csf.c b/mali_kbase/csf/mali_kbase_csf.c
index 94bd2d0..7fcd45e 100644
--- a/mali_kbase/csf/mali_kbase_csf.c
+++ b/mali_kbase/csf/mali_kbase_csf.c
@@ -40,6 +40,7 @@
#include <tl/mali_kbase_tracepoints.h>
#include "mali_kbase_csf_mcu_shared_reg.h"
#include <linux/version_compat_defs.h>
+#include "mali_kbase_config_defaults.h"
#define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK)
#define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK)
@@ -1922,6 +1923,7 @@ static void report_tiler_oom_error(struct kbase_queue_group *group)
kbase_csf_event_add_error(group->kctx, &group->error_fatal, &error);
kbase_event_wakeup_sync(group->kctx);
+ pixel_gpu_uevent_kmd_error_send(group->kctx->kbdev, GPU_UEVENT_INFO_TILER_OOM);
}
static void flush_gpu_cache_on_fatal_error(struct kbase_device *kbdev)
@@ -2108,6 +2110,7 @@ static void timer_event_worker(struct work_struct *data)
struct kbase_device *const kbdev = kctx->kbdev;
bool reset_prevented = false;
int err = kbase_reset_gpu_prevent_and_wait(kbdev);
+ pixel_gpu_uevent_kmd_error_send(kbdev, GPU_UEVENT_INFO_PROGRESS_TIMER);
if (err)
dev_warn(
@@ -2366,6 +2369,7 @@ static void cs_error_worker(struct work_struct *const data)
bool reset_prevented = false;
int err;
+ pixel_gpu_uevent_kmd_error_send(kbdev, GPU_UEVENT_INFO_CS_ERROR);
kbase_debug_csf_fault_wait_completion(kbdev);
err = kbase_reset_gpu_prevent_and_wait(kbdev);
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.c b/mali_kbase/csf/mali_kbase_csf_firmware.c
index 7b9aefd..d6d2456 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware.c
@@ -31,6 +31,7 @@
#include "mali_kbase_reset_gpu.h"
#include "mali_kbase_ctx_sched.h"
#include "mali_kbase_csf_scheduler.h"
+#include "mali_kbase_config_defaults.h"
#include <mali_kbase_hwaccess_time.h>
#include "device/mali_kbase_device.h"
#include "backend/gpu/mali_kbase_pm_internal.h"
@@ -1558,6 +1559,7 @@ static void firmware_error_worker(struct work_struct *const data)
{
struct kbase_device *const kbdev =
container_of(data, struct kbase_device, csf.fw_error_work);
+ pixel_gpu_uevent_kmd_error_send(kbdev, GPU_UEVENT_INFO_FW_ERROR);
handle_internal_firmware_fatal(kbdev);
}
diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.c b/mali_kbase/csf/mali_kbase_csf_scheduler.c
index fcb972f..8d0fff6 100644
--- a/mali_kbase/csf/mali_kbase_csf_scheduler.c
+++ b/mali_kbase/csf/mali_kbase_csf_scheduler.c
@@ -1114,6 +1114,7 @@ static bool scheduler_protm_wait_quit(struct kbase_device *kbdev)
dev_warn(kbdev->dev, "[%llu] Timeout (%d ms), protm_quit wait skipped",
kbase_backend_get_cycle_cnt(kbdev), fw_timeout_ms);
schedule_actions_trigger_df(kbdev, kctx, DF_PROTECTED_MODE_EXIT_TIMEOUT);
+ pixel_gpu_uevent_kmd_error_send(kbdev, GPU_UEVENT_INFO_PMODE_EXIT_TIMEOUT);
success = false;
}
@@ -4504,10 +4505,12 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
err = kbase_csf_wait_protected_mode_enter(kbdev);
up_write(&kbdev->csf.pmode_sync_sem);
- if (err)
+ if (err) {
+ pixel_gpu_uevent_kmd_error_send(kbdev, GPU_UEVENT_INFO_PMODE_ENTRY_FAILURE);
schedule_actions_trigger_df(
kbdev, input_grp->kctx,
DF_PROTECTED_MODE_ENTRY_FAILURE);
+ }
scheduler->protm_enter_time = ktime_get_raw();
@@ -5944,7 +5947,7 @@ static void evict_lru_or_blocked_csg(struct kbase_device *kbdev)
suspend_queue_group(lru_idle_group);
if (wait_csg_slots_suspend(kbdev, &slot_mask)) {
enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT;
-
+ pixel_gpu_uevent_kmd_error_send(kbdev, GPU_UEVENT_INFO_CSG_SUSPEND);
dev_warn(
kbdev->dev,
"[%llu] LRU idle group %d of context %d_%d failed to suspend on slot %d (timeout %d ms)",
diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
index 4cfa496..bfe6527 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
+++ b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
@@ -28,6 +28,7 @@
#include <mmu/mali_kbase_mmu_hw.h>
#include <tl/mali_kbase_tracepoints.h>
#include <linux/delay.h>
+#include "mali_kbase_config_defaults.h"
#if MALI_USE_CSF
/**
@@ -179,6 +180,7 @@ static int wait_ready(struct kbase_device *kbdev, unsigned int as_nr)
if (!err)
return 0;
+ pixel_gpu_uevent_kmd_error_send(kbdev, GPU_UEVENT_INFO_MMU_AS_ACTIVE_STUCK);
dev_err(kbdev->dev,
"AS_ACTIVE bit stuck for as %u. Might be caused by unstable GPU clk/pwr or faulty system",
as_nr);
diff --git a/mali_kbase/platform/pixel/pixel_gpu_uevent.c b/mali_kbase/platform/pixel/pixel_gpu_uevent.c
index 4a5c1d3..9f0589c 100644
--- a/mali_kbase/platform/pixel/pixel_gpu_uevent.c
+++ b/mali_kbase/platform/pixel/pixel_gpu_uevent.c
@@ -36,9 +36,17 @@ static bool gpu_uevent_check_valid(const struct gpu_uevent *evt)
case GPU_UEVENT_INFO_CSG_SLOT_READY:
case GPU_UEVENT_INFO_L2_PM_TIMEOUT:
case GPU_UEVENT_INFO_PM_TIMEOUT:
+ case GPU_UEVENT_INFO_TILER_OOM:
+ case GPU_UEVENT_INFO_PROGRESS_TIMER:
+ case GPU_UEVENT_INFO_CS_ERROR:
+ case GPU_UEVENT_INFO_FW_ERROR:
+ case GPU_UEVENT_INFO_PMODE_EXIT_TIMEOUT:
+ case GPU_UEVENT_INFO_PMODE_ENTRY_FAILURE:
+ case GPU_UEVENT_INFO_GPU_PAGE_FAULT:
+ case GPU_UEVENT_INFO_MMU_AS_ACTIVE_STUCK:
return true;
default:
- break;
+ return false;
}
case GPU_UEVENT_TYPE_GPU_RESET:
switch (evt->info) {
@@ -46,7 +54,7 @@ static bool gpu_uevent_check_valid(const struct gpu_uevent *evt)
case GPU_UEVENT_INFO_CSF_RESET_FAILED:
return true;
default:
- break;
+ return false;
}
default:
break;
diff --git a/mali_kbase/platform/pixel/pixel_gpu_uevent.h b/mali_kbase/platform/pixel/pixel_gpu_uevent.h
index b6756ec..b477262 100644
--- a/mali_kbase/platform/pixel/pixel_gpu_uevent.h
+++ b/mali_kbase/platform/pixel/pixel_gpu_uevent.h
@@ -48,6 +48,14 @@ static inline const char *gpu_uevent_type_str(enum gpu_uevent_type type) {
GPU_UEVENT_INFO(PM_TIMEOUT) \
GPU_UEVENT_INFO(CSF_RESET_OK) \
GPU_UEVENT_INFO(CSF_RESET_FAILED) \
+ GPU_UEVENT_INFO(TILER_OOM) \
+ GPU_UEVENT_INFO(PROGRESS_TIMER) \
+ GPU_UEVENT_INFO(CS_ERROR) \
+ GPU_UEVENT_INFO(FW_ERROR) \
+ GPU_UEVENT_INFO(PMODE_EXIT_TIMEOUT) \
+ GPU_UEVENT_INFO(PMODE_ENTRY_FAILURE) \
+ GPU_UEVENT_INFO(GPU_PAGE_FAULT) \
+ GPU_UEVENT_INFO(MMU_AS_ACTIVE_STUCK) \
GPU_UEVENT_INFO(MAX)
#define GPU_UEVENT_INFO(info) GPU_UEVENT_INFO_##info,