From fd068334b8839c878cbff750e267b05d7103407d Mon Sep 17 00:00:00 2001 From: Varad Gautam Date: Fri, 31 Mar 2023 14:04:12 +0000 Subject: pixel_gpu_sscd: Prevent dumping multiple SSCDs when the GPU hangs Add a heuristic to ratelimit SSCD generation for "GPU hang"-type coredumps. Typically when the GPU hangs, this codepath is hit multiple times leading to unnecessary SSCD generation per hang (sometimes > 200 coredumps for a single incident). The heuristic skips SSCD generation depending on: 1. whether there was a "GPU hang" coredump recently within the GPU_HANG_SSCD_TIMEOUT_MS time window. 2. whether there was an unsuccessful GPU reset, which implies the system will end up rebooting soon. Change-Id: I761057aee9c4ff9f32d658c49b99eb162486033b Bug: 264595878 Signed-off-by: Varad Gautam Test: b/264595878#comment7 --- mali_kbase/platform/pixel/pixel_gpu_sscd.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'mali_kbase') diff --git a/mali_kbase/platform/pixel/pixel_gpu_sscd.c b/mali_kbase/platform/pixel/pixel_gpu_sscd.c index 24cb95e..6a284fa 100644 --- a/mali_kbase/platform/pixel/pixel_gpu_sscd.c +++ b/mali_kbase/platform/pixel/pixel_gpu_sscd.c @@ -12,6 +12,7 @@ /* Pixel integration includes */ #include "mali_kbase_config_platform.h" +#include #include "pixel_gpu_sscd.h" #include "pixel_gpu_debug.h" #include "pixel_gpu_control.h" @@ -473,6 +474,8 @@ static void segments_term(struct kbase_device *kbdev, struct sscd_segment* segme memset(segments, 0, sizeof(struct sscd_segment) * NUM_SEGMENTS); } +#define GPU_HANG_SSCD_TIMEOUT_MS (300000) /* 300s */ + /** * gpu_sscd_dump() - Initiates and reports a subsystem core-dump of the GPU. 
* @@ -487,8 +490,23 @@ void gpu_sscd_dump(struct kbase_device *kbdev, const char* reason) struct sscd_platform_data *pdata = dev_get_platdata(&sscd_dev.dev); struct pixel_context *pc = kbdev->platform_context; int ec = 0; - unsigned long flags; + unsigned long flags, current_ts = jiffies; struct pixel_gpu_pdc_status pdc_status; + static unsigned long last_hang_sscd_ts; + + if (!strcmp(reason, "GPU hang")) { + /* GPU hang - avoid multiple coredumps for the same hang until + * GPU_HANG_SSCD_TIMEOUT_MS passes and GPU reset shows no failure. + */ + if (!last_hang_sscd_ts || (time_after(current_ts, + last_hang_sscd_ts + msecs_to_jiffies(GPU_HANG_SSCD_TIMEOUT_MS)) && + !kbase_reset_gpu_failed(kbdev))) { + last_hang_sscd_ts = current_ts; + } else { + dev_info(kbdev->dev, "pixel: skipping mali subsystem core dump"); + return; + } + } dev_info(kbdev->dev, "pixel: mali subsystem core dump in progress"); /* No point in proceeding if we can't report the dumped data */ -- cgit v1.2.3