diff options
author | Varad Gautam <varadgautam@google.com> | 2023-03-31 14:04:12 +0000 |
---|---|---|
committer | Varad Gautam <varadgautam@google.com> | 2023-03-31 14:09:32 +0000 |
commit | fd068334b8839c878cbff750e267b05d7103407d (patch) | |
tree | 7adb7cf35f25696c5a2f911400eec39c54ddf8a7 /mali_kbase/platform | |
parent | 4425c9707d1865d49259b49774c4482be39fae84 (diff) | |
download | gpu-fd068334b8839c878cbff750e267b05d7103407d.tar.gz |
pixel_gpu_sscd: Prevent dumping multiple SSCDs when the GPU hangs
Add a heuristic to ratelimit SSCD generation for "GPU hang"-type
coredumps. Typically when the GPU hangs, this codepath is hit multiple
times leading to unnecessary SSCD generation per hang (sometimes > 200
coredumps for a single incident).
The heuristic skips SSCD generation depending on:
1. whether there was a "GPU hang" coredump recently within the
GPU_HANG_SSCD_TIMEOUT_MS time window.
2. whether there was an unsuccessful GPU reset, which implies the
system will end up rebooting soon.
Change-Id: I761057aee9c4ff9f32d658c49b99eb162486033b
Bug: 264595878
Signed-off-by: Varad Gautam <varadgautam@google.com>
Test: b/264595878#comment7
Diffstat (limited to 'mali_kbase/platform')
-rw-r--r-- | mali_kbase/platform/pixel/pixel_gpu_sscd.c | 20 |
1 files changed, 19 insertions, 1 deletions
diff --git a/mali_kbase/platform/pixel/pixel_gpu_sscd.c b/mali_kbase/platform/pixel/pixel_gpu_sscd.c index 24cb95e..6a284fa 100644 --- a/mali_kbase/platform/pixel/pixel_gpu_sscd.c +++ b/mali_kbase/platform/pixel/pixel_gpu_sscd.c @@ -12,6 +12,7 @@ /* Pixel integration includes */ #include "mali_kbase_config_platform.h" +#include <mali_kbase_reset_gpu.h> #include "pixel_gpu_sscd.h" #include "pixel_gpu_debug.h" #include "pixel_gpu_control.h" @@ -473,6 +474,8 @@ static void segments_term(struct kbase_device *kbdev, struct sscd_segment* segme memset(segments, 0, sizeof(struct sscd_segment) * NUM_SEGMENTS); } +#define GPU_HANG_SSCD_TIMEOUT_MS (300000) /* 300s */ + /** * gpu_sscd_dump() - Initiates and reports a subsystem core-dump of the GPU. * @@ -487,8 +490,23 @@ void gpu_sscd_dump(struct kbase_device *kbdev, const char* reason) struct sscd_platform_data *pdata = dev_get_platdata(&sscd_dev.dev); struct pixel_context *pc = kbdev->platform_context; int ec = 0; - unsigned long flags; + unsigned long flags, current_ts = jiffies; struct pixel_gpu_pdc_status pdc_status; + static unsigned long last_hang_sscd_ts; + + if (!strcmp(reason, "GPU hang")) { + /* GPU hang - avoid multiple coredumps for the same hang until + * GPU_HANG_SSCD_TIMEOUT_MS passes and GPU reset shows no failure. + */ + if (!last_hang_sscd_ts || (time_after(current_ts, + last_hang_sscd_ts + msecs_to_jiffies(GPU_HANG_SSCD_TIMEOUT_MS)) && + !kbase_reset_gpu_failed(kbdev))) { + last_hang_sscd_ts = current_ts; + } else { + dev_info(kbdev->dev, "pixel: skipping mali subsystem core dump"); + return; + } + } dev_info(kbdev->dev, "pixel: mali subsystem core dump in progress"); /* No point in proceeding if we can't report the dumped data */ |