summaryrefslogtreecommitdiff
path: root/mali_kbase
diff options
context:
space:
mode:
author: Varad Gautam <varadgautam@google.com> 2023-03-31 14:04:12 +0000
committer: Varad Gautam <varadgautam@google.com> 2023-03-31 14:09:32 +0000
commit: fd068334b8839c878cbff750e267b05d7103407d (patch)
tree: 7adb7cf35f25696c5a2f911400eec39c54ddf8a7 /mali_kbase
parent: 4425c9707d1865d49259b49774c4482be39fae84 (diff)
download: gpu-fd068334b8839c878cbff750e267b05d7103407d.tar.gz
pixel_gpu_sscd: Prevent dumping multiple SSCDs when the GPU hangs
Add a heuristic to rate-limit SSCD generation for "GPU hang"-type coredumps. Typically, when the GPU hangs, this codepath is hit multiple times, leading to unnecessary SSCD generation per hang (sometimes > 200 coredumps for a single incident).

The heuristic skips SSCD generation depending on:
1. whether there was a "GPU hang" coredump recently, within the GPU_HANG_SSCD_TIMEOUT_MS time window.
2. whether there was an unsuccessful GPU reset, which implies the system will end up rebooting soon.

Change-Id: I761057aee9c4ff9f32d658c49b99eb162486033b
Bug: 264595878
Signed-off-by: Varad Gautam <varadgautam@google.com>
Test: b/264595878#comment7
Diffstat (limited to 'mali_kbase')
-rw-r--r-- mali_kbase/platform/pixel/pixel_gpu_sscd.c | 20
1 files changed, 19 insertions, 1 deletions
diff --git a/mali_kbase/platform/pixel/pixel_gpu_sscd.c b/mali_kbase/platform/pixel/pixel_gpu_sscd.c
index 24cb95e..6a284fa 100644
--- a/mali_kbase/platform/pixel/pixel_gpu_sscd.c
+++ b/mali_kbase/platform/pixel/pixel_gpu_sscd.c
@@ -12,6 +12,7 @@
/* Pixel integration includes */
#include "mali_kbase_config_platform.h"
+#include <mali_kbase_reset_gpu.h>
#include "pixel_gpu_sscd.h"
#include "pixel_gpu_debug.h"
#include "pixel_gpu_control.h"
@@ -473,6 +474,8 @@ static void segments_term(struct kbase_device *kbdev, struct sscd_segment* segme
memset(segments, 0, sizeof(struct sscd_segment) * NUM_SEGMENTS);
}
+#define GPU_HANG_SSCD_TIMEOUT_MS (300000) /* 300s */
+
/**
* gpu_sscd_dump() - Initiates and reports a subsystem core-dump of the GPU.
*
@@ -487,8 +490,23 @@ void gpu_sscd_dump(struct kbase_device *kbdev, const char* reason)
struct sscd_platform_data *pdata = dev_get_platdata(&sscd_dev.dev);
struct pixel_context *pc = kbdev->platform_context;
int ec = 0;
- unsigned long flags;
+ unsigned long flags, current_ts = jiffies;
struct pixel_gpu_pdc_status pdc_status;
+ static unsigned long last_hang_sscd_ts;
+
+ if (!strcmp(reason, "GPU hang")) {
+ /* GPU hang - avoid multiple coredumps for the same hang until
+ * GPU_HANG_SSCD_TIMEOUT_MS passes and GPU reset shows no failure.
+ */
+ if (!last_hang_sscd_ts || (time_after(current_ts,
+ last_hang_sscd_ts + msecs_to_jiffies(GPU_HANG_SSCD_TIMEOUT_MS)) &&
+ !kbase_reset_gpu_failed(kbdev))) {
+ last_hang_sscd_ts = current_ts;
+ } else {
+ dev_info(kbdev->dev, "pixel: skipping mali subsystem core dump");
+ return;
+ }
+ }
dev_info(kbdev->dev, "pixel: mali subsystem core dump in progress");
/* No point in proceeding if we can't report the dumped data */