summary | refs | log | tree | commit | diff
path: root/mali_kbase/csf/mali_kbase_csf_event.c
diff options
context:
space:
mode:
author: Will McVicker <willmcvicker@google.com> 2024-04-15 11:41:22 -0700
committer: Will McVicker <willmcvicker@google.com> 2024-04-16 10:17:07 -0700
commit: 0aa4c41c172f1e2acdf976c655f75a7a21db9791 (patch)
tree: 878a00410737d020c7be8fa0e2ab6849e310645e /mali_kbase/csf/mali_kbase_csf_event.c
parent: de85b3c05698f1ce2829d3ff977dee90be48b2d8 (diff)
parent: cfb55729953d62d99f66b0adc59963b189e9394b (diff)
download: gpu-0aa4c41c172f1e2acdf976c655f75a7a21db9791.tar.gz
Merge aosp/android-gs-raviole-5.10-android14-qpr2 into aosp/android14-gs-pixel-6.1
* aosp/android-gs-raviole-5.10-android14-qpr2: (354 commits) [Official] MIDCET-5090, GPUCORE-40350: Flushes for L2 powerdown Fix invalid page table entries from occuring. Fix deadlock BTW user thread and page fault worker Fix deadlock BTW user thread and page fault worker csf: Fix kbase_kcpu_command_queue UaF due to bad queue creation Fix kernel build warnings Fix kernel build warnings Add firmware core dump error code in sscd GPUCORE-39469 Error handling for invalid slot when parsing trace data mali_kbase: platform: Add missing bounds check mali_kbase: Zero-initialize the dump_bufs_meta array mali_kbase: Fix OOB write in kbase_csf_cpu_queue_dump() mali_kbase: Move epoll-consumed waitqueue to struct kbase_file Integrate firmware core dump into sscd MIDCET-4870: Fix GPU page fault issue due to reclaiming of Tiler heap chunks mali_kbase: platform: Fix integer overflow mali_kbase: Tracepoints for governor recommendation mali_kbase: Add tracepoints to hint_min_freq / hint_max_freq mali_kbase: Enable mali_kutf_clk_rate_trace_test_portal build mali_kbase: restore CSF ftrace events Refactor helpers for creating RT threads Update KMD to 'mini release: update r44p1-00dev2 to r44p1-00dev3' mali_kbase: Use kthread for protm_event_worker GPUCORE-34589 jit_lock all JIT operations [Official] MIDCET-4458, GPUCORE-36765: Stop the use of tracking page for GPU memory accounting mali_kbase: Unmask RESET_COMPLETED irq before resetting the GPU [Official] MIDCET-4820,GPUCORE-36255 Sync whole USER_BUFFER pages upon GPU mapping mali_kbase: Use rt_mutex for scheduler lock mali_kbase: fix incorrect auto-merger change mali_pixel: Disable mgm debugfs by default mali_kbase: platform: Batch MMU flushes after liveness update mali_kbase: refactor kbase_mmu_update_pages [Official] MIDCET-4806,GPUCORE-38732 Continue FLUSH_MEM after power transition timeout mali_pixel: mgm: Compensate for group migration mali_pixel: mgm: Remove race condition mali_pixel: mgm: Refactor update_size mali_kbase: add 
missing deinitialization [Official] MIDCET-4458, GPUCORE-36765: Stop the use of tracking page for GPU memory accounting mali_kbase: restore hysteresis time. Update KMD to 'mini release: update r44p1-01bet1 to r44p1-00dev2' mali_kbase: Reduce kernel log spam. csf: Setup kcpu_fence->metadata before accessing it mali_kbase: Add an ITMON notifier callback to check GPU page tables. mali_kbase: shorten 'mali_kbase_*' thread names Constrain protected memory allocation during FW initialization Merge upstream DDK R43P0 KMD Mali allocations: unconditionally check for pending kill signals pixel_gpu_uevent: Increase uevent ratelimiting timeout to 20mins GPUCORE-38292 Fix Use-After-Free Race with Memory-Pool Grow kbase: csf: Reboot on failed GPU reset Add missing hwaccess_lock around atom_flags updates. GPUCORE-35754: Add barrier before updating GLB_DB_REQ to ring CSG DB mali_kbase: Enable kutf modules GPUCORE-36682 Lock MMU while disabling AS to prevent use after free kbase_mem: Reduce per-memory-group pool size to 4. mali_pixel: mgm: Ensure partition size is set to 0 when disabled. GPUCORE-37961 Deadlock issue due to lock ordering issue Make sure jobs are flushed before kbasep_platform_context_term [Official] MIDCET-4546, GPUCORE-37946: Synchronize GPU cache flush cmds with silent reset on GPU power up mali_kbase: hold GPU utilization for premature update. mali_kbase: Remove incorrect WARN() MIDCET-4324/GPUCORE-35611 Unmapping of aliased sink-page memory [Official] MIDCET-4458, GPUCORE-36402: Check for process exit before page alloc from kthread Revert "Revert "GPUCORE-36748 Fix kbase_gpu_mmap() error handling"" Mali Valhall Android DDK r43p0-01eac0 KMD Mali Valhall Android DDK r42p0-01eac0 KMD mali_kbase: platform: [SLC-VK] Add new MGM group id for explicit SLC allocations. mali_kbase: [SLC-VK] Add new BASE_MEM_GROUP for explicit SLC allocations. mali_kbase: [SLC-VK] Add CCTX memory class for explicit SLC allocations. 
platform: Fix mgm_term_data behavior platform: Disable the GPU SLC partition when not in demand Revert "GPUCORE-36748 Fix kbase_gpu_mmap() error handling" Revert "GPUCORE-36682 Lock MMU while disabling AS to prevent use after free" Revert "GPUCORE-36748 Fix kbase_gpu_mmap() error handling" Revert "GPUCORE-36682 Lock MMU while disabling AS to prevent use after free" [Official] MIDCET-4458, GPUCORE-36429: Prevent JIT allocations following unmap of tracking page [Official] MIDCET-4458, GPUCORE-36635 Fix memory leak via GROUP_SUSPEND Flush mmu updates regardless of coherency mode kbase: Add a debugfs file to test GPU uevents kbase: Add new GPU uevents to kbase pixel: Introduce GPU uevents to notify userspace of GPU failures [Official] MIDCET-4458, GPUCORE-36654 Use %pK on GPU bus fault mali_kbase: platform: Init GPU SLC context Add partial term support to pixel gpu init mali_kbase: Add missing wake_up(poweroff_wait) when cancelling poweroff. mali_pixel: mgm: Factor out common code between enabling/mutating partitions mali_pixel: mgm: Get accurate size from slc pt mutate mali_kbase: platform: mgm: Get accurate SLC partition size mali_kbase: Remove redundant if check to unblock suspend mali_kbase: reset: Flush SSCD worker before resetting the GPU pixel_gpu_sscd: Prevent dumping multiple SSCDs when the GPU hangs mali_kbase: reset: Add a helper to check GPU reset failure mali-pma: Defer probing until the dma_heap is found Revert "mali_kbase: mem: Prevent vma splits" GPUCORE-36682 Lock MMU while disabling AS to prevent use after free GPUCORE-36748 Fix kbase_gpu_mmap() error handling Powercycle mali to recover from a PM timeout mali_pixel: Downgrade invalid region warning to dev_dbg mali_pixel: Fix PBHA bit pos for ZUMA and PRO mali_kbase: platform: Perform partition resize and region migration ... 
Test: Verify `git diff aosp/android-gs-raviole-5.10-android14-qpr2..HEAD`
Change-Id: I0711654dd45ae2996e837ce3353f0790394d7c72
Signed-off-by: Will McVicker <willmcvicker@google.com>
Diffstat (limited to 'mali_kbase/csf/mali_kbase_csf_event.c')
-rw-r--r-- mali_kbase/csf/mali_kbase_csf_event.c | 22
1 file changed, 17 insertions, 5 deletions
diff --git a/mali_kbase/csf/mali_kbase_csf_event.c b/mali_kbase/csf/mali_kbase_csf_event.c
index 5c86688..63e6c15 100644
--- a/mali_kbase/csf/mali_kbase_csf_event.c
+++ b/mali_kbase/csf/mali_kbase_csf_event.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -102,7 +102,7 @@ static void sync_update_notify_gpu(struct kbase_context *kctx)
if (can_notify_gpu) {
kbase_csf_ring_doorbell(kctx->kbdev, CSF_KERNEL_DOORBELL_NR);
- KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT_NOTIFY_GPU, kctx, 0u);
+ KBASE_KTRACE_ADD(kctx->kbdev, CSF_SYNC_UPDATE_NOTIFY_GPU_EVENT, kctx, 0u);
}
spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
@@ -120,7 +120,7 @@ void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu)
/* First increment the signal count and wake up event thread.
*/
atomic_set(&kctx->event_count, 1);
- kbase_event_wakeup(kctx);
+ kbase_event_wakeup_nosync(kctx);
/* Signal the CSF firmware. This is to ensure that pending command
* stream synch object wait operations are re-evaluated.
@@ -169,7 +169,8 @@ void kbase_csf_event_term(struct kbase_context *kctx)
kfree(event_cb);
}
- WARN_ON(!list_empty(&kctx->csf.event.error_list));
+ WARN(!list_empty(&kctx->csf.event.error_list),
+ "Error list not empty for ctx %d_%d\n", kctx->tgid, kctx->id);
spin_unlock_irqrestore(&kctx->csf.event.lock, flags);
}
@@ -226,12 +227,15 @@ void kbase_csf_event_add_error(struct kbase_context *const kctx,
return;
spin_lock_irqsave(&kctx->csf.event.lock, flags);
- if (!WARN_ON(!list_empty(&error->link))) {
+ if (list_empty(&error->link)) {
error->data = *data;
list_add_tail(&error->link, &kctx->csf.event.error_list);
dev_dbg(kctx->kbdev->dev,
"Added error %pK of type %d in context %pK\n",
(void *)error, data->type, (void *)kctx);
+ } else {
+ dev_dbg(kctx->kbdev->dev, "Error %pK of type %d already pending in context %pK",
+ (void *)error, error->data.type, (void *)kctx);
}
spin_unlock_irqrestore(&kctx->csf.event.lock, flags);
}
@@ -241,6 +245,14 @@ bool kbase_csf_event_error_pending(struct kbase_context *kctx)
bool error_pending = false;
unsigned long flags;
+ /* Withhold the error event if the dump on fault is ongoing.
+ * This would prevent the Userspace from taking error recovery actions
+ * (which can potentially affect the state that is being dumped).
+ * Event handling thread would eventually notice the error event.
+ */
+ if (unlikely(!kbase_debug_csf_fault_dump_complete(kctx->kbdev)))
+ return false;
+
spin_lock_irqsave(&kctx->csf.event.lock, flags);
error_pending = !list_empty(&kctx->csf.event.error_list);