summaryrefslogtreecommitdiff
path: root/mali_kbase/context
diff options
context:
space:
mode:
author: Suzanne Candanedo <suzanne.candanedo@arm.com> 2023-04-12 12:31:53 +0100
committer: Guus Sliepen <gsliepen@google.com> 2023-05-09 07:46:56 +0000
commit: 41f159f6de2788d7ce6993ba20218bcb8392ace1 (patch)
tree: 74f90aee88180d6e4dabbb2d7e326abfb2e70d78 /mali_kbase/context
parent: b08aa4e87a4adc0af4fea283d3af26637e2fdd8a (diff)
download: gpu-41f159f6de2788d7ce6993ba20218bcb8392ace1.tar.gz
[Official] MIDCET-4458, GPUCORE-36402: Check for process exit before page alloc from kthread
The backing pages for native GPU allocations aren't always allocated in the ioctl context. A JIT_ALLOC softjob or KCPU command can get processed in the kernel worker thread. GPU page fault handling is always done in a kernel thread anyway. Userspace can make Kbase allocate a large number of backing pages from the kernel thread to cause an out-of-memory situation, which would eventually lead to a kernel panic as the OoM killer would run out of suitable processes to kill. Though Kbase will account for the backing pages and the OoM killer will try to kill the culprit process, the memory already allocated by the process won't get freed, as context termination would remain blocked or won't kick in as long as the kernel thread keeps trying to allocate the backing pages. For an allocation that is done from the context of a kernel thread, the OoM killer won't consider the kernel thread for killing, and the kernel would keep retrying to allocate a physical page as long as the OoM killer is able to kill processes. For a memory allocation done from the ioctl context, the kernel would eventually stop retrying when it sees that the process has been marked for killing by the OoM killer. This commit adds a check for process exit in the page allocation loop. The check allows the kernel thread to swiftly exit the page allocation loop once the OoM killer has initiated the killing of the culprit process (for which the kernel thread is trying to allocate pages), thereby unblocking context termination and the freeing of GPU memory already allocated by the process. This helps in preventing the kernel panic and also limits the number of innocent processes that get killed. The use of the __GFP_RETRY_MAYFAIL flag didn't help in all the scenarios. The flag ensures that the OoM killer is not invoked directly and the kernel doesn't keep retrying to allocate the page. 
But when the system is running low on memory, other threads can invoke the OoM killer, and the page allocation request from the kthread could continue to get satisfied due to the killing of other processes, so the kthread may not always exit the page allocation loop in a timely manner. (cherry picked from commit 3c5c9328a7fc552e61972c1bbff4b56696682d30) GPUCORE-36402: Fix potential memleak and NULL ptr deref issue in Kbase The commit 3c5c9328a7fc552e61972c1bbff4b56696682d30 updated Kbase to check for the process exit in every iteration of the page allocation loop when the allocation is done from the context of a kernel worker thread. The commit introduced a potential memleak and a NULL pointer dereference issue (which was reported by Coverity). This commit adds the required fix for the 2 issues and also sets the task pointer only for the Userspace-created contexts and not for the contexts created by Kbase, i.e. the privileged contexts created for the HW counter dumping and for the WA of HW issue TRYM-3485. Bug: 275614526 Change-Id: I8107edce09a2cb52d8586fc9f7990a25166f590e Signed-off-by: Guus Sliepen <gsliepen@google.com> Provenance: https://code.ipdelivery.arm.com/c/GPU/mali-ddk/+/5169 (cherry picked from commit 8294169160ebb0d11d7d22b11311ddf887fb0b63)
Diffstat (limited to 'mali_kbase/context')
-rw-r--r--mali_kbase/context/mali_kbase_context.c58
1 files changed, 52 insertions, 6 deletions
diff --git a/mali_kbase/context/mali_kbase_context.c b/mali_kbase/context/mali_kbase_context.c
index 5fc1636..8787a56 100644
--- a/mali_kbase/context/mali_kbase_context.c
+++ b/mali_kbase/context/mali_kbase_context.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,12 @@
/*
* Base kernel context APIs
*/
+#include <linux/version.h>
+#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
+#include <linux/sched/task.h>
+#else
+#include <linux/sched.h>
+#endif
#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_regmap.h>
@@ -178,11 +184,47 @@ int kbase_context_common_init(struct kbase_context *kctx)
spin_lock_init(&kctx->mm_update_lock);
kctx->process_mm = NULL;
+ kctx->task = NULL;
atomic_set(&kctx->nonmapped_pages, 0);
atomic_set(&kctx->permanent_mapped_pages, 0);
kctx->tgid = current->tgid;
kctx->pid = current->pid;
+ /* Check if this is a Userspace created context */
+ if (likely(kctx->filp)) {
+ struct pid *pid_struct;
+
+ rcu_read_lock();
+ pid_struct = find_get_pid(kctx->tgid);
+ if (likely(pid_struct)) {
+ struct task_struct *task = pid_task(pid_struct, PIDTYPE_PID);
+
+ if (likely(task)) {
+ /* Take a reference on the task to avoid slow lookup
+ * later on from the page allocation loop.
+ */
+ get_task_struct(task);
+ kctx->task = task;
+ } else {
+ dev_err(kctx->kbdev->dev,
+ "Failed to get task pointer for %s/%d",
+ current->comm, current->pid);
+ err = -ESRCH;
+ }
+
+ put_pid(pid_struct);
+ } else {
+ dev_err(kctx->kbdev->dev,
+ "Failed to get pid pointer for %s/%d",
+ current->comm, current->pid);
+ err = -ESRCH;
+ }
+ rcu_read_unlock();
+
+ if (unlikely(err))
+ return err;
+ }
+
atomic_set(&kctx->used_pages, 0);
mutex_init(&kctx->reg_lock);
@@ -213,13 +255,14 @@ int kbase_context_common_init(struct kbase_context *kctx)
kctx->id = atomic_add_return(1, &(kctx->kbdev->ctx_num)) - 1;
mutex_lock(&kctx->kbdev->kctx_list_lock);
-
err = kbase_insert_kctx_to_process(kctx);
- if (err)
- dev_err(kctx->kbdev->dev,
- "(err:%d) failed to insert kctx to kbase_process\n", err);
-
mutex_unlock(&kctx->kbdev->kctx_list_lock);
+ if (err) {
+ dev_err(kctx->kbdev->dev,
+ "(err:%d) failed to insert kctx to kbase_process", err);
+ if (likely(kctx->filp))
+ put_task_struct(kctx->task);
+ }
return err;
}
@@ -307,6 +350,9 @@ void kbase_context_common_term(struct kbase_context *kctx)
kbase_remove_kctx_from_process(kctx);
mutex_unlock(&kctx->kbdev->kctx_list_lock);
+ if (likely(kctx->filp))
+ put_task_struct(kctx->task);
+
KBASE_KTRACE_ADD(kctx->kbdev, CORE_CTX_DESTROY, kctx, 0u);
}