path: root/mali_kbase/mali_kbase_mem.c
author     Sidath Senanayake <sidaths@google.com>   2020-04-14 14:55:25 +0200
committer  Sidath Senanayake <sidaths@google.com>   2020-04-14 14:55:25 +0200
commit     b64f568f943e567534694cc993270adca96dcd06 (patch)
tree       ff175812d02016e7e630217ecf8de53d6ac2d6a2 /mali_kbase/mali_kbase_mem.c
parent     b2b1764ee0fe59773c1c8f621ad2955c35cd9d92 (diff)
download   gpu-b64f568f943e567534694cc993270adca96dcd06.tar.gz
Mali Valhall DDK r24p0 KMD

Provenance: c5a37f014 (collaborate/EAC/v_r24p0)
VX504X08X-BU-00000-r24p0-01rel0 - Android DDK
VX504X08X-BU-60000-r24p0-01rel0 - Android Document Bundle

Signed-off-by: Sidath Senanayake <sidaths@google.com>
Change-Id: I1536474b6a18731cd377251c6dc947811ba0c787
Diffstat (limited to 'mali_kbase/mali_kbase_mem.c')
-rw-r--r--  mali_kbase/mali_kbase_mem.c  391
1 file changed, 373 insertions(+), 18 deletions(-)
diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c
index de57024..2362e22 100644
--- a/mali_kbase/mali_kbase_mem.c
+++ b/mali_kbase/mali_kbase_mem.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,8 +20,6 @@
*
*/
-
-
/**
* Base kernel memory APIs
*/
@@ -44,6 +42,30 @@
#include <mali_kbase_native_mgm.h>
#include <mali_kbase_mem_pool_group.h>
#include <mmu/mali_kbase_mmu.h>
+#include <mali_kbase_config_defaults.h>
+
+/*
+ * Alignment of objects allocated by the GPU inside a just-in-time memory
+ * region whose size is given by an end address
+ *
+ * This is the alignment of objects allocated by the GPU, but possibly not
+ * fully written to. When taken into account with
+ * KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES it gives the maximum number of bytes
+ * that the JIT memory report size can exceed the actual backed memory size.
+ */
+#define KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES (128u)
+
+/*
+ * Maximum size of objects allocated by the GPU inside a just-in-time memory
+ * region whose size is given by an end address
+ *
+ * This is the maximum size of objects allocated by the GPU, but possibly not
+ * fully written to. When taken into account with
+ * KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES it gives the maximum number of bytes
+ * that the JIT memory report size can exceed the actual backed memory size.
+ */
+#define KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES (512u)
+
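For context on the two bounds above: taken together they limit how far a JIT size report based on an end address can exceed the actually backed memory. A minimal standalone sketch of that arithmetic follows (not part of the patch; EXAMPLE_PAGE_SIZE and EXAMPLE_PFN_UP stand in for the kernel's PAGE_SIZE and PFN_UP, and a 4 KiB page size is assumed):

#include <stdio.h>

#define EXAMPLE_PAGE_SIZE 4096u
#define EXAMPLE_PFN_UP(x) (((x) + EXAMPLE_PAGE_SIZE - 1u) / EXAMPLE_PAGE_SIZE)

int main(void)
{
	/* The reported end address may point at most
	 * (MAX_BYTES - ALIGN_BYTES) bytes past the last backed byte;
	 * rounded up, that is one extra page.
	 */
	unsigned int slack_bytes = 512u - 128u;
	unsigned int extra_pages = EXAMPLE_PFN_UP(slack_bytes);

	printf("report may exceed backing by %u byte(s), i.e. %u page(s)\n",
	       slack_bytes, extra_pages);
	return 0;
}

This is the same slack that the patch later adds to max_allowed_pages in kbase_mem_jit_trim_pages_from_region() via PFN_UP(KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES - KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES).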
/* Forward declarations */
static void free_partial_locked(struct kbase_context *kctx,
@@ -672,9 +694,11 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree)
void kbase_region_tracker_term(struct kbase_context *kctx)
{
+ kbase_gpu_vm_lock(kctx);
kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same);
kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom);
kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec);
+ kbase_gpu_vm_unlock(kctx);
}
void kbase_region_tracker_term_rbtree(struct rb_root *rbtree)
@@ -812,15 +836,22 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
#endif
int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
- u8 max_allocations, u8 trim_level, int group_id)
+ int max_allocations, int trim_level, int group_id,
+ u64 phys_pages_limit)
{
int err = 0;
- if (trim_level > 100)
+ if (trim_level < 0 || trim_level > BASE_JIT_MAX_TRIM_LEVEL)
+ return -EINVAL;
+
+ if (group_id < 0 || group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)
return -EINVAL;
- if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) ||
- WARN_ON(group_id < 0))
+#if MALI_JIT_PRESSURE_LIMIT
+ if (phys_pages_limit > jit_va_pages)
+#else
+ if (phys_pages_limit != jit_va_pages)
+#endif /* MALI_JIT_PRESSURE_LIMIT */
return -EINVAL;
kbase_gpu_vm_lock(kctx);
@@ -839,6 +870,11 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
kctx->trim_level = trim_level;
kctx->jit_va = true;
kctx->jit_group_id = group_id;
+#if MALI_JIT_PRESSURE_LIMIT
+ kctx->jit_phys_pages_limit = phys_pages_limit;
+ dev_dbg(kctx->kbdev->dev, "phys_pages_limit set to %llu\n",
+ phys_pages_limit);
+#endif /* MALI_JIT_PRESSURE_LIMIT */
}
kbase_gpu_vm_unlock(kctx);
@@ -940,6 +976,12 @@ int kbase_mem_init(struct kbase_device *kbdev)
/* Initialize memory usage */
atomic_set(&memdev->used_pages, 0);
+#ifdef IR_THRESHOLD
+ atomic_set(&memdev->ir_threshold, IR_THRESHOLD);
+#else
+ atomic_set(&memdev->ir_threshold, DEFAULT_IR_THRESHOLD);
+#endif
+
kbdev->mgm_dev = &kbase_native_mgm_dev;
#ifdef CONFIG_OF
@@ -1055,6 +1097,7 @@ struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree,
new_reg->nr_pages = nr_pages;
INIT_LIST_HEAD(&new_reg->jit_node);
+ INIT_LIST_HEAD(&new_reg->link);
return new_reg;
}
@@ -1109,6 +1152,8 @@ void kbase_free_alloced_region(struct kbase_va_region *reg)
if (WARN_ON(kbase_is_region_invalid(reg)))
return;
+ dev_dbg(kctx->kbdev->dev, "Freeing memory region %p\n",
+ (void *)reg);
mutex_lock(&kctx->jit_evict_lock);
@@ -1633,6 +1678,8 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re
KBASE_DEBUG_ASSERT(NULL != kctx);
KBASE_DEBUG_ASSERT(NULL != reg);
+ dev_dbg(kctx->kbdev->dev, "%s %p in kctx %p\n",
+ __func__, (void *)reg, (void *)kctx);
lockdep_assert_held(&kctx->reg_lock);
if (reg->flags & KBASE_REG_NO_USER_FREE) {
@@ -1688,6 +1735,8 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
struct kbase_va_region *reg;
KBASE_DEBUG_ASSERT(kctx != NULL);
+ dev_dbg(kctx->kbdev->dev, "%s 0x%llx in kctx %p\n",
+ __func__, gpu_addr, (void *)kctx);
if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) {
dev_warn(kctx->kbdev->dev, "kbase_mem_free: gpu_addr parameter is invalid");
@@ -3049,6 +3098,153 @@ static bool meet_size_and_tiler_align_top_requirements(struct kbase_context *kct
return meet_reqs;
}
+#if MALI_JIT_PRESSURE_LIMIT
+/* Function will guarantee *@freed will not exceed @pages_needed
+ */
+static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx,
+ struct kbase_va_region *reg, size_t pages_needed,
+ size_t *freed)
+{
+ int err = 0;
+ size_t available_pages = 0u;
+ const size_t old_pages = kbase_reg_current_backed_size(reg);
+ size_t new_pages = old_pages;
+ size_t to_free = 0u;
+ size_t max_allowed_pages = old_pages;
+
+ lockdep_assert_held(&kctx->jctx.lock);
+ lockdep_assert_held(&kctx->reg_lock);
+
+ /* Is this a JIT allocation that has been reported on? */
+ if (reg->used_pages == reg->nr_pages)
+ goto out;
+
+ if (!(reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE)) {
+ /* For address based memory usage calculation, the GPU
+ * allocates objects of up to size 's', but aligns every object
+ * to alignment 'a', with a < s.
+ *
+ * It also doesn't have to write to all bytes in an object of
+ * size 's'.
+ *
+ * Hence, we can observe the GPU's address for the end of used
+ * memory being up to (s - a) bytes into the first unallocated
+ * page.
+ *
+ * We allow for this and only warn when it exceeds this bound
+ * (rounded up to page sized units). Note, this is allowed to
+ * exceed reg->nr_pages.
+ */
+ max_allowed_pages += PFN_UP(
+ KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES -
+ KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES);
+ } else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) {
+ /* The GPU could report being ready to write to the next
+ * 'extent' sized chunk, but didn't actually write to it, so we
+ * can report up to 'extent' size pages more than the backed
+ * size.
+ *
+ * Note, this is allowed to exceed reg->nr_pages.
+ */
+ max_allowed_pages += reg->extent;
+
+ /* Also note that in these GPUs, the GPU may make a large (>1
+ * page) initial allocation but not actually write out to all
+ * of it. Hence it might report that a much higher amount of
+ * memory was used than actually was written to. This does not
+ * result in a real warning because on growing this memory we
+ * round up the size of the allocation up to an 'extent' sized
+ * chunk, hence automatically bringing the backed size up to
+ * the reported size.
+ */
+ }
+
+ if (old_pages < reg->used_pages) {
+ /* Prevent overflow on available_pages, but only report the
+ * problem if it's in a scenario where used_pages should have
+ * been consistent with the backed size
+ *
+ * Note: In case of a size-based report, this legitimately
+ * happens in common use-cases: we allow for up to this size of
+ * memory being used, but depending on the content it doesn't
+ * have to use all of it.
+ *
+ * Hence, we're much more quiet about that in the size-based
+ * report case - it's not indicating a real problem, it's just
+ * for information
+ */
+ if (max_allowed_pages < reg->used_pages) {
+ if (!(reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE))
+ dev_warn(kctx->kbdev->dev,
+ "%s: current backed pages %zu < reported used pages %zu (allowed to be up to %zu) on JIT 0x%llx vapages %zu\n",
+ __func__,
+ old_pages, reg->used_pages,
+ max_allowed_pages,
+ reg->start_pfn << PAGE_SHIFT,
+ reg->nr_pages);
+ else
+ dev_dbg(kctx->kbdev->dev,
+ "%s: no need to trim, current backed pages %zu < reported used pages %zu on size-report for JIT 0x%llx vapages %zu\n",
+ __func__,
+ old_pages, reg->used_pages,
+ reg->start_pfn << PAGE_SHIFT,
+ reg->nr_pages);
+ }
+ /* In any case, no error condition to report here, caller can
+ * try other regions
+ */
+
+ goto out;
+ }
+ available_pages = old_pages - reg->used_pages;
+ to_free = min(available_pages, pages_needed);
+
+ new_pages -= to_free;
+
+ err = kbase_mem_shrink(kctx, reg, new_pages);
+
+out:
+ trace_mali_jit_trim_from_region(reg, to_free, old_pages,
+ available_pages, new_pages);
+ *freed = to_free;
+ return err;
+}
+
+size_t kbase_mem_jit_trim_pages(struct kbase_context *kctx,
+ size_t pages_needed)
+{
+ struct kbase_va_region *reg, *tmp;
+ size_t total_freed = 0;
+
+ kbase_gpu_vm_lock(kctx);
+ mutex_lock(&kctx->jit_evict_lock);
+ list_for_each_entry_safe(reg, tmp, &kctx->jit_active_head, jit_node) {
+ int err;
+ size_t freed = 0u;
+
+ err = kbase_mem_jit_trim_pages_from_region(kctx, reg,
+ pages_needed, &freed);
+
+ if (err) {
+ /* Failed to trim, try the next region */
+ continue;
+ }
+
+ total_freed += freed;
+ WARN_ON(freed > pages_needed);
+ pages_needed -= freed;
+ if (!pages_needed)
+ break;
+ }
+ mutex_unlock(&kctx->jit_evict_lock);
+ kbase_gpu_vm_unlock(kctx);
+
+ trace_mali_jit_trim(total_freed);
+
+ return total_freed;
+}
+#endif /* MALI_JIT_PRESSURE_LIMIT */
+
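As a reference for the trimming hunk above, here is a minimal standalone sketch (not part of the patch; the example_ name is invented) of the core arithmetic in kbase_mem_jit_trim_pages_from_region(): only pages that are backed but no longer reported as used can be freed, capped by how many pages the caller still needs.

#include <stddef.h>

static size_t example_trim_from_region(size_t backed_pages,
				       size_t used_pages,
				       size_t pages_needed)
{
	size_t available;

	/* Nothing to trim if the region uses at least what is backed; the
	 * real function additionally warns when the report is clearly
	 * inconsistent with the backed size.
	 */
	if (used_pages >= backed_pages)
		return 0;

	available = backed_pages - used_pages;
	return available < pages_needed ? available : pages_needed;
}

For example, a region backed by 100 pages that reports 60 used pages yields at most 40 pages, which is why the SIZE_MAX callers in this patch simply take everything trimmable from each region.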
static int kbase_jit_grow(struct kbase_context *kctx,
struct base_jit_alloc_info *info, struct kbase_va_region *reg)
{
@@ -3208,6 +3404,17 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
{
struct kbase_va_region *reg = NULL;
+#if MALI_JIT_PRESSURE_LIMIT
+ if (info->va_pages > (kctx->jit_phys_pages_limit -
+ kctx->jit_current_phys_pressure) &&
+ kctx->jit_current_phys_pressure > 0) {
+ dev_dbg(kctx->kbdev->dev,
+ "Max JIT page allocations limit reached: active pages %llu, max pages %llu\n",
+ kctx->jit_current_phys_pressure + info->va_pages,
+ kctx->jit_phys_pages_limit);
+ return NULL;
+ }
+#endif /* MALI_JIT_PRESSURE_LIMIT */
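To make the rejection test above concrete, a small sketch follows (not part of the patch; the example_ name and the numbers in the comment are invented): the request is refused only when some pressure has already accumulated and the requested VA pages do not fit in the remaining headroom below the limit.

#include <stdbool.h>
#include <stdint.h>

static bool example_jit_request_rejected(uint64_t va_pages, uint64_t limit,
					 uint64_t current_pressure)
{
	/* e.g. limit = 1000 and current_pressure = 900 leave 100 pages of
	 * headroom, so a 200-page request is rejected while a 100-page
	 * request proceeds.
	 */
	return current_pressure > 0 &&
	       va_pages > (limit - current_pressure);
}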
if (kctx->jit_current_allocations >= kctx->jit_max_allocations) {
/* Too many current allocations */
dev_dbg(kctx->kbdev->dev,
@@ -3228,6 +3435,33 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
return NULL;
}
+#if MALI_JIT_PRESSURE_LIMIT
+ /* Before allocating a new just-in-time memory region or reusing a
+ * previous one, ensure that the total JIT physical page usage also will
+ * not exceed the pressure limit.
+ *
+ * If there are no reported-on allocations, then we already guarantee
+ * this will be the case - because our current pressure then only comes
+ * from the va_pages of each JIT region, hence JIT physical page usage
+ * is guaranteed to be bounded by this.
+ *
+ * However as soon as JIT allocations become "reported on", the
+ * pressure is lowered to allow new JIT regions to be allocated. It is
+ * after such a point that the total JIT physical page usage could
+ * (either now or in the future on a grow-on-GPU-page-fault) exceed the
+ * pressure limit, but only on newly allocated JIT regions. Hence, trim
+ * any "reported on" regions.
+ *
+ * Any pages freed will go into the pool and be allocated from there in
+ * kbase_mem_alloc().
+ *
+ * In future, GPUCORE-21217: Only do this when physical page usage
+ * could exceed the pressure limit, and only trim as much as is
+ * necessary.
+ */
+ kbase_mem_jit_trim_pages(kctx, SIZE_MAX);
+#endif /* MALI_JIT_PRESSURE_LIMIT */
+
mutex_lock(&kctx->jit_evict_lock);
/*
@@ -3372,6 +3606,8 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
mutex_unlock(&kctx->jit_evict_lock);
}
+ trace_mali_jit_alloc(reg, info->id);
+
kctx->jit_current_allocations++;
kctx->jit_current_allocations_per_bin[info->bin_id]++;
@@ -3379,6 +3615,13 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
reg->jit_usage_id = info->usage_id;
reg->jit_bin_id = info->bin_id;
+#if MALI_JIT_PRESSURE_LIMIT
+ if (info->flags & BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE)
+ reg->flags = reg->flags | KBASE_REG_HEAP_INFO_IS_SIZE;
+ reg->heap_info_gpu_addr = info->heap_info_gpu_addr;
+ kbase_jit_report_update_pressure(kctx, reg, info->va_pages,
+ KBASE_JIT_REPORT_ON_ALLOC_OR_FREE);
+#endif /* MALI_JIT_PRESSURE_LIMIT */
return reg;
@@ -3394,6 +3637,9 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
{
u64 old_pages;
+ /* JIT id not immediately available here, so use 0u */
+ trace_mali_jit_free(reg, 0u);
+
/* Get current size of JIT region */
old_pages = kbase_reg_current_backed_size(reg);
if (reg->initial_commit < old_pages) {
@@ -3404,19 +3650,16 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
div_u64(old_pages * (100 - kctx->trim_level), 100));
u64 delta = old_pages - new_size;
- if (delta) {
- kbase_mem_shrink_cpu_mapping(kctx, reg, old_pages-delta,
- old_pages);
- kbase_mem_shrink_gpu_mapping(kctx, reg, old_pages-delta,
- old_pages);
-
- kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
- if (reg->cpu_alloc != reg->gpu_alloc)
- kbase_free_phy_pages_helper(reg->gpu_alloc,
- delta);
- }
+ if (delta)
+ kbase_mem_shrink(kctx, reg, old_pages - delta);
}
+#if MALI_JIT_PRESSURE_LIMIT
+ reg->heap_info_gpu_addr = 0;
+ kbase_jit_report_update_pressure(kctx, reg, 0,
+ KBASE_JIT_REPORT_ON_ALLOC_OR_FREE);
+#endif /* MALI_JIT_PRESSURE_LIMIT */
+
kctx->jit_current_allocations--;
kctx->jit_current_allocations_per_bin[reg->jit_bin_id]--;
@@ -3535,6 +3778,118 @@ void kbase_jit_term(struct kbase_context *kctx)
cancel_work_sync(&kctx->jit_work);
}
+#if MALI_JIT_PRESSURE_LIMIT
+void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx,
+ struct kbase_va_region *reg, unsigned int flags)
+{
+ /* Offset to the location used for a JIT report within the GPU memory
+ *
+	 * This constant is only used for this debugging function - not useful
+ * anywhere else in kbase
+ */
+ const u64 jit_report_gpu_mem_offset = sizeof(u64)*2;
+
+ u64 addr_start;
+ struct kbase_vmap_struct mapping;
+ u64 *ptr;
+
+ if (reg->heap_info_gpu_addr == 0ull)
+ goto out;
+
+ /* Nothing else to trace in the case the memory just contains the
+ * size. Other tracepoints already record the relevant area of memory.
+ */
+ if (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE)
+ goto out;
+
+ addr_start = reg->heap_info_gpu_addr - jit_report_gpu_mem_offset;
+
+ ptr = kbase_vmap(kctx, addr_start, KBASE_JIT_REPORT_GPU_MEM_SIZE,
+ &mapping);
+ if (!ptr) {
+ dev_warn(kctx->kbdev->dev,
+ "%s: JIT start=0x%llx unable to map memory near end pointer %llx\n",
+ __func__, reg->start_pfn << PAGE_SHIFT,
+ addr_start);
+ goto out;
+ }
+
+ trace_mali_jit_report_gpu_mem(addr_start, reg->start_pfn << PAGE_SHIFT,
+ ptr, flags);
+
+ kbase_vunmap(kctx, &mapping);
+out:
+ return;
+}
+#endif /* MALI_JIT_PRESSURE_LIMIT */
+
+#if MALI_JIT_PRESSURE_LIMIT
+void kbase_jit_report_update_pressure(struct kbase_context *kctx,
+ struct kbase_va_region *reg, u64 new_used_pages,
+ unsigned int flags)
+{
+ u64 diff;
+
+ lockdep_assert_held(&kctx->jctx.lock);
+
+ trace_mali_jit_report_pressure(reg, new_used_pages,
+ kctx->jit_current_phys_pressure + new_used_pages -
+ reg->used_pages,
+ flags);
+
+ if (WARN_ON(new_used_pages > reg->nr_pages))
+ return;
+
+ if (reg->used_pages > new_used_pages) {
+ /* We reduced the number of used pages */
+ diff = reg->used_pages - new_used_pages;
+
+ if (!WARN_ON(diff > kctx->jit_current_phys_pressure))
+ kctx->jit_current_phys_pressure -= diff;
+
+ reg->used_pages = new_used_pages;
+
+ /* In the case of pressure reduced on a free, don't attempt to
+ * trim the region: it will soon be placed on the evict_list
+ * so that if we really were close to running out of memory then
+ * the shrinker can reclaim the memory.
+ */
+ if ((flags & KBASE_JIT_REPORT_ON_ALLOC_OR_FREE) == 0u) {
+ size_t freed;
+ int err;
+
+ kbase_gpu_vm_lock(kctx);
+ /* If this was from an allocation that a single
+ * BASE_JD_REQ_SOFT_JIT_ALLOC atom that is allowed to
+ * breach the pressure limit, then check whether we can
+ * bring the total JIT physical page below (or at least
+ * nearer) the pressure limit.
+ *
+ * In future, GPUCORE-21217: Only do this when physical
+ * page usage currently exceeds the pressure limit, and
+ * only trim as much as is necessary.
+ */
+ err = kbase_mem_jit_trim_pages_from_region(kctx, reg,
+ SIZE_MAX, &freed);
+ kbase_gpu_vm_unlock(kctx);
+
+ CSTD_UNUSED(freed);
+ /* Nothing to do if trimming failed */
+ CSTD_UNUSED(err);
+ }
+ } else {
+ /* We increased the number of used pages */
+ diff = new_used_pages - reg->used_pages;
+
+ if (!WARN_ON(diff > U64_MAX - kctx->jit_current_phys_pressure))
+ kctx->jit_current_phys_pressure += diff;
+
+ reg->used_pages = new_used_pages;
+ }
+
+}
+#endif /* MALI_JIT_PRESSURE_LIMIT */
+
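Finally, a compact sketch (not part of the patch; example_ names are invented) of the bookkeeping performed by kbase_jit_report_update_pressure(): the context-wide pressure moves by the delta in the region's used pages, with the same underflow and overflow guards that the real code wraps in WARN_ON().

#include <stdint.h>

static void example_update_pressure(uint64_t *ctx_pressure,
				    uint64_t *region_used_pages,
				    uint64_t new_used_pages)
{
	if (*region_used_pages > new_used_pages) {
		/* Used pages dropped: release the difference, never below 0 */
		uint64_t diff = *region_used_pages - new_used_pages;

		if (diff <= *ctx_pressure)
			*ctx_pressure -= diff;
	} else {
		/* Used pages grew: add the difference, never wrapping */
		uint64_t diff = new_used_pages - *region_used_pages;

		if (diff <= UINT64_MAX - *ctx_pressure)
			*ctx_pressure += diff;
	}

	*region_used_pages = new_used_pages;
}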
bool kbase_has_exec_va_zone(struct kbase_context *kctx)
{
bool has_exec_va_zone;