From b64f568f943e567534694cc993270adca96dcd06 Mon Sep 17 00:00:00 2001 From: Sidath Senanayake Date: Tue, 14 Apr 2020 14:55:25 +0200 Subject: Mali Valhall DDK r24p0 KMD Provenance: c5a37f014 (collaborate/EAC/v_r24p0) VX504X08X-BU-00000-r24p0-01rel0 - Android DDK VX504X08X-BU-60000-r24p0-01rel0 - Android Document Bundle Signed-off-by: Sidath Senanayake Change-Id: I1536474b6a18731cd377251c6dc947811ba0c787 --- mali_kbase/mali_kbase_mem.c | 391 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 373 insertions(+), 18 deletions(-) (limited to 'mali_kbase/mali_kbase_mem.c') diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c index de57024..2362e22 100644 --- a/mali_kbase/mali_kbase_mem.c +++ b/mali_kbase/mali_kbase_mem.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,8 +20,6 @@ * */ - - /** * Base kernel memory APIs */ @@ -44,6 +42,30 @@ #include #include #include +#include + +/* + * Alignment of objects allocated by the GPU inside a just-in-time memory + * region whose size is given by an end address + * + * This is the alignment of objects allocated by the GPU, but possibly not + * fully written to. When taken into account with + * KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES it gives the maximum number of bytes + * that the JIT memory report size can exceed the actual backed memory size. + */ +#define KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES (128u) + +/* + * Maximum size of objects allocated by the GPU inside a just-in-time memory + * region whose size is given by an end address + * + * This is the maximum size of objects allocated by the GPU, but possibly not + * fully written to. When taken into account with + * KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES it gives the maximum number of bytes + * that the JIT memory report size can exceed the actual backed memory size. 
+ */ +#define KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES (512u) + /* Forward declarations */ static void free_partial_locked(struct kbase_context *kctx, @@ -672,9 +694,11 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) void kbase_region_tracker_term(struct kbase_context *kctx) { + kbase_gpu_vm_lock(kctx); kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same); kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom); kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec); + kbase_gpu_vm_unlock(kctx); } void kbase_region_tracker_term_rbtree(struct rb_root *rbtree) @@ -812,15 +836,22 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, #endif int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, - u8 max_allocations, u8 trim_level, int group_id) + int max_allocations, int trim_level, int group_id, + u64 phys_pages_limit) { int err = 0; - if (trim_level > 100) + if (trim_level < 0 || trim_level > BASE_JIT_MAX_TRIM_LEVEL) + return -EINVAL; + + if (group_id < 0 || group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) return -EINVAL; - if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) || - WARN_ON(group_id < 0)) +#if MALI_JIT_PRESSURE_LIMIT + if (phys_pages_limit > jit_va_pages) +#else + if (phys_pages_limit != jit_va_pages) +#endif /* MALI_JIT_PRESSURE_LIMIT */ return -EINVAL; kbase_gpu_vm_lock(kctx); @@ -839,6 +870,11 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, kctx->trim_level = trim_level; kctx->jit_va = true; kctx->jit_group_id = group_id; +#if MALI_JIT_PRESSURE_LIMIT + kctx->jit_phys_pages_limit = phys_pages_limit; + dev_dbg(kctx->kbdev->dev, "phys_pages_limit set to %llu\n", + phys_pages_limit); +#endif /* MALI_JIT_PRESSURE_LIMIT */ } kbase_gpu_vm_unlock(kctx); @@ -940,6 +976,12 @@ int kbase_mem_init(struct kbase_device *kbdev) /* Initialize memory usage */ atomic_set(&memdev->used_pages, 0); +#ifdef IR_THRESHOLD + atomic_set(&memdev->ir_threshold, IR_THRESHOLD); +#else + atomic_set(&memdev->ir_threshold, DEFAULT_IR_THRESHOLD); +#endif + kbdev->mgm_dev = &kbase_native_mgm_dev; #ifdef CONFIG_OF @@ -1055,6 +1097,7 @@ struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree, new_reg->nr_pages = nr_pages; INIT_LIST_HEAD(&new_reg->jit_node); + INIT_LIST_HEAD(&new_reg->link); return new_reg; } @@ -1109,6 +1152,8 @@ void kbase_free_alloced_region(struct kbase_va_region *reg) if (WARN_ON(kbase_is_region_invalid(reg))) return; + dev_dbg(kctx->kbdev->dev, "Freeing memory region %p\n", + (void *)reg); mutex_lock(&kctx->jit_evict_lock); @@ -1633,6 +1678,8 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re KBASE_DEBUG_ASSERT(NULL != kctx); KBASE_DEBUG_ASSERT(NULL != reg); + dev_dbg(kctx->kbdev->dev, "%s %p in kctx %p\n", + __func__, (void *)reg, (void *)kctx); lockdep_assert_held(&kctx->reg_lock); if (reg->flags & KBASE_REG_NO_USER_FREE) { @@ -1688,6 +1735,8 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) struct kbase_va_region *reg; KBASE_DEBUG_ASSERT(kctx != NULL); + dev_dbg(kctx->kbdev->dev, "%s 0x%llx in kctx %p\n", + __func__, gpu_addr, (void *)kctx); if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) { dev_warn(kctx->kbdev->dev, "kbase_mem_free: gpu_addr parameter is invalid"); @@ -3049,6 +3098,153 @@ static bool meet_size_and_tiler_align_top_requirements(struct kbase_context *kct return meet_reqs; } +#if MALI_JIT_PRESSURE_LIMIT +/* Function will guarantee *@freed will not exceed @pages_needed + */ +static int 
kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx, + struct kbase_va_region *reg, size_t pages_needed, + size_t *freed) +{ + int err = 0; + size_t available_pages = 0u; + const size_t old_pages = kbase_reg_current_backed_size(reg); + size_t new_pages = old_pages; + size_t to_free = 0u; + size_t max_allowed_pages = old_pages; + + lockdep_assert_held(&kctx->jctx.lock); + lockdep_assert_held(&kctx->reg_lock); + + /* Is this a JIT allocation that has been reported on? */ + if (reg->used_pages == reg->nr_pages) + goto out; + + if (!(reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE)) { + /* For address based memory usage calculation, the GPU + * allocates objects of up to size 's', but aligns every object + * to alignment 'a', with a < s. + * + * It also doesn't have to write to all bytes in an object of + * size 's'. + * + * Hence, we can observe the GPU's address for the end of used + * memory being up to (s - a) bytes into the first unallocated + * page. + * + * We allow for this and only warn when it exceeds this bound + * (rounded up to page sized units). Note, this is allowed to + * exceed reg->nr_pages. + */ + max_allowed_pages += PFN_UP( + KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES - + KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES); + } else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { + /* The GPU could report being ready to write to the next + * 'extent' sized chunk, but didn't actually write to it, so we + * can report up to 'extent' size pages more than the backed + * size. + * + * Note, this is allowed to exceed reg->nr_pages. + */ + max_allowed_pages += reg->extent; + + /* Also note that in these GPUs, the GPU may make a large (>1 + * page) initial allocation but not actually write out to all + * of it. Hence it might report that a much higher amount of + * memory was used than actually was written to. This does not + * result in a real warning because on growing this memory we + * round up the size of the allocation up to an 'extent' sized + * chunk, hence automatically bringing the backed size up to + * the reported size. + */ + } + + if (old_pages < reg->used_pages) { + /* Prevent overflow on available_pages, but only report the + * problem if it's in a scenario where used_pages should have + * been consistent with the backed size + * + * Note: In case of a size-based report, this legitimately + * happens in common use-cases: we allow for up to this size of + * memory being used, but depending on the content it doesn't + * have to use all of it. 
+ * + * Hence, we're much more quiet about that in the size-based + * report case - it's not indicating a real problem, it's just + * for information + */ + if (max_allowed_pages < reg->used_pages) { + if (!(reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE)) + dev_warn(kctx->kbdev->dev, + "%s: current backed pages %zu < reported used pages %zu (allowed to be up to %zu) on JIT 0x%llx vapages %zu\n", + __func__, + old_pages, reg->used_pages, + max_allowed_pages, + reg->start_pfn << PAGE_SHIFT, + reg->nr_pages); + else + dev_dbg(kctx->kbdev->dev, + "%s: no need to trim, current backed pages %zu < reported used pages %zu on size-report for JIT 0x%llx vapages %zu\n", + __func__, + old_pages, reg->used_pages, + reg->start_pfn << PAGE_SHIFT, + reg->nr_pages); + } + /* In any case, no error condition to report here, caller can + * try other regions + */ + + goto out; + } + available_pages = old_pages - reg->used_pages; + to_free = min(available_pages, pages_needed); + + new_pages -= to_free; + + err = kbase_mem_shrink(kctx, reg, new_pages); + +out: + trace_mali_jit_trim_from_region(reg, to_free, old_pages, + available_pages, new_pages); + *freed = to_free; + return err; +} + +size_t kbase_mem_jit_trim_pages(struct kbase_context *kctx, + size_t pages_needed) +{ + struct kbase_va_region *reg, *tmp; + size_t total_freed = 0; + + kbase_gpu_vm_lock(kctx); + mutex_lock(&kctx->jit_evict_lock); + list_for_each_entry_safe(reg, tmp, &kctx->jit_active_head, jit_node) { + int err; + size_t freed = 0u; + + err = kbase_mem_jit_trim_pages_from_region(kctx, reg, + pages_needed, &freed); + + if (err) { + /* Failed to trim, try the next region */ + continue; + } + + total_freed += freed; + WARN_ON(freed > pages_needed); + pages_needed -= freed; + if (!pages_needed) + break; + } + mutex_unlock(&kctx->jit_evict_lock); + kbase_gpu_vm_unlock(kctx); + + trace_mali_jit_trim(total_freed); + + return total_freed; +} +#endif /* MALI_JIT_PRESSURE_LIMIT */ + static int kbase_jit_grow(struct kbase_context *kctx, struct base_jit_alloc_info *info, struct kbase_va_region *reg) { @@ -3208,6 +3404,17 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, { struct kbase_va_region *reg = NULL; +#if MALI_JIT_PRESSURE_LIMIT + if (info->va_pages > (kctx->jit_phys_pages_limit - + kctx->jit_current_phys_pressure) && + kctx->jit_current_phys_pressure > 0) { + dev_dbg(kctx->kbdev->dev, + "Max JIT page allocations limit reached: active pages %llu, max pages %llu\n", + kctx->jit_current_phys_pressure + info->va_pages, + kctx->jit_phys_pages_limit); + return NULL; + } +#endif /* MALI_JIT_PRESSURE_LIMIT */ if (kctx->jit_current_allocations >= kctx->jit_max_allocations) { /* Too many current allocations */ dev_dbg(kctx->kbdev->dev, @@ -3228,6 +3435,33 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, return NULL; } +#if MALI_JIT_PRESSURE_LIMIT + /* Before allocating a new just-in-time memory region or reusing a + * previous one, ensure that the total JIT physical page usage also will + * not exceed the pressure limit. + * + * If there are no reported-on allocations, then we already guarantee + * this will be the case - because our current pressure then only comes + * from the va_pages of each JIT region, hence JIT physical page usage + * is guaranteed to be bounded by this. + * + * However as soon as JIT allocations become "reported on", the + * pressure is lowered to allow new JIT regions to be allocated. 
It is + * after such a point that the total JIT physical page usage could + * (either now or in the future on a grow-on-GPU-page-fault) exceed the + * pressure limit, but only on newly allocated JIT regions. Hence, trim + * any "reported on" regions. + * + * Any pages freed will go into the pool and be allocated from there in + * kbase_mem_alloc(). + * + * In future, GPUCORE-21217: Only do this when physical page usage + * could exceed the pressure limit, and only trim as much as is + * necessary. + */ + kbase_mem_jit_trim_pages(kctx, SIZE_MAX); +#endif /* MALI_JIT_PRESSURE_LIMIT */ + mutex_lock(&kctx->jit_evict_lock); /* @@ -3372,6 +3606,8 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, mutex_unlock(&kctx->jit_evict_lock); } + trace_mali_jit_alloc(reg, info->id); + kctx->jit_current_allocations++; kctx->jit_current_allocations_per_bin[info->bin_id]++; @@ -3379,6 +3615,13 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, reg->jit_usage_id = info->usage_id; reg->jit_bin_id = info->bin_id; +#if MALI_JIT_PRESSURE_LIMIT + if (info->flags & BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE) + reg->flags = reg->flags | KBASE_REG_HEAP_INFO_IS_SIZE; + reg->heap_info_gpu_addr = info->heap_info_gpu_addr; + kbase_jit_report_update_pressure(kctx, reg, info->va_pages, + KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); +#endif /* MALI_JIT_PRESSURE_LIMIT */ return reg; @@ -3394,6 +3637,9 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) { u64 old_pages; + /* JIT id not immediately available here, so use 0u */ + trace_mali_jit_free(reg, 0u); + /* Get current size of JIT region */ old_pages = kbase_reg_current_backed_size(reg); if (reg->initial_commit < old_pages) { @@ -3404,19 +3650,16 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) div_u64(old_pages * (100 - kctx->trim_level), 100)); u64 delta = old_pages - new_size; - if (delta) { - kbase_mem_shrink_cpu_mapping(kctx, reg, old_pages-delta, - old_pages); - kbase_mem_shrink_gpu_mapping(kctx, reg, old_pages-delta, - old_pages); - - kbase_free_phy_pages_helper(reg->cpu_alloc, delta); - if (reg->cpu_alloc != reg->gpu_alloc) - kbase_free_phy_pages_helper(reg->gpu_alloc, - delta); - } + if (delta) + kbase_mem_shrink(kctx, reg, old_pages - delta); } +#if MALI_JIT_PRESSURE_LIMIT + reg->heap_info_gpu_addr = 0; + kbase_jit_report_update_pressure(kctx, reg, 0, + KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); +#endif /* MALI_JIT_PRESSURE_LIMIT */ + kctx->jit_current_allocations--; kctx->jit_current_allocations_per_bin[reg->jit_bin_id]--; @@ -3535,6 +3778,118 @@ void kbase_jit_term(struct kbase_context *kctx) cancel_work_sync(&kctx->jit_work); } +#if MALI_JIT_PRESSURE_LIMIT +void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, + struct kbase_va_region *reg, unsigned int flags) +{ + /* Offset to the location used for a JIT report within the GPU memory + * + * This constants only used for this debugging function - not useful + * anywhere else in kbase + */ + const u64 jit_report_gpu_mem_offset = sizeof(u64)*2; + + u64 addr_start; + struct kbase_vmap_struct mapping; + u64 *ptr; + + if (reg->heap_info_gpu_addr == 0ull) + goto out; + + /* Nothing else to trace in the case the memory just contains the + * size. Other tracepoints already record the relevant area of memory. 
+ */ + if (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE) + goto out; + + addr_start = reg->heap_info_gpu_addr - jit_report_gpu_mem_offset; + + ptr = kbase_vmap(kctx, addr_start, KBASE_JIT_REPORT_GPU_MEM_SIZE, + &mapping); + if (!ptr) { + dev_warn(kctx->kbdev->dev, + "%s: JIT start=0x%llx unable to map memory near end pointer %llx\n", + __func__, reg->start_pfn << PAGE_SHIFT, + addr_start); + goto out; + } + + trace_mali_jit_report_gpu_mem(addr_start, reg->start_pfn << PAGE_SHIFT, + ptr, flags); + + kbase_vunmap(kctx, &mapping); +out: + return; +} +#endif /* MALI_JIT_PRESSURE_LIMIT */ + +#if MALI_JIT_PRESSURE_LIMIT +void kbase_jit_report_update_pressure(struct kbase_context *kctx, + struct kbase_va_region *reg, u64 new_used_pages, + unsigned int flags) +{ + u64 diff; + + lockdep_assert_held(&kctx->jctx.lock); + + trace_mali_jit_report_pressure(reg, new_used_pages, + kctx->jit_current_phys_pressure + new_used_pages - + reg->used_pages, + flags); + + if (WARN_ON(new_used_pages > reg->nr_pages)) + return; + + if (reg->used_pages > new_used_pages) { + /* We reduced the number of used pages */ + diff = reg->used_pages - new_used_pages; + + if (!WARN_ON(diff > kctx->jit_current_phys_pressure)) + kctx->jit_current_phys_pressure -= diff; + + reg->used_pages = new_used_pages; + + /* In the case of pressure reduced on a free, don't attempt to + * trim the region: it will soon be placed on the evict_list + * so that if we really were close to running out of memory then + * the shrinker can reclaim the memory. + */ + if ((flags & KBASE_JIT_REPORT_ON_ALLOC_OR_FREE) == 0u) { + size_t freed; + int err; + + kbase_gpu_vm_lock(kctx); + /* If this was from an allocation that a single + * BASE_JD_REQ_SOFT_JIT_ALLOC atom that is allowed to + * breach the pressure limit, then check whether we can + * bring the total JIT physical page below (or at least + * nearer) the pressure limit. + * + * In future, GPUCORE-21217: Only do this when physical + * page usage currently exceeds the pressure limit, and + * only trim as much as is necessary. + */ + err = kbase_mem_jit_trim_pages_from_region(kctx, reg, + SIZE_MAX, &freed); + kbase_gpu_vm_unlock(kctx); + + CSTD_UNUSED(freed); + /* Nothing to do if trimming failed */ + CSTD_UNUSED(err); + } + } else { + /* We increased the number of used pages */ + diff = new_used_pages - reg->used_pages; + + if (!WARN_ON(diff > U64_MAX - kctx->jit_current_phys_pressure)) + kctx->jit_current_phys_pressure += diff; + + reg->used_pages = new_used_pages; + } + +} +#endif /* MALI_JIT_PRESSURE_LIMIT */ + bool kbase_has_exec_va_zone(struct kbase_context *kctx) { bool has_exec_va_zone; -- cgit v1.2.3
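The comment block that introduces KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES and KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES above explains why an address-based JIT usage report may legitimately point a little past the backed pages. The stand-alone sketch below models that slack bound; it is not kbase code, and it assumes a 4 KiB page size purely for illustration (the driver uses PFN_UP() with the real PAGE_SIZE).

/*
 * Sketch only: a user-space model of the slack bound applied in
 * kbase_mem_jit_trim_pages_from_region(). Assumes a 4 KiB page size.
 */
#include <stdio.h>
#include <stddef.h>

#define MODEL_PAGE_SIZE            4096u
#define GPU_ALLOCATED_OBJECT_ALIGN  128u  /* KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES */
#define GPU_ALLOCATED_OBJECT_MAX    512u  /* KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES */

/* Round a byte count up to whole pages, like the kernel's PFN_UP(). */
static size_t pfn_up(size_t bytes)
{
	return (bytes + MODEL_PAGE_SIZE - 1) / MODEL_PAGE_SIZE;
}

/*
 * For an address-based heap report, the end-of-used-memory pointer may sit
 * up to (max object size - object alignment) bytes into the first unbacked
 * page, so the reported usage may exceed the backed size by that many pages.
 */
static size_t max_allowed_pages(size_t backed_pages)
{
	return backed_pages +
		pfn_up(GPU_ALLOCATED_OBJECT_MAX - GPU_ALLOCATED_OBJECT_ALIGN);
}

int main(void)
{
	/* With 512-byte objects aligned to 128 bytes the slack is one page. */
	printf("max allowed pages for 10 backed pages: %zu\n",
	       max_allowed_pages(10));
	return 0;
}

With the values in the patch the allowed overshoot works out to a single page, which is what the trim function adds to max_allowed_pages before deciding whether to warn.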
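kbase_mem_jit_trim_pages_from_region() frees only the pages that sit between the GPU's reported usage and the current backed size, capped by what the caller still needs; kbase_mem_jit_trim_pages() then walks jit_active_head accumulating the freed counts until pages_needed is satisfied. A minimal user-space model of the per-region arithmetic, with illustrative names rather than kbase API, might look like this:

/*
 * Sketch only: models the available/to_free calculation performed per JIT
 * region before kbase_mem_shrink() is called.
 */
#include <stdio.h>
#include <stddef.h>

struct jit_region_model {
	size_t backed_pages;	/* kbase_reg_current_backed_size(reg) */
	size_t used_pages;	/* reg->used_pages as reported by the GPU */
};

/* Returns how many pages would be trimmed from this region. */
static size_t trim_from_region(const struct jit_region_model *reg,
			       size_t pages_needed)
{
	size_t available, to_free;

	/* If reported usage covers the backed size, there is nothing to trim. */
	if (reg->used_pages >= reg->backed_pages)
		return 0;

	available = reg->backed_pages - reg->used_pages;
	to_free = available < pages_needed ? available : pages_needed;
	return to_free;
}

int main(void)
{
	struct jit_region_model reg = { .backed_pages = 64, .used_pages = 40 };

	/* 24 pages are unused; only 16 are needed, so 16 are trimmed. */
	printf("trimmed: %zu pages\n", trim_from_region(&reg, 16));
	return 0;
}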
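The new check at the top of kbase_jit_allocate() rejects a request whose va_pages would not fit in the headroom left under the physical-page pressure limit, but only once some pressure has actually accumulated. A sketch of that admission test follows; it is a plain C model and the type and function names are illustrative, not kbase API.

/*
 * Sketch only: the MALI_JIT_PRESSURE_LIMIT admission check modelled in
 * user space.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct jit_pressure_model {
	uint64_t phys_pages_limit;	/* kctx->jit_phys_pages_limit */
	uint64_t current_pressure;	/* kctx->jit_current_phys_pressure */
};

static bool jit_alloc_would_exceed_limit(const struct jit_pressure_model *p,
					 uint64_t va_pages)
{
	/* Mirrors the patch: only reject once some pressure has accumulated. */
	return p->current_pressure > 0 &&
	       va_pages > (p->phys_pages_limit - p->current_pressure);
}

int main(void)
{
	struct jit_pressure_model p = {
		.phys_pages_limit = 1024, .current_pressure = 900,
	};

	printf("request for 200 pages rejected: %d\n",
	       jit_alloc_would_exceed_limit(&p, 200));	/* prints 1 */
	printf("request for 100 pages rejected: %d\n",
	       jit_alloc_would_exceed_limit(&p, 100));	/* prints 0 */
	return 0;
}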
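kbase_jit_free() now shrinks a region through kbase_mem_shrink() instead of calling the CPU/GPU unmap and free helpers directly, but the target size is computed as before: (100 - trim_level) percent of the current backed size, never dropping below the initial commit. A small model of that arithmetic, assuming integer page counts:

/*
 * Sketch only: the trim-level target size computed on JIT free.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t jit_free_new_size(uint64_t old_pages, uint64_t initial_commit,
				  unsigned int trim_level /* 0..100 */)
{
	uint64_t trimmed = (old_pages * (100 - trim_level)) / 100;

	/* Never shrink below the pages committed at allocation time. */
	return trimmed > initial_commit ? trimmed : initial_commit;
}

int main(void)
{
	/* 256 backed pages, 64 initially committed, trim level 75% -> 64. */
	printf("new size: %llu\n",
	       (unsigned long long)jit_free_new_size(256, 64, 75));
	/* Trim level 25% keeps 192 pages. */
	printf("new size: %llu\n",
	       (unsigned long long)jit_free_new_size(256, 64, 25));
	return 0;
}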
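kbase_jit_report_update_pressure() keeps kctx->jit_current_phys_pressure equal to the sum of the per-region used_pages values, adjusting it by the signed difference and warning rather than wrapping on underflow or overflow. The sketch below mirrors only that bookkeeping; the trimming it triggers when pressure drops outside an alloc/free report is covered by the earlier trim model. Names are illustrative.

/*
 * Sketch only: pressure counter bookkeeping with underflow/overflow guards.
 */
#include <stdint.h>
#include <stdio.h>

struct pressure_ctx_model {
	uint64_t current_phys_pressure;	/* kctx->jit_current_phys_pressure */
};

static void update_pressure(struct pressure_ctx_model *ctx,
			    uint64_t *region_used_pages,
			    uint64_t new_used_pages)
{
	if (*region_used_pages > new_used_pages) {
		uint64_t diff = *region_used_pages - new_used_pages;

		/* Guard against underflow, as the WARN_ON in the patch does. */
		if (diff <= ctx->current_phys_pressure)
			ctx->current_phys_pressure -= diff;
	} else {
		uint64_t diff = new_used_pages - *region_used_pages;

		/* Guard against overflow of the 64-bit counter. */
		if (diff <= UINT64_MAX - ctx->current_phys_pressure)
			ctx->current_phys_pressure += diff;
	}
	*region_used_pages = new_used_pages;
}

int main(void)
{
	struct pressure_ctx_model ctx = { .current_phys_pressure = 500 };
	uint64_t region_used = 200;

	update_pressure(&ctx, &region_used, 50);	/* pressure drops by 150 */
	printf("pressure: %llu\n",
	       (unsigned long long)ctx.current_phys_pressure);	/* prints 350 */
	return 0;
}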