author     Sidath Senanayake <sidaths@google.com>    2018-03-19 13:26:23 +0100
committer  Sidath Senanayake <sidaths@google.com>    2018-03-19 13:26:23 +0100
commit     8946bcdee4c36dbc82b8c2a2abcf9c2f5eab5ae0 (patch)
tree       adf890cf7a6af02a05c8eb94c177bd83ca21fd8b /mali_kbase/mali_kbase_mem.c
parent     e42736e67f7d84d329d9595b7393e6784c5b887f (diff)
download   gpu-8946bcdee4c36dbc82b8c2a2abcf9c2f5eab5ae0.tar.gz
Mali Bifrost DDK r11p0 KMD
Provenance:
b1581ebda (collaborate/EAC/b_r11p0)
BX304L01B-BU-00000-r11p0-01rel0
BX304L06A-BU-00000-r11p0-01rel0
BX304X07X-BU-00000-r11p0-01rel0
Signed-off-by: Sidath Senanayake <sidaths@google.com>
Change-Id: Ia590e1eb21778d33cacbefba83598ee56790ca85
Diffstat (limited to 'mali_kbase/mali_kbase_mem.c')
-rw-r--r--  mali_kbase/mali_kbase_mem.c  809
1 file changed, 678 insertions, 131 deletions
diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c index a0897fb..59ccb40 100644 --- a/mali_kbase/mali_kbase_mem.c +++ b/mali_kbase/mali_kbase_mem.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -535,7 +535,8 @@ static void kbase_region_tracker_ds_init(struct kbase_context *kctx, /* Although exec and custom_va_reg don't always exist, * initialize unconditionally because of the mem_view debugfs - * implementation which relies on these being empty */ + * implementation which relies on these being empty + */ kctx->reg_rbtree_exec = RB_ROOT; kctx->reg_rbtree_custom = RB_ROOT; @@ -567,6 +568,32 @@ void kbase_region_tracker_term(struct kbase_context *kctx) kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom); } +static size_t kbase_get_same_va_bits(struct kbase_context *kctx) +{ +#if defined(CONFIG_ARM64) + /* VA_BITS can be as high as 48 bits, but all bits are available for + * both user and kernel. + */ + size_t cpu_va_bits = VA_BITS; +#elif defined(CONFIG_X86_64) + /* x86_64 can access 48 bits of VA, but the 48th is used to denote + * kernel (1) vs userspace (0), so the max here is 47. + */ + size_t cpu_va_bits = 47; +#elif defined(CONFIG_ARM) || defined(CONFIG_X86_32) + size_t cpu_va_bits = sizeof(void *) * BITS_PER_BYTE; +#else +#error "Unknown CPU VA width for this architecture" +#endif + +#ifdef CONFIG_64BIT + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) + cpu_va_bits = 32; +#endif + + return min(cpu_va_bits, (size_t) kctx->kbdev->gpu_props.mmu.va_bits); +} + /** * Initialize the region tracker data structure. 
*/ @@ -575,7 +602,7 @@ int kbase_region_tracker_init(struct kbase_context *kctx) struct kbase_va_region *same_va_reg; struct kbase_va_region *exec_reg = NULL; struct kbase_va_region *custom_va_reg = NULL; - size_t same_va_bits = sizeof(void *) * BITS_PER_BYTE; + size_t same_va_bits = kbase_get_same_va_bits(kctx); u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE; u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT; u64 same_va_pages; @@ -584,26 +611,6 @@ int kbase_region_tracker_init(struct kbase_context *kctx) /* Take the lock as kbase_free_alloced_region requires it */ kbase_gpu_vm_lock(kctx); -#if defined(CONFIG_ARM64) - same_va_bits = VA_BITS; -#elif defined(CONFIG_X86_64) - same_va_bits = 47; -#elif defined(CONFIG_64BIT) -#error Unsupported 64-bit architecture -#endif - -#ifdef CONFIG_64BIT - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) - same_va_bits = 32; - else if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) - same_va_bits = 33; -#endif - - if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits) { - err = -EINVAL; - goto fail_unlock; - } - same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1; /* all have SAME_VA */ same_va_reg = kbase_alloc_free_region(kctx, 1, @@ -652,7 +659,8 @@ int kbase_region_tracker_init(struct kbase_context *kctx) } #endif - kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg, custom_va_reg); + kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg, + custom_va_reg); kctx->same_va_end = same_va_pages + 1; @@ -668,33 +676,16 @@ fail_unlock: return err; } -int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages) -{ #ifdef CONFIG_64BIT +static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, + u64 jit_va_pages) +{ struct kbase_va_region *same_va; struct kbase_va_region *custom_va_reg; - u64 same_va_bits; + u64 same_va_bits = kbase_get_same_va_bits(kctx); u64 total_va_size; int err; - /* - * Nothing to do for 32-bit clients, JIT uses the existing - * custom VA zone. - */ - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) - return 0; - -#if defined(CONFIG_ARM64) - same_va_bits = VA_BITS; -#elif defined(CONFIG_X86_64) - same_va_bits = 47; -#elif defined(CONFIG_64BIT) -#error Unsupported 64-bit architecture -#endif - - if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) - same_va_bits = 33; - total_va_size = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1; kbase_gpu_vm_lock(kctx); @@ -754,9 +745,27 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages) fail_unlock: kbase_gpu_vm_unlock(kctx); return err; -#else - return 0; +} #endif + +int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, + u8 max_allocations, u8 trim_level) +{ + if (trim_level > 100) + return -EINVAL; + + kctx->jit_max_allocations = max_allocations; + kctx->trim_level = trim_level; + +#ifdef CONFIG_64BIT + if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) + return kbase_region_tracker_init_jit_64(kctx, jit_va_pages); +#endif + /* + * Nothing to do for 32-bit clients, JIT uses the existing + * custom VA zone. + */ + return 0; } int kbase_mem_init(struct kbase_device *kbdev) @@ -824,7 +833,8 @@ KBASE_EXPORT_TEST_API(kbase_mem_term); * The allocated object is not part of any list yet, and is flagged as * KBASE_REG_FREE. No mapping is allocated yet. 
* - * zone is KBASE_REG_ZONE_CUSTOM_VA, KBASE_REG_ZONE_SAME_VA, or KBASE_REG_ZONE_EXEC + * zone is KBASE_REG_ZONE_CUSTOM_VA, KBASE_REG_ZONE_SAME_VA, + * or KBASE_REG_ZONE_EXEC * */ struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone) @@ -874,6 +884,8 @@ KBASE_EXPORT_TEST_API(kbase_alloc_free_region); void kbase_free_alloced_region(struct kbase_va_region *reg) { if (!(reg->flags & KBASE_REG_FREE)) { + mutex_lock(®->kctx->jit_evict_lock); + /* * The physical allocation should have been removed from the * eviction list before this function is called. However, in the @@ -882,6 +894,8 @@ void kbase_free_alloced_region(struct kbase_va_region *reg) * on the list at termination time of the region tracker. */ if (!list_empty(®->gpu_alloc->evict_node)) { + mutex_unlock(®->kctx->jit_evict_lock); + /* * Unlink the physical allocation before unmaking it * evictable so that the allocation isn't grown back to @@ -904,6 +918,8 @@ void kbase_free_alloced_region(struct kbase_va_region *reg) KBASE_MEM_TYPE_NATIVE); kbase_mem_evictable_unmake(reg->gpu_alloc); } + } else { + mutex_unlock(®->kctx->jit_evict_lock); } /* @@ -1497,9 +1513,8 @@ int kbase_update_region_flags(struct kbase_context *kctx, return 0; } -int kbase_alloc_phy_pages_helper( - struct kbase_mem_phy_alloc *alloc, - size_t nr_pages_requested) +int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, + size_t nr_pages_requested) { int new_page_count __maybe_unused; size_t nr_left = nr_pages_requested; @@ -1649,12 +1664,18 @@ done: alloc_failed: /* rollback needed if got one or more 2MB but failed later */ - if (nr_left != nr_pages_requested) - kbase_mem_pool_free_pages(&kctx->lp_mem_pool, - nr_pages_requested - nr_left, - alloc->pages + alloc->nents, - false, - false); + if (nr_left != nr_pages_requested) { + size_t nr_pages_to_free = nr_pages_requested - nr_left; + + alloc->nents += nr_pages_to_free; + + kbase_process_page_usage_inc(kctx, nr_pages_to_free); + kbase_atomic_add_pages(nr_pages_to_free, &kctx->used_pages); + kbase_atomic_add_pages(nr_pages_to_free, + &kctx->kbdev->memdev.used_pages); + + kbase_free_phy_pages_helper(alloc, nr_pages_to_free); + } kbase_process_page_usage_dec(kctx, nr_pages_requested); kbase_atomic_sub_pages(nr_pages_requested, &kctx->used_pages); @@ -1665,6 +1686,181 @@ invalid_request: return -ENOMEM; } +struct tagged_addr *kbase_alloc_phy_pages_helper_locked( + struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool, + size_t nr_pages_requested) +{ + int new_page_count __maybe_unused; + size_t nr_left = nr_pages_requested; + int res; + struct kbase_context *kctx; + struct tagged_addr *tp; + struct tagged_addr *new_pages = NULL; + + KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); + KBASE_DEBUG_ASSERT(alloc->imported.kctx); + + lockdep_assert_held(&pool->pool_lock); + +#if !defined(CONFIG_MALI_2MB_ALLOC) + WARN_ON(pool->order); +#endif + + if (alloc->reg) { + if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents) + goto invalid_request; + } + + kctx = alloc->imported.kctx; + + lockdep_assert_held(&kctx->mem_partials_lock); + + if (nr_pages_requested == 0) + goto done; /*nothing to do*/ + + new_page_count = kbase_atomic_add_pages( + nr_pages_requested, &kctx->used_pages); + kbase_atomic_add_pages(nr_pages_requested, + &kctx->kbdev->memdev.used_pages); + + /* Increase mm counters before we allocate pages so that this + * allocation is visible to the OOM killer + */ + kbase_process_page_usage_inc(kctx, nr_pages_requested); 
+ + tp = alloc->pages + alloc->nents; + new_pages = tp; + +#ifdef CONFIG_MALI_2MB_ALLOC + if (pool->order) { + int nr_lp = nr_left / (SZ_2M / SZ_4K); + + res = kbase_mem_pool_alloc_pages_locked(pool, + nr_lp * (SZ_2M / SZ_4K), + tp); + + if (res > 0) { + nr_left -= res; + tp += res; + } + + if (nr_left) { + struct kbase_sub_alloc *sa, *temp_sa; + + list_for_each_entry_safe(sa, temp_sa, + &kctx->mem_partials, link) { + int pidx = 0; + + while (nr_left) { + pidx = find_next_zero_bit(sa->sub_pages, + SZ_2M / SZ_4K, + pidx); + bitmap_set(sa->sub_pages, pidx, 1); + *tp++ = as_tagged_tag(page_to_phys( + sa->page + pidx), + FROM_PARTIAL); + nr_left--; + + if (bitmap_full(sa->sub_pages, + SZ_2M / SZ_4K)) { + /* unlink from partial list when + * full + */ + list_del_init(&sa->link); + break; + } + } + } + } + + /* only if we actually have a chunk left <512. If more it + * indicates that we couldn't allocate a 2MB above, so no point + * to retry here. + */ + if (nr_left > 0 && nr_left < (SZ_2M / SZ_4K)) { + /* create a new partial and suballocate the rest from it + */ + struct page *np = NULL; + + np = kbase_mem_pool_alloc_locked(pool); + + if (np) { + int i; + struct kbase_sub_alloc *sa; + struct page *p; + + sa = kmalloc(sizeof(*sa), GFP_KERNEL); + if (!sa) { + kbase_mem_pool_free_locked(pool, np, + false); + goto alloc_failed; + } + + /* store pointers back to the control struct */ + np->lru.next = (void *)sa; + for (p = np; p < np + SZ_2M / SZ_4K; p++) + p->lru.prev = (void *)np; + INIT_LIST_HEAD(&sa->link); + bitmap_zero(sa->sub_pages, SZ_2M / SZ_4K); + sa->page = np; + + for (i = 0; i < nr_left; i++) + *tp++ = as_tagged_tag( + page_to_phys(np + i), + FROM_PARTIAL); + + bitmap_set(sa->sub_pages, 0, nr_left); + nr_left = 0; + + /* expose for later use */ + list_add(&sa->link, &kctx->mem_partials); + } + } + if (nr_left) + goto alloc_failed; + } else { +#endif + res = kbase_mem_pool_alloc_pages_locked(pool, + nr_left, + tp); + if (res <= 0) + goto alloc_failed; +#ifdef CONFIG_MALI_2MB_ALLOC + } +#endif + + KBASE_TLSTREAM_AUX_PAGESALLOC( + kctx->id, + (u64)new_page_count); + + alloc->nents += nr_pages_requested; +done: + return new_pages; + +alloc_failed: + /* rollback needed if got one or more 2MB but failed later */ + if (nr_left != nr_pages_requested) { + size_t nr_pages_to_free = nr_pages_requested - nr_left; + + alloc->nents += nr_pages_to_free; + + kbase_process_page_usage_inc(kctx, nr_pages_to_free); + kbase_atomic_add_pages(nr_pages_to_free, &kctx->used_pages); + kbase_atomic_add_pages(nr_pages_to_free, + &kctx->kbdev->memdev.used_pages); + + kbase_free_phy_pages_helper(alloc, nr_pages_to_free); + } + + kbase_process_page_usage_dec(kctx, nr_pages_requested); + kbase_atomic_sub_pages(nr_pages_requested, &kctx->used_pages); + kbase_atomic_sub_pages(nr_pages_requested, + &kctx->kbdev->memdev.used_pages); + +invalid_request: + return NULL; +} + static void free_partial(struct kbase_context *kctx, struct tagged_addr tp) { struct page *p, *head_page; @@ -1776,6 +1972,124 @@ int kbase_free_phy_pages_helper( return 0; } +static void free_partial_locked(struct kbase_context *kctx, + struct kbase_mem_pool *pool, struct tagged_addr tp) +{ + struct page *p, *head_page; + struct kbase_sub_alloc *sa; + + lockdep_assert_held(&pool->pool_lock); + lockdep_assert_held(&kctx->mem_partials_lock); + + p = phys_to_page(as_phys_addr_t(tp)); + head_page = (struct page *)p->lru.prev; + sa = (struct kbase_sub_alloc *)head_page->lru.next; + clear_bit(p - head_page, sa->sub_pages); + if (bitmap_empty(sa->sub_pages, 
SZ_2M / SZ_4K)) { + list_del(&sa->link); + kbase_mem_pool_free(pool, head_page, true); + kfree(sa); + } else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) == + SZ_2M / SZ_4K - 1) { + /* expose the partial again */ + list_add(&sa->link, &kctx->mem_partials); + } +} + +void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, + struct kbase_mem_pool *pool, struct tagged_addr *pages, + size_t nr_pages_to_free) +{ + struct kbase_context *kctx = alloc->imported.kctx; + bool syncback; + bool reclaimed = (alloc->evicted != 0); + struct tagged_addr *start_free; + size_t freed = 0; + + KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); + KBASE_DEBUG_ASSERT(alloc->imported.kctx); + KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free); + + lockdep_assert_held(&pool->pool_lock); + lockdep_assert_held(&kctx->mem_partials_lock); + + /* early out if nothing to do */ + if (!nr_pages_to_free) + return; + + start_free = pages; + + syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; + + /* pad start_free to a valid start location */ + while (nr_pages_to_free && is_huge(*start_free) && + !is_huge_head(*start_free)) { + nr_pages_to_free--; + start_free++; + } + + while (nr_pages_to_free) { + if (is_huge_head(*start_free)) { + /* This is a 2MB entry, so free all the 512 pages that + * it points to + */ + WARN_ON(!pool->order); + kbase_mem_pool_free_pages_locked(pool, + 512, + start_free, + syncback, + reclaimed); + nr_pages_to_free -= 512; + start_free += 512; + freed += 512; + } else if (is_partial(*start_free)) { + WARN_ON(!pool->order); + free_partial_locked(kctx, pool, *start_free); + nr_pages_to_free--; + start_free++; + freed++; + } else { + struct tagged_addr *local_end_free; + + WARN_ON(pool->order); + local_end_free = start_free; + while (nr_pages_to_free && + !is_huge(*local_end_free) && + !is_partial(*local_end_free)) { + local_end_free++; + nr_pages_to_free--; + } + kbase_mem_pool_free_pages_locked(pool, + local_end_free - start_free, + start_free, + syncback, + reclaimed); + freed += local_end_free - start_free; + start_free += local_end_free - start_free; + } + } + + alloc->nents -= freed; + + /* + * If the allocation was not evicted (i.e. evicted == 0) then + * the page accounting needs to be done. + */ + if (!reclaimed) { + int new_page_count; + + kbase_process_page_usage_dec(kctx, freed); + new_page_count = kbase_atomic_sub_pages(freed, + &kctx->used_pages); + kbase_atomic_sub_pages(freed, + &kctx->kbdev->memdev.used_pages); + + KBASE_TLSTREAM_AUX_PAGESALLOC( + kctx->id, + (u64)new_page_count); + } +} + void kbase_mem_kref_free(struct kref *kref) { struct kbase_mem_phy_alloc *alloc; @@ -1784,12 +2098,15 @@ void kbase_mem_kref_free(struct kref *kref) switch (alloc->type) { case KBASE_MEM_TYPE_NATIVE: { - WARN_ON(!alloc->imported.kctx); - /* - * The physical allocation must have been removed from the - * eviction list before trying to free it. - */ - WARN_ON(!list_empty(&alloc->evict_node)); + if (!WARN_ON(!alloc->imported.kctx)) { + /* + * The physical allocation must have been removed from + * the eviction list before trying to free it. 
+ */ + mutex_lock(&alloc->imported.kctx->jit_evict_lock); + WARN_ON(!list_empty(&alloc->evict_node)); + mutex_unlock(&alloc->imported.kctx->jit_evict_lock); + } kbase_free_phy_pages_helper(alloc, alloc->nents); break; } @@ -2284,6 +2601,7 @@ static void kbase_jit_destroy_worker(struct work_struct *work) int kbase_jit_init(struct kbase_context *kctx) { + mutex_lock(&kctx->jit_evict_lock); INIT_LIST_HEAD(&kctx->jit_active_head); INIT_LIST_HEAD(&kctx->jit_pool_head); INIT_LIST_HEAD(&kctx->jit_destroy_head); @@ -2291,49 +2609,255 @@ int kbase_jit_init(struct kbase_context *kctx) INIT_LIST_HEAD(&kctx->jit_pending_alloc); INIT_LIST_HEAD(&kctx->jit_atoms_head); + mutex_unlock(&kctx->jit_evict_lock); + + kctx->jit_max_allocations = 0; + kctx->jit_current_allocations = 0; + kctx->trim_level = 0; return 0; } +/* Check if the allocation from JIT pool is of the same size as the new JIT + * allocation and also, if BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP is set, meets + * the alignment requirements. + */ +static bool meet_size_and_tiler_align_top_requirements(struct kbase_context *kctx, + struct kbase_va_region *walker, struct base_jit_alloc_info *info) +{ + bool meet_reqs = true; + + if (walker->nr_pages != info->va_pages) + meet_reqs = false; + else if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) { + size_t align = info->extent; + size_t align_mask = align - 1; + + if ((walker->start_pfn + info->commit_pages) & align_mask) + meet_reqs = false; + } + + return meet_reqs; +} + +static int kbase_jit_grow(struct kbase_context *kctx, + struct base_jit_alloc_info *info, struct kbase_va_region *reg) +{ + size_t delta; + size_t pages_required; + size_t old_size; + struct kbase_mem_pool *pool; + int ret = -ENOMEM; + struct tagged_addr *gpu_pages; + + if (info->commit_pages > reg->nr_pages) { + /* Attempted to grow larger than maximum size */ + return -EINVAL; + } + + kbase_gpu_vm_lock(kctx); + + /* Make the physical backing no longer reclaimable */ + if (!kbase_mem_evictable_unmake(reg->gpu_alloc)) + goto update_failed; + + if (reg->gpu_alloc->nents >= info->commit_pages) + goto done; + + /* Grow the backing */ + old_size = reg->gpu_alloc->nents; + + /* Allocate some more pages */ + delta = info->commit_pages - reg->gpu_alloc->nents; + pages_required = delta; + +#ifdef CONFIG_MALI_2MB_ALLOC + if (pages_required >= (SZ_2M / SZ_4K)) { + pool = &kctx->lp_mem_pool; + /* Round up to number of 2 MB pages required */ + pages_required += ((SZ_2M / SZ_4K) - 1); + pages_required /= (SZ_2M / SZ_4K); + } else { +#endif + pool = &kctx->mem_pool; +#ifdef CONFIG_MALI_2MB_ALLOC + } +#endif + + if (reg->cpu_alloc != reg->gpu_alloc) + pages_required *= 2; + + mutex_lock(&kctx->mem_partials_lock); + kbase_mem_pool_lock(pool); + + /* As we can not allocate memory from the kernel with the vm_lock held, + * grow the pool to the required size with the lock dropped. We hold the + * pool lock to prevent another thread from allocating from the pool + * between the grow and allocation. 
+ */ + while (kbase_mem_pool_size(pool) < pages_required) { + int pool_delta = pages_required - kbase_mem_pool_size(pool); + + kbase_mem_pool_unlock(pool); + mutex_unlock(&kctx->mem_partials_lock); + kbase_gpu_vm_unlock(kctx); + + if (kbase_mem_pool_grow(pool, pool_delta)) + goto update_failed_unlocked; + + kbase_gpu_vm_lock(kctx); + mutex_lock(&kctx->mem_partials_lock); + kbase_mem_pool_lock(pool); + } + + gpu_pages = kbase_alloc_phy_pages_helper_locked(reg->gpu_alloc, pool, + delta); + if (!gpu_pages) { + kbase_mem_pool_unlock(pool); + mutex_unlock(&kctx->mem_partials_lock); + goto update_failed; + } + + if (reg->cpu_alloc != reg->gpu_alloc) { + struct tagged_addr *cpu_pages; + + cpu_pages = kbase_alloc_phy_pages_helper_locked(reg->cpu_alloc, + pool, delta); + if (!cpu_pages) { + kbase_free_phy_pages_helper_locked(reg->gpu_alloc, + pool, gpu_pages, delta); + kbase_mem_pool_unlock(pool); + mutex_unlock(&kctx->mem_partials_lock); + goto update_failed; + } + } + kbase_mem_pool_unlock(pool); + mutex_unlock(&kctx->mem_partials_lock); + + ret = kbase_mem_grow_gpu_mapping(kctx, reg, info->commit_pages, + old_size); + /* + * The grow failed so put the allocation back in the + * pool and return failure. + */ + if (ret) + goto update_failed; + +done: + ret = 0; + + /* Update attributes of JIT allocation taken from the pool */ + reg->initial_commit = info->commit_pages; + reg->extent = info->extent; + +update_failed: + kbase_gpu_vm_unlock(kctx); +update_failed_unlocked: + return ret; +} + struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, struct base_jit_alloc_info *info) { struct kbase_va_region *reg = NULL; - struct kbase_va_region *walker; - struct kbase_va_region *temp; - size_t current_diff = SIZE_MAX; - int ret; + if (kctx->jit_current_allocations >= kctx->jit_max_allocations) { + /* Too many current allocations */ + return NULL; + } + if (info->max_allocations > 0 && + kctx->jit_current_allocations_per_bin[info->bin_id] >= + info->max_allocations) { + /* Too many current allocations in this bin */ + return NULL; + } mutex_lock(&kctx->jit_evict_lock); + /* * Scan the pool for an existing allocation which meets our * requirements and remove it. */ - list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head, jit_node) { - - if (walker->nr_pages >= info->va_pages) { - size_t min_size, max_size, diff; + if (info->usage_id != 0) { + /* First scan for an allocation with the same usage ID */ + struct kbase_va_region *walker; + struct kbase_va_region *temp; + size_t current_diff = SIZE_MAX; + + list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head, + jit_node) { + + if (walker->jit_usage_id == info->usage_id && + walker->jit_bin_id == info->bin_id && + meet_size_and_tiler_align_top_requirements( + kctx, walker, info)) { + size_t min_size, max_size, diff; + + /* + * The JIT allocations VA requirements have been + * met, it's suitable but other allocations + * might be a better fit. + */ + min_size = min_t(size_t, + walker->gpu_alloc->nents, + info->commit_pages); + max_size = max_t(size_t, + walker->gpu_alloc->nents, + info->commit_pages); + diff = max_size - min_size; + + if (current_diff > diff) { + current_diff = diff; + reg = walker; + } - /* - * The JIT allocations VA requirements have been - * meet, it's suitable but other allocations - * might be a better fit. 
- */ - min_size = min_t(size_t, walker->gpu_alloc->nents, - info->commit_pages); - max_size = max_t(size_t, walker->gpu_alloc->nents, - info->commit_pages); - diff = max_size - min_size; - - if (current_diff > diff) { - current_diff = diff; - reg = walker; + /* The allocation is an exact match */ + if (current_diff == 0) + break; } + } + } - /* The allocation is an exact match, stop looking */ - if (current_diff == 0) - break; + if (!reg) { + /* No allocation with the same usage ID, or usage IDs not in + * use. Search for an allocation we can reuse. + */ + struct kbase_va_region *walker; + struct kbase_va_region *temp; + size_t current_diff = SIZE_MAX; + + list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head, + jit_node) { + + if (walker->jit_bin_id == info->bin_id && + meet_size_and_tiler_align_top_requirements( + kctx, walker, info)) { + size_t min_size, max_size, diff; + + /* + * The JIT allocations VA requirements have been + * met, it's suitable but other allocations + * might be a better fit. + */ + min_size = min_t(size_t, + walker->gpu_alloc->nents, + info->commit_pages); + max_size = max_t(size_t, + walker->gpu_alloc->nents, + info->commit_pages); + diff = max_size - min_size; + + if (current_diff > diff) { + current_diff = diff; + reg = walker; + } + + /* The allocation is an exact match, so stop + * looking. + */ + if (current_diff == 0) + break; + } } } @@ -2352,42 +2876,15 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, list_del_init(®->gpu_alloc->evict_node); mutex_unlock(&kctx->jit_evict_lock); - kbase_gpu_vm_lock(kctx); - - /* Make the physical backing no longer reclaimable */ - if (!kbase_mem_evictable_unmake(reg->gpu_alloc)) - goto update_failed; - - /* Grow the backing if required */ - if (reg->gpu_alloc->nents < info->commit_pages) { - size_t delta; - size_t old_size = reg->gpu_alloc->nents; - - /* Allocate some more pages */ - delta = info->commit_pages - reg->gpu_alloc->nents; - if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, delta) - != 0) - goto update_failed; - - if (reg->cpu_alloc != reg->gpu_alloc) { - if (kbase_alloc_phy_pages_helper( - reg->cpu_alloc, delta) != 0) { - kbase_free_phy_pages_helper( - reg->gpu_alloc, delta); - goto update_failed; - } - } - - ret = kbase_mem_grow_gpu_mapping(kctx, reg, - info->commit_pages, old_size); + if (kbase_jit_grow(kctx, info, reg) < 0) { /* - * The grow failed so put the allocation back in the - * pool and return failure. + * An update to an allocation from the pool failed, + * chances are slim a new allocation would fair any + * better so return the allocation to the pool and + * return the function with failure. 
*/ - if (ret) - goto update_failed; + goto update_failed_unlocked; } - kbase_gpu_vm_unlock(kctx); } else { /* No suitable JIT allocation was found so create a new one */ u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD | @@ -2397,6 +2894,9 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, mutex_unlock(&kctx->jit_evict_lock); + if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) + flags |= BASE_MEM_TILER_ALIGN_TOP; + reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, info->extent, &flags, &gpu_addr); if (!reg) @@ -2409,15 +2909,15 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, mutex_unlock(&kctx->jit_evict_lock); } + kctx->jit_current_allocations++; + kctx->jit_current_allocations_per_bin[info->bin_id]++; + + reg->jit_usage_id = info->usage_id; + reg->jit_bin_id = info->bin_id; + return reg; -update_failed: - /* - * An update to an allocation from the pool failed, chances - * are slim a new allocation would fair any better so return - * the allocation to the pool and return the function with failure. - */ - kbase_gpu_vm_unlock(kctx); +update_failed_unlocked: mutex_lock(&kctx->jit_evict_lock); list_move(®->jit_node, &kctx->jit_pool_head); mutex_unlock(&kctx->jit_evict_lock); @@ -2427,13 +2927,53 @@ out_unlocked: void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) { - /* The physical backing of memory in the pool is always reclaimable */ + u64 old_pages; + + /* Get current size of JIT region */ + old_pages = kbase_reg_current_backed_size(reg); + if (reg->initial_commit < old_pages) { + /* Free trim_level % of region, but don't go below initial + * commit size + */ + u64 new_size = MAX(reg->initial_commit, + div_u64(old_pages * (100 - kctx->trim_level), 100)); + u64 delta = old_pages - new_size; + + if (delta) { + kbase_mem_shrink_cpu_mapping(kctx, reg, old_pages-delta, + old_pages); + kbase_mem_shrink_gpu_mapping(kctx, reg, old_pages-delta, + old_pages); + + kbase_free_phy_pages_helper(reg->cpu_alloc, delta); + if (reg->cpu_alloc != reg->gpu_alloc) + kbase_free_phy_pages_helper(reg->gpu_alloc, + delta); + } + } + + kctx->jit_current_allocations--; + kctx->jit_current_allocations_per_bin[reg->jit_bin_id]--; + + kbase_mem_evictable_mark_reclaim(reg->gpu_alloc); + kbase_gpu_vm_lock(kctx); - kbase_mem_evictable_make(reg->gpu_alloc); + reg->flags |= KBASE_REG_DONT_NEED; + kbase_mem_shrink_cpu_mapping(kctx, reg, 0, reg->gpu_alloc->nents); kbase_gpu_vm_unlock(kctx); + /* + * Add the allocation to the eviction list and the jit pool, after this + * point the shrink can reclaim it, or it may be reused. + */ mutex_lock(&kctx->jit_evict_lock); + + /* This allocation can't already be on a list. 
*/ + WARN_ON(!list_empty(®->gpu_alloc->evict_node)); + list_add(®->gpu_alloc->evict_node, &kctx->evict_list); + list_move(®->jit_node, &kctx->jit_pool_head); + mutex_unlock(&kctx->jit_evict_lock); } @@ -2670,6 +3210,7 @@ static int kbase_jd_umm_map(struct kbase_context *kctx, int err; size_t count = 0; struct kbase_mem_phy_alloc *alloc; + unsigned long gwt_mask = ~0; alloc = reg->gpu_alloc; @@ -2718,10 +3259,16 @@ static int kbase_jd_umm_map(struct kbase_context *kctx, /* Update nents as we now have pages to map */ alloc->nents = reg->nr_pages; +#ifdef CONFIG_MALI_JOB_DUMP + if (kctx->gwt_enabled) + gwt_mask = ~KBASE_REG_GPU_WR; +#endif + err = kbase_mmu_insert_pages(kctx, reg->start_pfn, kbase_get_gpu_phy_pages(reg), count, - reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD); + (reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD) & + gwt_mask); if (err) goto err_unmap_attachment; |
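
A minimal standalone sketch (not part of the driver) of the trim-on-free arithmetic introduced in the kbase_jit_free() hunk above: when a JIT region is freed, its backing is shrunk by kctx->trim_level percent of the current size, but never below the initial commit. The helper name and the values in main() are hypothetical; the real code operates on u64 page counts with MAX() and div_u64().

/* Sketch of the kbase_jit_free() trim calculation; values are hypothetical. */
#include <stdio.h>
#include <stdint.h>

static uint64_t jit_trimmed_size(uint64_t old_pages, uint64_t initial_commit,
				 unsigned int trim_level)
{
	/* Keep (100 - trim_level)% of the current backing ... */
	uint64_t new_size = old_pages * (100 - trim_level) / 100;

	/* ... but never shrink below the initial commit size. */
	return new_size > initial_commit ? new_size : initial_commit;
}

int main(void)
{
	/* A region grown to 1000 pages, initially committed at 256 pages,
	 * with trim_level = 75: keeps max(256, 250) = 256 pages, so a
	 * delta of 744 pages is freed back to the pool on kbase_jit_free().
	 */
	printf("%llu pages kept\n",
	       (unsigned long long)jit_trimmed_size(1000, 256, 75));
	return 0;
}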