author     Sidath Senanayake <sidaths@google.com>  2018-03-19 13:26:23 +0100
committer  Sidath Senanayake <sidaths@google.com>  2018-03-19 13:26:23 +0100
commit     8946bcdee4c36dbc82b8c2a2abcf9c2f5eab5ae0 (patch)
tree       adf890cf7a6af02a05c8eb94c177bd83ca21fd8b /mali_kbase/mali_kbase_mem.c
parent     e42736e67f7d84d329d9595b7393e6784c5b887f (diff)
download   gpu-8946bcdee4c36dbc82b8c2a2abcf9c2f5eab5ae0.tar.gz
Mali Bifrost DDK r11p0 KMD
Provenance: b1581ebda (collaborate/EAC/b_r11p0)
BX304L01B-BU-00000-r11p0-01rel0
BX304L06A-BU-00000-r11p0-01rel0
BX304X07X-BU-00000-r11p0-01rel0

Signed-off-by: Sidath Senanayake <sidaths@google.com>
Change-Id: Ia590e1eb21778d33cacbefba83598ee56790ca85
Diffstat (limited to 'mali_kbase/mali_kbase_mem.c')
-rw-r--r--  mali_kbase/mali_kbase_mem.c  809
1 file changed, 678 insertions(+), 131 deletions(-)
diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c
index a0897fb..59ccb40 100644
--- a/mali_kbase/mali_kbase_mem.c
+++ b/mali_kbase/mali_kbase_mem.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -535,7 +535,8 @@ static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
/* Although exec and custom_va_reg don't always exist,
* initialize unconditionally because of the mem_view debugfs
- * implementation which relies on these being empty */
+ * implementation which relies on these being empty
+ */
kctx->reg_rbtree_exec = RB_ROOT;
kctx->reg_rbtree_custom = RB_ROOT;
@@ -567,6 +568,32 @@ void kbase_region_tracker_term(struct kbase_context *kctx)
kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom);
}
+static size_t kbase_get_same_va_bits(struct kbase_context *kctx)
+{
+#if defined(CONFIG_ARM64)
+ /* VA_BITS can be as high as 48 bits, but all bits are available for
+ * both user and kernel.
+ */
+ size_t cpu_va_bits = VA_BITS;
+#elif defined(CONFIG_X86_64)
+ /* x86_64 can access 48 bits of VA, but the 48th is used to denote
+ * kernel (1) vs userspace (0), so the max here is 47.
+ */
+ size_t cpu_va_bits = 47;
+#elif defined(CONFIG_ARM) || defined(CONFIG_X86_32)
+ size_t cpu_va_bits = sizeof(void *) * BITS_PER_BYTE;
+#else
+#error "Unknown CPU VA width for this architecture"
+#endif
+
+#ifdef CONFIG_64BIT
+ if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+ cpu_va_bits = 32;
+#endif
+
+ return min(cpu_va_bits, (size_t) kctx->kbdev->gpu_props.mmu.va_bits);
+}
+
/**
* Initialize the region tracker data structure.
*/
@@ -575,7 +602,7 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
struct kbase_va_region *same_va_reg;
struct kbase_va_region *exec_reg = NULL;
struct kbase_va_region *custom_va_reg = NULL;
- size_t same_va_bits = sizeof(void *) * BITS_PER_BYTE;
+ size_t same_va_bits = kbase_get_same_va_bits(kctx);
u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT;
u64 same_va_pages;
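
For reference, the width selection that the new kbase_get_same_va_bits() helper performs boils down to the small standalone sketch below (plain C, built outside the kernel): the SAME_VA zone is sized by whichever of the CPU and GPU virtual address widths is smaller, and compat (32-bit) clients are clamped to 32 bits. The 39-bit arm64 VA_BITS and 48-bit GPU MMU used in the example are illustrative assumptions, not values taken from this patch.

#include <stdio.h>
#include <stddef.h>

size_t same_va_bits(size_t cpu_va_bits, size_t gpu_va_bits, int compat)
{
        if (compat)                     /* 32-bit client on a 64-bit kernel */
                cpu_va_bits = 32;
        return cpu_va_bits < gpu_va_bits ? cpu_va_bits : gpu_va_bits;
}

int main(void)
{
        /* e.g. arm64 built with 39-bit VA_BITS, GPU MMU with 48 VA bits */
        printf("%zu\n", same_va_bits(39, 48, 0));       /* 39 */
        /* the same system, but a compat (32-bit) context */
        printf("%zu\n", same_va_bits(39, 48, 1));       /* 32 */
        return 0;
}
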
@@ -584,26 +611,6 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
/* Take the lock as kbase_free_alloced_region requires it */
kbase_gpu_vm_lock(kctx);
-#if defined(CONFIG_ARM64)
- same_va_bits = VA_BITS;
-#elif defined(CONFIG_X86_64)
- same_va_bits = 47;
-#elif defined(CONFIG_64BIT)
-#error Unsupported 64-bit architecture
-#endif
-
-#ifdef CONFIG_64BIT
- if (kbase_ctx_flag(kctx, KCTX_COMPAT))
- same_va_bits = 32;
- else if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA))
- same_va_bits = 33;
-#endif
-
- if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits) {
- err = -EINVAL;
- goto fail_unlock;
- }
-
same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
/* all have SAME_VA */
same_va_reg = kbase_alloc_free_region(kctx, 1,
@@ -652,7 +659,8 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
}
#endif
- kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg, custom_va_reg);
+ kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg,
+ custom_va_reg);
kctx->same_va_end = same_va_pages + 1;
@@ -668,33 +676,16 @@ fail_unlock:
return err;
}
-int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages)
-{
#ifdef CONFIG_64BIT
+static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
+ u64 jit_va_pages)
+{
struct kbase_va_region *same_va;
struct kbase_va_region *custom_va_reg;
- u64 same_va_bits;
+ u64 same_va_bits = kbase_get_same_va_bits(kctx);
u64 total_va_size;
int err;
- /*
- * Nothing to do for 32-bit clients, JIT uses the existing
- * custom VA zone.
- */
- if (kbase_ctx_flag(kctx, KCTX_COMPAT))
- return 0;
-
-#if defined(CONFIG_ARM64)
- same_va_bits = VA_BITS;
-#elif defined(CONFIG_X86_64)
- same_va_bits = 47;
-#elif defined(CONFIG_64BIT)
-#error Unsupported 64-bit architecture
-#endif
-
- if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA))
- same_va_bits = 33;
-
total_va_size = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
kbase_gpu_vm_lock(kctx);
@@ -754,9 +745,27 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages)
fail_unlock:
kbase_gpu_vm_unlock(kctx);
return err;
-#else
- return 0;
+}
#endif
+
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
+ u8 max_allocations, u8 trim_level)
+{
+ if (trim_level > 100)
+ return -EINVAL;
+
+ kctx->jit_max_allocations = max_allocations;
+ kctx->trim_level = trim_level;
+
+#ifdef CONFIG_64BIT
+ if (!kbase_ctx_flag(kctx, KCTX_COMPAT))
+ return kbase_region_tracker_init_jit_64(kctx, jit_va_pages);
+#endif
+ /*
+ * Nothing to do for 32-bit clients, JIT uses the existing
+ * custom VA zone.
+ */
+ return 0;
}
int kbase_mem_init(struct kbase_device *kbdev)
@@ -824,7 +833,8 @@ KBASE_EXPORT_TEST_API(kbase_mem_term);
* The allocated object is not part of any list yet, and is flagged as
* KBASE_REG_FREE. No mapping is allocated yet.
*
- * zone is KBASE_REG_ZONE_CUSTOM_VA, KBASE_REG_ZONE_SAME_VA, or KBASE_REG_ZONE_EXEC
+ * zone is KBASE_REG_ZONE_CUSTOM_VA, KBASE_REG_ZONE_SAME_VA,
+ * or KBASE_REG_ZONE_EXEC
*
*/
struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone)
@@ -874,6 +884,8 @@ KBASE_EXPORT_TEST_API(kbase_alloc_free_region);
void kbase_free_alloced_region(struct kbase_va_region *reg)
{
if (!(reg->flags & KBASE_REG_FREE)) {
+ mutex_lock(&reg->kctx->jit_evict_lock);
+
/*
* The physical allocation should have been removed from the
* eviction list before this function is called. However, in the
@@ -882,6 +894,8 @@ void kbase_free_alloced_region(struct kbase_va_region *reg)
* on the list at termination time of the region tracker.
*/
if (!list_empty(&reg->gpu_alloc->evict_node)) {
+ mutex_unlock(&reg->kctx->jit_evict_lock);
+
/*
* Unlink the physical allocation before unmaking it
* evictable so that the allocation isn't grown back to
@@ -904,6 +918,8 @@ void kbase_free_alloced_region(struct kbase_va_region *reg)
KBASE_MEM_TYPE_NATIVE);
kbase_mem_evictable_unmake(reg->gpu_alloc);
}
+ } else {
+ mutex_unlock(&reg->kctx->jit_evict_lock);
}
/*
@@ -1497,9 +1513,8 @@ int kbase_update_region_flags(struct kbase_context *kctx,
return 0;
}
-int kbase_alloc_phy_pages_helper(
- struct kbase_mem_phy_alloc *alloc,
- size_t nr_pages_requested)
+int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
+ size_t nr_pages_requested)
{
int new_page_count __maybe_unused;
size_t nr_left = nr_pages_requested;
@@ -1649,12 +1664,18 @@ done:
alloc_failed:
/* rollback needed if got one or more 2MB but failed later */
- if (nr_left != nr_pages_requested)
- kbase_mem_pool_free_pages(&kctx->lp_mem_pool,
- nr_pages_requested - nr_left,
- alloc->pages + alloc->nents,
- false,
- false);
+ if (nr_left != nr_pages_requested) {
+ size_t nr_pages_to_free = nr_pages_requested - nr_left;
+
+ alloc->nents += nr_pages_to_free;
+
+ kbase_process_page_usage_inc(kctx, nr_pages_to_free);
+ kbase_atomic_add_pages(nr_pages_to_free, &kctx->used_pages);
+ kbase_atomic_add_pages(nr_pages_to_free,
+ &kctx->kbdev->memdev.used_pages);
+
+ kbase_free_phy_pages_helper(alloc, nr_pages_to_free);
+ }
kbase_process_page_usage_dec(kctx, nr_pages_requested);
kbase_atomic_sub_pages(nr_pages_requested, &kctx->used_pages);
@@ -1665,6 +1686,181 @@ invalid_request:
return -ENOMEM;
}
+struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
+ struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool,
+ size_t nr_pages_requested)
+{
+ int new_page_count __maybe_unused;
+ size_t nr_left = nr_pages_requested;
+ int res;
+ struct kbase_context *kctx;
+ struct tagged_addr *tp;
+ struct tagged_addr *new_pages = NULL;
+
+ KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+ KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+
+ lockdep_assert_held(&pool->pool_lock);
+
+#if !defined(CONFIG_MALI_2MB_ALLOC)
+ WARN_ON(pool->order);
+#endif
+
+ if (alloc->reg) {
+ if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents)
+ goto invalid_request;
+ }
+
+ kctx = alloc->imported.kctx;
+
+ lockdep_assert_held(&kctx->mem_partials_lock);
+
+ if (nr_pages_requested == 0)
+ goto done; /*nothing to do*/
+
+ new_page_count = kbase_atomic_add_pages(
+ nr_pages_requested, &kctx->used_pages);
+ kbase_atomic_add_pages(nr_pages_requested,
+ &kctx->kbdev->memdev.used_pages);
+
+ /* Increase mm counters before we allocate pages so that this
+ * allocation is visible to the OOM killer
+ */
+ kbase_process_page_usage_inc(kctx, nr_pages_requested);
+
+ tp = alloc->pages + alloc->nents;
+ new_pages = tp;
+
+#ifdef CONFIG_MALI_2MB_ALLOC
+ if (pool->order) {
+ int nr_lp = nr_left / (SZ_2M / SZ_4K);
+
+ res = kbase_mem_pool_alloc_pages_locked(pool,
+ nr_lp * (SZ_2M / SZ_4K),
+ tp);
+
+ if (res > 0) {
+ nr_left -= res;
+ tp += res;
+ }
+
+ if (nr_left) {
+ struct kbase_sub_alloc *sa, *temp_sa;
+
+ list_for_each_entry_safe(sa, temp_sa,
+ &kctx->mem_partials, link) {
+ int pidx = 0;
+
+ while (nr_left) {
+ pidx = find_next_zero_bit(sa->sub_pages,
+ SZ_2M / SZ_4K,
+ pidx);
+ bitmap_set(sa->sub_pages, pidx, 1);
+ *tp++ = as_tagged_tag(page_to_phys(
+ sa->page + pidx),
+ FROM_PARTIAL);
+ nr_left--;
+
+ if (bitmap_full(sa->sub_pages,
+ SZ_2M / SZ_4K)) {
+ /* unlink from partial list when
+ * full
+ */
+ list_del_init(&sa->link);
+ break;
+ }
+ }
+ }
+ }
+
+ /* only if we actually have a chunk left <512. If more it
+ * indicates that we couldn't allocate a 2MB above, so no point
+ * to retry here.
+ */
+ if (nr_left > 0 && nr_left < (SZ_2M / SZ_4K)) {
+ /* create a new partial and suballocate the rest from it
+ */
+ struct page *np = NULL;
+
+ np = kbase_mem_pool_alloc_locked(pool);
+
+ if (np) {
+ int i;
+ struct kbase_sub_alloc *sa;
+ struct page *p;
+
+ sa = kmalloc(sizeof(*sa), GFP_KERNEL);
+ if (!sa) {
+ kbase_mem_pool_free_locked(pool, np,
+ false);
+ goto alloc_failed;
+ }
+
+ /* store pointers back to the control struct */
+ np->lru.next = (void *)sa;
+ for (p = np; p < np + SZ_2M / SZ_4K; p++)
+ p->lru.prev = (void *)np;
+ INIT_LIST_HEAD(&sa->link);
+ bitmap_zero(sa->sub_pages, SZ_2M / SZ_4K);
+ sa->page = np;
+
+ for (i = 0; i < nr_left; i++)
+ *tp++ = as_tagged_tag(
+ page_to_phys(np + i),
+ FROM_PARTIAL);
+
+ bitmap_set(sa->sub_pages, 0, nr_left);
+ nr_left = 0;
+
+ /* expose for later use */
+ list_add(&sa->link, &kctx->mem_partials);
+ }
+ }
+ if (nr_left)
+ goto alloc_failed;
+ } else {
+#endif
+ res = kbase_mem_pool_alloc_pages_locked(pool,
+ nr_left,
+ tp);
+ if (res <= 0)
+ goto alloc_failed;
+#ifdef CONFIG_MALI_2MB_ALLOC
+ }
+#endif
+
+ KBASE_TLSTREAM_AUX_PAGESALLOC(
+ kctx->id,
+ (u64)new_page_count);
+
+ alloc->nents += nr_pages_requested;
+done:
+ return new_pages;
+
+alloc_failed:
+ /* rollback needed if got one or more 2MB but failed later */
+ if (nr_left != nr_pages_requested) {
+ size_t nr_pages_to_free = nr_pages_requested - nr_left;
+
+ alloc->nents += nr_pages_to_free;
+
+ kbase_process_page_usage_inc(kctx, nr_pages_to_free);
+ kbase_atomic_add_pages(nr_pages_to_free, &kctx->used_pages);
+ kbase_atomic_add_pages(nr_pages_to_free,
+ &kctx->kbdev->memdev.used_pages);
+
+ kbase_free_phy_pages_helper(alloc, nr_pages_to_free);
+ }
+
+ kbase_process_page_usage_dec(kctx, nr_pages_requested);
+ kbase_atomic_sub_pages(nr_pages_requested, &kctx->used_pages);
+ kbase_atomic_sub_pages(nr_pages_requested,
+ &kctx->kbdev->memdev.used_pages);
+
+invalid_request:
+ return NULL;
+}
+
static void free_partial(struct kbase_context *kctx, struct tagged_addr tp)
{
struct page *p, *head_page;
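
The "partial" handling added above carves a 2MB physical block into 512 4KB sub-pages and tracks which of them are handed out with a bitmap, keeping partially used blocks on kctx->mem_partials for later sub-allocations. A self-contained sketch of that idea (plain C with stand-in types, not the kbase_sub_alloc code itself):

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#define SUBPAGES_PER_2MB 512

struct sub_alloc {
        uint64_t base_pfn;                      /* first 4KB page of the 2MB block */
        uint64_t used[SUBPAGES_PER_2MB / 64];   /* one bit per 4KB sub-page */
};

/* Hand out one free 4KB sub-page; returns its pfn, or 0 if the block is full
 * (the sketch assumes a non-zero base pfn).
 */
uint64_t sub_alloc_take(struct sub_alloc *sa)
{
        for (int i = 0; i < SUBPAGES_PER_2MB; i++) {
                if (!(sa->used[i / 64] & (1ULL << (i % 64)))) {
                        sa->used[i / 64] |= 1ULL << (i % 64);
                        return sa->base_pfn + i;
                }
        }
        return 0;
}

/* Give a sub-page back; returns true once every bit is clear, i.e. the whole
 * 2MB block can be returned to the pool.
 */
bool sub_alloc_put(struct sub_alloc *sa, uint64_t pfn)
{
        int i = (int)(pfn - sa->base_pfn);

        sa->used[i / 64] &= ~(1ULL << (i % 64));
        for (int w = 0; w < SUBPAGES_PER_2MB / 64; w++)
                if (sa->used[w])
                        return false;   /* still partially in use */
        return true;
}

int main(void)
{
        struct sub_alloc sa = { .base_pfn = 0x80000 };
        uint64_t p0 = sub_alloc_take(&sa);
        uint64_t p1 = sub_alloc_take(&sa);

        printf("sub-pages: %#llx %#llx\n",
               (unsigned long long)p0, (unsigned long long)p1);
        sub_alloc_put(&sa, p0);
        printf("block empty: %d\n", sub_alloc_put(&sa, p1));    /* 1 */
        return 0;
}
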
@@ -1776,6 +1972,124 @@ int kbase_free_phy_pages_helper(
return 0;
}
+static void free_partial_locked(struct kbase_context *kctx,
+ struct kbase_mem_pool *pool, struct tagged_addr tp)
+{
+ struct page *p, *head_page;
+ struct kbase_sub_alloc *sa;
+
+ lockdep_assert_held(&pool->pool_lock);
+ lockdep_assert_held(&kctx->mem_partials_lock);
+
+ p = phys_to_page(as_phys_addr_t(tp));
+ head_page = (struct page *)p->lru.prev;
+ sa = (struct kbase_sub_alloc *)head_page->lru.next;
+ clear_bit(p - head_page, sa->sub_pages);
+ if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) {
+ list_del(&sa->link);
+ kbase_mem_pool_free(pool, head_page, true);
+ kfree(sa);
+ } else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) ==
+ SZ_2M / SZ_4K - 1) {
+ /* expose the partial again */
+ list_add(&sa->link, &kctx->mem_partials);
+ }
+}
+
+void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc,
+ struct kbase_mem_pool *pool, struct tagged_addr *pages,
+ size_t nr_pages_to_free)
+{
+ struct kbase_context *kctx = alloc->imported.kctx;
+ bool syncback;
+ bool reclaimed = (alloc->evicted != 0);
+ struct tagged_addr *start_free;
+ size_t freed = 0;
+
+ KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+ KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+ KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free);
+
+ lockdep_assert_held(&pool->pool_lock);
+ lockdep_assert_held(&kctx->mem_partials_lock);
+
+ /* early out if nothing to do */
+ if (!nr_pages_to_free)
+ return;
+
+ start_free = pages;
+
+ syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+ /* pad start_free to a valid start location */
+ while (nr_pages_to_free && is_huge(*start_free) &&
+ !is_huge_head(*start_free)) {
+ nr_pages_to_free--;
+ start_free++;
+ }
+
+ while (nr_pages_to_free) {
+ if (is_huge_head(*start_free)) {
+ /* This is a 2MB entry, so free all the 512 pages that
+ * it points to
+ */
+ WARN_ON(!pool->order);
+ kbase_mem_pool_free_pages_locked(pool,
+ 512,
+ start_free,
+ syncback,
+ reclaimed);
+ nr_pages_to_free -= 512;
+ start_free += 512;
+ freed += 512;
+ } else if (is_partial(*start_free)) {
+ WARN_ON(!pool->order);
+ free_partial_locked(kctx, pool, *start_free);
+ nr_pages_to_free--;
+ start_free++;
+ freed++;
+ } else {
+ struct tagged_addr *local_end_free;
+
+ WARN_ON(pool->order);
+ local_end_free = start_free;
+ while (nr_pages_to_free &&
+ !is_huge(*local_end_free) &&
+ !is_partial(*local_end_free)) {
+ local_end_free++;
+ nr_pages_to_free--;
+ }
+ kbase_mem_pool_free_pages_locked(pool,
+ local_end_free - start_free,
+ start_free,
+ syncback,
+ reclaimed);
+ freed += local_end_free - start_free;
+ start_free += local_end_free - start_free;
+ }
+ }
+
+ alloc->nents -= freed;
+
+ /*
+ * If the allocation was not evicted (i.e. evicted == 0) then
+ * the page accounting needs to be done.
+ */
+ if (!reclaimed) {
+ int new_page_count;
+
+ kbase_process_page_usage_dec(kctx, freed);
+ new_page_count = kbase_atomic_sub_pages(freed,
+ &kctx->used_pages);
+ kbase_atomic_sub_pages(freed,
+ &kctx->kbdev->memdev.used_pages);
+
+ KBASE_TLSTREAM_AUX_PAGESALLOC(
+ kctx->id,
+ (u64)new_page_count);
+ }
+}
+
void kbase_mem_kref_free(struct kref *kref)
{
struct kbase_mem_phy_alloc *alloc;
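
The free path added above has to cope with three kinds of entries in the page array: 2MB heads (freed as a block of 512 pages), sub-pages of a partially used 2MB block (returned to their partial), and ordinary 4KB pages (freed to the pool in maximal contiguous runs). A compact sketch of that scan, with the tagged-address tests replaced by an illustrative enum:

#include <stddef.h>
#include <stdio.h>

enum entry_kind { PAGE_SMALL, PAGE_HUGE_HEAD, PAGE_FROM_PARTIAL };

void free_small_batch(size_t first, size_t count)
{
        printf("free %zu small pages starting at index %zu\n", count, first);
}

void free_scan(const enum entry_kind *e, size_t n)
{
        size_t i = 0;

        while (i < n) {
                if (e[i] == PAGE_HUGE_HEAD) {
                        /* in the driver, the next 511 entries belong to this
                         * 2MB block and are skipped along with it
                         */
                        printf("free 2MB block at index %zu (512 pages)\n", i);
                        i += 512;
                } else if (e[i] == PAGE_FROM_PARTIAL) {
                        printf("return sub-page %zu to its partial block\n", i);
                        i++;
                } else {
                        size_t start = i;

                        while (i < n && e[i] == PAGE_SMALL)
                                i++;
                        free_small_batch(start, i - start);
                }
        }
}

int main(void)
{
        enum entry_kind e[] = {
                PAGE_SMALL, PAGE_SMALL, PAGE_SMALL,
                PAGE_FROM_PARTIAL,
                PAGE_SMALL, PAGE_SMALL,
        };

        free_scan(e, sizeof(e) / sizeof(e[0]));
        return 0;
}
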
@@ -1784,12 +2098,15 @@ void kbase_mem_kref_free(struct kref *kref)
switch (alloc->type) {
case KBASE_MEM_TYPE_NATIVE: {
- WARN_ON(!alloc->imported.kctx);
- /*
- * The physical allocation must have been removed from the
- * eviction list before trying to free it.
- */
- WARN_ON(!list_empty(&alloc->evict_node));
+ if (!WARN_ON(!alloc->imported.kctx)) {
+ /*
+ * The physical allocation must have been removed from
+ * the eviction list before trying to free it.
+ */
+ mutex_lock(&alloc->imported.kctx->jit_evict_lock);
+ WARN_ON(!list_empty(&alloc->evict_node));
+ mutex_unlock(&alloc->imported.kctx->jit_evict_lock);
+ }
kbase_free_phy_pages_helper(alloc, alloc->nents);
break;
}
@@ -2284,6 +2601,7 @@ static void kbase_jit_destroy_worker(struct work_struct *work)
int kbase_jit_init(struct kbase_context *kctx)
{
+ mutex_lock(&kctx->jit_evict_lock);
INIT_LIST_HEAD(&kctx->jit_active_head);
INIT_LIST_HEAD(&kctx->jit_pool_head);
INIT_LIST_HEAD(&kctx->jit_destroy_head);
@@ -2291,49 +2609,255 @@ int kbase_jit_init(struct kbase_context *kctx)
INIT_LIST_HEAD(&kctx->jit_pending_alloc);
INIT_LIST_HEAD(&kctx->jit_atoms_head);
+ mutex_unlock(&kctx->jit_evict_lock);
+
+ kctx->jit_max_allocations = 0;
+ kctx->jit_current_allocations = 0;
+ kctx->trim_level = 0;
return 0;
}
+/* Check if the allocation from JIT pool is of the same size as the new JIT
+ * allocation and also, if BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP is set, meets
+ * the alignment requirements.
+ */
+static bool meet_size_and_tiler_align_top_requirements(struct kbase_context *kctx,
+ struct kbase_va_region *walker, struct base_jit_alloc_info *info)
+{
+ bool meet_reqs = true;
+
+ if (walker->nr_pages != info->va_pages)
+ meet_reqs = false;
+ else if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) {
+ size_t align = info->extent;
+ size_t align_mask = align - 1;
+
+ if ((walker->start_pfn + info->commit_pages) & align_mask)
+ meet_reqs = false;
+ }
+
+ return meet_reqs;
+}
+
+static int kbase_jit_grow(struct kbase_context *kctx,
+ struct base_jit_alloc_info *info, struct kbase_va_region *reg)
+{
+ size_t delta;
+ size_t pages_required;
+ size_t old_size;
+ struct kbase_mem_pool *pool;
+ int ret = -ENOMEM;
+ struct tagged_addr *gpu_pages;
+
+ if (info->commit_pages > reg->nr_pages) {
+ /* Attempted to grow larger than maximum size */
+ return -EINVAL;
+ }
+
+ kbase_gpu_vm_lock(kctx);
+
+ /* Make the physical backing no longer reclaimable */
+ if (!kbase_mem_evictable_unmake(reg->gpu_alloc))
+ goto update_failed;
+
+ if (reg->gpu_alloc->nents >= info->commit_pages)
+ goto done;
+
+ /* Grow the backing */
+ old_size = reg->gpu_alloc->nents;
+
+ /* Allocate some more pages */
+ delta = info->commit_pages - reg->gpu_alloc->nents;
+ pages_required = delta;
+
+#ifdef CONFIG_MALI_2MB_ALLOC
+ if (pages_required >= (SZ_2M / SZ_4K)) {
+ pool = &kctx->lp_mem_pool;
+ /* Round up to number of 2 MB pages required */
+ pages_required += ((SZ_2M / SZ_4K) - 1);
+ pages_required /= (SZ_2M / SZ_4K);
+ } else {
+#endif
+ pool = &kctx->mem_pool;
+#ifdef CONFIG_MALI_2MB_ALLOC
+ }
+#endif
+
+ if (reg->cpu_alloc != reg->gpu_alloc)
+ pages_required *= 2;
+
+ mutex_lock(&kctx->mem_partials_lock);
+ kbase_mem_pool_lock(pool);
+
+ /* As we can not allocate memory from the kernel with the vm_lock held,
+ * grow the pool to the required size with the lock dropped. We hold the
+ * pool lock to prevent another thread from allocating from the pool
+ * between the grow and allocation.
+ */
+ while (kbase_mem_pool_size(pool) < pages_required) {
+ int pool_delta = pages_required - kbase_mem_pool_size(pool);
+
+ kbase_mem_pool_unlock(pool);
+ mutex_unlock(&kctx->mem_partials_lock);
+ kbase_gpu_vm_unlock(kctx);
+
+ if (kbase_mem_pool_grow(pool, pool_delta))
+ goto update_failed_unlocked;
+
+ kbase_gpu_vm_lock(kctx);
+ mutex_lock(&kctx->mem_partials_lock);
+ kbase_mem_pool_lock(pool);
+ }
+
+ gpu_pages = kbase_alloc_phy_pages_helper_locked(reg->gpu_alloc, pool,
+ delta);
+ if (!gpu_pages) {
+ kbase_mem_pool_unlock(pool);
+ mutex_unlock(&kctx->mem_partials_lock);
+ goto update_failed;
+ }
+
+ if (reg->cpu_alloc != reg->gpu_alloc) {
+ struct tagged_addr *cpu_pages;
+
+ cpu_pages = kbase_alloc_phy_pages_helper_locked(reg->cpu_alloc,
+ pool, delta);
+ if (!cpu_pages) {
+ kbase_free_phy_pages_helper_locked(reg->gpu_alloc,
+ pool, gpu_pages, delta);
+ kbase_mem_pool_unlock(pool);
+ mutex_unlock(&kctx->mem_partials_lock);
+ goto update_failed;
+ }
+ }
+ kbase_mem_pool_unlock(pool);
+ mutex_unlock(&kctx->mem_partials_lock);
+
+ ret = kbase_mem_grow_gpu_mapping(kctx, reg, info->commit_pages,
+ old_size);
+ /*
+ * The grow failed so put the allocation back in the
+ * pool and return failure.
+ */
+ if (ret)
+ goto update_failed;
+
+done:
+ ret = 0;
+
+ /* Update attributes of JIT allocation taken from the pool */
+ reg->initial_commit = info->commit_pages;
+ reg->extent = info->extent;
+
+update_failed:
+ kbase_gpu_vm_unlock(kctx);
+update_failed_unlocked:
+ return ret;
+}
+
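
The pool handling in kbase_jit_grow() above follows a drop-and-recheck pattern: the pool cannot be grown while the VM and pool locks are held (growing allocates from the kernel), so the locks are released around kbase_mem_pool_grow() and the pool size is re-checked after re-acquiring them, since another thread may have drained the pool in between. A standalone sketch of that pattern, using pthread mutexes and stand-in pool helpers rather than the kbase APIs (build with -pthread):

#include <pthread.h>
#include <stddef.h>

struct pool {
        pthread_mutex_t lock;
        size_t size;            /* pages currently held by the pool */
};

/* Stands in for kbase_mem_pool_grow(): may sleep, so it must be called with
 * the outer locks dropped; it takes the pool lock internally.
 */
int pool_grow(struct pool *p, size_t delta)
{
        pthread_mutex_lock(&p->lock);
        p->size += delta;
        pthread_mutex_unlock(&p->lock);
        return 0;
}

int grow_to(struct pool *p, pthread_mutex_t *vm_lock, size_t pages_required)
{
        pthread_mutex_lock(vm_lock);
        pthread_mutex_lock(&p->lock);

        while (p->size < pages_required) {
                size_t delta = pages_required - p->size;

                /* drop everything before the potentially sleeping grow */
                pthread_mutex_unlock(&p->lock);
                pthread_mutex_unlock(vm_lock);

                if (pool_grow(p, delta))
                        return -1;      /* like the update_failed_unlocked path */

                /* re-take the locks and re-check the size */
                pthread_mutex_lock(vm_lock);
                pthread_mutex_lock(&p->lock);
        }

        /* allocate from the pool here, still under both locks */
        pthread_mutex_unlock(&p->lock);
        pthread_mutex_unlock(vm_lock);
        return 0;
}

int main(void)
{
        struct pool p = { .lock = PTHREAD_MUTEX_INITIALIZER, .size = 4 };
        pthread_mutex_t vm_lock = PTHREAD_MUTEX_INITIALIZER;

        return grow_to(&p, &vm_lock, 128);
}
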
struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
struct base_jit_alloc_info *info)
{
struct kbase_va_region *reg = NULL;
- struct kbase_va_region *walker;
- struct kbase_va_region *temp;
- size_t current_diff = SIZE_MAX;
- int ret;
+ if (kctx->jit_current_allocations >= kctx->jit_max_allocations) {
+ /* Too many current allocations */
+ return NULL;
+ }
+ if (info->max_allocations > 0 &&
+ kctx->jit_current_allocations_per_bin[info->bin_id] >=
+ info->max_allocations) {
+ /* Too many current allocations in this bin */
+ return NULL;
+ }
mutex_lock(&kctx->jit_evict_lock);
+
/*
* Scan the pool for an existing allocation which meets our
* requirements and remove it.
*/
- list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head, jit_node) {
-
- if (walker->nr_pages >= info->va_pages) {
- size_t min_size, max_size, diff;
+ if (info->usage_id != 0) {
+ /* First scan for an allocation with the same usage ID */
+ struct kbase_va_region *walker;
+ struct kbase_va_region *temp;
+ size_t current_diff = SIZE_MAX;
+
+ list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head,
+ jit_node) {
+
+ if (walker->jit_usage_id == info->usage_id &&
+ walker->jit_bin_id == info->bin_id &&
+ meet_size_and_tiler_align_top_requirements(
+ kctx, walker, info)) {
+ size_t min_size, max_size, diff;
+
+ /*
+ * The JIT allocations VA requirements have been
+ * met, it's suitable but other allocations
+ * might be a better fit.
+ */
+ min_size = min_t(size_t,
+ walker->gpu_alloc->nents,
+ info->commit_pages);
+ max_size = max_t(size_t,
+ walker->gpu_alloc->nents,
+ info->commit_pages);
+ diff = max_size - min_size;
+
+ if (current_diff > diff) {
+ current_diff = diff;
+ reg = walker;
+ }
- /*
- * The JIT allocations VA requirements have been
- * meet, it's suitable but other allocations
- * might be a better fit.
- */
- min_size = min_t(size_t, walker->gpu_alloc->nents,
- info->commit_pages);
- max_size = max_t(size_t, walker->gpu_alloc->nents,
- info->commit_pages);
- diff = max_size - min_size;
-
- if (current_diff > diff) {
- current_diff = diff;
- reg = walker;
+ /* The allocation is an exact match */
+ if (current_diff == 0)
+ break;
}
+ }
+ }
- /* The allocation is an exact match, stop looking */
- if (current_diff == 0)
- break;
+ if (!reg) {
+ /* No allocation with the same usage ID, or usage IDs not in
+ * use. Search for an allocation we can reuse.
+ */
+ struct kbase_va_region *walker;
+ struct kbase_va_region *temp;
+ size_t current_diff = SIZE_MAX;
+
+ list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head,
+ jit_node) {
+
+ if (walker->jit_bin_id == info->bin_id &&
+ meet_size_and_tiler_align_top_requirements(
+ kctx, walker, info)) {
+ size_t min_size, max_size, diff;
+
+ /*
+ * The JIT allocations VA requirements have been
+ * met, it's suitable but other allocations
+ * might be a better fit.
+ */
+ min_size = min_t(size_t,
+ walker->gpu_alloc->nents,
+ info->commit_pages);
+ max_size = max_t(size_t,
+ walker->gpu_alloc->nents,
+ info->commit_pages);
+ diff = max_size - min_size;
+
+ if (current_diff > diff) {
+ current_diff = diff;
+ reg = walker;
+ }
+
+ /* The allocation is an exact match, so stop
+ * looking.
+ */
+ if (current_diff == 0)
+ break;
+ }
}
}
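
Both scans above use the same best-fit rule: among pooled regions in the right bin (and, in the first pass, with a matching usage_id) whose size and tiler-alignment requirements are met, pick the one whose backed page count is closest to the requested commit size, stopping early on an exact match. A minimal sketch of that selection, with illustrative stand-in types and the size/alignment checks omitted:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct pooled_reg {
        size_t backed_pages;    /* gpu_alloc->nents in the driver */
        uint8_t bin_id;
};

struct pooled_reg *best_fit(struct pooled_reg *regs, size_t n,
                            size_t commit_pages, uint8_t bin_id)
{
        struct pooled_reg *best = NULL;
        size_t best_diff = SIZE_MAX;

        for (size_t i = 0; i < n; i++) {
                size_t diff;

                if (regs[i].bin_id != bin_id)
                        continue;

                diff = regs[i].backed_pages > commit_pages ?
                        regs[i].backed_pages - commit_pages :
                        commit_pages - regs[i].backed_pages;

                if (diff < best_diff) {
                        best_diff = diff;
                        best = &regs[i];
                        if (diff == 0)
                                break;  /* exact match, stop looking */
                }
        }
        return best;
}

int main(void)
{
        struct pooled_reg pool[] = {
                { .backed_pages = 40,  .bin_id = 1 },
                { .backed_pages = 100, .bin_id = 1 },
                { .backed_pages = 64,  .bin_id = 2 },
        };
        struct pooled_reg *r = best_fit(pool, 3, 96, 1);

        printf("picked region backed by %zu pages\n", r ? r->backed_pages : 0);
        return 0;
}
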
@@ -2352,42 +2876,15 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
list_del_init(&reg->gpu_alloc->evict_node);
mutex_unlock(&kctx->jit_evict_lock);
- kbase_gpu_vm_lock(kctx);
-
- /* Make the physical backing no longer reclaimable */
- if (!kbase_mem_evictable_unmake(reg->gpu_alloc))
- goto update_failed;
-
- /* Grow the backing if required */
- if (reg->gpu_alloc->nents < info->commit_pages) {
- size_t delta;
- size_t old_size = reg->gpu_alloc->nents;
-
- /* Allocate some more pages */
- delta = info->commit_pages - reg->gpu_alloc->nents;
- if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, delta)
- != 0)
- goto update_failed;
-
- if (reg->cpu_alloc != reg->gpu_alloc) {
- if (kbase_alloc_phy_pages_helper(
- reg->cpu_alloc, delta) != 0) {
- kbase_free_phy_pages_helper(
- reg->gpu_alloc, delta);
- goto update_failed;
- }
- }
-
- ret = kbase_mem_grow_gpu_mapping(kctx, reg,
- info->commit_pages, old_size);
+ if (kbase_jit_grow(kctx, info, reg) < 0) {
/*
- * The grow failed so put the allocation back in the
- * pool and return failure.
+ * An update to an allocation from the pool failed,
+ * chances are slim a new allocation would fair any
+ * better so return the allocation to the pool and
+ * return the function with failure.
*/
- if (ret)
- goto update_failed;
+ goto update_failed_unlocked;
}
- kbase_gpu_vm_unlock(kctx);
} else {
/* No suitable JIT allocation was found so create a new one */
u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD |
@@ -2397,6 +2894,9 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
mutex_unlock(&kctx->jit_evict_lock);
+ if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP)
+ flags |= BASE_MEM_TILER_ALIGN_TOP;
+
reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages,
info->extent, &flags, &gpu_addr);
if (!reg)
@@ -2409,15 +2909,15 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
mutex_unlock(&kctx->jit_evict_lock);
}
+ kctx->jit_current_allocations++;
+ kctx->jit_current_allocations_per_bin[info->bin_id]++;
+
+ reg->jit_usage_id = info->usage_id;
+ reg->jit_bin_id = info->bin_id;
+
return reg;
-update_failed:
- /*
- * An update to an allocation from the pool failed, chances
- * are slim a new allocation would fair any better so return
- * the allocation to the pool and return the function with failure.
- */
- kbase_gpu_vm_unlock(kctx);
+update_failed_unlocked:
mutex_lock(&kctx->jit_evict_lock);
list_move(&reg->jit_node, &kctx->jit_pool_head);
mutex_unlock(&kctx->jit_evict_lock);
@@ -2427,13 +2927,53 @@ out_unlocked:
void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
{
- /* The physical backing of memory in the pool is always reclaimable */
+ u64 old_pages;
+
+ /* Get current size of JIT region */
+ old_pages = kbase_reg_current_backed_size(reg);
+ if (reg->initial_commit < old_pages) {
+ /* Free trim_level % of region, but don't go below initial
+ * commit size
+ */
+ u64 new_size = MAX(reg->initial_commit,
+ div_u64(old_pages * (100 - kctx->trim_level), 100));
+ u64 delta = old_pages - new_size;
+
+ if (delta) {
+ kbase_mem_shrink_cpu_mapping(kctx, reg, old_pages-delta,
+ old_pages);
+ kbase_mem_shrink_gpu_mapping(kctx, reg, old_pages-delta,
+ old_pages);
+
+ kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+ if (reg->cpu_alloc != reg->gpu_alloc)
+ kbase_free_phy_pages_helper(reg->gpu_alloc,
+ delta);
+ }
+ }
+
+ kctx->jit_current_allocations--;
+ kctx->jit_current_allocations_per_bin[reg->jit_bin_id]--;
+
+ kbase_mem_evictable_mark_reclaim(reg->gpu_alloc);
+
kbase_gpu_vm_lock(kctx);
- kbase_mem_evictable_make(reg->gpu_alloc);
+ reg->flags |= KBASE_REG_DONT_NEED;
+ kbase_mem_shrink_cpu_mapping(kctx, reg, 0, reg->gpu_alloc->nents);
kbase_gpu_vm_unlock(kctx);
+ /*
+ * Add the allocation to the eviction list and the jit pool, after this
+ * point the shrink can reclaim it, or it may be reused.
+ */
mutex_lock(&kctx->jit_evict_lock);
+
+ /* This allocation can't already be on a list. */
+ WARN_ON(!list_empty(&reg->gpu_alloc->evict_node));
+ list_add(&reg->gpu_alloc->evict_node, &kctx->evict_list);
+
list_move(&reg->jit_node, &kctx->jit_pool_head);
+
mutex_unlock(&kctx->jit_evict_lock);
}
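
A worked example of the trim-on-free behaviour introduced here: with trim_level = 25, a region that was initially committed at 64 pages and has grown to 256 backed pages is shrunk to max(64, 256 * 75 / 100) = 192 pages, so 64 pages go back to the pool, and the backing never drops below the initial commit. A plain-C sketch of just that calculation, with stand-in names:

#include <stdio.h>

unsigned long long trimmed_size(unsigned long long initial_commit,
                                unsigned long long backed_pages,
                                unsigned int trim_level /* 0..100 */)
{
        unsigned long long new_size =
                backed_pages * (100ULL - trim_level) / 100ULL;

        return new_size < initial_commit ? initial_commit : new_size;
}

int main(void)
{
        printf("%llu\n", trimmed_size(64, 256, 25));    /* 192 */
        printf("%llu\n", trimmed_size(64, 256, 90));    /* 64: floor at initial commit */
        printf("%llu\n", trimmed_size(64, 256, 0));     /* 256: no trimming */
        return 0;
}
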
@@ -2670,6 +3210,7 @@ static int kbase_jd_umm_map(struct kbase_context *kctx,
int err;
size_t count = 0;
struct kbase_mem_phy_alloc *alloc;
+ unsigned long gwt_mask = ~0;
alloc = reg->gpu_alloc;
@@ -2718,10 +3259,16 @@ static int kbase_jd_umm_map(struct kbase_context *kctx,
/* Update nents as we now have pages to map */
alloc->nents = reg->nr_pages;
+#ifdef CONFIG_MALI_JOB_DUMP
+ if (kctx->gwt_enabled)
+ gwt_mask = ~KBASE_REG_GPU_WR;
+#endif
+
err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
kbase_get_gpu_phy_pages(reg),
count,
- reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD);
+ (reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD) &
+ gwt_mask);
if (err)
goto err_unmap_attachment;
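
The gwt_mask added above strips the GPU write-permission bit from the flags passed to kbase_mmu_insert_pages() when kctx->gwt_enabled is set under CONFIG_MALI_JOB_DUMP, presumably so that GPU writes to these mappings fault and can be tracked for job dumping. A tiny sketch of the masking itself; the flag values are made up for illustration and are not the real KBASE_REG_* values:

#include <stdio.h>

#define REG_GPU_RD (1UL << 0)   /* stand-in for KBASE_REG_GPU_RD */
#define REG_GPU_WR (1UL << 1)   /* stand-in for KBASE_REG_GPU_WR */

int main(void)
{
        unsigned long flags = REG_GPU_RD | REG_GPU_WR;
        unsigned long gwt_mask = ~0UL;
        int gwt_enabled = 1;

        if (gwt_enabled)
                gwt_mask = ~REG_GPU_WR;

        /* write bit cleared, read bit preserved */
        printf("mapped flags: %#lx\n", flags & gwt_mask);
        return 0;
}
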