author     Sidath Senanayake <sidaths@google.com>  2017-07-11 16:57:40 +0200
committer  Sidath Senanayake <sidaths@google.com>  2017-07-11 16:57:40 +0200
commit     ea23e535ae857c92d45cb11bdd5dba7c27579726 (patch)
tree       e1bcda85e529f9be3f02202b81fb3e8f6ab73129 /mali_kbase/mali_kbase_mem.c
parent     6f5ab3baed824941f168ab133469f997d4450146 (diff)
download   gpu-ea23e535ae857c92d45cb11bdd5dba7c27579726.tar.gz
Mali Bifrost DDK r7p0 KMD
Provenance:
cbfad67c8 (collaborate/EAC/b_r7p0)
BX304L01B-BU-00000-r7p0-01rel0
BX304L06A-BU-00000-r7p0-01rel0
BX304X07X-BU-00000-r7p0-01rel0
Signed-off-by: Sidath Senanayake <sidaths@google.com>
Change-Id: Icdf8b47a48b829cc228f4df3035f7b539da58104
Diffstat (limited to 'mali_kbase/mali_kbase_mem.c')
-rw-r--r--  mali_kbase/mali_kbase_mem.c | 282
1 file changed, 244 insertions, 38 deletions
diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c
index e76294d..6fefffe 100644
--- a/mali_kbase/mali_kbase_mem.c
+++ b/mali_kbase/mali_kbase_mem.c
@@ -37,7 +37,6 @@
 #include <mali_midg_regmap.h>
 #include <mali_kbase_cache_policy.h>
 #include <mali_kbase_hw.h>
-#include <mali_kbase_hwaccess_time.h>
 #include <mali_kbase_tlstream.h>
 
 /* This function finds out which RB tree the given GPU VA region belongs to
@@ -738,6 +737,7 @@ fail_unlock:
 int kbase_mem_init(struct kbase_device *kbdev)
 {
 	struct kbasep_mem_device *memdev;
+	int ret;
 
 	KBASE_DEBUG_ASSERT(kbdev);
 
@@ -747,8 +747,23 @@ int kbase_mem_init(struct kbase_device *kbdev)
 	/* Initialize memory usage */
 	atomic_set(&memdev->used_pages, 0);
 
-	return kbase_mem_pool_init(&kbdev->mem_pool,
-			KBASE_MEM_POOL_MAX_SIZE_KBDEV, kbdev, NULL);
+	ret = kbase_mem_pool_init(&kbdev->mem_pool,
+			KBASE_MEM_POOL_MAX_SIZE_KBDEV,
+			KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER,
+			kbdev,
+			NULL);
+	if (ret)
+		return ret;
+
+	ret = kbase_mem_pool_init(&kbdev->lp_mem_pool,
+			(KBASE_MEM_POOL_MAX_SIZE_KBDEV >> 9),
+			KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER,
+			kbdev,
+			NULL);
+	if (ret)
+		kbase_mem_pool_term(&kbdev->mem_pool);
+
+	return ret;
 }
 
 void kbase_mem_halt(struct kbase_device *kbdev)
@@ -770,6 +785,7 @@ void kbase_mem_term(struct kbase_device *kbdev)
 		dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
 
 	kbase_mem_pool_term(&kbdev->mem_pool);
+	kbase_mem_pool_term(&kbdev->lp_mem_pool);
 }
 
 KBASE_EXPORT_TEST_API(kbase_mem_term);
@@ -921,7 +937,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64
 		} else {
 			err = kbase_mmu_insert_single_page(kctx,
 				reg->start_pfn + i * stride,
-				page_to_phys(kctx->aliasing_sink_page),
+				kctx->aliasing_sink_page,
 				alloc->imported.alias.aliased[i].length,
 				(reg->flags & mask) | attr);
 
@@ -1070,10 +1086,12 @@ int kbasep_find_enclosing_cpu_mapping_offset(
 KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_offset);
 
 void kbase_sync_single(struct kbase_context *kctx,
-		phys_addr_t cpu_pa, phys_addr_t gpu_pa,
+		struct tagged_addr t_cpu_pa, struct tagged_addr t_gpu_pa,
 		off_t offset, size_t size, enum kbase_sync_type sync_fn)
 {
 	struct page *cpu_page;
+	phys_addr_t cpu_pa = as_phys_addr_t(t_cpu_pa);
+	phys_addr_t gpu_pa = as_phys_addr_t(t_gpu_pa);
 
 	cpu_page = pfn_to_page(PFN_DOWN(cpu_pa));
 
@@ -1128,8 +1146,8 @@ static int kbase_do_syncset(struct kbase_context *kctx,
 	struct kbase_cpu_mapping *map;
 	unsigned long start;
 	size_t size;
-	phys_addr_t *cpu_pa;
-	phys_addr_t *gpu_pa;
+	struct tagged_addr *cpu_pa;
+	struct tagged_addr *gpu_pa;
 	u64 page_off, page_count;
 	u64 i;
 	u64 offset;
@@ -1147,7 +1165,8 @@ static int kbase_do_syncset(struct kbase_context *kctx,
 		goto out_unlock;
 	}
 
-	if (!(reg->flags & KBASE_REG_CPU_CACHED))
+	if (!(reg->flags & KBASE_REG_CPU_CACHED) ||
+			kbase_mem_is_imported(reg->gpu_alloc->type))
 		goto out_unlock;
 
 	start = (uintptr_t)sset->user_addr;
@@ -1175,7 +1194,7 @@ static int kbase_do_syncset(struct kbase_context *kctx,
 	}
 
 	/* Sync first page */
-	if (cpu_pa[page_off]) {
+	if (as_phys_addr_t(cpu_pa[page_off])) {
 		size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
 
 		kbase_sync_single(kctx, cpu_pa[page_off], gpu_pa[page_off],
@@ -1185,7 +1204,7 @@ static int kbase_do_syncset(struct kbase_context *kctx,
 	/* Sync middle pages (if any) */
 	for (i = 1; page_count > 2 && i < page_count - 1; i++) {
 		/* we grow upwards, so bail on first non-present page */
-		if (!cpu_pa[page_off + i])
+		if (!as_phys_addr_t(cpu_pa[page_off + i]))
 			break;
 
 		kbase_sync_single(kctx, cpu_pa[page_off + i],
@@ -1193,7 +1212,8 @@ static int kbase_do_syncset(struct kbase_context *kctx,
 	}
 
 	/* Sync last page (if any) */
-	if (page_count > 1 && cpu_pa[page_off + page_count - 1]) {
+	if (page_count > 1 &&
+	    as_phys_addr_t(cpu_pa[page_off + page_count - 1])) {
 		size_t sz = ((start + size - 1) & ~PAGE_MASK) + 1;
 
 		kbase_sync_single(kctx, cpu_pa[page_off + page_count - 1],
@@ -1412,24 +1432,134 @@ int kbase_alloc_phy_pages_helper(
 {
 	int new_page_count __maybe_unused;
 	size_t old_page_count = alloc->nents;
+	size_t nr_left = nr_pages_requested;
+	int res;
+	struct kbase_context *kctx;
+	struct tagged_addr *tp;
 
 	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
 	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
 
+	kctx = alloc->imported.kctx;
+
 	if (nr_pages_requested == 0)
 		goto done; /*nothing to do*/
 
 	new_page_count = kbase_atomic_add_pages(
-			nr_pages_requested, &alloc->imported.kctx->used_pages);
-	kbase_atomic_add_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages);
+			nr_pages_requested, &kctx->used_pages);
+	kbase_atomic_add_pages(nr_pages_requested,
+			&kctx->kbdev->memdev.used_pages);
 
 	/* Increase mm counters before we allocate pages so that this
 	 * allocation is visible to the OOM killer */
-	kbase_process_page_usage_inc(alloc->imported.kctx, nr_pages_requested);
+	kbase_process_page_usage_inc(kctx, nr_pages_requested);
+
+	tp = alloc->pages + old_page_count;
+
+#ifdef CONFIG_MALI_2MB_ALLOC
+	/* Check if we have enough pages requested so we can allocate a large
+	 * page (512 * 4KB = 2MB )
+	 */
+	if (nr_left >= (SZ_2M / SZ_4K)) {
+		int nr_lp = nr_left / (SZ_2M / SZ_4K);
+
+		res = kbase_mem_pool_alloc_pages(&kctx->lp_mem_pool,
+				nr_lp * (SZ_2M / SZ_4K),
+				tp,
+				true);
+
+		if (res > 0) {
+			nr_left -= res;
+			tp += res;
+		}
+
+		if (nr_left) {
+			struct kbase_sub_alloc *sa, *temp_sa;
+
+			mutex_lock(&kctx->mem_partials_lock);
+
+			list_for_each_entry_safe(sa, temp_sa,
+						 &kctx->mem_partials, link) {
+				int pidx = 0;
+
+				while (nr_left) {
+					pidx = find_next_zero_bit(sa->sub_pages,
+								  SZ_2M / SZ_4K,
+								  pidx);
+					bitmap_set(sa->sub_pages, pidx, 1);
+					*tp++ = as_tagged_tag(page_to_phys(sa->page +
+									   pidx),
+							      FROM_PARTIAL);
+					nr_left--;
+
+					if (bitmap_full(sa->sub_pages, SZ_2M / SZ_4K)) {
+						/* unlink from partial list when full */
+						list_del_init(&sa->link);
+						break;
+					}
+				}
+			}
+			mutex_unlock(&kctx->mem_partials_lock);
+		}
+
+		/* only if we actually have a chunk left <512. If more it indicates
+		 * that we couldn't allocate a 2MB above, so no point to retry here.
+		 */
+		if (nr_left > 0 && nr_left < (SZ_2M / SZ_4K)) {
+			/* create a new partial and suballocate the rest from it */
+			struct page *np = NULL;
+
+			do {
+				int err = kbase_mem_pool_grow(&kctx->lp_mem_pool, 1);
+
+				if (err)
+					break;
+				np = kbase_mem_pool_alloc(&kctx->lp_mem_pool);
+			} while (!np);
+
+			if (np) {
+				int i;
+				struct kbase_sub_alloc *sa;
+				struct page *p;
+
+				sa = kmalloc(sizeof(*sa), GFP_KERNEL);
+				if (!sa) {
+					kbase_mem_pool_free(&kctx->lp_mem_pool, np, false);
+					goto no_new_partial;
+				}
+
+				/* store pointers back to the control struct */
+				np->lru.next = (void *)sa;
+				for (p = np; p < np + SZ_2M / SZ_4K; p++)
+					p->lru.prev = (void *)np;
+				INIT_LIST_HEAD(&sa->link);
+				bitmap_zero(sa->sub_pages, SZ_2M / SZ_4K);
+				sa->page = np;
 
-	if (kbase_mem_pool_alloc_pages(&alloc->imported.kctx->mem_pool,
-			nr_pages_requested, alloc->pages + old_page_count) != 0)
-		goto no_alloc;
+				for (i = 0; i < nr_left; i++)
+					*tp++ = as_tagged_tag(page_to_phys(np + i), FROM_PARTIAL);
+
+				bitmap_set(sa->sub_pages, 0, nr_left);
+				nr_left = 0;
+
+				/* expose for later use */
+				mutex_lock(&kctx->mem_partials_lock);
+				list_add(&sa->link, &kctx->mem_partials);
+				mutex_unlock(&kctx->mem_partials_lock);
+			}
+		}
+	}
+no_new_partial:
+#endif
+
+	if (nr_left) {
+		res = kbase_mem_pool_alloc_pages(&kctx->mem_pool,
+				nr_left,
+				tp,
+				false);
+		if (res <= 0)
+			goto alloc_failed;
+	}
 
 	/*
 	 * Request a zone cache update, this scans only the new pages an
@@ -1441,21 +1571,52 @@ int kbase_alloc_phy_pages_helper(
 	kbase_zone_cache_clear(alloc);
 
 	KBASE_TLSTREAM_AUX_PAGESALLOC(
-			(u32)alloc->imported.kctx->id,
+			(u32)kctx->id,
 			(u64)new_page_count);
 
 	alloc->nents += nr_pages_requested;
done:
 	return 0;
 
-no_alloc:
-	kbase_process_page_usage_dec(alloc->imported.kctx, nr_pages_requested);
-	kbase_atomic_sub_pages(nr_pages_requested, &alloc->imported.kctx->used_pages);
-	kbase_atomic_sub_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages);
+alloc_failed:
+	/* rollback needed if got one or more 2MB but failed later */
+	if (nr_left != nr_pages_requested)
+		kbase_mem_pool_free_pages(&kctx->lp_mem_pool,
+				nr_pages_requested - nr_left,
+				alloc->pages + old_page_count,
+				false,
+				false);
+
+	kbase_process_page_usage_dec(kctx, nr_pages_requested);
+	kbase_atomic_sub_pages(nr_pages_requested, &kctx->used_pages);
+	kbase_atomic_sub_pages(nr_pages_requested,
+			&kctx->kbdev->memdev.used_pages);
 
 	return -ENOMEM;
 }
 
+static void free_partial(struct kbase_context *kctx, struct tagged_addr tp)
+{
+	struct page *p, *head_page;
+	struct kbase_sub_alloc *sa;
+
+	p = phys_to_page(as_phys_addr_t(tp));
+	head_page = (struct page *)p->lru.prev;
+	sa = (struct kbase_sub_alloc *)head_page->lru.next;
+	mutex_lock(&kctx->mem_partials_lock);
+	clear_bit(p - head_page, sa->sub_pages);
+	if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) {
+		list_del(&sa->link);
+		kbase_mem_pool_free(&kctx->lp_mem_pool, head_page, true);
+		kfree(sa);
+	} else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) ==
+		   SZ_2M / SZ_4K - 1) {
+		/* expose the partial again */
+		list_add(&sa->link, &kctx->mem_partials);
+	}
+	mutex_unlock(&kctx->mem_partials_lock);
+}
+
 int kbase_free_phy_pages_helper(
 	struct kbase_mem_phy_alloc *alloc,
 	size_t nr_pages_to_free)
@@ -1463,8 +1624,9 @@ int kbase_free_phy_pages_helper(
 	struct kbase_context *kctx = alloc->imported.kctx;
 	bool syncback;
 	bool reclaimed = (alloc->evicted != 0);
-	phys_addr_t *start_free;
+	struct tagged_addr *start_free;
 	int new_page_count __maybe_unused;
+	size_t freed = 0;
 
 	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
 	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
@@ -1478,6 +1640,13 @@ int kbase_free_phy_pages_helper(
 
 	syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
 
+	/* pad start_free to a valid start location */
+	while (nr_pages_to_free && is_huge(*start_free) &&
+	       !is_huge_head(*start_free)) {
+		nr_pages_to_free--;
+		start_free++;
+	}
+
 	/*
 	 * Clear the zone cache, we don't expect JIT allocations to be
 	 * shrunk in parts so there is no point trying to optimize for that
@@ -1486,23 +1655,56 @@ int kbase_free_phy_pages_helper(
 	 */
 	kbase_zone_cache_clear(alloc);
 
-	kbase_mem_pool_free_pages(&kctx->mem_pool,
-				  nr_pages_to_free,
-				  start_free,
-				  syncback,
-				  reclaimed);
-	alloc->nents -= nr_pages_to_free;
+	while (nr_pages_to_free) {
+		if (is_huge_head(*start_free)) {
+			/* This is a 2MB entry, so free all the 512 pages that
+			 * it points to
+			 */
+			kbase_mem_pool_free_pages(&kctx->lp_mem_pool,
+					512,
+					start_free,
+					syncback,
+					reclaimed);
+			nr_pages_to_free -= 512;
+			start_free += 512;
+			freed += 512;
+		} else if (is_partial(*start_free)) {
+			free_partial(kctx, *start_free);
+			nr_pages_to_free--;
+			start_free++;
+			freed++;
+		} else {
+			struct tagged_addr *local_end_free;
+
+			local_end_free = start_free;
+			while (nr_pages_to_free &&
+			       !is_huge(*local_end_free) &&
+			       !is_partial(*local_end_free)) {
+				local_end_free++;
+				nr_pages_to_free--;
+			}
+			kbase_mem_pool_free_pages(&kctx->mem_pool,
+					local_end_free - start_free,
+					start_free,
+					syncback,
+					reclaimed);
+			freed += local_end_free - start_free;
+			start_free += local_end_free - start_free;
+		}
+	}
+
+	alloc->nents -= freed;
 
 	/*
 	 * If the allocation was not evicted (i.e. evicted == 0) then
 	 * the page accounting needs to be done.
 	 */
 	if (!reclaimed) {
-		kbase_process_page_usage_dec(kctx, nr_pages_to_free);
-		new_page_count = kbase_atomic_sub_pages(nr_pages_to_free,
+		kbase_process_page_usage_dec(kctx, freed);
+		new_page_count = kbase_atomic_sub_pages(freed,
 				&kctx->used_pages);
-		kbase_atomic_sub_pages(nr_pages_to_free,
+		kbase_atomic_sub_pages(freed,
 				&kctx->kbdev->memdev.used_pages);
 
 		KBASE_TLSTREAM_AUX_PAGESALLOC(
@@ -2160,7 +2362,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
 	long pinned_pages;
 	struct kbase_mem_phy_alloc *alloc;
 	struct page **pages;
-	phys_addr_t *pa;
+	struct tagged_addr *pa;
 	long i;
 	int err = -ENOMEM;
 	unsigned long address;
@@ -2229,7 +2431,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
 			goto unwind;
 
 		alloc->imported.user_buf.dma_addrs[i] = dma_addr;
-		pa[i] = page_to_phys(pages[i]);
+		pa[i] = as_tagged(page_to_phys(pages[i]));
 
 		local_size -= min;
 		offset = 0;
@@ -2250,6 +2452,9 @@ unwind:
 		dma_unmap_page(kctx->kbdev->dev,
 				alloc->imported.user_buf.dma_addrs[i],
 				PAGE_SIZE, DMA_BIDIRECTIONAL);
+	}
+
+	while (++i < pinned_pages) {
 		put_page(pages[i]);
 		pages[i] = NULL;
 	}
@@ -2290,7 +2495,7 @@ static int kbase_jd_umm_map(struct kbase_context *kctx,
 	struct sg_table *sgt;
 	struct scatterlist *s;
 	int i;
-	phys_addr_t *pa;
+	struct tagged_addr *pa;
 	int err;
 	size_t count = 0;
 	struct kbase_mem_phy_alloc *alloc;
@@ -2325,9 +2530,10 @@ static int kbase_jd_umm_map(struct kbase_context *kctx,
 
 		for (j = 0; (j < pages) && (count < reg->nr_pages);
 				j++, count++)
-			*pa++ = sg_dma_address(s) + (j << PAGE_SHIFT);
+			*pa++ = as_tagged(sg_dma_address(s) +
+					  (j << PAGE_SHIFT));
 		WARN_ONCE(j < pages,
-		"sg list from dma_buf_map_attachment > dma_buf->size=%zu\n",
+			  "sg list from dma_buf_map_attachment > dma_buf->size=%zu\n",
 		alloc->imported.umm.dma_buf->size);
 	}
 
@@ -2352,7 +2558,7 @@ static int kbase_jd_umm_map(struct kbase_context *kctx,
 	if (reg->flags & KBASE_REG_IMPORT_PAD) {
 		err = kbase_mmu_insert_single_page(kctx,
 				reg->start_pfn + count,
-				page_to_phys(kctx->aliasing_sink_page),
+				kctx->aliasing_sink_page,
 				reg->nr_pages - count,
 				(reg->flags | KBASE_REG_GPU_RD) &
 				~KBASE_REG_GPU_WR);
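
The allocation path added above services large requests from a new 2MB pool (lp_mem_pool) and hands out leftover 4KB pages from a "partial" 2MB chunk, tracking which sub-pages are in use with a per-chunk bitmap (kbase_sub_alloc.sub_pages) and returning the chunk once the bitmap empties again (free_partial). A minimal userspace C sketch of that bookkeeping follows; the names sub_alloc, sub_alloc_get and sub_alloc_put are simplified stand-ins for illustration, not the kbase API.

/*
 * Userspace model of the 2MB "partial" sub-allocation bookkeeping used by
 * this patch. Names are illustrative stand-ins, not the kbase API.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define SMALL_PER_LARGE 512                    /* 2MB / 4KB */
#define BITMAP_WORDS (SMALL_PER_LARGE / 64)

struct sub_alloc {
	uint64_t sub_pages[BITMAP_WORDS];      /* one bit per 4KB sub-page in use */
	unsigned char *base;                   /* start of the 2MB chunk */
};

static bool bitmap_empty512(const uint64_t *bm)
{
	for (int i = 0; i < BITMAP_WORDS; i++)
		if (bm[i])
			return false;
	return true;
}

static int find_first_zero512(const uint64_t *bm)
{
	for (int i = 0; i < BITMAP_WORDS; i++)
		if (~bm[i])
			for (int b = 0; b < 64; b++)
				if (!(bm[i] & (1ULL << b)))
					return i * 64 + b;
	return -1;
}

/* Hand out one 4KB sub-page from the 2MB chunk, as the allocator does when
 * fewer than 512 pages remain to be satisfied. */
static void *sub_alloc_get(struct sub_alloc *sa)
{
	int idx = find_first_zero512(sa->sub_pages);

	if (idx < 0)
		return NULL;                   /* chunk fully used */
	sa->sub_pages[idx / 64] |= 1ULL << (idx % 64);
	return sa->base + (size_t)idx * 4096;
}

/* Return one sub-page; once the bitmap is empty the whole 2MB chunk goes
 * back, mirroring free_partial() in the patch. */
static void sub_alloc_put(struct sub_alloc *sa, void *p)
{
	size_t idx = ((unsigned char *)p - sa->base) / 4096;

	sa->sub_pages[idx / 64] &= ~(1ULL << (idx % 64));
	if (bitmap_empty512(sa->sub_pages)) {
		free(sa->base);                /* stands in for returning to lp_mem_pool */
		memset(sa, 0, sizeof(*sa));
	}
}

int main(void)
{
	struct sub_alloc sa = { .base = malloc(SMALL_PER_LARGE * 4096) };
	void *a = sub_alloc_get(&sa);
	void *b = sub_alloc_get(&sa);

	printf("sub-pages at %p and %p\n", a, b);
	sub_alloc_put(&sa, a);
	sub_alloc_put(&sa, b);                 /* last sub-page back: chunk is freed */
	return 0;
}

In the driver itself the same lifecycle is expressed with the kernel bitmap helpers (bitmap_set, clear_bit, bitmap_empty) and back-pointers stashed in struct page, but the idea is identical: sub-allocate through the bitmap and release the 2MB chunk when the last sub-page comes back.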