author    Sidath Senanayake <sidaths@google.com>  2017-07-11 16:57:40 +0200
committer Sidath Senanayake <sidaths@google.com>  2017-07-11 16:57:40 +0200
commit    ea23e535ae857c92d45cb11bdd5dba7c27579726 (patch)
tree      e1bcda85e529f9be3f02202b81fb3e8f6ab73129 /mali_kbase/mali_kbase_mem.c
parent    6f5ab3baed824941f168ab133469f997d4450146 (diff)
download  gpu-ea23e535ae857c92d45cb11bdd5dba7c27579726.tar.gz
Mali Bifrost DDK r7p0 KMD
Provenance: cbfad67c8 (collaborate/EAC/b_r7p0)
BX304L01B-BU-00000-r7p0-01rel0
BX304L06A-BU-00000-r7p0-01rel0
BX304X07X-BU-00000-r7p0-01rel0

Signed-off-by: Sidath Senanayake <sidaths@google.com>
Change-Id: Icdf8b47a48b829cc228f4df3035f7b539da58104
Diffstat (limited to 'mali_kbase/mali_kbase_mem.c')
-rw-r--r--  mali_kbase/mali_kbase_mem.c  282
1 file changed, 244 insertions(+), 38 deletions(-)
diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c
index e76294d..6fefffe 100644
--- a/mali_kbase/mali_kbase_mem.c
+++ b/mali_kbase/mali_kbase_mem.c
@@ -37,7 +37,6 @@
#include <mali_midg_regmap.h>
#include <mali_kbase_cache_policy.h>
#include <mali_kbase_hw.h>
-#include <mali_kbase_hwaccess_time.h>
#include <mali_kbase_tlstream.h>
/* This function finds out which RB tree the given GPU VA region belongs to
@@ -738,6 +737,7 @@ fail_unlock:
int kbase_mem_init(struct kbase_device *kbdev)
{
struct kbasep_mem_device *memdev;
+ int ret;
KBASE_DEBUG_ASSERT(kbdev);
@@ -747,8 +747,23 @@ int kbase_mem_init(struct kbase_device *kbdev)
/* Initialize memory usage */
atomic_set(&memdev->used_pages, 0);
- return kbase_mem_pool_init(&kbdev->mem_pool,
- KBASE_MEM_POOL_MAX_SIZE_KBDEV, kbdev, NULL);
+ ret = kbase_mem_pool_init(&kbdev->mem_pool,
+ KBASE_MEM_POOL_MAX_SIZE_KBDEV,
+ KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER,
+ kbdev,
+ NULL);
+ if (ret)
+ return ret;
+
+ ret = kbase_mem_pool_init(&kbdev->lp_mem_pool,
+ (KBASE_MEM_POOL_MAX_SIZE_KBDEV >> 9),
+ KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER,
+ kbdev,
+ NULL);
+ if (ret)
+ kbase_mem_pool_term(&kbdev->mem_pool);
+
+ return ret;
}
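
Note: kbase_mem_init() now brings up two pools: the existing 4KB pool and a new 2MB large-page pool (lp_mem_pool). The large-page pool's maximum size is the 4KB maximum shifted right by 9 because one 2MB page backs 512 (= 1 << 9) 4KB pages; if the second init fails, the first pool is torn down before returning, so the term path (updated below) never sees a half-initialized pair. A minimal statement of the ratio, with PAGES_PER_2MB as a hypothetical name:

        /* One 2MB page backs 512 4KB pages; 512 == 1 << 9, hence ">> 9". */
        #define PAGES_PER_2MB  (SZ_2M / SZ_4K)        /* == 512 */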
void kbase_mem_halt(struct kbase_device *kbdev)
@@ -770,6 +785,7 @@ void kbase_mem_term(struct kbase_device *kbdev)
dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
kbase_mem_pool_term(&kbdev->mem_pool);
+ kbase_mem_pool_term(&kbdev->lp_mem_pool);
}
KBASE_EXPORT_TEST_API(kbase_mem_term);
@@ -921,7 +937,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64
} else {
err = kbase_mmu_insert_single_page(kctx,
reg->start_pfn + i * stride,
- page_to_phys(kctx->aliasing_sink_page),
+ kctx->aliasing_sink_page,
alloc->imported.alias.aliased[i].length,
(reg->flags & mask) | attr);
@@ -1070,10 +1086,12 @@ int kbasep_find_enclosing_cpu_mapping_offset(
KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_offset);
void kbase_sync_single(struct kbase_context *kctx,
- phys_addr_t cpu_pa, phys_addr_t gpu_pa,
+ struct tagged_addr t_cpu_pa, struct tagged_addr t_gpu_pa,
off_t offset, size_t size, enum kbase_sync_type sync_fn)
{
struct page *cpu_page;
+ phys_addr_t cpu_pa = as_phys_addr_t(t_cpu_pa);
+ phys_addr_t gpu_pa = as_phys_addr_t(t_gpu_pa);
cpu_page = pfn_to_page(PFN_DOWN(cpu_pa));
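
Note: kbase_sync_single() now takes struct tagged_addr and unwraps both addresses with as_phys_addr_t() on entry. The tag bits let the allocator record, per 4KB entry, whether it belongs to a 2MB huge page (and whether it is the head entry of one) or was carved from a partially used 2MB page. The same conversion explains the kbase_gpu_mmap() hunk above: kctx->aliasing_sink_page is evidently stored pre-tagged now, so call sites pass it through instead of wrapping it with page_to_phys(). A plausible shape for the wrapper and its helpers, inferred from the names used in this diff — the real definitions and bit values live in the driver headers and are an assumption here:

        /* Assumed layout; only the helper names are confirmed by this diff. */
        struct tagged_addr { phys_addr_t tagged_addr; };

        #define HUGE_PAGE    (1u << 0)  /* entry is part of a 2MB page      */
        #define HUGE_HEAD    (1u << 1)  /* first 4KB entry of that 2MB page */
        #define FROM_PARTIAL (1u << 2)  /* slice of a partial 2MB page      */

        static inline phys_addr_t as_phys_addr_t(struct tagged_addr t)
        {
                return t.tagged_addr & PAGE_MASK;       /* strip tag bits */
        }

        static inline struct tagged_addr as_tagged(phys_addr_t phys)
        {
                struct tagged_addr t = { .tagged_addr = phys & PAGE_MASK };
                return t;
        }

        static inline struct tagged_addr as_tagged_tag(phys_addr_t phys, int tag)
        {
                struct tagged_addr t = { .tagged_addr = (phys & PAGE_MASK) | tag };
                return t;
        }

        static inline bool is_huge(struct tagged_addr t)
        {
                return t.tagged_addr & HUGE_PAGE;
        }

        static inline bool is_huge_head(struct tagged_addr t)
        {
                return t.tagged_addr & HUGE_HEAD;
        }

        static inline bool is_partial(struct tagged_addr t)
        {
                return t.tagged_addr & FROM_PARTIAL;
        }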
@@ -1128,8 +1146,8 @@ static int kbase_do_syncset(struct kbase_context *kctx,
struct kbase_cpu_mapping *map;
unsigned long start;
size_t size;
- phys_addr_t *cpu_pa;
- phys_addr_t *gpu_pa;
+ struct tagged_addr *cpu_pa;
+ struct tagged_addr *gpu_pa;
u64 page_off, page_count;
u64 i;
u64 offset;
@@ -1147,7 +1165,8 @@ static int kbase_do_syncset(struct kbase_context *kctx,
goto out_unlock;
}
- if (!(reg->flags & KBASE_REG_CPU_CACHED))
+ if (!(reg->flags & KBASE_REG_CPU_CACHED) ||
+ kbase_mem_is_imported(reg->gpu_alloc->type))
goto out_unlock;
start = (uintptr_t)sset->user_addr;
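
Note: the added kbase_mem_is_imported() test makes kbase_do_syncset() bail out (via out_unlock) for imported regions even when they are CPU-cached; cache maintenance for dma-buf and user-buffer imports belongs to the exporter rather than to kbase. A plausible definition of the predicate — the exact set of imported types is an assumption:

        static inline bool kbase_mem_is_imported(enum kbase_memory_type type)
        {
                return (type == KBASE_MEM_TYPE_IMPORTED_UMM) ||
                       (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
        }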
@@ -1175,7 +1194,7 @@ static int kbase_do_syncset(struct kbase_context *kctx,
}
/* Sync first page */
- if (cpu_pa[page_off]) {
+ if (as_phys_addr_t(cpu_pa[page_off])) {
size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
kbase_sync_single(kctx, cpu_pa[page_off], gpu_pa[page_off],
@@ -1185,7 +1204,7 @@ static int kbase_do_syncset(struct kbase_context *kctx,
/* Sync middle pages (if any) */
for (i = 1; page_count > 2 && i < page_count - 1; i++) {
/* we grow upwards, so bail on first non-present page */
- if (!cpu_pa[page_off + i])
+ if (!as_phys_addr_t(cpu_pa[page_off + i]))
break;
kbase_sync_single(kctx, cpu_pa[page_off + i],
@@ -1193,7 +1212,8 @@ static int kbase_do_syncset(struct kbase_context *kctx,
}
/* Sync last page (if any) */
- if (page_count > 1 && cpu_pa[page_off + page_count - 1]) {
+ if (page_count > 1 &&
+ as_phys_addr_t(cpu_pa[page_off + page_count - 1])) {
size_t sz = ((start + size - 1) & ~PAGE_MASK) + 1;
kbase_sync_single(kctx, cpu_pa[page_off + page_count - 1],
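
Note: the first/middle/last-page presence checks change from "if (cpu_pa[...])" to "if (as_phys_addr_t(cpu_pa[...]))" because a struct has no implicit boolean conversion in C; "page present" now means "unwrapped physical address is non-zero". Equivalently, with a hypothetical helper:

        static inline bool page_present(struct tagged_addr t)
        {
                return as_phys_addr_t(t) != 0;
        }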
@@ -1412,24 +1432,134 @@ int kbase_alloc_phy_pages_helper(
{
int new_page_count __maybe_unused;
size_t old_page_count = alloc->nents;
+ size_t nr_left = nr_pages_requested;
+ int res;
+ struct kbase_context *kctx;
+ struct tagged_addr *tp;
KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+ kctx = alloc->imported.kctx;
+
if (nr_pages_requested == 0)
goto done; /*nothing to do*/
new_page_count = kbase_atomic_add_pages(
- nr_pages_requested, &alloc->imported.kctx->used_pages);
- kbase_atomic_add_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages);
+ nr_pages_requested, &kctx->used_pages);
+ kbase_atomic_add_pages(nr_pages_requested,
+ &kctx->kbdev->memdev.used_pages);
/* Increase mm counters before we allocate pages so that this
* allocation is visible to the OOM killer */
- kbase_process_page_usage_inc(alloc->imported.kctx, nr_pages_requested);
+ kbase_process_page_usage_inc(kctx, nr_pages_requested);
+
+ tp = alloc->pages + old_page_count;
+
+#ifdef CONFIG_MALI_2MB_ALLOC
+ /* Check if we have enough pages requested so we can allocate a large
+ * page (512 * 4KB = 2MB )
+ */
+ if (nr_left >= (SZ_2M / SZ_4K)) {
+ int nr_lp = nr_left / (SZ_2M / SZ_4K);
+
+ res = kbase_mem_pool_alloc_pages(&kctx->lp_mem_pool,
+ nr_lp * (SZ_2M / SZ_4K),
+ tp,
+ true);
+
+ if (res > 0) {
+ nr_left -= res;
+ tp += res;
+ }
+
+ if (nr_left) {
+ struct kbase_sub_alloc *sa, *temp_sa;
+
+ mutex_lock(&kctx->mem_partials_lock);
+
+ list_for_each_entry_safe(sa, temp_sa,
+ &kctx->mem_partials, link) {
+ int pidx = 0;
+
+ while (nr_left) {
+ pidx = find_next_zero_bit(sa->sub_pages,
+ SZ_2M / SZ_4K,
+ pidx);
+ bitmap_set(sa->sub_pages, pidx, 1);
+ *tp++ = as_tagged_tag(page_to_phys(sa->page +
+ pidx),
+ FROM_PARTIAL);
+ nr_left--;
+
+ if (bitmap_full(sa->sub_pages, SZ_2M / SZ_4K)) {
+ /* unlink from partial list when full */
+ list_del_init(&sa->link);
+ break;
+ }
+ }
+ }
+ mutex_unlock(&kctx->mem_partials_lock);
+ }
+
+ /* only if we actually have a chunk left <512. If more it indicates
+ * that we couldn't allocate a 2MB above, so no point to retry here.
+ */
+ if (nr_left > 0 && nr_left < (SZ_2M / SZ_4K)) {
+ /* create a new partial and suballocate the rest from it */
+ struct page *np = NULL;
+
+ do {
+ int err = kbase_mem_pool_grow(&kctx->lp_mem_pool, 1);
+
+ if (err)
+ break;
+ np = kbase_mem_pool_alloc(&kctx->lp_mem_pool);
+ } while (!np);
+
+ if (np) {
+ int i;
+ struct kbase_sub_alloc *sa;
+ struct page *p;
+
+ sa = kmalloc(sizeof(*sa), GFP_KERNEL);
+ if (!sa) {
+ kbase_mem_pool_free(&kctx->lp_mem_pool, np, false);
+ goto no_new_partial;
+ }
+
+ /* store pointers back to the control struct */
+ np->lru.next = (void *)sa;
+ for (p = np; p < np + SZ_2M / SZ_4K; p++)
+ p->lru.prev = (void *)np;
+ INIT_LIST_HEAD(&sa->link);
+ bitmap_zero(sa->sub_pages, SZ_2M / SZ_4K);
+ sa->page = np;
- if (kbase_mem_pool_alloc_pages(&alloc->imported.kctx->mem_pool,
- nr_pages_requested, alloc->pages + old_page_count) != 0)
- goto no_alloc;
+ for (i = 0; i < nr_left; i++)
+ *tp++ = as_tagged_tag(page_to_phys(np + i), FROM_PARTIAL);
+
+ bitmap_set(sa->sub_pages, 0, nr_left);
+ nr_left = 0;
+
+ /* expose for later use */
+ mutex_lock(&kctx->mem_partials_lock);
+ list_add(&sa->link, &kctx->mem_partials);
+ mutex_unlock(&kctx->mem_partials_lock);
+ }
+ }
+ }
+no_new_partial:
+#endif
+
+ if (nr_left) {
+ res = kbase_mem_pool_alloc_pages(&kctx->mem_pool,
+ nr_left,
+ tp,
+ false);
+ if (res <= 0)
+ goto alloc_failed;
+ }
/*
* Request a zone cache update, this scans only the new pages an
@@ -1441,21 +1571,52 @@ int kbase_alloc_phy_pages_helper(
kbase_zone_cache_clear(alloc);
KBASE_TLSTREAM_AUX_PAGESALLOC(
- (u32)alloc->imported.kctx->id,
+ (u32)kctx->id,
(u64)new_page_count);
alloc->nents += nr_pages_requested;
done:
return 0;
-no_alloc:
- kbase_process_page_usage_dec(alloc->imported.kctx, nr_pages_requested);
- kbase_atomic_sub_pages(nr_pages_requested, &alloc->imported.kctx->used_pages);
- kbase_atomic_sub_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages);
+alloc_failed:
+ /* rollback needed if got one or more 2MB but failed later */
+ if (nr_left != nr_pages_requested)
+ kbase_mem_pool_free_pages(&kctx->lp_mem_pool,
+ nr_pages_requested - nr_left,
+ alloc->pages + old_page_count,
+ false,
+ false);
+
+ kbase_process_page_usage_dec(kctx, nr_pages_requested);
+ kbase_atomic_sub_pages(nr_pages_requested, &kctx->used_pages);
+ kbase_atomic_sub_pages(nr_pages_requested,
+ &kctx->kbdev->memdev.used_pages);
return -ENOMEM;
}
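
Note: under CONFIG_MALI_2MB_ALLOC, kbase_alloc_phy_pages_helper() now fills a request in tiers: (1) whole 2MB pages from lp_mem_pool for every full 512-page chunk, (2) 4KB slices claimed from already-partial 2MB pages on kctx->mem_partials, (3) for a remainder strictly below 512 pages, one fresh 2MB page carved into a new partial, and (4) the plain 4KB pool for anything left. If the final pool allocation fails, alloc_failed first returns any 2MB pages already taken, then rolls back the usage counters. The tiering in sketch form — helper names are hypothetical, and tp advancement, tagging, locking and error paths are elided:

        size_t left = nr_pages_requested;

        left -= alloc_whole_2mb_blocks(kctx, left / 512, tp);   /* tier 1 */
        left -= take_from_partials(kctx, left, tp);             /* tier 2 */
        if (left && left < 512)
                left -= carve_new_partial(kctx, left, tp);      /* tier 3 */
        if (left && alloc_4kb_pages(kctx, left, tp) <= 0)       /* tier 4 */
                goto alloc_failed;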
+static void free_partial(struct kbase_context *kctx, struct tagged_addr tp)
+{
+ struct page *p, *head_page;
+ struct kbase_sub_alloc *sa;
+
+ p = phys_to_page(as_phys_addr_t(tp));
+ head_page = (struct page *)p->lru.prev;
+ sa = (struct kbase_sub_alloc *)head_page->lru.next;
+ mutex_lock(&kctx->mem_partials_lock);
+ clear_bit(p - head_page, sa->sub_pages);
+ if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) {
+ list_del(&sa->link);
+ kbase_mem_pool_free(&kctx->lp_mem_pool, head_page, true);
+ kfree(sa);
+ } else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) ==
+ SZ_2M / SZ_4K - 1) {
+ /* expose the partial again */
+ list_add(&sa->link, &kctx->mem_partials);
+ }
+ mutex_unlock(&kctx->mem_partials_lock);
+}
+
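
Note: free_partial() recovers its bookkeeping from a FROM_PARTIAL page with no lookup table, because the allocator stashed the pointers in otherwise-unused struct page fields when the partial was created: the 2MB head page's lru.next holds the kbase_sub_alloc, and every constituent page's lru.prev holds the head page. Clearing a bit then has two interesting transitions: an empty bitmap frees the whole 2MB page back to lp_mem_pool, and a full-minus-one bitmap puts the sub-alloc back on kctx->mem_partials, where tier 2 of the allocator can find it again. The O(1) recovery, restated:

        struct page *p    = phys_to_page(as_phys_addr_t(tp));
        struct page *head = (struct page *)p->lru.prev;        /* 2MB head page */
        struct kbase_sub_alloc *sa =
                (struct kbase_sub_alloc *)head->lru.next;      /* bitmap owner  */

        clear_bit(p - head, sa->sub_pages);                    /* index 0..511  */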
int kbase_free_phy_pages_helper(
struct kbase_mem_phy_alloc *alloc,
size_t nr_pages_to_free)
@@ -1463,8 +1624,9 @@ int kbase_free_phy_pages_helper(
struct kbase_context *kctx = alloc->imported.kctx;
bool syncback;
bool reclaimed = (alloc->evicted != 0);
- phys_addr_t *start_free;
+ struct tagged_addr *start_free;
int new_page_count __maybe_unused;
+ size_t freed = 0;
KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
KBASE_DEBUG_ASSERT(alloc->imported.kctx);
@@ -1478,6 +1640,13 @@ int kbase_free_phy_pages_helper(
syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+ /* pad start_free to a valid start location */
+ while (nr_pages_to_free && is_huge(*start_free) &&
+ !is_huge_head(*start_free)) {
+ nr_pages_to_free--;
+ start_free++;
+ }
+
/*
* Clear the zone cache, we don't expect JIT allocations to be
* shrunk in parts so there is no point trying to optimize for that
@@ -1486,23 +1655,56 @@ int kbase_free_phy_pages_helper(
*/
kbase_zone_cache_clear(alloc);
- kbase_mem_pool_free_pages(&kctx->mem_pool,
- nr_pages_to_free,
- start_free,
- syncback,
- reclaimed);
- alloc->nents -= nr_pages_to_free;
+ while (nr_pages_to_free) {
+ if (is_huge_head(*start_free)) {
+ /* This is a 2MB entry, so free all the 512 pages that
+ * it points to
+ */
+ kbase_mem_pool_free_pages(&kctx->lp_mem_pool,
+ 512,
+ start_free,
+ syncback,
+ reclaimed);
+ nr_pages_to_free -= 512;
+ start_free += 512;
+ freed += 512;
+ } else if (is_partial(*start_free)) {
+ free_partial(kctx, *start_free);
+ nr_pages_to_free--;
+ start_free++;
+ freed++;
+ } else {
+ struct tagged_addr *local_end_free;
+
+ local_end_free = start_free;
+ while (nr_pages_to_free &&
+ !is_huge(*local_end_free) &&
+ !is_partial(*local_end_free)) {
+ local_end_free++;
+ nr_pages_to_free--;
+ }
+ kbase_mem_pool_free_pages(&kctx->mem_pool,
+ local_end_free - start_free,
+ start_free,
+ syncback,
+ reclaimed);
+ freed += local_end_free - start_free;
+ start_free += local_end_free - start_free;
+ }
+ }
+
+ alloc->nents -= freed;
/*
* If the allocation was not evicted (i.e. evicted == 0) then
* the page accounting needs to be done.
*/
if (!reclaimed) {
- kbase_process_page_usage_dec(kctx, nr_pages_to_free);
- new_page_count = kbase_atomic_sub_pages(nr_pages_to_free,
+ kbase_process_page_usage_dec(kctx, freed);
+ new_page_count = kbase_atomic_sub_pages(freed,
&kctx->used_pages);
- kbase_atomic_sub_pages(nr_pages_to_free,
+ kbase_atomic_sub_pages(freed,
&kctx->kbdev->memdev.used_pages);
KBASE_TLSTREAM_AUX_PAGESALLOC(
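
Note: the free path now walks the tagged array in segments. A HUGE_HEAD entry releases all 512 pages of its 2MB block to lp_mem_pool in one call, a FROM_PARTIAL entry goes through free_partial() a page at a time, and consecutive plain entries are batched into a single kbase_mem_pool_free_pages() call on the 4KB pool. Accounting afterwards uses freed rather than nr_pages_to_free because the "pad start_free" loop above may have skipped non-head huge entries before freeing began. The classification, as a sketch with hypothetical helpers:

        while (nr_pages_to_free) {
                if (is_huge_head(*p)) {                  /* whole 2MB block  */
                        free_2mb_block(kctx, p);
                        p += 512; nr_pages_to_free -= 512; freed += 512;
                } else if (is_partial(*p)) {             /* single 4KB slice */
                        free_partial(kctx, *p++);
                        nr_pages_to_free--; freed++;
                } else {                                 /* run of 4KB pages */
                        size_t run = plain_run_len(p, nr_pages_to_free);
                        free_4kb_run(kctx, p, run);
                        p += run; nr_pages_to_free -= run; freed += run;
                }
        }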
@@ -2160,7 +2362,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
long pinned_pages;
struct kbase_mem_phy_alloc *alloc;
struct page **pages;
- phys_addr_t *pa;
+ struct tagged_addr *pa;
long i;
int err = -ENOMEM;
unsigned long address;
@@ -2229,7 +2431,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
goto unwind;
alloc->imported.user_buf.dma_addrs[i] = dma_addr;
- pa[i] = page_to_phys(pages[i]);
+ pa[i] = as_tagged(page_to_phys(pages[i]));
local_size -= min;
offset = 0;
@@ -2250,6 +2452,9 @@ unwind:
dma_unmap_page(kctx->kbdev->dev,
alloc->imported.user_buf.dma_addrs[i],
PAGE_SIZE, DMA_BIDIRECTIONAL);
+ }
+
+ while (++i < pinned_pages) {
put_page(pages[i]);
pages[i] = NULL;
}
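
Note: the unwind fix in kbase_jd_user_buf_map() splits what used to be one cleanup loop. dma_unmap_page() must only be called for pages that were actually mapped before the failure, but put_page() is owed for every page that was pinned. Assuming i is left at the failure index by the (truncated) unmap loop above, the full unwind would read:

        while (i--)                          /* unmap only what was mapped  */
                dma_unmap_page(kctx->kbdev->dev,
                               alloc->imported.user_buf.dma_addrs[i],
                               PAGE_SIZE, DMA_BIDIRECTIONAL);

        while (++i < pinned_pages) {         /* i is -1 here: put all pages */
                put_page(pages[i]);
                pages[i] = NULL;
        }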
@@ -2290,7 +2495,7 @@ static int kbase_jd_umm_map(struct kbase_context *kctx,
struct sg_table *sgt;
struct scatterlist *s;
int i;
- phys_addr_t *pa;
+ struct tagged_addr *pa;
int err;
size_t count = 0;
struct kbase_mem_phy_alloc *alloc;
@@ -2325,9 +2530,10 @@ static int kbase_jd_umm_map(struct kbase_context *kctx,
for (j = 0; (j < pages) && (count < reg->nr_pages); j++,
count++)
- *pa++ = sg_dma_address(s) + (j << PAGE_SHIFT);
+ *pa++ = as_tagged(sg_dma_address(s) +
+ (j << PAGE_SHIFT));
WARN_ONCE(j < pages,
- "sg list from dma_buf_map_attachment > dma_buf->size=%zu\n",
+ "sg list from dma_buf_map_attachment > dma_buf->size=%zu\n",
alloc->imported.umm.dma_buf->size);
}
@@ -2352,7 +2558,7 @@ static int kbase_jd_umm_map(struct kbase_context *kctx,
if (reg->flags & KBASE_REG_IMPORT_PAD) {
err = kbase_mmu_insert_single_page(kctx,
reg->start_pfn + count,
- page_to_phys(kctx->aliasing_sink_page),
+ kctx->aliasing_sink_page,
reg->nr_pages - count,
(reg->flags | KBASE_REG_GPU_RD) &
~KBASE_REG_GPU_WR);