author     Sidath Senanayake <sidaths@google.com>  2018-07-31 15:28:14 +0200
committer  Sidath Senanayake <sidaths@google.com>  2018-07-31 15:28:14 +0200
commit     f32af5a9ba3c2b556d92827a96dbeec3df200968 (patch)
tree       f408feaf7cd8b87a980575c132f11d3ba45a3f8d /mali_kbase/mali_kbase_mem.c
parent     5574d60cda52fa08ca2cc714ae051ee2b6f850d7 (diff)
download   gpu-f32af5a9ba3c2b556d92827a96dbeec3df200968.tar.gz
Mali Bifrost DDK r14p0 KMD
Provenance:
37fe8262c (collaborate/EAC/b_r14p0)
BX304L01B-BU-00000-r14p0-01rel0
BX304L06A-BU-00000-r14p0-01rel0
BX304X07X-BU-00000-r14p0-01rel0
Signed-off-by: Sidath Senanayake <sidaths@google.com>
Change-Id: I0eb3b666045d72d33e2953954de5b416f909da0f
Diffstat (limited to 'mali_kbase/mali_kbase_mem.c')
-rw-r--r--  mali_kbase/mali_kbase_mem.c  640
1 file changed, 402 insertions(+), 238 deletions(-)
diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c
index 1dd161b..3eff83a 100644
--- a/mali_kbase/mali_kbase_mem.c
+++ b/mali_kbase/mali_kbase_mem.c
@@ -42,29 +42,34 @@
 #include <mali_kbase_hw.h>
 #include <mali_kbase_tlstream.h>
 
-/* This function finds out which RB tree the given GPU VA region belongs to
- * based on the region zone */
-static struct rb_root *kbase_reg_flags_to_rbtree(struct kbase_context *kctx,
-		struct kbase_va_region *reg)
+/* Forward declarations */
+static void free_partial_locked(struct kbase_context *kctx,
+		struct kbase_mem_pool *pool, struct tagged_addr tp);
+
+static size_t kbase_get_num_cpu_va_bits(struct kbase_context *kctx)
 {
-	struct rb_root *rbtree = NULL;
+#if defined(CONFIG_ARM64)
+	/* VA_BITS can be as high as 48 bits, but all bits are available for
+	 * both user and kernel.
+	 */
+	size_t cpu_va_bits = VA_BITS;
+#elif defined(CONFIG_X86_64)
+	/* x86_64 can access 48 bits of VA, but the 48th is used to denote
+	 * kernel (1) vs userspace (0), so the max here is 47.
+	 */
+	size_t cpu_va_bits = 47;
+#elif defined(CONFIG_ARM) || defined(CONFIG_X86_32)
+	size_t cpu_va_bits = sizeof(void *) * BITS_PER_BYTE;
+#else
+#error "Unknown CPU VA width for this architecture"
+#endif
 
-	switch (reg->flags & KBASE_REG_ZONE_MASK) {
-	case KBASE_REG_ZONE_CUSTOM_VA:
-		rbtree = &kctx->reg_rbtree_custom;
-		break;
-	case KBASE_REG_ZONE_EXEC:
-		rbtree = &kctx->reg_rbtree_exec;
-		break;
-	case KBASE_REG_ZONE_SAME_VA:
-		rbtree = &kctx->reg_rbtree_same;
-		/* fall through */
-	default:
-		rbtree = &kctx->reg_rbtree_same;
-		break;
-	}
+#ifdef CONFIG_64BIT
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+		cpu_va_bits = 32;
+#endif
 
-	return rbtree;
+	return cpu_va_bits;
 }
 
 /* This function finds out which RB tree the given pfn from the GPU VA belongs
@@ -79,8 +84,6 @@ static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx,
 #endif /* CONFIG_64BIT */
 	if (gpu_pfn >= KBASE_REG_ZONE_CUSTOM_VA_BASE)
 		rbtree = &kctx->reg_rbtree_custom;
-	else if (gpu_pfn >= KBASE_REG_ZONE_EXEC_BASE)
-		rbtree = &kctx->reg_rbtree_exec;
 	else
 		rbtree = &kctx->reg_rbtree_same;
 #ifdef CONFIG_64BIT
@@ -96,15 +99,14 @@ static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx,
 }
 
 /* This function inserts a region into the tree. */
-static void kbase_region_tracker_insert(struct kbase_context *kctx,
-		struct kbase_va_region *new_reg)
+static void kbase_region_tracker_insert(struct kbase_va_region *new_reg)
 {
 	u64 start_pfn = new_reg->start_pfn;
 	struct rb_node **link = NULL;
 	struct rb_node *parent = NULL;
 	struct rb_root *rbtree = NULL;
 
-	rbtree = kbase_reg_flags_to_rbtree(kctx, new_reg);
+	rbtree = new_reg->rbtree;
 
 	link = &(rbtree->rb_node);
 	/* Find the right place in the tree using tree search */
@@ -129,18 +131,13 @@ static void kbase_region_tracker_insert(struct kbase_context *kctx,
 	rb_insert_color(&(new_reg->rblink), rbtree);
 }
 
-/* Find allocated region enclosing free range. */
-static struct kbase_va_region *kbase_region_tracker_find_region_enclosing_range_free(
-		struct kbase_context *kctx, u64 start_pfn, size_t nr_pages)
+static struct kbase_va_region *find_region_enclosing_range_rbtree(
+		struct rb_root *rbtree, u64 start_pfn, size_t nr_pages)
 {
-	struct rb_node *rbnode = NULL;
-	struct kbase_va_region *reg = NULL;
-	struct rb_root *rbtree = NULL;
-
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
 	u64 end_pfn = start_pfn + nr_pages;
 
-	rbtree = kbase_gpu_va_to_rbtree(kctx, start_pfn);
-
 	rbnode = rbtree->rb_node;
 
 	while (rbnode) {
@@ -163,19 +160,12 @@ static struct kbase_va_region *kbase_region_tracker_find_region_enclosing_range_
 	return NULL;
 }
 
-/* Find region enclosing given address. */
-struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr)
+struct kbase_va_region *kbase_find_region_enclosing_address(
+		struct rb_root *rbtree, u64 gpu_addr)
 {
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
 	struct rb_node *rbnode;
 	struct kbase_va_region *reg;
-	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
-	struct rb_root *rbtree = NULL;
-
-	KBASE_DEBUG_ASSERT(NULL != kctx);
-
-	lockdep_assert_held(&kctx->reg_lock);
-
-	rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn);
 
 	rbnode = rbtree->rb_node;
 
@@ -199,14 +189,11 @@ struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struc
 	return NULL;
 }
 
-KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address);
-
-/* Find region with given base address */
-struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr)
+/* Find region enclosing given address. */
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(
+		struct kbase_context *kctx, u64 gpu_addr)
 {
 	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
-	struct rb_node *rbnode = NULL;
-	struct kbase_va_region *reg = NULL;
 	struct rb_root *rbtree = NULL;
 
 	KBASE_DEBUG_ASSERT(NULL != kctx);
@@ -215,6 +202,18 @@ struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kba
 
 	rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn);
 
+	return kbase_find_region_enclosing_address(rbtree, gpu_addr);
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address);
+
+struct kbase_va_region *kbase_find_region_base_address(
+		struct rb_root *rbtree, u64 gpu_addr)
+{
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+	struct rb_node *rbnode = NULL;
+	struct kbase_va_region *reg = NULL;
+
 	rbnode = rbtree->rb_node;
 
 	while (rbnode) {
@@ -231,11 +230,25 @@ struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kba
 	return NULL;
 }
 
+/* Find region with given base address */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(
+		struct kbase_context *kctx, u64 gpu_addr)
+{
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+	struct rb_root *rbtree = NULL;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn);
+
+	return kbase_find_region_base_address(rbtree, gpu_addr);
+}
+
 KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address);
 
 /* Find region meeting given requirements */
 static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(
-		struct kbase_context *kctx, struct kbase_va_region *reg_reqs,
+		struct kbase_va_region *reg_reqs,
 		size_t nr_pages, size_t align_offset, size_t align_mask,
 		u64 *out_start_pfn)
 {
@@ -245,11 +258,9 @@ static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(
 
 	/* Note that this search is a linear search, as we do not have a target
 	   address in mind, so does not benefit from the rbtree search */
-	rbtree = kbase_reg_flags_to_rbtree(kctx, reg_reqs);
-
-	rbnode = rb_first(rbtree);
+	rbtree = reg_reqs->rbtree;
 
-	while (rbnode) {
+	for (rbnode = rb_first(rbtree); rbnode; rbnode = rb_next(rbnode)) {
 		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
 		if ((reg->nr_pages >= nr_pages) &&
 				(reg->flags & KBASE_REG_FREE)) {
@@ -265,6 +276,27 @@ static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(
 			start_pfn += align_mask;
 			start_pfn -= (start_pfn - align_offset) & (align_mask);
 
+			if (!(reg_reqs->flags & KBASE_REG_GPU_NX)) {
+				/* Can't end at 4GB boundary */
+				if (0 == ((start_pfn + nr_pages) & BASE_MEM_PFN_MASK_4GB))
+					start_pfn += align_offset;
+
+				/* Can't start at 4GB boundary */
+				if (0 == (start_pfn & BASE_MEM_PFN_MASK_4GB))
+					start_pfn += align_offset;
+
+				if (!((start_pfn + nr_pages) & BASE_MEM_PFN_MASK_4GB) ||
+				    !(start_pfn & BASE_MEM_PFN_MASK_4GB))
+					continue;
+			} else if (reg_reqs->flags &
+					KBASE_REG_GPU_VA_SAME_4GB_PAGE) {
+				u64 end_pfn = start_pfn + nr_pages - 1;
+
+				if ((start_pfn & ~BASE_MEM_PFN_MASK_4GB) !=
+				    (end_pfn & ~BASE_MEM_PFN_MASK_4GB))
+					start_pfn = end_pfn & ~BASE_MEM_PFN_MASK_4GB;
+			}
+
 			if ((start_pfn >= reg->start_pfn) &&
 					(start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) &&
 					((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1))) {
@@ -272,7 +304,6 @@ static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(
 				return reg;
 			}
 		}
-		rbnode = rb_next(rbnode);
 	}
 
 	return NULL;
@@ -286,7 +317,7 @@ static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(
  * region lock held. The associated memory is not released (see
  * kbase_free_alloced_region). Internal use only.
  */
-static int kbase_remove_va_region(struct kbase_context *kctx, struct kbase_va_region *reg)
+int kbase_remove_va_region(struct kbase_va_region *reg)
 {
 	struct rb_node *rbprev;
 	struct kbase_va_region *prev = NULL;
@@ -298,7 +329,7 @@ static int kbase_remove_va_region(struct kbase_context *kctx, struct kbase_va_re
 	int merged_back = 0;
 	int err = 0;
 
-	reg_rbtree = kbase_reg_flags_to_rbtree(kctx, reg);
+	reg_rbtree = reg->rbtree;
 
 	/* Try to merge with the previous block first */
 	rbprev = rb_prev(&(reg->rblink));
@@ -344,7 +375,9 @@ static int kbase_remove_va_region(struct kbase_context *kctx, struct kbase_va_re
 		 */
 		struct kbase_va_region *free_reg;
 
-		free_reg = kbase_alloc_free_region(kctx, reg->start_pfn, reg->nr_pages, reg->flags & KBASE_REG_ZONE_MASK);
+		free_reg = kbase_alloc_free_region(reg_rbtree,
+				reg->start_pfn, reg->nr_pages,
+				reg->flags & KBASE_REG_ZONE_MASK);
 		if (!free_reg) {
 			err = -ENOMEM;
 			goto out;
@@ -359,14 +392,21 @@ static int kbase_remove_va_region(struct kbase_context *kctx, struct kbase_va_re
 KBASE_EXPORT_TEST_API(kbase_remove_va_region);
 
 /**
- * @brief Insert a VA region to the list, replacing the current at_reg.
+ * kbase_insert_va_region_nolock - Insert a VA region to the list,
+ * replacing the existing one.
+ *
+ * @new_reg: The new region to insert
+ * @at_reg: The region to replace
+ * @start_pfn: The Page Frame Number to insert at
+ * @nr_pages: The number of pages of the region
  */
-static int kbase_insert_va_region_nolock(struct kbase_context *kctx, struct kbase_va_region *new_reg, struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages)
+static int kbase_insert_va_region_nolock(struct kbase_va_region *new_reg,
+		struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages)
 {
 	struct rb_root *reg_rbtree = NULL;
 	int err = 0;
 
-	reg_rbtree = kbase_reg_flags_to_rbtree(kctx, at_reg);
+	reg_rbtree = at_reg->rbtree;
 
 	/* Must be a free region */
 	KBASE_DEBUG_ASSERT((at_reg->flags & KBASE_REG_FREE) != 0);
@@ -390,19 +430,19 @@ static int kbase_insert_va_region_nolock(struct kbase_context *kctx, struct kbas
 		KBASE_DEBUG_ASSERT(at_reg->nr_pages >= nr_pages);
 		at_reg->nr_pages -= nr_pages;
 
-		kbase_region_tracker_insert(kctx, new_reg);
+		kbase_region_tracker_insert(new_reg);
 	}
 	/* New region replaces the end of the old one, so insert after. */
 	else if ((at_reg->start_pfn + at_reg->nr_pages) == (start_pfn + nr_pages)) {
 		at_reg->nr_pages -= nr_pages;
 
-		kbase_region_tracker_insert(kctx, new_reg);
+		kbase_region_tracker_insert(new_reg);
 	}
 	/* New region splits the old one, so insert and create new */
 	else {
 		struct kbase_va_region *new_front_reg;
 
-		new_front_reg = kbase_alloc_free_region(kctx,
+		new_front_reg = kbase_alloc_free_region(reg_rbtree,
 				at_reg->start_pfn,
 				start_pfn - at_reg->start_pfn,
 				at_reg->flags & KBASE_REG_ZONE_MASK);
@@ -411,8 +451,8 @@ static int kbase_insert_va_region_nolock(struct kbase_context *kctx, struct kbas
 			at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages;
 			at_reg->start_pfn = start_pfn + nr_pages;
 
-			kbase_region_tracker_insert(kctx, new_front_reg);
-			kbase_region_tracker_insert(kctx, new_reg);
+			kbase_region_tracker_insert(new_front_reg);
+			kbase_region_tracker_insert(new_reg);
 		} else {
 			err = -ENOMEM;
 		}
@@ -422,21 +462,84 @@ static int kbase_insert_va_region_nolock(struct kbase_context *kctx, struct kbas
 }
 
 /**
- * @brief Add a VA region to the list.
+ * kbase_add_va_region - Add a VA region to the region list for a context.
+ *
+ * @kctx: kbase context containing the region
+ * @reg: the region to add
+ * @addr: the address to insert the region at
+ * @nr_pages: the number of pages in the region
+ * @align: the minimum alignment in pages
 */
 int kbase_add_va_region(struct kbase_context *kctx,
 		struct kbase_va_region *reg,
 		u64 addr, size_t nr_pages, size_t align)
 {
-	struct kbase_va_region *tmp;
-	u64 gpu_pfn = addr >> PAGE_SHIFT;
 	int err = 0;
+	struct kbase_device *kbdev = kctx->kbdev;
+	int cpu_va_bits = kbase_get_num_cpu_va_bits(kctx);
+	int gpu_pc_bits =
+		kbdev->gpu_props.props.core_props.log2_program_counter_size;
 
 	KBASE_DEBUG_ASSERT(NULL != kctx);
 	KBASE_DEBUG_ASSERT(NULL != reg);
 	lockdep_assert_held(&kctx->reg_lock);
 
+	/* The executable allocation from the SAME_VA zone would already have an
+	 * appropriately aligned GPU VA chosen for it.
+	 */
+	if (!(reg->flags & KBASE_REG_GPU_NX) && !addr) {
+		if (cpu_va_bits > gpu_pc_bits) {
+			align = max(align, (size_t)((1ULL << gpu_pc_bits)
+						>> PAGE_SHIFT));
+		}
+	}
+
+	do {
+		err = kbase_add_va_region_rbtree(kbdev, reg, addr, nr_pages,
+				align);
+		if (err != -ENOMEM)
+			break;
+
+		/*
+		 * If the allocation is not from the same zone as JIT
+		 * then don't retry, we're out of VA and there is
+		 * nothing which can be done about it.
+		 */
+		if ((reg->flags & KBASE_REG_ZONE_MASK) !=
+				KBASE_REG_ZONE_CUSTOM_VA)
+			break;
+	} while (kbase_jit_evict(kctx));
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_add_va_region);
+
+/**
+ * kbase_add_va_region_rbtree - Insert a region into its corresponding rbtree
+ *
+ * Insert a region into the rbtree that was specified when the region was
+ * created. If addr is 0 a free area in the rbtree is used, otherwise the
+ * specified address is used.
+ *
+ * @kbdev: The kbase device
+ * @reg: The region to add
+ * @addr: The address to add the region at, or 0 to map at any available address
+ * @nr_pages: The size of the region in pages
+ * @align: The minimum alignment in pages
+ */
+int kbase_add_va_region_rbtree(struct kbase_device *kbdev,
+		struct kbase_va_region *reg,
+		u64 addr, size_t nr_pages, size_t align)
+{
+	struct rb_root *rbtree = NULL;
+	struct kbase_va_region *tmp;
+	u64 gpu_pfn = addr >> PAGE_SHIFT;
+	int err = 0;
+
+	rbtree = reg->rbtree;
+
 	if (!align)
 		align = 1;
 
@@ -444,103 +547,82 @@ int kbase_add_va_region(struct kbase_context *kctx,
 	KBASE_DEBUG_ASSERT(is_power_of_2(align));
 	KBASE_DEBUG_ASSERT(nr_pages > 0);
 
-	/* Path 1: Map a specific address. Find the enclosing region, which *must* be free. */
+	/* Path 1: Map a specific address. Find the enclosing region,
+	 * which *must* be free.
	 */
 	if (gpu_pfn) {
-		struct device *dev = kctx->kbdev->dev;
+		struct device *dev = kbdev->dev;
 
 		KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1)));
 
-		tmp = kbase_region_tracker_find_region_enclosing_range_free(kctx, gpu_pfn, nr_pages);
+		tmp = find_region_enclosing_range_rbtree(rbtree, gpu_pfn,
+				nr_pages);
 		if (!tmp) {
 			dev_warn(dev, "Enclosing region not found: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages);
 			err = -ENOMEM;
 			goto exit;
 		}
 		if (!(tmp->flags & KBASE_REG_FREE)) {
-			dev_warn(dev, "Zone mismatch: %lu != %lu", tmp->flags & KBASE_REG_ZONE_MASK, reg->flags & KBASE_REG_ZONE_MASK);
-			dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n", tmp->start_pfn, tmp->flags, tmp->nr_pages, gpu_pfn, nr_pages);
-			dev_warn(dev, "in function %s (%p, %p, 0x%llx, 0x%zx, 0x%zx)\n", __func__, kctx, reg, addr, nr_pages, align);
+			dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n",
+					tmp->start_pfn, tmp->flags,
+					tmp->nr_pages, gpu_pfn, nr_pages);
 			err = -ENOMEM;
 			goto exit;
 		}
 
-		err = kbase_insert_va_region_nolock(kctx, reg, tmp, gpu_pfn, nr_pages);
+		err = kbase_insert_va_region_nolock(reg, tmp, gpu_pfn,
+				nr_pages);
 		if (err) {
 			dev_warn(dev, "Failed to insert va region");
 			err = -ENOMEM;
-			goto exit;
 		}
-
-		goto exit;
-	}
-
-	/* Path 2: Map any free address which meets the requirements.
-	 *
-	 * Depending on the zone the allocation request is for
-	 * we might need to retry it. */
-	do {
+	} else {
+		/* Path 2: Map any free address which meets the requirements.
+		 */
 		u64 start_pfn;
 		size_t align_offset = align;
 		size_t align_mask = align - 1;
 
 		if ((reg->flags & KBASE_REG_TILER_ALIGN_TOP)) {
-			WARN(align > 1,
-					"kbase_add_va_region with align %lx might not be honored for KBASE_REG_TILER_ALIGN_TOP memory",
+			WARN(align > 1, "%s with align %lx might not be honored for KBASE_REG_TILER_ALIGN_TOP memory",
+					__func__,
 					(unsigned long)align);
 			align_mask = reg->extent - 1;
 			align_offset = reg->extent - reg->initial_commit;
 		}
 
-		tmp = kbase_region_tracker_find_region_meeting_reqs(kctx, reg,
+		tmp = kbase_region_tracker_find_region_meeting_reqs(reg,
 				nr_pages, align_offset, align_mask,
 				&start_pfn);
 		if (tmp) {
-			err = kbase_insert_va_region_nolock(kctx, reg, tmp,
-					start_pfn, nr_pages);
-			break;
+			err = kbase_insert_va_region_nolock(reg, tmp,
+					start_pfn, nr_pages);
+		} else {
+			err = -ENOMEM;
 		}
+	}
 
-		/*
-		 * If the allocation is not from the same zone as JIT
-		 * then don't retry, we're out of VA and there is
-		 * nothing which can be done about it.
-		 */
-		if ((reg->flags & KBASE_REG_ZONE_MASK) !=
-				KBASE_REG_ZONE_CUSTOM_VA)
-			break;
-	} while (kbase_jit_evict(kctx));
-
-	if (!tmp)
-		err = -ENOMEM;
-
- exit:
+exit:
 	return err;
 }
 
-KBASE_EXPORT_TEST_API(kbase_add_va_region);
-
 /**
  * @brief Initialize the internal region tracker data structure.
  */
 static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
 		struct kbase_va_region *same_va_reg,
-		struct kbase_va_region *exec_reg,
 		struct kbase_va_region *custom_va_reg)
 {
 	kctx->reg_rbtree_same = RB_ROOT;
-	kbase_region_tracker_insert(kctx, same_va_reg);
+	kbase_region_tracker_insert(same_va_reg);
 
-	/* Although exec and custom_va_reg don't always exist,
+	/* Although custom_va_reg doesn't always exist,
 	 * initialize unconditionally because of the mem_view debugfs
-	 * implementation which relies on these being empty
+	 * implementation which relies on this being empty.
 	 */
-	kctx->reg_rbtree_exec = RB_ROOT;
 	kctx->reg_rbtree_custom = RB_ROOT;
 
-	if (exec_reg)
-		kbase_region_tracker_insert(kctx, exec_reg);
 	if (custom_va_reg)
-		kbase_region_tracker_insert(kctx, custom_va_reg);
+		kbase_region_tracker_insert(custom_va_reg);
 }
 
 static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree)
@@ -561,34 +643,18 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree)
 void kbase_region_tracker_term(struct kbase_context *kctx)
 {
 	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same);
-	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec);
 	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom);
 }
 
-static size_t kbase_get_same_va_bits(struct kbase_context *kctx)
+void kbase_region_tracker_term_rbtree(struct rb_root *rbtree)
 {
-#if defined(CONFIG_ARM64)
-	/* VA_BITS can be as high as 48 bits, but all bits are available for
-	 * both user and kernel.
-	 */
-	size_t cpu_va_bits = VA_BITS;
-#elif defined(CONFIG_X86_64)
-	/* x86_64 can access 48 bits of VA, but the 48th is used to denote
-	 * kernel (1) vs userspace (0), so the max here is 47.
-	 */
-	size_t cpu_va_bits = 47;
-#elif defined(CONFIG_ARM) || defined(CONFIG_X86_32)
-	size_t cpu_va_bits = sizeof(void *) * BITS_PER_BYTE;
-#else
-#error "Unknown CPU VA width for this architecture"
-#endif
-
-#ifdef CONFIG_64BIT
-	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
-		cpu_va_bits = 32;
-#endif
+	kbase_region_tracker_erase_rbtree(rbtree);
+}
 
-	return min(cpu_va_bits, (size_t) kctx->kbdev->gpu_props.mmu.va_bits);
+static size_t kbase_get_same_va_bits(struct kbase_context *kctx)
+{
+	return min(kbase_get_num_cpu_va_bits(kctx),
+			(size_t) kctx->kbdev->gpu_props.mmu.va_bits);
 }
 
 /**
@@ -597,7 +663,6 @@ static size_t kbase_get_same_va_bits(struct kbase_context *kctx)
 int kbase_region_tracker_init(struct kbase_context *kctx)
 {
 	struct kbase_va_region *same_va_reg;
-	struct kbase_va_region *exec_reg = NULL;
 	struct kbase_va_region *custom_va_reg = NULL;
 	size_t same_va_bits = kbase_get_same_va_bits(kctx);
 	u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
@@ -610,7 +675,7 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
 	same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
 	/* all have SAME_VA */
-	same_va_reg = kbase_alloc_free_region(kctx, 1,
+	same_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 1,
 			same_va_pages,
 			KBASE_REG_ZONE_SAME_VA);
 
@@ -620,7 +685,7 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
 	}
 
 #ifdef CONFIG_64BIT
-	/* 32-bit clients have exec and custom VA zones */
+	/* 32-bit clients have custom VA zones */
 	if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
 #endif
 		if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) {
@@ -634,38 +699,27 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
 		if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit)
 			custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE;
 
-		exec_reg = kbase_alloc_free_region(kctx,
-				KBASE_REG_ZONE_EXEC_BASE,
-				KBASE_REG_ZONE_EXEC_SIZE,
-				KBASE_REG_ZONE_EXEC);
-
-		if (!exec_reg) {
-			err = -ENOMEM;
-			goto fail_free_same_va;
-		}
-
-		custom_va_reg = kbase_alloc_free_region(kctx,
+		custom_va_reg = kbase_alloc_free_region(
+				&kctx->reg_rbtree_custom,
 				KBASE_REG_ZONE_CUSTOM_VA_BASE,
 				custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
 
 		if (!custom_va_reg) {
 			err = -ENOMEM;
-			goto fail_free_exec;
+			goto fail_free_same_va;
 		}
 #ifdef CONFIG_64BIT
 	}
 #endif
 
-	kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg,
-			custom_va_reg);
+	kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg);
 
 	kctx->same_va_end = same_va_pages + 1;
+
 	kbase_gpu_vm_unlock(kctx);
 	return 0;
 
-fail_free_exec:
-	kbase_free_alloced_region(exec_reg);
 fail_free_same_va:
 	kbase_free_alloced_region(same_va_reg);
 fail_unlock:
@@ -720,7 +774,7 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
 	 * Create a custom VA zone at the end of the VA for allocations which
	 * JIT can use so it doesn't have to allocate VA from the kernel.
	 */
-	custom_va_reg = kbase_alloc_free_region(kctx,
+	custom_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom,
				kctx->same_va_end,
				jit_va_pages,
				KBASE_REG_ZONE_CUSTOM_VA);
@@ -734,7 +788,7 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
 		goto fail_unlock;
 	}
 
-	kbase_region_tracker_insert(kctx, custom_va_reg);
+	kbase_region_tracker_insert(custom_va_reg);
 
 	kbase_gpu_vm_unlock(kctx);
 	return 0;
@@ -765,6 +819,7 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
 	return 0;
 }
 
+
 int kbase_mem_init(struct kbase_device *kbdev)
 {
 	struct kbasep_mem_device *memdev;
@@ -830,15 +885,15 @@ KBASE_EXPORT_TEST_API(kbase_mem_term);
 * The allocated object is not part of any list yet, and is flagged as
 * KBASE_REG_FREE. No mapping is allocated yet.
 *
- * zone is KBASE_REG_ZONE_CUSTOM_VA, KBASE_REG_ZONE_SAME_VA,
- * or KBASE_REG_ZONE_EXEC
+ * zone is KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA.
 *
 */
-struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone)
+struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree,
+		u64 start_pfn, size_t nr_pages, int zone)
 {
 	struct kbase_va_region *new_reg;
 
-	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(rbtree != NULL);
 
 	/* zone argument should only contain zone related region flags */
 	KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0);
@@ -853,7 +908,7 @@ struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64
 	new_reg->cpu_alloc = NULL; /* no alloc bound yet */
 	new_reg->gpu_alloc = NULL; /* no alloc bound yet */
-	new_reg->kctx = kctx;
+	new_reg->rbtree = rbtree;
 	new_reg->flags = zone | KBASE_REG_FREE;
 
 	new_reg->flags |= KBASE_REG_GROWABLE;
@@ -868,6 +923,29 @@ struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64
 
 KBASE_EXPORT_TEST_API(kbase_alloc_free_region);
 
+static struct kbase_context *kbase_reg_flags_to_kctx(
+		struct kbase_va_region *reg)
+{
+	struct kbase_context *kctx = NULL;
+	struct rb_root *rbtree = reg->rbtree;
+
+	switch (reg->flags & KBASE_REG_ZONE_MASK) {
+	case KBASE_REG_ZONE_CUSTOM_VA:
+		kctx = container_of(rbtree, struct kbase_context,
+				reg_rbtree_custom);
+		break;
+	case KBASE_REG_ZONE_SAME_VA:
+		kctx = container_of(rbtree, struct kbase_context,
+				reg_rbtree_same);
+		break;
+	default:
+		WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags);
+		break;
+	}
+
+	return kctx;
+}
+
 /**
 * @brief Free a region object.
 *
@@ -881,7 +959,13 @@ KBASE_EXPORT_TEST_API(kbase_alloc_free_region);
 void kbase_free_alloced_region(struct kbase_va_region *reg)
 {
 	if (!(reg->flags & KBASE_REG_FREE)) {
-		mutex_lock(&reg->kctx->jit_evict_lock);
+		struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg);
+
+		if (WARN_ON(!kctx))
+			return;
+
+
+		mutex_lock(&kctx->jit_evict_lock);
 
 		/*
 		 * The physical allocation should have been removed from the
@@ -891,7 +975,7 @@ void kbase_free_alloced_region(struct kbase_va_region *reg)
 		 * on the list at termination time of the region tracker.
 		 */
 		if (!list_empty(&reg->gpu_alloc->evict_node)) {
-			mutex_unlock(&reg->kctx->jit_evict_lock);
+			mutex_unlock(&kctx->jit_evict_lock);
 
 			/*
 			 * Unlink the physical allocation before unmaking it
@@ -916,14 +1000,14 @@ void kbase_free_alloced_region(struct kbase_va_region *reg)
 				kbase_mem_evictable_unmake(reg->gpu_alloc);
 			}
 		} else {
-			mutex_unlock(&reg->kctx->jit_evict_lock);
+			mutex_unlock(&kctx->jit_evict_lock);
 		}
 
 		/*
		 * Remove the region from the sticky resource metadata
		 * list should it be there.
		 */
-		kbase_sticky_resource_release(reg->kctx, NULL,
+		kbase_sticky_resource_release(kctx, NULL,
 				reg->start_pfn << PAGE_SHIFT);
 
 	kbase_mem_phy_alloc_put(reg->cpu_alloc);
@@ -971,11 +1055,13 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64
 		KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased);
 		for (i = 0; i < alloc->imported.alias.nents; i++) {
 			if (alloc->imported.alias.aliased[i].alloc) {
-				err = kbase_mmu_insert_pages(kctx,
+				err = kbase_mmu_insert_pages(kctx->kbdev,
+						&kctx->mmu,
						reg->start_pfn + (i * stride),
						alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset,
						alloc->imported.alias.aliased[i].length,
-						reg->flags & gwt_mask);
+						reg->flags & gwt_mask,
+						kctx->as_nr);
 				if (err)
 					goto bad_insert;
 
@@ -992,10 +1078,13 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64
 			}
 		}
 	} else {
-		err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
+		err = kbase_mmu_insert_pages(kctx->kbdev,
+				&kctx->mmu,
+				reg->start_pfn,
 				kbase_get_gpu_phy_pages(reg),
 				kbase_reg_current_backed_size(reg),
-				reg->flags & gwt_mask);
+				reg->flags & gwt_mask,
+				kctx->as_nr);
 		if (err)
 			goto bad_insert;
 		kbase_mem_phy_alloc_gpu_mapped(reg->gpu_alloc);
@@ -1011,12 +1100,16 @@ bad_insert:
 		KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
 		while (i--)
 			if (reg->gpu_alloc->imported.alias.aliased[i].alloc) {
-				kbase_mmu_teardown_pages(kctx, reg->start_pfn + (i * stride), reg->gpu_alloc->imported.alias.aliased[i].length);
+				kbase_mmu_teardown_pages(kctx->kbdev,
+					&kctx->mmu,
+					reg->start_pfn + (i * stride),
+					reg->gpu_alloc->imported.alias.aliased[i].length,
+					kctx->as_nr);
 				kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
 			}
 	}
 
-	kbase_remove_va_region(kctx, reg);
+	kbase_remove_va_region(reg);
 
 	return err;
 }
@@ -1036,13 +1129,16 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
 	if (reg->gpu_alloc && reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
 		size_t i;
 
-		err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, reg->nr_pages);
+		err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
+				reg->start_pfn, reg->nr_pages, kctx->as_nr);
 		KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
 		for (i = 0; i < reg->gpu_alloc->imported.alias.nents; i++)
 			if (reg->gpu_alloc->imported.alias.aliased[i].alloc)
 				kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
 	} else {
-		err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, kbase_reg_current_backed_size(reg));
+		err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
				reg->start_pfn, kbase_reg_current_backed_size(reg),
				kctx->as_nr);
 		kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc);
 	}
 
@@ -1063,7 +1159,7 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
 	if (err)
 		return err;
 
-	err = kbase_remove_va_region(kctx, reg);
+	err = kbase_remove_va_region(reg);
 	return err;
 }
 
@@ -1345,7 +1441,7 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re
 	lockdep_assert_held(&kctx->reg_lock);
 
 	if (reg->flags & KBASE_REG_JIT) {
-		dev_warn(reg->kctx->kbdev->dev, "Attempt to free JIT memory!\n");
+		dev_warn(kctx->kbdev->dev, "Attempt to free JIT memory!\n");
 		return -EINVAL;
 	}
 
@@ -1372,7 +1468,7 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re
 	err = kbase_gpu_munmap(kctx, reg);
 	if (err) {
-		dev_warn(reg->kctx->kbdev->dev, "Could not unmap from the GPU...\n");
+		dev_warn(kctx->kbdev->dev, "Could not unmap from the GPU...\n");
 		goto out;
 	}
 
@@ -1482,7 +1578,8 @@ int kbase_update_region_flags(struct kbase_context *kctx,
 		reg->flags |= KBASE_REG_GPU_NX;
 
 	if (!kbase_device_is_cpu_coherent(kctx->kbdev)) {
-		if (flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED)
+		if (flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED &&
+				!(flags & BASE_MEM_UNCACHED_GPU))
 			return -EINVAL;
 	} else if (flags & (BASE_MEM_COHERENT_SYSTEM |
 			BASE_MEM_COHERENT_SYSTEM_REQUIRED)) {
@@ -1497,8 +1594,20 @@ int kbase_update_region_flags(struct kbase_context *kctx,
 	if (flags & BASE_MEM_TILER_ALIGN_TOP)
 		reg->flags |= KBASE_REG_TILER_ALIGN_TOP;
 
+
 	/* Set up default MEMATTR usage */
-	if (kctx->kbdev->system_coherency == COHERENCY_ACE &&
+	if (!(reg->flags & KBASE_REG_GPU_CACHED)) {
+		if (kctx->kbdev->mmu_mode->flags &
+				KBASE_MMU_MODE_HAS_NON_CACHEABLE) {
+			/* Override shareability, and MEMATTR for uncached */
+			reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH);
+			reg->flags |= KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
+		} else {
+			dev_warn(kctx->kbdev->dev,
+					"Can't allocate GPU uncached memory due to MMU in Legacy Mode\n");
+			return -EINVAL;
+		}
+	} else if (kctx->kbdev->system_coherency == COHERENCY_ACE &&
		(reg->flags & KBASE_REG_SHARE_BOTH)) {
		reg->flags |=
			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE);
@@ -1507,6 +1616,12 @@ int kbase_update_region_flags(struct kbase_context *kctx,
 			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT);
 	}
 
+	if (flags & BASE_MEM_PERMANENT_KERNEL_MAPPING)
+		reg->flags |= KBASE_REG_PERMANENT_KERNEL_MAPPING;
+
+	if (flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE)
+		reg->flags |= KBASE_REG_GPU_VA_SAME_4GB_PAGE;
+
 	return 0;
 }
 
@@ -1520,14 +1635,14 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
 	struct tagged_addr *tp;
 
 	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
-	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+	KBASE_DEBUG_ASSERT(alloc->imported.native.kctx);
 
 	if (alloc->reg) {
 		if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents)
 			goto invalid_request;
 	}
 
-	kctx = alloc->imported.kctx;
+	kctx = alloc->imported.native.kctx;
 
 	if (nr_pages_requested == 0)
 		goto done; /*nothing to do*/
@@ -1563,7 +1678,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
 		if (nr_left) {
 			struct kbase_sub_alloc *sa, *temp_sa;
 
-			mutex_lock(&kctx->mem_partials_lock);
+			spin_lock(&kctx->mem_partials_lock);
 
 			list_for_each_entry_safe(sa, temp_sa,
						 &kctx->mem_partials, link) {
@@ -1586,7 +1701,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
 					}
 				}
 			}
-			mutex_unlock(&kctx->mem_partials_lock);
+			spin_unlock(&kctx->mem_partials_lock);
 		}
 
 		/* only if we actually have a chunk left <512. If more it indicates
@@ -1633,9 +1748,9 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
 				nr_left = 0;
 
 				/* expose for later use */
-				mutex_lock(&kctx->mem_partials_lock);
+				spin_lock(&kctx->mem_partials_lock);
 				list_add(&sa->link, &kctx->mem_partials);
-				mutex_unlock(&kctx->mem_partials_lock);
+				spin_unlock(&kctx->mem_partials_lock);
 			}
 		}
 	}
@@ -1696,7 +1811,7 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
 	struct tagged_addr *new_pages = NULL;
 
 	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
-	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+	KBASE_DEBUG_ASSERT(alloc->imported.native.kctx);
 
 	lockdep_assert_held(&pool->pool_lock);
 
@@ -1709,7 +1824,7 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
 		goto invalid_request;
 	}
 
-	kctx = alloc->imported.kctx;
+	kctx = alloc->imported.native.kctx;
 
 	lockdep_assert_held(&kctx->mem_partials_lock);
 
@@ -1837,14 +1952,36 @@ alloc_failed:
 	if (nr_left != nr_pages_requested) {
 		size_t nr_pages_to_free = nr_pages_requested - nr_left;
-		alloc->nents += nr_pages_to_free;
+		struct tagged_addr *start_free = alloc->pages + alloc->nents;
 
-		kbase_process_page_usage_inc(kctx, nr_pages_to_free);
-		kbase_atomic_add_pages(nr_pages_to_free, &kctx->used_pages);
-		kbase_atomic_add_pages(nr_pages_to_free,
-				&kctx->kbdev->memdev.used_pages);
-
-		kbase_free_phy_pages_helper(alloc, nr_pages_to_free);
+#ifdef CONFIG_MALI_2MB_ALLOC
+		if (pool->order) {
+			while (nr_pages_to_free) {
+				if (is_huge_head(*start_free)) {
+					kbase_mem_pool_free_pages_locked(
+						pool, 512,
+						start_free,
+						false, /* not dirty */
+						true); /* return to pool */
+					nr_pages_to_free -= 512;
+					start_free += 512;
+				} else if (is_partial(*start_free)) {
+					free_partial_locked(kctx, pool,
+							*start_free);
+					nr_pages_to_free--;
+					start_free++;
+				}
+			}
+		} else {
+#endif
+			kbase_mem_pool_free_pages_locked(pool,
+					nr_pages_to_free,
+					start_free,
+					false, /* not dirty */
+					true); /* return to pool */
+#ifdef CONFIG_MALI_2MB_ALLOC
+		}
+#endif
 	}
 
 	kbase_process_page_usage_dec(kctx, nr_pages_requested);
@@ -1861,10 +1998,10 @@ static void free_partial(struct kbase_context *kctx, struct tagged_addr tp)
 	struct page *p, *head_page;
 	struct kbase_sub_alloc *sa;
 
-	p = phys_to_page(as_phys_addr_t(tp));
+	p = as_page(tp);
 	head_page = (struct page *)p->lru.prev;
 	sa = (struct kbase_sub_alloc *)head_page->lru.next;
-	mutex_lock(&kctx->mem_partials_lock);
+	spin_lock(&kctx->mem_partials_lock);
 	clear_bit(p - head_page, sa->sub_pages);
 	if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) {
 		list_del(&sa->link);
@@ -1875,14 +2012,14 @@ static void free_partial(struct kbase_context *kctx, struct tagged_addr tp)
 		/* expose the partial again */
 		list_add(&sa->link, &kctx->mem_partials);
 	}
-	mutex_unlock(&kctx->mem_partials_lock);
+	spin_unlock(&kctx->mem_partials_lock);
 }
 
 int kbase_free_phy_pages_helper(
	struct kbase_mem_phy_alloc *alloc,
	size_t nr_pages_to_free)
 {
-	struct kbase_context *kctx = alloc->imported.kctx;
+	struct kbase_context *kctx = alloc->imported.native.kctx;
 	bool syncback;
 	bool reclaimed = (alloc->evicted != 0);
 	struct tagged_addr *start_free;
@@ -1890,7 +2027,7 @@ int kbase_free_phy_pages_helper(
 	size_t freed = 0;
 
 	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
-	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+	KBASE_DEBUG_ASSERT(alloc->imported.native.kctx);
 	KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free);
 
 	/* early out if nothing to do */
@@ -1976,13 +2113,13 @@ static void free_partial_locked(struct kbase_context *kctx,
 	lockdep_assert_held(&pool->pool_lock);
 	lockdep_assert_held(&kctx->mem_partials_lock);
 
-	p = phys_to_page(as_phys_addr_t(tp));
+	p = as_page(tp);
 	head_page = (struct page *)p->lru.prev;
 	sa = (struct kbase_sub_alloc *)head_page->lru.next;
 	clear_bit(p - head_page, sa->sub_pages);
 	if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) {
 		list_del(&sa->link);
-		kbase_mem_pool_free(pool, head_page, true);
+		kbase_mem_pool_free_locked(pool, head_page, true);
 		kfree(sa);
 	} else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) ==
		   SZ_2M / SZ_4K - 1) {
@@ -1995,14 +2132,14 @@ void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc,
		struct kbase_mem_pool *pool, struct tagged_addr *pages,
		size_t nr_pages_to_free)
 {
-	struct kbase_context *kctx = alloc->imported.kctx;
+	struct kbase_context *kctx = alloc->imported.native.kctx;
 	bool syncback;
 	bool reclaimed = (alloc->evicted != 0);
 	struct tagged_addr *start_free;
 	size_t freed = 0;
 
 	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
-	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+	KBASE_DEBUG_ASSERT(alloc->imported.native.kctx);
 	KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free);
 
 	lockdep_assert_held(&pool->pool_lock);
@@ -2093,14 +2230,26 @@ void kbase_mem_kref_free(struct kref *kref)
 	switch (alloc->type) {
 	case KBASE_MEM_TYPE_NATIVE: {
-		if (!WARN_ON(!alloc->imported.kctx)) {
+
+		if (!WARN_ON(!alloc->imported.native.kctx)) {
+			if (alloc->permanent_map)
+				kbase_phy_alloc_mapping_term(
+						alloc->imported.native.kctx,
+						alloc);
+
 			/*
			 * The physical allocation must have been removed from
			 * the eviction list before trying to free it.
			 */
-			mutex_lock(&alloc->imported.kctx->jit_evict_lock);
+			mutex_lock(
+				&alloc->imported.native.kctx->jit_evict_lock);
 			WARN_ON(!list_empty(&alloc->evict_node));
-			mutex_unlock(&alloc->imported.kctx->jit_evict_lock);
+			mutex_unlock(
+				&alloc->imported.native.kctx->jit_evict_lock);
+
+			kbase_process_page_usage_dec(
+					alloc->imported.native.kctx,
+					alloc->imported.native.nr_struct_pages);
 		}
 		kbase_free_phy_pages_helper(alloc, alloc->nents);
 		break;
@@ -2134,14 +2283,6 @@ void kbase_mem_kref_free(struct kref *kref)
 		mmdrop(alloc->imported.user_buf.mm);
 		kfree(alloc->imported.user_buf.pages);
 		break;
-	case KBASE_MEM_TYPE_TB:{
-		void *tb;
-
-		tb = alloc->imported.kctx->jctx.tb;
-		kbase_device_trace_buffer_uninstall(alloc->imported.kctx);
-		vfree(tb);
-		break;
-	}
 	default:
 		WARN(1, "Unexecpted free of type %d\n", alloc->type);
 		break;
@@ -2220,6 +2361,14 @@ bool kbase_check_alloc_flags(unsigned long flags)
 			BASE_MEM_TILER_ALIGN_TOP)))
 		return false;
 
+	/* To have an allocation lie within a 4GB chunk is required only for
+	 * TLS memory, which will never be used to contain executable code
+	 * and also used for Tiler heap.
+	 */
+	if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags &
+			(BASE_MEM_PROT_GPU_EX | BASE_MEM_TILER_ALIGN_TOP)))
+		return false;
+
 	/* GPU should have at least read or write access otherwise there is no
	   reason for allocating. */
 	if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
@@ -2290,9 +2439,8 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags,
 		return -EINVAL;
 	}
 
-	if (va_pages > (U64_MAX / PAGE_SIZE)) {
-		/* 64-bit address range is the max */
-		dev_warn(dev, KBASE_MSG_PRE "va_pages==%lld larger than 64-bit address range!",
+	if (va_pages > KBASE_MEM_ALLOC_MAX_SIZE) {
+		dev_warn(dev, KBASE_MSG_PRE "va_pages==%lld larger than KBASE_MEM_ALLOC_MAX_SIZE!",
 				(unsigned long long)va_pages);
 		return -ENOMEM;
 	}
@@ -2351,6 +2499,13 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags,
 #undef KBASE_MSG_PRE_FLAG
 	}
 
+	if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) &&
+	    (va_pages > (BASE_MEM_PFN_MASK_4GB + 1))) {
+		dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GPU_VA_SAME_4GB_PAGE and va_pages==%lld greater than that needed for 4GB space",
+				(unsigned long long)va_pages);
+		return -EINVAL;
+	}
+
 	return 0;
 #undef KBASE_MSG_PRE
 }
@@ -2687,7 +2842,7 @@ static int kbase_jit_grow(struct kbase_context *kctx,
 	if (reg->cpu_alloc != reg->gpu_alloc)
 		pages_required *= 2;
 
-	mutex_lock(&kctx->mem_partials_lock);
+	spin_lock(&kctx->mem_partials_lock);
 	kbase_mem_pool_lock(pool);
 
 	/* As we can not allocate memory from the kernel with the vm_lock held,
@@ -2699,14 +2854,14 @@ static int kbase_jit_grow(struct kbase_context *kctx,
 		int pool_delta = pages_required - kbase_mem_pool_size(pool);
 
 		kbase_mem_pool_unlock(pool);
-		mutex_unlock(&kctx->mem_partials_lock);
+		spin_unlock(&kctx->mem_partials_lock);
 		kbase_gpu_vm_unlock(kctx);
 
 		if (kbase_mem_pool_grow(pool, pool_delta))
 			goto update_failed_unlocked;
 
 		kbase_gpu_vm_lock(kctx);
-		mutex_lock(&kctx->mem_partials_lock);
+		spin_lock(&kctx->mem_partials_lock);
 		kbase_mem_pool_lock(pool);
 	}
 
@@ -2714,7 +2869,7 @@ static int kbase_jit_grow(struct kbase_context *kctx,
 			delta, &prealloc_sas[0]);
 	if (!gpu_pages) {
 		kbase_mem_pool_unlock(pool);
-		mutex_unlock(&kctx->mem_partials_lock);
+		spin_unlock(&kctx->mem_partials_lock);
 		goto update_failed;
 	}
 
@@ -2727,12 +2882,12 @@ static int kbase_jit_grow(struct kbase_context *kctx,
 			kbase_free_phy_pages_helper_locked(reg->gpu_alloc,
					pool, gpu_pages, delta);
 			kbase_mem_pool_unlock(pool);
-			mutex_unlock(&kctx->mem_partials_lock);
+			spin_unlock(&kctx->mem_partials_lock);
 			goto update_failed;
 		}
 	}
 	kbase_mem_pool_unlock(pool);
-	mutex_unlock(&kctx->mem_partials_lock);
+	spin_unlock(&kctx->mem_partials_lock);
 
 	ret = kbase_mem_grow_gpu_mapping(kctx, reg,
			info->commit_pages, old_size);
@@ -2982,7 +3137,10 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
 
 void kbase_jit_backing_lost(struct kbase_va_region *reg)
 {
-	struct kbase_context *kctx = reg->kctx;
+	struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg);
+
+	if (WARN_ON(!kctx))
+		return;
 
 	lockdep_assert_held(&kctx->jit_evict_lock);
 
@@ -3156,9 +3314,9 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
 		gwt_mask = ~KBASE_REG_GPU_WR;
 #endif
 
-	err = kbase_mmu_insert_pages(kctx, reg->start_pfn, pa,
-			kbase_reg_current_backed_size(reg),
-			reg->flags & gwt_mask);
+	err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+			pa, kbase_reg_current_backed_size(reg),
+			reg->flags & gwt_mask, kctx->as_nr);
 	if (err == 0)
 		return 0;
 
@@ -3270,11 +3428,12 @@ static int kbase_jd_umm_map(struct kbase_context *kctx,
 		gwt_mask = ~KBASE_REG_GPU_WR;
 #endif
 
-	err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
+	err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
			kbase_get_gpu_phy_pages(reg), count,
			(reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD) &
-			gwt_mask);
+			gwt_mask,
+			kctx->as_nr);
 	if (err)
 		goto err_unmap_attachment;
 
@@ -3292,7 +3451,8 @@ static int kbase_jd_umm_map(struct kbase_context *kctx,
 	return 0;
 
 err_teardown_orig_pages:
-	kbase_mmu_teardown_pages(kctx, reg->start_pfn, count);
+	kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+			count, kctx->as_nr);
 err_unmap_attachment:
 	dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
			alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
@@ -3372,9 +3532,11 @@ void kbase_unmap_external_resource(struct kbase_context *kctx,
			int err;
 
			err = kbase_mmu_teardown_pages(
-					kctx,
+					kctx->kbdev,
+					&kctx->mmu,
					reg->start_pfn,
-					alloc->nents);
+					alloc->nents,
+					kctx->as_nr);
			WARN_ON(err);
		}
@@ -3391,9 +3553,11 @@ void kbase_unmap_external_resource(struct kbase_context *kctx,
		if (reg && reg->gpu_alloc == alloc)
			kbase_mmu_teardown_pages(
-					kctx,
+					kctx->kbdev,
+					&kctx->mmu,
					reg->start_pfn,
-					kbase_reg_current_backed_size(reg));
+					kbase_reg_current_backed_size(reg),
+					kctx->as_nr);
 
		if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0))
			writeable = false;
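A pattern worth illustrating outside the diff: since struct kbase_va_region no longer carries a kctx back-pointer, the new kbase_reg_flags_to_kctx() recovers the owning context from the region's rbtree pointer with container_of(), selecting the member offset by the region's zone flags. The standalone C sketch below shows the same technique with hypothetical stand-in types (struct context, ctx_from_tree); these are not the kbase definitions.

#include <stddef.h>
#include <stdio.h>

/* Minimal stand-ins; illustrative only, not the real kbase types. */
struct rb_root { void *rb_node; };

struct context {
	int id;
	struct rb_root tree_same;   /* analogous to reg_rbtree_same */
	struct rb_root tree_custom; /* analogous to reg_rbtree_custom */
};

/* container_of: map a pointer to an embedded member back to the struct
 * containing it - the kernel macro kbase_reg_flags_to_kctx() relies on.
 */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

static struct context *ctx_from_tree(struct rb_root *tree, int is_custom)
{
	/* The zone flags tell us which member 'tree' is, so we know which
	 * offset to subtract - exactly the switch statement in the diff.
	 */
	return is_custom ?
		container_of(tree, struct context, tree_custom) :
		container_of(tree, struct context, tree_same);
}

int main(void)
{
	struct context ctx = { .id = 42 };
	struct rb_root *t = &ctx.tree_custom; /* what a region would store */

	printf("owner id = %d\n", ctx_from_tree(t, 1)->id); /* prints 42 */
	return 0;
}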
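The new placement logic in kbase_region_tracker_find_region_meeting_reqs() also constrains executable regions (no KBASE_REG_GPU_NX) to neither start nor end on a 4GB boundary, and KBASE_REG_GPU_VA_SAME_4GB_PAGE regions to sit entirely inside one 4GB chunk. Below is a minimal sketch of those two predicates, assuming 4 KiB pages so that the per-4GB PFN mask is (1 << 20) - 1; the real BASE_MEM_PFN_MASK_4GB value lives in the driver headers and this constant is only an assumption for illustration.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed: 4GB / 4KiB = 2^20 pages, so the in-chunk PFN mask is 0xFFFFF. */
#define PFN_MASK_4GB ((1ULL << 20) - 1)

/* Mirrors the executable-region check: the candidate range may neither
 * start nor end exactly on a 4GB boundary.
 */
static bool touches_4gb_boundary(uint64_t start_pfn, size_t nr_pages)
{
	return ((start_pfn & PFN_MASK_4GB) == 0) ||
	       (((start_pfn + nr_pages) & PFN_MASK_4GB) == 0);
}

/* Mirrors the KBASE_REG_GPU_VA_SAME_4GB_PAGE case: first and last PFN must
 * share the same 4GB chunk; if not, the diff bumps start_pfn up to the
 * start of the chunk containing end_pfn.
 */
static uint64_t keep_in_same_4gb(uint64_t start_pfn, size_t nr_pages)
{
	uint64_t end_pfn = start_pfn + nr_pages - 1;

	if ((start_pfn & ~PFN_MASK_4GB) != (end_pfn & ~PFN_MASK_4GB))
		start_pfn = end_pfn & ~PFN_MASK_4GB;
	return start_pfn;
}

int main(void)
{
	/* A range ending exactly at the 4GB mark is rejected for code. */
	printf("%d\n", touches_4gb_boundary((1ULL << 20) - 16, 16)); /* 1 */

	/* A range crossing a 4GB line gets pushed to the next chunk. */
	printf("0x%llx\n", (unsigned long long)
	       keep_in_same_4gb((1ULL << 20) - 8, 16)); /* 0x100000 */
	return 0;
}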