author    | Jörg Wagner <jorwag@google.com> | 2023-08-01 13:38:22 +0000
committer | Jörg Wagner <jorwag@google.com> | 2023-08-03 09:29:34 +0000
commit    | dacf004cc8a4b35f5a0fb5fb67246f9cc8fdaafb (patch)
tree      | 07484dccba43bb2c2a07626c00154751f318bd47 /mali_kbase/mali_kbase_mem.c
parent    | bce5281a0408a175137c08dc93028e2a2c0fb69b (diff)
download  | gpu-dacf004cc8a4b35f5a0fb5fb67246f9cc8fdaafb.tar.gz
Update KMD to 'mini release: update r44p1-01bet1 to r44p1-00dev2'
Provenance: ipdelivery@d10c137c7691a470b8b33786aec4965315db4561
Change-Id: I4fbcc669d3b8e36c8288c91fdddd8b79258b6635
Diffstat (limited to 'mali_kbase/mali_kbase_mem.c')
-rw-r--r-- | mali_kbase/mali_kbase_mem.c | 1066
1 file changed, 523 insertions(+), 543 deletions(-)
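At the core of this change, the per-context rbtree fields (reg_rbtree_same, reg_rbtree_custom, reg_rbtree_exec and friends) are folded into a single reg_zone[] array of struct kbase_reg_zone, each zone carrying its own base PFN, size and rb-tree root. Looking up the tree that tracks a GPU PFN then becomes a bounds-check loop over that array instead of a hard-coded zone switch. A minimal sketch of that lookup pattern follows, in plain user-space C with simplified stand-in types, not the driver's actual structures:

#include <stddef.h>
#include <stdint.h>

enum zone_id { SAME_VA_ZONE, CUSTOM_VA_ZONE, EXEC_VA_ZONE, CONTEXT_ZONE_MAX };

struct reg_zone {
	uint64_t base_pfn;      /* first GPU page frame number of the zone */
	uint64_t va_size_pages; /* zone length, in pages */
	void *reg_rbtree;       /* stand-in for the zone's struct rb_root */
};

struct ctx {
	struct reg_zone reg_zone[CONTEXT_ZONE_MAX];
};

/* Return the tree tracking gpu_pfn, or NULL when no zone covers it. */
static void *gpu_pfn_to_rbtree(struct ctx *kctx, uint64_t gpu_pfn)
{
	int i;

	for (i = 0; i < CONTEXT_ZONE_MAX; i++) {
		struct reg_zone *zone = &kctx->reg_zone[i];

		if (gpu_pfn >= zone->base_pfn &&
		    gpu_pfn < zone->base_pfn + zone->va_size_pages)
			return zone->reg_rbtree;
	}

	return NULL; /* callers must now handle the miss explicitly */
}

Because a PFN outside every zone now yields NULL rather than falling through to a default tree, call sites such as kbase_region_tracker_find_region_enclosing_address() gain an explicit NULL check, as seen in the hunks below.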
diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c index 1c94e9c..24039aa 100644 --- a/mali_kbase/mali_kbase_mem.c +++ b/mali_kbase/mali_kbase_mem.c @@ -43,7 +43,7 @@ #include <mmu/mali_kbase_mmu.h> #include <mali_kbase_config_defaults.h> #include <mali_kbase_trace_gpu_mem.h> - +#include <linux/version_compat_defs.h> #define VA_REGION_SLAB_NAME_PREFIX "va-region-slab-" #define VA_REGION_SLAB_NAME_SIZE (DEVNAME_SIZE + sizeof(VA_REGION_SLAB_NAME_PREFIX) + 1) @@ -101,56 +101,66 @@ static size_t kbase_get_num_cpu_va_bits(struct kbase_context *kctx) return cpu_va_bits; } -/* This function finds out which RB tree the given pfn from the GPU VA belongs - * to based on the memory zone the pfn refers to - */ -static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx, - u64 gpu_pfn) +unsigned long kbase_zone_to_bits(enum kbase_memory_zone zone) { - struct rb_root *rbtree = NULL; + return ((((unsigned long)zone) & ((1 << KBASE_REG_ZONE_BITS) - 1ul)) + << KBASE_REG_ZONE_SHIFT); +} - struct kbase_reg_zone *exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA); +enum kbase_memory_zone kbase_bits_to_zone(unsigned long zone_bits) +{ + return (enum kbase_memory_zone)(((zone_bits) & KBASE_REG_ZONE_MASK) + >> KBASE_REG_ZONE_SHIFT); +} +char *kbase_reg_zone_get_name(enum kbase_memory_zone zone) +{ + switch (zone) { + case SAME_VA_ZONE: + return "SAME_VA"; + case CUSTOM_VA_ZONE: + return "CUSTOM_VA"; + case EXEC_VA_ZONE: + return "EXEC_VA"; #if MALI_USE_CSF - struct kbase_reg_zone *fixed_va_zone = - kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_FIXED_VA); - - struct kbase_reg_zone *exec_fixed_va_zone = - kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_FIXED_VA); - - if (gpu_pfn >= fixed_va_zone->base_pfn) { - rbtree = &kctx->reg_rbtree_fixed; - return rbtree; - } else if (gpu_pfn >= exec_fixed_va_zone->base_pfn) { - rbtree = &kctx->reg_rbtree_exec_fixed; - return rbtree; - } + case MCU_SHARED_ZONE: + return "MCU_SHARED"; + case EXEC_FIXED_VA_ZONE: + return "EXEC_FIXED_VA"; + case FIXED_VA_ZONE: + return "FIXED_VA"; #endif - if (gpu_pfn >= exec_va_zone->base_pfn) - rbtree = &kctx->reg_rbtree_exec; - else { - u64 same_va_end; + default: + return NULL; + } +} - if (kbase_ctx_compat_mode(kctx)) { - same_va_end = KBASE_REG_ZONE_CUSTOM_VA_BASE; - } else { - struct kbase_reg_zone *same_va_zone = - kbase_ctx_reg_zone_get(kctx, - KBASE_REG_ZONE_SAME_VA); - same_va_end = kbase_reg_zone_end_pfn(same_va_zone); - } +/** + * kbase_gpu_pfn_to_rbtree - find the rb-tree tracking the region with the indicated GPU + * page frame number + * @kctx: kbase context + * @gpu_pfn: GPU PFN address + * + * Context: any context. + * + * Return: reference to the rb-tree root, NULL if not found + */ +static struct rb_root *kbase_gpu_pfn_to_rbtree(struct kbase_context *kctx, u64 gpu_pfn) +{ + enum kbase_memory_zone zone_idx; + struct kbase_reg_zone *zone; - if (gpu_pfn >= same_va_end) - rbtree = &kctx->reg_rbtree_custom; - else - rbtree = &kctx->reg_rbtree_same; + for (zone_idx = 0; zone_idx < CONTEXT_ZONE_MAX; zone_idx++) { + zone = &kctx->reg_zone[zone_idx]; + if ((gpu_pfn >= zone->base_pfn) && (gpu_pfn < kbase_reg_zone_end_pfn(zone))) + return &zone->reg_rbtree; } - return rbtree; + return NULL; } /* This function inserts a region into the tree. 
*/ -static void kbase_region_tracker_insert(struct kbase_va_region *new_reg) +void kbase_region_tracker_insert(struct kbase_va_region *new_reg) { u64 start_pfn = new_reg->start_pfn; struct rb_node **link = NULL; @@ -251,7 +261,9 @@ struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address( lockdep_assert_held(&kctx->reg_lock); - rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn); + rbtree = kbase_gpu_pfn_to_rbtree(kctx, gpu_pfn); + if (unlikely(!rbtree)) + return NULL; return kbase_find_region_enclosing_address(rbtree, gpu_addr); } @@ -289,7 +301,9 @@ struct kbase_va_region *kbase_region_tracker_find_region_base_address( lockdep_assert_held(&kctx->reg_lock); - rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn); + rbtree = kbase_gpu_pfn_to_rbtree(kctx, gpu_pfn); + if (unlikely(!rbtree)) + return NULL; return kbase_find_region_base_address(rbtree, gpu_addr); } @@ -376,6 +390,7 @@ void kbase_remove_va_region(struct kbase_device *kbdev, struct kbase_va_region *reg) { struct rb_node *rbprev; + struct kbase_reg_zone *zone = container_of(reg->rbtree, struct kbase_reg_zone, reg_rbtree); struct kbase_va_region *prev = NULL; struct rb_node *rbnext; struct kbase_va_region *next = NULL; @@ -400,8 +415,8 @@ void kbase_remove_va_region(struct kbase_device *kbdev, */ u64 prev_end_pfn = prev->start_pfn + prev->nr_pages; - WARN_ON((prev->flags & KBASE_REG_ZONE_MASK) != - (reg->flags & KBASE_REG_ZONE_MASK)); + WARN_ON((kbase_bits_to_zone(prev->flags)) != + (kbase_bits_to_zone(reg->flags))); if (!WARN_ON(reg->start_pfn < prev_end_pfn)) prev->nr_pages += reg->start_pfn - prev_end_pfn; prev->nr_pages += reg->nr_pages; @@ -422,8 +437,8 @@ void kbase_remove_va_region(struct kbase_device *kbdev, */ u64 reg_end_pfn = reg->start_pfn + reg->nr_pages; - WARN_ON((next->flags & KBASE_REG_ZONE_MASK) != - (reg->flags & KBASE_REG_ZONE_MASK)); + WARN_ON((kbase_bits_to_zone(next->flags)) != + (kbase_bits_to_zone(reg->flags))); if (!WARN_ON(next->start_pfn < reg_end_pfn)) next->nr_pages += next->start_pfn - reg_end_pfn; next->start_pfn = reg->start_pfn; @@ -445,8 +460,7 @@ void kbase_remove_va_region(struct kbase_device *kbdev, */ struct kbase_va_region *free_reg; - free_reg = kbase_alloc_free_region(kbdev, reg_rbtree, reg->start_pfn, reg->nr_pages, - reg->flags & KBASE_REG_ZONE_MASK); + free_reg = kbase_alloc_free_region(zone, reg->start_pfn, reg->nr_pages); if (!free_reg) { /* In case of failure, we cannot allocate a replacement * free region, so we will be left with a 'gap' in the @@ -507,6 +521,8 @@ static int kbase_insert_va_region_nolock(struct kbase_device *kbdev, size_t nr_pages) { struct rb_root *reg_rbtree = NULL; + struct kbase_reg_zone *zone = + container_of(at_reg->rbtree, struct kbase_reg_zone, reg_rbtree); int err = 0; reg_rbtree = at_reg->rbtree; @@ -548,9 +564,8 @@ static int kbase_insert_va_region_nolock(struct kbase_device *kbdev, else { struct kbase_va_region *new_front_reg; - new_front_reg = kbase_alloc_free_region(kbdev, reg_rbtree, at_reg->start_pfn, - start_pfn - at_reg->start_pfn, - at_reg->flags & KBASE_REG_ZONE_MASK); + new_front_reg = kbase_alloc_free_region(zone, at_reg->start_pfn, + start_pfn - at_reg->start_pfn); if (new_front_reg) { at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages; @@ -603,9 +618,9 @@ int kbase_add_va_region(struct kbase_context *kctx, #endif if (!(reg->flags & KBASE_REG_GPU_NX) && !addr && #if MALI_USE_CSF - ((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_FIXED_VA) && + ((kbase_bits_to_zone(reg->flags)) != EXEC_FIXED_VA_ZONE) && #endif - ((reg->flags 
& KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_VA)) { + ((kbase_bits_to_zone(reg->flags)) != EXEC_VA_ZONE)) { if (cpu_va_bits > gpu_pc_bits) { align = max(align, (size_t)((1ULL << gpu_pc_bits) >> PAGE_SHIFT)); @@ -623,8 +638,7 @@ int kbase_add_va_region(struct kbase_context *kctx, * then don't retry, we're out of VA and there is * nothing which can be done about it. */ - if ((reg->flags & KBASE_REG_ZONE_MASK) != - KBASE_REG_ZONE_CUSTOM_VA) + if ((kbase_bits_to_zone(reg->flags)) != CUSTOM_VA_ZONE) break; } while (kbase_jit_evict(kctx)); @@ -728,119 +742,27 @@ exit: return err; } -/* - * @brief Initialize the internal region tracker data structure. +/** + * kbase_reg_to_kctx - Obtain the kbase context tracking a VA region. + * @reg: VA region + * + * Return: + * * pointer to kbase context of the memory allocation + * * NULL if the region does not belong to a kbase context (for instance, + * if the allocation corresponds to a shared MCU region on CSF). */ -#if MALI_USE_CSF -static void kbase_region_tracker_ds_init(struct kbase_context *kctx, - struct kbase_va_region *same_va_reg, - struct kbase_va_region *custom_va_reg, - struct kbase_va_region *exec_va_reg, - struct kbase_va_region *exec_fixed_va_reg, - struct kbase_va_region *fixed_va_reg) -{ - u64 last_zone_end_pfn; - - kctx->reg_rbtree_same = RB_ROOT; - kbase_region_tracker_insert(same_va_reg); - - last_zone_end_pfn = same_va_reg->start_pfn + same_va_reg->nr_pages; - - /* Although custom_va_reg doesn't always exist, initialize - * unconditionally because of the mem_view debugfs - * implementation which relies on it being empty. - */ - kctx->reg_rbtree_custom = RB_ROOT; - kctx->reg_rbtree_exec = RB_ROOT; - - if (custom_va_reg) { - WARN_ON(custom_va_reg->start_pfn < last_zone_end_pfn); - kbase_region_tracker_insert(custom_va_reg); - last_zone_end_pfn = custom_va_reg->start_pfn + custom_va_reg->nr_pages; - } - - /* Initialize exec, fixed and exec_fixed. These are always - * initialized at this stage, if they will exist at all. - */ - kctx->reg_rbtree_fixed = RB_ROOT; - kctx->reg_rbtree_exec_fixed = RB_ROOT; - - if (exec_va_reg) { - WARN_ON(exec_va_reg->start_pfn < last_zone_end_pfn); - kbase_region_tracker_insert(exec_va_reg); - last_zone_end_pfn = exec_va_reg->start_pfn + exec_va_reg->nr_pages; - } - - if (exec_fixed_va_reg) { - WARN_ON(exec_fixed_va_reg->start_pfn < last_zone_end_pfn); - kbase_region_tracker_insert(exec_fixed_va_reg); - last_zone_end_pfn = exec_fixed_va_reg->start_pfn + exec_fixed_va_reg->nr_pages; - } - - if (fixed_va_reg) { - WARN_ON(fixed_va_reg->start_pfn < last_zone_end_pfn); - kbase_region_tracker_insert(fixed_va_reg); - last_zone_end_pfn = fixed_va_reg->start_pfn + fixed_va_reg->nr_pages; - } -} -#else -static void kbase_region_tracker_ds_init(struct kbase_context *kctx, - struct kbase_va_region *same_va_reg, - struct kbase_va_region *custom_va_reg) -{ - kctx->reg_rbtree_same = RB_ROOT; - kbase_region_tracker_insert(same_va_reg); - - /* Although custom_va_reg and exec_va_reg don't always exist, - * initialize unconditionally because of the mem_view debugfs - * implementation which relies on them being empty. - * - * The difference between the two is that the EXEC_VA region - * is never initialized at this stage. 
- */ - kctx->reg_rbtree_custom = RB_ROOT; - kctx->reg_rbtree_exec = RB_ROOT; - - if (custom_va_reg) - kbase_region_tracker_insert(custom_va_reg); -} -#endif /* MALI_USE_CSF */ - -static struct kbase_context *kbase_reg_flags_to_kctx(struct kbase_va_region *reg) +static struct kbase_context *kbase_reg_to_kctx(struct kbase_va_region *reg) { - struct kbase_context *kctx = NULL; struct rb_root *rbtree = reg->rbtree; + struct kbase_reg_zone *zone = container_of(rbtree, struct kbase_reg_zone, reg_rbtree); - switch (reg->flags & KBASE_REG_ZONE_MASK) { - case KBASE_REG_ZONE_CUSTOM_VA: - kctx = container_of(rbtree, struct kbase_context, reg_rbtree_custom); - break; - case KBASE_REG_ZONE_SAME_VA: - kctx = container_of(rbtree, struct kbase_context, reg_rbtree_same); - break; - case KBASE_REG_ZONE_EXEC_VA: - kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec); - break; -#if MALI_USE_CSF - case KBASE_REG_ZONE_EXEC_FIXED_VA: - kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec_fixed); - break; - case KBASE_REG_ZONE_FIXED_VA: - kctx = container_of(rbtree, struct kbase_context, reg_rbtree_fixed); - break; - case KBASE_REG_ZONE_MCU_SHARED: - /* This is only expected to be called on driver unload. */ - break; -#endif - default: - WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags); - break; - } + if (!kbase_is_ctx_reg_zone(zone->id)) + return NULL; - return kctx; + return container_of(zone - zone->id, struct kbase_context, reg_zone[0]); } -static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) +void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) { struct rb_node *rbnode; struct kbase_va_region *reg; @@ -851,8 +773,12 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) rb_erase(rbnode, rbtree); reg = rb_entry(rbnode, struct kbase_va_region, rblink); WARN_ON(kbase_refcount_read(®->va_refcnt) != 1); - if (kbase_page_migration_enabled) - kbase_gpu_munmap(kbase_reg_flags_to_kctx(reg), reg); + if (kbase_is_page_migration_enabled()) { + struct kbase_context *kctx = kbase_reg_to_kctx(reg); + + if (kctx) + kbase_gpu_munmap(kctx, reg); + } /* Reset the start_pfn - as the rbtree is being * destroyed and we've already erased this region, there * is no further need to attempt to remove it. 
@@ -867,209 +793,261 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) } while (rbnode); } -void kbase_region_tracker_term(struct kbase_context *kctx) -{ - WARN(kctx->as_nr != KBASEP_AS_NR_INVALID, - "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before erasing remaining regions", - kctx->tgid, kctx->id); - - kbase_gpu_vm_lock(kctx); - kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same); - kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom); - kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec); -#if MALI_USE_CSF - WARN_ON(!list_empty(&kctx->csf.event_pages_head)); - kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec_fixed); - kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_fixed); - -#endif - kbase_gpu_vm_unlock(kctx); -} - -void kbase_region_tracker_term_rbtree(struct rb_root *rbtree) -{ - kbase_region_tracker_erase_rbtree(rbtree); -} - static size_t kbase_get_same_va_bits(struct kbase_context *kctx) { return min_t(size_t, kbase_get_num_cpu_va_bits(kctx), kctx->kbdev->gpu_props.mmu.va_bits); } -int kbase_region_tracker_init(struct kbase_context *kctx) +static int kbase_reg_zone_same_va_init(struct kbase_context *kctx, u64 gpu_va_limit) { - struct kbase_va_region *same_va_reg; - struct kbase_va_region *custom_va_reg = NULL; - size_t same_va_bits = kbase_get_same_va_bits(kctx); - u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE; - u64 gpu_va_bits = kctx->kbdev->gpu_props.mmu.va_bits; - u64 gpu_va_limit = (1ULL << gpu_va_bits) >> PAGE_SHIFT; - u64 same_va_pages; - u64 same_va_base = 1u; int err; -#if MALI_USE_CSF - struct kbase_va_region *exec_va_reg; - struct kbase_va_region *exec_fixed_va_reg; - struct kbase_va_region *fixed_va_reg; - - u64 exec_va_base; - u64 fixed_va_end; - u64 exec_fixed_va_base; - u64 fixed_va_base; - u64 fixed_va_pages; -#endif - - /* Take the lock as kbase_free_alloced_region requires it */ - kbase_gpu_vm_lock(kctx); + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, SAME_VA_ZONE); + const size_t same_va_bits = kbase_get_same_va_bits(kctx); + const u64 base_pfn = 1u; + u64 nr_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - base_pfn; - same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - same_va_base; + lockdep_assert_held(&kctx->reg_lock); #if MALI_USE_CSF - if ((same_va_base + same_va_pages) > KBASE_REG_ZONE_EXEC_VA_BASE_64) { + if ((base_pfn + nr_pages) > KBASE_REG_ZONE_EXEC_VA_BASE_64) { /* Depending on how the kernel is configured, it's possible (eg on aarch64) for * same_va_bits to reach 48 bits. Cap same_va_pages so that the same_va zone * doesn't cross into the exec_va zone. 
*/ - same_va_pages = KBASE_REG_ZONE_EXEC_VA_BASE_64 - same_va_base; + nr_pages = KBASE_REG_ZONE_EXEC_VA_BASE_64 - base_pfn; } #endif + err = kbase_reg_zone_init(kctx->kbdev, zone, SAME_VA_ZONE, base_pfn, nr_pages); + if (err) + return -ENOMEM; - /* all have SAME_VA */ - same_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, same_va_base, - same_va_pages, KBASE_REG_ZONE_SAME_VA); + kctx->gpu_va_end = base_pfn + nr_pages; - if (!same_va_reg) { - err = -ENOMEM; - goto fail_unlock; - } - kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_SAME_VA, same_va_base, - same_va_pages); + return 0; +} - if (kbase_ctx_compat_mode(kctx)) { - if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) { - err = -EINVAL; - goto fail_free_same_va; - } - /* If the current size of TMEM is out of range of the - * virtual address space addressable by the MMU then - * we should shrink it to fit - */ - if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit) - custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE; +static void kbase_reg_zone_same_va_term(struct kbase_context *kctx) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, SAME_VA_ZONE); - custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, - KBASE_REG_ZONE_CUSTOM_VA_BASE, - custom_va_size, KBASE_REG_ZONE_CUSTOM_VA); + kbase_reg_zone_term(zone); +} - if (!custom_va_reg) { - err = -ENOMEM; - goto fail_free_same_va; - } - kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA, - KBASE_REG_ZONE_CUSTOM_VA_BASE, - custom_va_size); - } else { - custom_va_size = 0; - } +static int kbase_reg_zone_custom_va_init(struct kbase_context *kctx, u64 gpu_va_limit) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, CUSTOM_VA_ZONE); + u64 nr_pages = KBASE_REG_ZONE_CUSTOM_VA_SIZE; -#if MALI_USE_CSF - /* The position of EXEC_VA depends on whether the client is 32-bit or 64-bit. */ - exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_64; + /* If the context does not support CUSTOM_VA zones, then we don't need to + * proceed past this point, and can pretend that it was initialized properly. + * In practice, this will mean that the zone metadata structure will be zero + * initialized and not contain a valid zone ID. + */ + if (!kbase_ctx_compat_mode(kctx)) + return 0; + + if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) + return -EINVAL; - /* Similarly the end of the FIXED_VA zone also depends on whether the client - * is 32 or 64-bits. + /* If the current size of TMEM is out of range of the + * virtual address space addressable by the MMU then + * we should shrink it to fit */ - fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_64; + if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit) + nr_pages = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE; - if (kbase_ctx_compat_mode(kctx)) { - exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_32; - fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_32; - } + if (kbase_reg_zone_init(kctx->kbdev, zone, CUSTOM_VA_ZONE, KBASE_REG_ZONE_CUSTOM_VA_BASE, + nr_pages)) + return -ENOMEM; - kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, exec_va_base, - KBASE_REG_ZONE_EXEC_VA_SIZE); + /* On JM systems, this is the last memory zone that gets initialized, + * so the GPU VA ends right after the end of the CUSTOM_VA zone. 
On CSF, + * setting here is harmless, as the FIXED_VA initializer will overwrite + * it + */ + kctx->gpu_va_end += nr_pages; - exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_base, - KBASE_REG_ZONE_EXEC_VA_SIZE, KBASE_REG_ZONE_EXEC_VA); + return 0; +} - if (!exec_va_reg) { - err = -ENOMEM; - goto fail_free_custom_va; - } +static void kbase_reg_zone_custom_va_term(struct kbase_context *kctx) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, CUSTOM_VA_ZONE); - exec_fixed_va_base = exec_va_base + KBASE_REG_ZONE_EXEC_VA_SIZE; + kbase_reg_zone_term(zone); +} - kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_FIXED_VA, exec_fixed_va_base, - KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE); +static inline u64 kbase_get_exec_va_zone_base(struct kbase_context *kctx) +{ + u64 base_pfn; - exec_fixed_va_reg = - kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec_fixed, - exec_fixed_va_base, KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE, - KBASE_REG_ZONE_EXEC_FIXED_VA); +#if MALI_USE_CSF + base_pfn = KBASE_REG_ZONE_EXEC_VA_BASE_64; + if (kbase_ctx_compat_mode(kctx)) + base_pfn = KBASE_REG_ZONE_EXEC_VA_BASE_32; +#else + /* EXEC_VA zone's codepaths are slightly easier when its base_pfn is + * initially U64_MAX + */ + base_pfn = U64_MAX; +#endif - if (!exec_fixed_va_reg) { - err = -ENOMEM; - goto fail_free_exec_va; - } + return base_pfn; +} + +static inline int kbase_reg_zone_exec_va_init(struct kbase_context *kctx, u64 gpu_va_limit) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, EXEC_VA_ZONE); + const u64 base_pfn = kbase_get_exec_va_zone_base(kctx); + u64 nr_pages = KBASE_REG_ZONE_EXEC_VA_SIZE; + +#if !MALI_USE_CSF + nr_pages = 0; +#endif - fixed_va_base = exec_fixed_va_base + KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE; - fixed_va_pages = fixed_va_end - fixed_va_base; + return kbase_reg_zone_init(kctx->kbdev, zone, EXEC_VA_ZONE, base_pfn, nr_pages); +} - kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_FIXED_VA, fixed_va_base, fixed_va_pages); +static void kbase_reg_zone_exec_va_term(struct kbase_context *kctx) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, EXEC_VA_ZONE); - fixed_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_fixed, fixed_va_base, - fixed_va_pages, KBASE_REG_ZONE_FIXED_VA); + kbase_reg_zone_term(zone); +} + +static inline u64 kbase_get_exec_fixed_va_zone_base(struct kbase_context *kctx) +{ + return kbase_get_exec_va_zone_base(kctx) + KBASE_REG_ZONE_EXEC_VA_SIZE; +} + +#if MALI_USE_CSF +static int kbase_reg_zone_exec_fixed_va_init(struct kbase_context *kctx, u64 gpu_va_limit) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, EXEC_FIXED_VA_ZONE); + const u64 base_pfn = kbase_get_exec_fixed_va_zone_base(kctx); + + return kbase_reg_zone_init(kctx->kbdev, zone, EXEC_FIXED_VA_ZONE, base_pfn, + KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE); +} + +static void kbase_reg_zone_exec_fixed_va_term(struct kbase_context *kctx) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, EXEC_FIXED_VA_ZONE); + + WARN_ON(!list_empty(&kctx->csf.event_pages_head)); + kbase_reg_zone_term(zone); +} + +static int kbase_reg_zone_fixed_va_init(struct kbase_context *kctx, u64 gpu_va_limit) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, FIXED_VA_ZONE); + const u64 base_pfn = + kbase_get_exec_fixed_va_zone_base(kctx) + KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE; + u64 fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_64; + u64 nr_pages; + + if (kbase_ctx_compat_mode(kctx)) + fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_32; + + nr_pages = 
fixed_va_end - base_pfn; + + if (kbase_reg_zone_init(kctx->kbdev, zone, FIXED_VA_ZONE, base_pfn, nr_pages)) + return -ENOMEM; kctx->gpu_va_end = fixed_va_end; - if (!fixed_va_reg) { - err = -ENOMEM; - goto fail_free_exec_fixed_va; - } + return 0; +} - kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg, exec_va_reg, - exec_fixed_va_reg, fixed_va_reg); +static void kbase_reg_zone_fixed_va_term(struct kbase_context *kctx) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, FIXED_VA_ZONE); - INIT_LIST_HEAD(&kctx->csf.event_pages_head); -#else - /* EXEC_VA zone's codepaths are slightly easier when its base_pfn is - * initially U64_MAX - */ - kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, U64_MAX, 0u); - /* Other zones are 0: kbase_create_context() uses vzalloc */ + kbase_reg_zone_term(zone); +} +#endif + +typedef int kbase_memory_zone_init(struct kbase_context *kctx, u64 gpu_va_limit); +typedef void kbase_memory_zone_term(struct kbase_context *kctx); - kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg); - kctx->gpu_va_end = same_va_base + same_va_pages + custom_va_size; +struct kbase_memory_zone_init_meta { + kbase_memory_zone_init *init; + kbase_memory_zone_term *term; + char *error_msg; +}; + +static const struct kbase_memory_zone_init_meta zones_init[] = { + [SAME_VA_ZONE] = { kbase_reg_zone_same_va_init, kbase_reg_zone_same_va_term, + "Could not initialize SAME_VA zone" }, + [CUSTOM_VA_ZONE] = { kbase_reg_zone_custom_va_init, kbase_reg_zone_custom_va_term, + "Could not initialize CUSTOM_VA zone" }, + [EXEC_VA_ZONE] = { kbase_reg_zone_exec_va_init, kbase_reg_zone_exec_va_term, + "Could not initialize EXEC_VA zone" }, +#if MALI_USE_CSF + [EXEC_FIXED_VA_ZONE] = { kbase_reg_zone_exec_fixed_va_init, + kbase_reg_zone_exec_fixed_va_term, + "Could not initialize EXEC_FIXED_VA zone" }, + [FIXED_VA_ZONE] = { kbase_reg_zone_fixed_va_init, kbase_reg_zone_fixed_va_term, + "Could not initialize FIXED_VA zone" }, #endif - kctx->jit_va = false; +}; - kbase_gpu_vm_unlock(kctx); - return 0; +int kbase_region_tracker_init(struct kbase_context *kctx) +{ + const u64 gpu_va_bits = kctx->kbdev->gpu_props.mmu.va_bits; + const u64 gpu_va_limit = (1ULL << gpu_va_bits) >> PAGE_SHIFT; + int err; + unsigned int i; + /* Take the lock as kbase_free_alloced_region requires it */ + kbase_gpu_vm_lock(kctx); + + for (i = 0; i < ARRAY_SIZE(zones_init); i++) { + err = zones_init[i].init(kctx, gpu_va_limit); + if (unlikely(err)) { + dev_err(kctx->kbdev->dev, "%s, err = %d\n", zones_init[i].error_msg, err); + goto term; + } + } #if MALI_USE_CSF -fail_free_exec_fixed_va: - kbase_free_alloced_region(exec_fixed_va_reg); -fail_free_exec_va: - kbase_free_alloced_region(exec_va_reg); -fail_free_custom_va: - if (custom_va_reg) - kbase_free_alloced_region(custom_va_reg); + INIT_LIST_HEAD(&kctx->csf.event_pages_head); #endif + kctx->jit_va = false; + + kbase_gpu_vm_unlock(kctx); + + return 0; +term: + while (i-- > 0) + zones_init[i].term(kctx); -fail_free_same_va: - kbase_free_alloced_region(same_va_reg); -fail_unlock: kbase_gpu_vm_unlock(kctx); return err; } +void kbase_region_tracker_term(struct kbase_context *kctx) +{ + unsigned int i; + + WARN(kctx->as_nr != KBASEP_AS_NR_INVALID, + "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before erasing remaining regions", + kctx->tgid, kctx->id); + + kbase_gpu_vm_lock(kctx); + + for (i = 0; i < ARRAY_SIZE(zones_init); i++) + zones_init[i].term(kctx); + + kbase_gpu_vm_unlock(kctx); +} + static bool kbase_has_exec_va_zone_locked(struct 
kbase_context *kctx) { struct kbase_reg_zone *exec_va_zone; lockdep_assert_held(&kctx->reg_lock); - exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA); + exec_va_zone = kbase_ctx_reg_zone_get(kctx, EXEC_VA_ZONE); return (exec_va_zone->base_pfn != U64_MAX); } @@ -1109,16 +1087,16 @@ static bool kbase_region_tracker_has_allocs(struct kbase_context *kctx) lockdep_assert_held(&kctx->reg_lock); - for (zone_idx = 0; zone_idx < KBASE_REG_ZONE_MAX; ++zone_idx) { + for (zone_idx = 0; zone_idx < MEMORY_ZONE_MAX; zone_idx++) { struct kbase_reg_zone *zone; struct kbase_va_region *reg; u64 zone_base_addr; - unsigned long zone_bits = KBASE_REG_ZONE(zone_idx); - unsigned long reg_zone; + enum kbase_memory_zone reg_zone; - if (!kbase_is_ctx_reg_zone(zone_bits)) + if (!kbase_is_ctx_reg_zone(zone_idx)) continue; - zone = kbase_ctx_reg_zone_get(kctx, zone_bits); + + zone = kbase_ctx_reg_zone_get(kctx, zone_idx); zone_base_addr = zone->base_pfn << PAGE_SHIFT; reg = kbase_region_tracker_find_region_base_address( @@ -1126,21 +1104,21 @@ static bool kbase_region_tracker_has_allocs(struct kbase_context *kctx) if (!zone->va_size_pages) { WARN(reg, - "Should not have found a region that starts at 0x%.16llx for zone 0x%lx", - (unsigned long long)zone_base_addr, zone_bits); + "Should not have found a region that starts at 0x%.16llx for zone %s", + (unsigned long long)zone_base_addr, kbase_reg_zone_get_name(zone_idx)); continue; } if (WARN(!reg, - "There should always be a region that starts at 0x%.16llx for zone 0x%lx, couldn't find it", - (unsigned long long)zone_base_addr, zone_bits)) + "There should always be a region that starts at 0x%.16llx for zone %s, couldn't find it", + (unsigned long long)zone_base_addr, kbase_reg_zone_get_name(zone_idx))) return true; /* Safest return value */ - reg_zone = reg->flags & KBASE_REG_ZONE_MASK; - if (WARN(reg_zone != zone_bits, - "The region that starts at 0x%.16llx should be in zone 0x%lx but was found in the wrong zone 0x%lx", - (unsigned long long)zone_base_addr, zone_bits, - reg_zone)) + reg_zone = kbase_bits_to_zone(reg->flags); + if (WARN(reg_zone != zone_idx, + "The region that starts at 0x%.16llx should be in zone %s but was found in the wrong zone %s", + (unsigned long long)zone_base_addr, kbase_reg_zone_get_name(zone_idx), + kbase_reg_zone_get_name(reg_zone))) return true; /* Safest return value */ /* Unless the region is completely free, of the same size as @@ -1161,10 +1139,8 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, u64 jit_va_pages) { struct kbase_va_region *same_va_reg; - struct kbase_reg_zone *same_va_zone; + struct kbase_reg_zone *same_va_zone, *custom_va_zone; u64 same_va_zone_base_addr; - const unsigned long same_va_zone_bits = KBASE_REG_ZONE_SAME_VA; - struct kbase_va_region *custom_va_reg; u64 jit_va_start; lockdep_assert_held(&kctx->reg_lock); @@ -1175,14 +1151,14 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, * cause an overlap to happen with existing same VA allocations and the * custom VA zone. 
*/ - same_va_zone = kbase_ctx_reg_zone_get(kctx, same_va_zone_bits); + same_va_zone = kbase_ctx_reg_zone_get(kctx, SAME_VA_ZONE); same_va_zone_base_addr = same_va_zone->base_pfn << PAGE_SHIFT; same_va_reg = kbase_region_tracker_find_region_base_address( kctx, same_va_zone_base_addr); if (WARN(!same_va_reg, - "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone 0x%lx", - (unsigned long long)same_va_zone_base_addr, same_va_zone_bits)) + "Already found a free region at the start of every zone, but now cannot find any region for zone SAME_VA base 0x%.16llx", + (unsigned long long)same_va_zone_base_addr)) return -ENOMEM; /* kbase_region_tracker_has_allocs() in the caller has already ensured @@ -1203,24 +1179,15 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, /* * Create a custom VA zone at the end of the VA for allocations which - * JIT can use so it doesn't have to allocate VA from the kernel. + * JIT can use so it doesn't have to allocate VA from the kernel. Note + * that while the zone has already been zero-initialized during the + * region tracker initialization, we can just overwrite it. */ - custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, jit_va_start, - jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA); - - /* - * The context will be destroyed if we fail here so no point - * reverting the change we made to same_va. - */ - if (!custom_va_reg) + custom_va_zone = kbase_ctx_reg_zone_get(kctx, CUSTOM_VA_ZONE); + if (kbase_reg_zone_init(kctx->kbdev, custom_va_zone, CUSTOM_VA_ZONE, jit_va_start, + jit_va_pages)) return -ENOMEM; - /* Since this is 64-bit, the custom zone will not have been - * initialized, so initialize it now - */ - kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA, jit_va_start, - jit_va_pages); - kbase_region_tracker_insert(custom_va_reg); return 0; } @@ -1291,12 +1258,11 @@ exit_unlock: int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages) { #if !MALI_USE_CSF - struct kbase_va_region *exec_va_reg; struct kbase_reg_zone *exec_va_zone; struct kbase_reg_zone *target_zone; struct kbase_va_region *target_reg; u64 target_zone_base_addr; - unsigned long target_zone_bits; + enum kbase_memory_zone target_zone_id; u64 exec_va_start; int err; #endif @@ -1342,20 +1308,21 @@ int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages if (kbase_ctx_compat_mode(kctx)) { /* 32-bit client: take from CUSTOM_VA zone */ - target_zone_bits = KBASE_REG_ZONE_CUSTOM_VA; + target_zone_id = CUSTOM_VA_ZONE; } else { /* 64-bit client: take from SAME_VA zone */ - target_zone_bits = KBASE_REG_ZONE_SAME_VA; + target_zone_id = SAME_VA_ZONE; } - target_zone = kbase_ctx_reg_zone_get(kctx, target_zone_bits); + target_zone = kbase_ctx_reg_zone_get(kctx, target_zone_id); target_zone_base_addr = target_zone->base_pfn << PAGE_SHIFT; target_reg = kbase_region_tracker_find_region_base_address( kctx, target_zone_base_addr); if (WARN(!target_reg, - "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone 0x%lx", - (unsigned long long)target_zone_base_addr, target_zone_bits)) { + "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone %s", + (unsigned long long)target_zone_base_addr, + kbase_reg_zone_get_name(target_zone_id))) { err = -ENOMEM; goto exit_unlock; } @@ -1374,26 +1341,14 @@ int kbase_region_tracker_init_exec(struct 
kbase_context *kctx, u64 exec_va_pages /* Taken from the end of the target zone */ exec_va_start = kbase_reg_zone_end_pfn(target_zone) - exec_va_pages; - - exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_start, - exec_va_pages, KBASE_REG_ZONE_EXEC_VA); - if (!exec_va_reg) { - err = -ENOMEM; - goto exit_unlock; - } - /* Update EXEC_VA zone - * - * not using kbase_ctx_reg_zone_init() - it was already initialized - */ - exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA); - exec_va_zone->base_pfn = exec_va_start; - exec_va_zone->va_size_pages = exec_va_pages; + exec_va_zone = kbase_ctx_reg_zone_get(kctx, EXEC_VA_ZONE); + if (kbase_reg_zone_init(kctx->kbdev, exec_va_zone, EXEC_VA_ZONE, exec_va_start, + exec_va_pages)) + return -ENOMEM; /* Update target zone and corresponding region */ target_reg->nr_pages -= exec_va_pages; target_zone->va_size_pages -= exec_va_pages; - - kbase_region_tracker_insert(exec_va_reg); err = 0; exit_unlock: @@ -1405,28 +1360,13 @@ exit_unlock: #if MALI_USE_CSF void kbase_mcu_shared_interface_region_tracker_term(struct kbase_device *kbdev) { - kbase_region_tracker_term_rbtree(&kbdev->csf.shared_reg_rbtree); + kbase_reg_zone_term(&kbdev->csf.mcu_shared_zone); } int kbase_mcu_shared_interface_region_tracker_init(struct kbase_device *kbdev) { - struct kbase_va_region *shared_reg; - u64 shared_reg_start_pfn; - u64 shared_reg_size; - - shared_reg_start_pfn = KBASE_REG_ZONE_MCU_SHARED_BASE; - shared_reg_size = KBASE_REG_ZONE_MCU_SHARED_SIZE; - - kbdev->csf.shared_reg_rbtree = RB_ROOT; - - shared_reg = - kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, shared_reg_start_pfn, - shared_reg_size, KBASE_REG_ZONE_MCU_SHARED); - if (!shared_reg) - return -ENOMEM; - - kbase_region_tracker_insert(shared_reg); - return 0; + return kbase_reg_zone_init(kbdev, &kbdev->csf.mcu_shared_zone, MCU_SHARED_ZONE, + KBASE_REG_ZONE_MCU_SHARED_BASE, MCU_SHARED_ZONE_SIZE); } #endif @@ -1582,33 +1522,31 @@ KBASE_EXPORT_TEST_API(kbase_mem_term); /** * kbase_alloc_free_region - Allocate a free region object. * - * @kbdev: kbase device - * @rbtree: Backlink to the red-black tree of memory regions. + * @zone: CUSTOM_VA_ZONE or SAME_VA_ZONE * @start_pfn: The Page Frame Number in GPU virtual address space. * @nr_pages: The size of the region in pages. - * @zone: KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA * * The allocated object is not part of any list yet, and is flagged as * KBASE_REG_FREE. No mapping is allocated yet. * - * zone is KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA. - * * Return: pointer to the allocated region object on success, NULL otherwise. 
*/ -struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, struct rb_root *rbtree, - u64 start_pfn, size_t nr_pages, int zone) +struct kbase_va_region *kbase_alloc_free_region(struct kbase_reg_zone *zone, u64 start_pfn, + size_t nr_pages) { struct kbase_va_region *new_reg; - KBASE_DEBUG_ASSERT(rbtree != NULL); - - /* zone argument should only contain zone related region flags */ - KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0); KBASE_DEBUG_ASSERT(nr_pages > 0); /* 64-bit address range is the max */ KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE)); - new_reg = kmem_cache_zalloc(kbdev->va_region_slab, GFP_KERNEL); + if (WARN_ON(!zone)) + return NULL; + + if (unlikely(!zone->base_pfn || !zone->va_size_pages)) + return NULL; + + new_reg = kmem_cache_zalloc(zone->cache, GFP_KERNEL); if (!new_reg) return NULL; @@ -1617,8 +1555,8 @@ struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, stru atomic_set(&new_reg->no_user_free_count, 0); new_reg->cpu_alloc = NULL; /* no alloc bound yet */ new_reg->gpu_alloc = NULL; /* no alloc bound yet */ - new_reg->rbtree = rbtree; - new_reg->flags = zone | KBASE_REG_FREE; + new_reg->rbtree = &zone->reg_rbtree; + new_reg->flags = kbase_zone_to_bits(zone->id) | KBASE_REG_FREE; new_reg->flags |= KBASE_REG_GROWABLE; @@ -1630,9 +1568,17 @@ struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, stru return new_reg; } - KBASE_EXPORT_TEST_API(kbase_alloc_free_region); +struct kbase_va_region *kbase_ctx_alloc_free_region(struct kbase_context *kctx, + enum kbase_memory_zone id, u64 start_pfn, + size_t nr_pages) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get_nolock(kctx, id); + + return kbase_alloc_free_region(zone, start_pfn, nr_pages); +} + /** * kbase_free_alloced_region - Free a region object. * @@ -1644,19 +1590,18 @@ KBASE_EXPORT_TEST_API(kbase_alloc_free_region); * alloc object will be released. * It is a bug if no alloc object exists for non-free regions. 
* - * If region is KBASE_REG_ZONE_MCU_SHARED it is freed + * If region is MCU_SHARED_ZONE it is freed */ void kbase_free_alloced_region(struct kbase_va_region *reg) { #if MALI_USE_CSF - if ((reg->flags & KBASE_REG_ZONE_MASK) == - KBASE_REG_ZONE_MCU_SHARED) { + if (kbase_bits_to_zone(reg->flags) == MCU_SHARED_ZONE) { kfree(reg); return; } #endif if (!(reg->flags & KBASE_REG_FREE)) { - struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg); + struct kbase_context *kctx = kbase_reg_to_kctx(reg); if (WARN_ON(!kctx)) return; @@ -1664,8 +1609,8 @@ void kbase_free_alloced_region(struct kbase_va_region *reg) if (WARN_ON(kbase_is_region_invalid(reg))) return; - dev_dbg(kctx->kbdev->dev, "Freeing memory region %pK\n", - (void *)reg); + dev_dbg(kctx->kbdev->dev, "Freeing memory region %pK\n of zone %s", (void *)reg, + kbase_reg_zone_get_name(kbase_bits_to_zone(reg->flags))); #if MALI_USE_CSF if (reg->flags & KBASE_REG_CSF_EVENT) /* @@ -1801,8 +1746,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, } else { if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM || reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) { - - err = kbase_mmu_insert_imported_pages( + err = kbase_mmu_insert_pages_skip_status_update( kctx->kbdev, &kctx->mmu, reg->start_pfn, kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg), reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info, reg); @@ -1811,7 +1755,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg), reg->flags & gwt_mask, kctx->as_nr, group_id, - mmu_sync_info, reg, true); + mmu_sync_info, reg); } if (err) @@ -1855,8 +1799,7 @@ bad_aliased_insert: kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride), phys_alloc, alloc->imported.alias.aliased[i].length, - alloc->imported.alias.aliased[i].length, kctx->as_nr, - false); + alloc->imported.alias.aliased[i].length, kctx->as_nr); } bad_insert: kbase_remove_va_region(kctx->kbdev, reg); @@ -1867,7 +1810,7 @@ bad_insert: KBASE_EXPORT_TEST_API(kbase_gpu_mmap); static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc, - struct kbase_va_region *reg, bool writeable); + struct kbase_va_region *reg); int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) { @@ -1888,9 +1831,8 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) size_t i = 0; /* Due to the way the number of valid PTEs and ATEs are tracked * currently, only the GPU virtual range that is backed & mapped - * should be passed to the kbase_mmu_teardown_pages() function, - * hence individual aliased regions needs to be unmapped - * separately. + * should be passed to the page teardown function, hence individual + * aliased regions needs to be unmapped separately. 
*/ for (i = 0; i < alloc->imported.alias.nents; i++) { struct tagged_addr *phys_alloc = NULL; @@ -1904,8 +1846,7 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * alloc->imported.alias.stride), phys_alloc, alloc->imported.alias.aliased[i].length, - alloc->imported.alias.aliased[i].length, kctx->as_nr, - false); + alloc->imported.alias.aliased[i].length, kctx->as_nr); if (WARN_ON_ONCE(err_loop)) err = err_loop; @@ -1927,17 +1868,19 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) if (reg->flags & KBASE_REG_IMPORT_PAD) nr_phys_pages = alloc->nents + 1; - err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - alloc->pages, nr_phys_pages, nr_virt_pages, - kctx->as_nr, true); + err = kbase_mmu_teardown_imported_pages(kctx->kbdev, &kctx->mmu, + reg->start_pfn, alloc->pages, + nr_phys_pages, nr_virt_pages, + kctx->as_nr); } break; case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { size_t nr_reg_pages = kbase_reg_current_backed_size(reg); - err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - alloc->pages, nr_reg_pages, nr_reg_pages, - kctx->as_nr, true); + err = kbase_mmu_teardown_imported_pages(kctx->kbdev, &kctx->mmu, + reg->start_pfn, alloc->pages, + nr_reg_pages, nr_reg_pages, + kctx->as_nr); } break; default: { @@ -1945,7 +1888,7 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, nr_reg_pages, nr_reg_pages, - kctx->as_nr, false); + kctx->as_nr); } break; } @@ -1965,9 +1908,7 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) /* The allocation could still have active mappings. 
*/ if (user_buf->current_mapping_usage_count == 0) { - kbase_jd_user_buf_unmap(kctx, alloc, reg, - (reg->flags & - (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR))); + kbase_jd_user_buf_unmap(kctx, alloc, reg); } } } @@ -2111,18 +2052,18 @@ void kbase_sync_single(struct kbase_context *kctx, dma_addr = kbase_dma_addr_from_tagged(t_gpu_pa) + offset; if (sync_fn == KBASE_SYNC_TO_DEVICE) { - src = ((unsigned char *)kmap(cpu_page)) + offset; - dst = ((unsigned char *)kmap(gpu_page)) + offset; + src = ((unsigned char *)kbase_kmap(cpu_page)) + offset; + dst = ((unsigned char *)kbase_kmap(gpu_page)) + offset; } else if (sync_fn == KBASE_SYNC_TO_CPU) { dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, size, DMA_BIDIRECTIONAL); - src = ((unsigned char *)kmap(gpu_page)) + offset; - dst = ((unsigned char *)kmap(cpu_page)) + offset; + src = ((unsigned char *)kbase_kmap(gpu_page)) + offset; + dst = ((unsigned char *)kbase_kmap(cpu_page)) + offset; } memcpy(dst, src, size); - kunmap(gpu_page); - kunmap(cpu_page); + kbase_kunmap(gpu_page, src); + kbase_kunmap(cpu_page, dst); if (sync_fn == KBASE_SYNC_TO_DEVICE) dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, size, DMA_BIDIRECTIONAL); @@ -2302,8 +2243,8 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re } #if MALI_USE_CSF - if (((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_FIXED_VA) || - ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_EXEC_FIXED_VA)) { + if (((kbase_bits_to_zone(reg->flags)) == FIXED_VA_ZONE) || + ((kbase_bits_to_zone(reg->flags)) == EXEC_FIXED_VA_ZONE)) { if (reg->flags & KBASE_REG_FIXED_ADDRESS) atomic64_dec(&kctx->num_fixed_allocs); else @@ -2380,7 +2321,7 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) goto out_unlock; } - if ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA) { + if ((kbase_bits_to_zone(reg->flags)) == SAME_VA_ZONE) { /* SAME_VA must be freed through munmap */ dev_warn(kctx->kbdev->dev, "%s called on SAME_VA memory 0x%llX", __func__, gpu_addr); @@ -2543,6 +2484,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, * allocation is visible to the OOM killer */ kbase_process_page_usage_inc(kctx, nr_pages_requested); + kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested); tp = alloc->pages + alloc->nents; @@ -2664,8 +2606,6 @@ no_new_partial: alloc->nents += nr_pages_requested; - kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested); - done: return 0; @@ -2675,19 +2615,13 @@ alloc_failed: size_t nr_pages_to_free = nr_pages_requested - nr_left; alloc->nents += nr_pages_to_free; - - kbase_process_page_usage_inc(kctx, nr_pages_to_free); - atomic_add(nr_pages_to_free, &kctx->used_pages); - atomic_add(nr_pages_to_free, - &kctx->kbdev->memdev.used_pages); - kbase_free_phy_pages_helper(alloc, nr_pages_to_free); } - kbase_process_page_usage_dec(kctx, nr_pages_requested); - atomic_sub(nr_pages_requested, &kctx->used_pages); - atomic_sub(nr_pages_requested, - &kctx->kbdev->memdev.used_pages); + kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, nr_left); + kbase_process_page_usage_dec(kctx, nr_left); + atomic_sub(nr_left, &kctx->used_pages); + atomic_sub(nr_left, &kctx->kbdev->memdev.used_pages); invalid_request: return -ENOMEM; @@ -2736,6 +2670,7 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked( * allocation is visible to the OOM killer */ kbase_process_page_usage_inc(kctx, nr_pages_requested); + kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested); tp = alloc->pages + alloc->nents; 
new_pages = tp; @@ -2838,8 +2773,6 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked( alloc->nents += nr_pages_requested; - kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested); - done: return new_pages; @@ -2876,6 +2809,7 @@ alloc_failed: } } + kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, nr_pages_requested); kbase_process_page_usage_dec(kctx, nr_pages_requested); atomic_sub(nr_pages_requested, &kctx->used_pages); atomic_sub(nr_pages_requested, &kctx->kbdev->memdev.used_pages); @@ -4538,7 +4472,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, /* A suitable JIT allocation existed on the evict list, so we need * to make sure that the NOT_MOVABLE property is cleared. */ - if (kbase_page_migration_enabled) { + if (kbase_is_page_migration_enabled()) { kbase_gpu_vm_lock(kctx); mutex_lock(&kctx->jit_evict_lock); kbase_set_phy_alloc_page_status(reg->gpu_alloc, ALLOCATED_MAPPED); @@ -4716,14 +4650,14 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) * by page migration. Once freed, they will enter into the page migration * state machine via the mempools. */ - if (kbase_page_migration_enabled) + if (kbase_is_page_migration_enabled()) kbase_set_phy_alloc_page_status(reg->gpu_alloc, NOT_MOVABLE); mutex_unlock(&kctx->jit_evict_lock); } void kbase_jit_backing_lost(struct kbase_va_region *reg) { - struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg); + struct kbase_context *kctx = kbase_reg_to_kctx(reg); if (WARN_ON(!kctx)) return; @@ -5034,6 +4968,15 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, * MMU operations. */ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + bool write; + enum dma_data_direction dma_dir; + + /* If neither the CPU nor the GPU needs write access, use DMA_TO_DEVICE + * to avoid potentially-destructive CPU cache invalidates that could + * corruption of user data. + */ + write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR); + dma_dir = write ? 
DMA_BIDIRECTIONAL : DMA_TO_DEVICE; lockdep_assert_held(&kctx->reg_lock); @@ -5067,9 +5010,9 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, for (i = 0; i < pinned_pages; i++) { dma_addr_t dma_addr; #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) - dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, dma_dir); #else - dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL, + dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, dma_dir, DMA_ATTR_SKIP_CPU_SYNC); #endif err = dma_mapping_error(dev, dma_addr); @@ -5079,7 +5022,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, alloc->imported.user_buf.dma_addrs[i] = dma_addr; pa[i] = as_tagged(page_to_phys(pages[i])); - dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, dma_dir); } #ifdef CONFIG_MALI_CINSTR_GWT @@ -5087,10 +5030,10 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, gwt_mask = ~KBASE_REG_GPU_WR; #endif - err = kbase_mmu_insert_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa, - kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask, kctx->as_nr, alloc->group_id, - mmu_sync_info, NULL); + err = kbase_mmu_insert_pages_skip_status_update(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa, + kbase_reg_current_backed_size(reg), + reg->flags & gwt_mask, kctx->as_nr, + alloc->group_id, mmu_sync_info, NULL); if (err == 0) return 0; @@ -5110,12 +5053,11 @@ unwind: for (i = 0; i < dma_mapped_pages; i++) { dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; - dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, dma_dir); #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) - dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_unmap_page(dev, dma_addr, PAGE_SIZE, dma_dir); #else - dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL, - DMA_ATTR_SKIP_CPU_SYNC); + dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, dma_dir, DMA_ATTR_SKIP_CPU_SYNC); #endif } @@ -5133,17 +5075,113 @@ unwind: return err; } +/* user_buf_sync_read_only_page - This function handles syncing a single page that has read access, + * only, on both the CPU and * GPU, so it is ready to be unmapped. + * @kctx: kbase context + * @imported_size: the number of bytes to sync + * @dma_addr: DMA address of the bytes to be sync'd + * @offset_within_page: (unused) offset of the bytes within the page. Passed so that the calling + * signature is identical to user_buf_sync_writable_page(). + */ +static void user_buf_sync_read_only_page(struct kbase_context *kctx, unsigned long imported_size, + dma_addr_t dma_addr, unsigned long offset_within_page) +{ + /* Manual cache synchronization. + * + * Writes from neither the CPU nor GPU are possible via this mapping, + * so we just sync the entire page to the device. + */ + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, imported_size, DMA_TO_DEVICE); +} + +/* user_buf_sync_writable_page - This function handles syncing a single page that has read + * and writable access, from either (or both of) the CPU and GPU, + * so it is ready to be unmapped. + * @kctx: kbase context + * @imported_size: the number of bytes to unmap + * @dma_addr: DMA address of the bytes to be unmapped + * @offset_within_page: offset of the bytes within the page. 
This is the offset to the subrange of + * the memory that is "imported" and so is intended for GPU access. Areas of + * the page outside of this - whilst still GPU accessible - are not intended + * for use by GPU work, and should also not be modified as the userspace CPU + * threads may be modifying them. + */ +static void user_buf_sync_writable_page(struct kbase_context *kctx, unsigned long imported_size, + dma_addr_t dma_addr, unsigned long offset_within_page) +{ + /* Manual CPU cache synchronization. + * + * When the GPU returns ownership of the buffer to the CPU, the driver + * needs to treat imported and non-imported memory differently. + * + * The first case to consider is non-imported sub-regions at the + * beginning of the first page and at the end of last page. For these + * sub-regions: CPU cache shall be committed with a clean+invalidate, + * in order to keep the last CPU write. + * + * Imported region prefers the opposite treatment: this memory has been + * legitimately mapped and used by the GPU, hence GPU writes shall be + * committed to memory, while CPU cache shall be invalidated to make + * sure that CPU reads the correct memory content. + * + * The following diagram shows the expect value of the variables + * used in this loop in the corner case of an imported region encloed + * by a single memory page: + * + * page boundary ->|---------- | <- dma_addr (initial value) + * | | + * | - - - - - | <- offset_within_page + * |XXXXXXXXXXX|\ + * |XXXXXXXXXXX| \ + * |XXXXXXXXXXX| }- imported_size + * |XXXXXXXXXXX| / + * |XXXXXXXXXXX|/ + * | - - - - - | <- offset_within_page + imported_size + * | |\ + * | | }- PAGE_SIZE - imported_size - + * | |/ offset_within_page + * | | + * page boundary ->|-----------| + * + * If the imported region is enclosed by more than one page, then + * offset_within_page = 0 for any page after the first. + */ + + /* Only for first page: handle non-imported range at the beginning. */ + if (offset_within_page > 0) { + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page, + DMA_BIDIRECTIONAL); + dma_addr += offset_within_page; + } + + /* For every page: handle imported range. */ + if (imported_size > 0) + dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size, + DMA_BIDIRECTIONAL); + + /* Only for last page (that may coincide with first page): + * handle non-imported range at the end. + */ + if ((imported_size + offset_within_page) < PAGE_SIZE) { + dma_addr += imported_size; + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, + PAGE_SIZE - imported_size - offset_within_page, + DMA_BIDIRECTIONAL); + } +} + /* This function would also perform the work of unpinning pages on Job Manager * GPUs, which implies that a call to kbase_jd_user_buf_pin_pages() will NOT * have a corresponding call to kbase_jd_user_buf_unpin_pages(). 
*/ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc, - struct kbase_va_region *reg, bool writeable) + struct kbase_va_region *reg) { long i; struct page **pages; unsigned long offset_within_page = alloc->imported.user_buf.address & ~PAGE_MASK; unsigned long remaining_size = alloc->imported.user_buf.size; + bool writable = (reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)); lockdep_assert_held(&kctx->reg_lock); @@ -5152,8 +5190,6 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem #if !MALI_USE_CSF kbase_mem_shrink_cpu_mapping(kctx, reg, 0, alloc->nents); -#else - CSTD_UNUSED(reg); #endif for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) { @@ -5172,75 +5208,24 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem * whole memory page. */ dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; + enum dma_data_direction dma_dir = writable ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE; - /* Manual CPU cache synchronization. - * - * When the GPU returns ownership of the buffer to the CPU, the driver - * needs to treat imported and non-imported memory differently. - * - * The first case to consider is non-imported sub-regions at the - * beginning of the first page and at the end of last page. For these - * sub-regions: CPU cache shall be committed with a clean+invalidate, - * in order to keep the last CPU write. - * - * Imported region prefers the opposite treatment: this memory has been - * legitimately mapped and used by the GPU, hence GPU writes shall be - * committed to memory, while CPU cache shall be invalidated to make - * sure that CPU reads the correct memory content. - * - * The following diagram shows the expect value of the variables - * used in this loop in the corner case of an imported region encloed - * by a single memory page: - * - * page boundary ->|---------- | <- dma_addr (initial value) - * | | - * | - - - - - | <- offset_within_page - * |XXXXXXXXXXX|\ - * |XXXXXXXXXXX| \ - * |XXXXXXXXXXX| }- imported_size - * |XXXXXXXXXXX| / - * |XXXXXXXXXXX|/ - * | - - - - - | <- offset_within_page + imported_size - * | |\ - * | | }- PAGE_SIZE - imported_size - offset_within_page - * | |/ - * page boundary ->|-----------| - * - * If the imported region is enclosed by more than one page, then - * offset_within_page = 0 for any page after the first. - */ - - /* Only for first page: handle non-imported range at the beginning. */ - if (offset_within_page > 0) { - dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page, - DMA_BIDIRECTIONAL); - dma_addr += offset_within_page; - } - - /* For every page: handle imported range. */ - if (imported_size > 0) - dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size, - DMA_BIDIRECTIONAL); - - /* Only for last page (that may coincide with first page): - * handle non-imported range at the end. - */ - if ((imported_size + offset_within_page) < PAGE_SIZE) { - dma_addr += imported_size; - dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, - PAGE_SIZE - imported_size - offset_within_page, - DMA_BIDIRECTIONAL); - } + if (writable) + user_buf_sync_writable_page(kctx, imported_size, dma_addr, + offset_within_page); + else + user_buf_sync_read_only_page(kctx, imported_size, dma_addr, + offset_within_page); - /* Notice: use the original DMA address to unmap the whole memory page. */ + /* Notice: use the original DMA address to unmap the whole memory page. 
*/ #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) dma_unmap_page(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], PAGE_SIZE, - DMA_BIDIRECTIONAL); + dma_dir); #else dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], - PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); + PAGE_SIZE, dma_dir, DMA_ATTR_SKIP_CPU_SYNC); #endif - if (writeable) + if (writable) set_page_dirty_lock(pages[i]); #if !MALI_USE_CSF kbase_unpin_user_buf_page(pages[i]); @@ -5259,7 +5244,8 @@ int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, void *src_page, size_t *to_copy, unsigned int nr_pages, unsigned int *target_page_nr, size_t offset) { - void *target_page = kmap(dest_pages[*target_page_nr]); + void *target_page = kbase_kmap(dest_pages[*target_page_nr]); + size_t chunk = PAGE_SIZE-offset; if (!target_page) { @@ -5272,13 +5258,13 @@ int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, memcpy(target_page + offset, src_page, chunk); *to_copy -= chunk; - kunmap(dest_pages[*target_page_nr]); + kbase_kunmap(dest_pages[*target_page_nr], target_page); *target_page_nr += 1; if (*target_page_nr >= nr_pages || *to_copy == 0) return 0; - target_page = kmap(dest_pages[*target_page_nr]); + target_page = kbase_kmap(dest_pages[*target_page_nr]); if (!target_page) { pr_err("%s: kmap failure", __func__); return -ENOMEM; @@ -5290,7 +5276,7 @@ int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, memcpy(target_page, src_page + PAGE_SIZE-offset, chunk); *to_copy -= chunk; - kunmap(dest_pages[*target_page_nr]); + kbase_kunmap(dest_pages[*target_page_nr], target_page); return 0; } @@ -5357,20 +5343,14 @@ void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_r alloc->imported.user_buf.current_mapping_usage_count--; if (alloc->imported.user_buf.current_mapping_usage_count == 0) { - bool writeable = true; - if (!kbase_is_region_invalid_or_free(reg)) { - kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - alloc->pages, - kbase_reg_current_backed_size(reg), - kbase_reg_current_backed_size(reg), - kctx->as_nr, true); + kbase_mmu_teardown_imported_pages( + kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, + kbase_reg_current_backed_size(reg), + kbase_reg_current_backed_size(reg), kctx->as_nr); } - if ((reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)) == 0) - writeable = false; - - kbase_jd_user_buf_unmap(kctx, alloc, reg, writeable); + kbase_jd_user_buf_unmap(kctx, alloc, reg); } } break;
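Zone setup takes the same data-driven turn: kbase_region_tracker_init() now iterates a zones_init[] table of paired init/term callbacks and unwinds already-initialized zones on failure, replacing the old chain of fail_free_* goto labels. A self-contained sketch of that init/rollback pattern, using a simplified context struct and hypothetical zone callbacks rather than the driver's:

#include <stdint.h>
#include <stdio.h>

struct kctx_sketch { int same_va_ready; int custom_va_ready; };

static int same_va_init(struct kctx_sketch *kctx, uint64_t gpu_va_limit)
{
	(void)gpu_va_limit;
	kctx->same_va_ready = 1;
	return 0;
}

static void same_va_term(struct kctx_sketch *kctx) { kctx->same_va_ready = 0; }

static int custom_va_init(struct kctx_sketch *kctx, uint64_t gpu_va_limit)
{
	(void)gpu_va_limit;
	kctx->custom_va_ready = 1;
	return 0;
}

static void custom_va_term(struct kctx_sketch *kctx) { kctx->custom_va_ready = 0; }

struct zone_init_meta {
	int (*init)(struct kctx_sketch *kctx, uint64_t gpu_va_limit);
	void (*term)(struct kctx_sketch *kctx);
	const char *error_msg;
};

static const struct zone_init_meta zones_init[] = {
	{ same_va_init, same_va_term, "Could not initialize SAME_VA zone" },
	{ custom_va_init, custom_va_term, "Could not initialize CUSTOM_VA zone" },
};

int region_tracker_init(struct kctx_sketch *kctx, uint64_t gpu_va_limit)
{
	unsigned int i;
	int err;

	for (i = 0; i < sizeof(zones_init) / sizeof(zones_init[0]); i++) {
		err = zones_init[i].init(kctx, gpu_va_limit);
		if (err) {
			fprintf(stderr, "%s, err = %d\n", zones_init[i].error_msg, err);
			goto term;
		}
	}
	return 0;

term:
	while (i-- > 0) /* unwind only the zones that initialized successfully */
		zones_init[i].term(kctx);
	return err;
}

Keeping init and term in one table also lets kbase_region_tracker_term() reuse the same array, so a zone can never be torn down by a different path than the one that set it up.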
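The imported user-buffer paths also stop hard-coding DMA_BIDIRECTIONAL: when neither KBASE_REG_CPU_WR nor KBASE_REG_GPU_WR is set, pages are mapped and synced DMA_TO_DEVICE, so no destructive CPU cache invalidate can ever touch read-only user data. A minimal user-space sketch of that decision, with illustrative flag bits (the driver's actual KBASE_REG_* values differ; the enum values mirror the kernel's dma_data_direction):

#include <stdbool.h>

/* Values mirror the kernel's enum dma_data_direction. */
enum dma_data_direction { DMA_BIDIRECTIONAL = 0, DMA_TO_DEVICE = 1 };

/* Illustrative flag bits; not the driver's actual KBASE_REG_* values. */
#define KBASE_REG_CPU_WR (1ul << 0)
#define KBASE_REG_GPU_WR (1ul << 1)

static enum dma_data_direction user_buf_dma_dir(unsigned long reg_flags)
{
	bool writable = reg_flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR);

	/* Read-only imports are cleaned to memory (DMA_TO_DEVICE) but never
	 * invalidated, so a sync or unmap cannot discard CPU-side user data
	 * that the device was only ever allowed to read.
	 */
	return writable ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
}

The same writability test selects between the new user_buf_sync_writable_page() and user_buf_sync_read_only_page() helpers on unmap, and decides whether set_page_dirty_lock() is needed at all.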