Diffstat (limited to 'mali_kbase/mali_kbase_mem.c')
-rw-r--r-- | mali_kbase/mali_kbase_mem.c | 292
1 file changed, 227 insertions, 65 deletions
diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c
index 375e484..a0897fb 100644
--- a/mali_kbase/mali_kbase_mem.c
+++ b/mali_kbase/mali_kbase_mem.c
@@ -7,16 +7,21 @@
  * Foundation, and any use by you of this program is subject to the terms
  * of such GNU licence.
  *
- * A copy of the licence is included with the program, and can also be obtained
- * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
  *
  */
-
-
 /**
  * @file mali_kbase_mem.c
  * Base kernel memory APIs
@@ -31,6 +36,7 @@
 #include <linux/bug.h>
 #include <linux/compat.h>
 #include <linux/version.h>
+#include <linux/log2.h>
 
 #include <mali_kbase_config.h>
 #include <mali_kbase.h>
@@ -231,7 +237,10 @@ struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kba
 KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address);
 
 /* Find region meeting given requirements */
-static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(struct kbase_context *kctx, struct kbase_va_region *reg_reqs, size_t nr_pages, size_t align)
+static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(
+		struct kbase_context *kctx, struct kbase_va_region *reg_reqs,
+		size_t nr_pages, size_t align_offset, size_t align_mask,
+		u64 *out_start_pfn)
 {
 	struct rb_node *rbnode = NULL;
 	struct kbase_va_region *reg = NULL;
@@ -239,7 +248,6 @@ static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(str
 	/* Note that this search is a linear search, as we do not have a
 	 * target address in mind, so does not benefit from the rbtree
 	 * search */
-
 	rbtree = kbase_reg_flags_to_rbtree(kctx, reg_reqs);
 
 	rbnode = rb_first(rbtree);
@@ -249,12 +257,23 @@ static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(str
 		if ((reg->nr_pages >= nr_pages) &&
 				(reg->flags & KBASE_REG_FREE)) {
 			/* Check alignment */
-			u64 start_pfn = (reg->start_pfn + align - 1) & ~(align - 1);
+			u64 start_pfn = reg->start_pfn;
+
+			/* When align_offset == align, this sequence is
+			 * equivalent to:
+			 *   (start_pfn + align_mask) & ~(align_mask)
+			 *
+			 * Otherwise, it aligns to n*align + offset, for the
+			 * lowest value n that makes this still >start_pfn */
+			start_pfn += align_mask;
+			start_pfn -= (start_pfn - align_offset) & (align_mask);
 
 			if ((start_pfn >= reg->start_pfn) &&
 					(start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) &&
-					((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1)))
+					((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1))) {
+				*out_start_pfn = start_pfn;
 				return reg;
+			}
 		}
 		rbnode = rb_next(rbnode);
 	}
@@ -425,7 +444,7 @@ int kbase_add_va_region(struct kbase_context *kctx,
 		align = 1;
 
 	/* must be a power of 2 */
-	KBASE_DEBUG_ASSERT((align & (align - 1)) == 0);
+	KBASE_DEBUG_ASSERT(is_power_of_2(align));
 	KBASE_DEBUG_ASSERT(nr_pages > 0);
 
 	/* Path 1: Map a specific address. Find the enclosing region,
	 * which *must* be free.
	 */
@@ -458,38 +477,44 @@ int kbase_add_va_region(struct kbase_context *kctx,
 		goto exit;
 	}
 
-	/* Path 2: Map any free address which meets the requirements. */
-	{
+	/* Path 2: Map any free address which meets the requirements.
+	 *
+	 * Depending on the zone the allocation request is for
+	 * we might need to retry it. */
+	do {
 		u64 start_pfn;
+		size_t align_offset = align;
+		size_t align_mask = align - 1;
+
+		if ((reg->flags & KBASE_REG_TILER_ALIGN_TOP)) {
+			WARN(align > 1,
+					"kbase_add_va_region with align %lx might not be honored for KBASE_REG_TILER_ALIGN_TOP memory",
+					(unsigned long)align);
+			align_mask = reg->extent - 1;
+			align_offset = reg->extent - reg->initial_commit;
+		}
+
+		tmp = kbase_region_tracker_find_region_meeting_reqs(kctx, reg,
+				nr_pages, align_offset, align_mask,
+				&start_pfn);
+		if (tmp) {
+			err = kbase_insert_va_region_nolock(kctx, reg, tmp,
+					start_pfn, nr_pages);
+			break;
+		}
 
 		/*
-		 * Depending on the zone the allocation request is for
-		 * we might need to retry it.
+		 * If the allocation is not from the same zone as JIT
+		 * then don't retry, we're out of VA and there is
+		 * nothing which can be done about it.
 		 */
-		do {
-			tmp = kbase_region_tracker_find_region_meeting_reqs(
-					kctx, reg, nr_pages, align);
-			if (tmp) {
-				start_pfn = (tmp->start_pfn + align - 1) &
-						~(align - 1);
-				err = kbase_insert_va_region_nolock(kctx, reg,
-						tmp, start_pfn, nr_pages);
-				break;
-			}
-
-			/*
-			 * If the allocation is not from the same zone as JIT
-			 * then don't retry, we're out of VA and there is
-			 * nothing which can be done about it.
-			 */
-			if ((reg->flags & KBASE_REG_ZONE_MASK) !=
-					KBASE_REG_ZONE_CUSTOM_VA)
-				break;
-		} while (kbase_jit_evict(kctx));
+		if ((reg->flags & KBASE_REG_ZONE_MASK) !=
+				KBASE_REG_ZONE_CUSTOM_VA)
+			break;
+	} while (kbase_jit_evict(kctx));
 
-	if (!tmp)
-		err = -ENOMEM;
-	}
+	if (!tmp)
+		err = -ENOMEM;
 
 exit:
 	return err;
@@ -829,6 +854,8 @@ struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64
 	new_reg->start_pfn = start_pfn;
 	new_reg->nr_pages = nr_pages;
 
+	INIT_LIST_HEAD(&new_reg->jit_node);
+
 	return new_reg;
 }
@@ -902,6 +929,12 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64
 	size_t i = 0;
 	unsigned long attr;
 	unsigned long mask = ~KBASE_REG_MEMATTR_MASK;
+	unsigned long gwt_mask = ~0;
+
+#ifdef CONFIG_MALI_JOB_DUMP
+	if (kctx->gwt_enabled)
+		gwt_mask = ~KBASE_REG_GPU_WR;
+#endif
 
 	if ((kctx->kbdev->system_coherency == COHERENCY_ACE) &&
 		(reg->flags & KBASE_REG_SHARE_BOTH))
@@ -929,7 +962,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64
 					reg->start_pfn + (i * stride),
 					alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset,
 					alloc->imported.alias.aliased[i].length,
-					reg->flags);
+					reg->flags & gwt_mask);
 				if (err)
 					goto bad_insert;
@@ -939,7 +972,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64
 					reg->start_pfn + i * stride,
 					kctx->aliasing_sink_page,
 					alloc->imported.alias.aliased[i].length,
-					(reg->flags & mask) | attr);
+					(reg->flags & mask & gwt_mask) | attr);
 				if (err)
 					goto bad_insert;
@@ -949,7 +982,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64
 		err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
 				kbase_get_gpu_phy_pages(reg),
 				kbase_reg_current_backed_size(reg),
-				reg->flags);
+				reg->flags & gwt_mask);
 		if (err)
 			goto bad_insert;
 		kbase_mem_phy_alloc_gpu_mapped(reg->gpu_alloc);
@@ -1085,6 +1118,36 @@ int kbasep_find_enclosing_cpu_mapping_offset(
 
 KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_offset);
 
+int kbasep_find_enclosing_gpu_mapping_start_and_offset(struct kbase_context *kctx,
+		u64 gpu_addr, size_t size, u64 *start, u64 *offset)
+{
+	struct kbase_va_region *region;
+
+	kbase_gpu_vm_lock(kctx);
+
+	region = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
+
+	if (!region) {
+		kbase_gpu_vm_unlock(kctx);
+		return -EINVAL;
+	}
+
+	*start = region->start_pfn << PAGE_SHIFT;
+
+	*offset = gpu_addr - *start;
+
+	if (((region->start_pfn + region->nr_pages) << PAGE_SHIFT) < (gpu_addr + size)) {
+		kbase_gpu_vm_unlock(kctx);
+		return -EINVAL;
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_find_enclosing_gpu_mapping_start_and_offset);
+
 void kbase_sync_single(struct kbase_context *kctx,
 		struct tagged_addr t_cpu_pa, struct tagged_addr t_gpu_pa,
 		off_t offset, size_t size, enum kbase_sync_type sync_fn)
@@ -1268,6 +1331,11 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re
 	KBASE_DEBUG_ASSERT(NULL != reg);
 	lockdep_assert_held(&kctx->reg_lock);
 
+	if (reg->flags & KBASE_REG_JIT) {
+		dev_warn(reg->kctx->kbdev->dev, "Attempt to free JIT memory!\n");
+		return -EINVAL;
+	}
+
 	/*
 	 * Unlink the physical allocation before unmaking it evictable so
 	 * that the allocation isn't grown back to its last backed size
@@ -1413,6 +1481,9 @@ int kbase_update_region_flags(struct kbase_context *kctx,
 		reg->flags |= KBASE_REG_SHARE_IN;
 	}
 
+	if (flags & BASE_MEM_TILER_ALIGN_TOP)
+		reg->flags |= KBASE_REG_TILER_ALIGN_TOP;
+
 	/* Set up default MEMATTR usage */
 	if (kctx->kbdev->system_coherency == COHERENCY_ACE &&
 		(reg->flags & KBASE_REG_SHARE_BOTH)) {
@@ -1431,7 +1502,6 @@ int kbase_alloc_phy_pages_helper(
 		size_t nr_pages_requested)
 {
 	int new_page_count __maybe_unused;
-	size_t old_page_count = alloc->nents;
 	size_t nr_left = nr_pages_requested;
 	int res;
 	struct kbase_context *kctx;
@@ -1459,7 +1529,7 @@ int kbase_alloc_phy_pages_helper(
 	 * allocation is visible to the OOM killer */
 	kbase_process_page_usage_inc(kctx, nr_pages_requested);
 
-	tp = alloc->pages + old_page_count;
+	tp = alloc->pages + alloc->nents;
 
 #ifdef CONFIG_MALI_2MB_ALLOC
 	/* Check if we have enough pages requested so we can allocate a large
@@ -1569,15 +1639,6 @@ no_new_partial:
 			goto alloc_failed;
 	}
 
-	/*
-	 * Request a zone cache update, this scans only the new pages an
-	 * appends their information to the zone cache. if the update
-	 * fails then clear the cache so we fall-back to doing things
-	 * page by page.
-	 */
-	if (kbase_zone_cache_update(alloc, old_page_count) != 0)
-		kbase_zone_cache_clear(alloc);
-
 	KBASE_TLSTREAM_AUX_PAGESALLOC(
 			kctx->id,
 			(u64)new_page_count);
@@ -1591,7 +1652,7 @@ alloc_failed:
 	if (nr_left != nr_pages_requested)
 		kbase_mem_pool_free_pages(&kctx->lp_mem_pool,
 				nr_pages_requested - nr_left,
-				alloc->pages + old_page_count,
+				alloc->pages + alloc->nents,
 				false,
 				false);
@@ -1656,15 +1717,6 @@ int kbase_free_phy_pages_helper(
 		start_free++;
 	}
 
-	/*
-	 * Clear the zone cache, we don't expect JIT allocations to be
-	 * shrunk in parts so there is no point trying to optimize for that
-	 * by scanning for the changes caused by freeing this memory and
-	 * updating the existing cache entries.
-	 */
-	kbase_zone_cache_clear(alloc);
-
-
 	while (nr_pages_to_free) {
 		if (is_huge_head(*start_free)) {
 			/* This is a 2MB entry, so free all the 512 pages that
@@ -1852,8 +1904,13 @@ bool kbase_check_alloc_flags(unsigned long flags)
 	if ((flags & (BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR)) == 0)
 		return false;
 
-	/* GPU cannot be writing to GPU executable memory and cannot grow the memory on page fault. */
-	if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF)))
+	/* GPU executable memory cannot:
+	 * - Be written by the GPU
+	 * - Be grown on GPU page fault
+	 * - Have the top of its initial commit aligned to 'extent' */
+	if ((flags & BASE_MEM_PROT_GPU_EX) && (flags &
+			(BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF |
+			BASE_MEM_TILER_ALIGN_TOP)))
 		return false;
 
 	/* GPU should have at least read or write access otherwise there is no
@@ -1865,6 +1922,12 @@ bool kbase_check_alloc_flags(unsigned long flags)
 	if ((flags & BASE_MEM_IMPORT_SHARED) == BASE_MEM_IMPORT_SHARED)
 		return false;
 
+	/* Should not combine BASE_MEM_COHERENT_LOCAL with
+	 * BASE_MEM_COHERENT_SYSTEM */
+	if ((flags & (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM)) ==
+			(BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM))
+		return false;
+
 	return true;
 }
@@ -1886,6 +1949,10 @@ bool kbase_check_import_flags(unsigned long flags)
 	if (flags & BASE_MEM_GROW_ON_GPF)
 		return false;
 
+	/* Imported memory cannot be aligned to the end of its initial commit */
+	if (flags & BASE_MEM_TILER_ALIGN_TOP)
+		return false;
+
 	/* GPU should have at least read or write access otherwise there is no
 	   reason for importing. */
 	if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
@@ -1898,6 +1965,89 @@ bool kbase_check_import_flags(unsigned long flags)
 	return true;
 }
 
+int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags,
+		u64 va_pages, u64 commit_pages, u64 large_extent)
+{
+	struct device *dev = kctx->kbdev->dev;
+	int gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
+	u64 gpu_pc_pages_max = 1ULL << gpu_pc_bits >> PAGE_SHIFT;
+	struct kbase_va_region test_reg;
+
+	/* kbase_va_region's extent member can be of variable size, so check against that type */
+	test_reg.extent = large_extent;
+
+#define KBASE_MSG_PRE "GPU allocation attempted with "
+
+	if (0 == va_pages) {
+		dev_warn(dev, KBASE_MSG_PRE "0 va_pages!");
+		return -EINVAL;
+	}
+
+	if (va_pages > (U64_MAX / PAGE_SIZE)) {
+		/* 64-bit address range is the max */
+		dev_warn(dev, KBASE_MSG_PRE "va_pages==%lld larger than 64-bit address range!",
+				(unsigned long long)va_pages);
+		return -ENOMEM;
+	}
+
+	/* Note: commit_pages is checked against va_pages during
+	 * kbase_alloc_phy_pages() */
+
+	/* Limit GPU executable allocs to GPU PC size */
+	if ((flags & BASE_MEM_PROT_GPU_EX) && (va_pages > gpu_pc_pages_max)) {
+		dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_PROT_GPU_EX and va_pages==%lld larger than GPU PC range %lld",
+				(unsigned long long)va_pages,
+				(unsigned long long)gpu_pc_pages_max);
+
+		return -EINVAL;
+	}
+
+	if ((flags & (BASE_MEM_GROW_ON_GPF | BASE_MEM_TILER_ALIGN_TOP)) &&
+			test_reg.extent == 0) {
+		dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GROW_ON_GPF or BASE_MEM_TILER_ALIGN_TOP but extent == 0\n");
+		return -EINVAL;
+	}
+
+	if (!(flags & (BASE_MEM_GROW_ON_GPF | BASE_MEM_TILER_ALIGN_TOP)) &&
+			test_reg.extent != 0) {
+		dev_warn(dev, KBASE_MSG_PRE "neither BASE_MEM_GROW_ON_GPF nor BASE_MEM_TILER_ALIGN_TOP set but extent != 0\n");
+		return -EINVAL;
+	}
+
+	/* BASE_MEM_TILER_ALIGN_TOP memory has a number of restrictions */
+	if (flags & BASE_MEM_TILER_ALIGN_TOP) {
+#define KBASE_MSG_PRE_FLAG KBASE_MSG_PRE "BASE_MEM_TILER_ALIGN_TOP and "
+		unsigned long small_extent;
+
+		if (large_extent > BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES) {
+			dev_warn(dev, KBASE_MSG_PRE_FLAG "extent==%lld pages exceeds limit %lld",
+					(unsigned long long)large_extent,
+					BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES);
+			return -EINVAL;
+		}
+		/* For use with is_power_of_2, which takes unsigned long, so
+		 * must ensure e.g. on 32-bit kernel it'll fit in that type */
+		small_extent = (unsigned long)large_extent;
+
+		if (!is_power_of_2(small_extent)) {
+			dev_warn(dev, KBASE_MSG_PRE_FLAG "extent==%ld not a non-zero power of 2",
+					small_extent);
+			return -EINVAL;
+		}
+
+		if (commit_pages > large_extent) {
+			dev_warn(dev, KBASE_MSG_PRE_FLAG "commit_pages==%ld exceeds extent==%ld",
+					(unsigned long)commit_pages,
+					(unsigned long)large_extent);
+			return -EINVAL;
+		}
+#undef KBASE_MSG_PRE_FLAG
+	}
+
+	return 0;
+#undef KBASE_MSG_PRE
+}
+
 /**
  * @brief Acquire the per-context region list lock
  */
@@ -2126,6 +2276,7 @@ static void kbase_jit_destroy_worker(struct work_struct *work)
 		mutex_unlock(&kctx->jit_evict_lock);
 
 		kbase_gpu_vm_lock(kctx);
+		reg->flags &= ~KBASE_REG_JIT;
 		kbase_mem_free_region(kctx, reg);
 		kbase_gpu_vm_unlock(kctx);
 	} while (1);
@@ -2251,6 +2402,8 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
 		if (!reg)
 			goto out_unlocked;
 
+		reg->flags |= KBASE_REG_JIT;
+
 		mutex_lock(&kctx->jit_evict_lock);
 		list_add(&reg->jit_node, &kctx->jit_active_head);
 		mutex_unlock(&kctx->jit_evict_lock);
@@ -2322,8 +2475,10 @@ bool kbase_jit_evict(struct kbase_context *kctx)
 	}
 	mutex_unlock(&kctx->jit_evict_lock);
 
-	if (reg)
+	if (reg) {
+		reg->flags &= ~KBASE_REG_JIT;
 		kbase_mem_free_region(kctx, reg);
+	}
 
 	return (reg != NULL);
 }
@@ -2348,6 +2503,7 @@ void kbase_jit_term(struct kbase_context *kctx)
 				struct kbase_va_region, jit_node);
 		list_del(&walker->jit_node);
 		mutex_unlock(&kctx->jit_evict_lock);
+		walker->flags &= ~KBASE_REG_JIT;
 		kbase_mem_free_region(kctx, walker);
 		mutex_lock(&kctx->jit_evict_lock);
 	}
@@ -2358,6 +2514,7 @@ void kbase_jit_term(struct kbase_context *kctx)
 				struct kbase_va_region, jit_node);
 		list_del(&walker->jit_node);
 		mutex_unlock(&kctx->jit_evict_lock);
+		walker->flags &= ~KBASE_REG_JIT;
 		kbase_mem_free_region(kctx, walker);
 		mutex_lock(&kctx->jit_evict_lock);
 	}
@@ -2379,6 +2536,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
 	struct device *dev;
 	unsigned long offset;
 	unsigned long local_size;
+	unsigned long gwt_mask = ~0;
 
 	alloc = reg->gpu_alloc;
 	pa = kbase_get_gpu_phy_pages(reg);
@@ -2447,10 +2605,14 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
 	}
 
 	alloc->nents = pinned_pages;
+#ifdef CONFIG_MALI_JOB_DUMP
+	if (kctx->gwt_enabled)
+		gwt_mask = ~KBASE_REG_GPU_WR;
+#endif
 
 	err = kbase_mmu_insert_pages(kctx, reg->start_pfn, pa,
 			kbase_reg_current_backed_size(reg),
-			reg->flags);
+			reg->flags & gwt_mask);
 	if (err == 0)
 		return 0;
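
The reworked alignment in kbase_region_tracker_find_region_meeting_reqs() picks the lowest start_pfn of the form n*align + align_offset that lies at or above the region start; with align_offset equal to align it degenerates to the old round-up. A minimal standalone sketch of the arithmetic (plain C, outside the driver; the helper name and sample values are illustrative only):

#include <assert.h>
#include <stdint.h>

/* Mirrors the two-step sequence added by the patch:
 *   start_pfn += align_mask;
 *   start_pfn -= (start_pfn - align_offset) & align_mask;
 */
static uint64_t align_up_with_offset(uint64_t start_pfn, uint64_t align_mask,
				     uint64_t align_offset)
{
	start_pfn += align_mask;
	start_pfn -= (start_pfn - align_offset) & align_mask;
	return start_pfn;
}

int main(void)
{
	/* align_offset == align (16): same result as the old
	 * (start_pfn + align_mask) & ~align_mask round-up. */
	assert(align_up_with_offset(100, 15, 16) == ((100 + 15) & ~15ULL));

	/* Non-trivial offset: align = 16, offset = 6, so candidates are
	 * ..., 86, 102, 118, ...; the lowest one above 100 is 102. */
	assert(align_up_with_offset(100, 15, 6) == 102);

	/* The KBASE_REG_TILER_ALIGN_TOP path passes align_mask = extent - 1
	 * and align_offset = extent - initial_commit, so the end of the
	 * initial commit lands on an extent boundary: with extent = 16 and
	 * initial_commit = 5, start_pfn = 107 and 107 + 5 = 112 = 7 * 16. */
	assert(align_up_with_offset(100, 15, 16 - 5) == 107);
	return 0;
}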
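
Path 2 of kbase_add_va_region() is now a single do/while: it searches, and retries only when the request is in the CUSTOM_VA zone (where JIT allocations live), evicting one JIT region per iteration until the search succeeds or nothing is left to evict. A reduced skeleton of that control flow (the toy find/evict functions below are stand-ins, not driver calls):

#include <stdbool.h>
#include <stdio.h>

/* Toy state: three JIT regions can be evicted, and the search only
 * succeeds once at least two of them have been freed. */
static int evictable = 3;
static int evicted;

static bool find_free_region(void)
{
	return evicted >= 2;
}

static bool evict_one_jit_region(void)
{
	if (evictable == 0)
		return false;
	evictable--;
	evicted++;
	return true;
}

/* Skeleton of the retry pattern: only retry after an eviction has
 * potentially released address space, and only for the JIT zone. */
static bool alloc_va(bool is_custom_va_zone)
{
	bool found;

	do {
		found = find_free_region();
		if (found)
			break;
		if (!is_custom_va_zone)
			break;
	} while (evict_one_jit_region());

	return found;
}

int main(void)
{
	printf("custom VA zone: %s\n", alloc_va(true) ? "mapped" : "-ENOMEM");
	return 0;
}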
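
The kbase_gpu_mmap() and kbase_jd_user_buf_map() call sites now AND the region flags with gwt_mask before inserting MMU entries, so that with CONFIG_MALI_JOB_DUMP and kctx->gwt_enabled the GPU write-permission bit is dropped and GPU writes initially fault, which lets them be tracked. The masking pattern in isolation (the flag bits below are illustrative placeholders, not the real KBASE_REG_* values):

#include <stdio.h>

#define REG_GPU_RD (1UL << 0) /* placeholder, not the driver's value */
#define REG_GPU_WR (1UL << 1) /* placeholder, not the driver's value */

static unsigned long flags_for_mmu_insert(unsigned long reg_flags,
					  int gwt_enabled)
{
	unsigned long gwt_mask = ~0UL;

	/* Only the write bit is cleared, and only while tracking is on;
	 * every other attribute passes through unchanged. */
	if (gwt_enabled)
		gwt_mask = ~REG_GPU_WR;

	return reg_flags & gwt_mask;
}

int main(void)
{
	unsigned long flags = REG_GPU_RD | REG_GPU_WR;

	printf("tracking off: %#lx\n", flags_for_mmu_insert(flags, 0)); /* 0x3 */
	printf("tracking on:  %#lx\n", flags_for_mmu_insert(flags, 1)); /* 0x1 */
	return 0;
}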
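
kbasep_find_enclosing_gpu_mapping_start_and_offset() converts the enclosing region's start_pfn to a byte address, reports the caller's offset into it, and fails if the queried range runs past the region's end. The arithmetic on its own (4 KiB pages assumed; the region and address values are made up for the example):

#include <stdint.h>
#include <stdio.h>

#define EX_PAGE_SHIFT 12 /* assuming 4 KiB pages */

/* Returns 0 on success, -1 when [gpu_addr, gpu_addr + size) is not fully
 * contained in the region -- the -EINVAL path in the patch. */
static int start_and_offset(uint64_t region_start_pfn, uint64_t region_nr_pages,
			    uint64_t gpu_addr, uint64_t size,
			    uint64_t *start, uint64_t *offset)
{
	uint64_t region_end = (region_start_pfn + region_nr_pages) << EX_PAGE_SHIFT;

	*start = region_start_pfn << EX_PAGE_SHIFT;
	*offset = gpu_addr - *start;

	if (region_end < gpu_addr + size)
		return -1;
	return 0;
}

int main(void)
{
	uint64_t start, offset;

	/* Region: pfn 0x1000, 16 pages => GPU VA 0x1000000..0x1010000. */
	if (!start_and_offset(0x1000, 16, 0x1004000, 0x2000, &start, &offset))
		printf("start=%#llx offset=%#llx\n",
		       (unsigned long long)start, (unsigned long long)offset);
	return 0;
}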
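
The new kbase_check_alloc_sizes() centralises size and extent validation: va_pages must be non-zero and within the 64-bit address range, executable allocations must fit the GPU program-counter range, extent must be non-zero exactly when BASE_MEM_GROW_ON_GPF or BASE_MEM_TILER_ALIGN_TOP is requested, and BASE_MEM_TILER_ALIGN_TOP further requires a power-of-two extent below a fixed cap with commit_pages not exceeding it. A condensed sketch of the same rules (the flag bits and the cap are placeholders, and the U64_MAX / PAGE_SIZE range check is left out):

#include <stdbool.h>
#include <stdint.h>

#define MEM_GROW_ON_GPF     (1u << 0) /* placeholder bit values */
#define MEM_TILER_ALIGN_TOP (1u << 1)
#define MEM_PROT_GPU_EX     (1u << 2)

/* Placeholder cap standing in for BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES. */
#define TILER_ALIGN_TOP_EXTENT_MAX_PAGES (1u << 21)

static bool is_pow2(uint64_t x)
{
	return x && !(x & (x - 1));
}

/* True if a request would pass the checks this patch adds. */
static bool alloc_sizes_ok(unsigned int flags, uint64_t va_pages,
			   uint64_t commit_pages, uint64_t extent,
			   uint64_t gpu_pc_pages_max)
{
	if (va_pages == 0)
		return false;
	if ((flags & MEM_PROT_GPU_EX) && va_pages > gpu_pc_pages_max)
		return false;
	/* extent is meaningful only for grow-on-fault / tiler-align-top. */
	if ((flags & (MEM_GROW_ON_GPF | MEM_TILER_ALIGN_TOP)) && extent == 0)
		return false;
	if (!(flags & (MEM_GROW_ON_GPF | MEM_TILER_ALIGN_TOP)) && extent != 0)
		return false;
	if (flags & MEM_TILER_ALIGN_TOP) {
		if (extent > TILER_ALIGN_TOP_EXTENT_MAX_PAGES)
			return false;
		if (!is_pow2(extent))
			return false;
		if (commit_pages > extent)
			return false;
	}
	return true;
}

int main(void)
{
	/* e.g. tiler-align-top with a 512-page power-of-two extent. */
	return alloc_sizes_ok(MEM_TILER_ALIGN_TOP, 1024, 256, 512, 1 << 20) ? 0 : 1;
}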