Diffstat (limited to 'mali_kbase/mali_kbase_mem.c')
-rw-r--r--   mali_kbase/mali_kbase_mem.c   292
1 file changed, 227 insertions, 65 deletions
diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c
index 375e484..a0897fb 100644
--- a/mali_kbase/mali_kbase_mem.c
+++ b/mali_kbase/mali_kbase_mem.c
@@ -7,16 +7,21 @@
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
- * A copy of the licence is included with the program, and can also be obtained
- * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
*
*/
-
-
/**
* @file mali_kbase_mem.c
* Base kernel memory APIs
@@ -31,6 +36,7 @@
#include <linux/bug.h>
#include <linux/compat.h>
#include <linux/version.h>
+#include <linux/log2.h>
#include <mali_kbase_config.h>
#include <mali_kbase.h>
@@ -231,7 +237,10 @@ struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kba
KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address);
/* Find region meeting given requirements */
-static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(struct kbase_context *kctx, struct kbase_va_region *reg_reqs, size_t nr_pages, size_t align)
+static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(
+ struct kbase_context *kctx, struct kbase_va_region *reg_reqs,
+ size_t nr_pages, size_t align_offset, size_t align_mask,
+ u64 *out_start_pfn)
{
struct rb_node *rbnode = NULL;
struct kbase_va_region *reg = NULL;
@@ -239,7 +248,6 @@ static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(str
/* Note that this search is a linear search, as we do not have a target
address in mind, so does not benefit from the rbtree search */
-
rbtree = kbase_reg_flags_to_rbtree(kctx, reg_reqs);
rbnode = rb_first(rbtree);
@@ -249,12 +257,23 @@ static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(str
if ((reg->nr_pages >= nr_pages) &&
(reg->flags & KBASE_REG_FREE)) {
/* Check alignment */
- u64 start_pfn = (reg->start_pfn + align - 1) & ~(align - 1);
+ u64 start_pfn = reg->start_pfn;
+
+ /* When align_offset == align, this sequence is
+ * equivalent to:
+ * (start_pfn + align_mask) & ~(align_mask)
+ *
+ * Otherwise, it aligns to n*align + offset, for the lowest
+ * value n that keeps the result >= the original start_pfn */
+ start_pfn += align_mask;
+ start_pfn -= (start_pfn - align_offset) & (align_mask);
if ((start_pfn >= reg->start_pfn) &&
(start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) &&
- ((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1)))
+ ((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1))) {
+ *out_start_pfn = start_pfn;
return reg;
+ }
}
rbnode = rb_next(rbnode);
}
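
For reference, the rounding added above can be exercised in isolation. The sketch below is a standalone illustration, not driver code: round_to_offset() is a hypothetical helper that mirrors the two new lines in kbase_region_tracker_find_region_meeting_reqs(), and all numbers are invented.

```c
#include <stdint.h>
#include <stdio.h>

/* Mirrors: start_pfn += align_mask;
 *          start_pfn -= (start_pfn - align_offset) & align_mask; */
static uint64_t round_to_offset(uint64_t start_pfn, uint64_t align_offset,
				uint64_t align_mask)
{
	start_pfn += align_mask;
	start_pfn -= (start_pfn - align_offset) & align_mask;
	return start_pfn;
}

int main(void)
{
	/* align_offset == align (16), align_mask == 15: identical to the old
	 * (start_pfn + align_mask) & ~align_mask, i.e. 1000 -> 1008 */
	printf("%llu\n", (unsigned long long)round_to_offset(1000, 16, 15));

	/* offset form: lowest value >= 1000 congruent to 448 mod 512, i.e. 1472 */
	printf("%llu\n", (unsigned long long)round_to_offset(1000, 448, 511));
	return 0;
}
```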
@@ -425,7 +444,7 @@ int kbase_add_va_region(struct kbase_context *kctx,
align = 1;
/* must be a power of 2 */
- KBASE_DEBUG_ASSERT((align & (align - 1)) == 0);
+ KBASE_DEBUG_ASSERT(is_power_of_2(align));
KBASE_DEBUG_ASSERT(nr_pages > 0);
/* Path 1: Map a specific address. Find the enclosing region, which *must* be free. */
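
A small aside on the assert change: is_power_of_2() from <linux/log2.h> also rejects 0, which the old bit trick accepted; that is likely harmless here given the `align = 1` fallback visible above, but the new form states the intent directly. Minimal standalone comparison (old_check/new_check are hypothetical stand-ins):

```c
#include <stdbool.h>
#include <stdio.h>

static bool old_check(unsigned long x) { return (x & (x - 1)) == 0; }
/* Same test as is_power_of_2(): exactly one bit set, and non-zero */
static bool new_check(unsigned long x) { return x != 0 && (x & (x - 1)) == 0; }

int main(void)
{
	printf("0:  old=%d new=%d\n", old_check(0), new_check(0));   /* old=1 new=0 */
	printf("64: old=%d new=%d\n", old_check(64), new_check(64)); /* old=1 new=1 */
	printf("48: old=%d new=%d\n", old_check(48), new_check(48)); /* old=0 new=0 */
	return 0;
}
```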
@@ -458,38 +477,44 @@ int kbase_add_va_region(struct kbase_context *kctx,
goto exit;
}
- /* Path 2: Map any free address which meets the requirements. */
- {
+ /* Path 2: Map any free address which meets the requirements.
+ *
+ * Depending on the zone the allocation request is for
+ * we might need to retry it. */
+ do {
u64 start_pfn;
+ size_t align_offset = align;
+ size_t align_mask = align - 1;
+
+ if ((reg->flags & KBASE_REG_TILER_ALIGN_TOP)) {
+ WARN(align > 1,
+ "kbase_add_va_region with align %lx might not be honored for KBASE_REG_TILER_ALIGN_TOP memory",
+ (unsigned long)align);
+ align_mask = reg->extent - 1;
+ align_offset = reg->extent - reg->initial_commit;
+ }
+
+ tmp = kbase_region_tracker_find_region_meeting_reqs(kctx, reg,
+ nr_pages, align_offset, align_mask,
+ &start_pfn);
+ if (tmp) {
+ err = kbase_insert_va_region_nolock(kctx, reg, tmp,
+ start_pfn, nr_pages);
+ break;
+ }
/*
- * Depending on the zone the allocation request is for
- * we might need to retry it.
+ * If the allocation is not from the same zone as JIT
+ * then don't retry, we're out of VA and there is
+ * nothing which can be done about it.
*/
- do {
- tmp = kbase_region_tracker_find_region_meeting_reqs(
- kctx, reg, nr_pages, align);
- if (tmp) {
- start_pfn = (tmp->start_pfn + align - 1) &
- ~(align - 1);
- err = kbase_insert_va_region_nolock(kctx, reg,
- tmp, start_pfn, nr_pages);
- break;
- }
-
- /*
- * If the allocation is not from the same zone as JIT
- * then don't retry, we're out of VA and there is
- * nothing which can be done about it.
- */
- if ((reg->flags & KBASE_REG_ZONE_MASK) !=
- KBASE_REG_ZONE_CUSTOM_VA)
- break;
- } while (kbase_jit_evict(kctx));
+ if ((reg->flags & KBASE_REG_ZONE_MASK) !=
+ KBASE_REG_ZONE_CUSTOM_VA)
+ break;
+ } while (kbase_jit_evict(kctx));
- if (!tmp)
- err = -ENOMEM;
- }
+ if (!tmp)
+ err = -ENOMEM;
exit:
return err;
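
The KBASE_REG_TILER_ALIGN_TOP branch above chooses align_offset = extent - initial_commit, which places the top of the initial commit on an extent boundary, matching the flag's description further down in this patch. A standalone check of that property, with invented numbers:

```c
#include <assert.h>
#include <stdint.h>

int main(void)
{
	const uint64_t extent = 512, initial_commit = 64;   /* pages, invented */
	const uint64_t align_mask = extent - 1;
	const uint64_t align_offset = extent - initial_commit;
	uint64_t start_pfn = 1000;

	/* Same rounding as kbase_region_tracker_find_region_meeting_reqs() */
	start_pfn += align_mask;
	start_pfn -= (start_pfn - align_offset) & align_mask;

	/* 1472 + 64 == 1536 == 3 * 512: the end of the initially committed
	 * pages is aligned to 'extent'. */
	assert((start_pfn + initial_commit) % extent == 0);
	return 0;
}
```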
@@ -829,6 +854,8 @@ struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64
new_reg->start_pfn = start_pfn;
new_reg->nr_pages = nr_pages;
+ INIT_LIST_HEAD(&new_reg->jit_node);
+
return new_reg;
}
@@ -902,6 +929,12 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64
size_t i = 0;
unsigned long attr;
unsigned long mask = ~KBASE_REG_MEMATTR_MASK;
+ unsigned long gwt_mask = ~0;
+
+#ifdef CONFIG_MALI_JOB_DUMP
+ if (kctx->gwt_enabled)
+ gwt_mask = ~KBASE_REG_GPU_WR;
+#endif
if ((kctx->kbdev->system_coherency == COHERENCY_ACE) &&
(reg->flags & KBASE_REG_SHARE_BOTH))
@@ -929,7 +962,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64
reg->start_pfn + (i * stride),
alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset,
alloc->imported.alias.aliased[i].length,
- reg->flags);
+ reg->flags & gwt_mask);
if (err)
goto bad_insert;
@@ -939,7 +972,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64
reg->start_pfn + i * stride,
kctx->aliasing_sink_page,
alloc->imported.alias.aliased[i].length,
- (reg->flags & mask) | attr);
+ (reg->flags & mask & gwt_mask) | attr);
if (err)
goto bad_insert;
@@ -949,7 +982,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64
err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
kbase_get_gpu_phy_pages(reg),
kbase_reg_current_backed_size(reg),
- reg->flags);
+ reg->flags & gwt_mask);
if (err)
goto bad_insert;
kbase_mem_phy_alloc_gpu_mapped(reg->gpu_alloc);
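
The gwt_mask added above only ever strips KBASE_REG_GPU_WR, and only when kctx->gwt_enabled is set under CONFIG_MALI_JOB_DUMP; presumably the initially read-only GPU mapping is what lets writes be trapped and tracked. A minimal standalone illustration of the masking, with invented bit values:

```c
#include <assert.h>

#define KBASE_REG_GPU_WR (1ul << 1)  /* invented bit positions, for illustration */
#define KBASE_REG_GPU_RD (1ul << 2)

int main(void)
{
	unsigned long flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR;
	unsigned long gwt_mask = ~0ul;           /* GWT disabled: mask is a no-op */

	assert((flags & gwt_mask) == flags);

	gwt_mask = ~KBASE_REG_GPU_WR;            /* GWT enabled */
	assert((flags & gwt_mask) == KBASE_REG_GPU_RD);
	return 0;
}
```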
@@ -1085,6 +1118,36 @@ int kbasep_find_enclosing_cpu_mapping_offset(
KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_offset);
+int kbasep_find_enclosing_gpu_mapping_start_and_offset(struct kbase_context *kctx,
+ u64 gpu_addr, size_t size, u64 *start, u64 *offset)
+{
+ struct kbase_va_region *region;
+
+ kbase_gpu_vm_lock(kctx);
+
+ region = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
+
+ if (!region) {
+ kbase_gpu_vm_unlock(kctx);
+ return -EINVAL;
+ }
+
+ *start = region->start_pfn << PAGE_SHIFT;
+
+ *offset = gpu_addr - *start;
+
+ if (((region->start_pfn + region->nr_pages) << PAGE_SHIFT) < (gpu_addr + size)) {
+ kbase_gpu_vm_unlock(kctx);
+ return -EINVAL;
+ }
+
+ kbase_gpu_vm_unlock(kctx);
+
+ return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_find_enclosing_gpu_mapping_start_and_offset);
+
void kbase_sync_single(struct kbase_context *kctx,
struct tagged_addr t_cpu_pa, struct tagged_addr t_gpu_pa,
off_t offset, size_t size, enum kbase_sync_type sync_fn)
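
A hypothetical caller of the new helper might look like the sketch below; example_lookup() is not part of this patch, and the comments only restate the two -EINVAL cases visible in the function above.

```c
/* Sketch only: resolve a user-supplied GPU address range to its enclosing
 * mapping and a region-relative byte offset. */
static int example_lookup(struct kbase_context *kctx, u64 gpu_addr, size_t size)
{
	u64 start, offset;
	int err;

	err = kbasep_find_enclosing_gpu_mapping_start_and_offset(kctx,
			gpu_addr, size, &start, &offset);
	if (err)
		return err;  /* no enclosing region, or the range runs past its end */

	/* start is the region base in bytes (start_pfn << PAGE_SHIFT) and
	 * offset == gpu_addr - start, so the whole range fits in one region. */
	return 0;
}
```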
@@ -1268,6 +1331,11 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re
KBASE_DEBUG_ASSERT(NULL != reg);
lockdep_assert_held(&kctx->reg_lock);
+ if (reg->flags & KBASE_REG_JIT) {
+ dev_warn(reg->kctx->kbdev->dev, "Attempt to free JIT memory!\n");
+ return -EINVAL;
+ }
+
/*
* Unlink the physical allocation before unmaking it evictable so
* that the allocation isn't grown back to its last backed size
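
This guard pairs with the JIT paths later in the patch, which clear KBASE_REG_JIT immediately before freeing. Illustrative fragment of the resulting convention (not a complete function):

```c
/* JIT teardown/eviction (as done later in this patch): the JIT code owns
 * the region, so it drops the flag and then frees it. */
reg->flags &= ~KBASE_REG_JIT;
kbase_mem_free_region(kctx, reg);

/* Any other path reaching kbase_mem_free_region() with KBASE_REG_JIT still
 * set now gets a dev_warn() and -EINVAL instead of freeing JIT memory. */
```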
@@ -1413,6 +1481,9 @@ int kbase_update_region_flags(struct kbase_context *kctx,
reg->flags |= KBASE_REG_SHARE_IN;
}
+ if (flags & BASE_MEM_TILER_ALIGN_TOP)
+ reg->flags |= KBASE_REG_TILER_ALIGN_TOP;
+
/* Set up default MEMATTR usage */
if (kctx->kbdev->system_coherency == COHERENCY_ACE &&
(reg->flags & KBASE_REG_SHARE_BOTH)) {
@@ -1431,7 +1502,6 @@ int kbase_alloc_phy_pages_helper(
size_t nr_pages_requested)
{
int new_page_count __maybe_unused;
- size_t old_page_count = alloc->nents;
size_t nr_left = nr_pages_requested;
int res;
struct kbase_context *kctx;
@@ -1459,7 +1529,7 @@ int kbase_alloc_phy_pages_helper(
* allocation is visible to the OOM killer */
kbase_process_page_usage_inc(kctx, nr_pages_requested);
- tp = alloc->pages + old_page_count;
+ tp = alloc->pages + alloc->nents;
#ifdef CONFIG_MALI_2MB_ALLOC
/* Check if we have enough pages requested so we can allocate a large
@@ -1569,15 +1639,6 @@ no_new_partial:
goto alloc_failed;
}
- /*
- * Request a zone cache update, this scans only the new pages an
- * appends their information to the zone cache. if the update
- * fails then clear the cache so we fall-back to doing things
- * page by page.
- */
- if (kbase_zone_cache_update(alloc, old_page_count) != 0)
- kbase_zone_cache_clear(alloc);
-
KBASE_TLSTREAM_AUX_PAGESALLOC(
kctx->id,
(u64)new_page_count);
@@ -1591,7 +1652,7 @@ alloc_failed:
if (nr_left != nr_pages_requested)
kbase_mem_pool_free_pages(&kctx->lp_mem_pool,
nr_pages_requested - nr_left,
- alloc->pages + old_page_count,
+ alloc->pages + alloc->nents,
false,
false);
@@ -1656,15 +1717,6 @@ int kbase_free_phy_pages_helper(
start_free++;
}
- /*
- * Clear the zone cache, we don't expect JIT allocations to be
- * shrunk in parts so there is no point trying to optimize for that
- * by scanning for the changes caused by freeing this memory and
- * updating the existing cache entries.
- */
- kbase_zone_cache_clear(alloc);
-
-
while (nr_pages_to_free) {
if (is_huge_head(*start_free)) {
/* This is a 2MB entry, so free all the 512 pages that
@@ -1852,8 +1904,13 @@ bool kbase_check_alloc_flags(unsigned long flags)
if ((flags & (BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR)) == 0)
return false;
- /* GPU cannot be writing to GPU executable memory and cannot grow the memory on page fault. */
- if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF)))
+ /* GPU executable memory cannot:
+ * - Be written by the GPU
+ * - Be grown on GPU page fault
+ * - Have the top of its initial commit aligned to 'extent' */
+ if ((flags & BASE_MEM_PROT_GPU_EX) && (flags &
+ (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF |
+ BASE_MEM_TILER_ALIGN_TOP)))
return false;
/* GPU should have at least read or write access otherwise there is no
@@ -1865,6 +1922,12 @@ bool kbase_check_alloc_flags(unsigned long flags)
if ((flags & BASE_MEM_IMPORT_SHARED) == BASE_MEM_IMPORT_SHARED)
return false;
+ /* Should not combine BASE_MEM_COHERENT_LOCAL with
+ * BASE_MEM_COHERENT_SYSTEM */
+ if ((flags & (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM)) ==
+ (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM))
+ return false;
+
return true;
}
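
Some example outcomes of the updated checks; these calls are illustrative only and assume no restriction in the unchanged parts of kbase_check_alloc_flags() applies to the chosen combinations.

```c
/* false: executable memory may not use BASE_MEM_TILER_ALIGN_TOP */
kbase_check_alloc_flags(BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_RD |
			BASE_MEM_PROT_GPU_EX | BASE_MEM_TILER_ALIGN_TOP);

/* false: the two coherency requests are now mutually exclusive */
kbase_check_alloc_flags(BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_RD |
			BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM);

/* true: TILER_ALIGN_TOP itself is allowed here; its size/extent rules are
 * enforced separately by kbase_check_alloc_sizes() */
kbase_check_alloc_flags(BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_RD |
			BASE_MEM_PROT_GPU_WR | BASE_MEM_TILER_ALIGN_TOP);
```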
@@ -1886,6 +1949,10 @@ bool kbase_check_import_flags(unsigned long flags)
if (flags & BASE_MEM_GROW_ON_GPF)
return false;
+ /* Imported memory cannot be aligned to the end of its initial commit */
+ if (flags & BASE_MEM_TILER_ALIGN_TOP)
+ return false;
+
/* GPU should have at least read or write access otherwise there is no
reason for importing. */
if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
@@ -1898,6 +1965,89 @@ bool kbase_check_import_flags(unsigned long flags)
return true;
}
+int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags,
+ u64 va_pages, u64 commit_pages, u64 large_extent)
+{
+ struct device *dev = kctx->kbdev->dev;
+ int gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
+ u64 gpu_pc_pages_max = 1ULL << gpu_pc_bits >> PAGE_SHIFT;
+ struct kbase_va_region test_reg;
+
+ /* kbase_va_region's extent member can be of variable size, so check against that type */
+ test_reg.extent = large_extent;
+
+#define KBASE_MSG_PRE "GPU allocation attempted with "
+
+ if (0 == va_pages) {
+ dev_warn(dev, KBASE_MSG_PRE "0 va_pages!");
+ return -EINVAL;
+ }
+
+ if (va_pages > (U64_MAX / PAGE_SIZE)) {
+ /* 64-bit address range is the max */
+ dev_warn(dev, KBASE_MSG_PRE "va_pages==%lld larger than 64-bit address range!",
+ (unsigned long long)va_pages);
+ return -ENOMEM;
+ }
+
+ /* Note: commit_pages is checked against va_pages during
+ * kbase_alloc_phy_pages() */
+
+ /* Limit GPU executable allocs to GPU PC size */
+ if ((flags & BASE_MEM_PROT_GPU_EX) && (va_pages > gpu_pc_pages_max)) {
+ dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_PROT_GPU_EX and va_pages==%lld larger than GPU PC range %lld",
+ (unsigned long long)va_pages,
+ (unsigned long long)gpu_pc_pages_max);
+
+ return -EINVAL;
+ }
+
+ if ((flags & (BASE_MEM_GROW_ON_GPF | BASE_MEM_TILER_ALIGN_TOP)) &&
+ test_reg.extent == 0) {
+ dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GROW_ON_GPF or BASE_MEM_TILER_ALIGN_TOP but extent == 0\n");
+ return -EINVAL;
+ }
+
+ if (!(flags & (BASE_MEM_GROW_ON_GPF | BASE_MEM_TILER_ALIGN_TOP)) &&
+ test_reg.extent != 0) {
+ dev_warn(dev, KBASE_MSG_PRE "neither BASE_MEM_GROW_ON_GPF nor BASE_MEM_TILER_ALIGN_TOP set but extent != 0\n");
+ return -EINVAL;
+ }
+
+ /* BASE_MEM_TILER_ALIGN_TOP memory has a number of restrictions */
+ if (flags & BASE_MEM_TILER_ALIGN_TOP) {
+#define KBASE_MSG_PRE_FLAG KBASE_MSG_PRE "BASE_MEM_TILER_ALIGN_TOP and "
+ unsigned long small_extent;
+
+ if (large_extent > BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES) {
+ dev_warn(dev, KBASE_MSG_PRE_FLAG "extent==%lld pages exceeds limit %lld",
+ (unsigned long long)large_extent,
+ BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES);
+ return -EINVAL;
+ }
+ /* For use with is_power_of_2, which takes unsigned long, so
+ * must ensure e.g. on 32-bit kernel it'll fit in that type */
+ small_extent = (unsigned long)large_extent;
+
+ if (!is_power_of_2(small_extent)) {
+ dev_warn(dev, KBASE_MSG_PRE_FLAG "extent==%ld not a non-zero power of 2",
+ small_extent);
+ return -EINVAL;
+ }
+
+ if (commit_pages > large_extent) {
+ dev_warn(dev, KBASE_MSG_PRE_FLAG "commit_pages==%ld exceeds extent==%ld",
+ (unsigned long)commit_pages,
+ (unsigned long)large_extent);
+ return -EINVAL;
+ }
+#undef KBASE_MSG_PRE_FLAG
+ }
+
+ return 0;
+#undef KBASE_MSG_PRE
+}
+
/**
* @brief Acquire the per-context region list lock
*/
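
Example outcomes for the new size checks; illustrative calls only, assuming a valid kctx, that an extent of 512 pages is within BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES, and that the GPU PC limit is not the binding constraint.

```c
/* -EINVAL: BASE_MEM_GROW_ON_GPF requires a non-zero extent */
kbase_check_alloc_sizes(kctx, BASE_MEM_GROW_ON_GPF, 1024, 64, 0);

/* -EINVAL: an extent without GROW_ON_GPF or TILER_ALIGN_TOP is rejected */
kbase_check_alloc_sizes(kctx, 0, 1024, 64, 512);

/* -EINVAL: for TILER_ALIGN_TOP the extent must be a power of 2 */
kbase_check_alloc_sizes(kctx, BASE_MEM_TILER_ALIGN_TOP, 1024, 64, 384);

/* -EINVAL: commit_pages (768) may not exceed the extent (512) */
kbase_check_alloc_sizes(kctx, BASE_MEM_TILER_ALIGN_TOP, 1024, 768, 512);

/* 0: va_pages, commit_pages and extent all satisfy the new rules */
kbase_check_alloc_sizes(kctx, BASE_MEM_TILER_ALIGN_TOP, 1024, 64, 512);
```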
@@ -2126,6 +2276,7 @@ static void kbase_jit_destroy_worker(struct work_struct *work)
mutex_unlock(&kctx->jit_evict_lock);
kbase_gpu_vm_lock(kctx);
+ reg->flags &= ~KBASE_REG_JIT;
kbase_mem_free_region(kctx, reg);
kbase_gpu_vm_unlock(kctx);
} while (1);
@@ -2251,6 +2402,8 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
if (!reg)
goto out_unlocked;
+ reg->flags |= KBASE_REG_JIT;
+
mutex_lock(&kctx->jit_evict_lock);
list_add(&reg->jit_node, &kctx->jit_active_head);
mutex_unlock(&kctx->jit_evict_lock);
@@ -2322,8 +2475,10 @@ bool kbase_jit_evict(struct kbase_context *kctx)
}
mutex_unlock(&kctx->jit_evict_lock);
- if (reg)
+ if (reg) {
+ reg->flags &= ~KBASE_REG_JIT;
kbase_mem_free_region(kctx, reg);
+ }
return (reg != NULL);
}
@@ -2348,6 +2503,7 @@ void kbase_jit_term(struct kbase_context *kctx)
struct kbase_va_region, jit_node);
list_del(&walker->jit_node);
mutex_unlock(&kctx->jit_evict_lock);
+ walker->flags &= ~KBASE_REG_JIT;
kbase_mem_free_region(kctx, walker);
mutex_lock(&kctx->jit_evict_lock);
}
@@ -2358,6 +2514,7 @@ void kbase_jit_term(struct kbase_context *kctx)
struct kbase_va_region, jit_node);
list_del(&walker->jit_node);
mutex_unlock(&kctx->jit_evict_lock);
+ walker->flags &= ~KBASE_REG_JIT;
kbase_mem_free_region(kctx, walker);
mutex_lock(&kctx->jit_evict_lock);
}
@@ -2379,6 +2536,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
struct device *dev;
unsigned long offset;
unsigned long local_size;
+ unsigned long gwt_mask = ~0;
alloc = reg->gpu_alloc;
pa = kbase_get_gpu_phy_pages(reg);
@@ -2447,10 +2605,14 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
}
alloc->nents = pinned_pages;
+#ifdef CONFIG_MALI_JOB_DUMP
+ if (kctx->gwt_enabled)
+ gwt_mask = ~KBASE_REG_GPU_WR;
+#endif
err = kbase_mmu_insert_pages(kctx, reg->start_pfn, pa,
kbase_reg_current_backed_size(reg),
- reg->flags);
+ reg->flags & gwt_mask);
if (err == 0)
return 0;