path: root/mali_kbase/mali_kbase_mem.c
author     Toby Sunrise <tobyrs@google.com>    2023-05-01 13:23:54 +0000
committer  Toby Sunrise <tobyrs@google.com>    2023-05-01 13:33:11 +0000
commit     f7a77046d77266482dedf54d134102e6031a7438 (patch)
tree       4d6813894d79edb7ad605005087b0bce11055c4c /mali_kbase/mali_kbase_mem.c
parent     25e383ffa36a9916065804029fbe3552c71329fe (diff)
download   gpu-f7a77046d77266482dedf54d134102e6031a7438.tar.gz
Mali Valhall Android DDK r42p0-01eac0 KMD
Provenance: 300534375857cb2963042df7b788b1ab5616c500 (ipdelivery/EAC/v_r42p0)
VX504X08X-BU-00000-r42p0-01eac0 - Valhall Android DDK
VX504X08X-BU-60000-r42p0-01eac0 - Valhall Android Document Bundle
VX504X08X-DC-11001-r42p0-01eac0 - Valhall Android DDK Software Errata
VX504X08X-SW-99006-r42p0-01eac0 - Valhall Android Renderscript AOSP parts

Change-Id: I3b15e01574f03706574a8edaf50dae4ba16e30c0
Diffstat (limited to 'mali_kbase/mali_kbase_mem.c')
-rw-r--r--  mali_kbase/mali_kbase_mem.c  146
1 file changed, 115 insertions, 31 deletions
diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c
index abd01c1..b18b1e2 100644
--- a/mali_kbase/mali_kbase_mem.c
+++ b/mali_kbase/mali_kbase_mem.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -2062,6 +2062,7 @@ void kbase_sync_single(struct kbase_context *kctx,
src = ((unsigned char *)kmap(gpu_page)) + offset;
dst = ((unsigned char *)kmap(cpu_page)) + offset;
}
+
memcpy(dst, src, size);
kunmap(gpu_page);
kunmap(cpu_page);
@@ -4985,10 +4986,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
struct page **pages;
struct tagged_addr *pa;
long i, dma_mapped_pages;
- unsigned long address;
struct device *dev;
- unsigned long offset_within_page;
- unsigned long remaining_size;
unsigned long gwt_mask = ~0;
/* Calls to this function are inherently asynchronous, with respect to
* MMU operations.
@@ -5004,19 +5002,29 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
alloc = reg->gpu_alloc;
pa = kbase_get_gpu_phy_pages(reg);
- address = alloc->imported.user_buf.address;
pinned_pages = alloc->nents;
pages = alloc->imported.user_buf.pages;
dev = kctx->kbdev->dev;
- offset_within_page = address & ~PAGE_MASK;
- remaining_size = alloc->imported.user_buf.size;
+ /* Manual CPU cache synchronization.
+ *
+ * The driver disables automatic CPU cache synchronization because the
+ * memory pages that enclose the imported region may also contain
+ * sub-regions which are not imported and that are allocated and used
+ * by the user process. This may be the case for memory at the beginning
+ * of the first page and at the end of the last page. Automatic CPU cache
+ * synchronization would force some operations on those memory allocations,
+ * unbeknown to the user process: in particular, a CPU cache invalidate
+ * upon unmapping would destroy the content of dirty CPU caches and cause
+ * the user process to lose CPU writes to the non-imported sub-regions.
+ *
+ * When the GPU claims ownership of the imported memory buffer, the driver
+ * shall commit CPU writes for the whole of every page that encloses the
+ * imported region, otherwise the initial content of memory would be wrong.
+ */
for (i = 0; i < pinned_pages; i++) {
- unsigned long map_size =
- MIN(PAGE_SIZE - offset_within_page, remaining_size);
- dma_addr_t dma_addr = dma_map_page(dev, pages[i],
- offset_within_page, map_size,
- DMA_BIDIRECTIONAL);
+ dma_addr_t dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
err = dma_mapping_error(dev, dma_addr);
if (err)
@@ -5025,8 +5033,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
alloc->imported.user_buf.dma_addrs[i] = dma_addr;
pa[i] = as_tagged(page_to_phys(pages[i]));
- remaining_size -= map_size;
- offset_within_page = 0;
+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
}
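For reference, the mapping pattern introduced above reduces to a minimal sketch built directly on the Linux DMA API. The helper name example_map_pinned_pages and its parameters are hypothetical, not part of the driver; the pages are assumed to be already pinned, and a failed mapping simply returns on the assumption that the caller unwinds the pages mapped so far.

#include <linux/dma-mapping.h>

/* Map already-pinned user pages for device access, skipping automatic CPU
 * cache maintenance and cleaning each whole page by hand so that the GPU
 * sees the latest CPU writes (hypothetical helper, for illustration only).
 */
static int example_map_pinned_pages(struct device *dev, struct page **pages,
				    long nr_pages, dma_addr_t *dma_addrs)
{
	long i;

	for (i = 0; i < nr_pages; i++) {
		/* Skip the automatic sync so that non-imported data sharing
		 * the page is never invalidated behind the user's back.
		 */
		dma_addr_t dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
							 DMA_BIDIRECTIONAL,
							 DMA_ATTR_SKIP_CPU_SYNC);

		if (dma_mapping_error(dev, dma_addr))
			return -ENOMEM; /* caller unwinds pages [0, i) */

		dma_addrs[i] = dma_addr;
		/* Commit dirty CPU cache lines for the whole page, including
		 * any non-imported sub-regions that share it.
		 */
		dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
	}

	return 0;
}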
#ifdef CONFIG_MALI_CINSTR_GWT
@@ -5043,19 +5050,22 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
/* fall down */
unwind:
alloc->nents = 0;
- offset_within_page = address & ~PAGE_MASK;
- remaining_size = alloc->imported.user_buf.size;
dma_mapped_pages = i;
- /* Run the unmap loop in the same order as map loop */
+ /* Run the unmap loop in the same order as the map loop, and perform
+ * CPU cache synchronization again to re-write the content of dirty CPU caches
+ * to memory. This is a precautionary measure in case a GPU job has taken
+ * advantage of a partially GPU-mapped range to write and corrupt the
+ * content of memory, either inside or outside the imported region.
+ *
+ * Notice that this error recovery path doesn't try to be optimal and just
+ * flushes the entire page range.
+ */
for (i = 0; i < dma_mapped_pages; i++) {
- unsigned long unmap_size =
- MIN(PAGE_SIZE - offset_within_page, remaining_size);
+ dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
- dma_unmap_page(kctx->kbdev->dev,
- alloc->imported.user_buf.dma_addrs[i],
- unmap_size, DMA_BIDIRECTIONAL);
- remaining_size -= unmap_size;
- offset_within_page = 0;
+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
}
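The error-recovery loop above follows the reverse pattern: dirty CPU cache lines are written back before each page is unmapped with automatic synchronization skipped. A matching sketch, again with a hypothetical helper name and assuming the same header as the previous example:

/* Undo the mappings created by example_map_pinned_pages() for the first
 * nr_mapped pages, re-cleaning CPU caches as a precaution against partial
 * GPU writes (hypothetical helper, for illustration only).
 */
static void example_unmap_mapped_pages(struct device *dev, dma_addr_t *dma_addrs,
				       long nr_mapped)
{
	long i;

	for (i = 0; i < nr_mapped; i++) {
		/* Write back any dirty CPU cache lines for the whole page. */
		dma_sync_single_for_device(dev, dma_addrs[i], PAGE_SIZE, DMA_BIDIRECTIONAL);
		/* Unmap without any further automatic cache maintenance. */
		dma_unmap_page_attrs(dev, dma_addrs[i], PAGE_SIZE, DMA_BIDIRECTIONAL,
				     DMA_ATTR_SKIP_CPU_SYNC);
	}
}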
/* The user buffer could already have been previously pinned before
@@ -5096,12 +5106,85 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem
#endif
for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
- unsigned long unmap_size =
- MIN(remaining_size, PAGE_SIZE - offset_within_page);
+ unsigned long imported_size = MIN(remaining_size, PAGE_SIZE - offset_within_page);
+ /* Notice: this is a temporary variable that is used for DMA sync
+ * operations, and that could be incremented by an offset if the
+ * current page contains both imported and non-imported memory
+ * sub-regions.
+ *
+ * It is valid to add an offset to this value, because the offset
+ * is always kept within the physically contiguous dma-mapped range
+ * and there's no need to translate to physical address to offset it.
+ *
+ * This variable is not going to be used for the actual DMA unmap
+ * operation, that shall always use the original DMA address of the
+ * whole memory page.
+ */
dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
- dma_unmap_page(kctx->kbdev->dev, dma_addr, unmap_size,
- DMA_BIDIRECTIONAL);
+ /* Manual CPU cache synchronization.
+ *
+ * When the GPU returns ownership of the buffer to the CPU, the driver
+ * needs to treat imported and non-imported memory differently.
+ *
+ * The first case to consider is non-imported sub-regions at the
+ * beginning of the first page and at the end of last page. For these
+ * sub-regions: CPU cache shall be committed with a clean+invalidate,
+ * in order to keep the last CPU write.
+ *
+ * Imported region prefers the opposite treatment: this memory has been
+ * legitimately mapped and used by the GPU, hence GPU writes shall be
+ * committed to memory, while CPU cache shall be invalidated to make
+ * sure that CPU reads the correct memory content.
+ *
+ * The following diagram shows the expected value of the variables
+ * used in this loop in the corner case of an imported region enclosed
+ * by a single memory page:
+ *
+ * page boundary ->|-----------| <- dma_addr (initial value)
+ * | |
+ * | - - - - - | <- offset_within_page
+ * |XXXXXXXXXXX|\
+ * |XXXXXXXXXXX| \
+ * |XXXXXXXXXXX| }- imported_size
+ * |XXXXXXXXXXX| /
+ * |XXXXXXXXXXX|/
+ * | - - - - - | <- offset_within_page + imported_size
+ * | |\
+ * | | }- PAGE_SIZE - imported_size - offset_within_page
+ * | |/
+ * page boundary ->|-----------|
+ *
+ * If the imported region is enclosed by more than one page, then
+ * offset_within_page = 0 for any page after the first.
+ */
+
+ /* Only for first page: handle non-imported range at the beginning. */
+ if (offset_within_page > 0) {
+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page,
+ DMA_BIDIRECTIONAL);
+ dma_addr += offset_within_page;
+ }
+
+ /* For every page: handle imported range. */
+ if (imported_size > 0)
+ dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size,
+ DMA_BIDIRECTIONAL);
+
+ /* Only for last page (that may coincide with first page):
+ * handle non-imported range at the end.
+ */
+ if ((imported_size + offset_within_page) < PAGE_SIZE) {
+ dma_addr += imported_size;
+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr,
+ PAGE_SIZE - imported_size - offset_within_page,
+ DMA_BIDIRECTIONAL);
+ }
+
+ /* Notice: use the original DMA address to unmap the whole memory page. */
+ dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i],
+ PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
+
if (writeable)
set_page_dirty_lock(pages[i]);
#if !MALI_USE_CSF
@@ -5109,7 +5192,7 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem
pages[i] = NULL;
#endif
- remaining_size -= unmap_size;
+ remaining_size -= imported_size;
offset_within_page = 0;
}
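The three-way split described by the diagram above condenses into the following sketch: the head and tail of the page still owned by the user process are cleaned towards memory, the imported range in the middle is invalidated so the CPU reads what the GPU wrote, and the whole page is then unmapped at its original DMA address. The helper name example_sync_and_unmap_page and its offset/imported_size parameters are hypothetical; they stand for offset_within_page and imported_size in the loop, and the same DMA API header as the earlier sketches is assumed.

/* Per-page cache maintenance when the GPU returns a user buffer to the CPU
 * (hypothetical helper, for illustration only).
 */
static void example_sync_and_unmap_page(struct device *dev, dma_addr_t page_dma_addr,
					unsigned long offset, unsigned long imported_size)
{
	dma_addr_t dma_addr = page_dma_addr;

	/* Non-imported head of the page: preserve the CPU's own writes. */
	if (offset > 0) {
		dma_sync_single_for_device(dev, dma_addr, offset, DMA_BIDIRECTIONAL);
		dma_addr += offset;
	}

	/* Imported range: make GPU writes visible to CPU reads. */
	if (imported_size > 0)
		dma_sync_single_for_cpu(dev, dma_addr, imported_size, DMA_BIDIRECTIONAL);

	/* Non-imported tail of the page: preserve the CPU's own writes. */
	if (offset + imported_size < PAGE_SIZE)
		dma_sync_single_for_device(dev, dma_addr + imported_size,
					   PAGE_SIZE - imported_size - offset,
					   DMA_BIDIRECTIONAL);

	/* Always unmap the whole page at its original DMA address. */
	dma_unmap_page_attrs(dev, page_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
			     DMA_ATTR_SKIP_CPU_SYNC);
}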
#if !MALI_USE_CSF
@@ -5190,8 +5273,9 @@ int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_regi
break;
}
default:
- WARN(1, "Invalid external resource GPU allocation type (%x) on mapping",
- alloc->type);
+ dev_dbg(kctx->kbdev->dev,
+ "Invalid external resource GPU allocation type (%x) on mapping",
+ alloc->type);
return -EINVAL;
}