From 1315948510c02a20fe94f27634c58f97803c1d29 Mon Sep 17 00:00:00 2001
From: Jack Diver <diverj@google.com>
Date: Thu, 18 Jan 2024 16:10:14 +0000
Subject: mali_kbase: platform: Remove liveness based SLC

Bug: 313458962
Test: gfxbench
Signed-off-by: Jack Diver <diverj@google.com>
(cherry picked from https://partner-android-review.googlesource.com/q/commit:89419b7e89dcd2bb9f5c0259e8b5c5a875610131)
Merged-In: I74a81b54764da315510ce988d0aa450b2761725f
Change-Id: I74a81b54764da315510ce988d0aa450b2761725f
---
 .../platform/pixel/mali_kbase_config_platform.h    |  21 -
 mali_kbase/platform/pixel/pixel_gpu_slc.c          | 451 +--------------------
 2 files changed, 8 insertions(+), 464 deletions(-)

diff --git a/mali_kbase/platform/pixel/mali_kbase_config_platform.h b/mali_kbase/platform/pixel/mali_kbase_config_platform.h
index a0bf623..06b76ea 100644
--- a/mali_kbase/platform/pixel/mali_kbase_config_platform.h
+++ b/mali_kbase/platform/pixel/mali_kbase_config_platform.h
@@ -302,10 +302,6 @@ struct gpu_dvfs_metrics_uid_stats;
  * @dvfs.qos.bts.threshold: The G3D shader stack clock at which BTS will be enabled. Set via DT.
  * @dvfs.qos.bts.scenario:  The index of the BTS scenario to be used. Set via DT.
  *
- * @slc.lock:           Synchronize updates to the SLC partition accounting variables.
- * @slc.demand:         The total demand for SLC space, an aggregation of each kctx's demand.
- * @slc.usage:          The total amount of SLC space used, an aggregation of each kctx's usage.
- *
  * @itmon.wq:     A workqueue for ITMON page table search.
  * @itmon.work:   The work item for the above.
  * @itmon.nb:     The ITMON notifier block.
@@ -418,12 +414,6 @@ struct pixel_context {
 	} dvfs;
 #endif /* CONFIG_MALI_MIDGARD_DVFS */
 
-	struct {
-		struct mutex lock;
-		u64 demand;
-		u64 usage;
-	} slc;
-
 #if IS_ENABLED(CONFIG_EXYNOS_ITMON)
 	struct {
 		struct workqueue_struct *wq;
@@ -440,21 +430,10 @@ struct pixel_context {
  *
  * @kctx:  Handle to the parent kctx
  * @stats: Tracks the dvfs metrics for the UID associated with this context
- *
- * @slc.peak_demand:         The parent context's maximum demand for SLC space
- * @slc.peak_usage:          The parent context's maximum use of SLC space
- * @slc.idle_work:           Work item used to queue SLC partition shrink upon context idle
- * @slc.idle_work_cancelled: Flag for async cancellation of idle_work
  */
 struct pixel_platform_data {
 	struct kbase_context *kctx;
 	struct gpu_dvfs_metrics_uid_stats* stats;
-	struct {
-		u64 peak_demand;
-		u64 peak_usage;
-		struct work_struct idle_work;
-		atomic_t idle_work_cancelled;
-	} slc;
 };
 
 #endif /* _KBASE_CONFIG_PLATFORM_H_ */
diff --git a/mali_kbase/platform/pixel/pixel_gpu_slc.c b/mali_kbase/platform/pixel/pixel_gpu_slc.c
index d6cb131..8e46be1 100644
--- a/mali_kbase/platform/pixel/pixel_gpu_slc.c
+++ b/mali_kbase/platform/pixel/pixel_gpu_slc.c
@@ -17,321 +17,6 @@
 #include "mali_kbase_config_platform.h"
 #include "pixel_gpu_slc.h"
 
-struct dirty_region {
-	u64 first_vpfn;
-	u64 last_vpfn;
-	u64 dirty_pgds;
-};
-
-/**
- * struct gpu_slc_liveness_update_info - Buffer info, and live ranges
- *
- * @buffer_va:         Array of buffer base virtual addresses
- * @buffer_sizes:      Array of buffer sizes
- * @buffer_count:      Number of elements in the va and sizes buffers
- * @live_ranges:       Array of &struct kbase_pixel_gpu_slc_liveness_mark denoting live ranges for
- *                     each buffer
- * @live_ranges_count: Number of elements in the live ranges buffer
- */
-struct gpu_slc_liveness_update_info {
-	u64* buffer_va;
-	u64* buffer_sizes;
-	u64 buffer_count;
-	struct kbase_pixel_gpu_slc_liveness_mark* live_ranges;
-	u64 live_ranges_count;
-};
-
-/**
- * gpu_slc_lock_as - Lock the current process address space
- *
- * @kctx:  The &struct kbase_context
- */
-static void gpu_slc_lock_as(struct kbase_context *kctx)
-{
-	down_write(kbase_mem_get_process_mmap_lock());
-	kbase_gpu_vm_lock(kctx);
-}
-
-/**
- * gpu_slc_unlock_as - Unlock the current process address space
- *
- * @kctx:  The &struct kbase_context
- */
-static void gpu_slc_unlock_as(struct kbase_context *kctx)
-{
-	kbase_gpu_vm_unlock(kctx);
-	up_write(kbase_mem_get_process_mmap_lock());
-}
-
-/**
- * gpu_slc_in_group - Check whether the region is SLC cacheable
- *
- * @reg:   The gpu memory region to check for an SLC cacheable memory group.
- */
-static bool gpu_slc_in_group(struct kbase_va_region* reg)
-{
-	return reg->gpu_alloc->group_id == MGM_SLC_GROUP_ID;
-}
-
-/**
- * gpu_slc_get_region - Find the gpu memory region from a virtual address
- *
- * @kctx:  The &struct kbase_context
- * @va:    The base gpu virtual address of the region
- *
- * Return: On success, returns a valid memory region. On failure NULL is returned.
- */
-static struct kbase_va_region* gpu_slc_get_region(struct kbase_context *kctx, u64 va)
-{
-	struct kbase_va_region *reg;
-
-	if (!va)
-		goto invalid;
-
-	if ((va & ~PAGE_MASK) && (va >= PAGE_SIZE))
-		goto invalid;
-
-	/* Find the region that the virtual address belongs to */
-	reg = kbase_region_tracker_find_region_base_address(kctx, va);
-
-	/* Validate the region */
-	if (kbase_is_region_invalid_or_free(reg))
-		goto invalid;
-
-	return reg;
-
-invalid:
-	dev_dbg(kctx->kbdev->dev, "pixel: failed to find valid region for gpu_va: %llu", va);
-	return NULL;
-}
-
-/**
- * gpu_slc_migrate_region - Add PBHA that will make the pages SLC cacheable
- *
- * @kctx:      The &struct kbase_context
- * @reg:       The gpu memory region migrate to an SLC cacheable memory group
- * @dirty_reg: The &struct dirty_region containing the extent of the dirty page table entries
- */
-static void gpu_slc_migrate_region(struct kbase_context *kctx, struct kbase_va_region *reg, struct dirty_region *dirty_reg)
-{
-	int err;
-	u64 vpfn;
-	size_t page_nr;
-
-	KBASE_DEBUG_ASSERT(kctx);
-	KBASE_DEBUG_ASSERT(reg);
-
-	if (gpu_slc_in_group(reg)) {
-		return;
-	}
-
-	vpfn = reg->start_pfn;
-	page_nr = kbase_reg_current_backed_size(reg);
-
-	err = kbase_mmu_update_pages_no_flush(kctx->kbdev, &kctx->mmu, vpfn,
-			kbase_get_gpu_phy_pages(reg),
-			page_nr,
-			reg->flags,
-			MGM_SLC_GROUP_ID,
-			&dirty_reg->dirty_pgds);
-
-	/* Track the dirty region */
-	dirty_reg->first_vpfn = min(dirty_reg->first_vpfn, vpfn);
-	dirty_reg->last_vpfn = max(dirty_reg->last_vpfn, vpfn + page_nr);
-
-	if (err)
-		dev_warn(kctx->kbdev->dev, "pixel: failed to move region to SLC: %d", err);
-	else
-		/* If everything is good, then set the new group on the region. */
-		reg->gpu_alloc->group_id = MGM_SLC_GROUP_ID;
-}
-
-/**
- * gpu_slc_flush_dirty_region - Perform an MMU flush for a dirty page region
- *
- * @kctx:      The &struct kbase_context
- * @dirty_reg: The &struct dirty_region containing the extent of the dirty page table entries
- */
-static void gpu_slc_flush_dirty_region(struct kbase_context *kctx, struct dirty_region *dirty_reg)
-{
-	size_t const dirty_page_nr =
-	    (dirty_reg->last_vpfn - min(dirty_reg->first_vpfn, dirty_reg->last_vpfn));
-
-	if (!dirty_page_nr)
-		return;
-
-	kbase_mmu_flush_invalidate_update_pages(
-	    kctx->kbdev, kctx, dirty_reg->first_vpfn, dirty_page_nr, dirty_reg->dirty_pgds);
-}
-
-/**
- * gpu_slc_resize_partition - Attempt to resize the GPU's SLC partition to meet demand.
- *
- * @kbdev: The &struct kbase_device for the GPU.
- */
-static void gpu_slc_resize_partition(struct kbase_device* kbdev)
-{
-	struct pixel_context *pc = kbdev->platform_context;
-
-	/* Request that the mgm select an SLC partition that fits our demand */
-	pixel_mgm_resize_group_to_fit(kbdev->mgm_dev, MGM_SLC_GROUP_ID, pc->slc.demand);
-
-	dev_dbg(kbdev->dev, "pixel: resized GPU SLC partition to meet demand: %llu", pc->slc.demand);
-}
-
-/**
- * gpu_slc_get_partition_size - Query the current size of the GPU's SLC partition.
- *
- * @kbdev: The &struct kbase_device for the GPU.
- *
- * Returns the size of the GPU's SLC partition.
- */
-static u64 gpu_slc_get_partition_size(struct kbase_device* kbdev)
-{
-	u64 const partition_size = pixel_mgm_query_group_size(kbdev->mgm_dev, MGM_SLC_GROUP_ID);
-
-	dev_dbg(kbdev->dev, "pixel: GPU SLC partition partition size: %llu", partition_size);
-
-	return partition_size;
-}
-
-/**
- * gpu_slc_liveness_update - Respond to a liveness update by trying to put the new buffers into free
- *                           SLC space, and resizing the partition to meet demand.
- *
- * @kctx:   The &struct kbase_context corresponding to a user space context which sent the liveness
- *          update
- * @info:   See struct gpu_slc_liveness_update_info
- */
-static void gpu_slc_liveness_update(struct kbase_context* kctx,
-                                    struct gpu_slc_liveness_update_info* info)
-{
-	struct kbase_device* kbdev = kctx->kbdev;
-	struct pixel_context *pc = kbdev->platform_context;
-	struct pixel_platform_data *kctx_pd = kctx->platform_data;
-	struct dirty_region dirty_reg = {
-		.first_vpfn = U64_MAX,
-		.last_vpfn = 0,
-		.dirty_pgds = 0,
-	};
-	u64 current_usage = 0;
-	u64 current_demand = 0;
-	u64 free_space;
-	int i;
-
-	/* Lock the process address space before modifying ATE's */
-	gpu_slc_lock_as(kctx);
-
-	/* Synchronize updates to the partition size and usage */
-	mutex_lock(&pc->slc.lock);
-
-	dev_dbg(kbdev->dev, "pixel: buffer liveness update received");
-
-	/* Remove the usage and demand from the previous liveness update */
-	pc->slc.demand -= kctx_pd->slc.peak_demand;
-	pc->slc.usage -= kctx_pd->slc.peak_usage;
-	kctx_pd->slc.peak_demand = 0;
-	kctx_pd->slc.peak_usage = 0;
-
-	/* Calculate the remaining free space in the SLC partition (floored at 0) */
-	free_space = gpu_slc_get_partition_size(kbdev);
-	free_space -= min(free_space, pc->slc.usage);
-
-	for (i = 0; i < info->live_ranges_count; ++i)
-	{
-		struct kbase_va_region *reg;
-                u64 size;
-                u64 va;
-		u32 index = info->live_ranges[i].index;
-
-		if (unlikely(index >= info->buffer_count))
-			continue;
-
-		size = info->buffer_sizes[index];
-		va = info->buffer_va[index];
-
-		reg = gpu_slc_get_region(kctx, va);
-		if(!reg)
-			continue;
-
-		switch (info->live_ranges[i].type)
-		{
-		case KBASE_PIXEL_GPU_LIVE_RANGE_BEGIN:
-			/* Update demand as though there's no size limit */
-			current_demand += size;
-			kctx_pd->slc.peak_demand = max(kctx_pd->slc.peak_demand, current_demand);
-
-			/* Check whether there's free space in the partition to store the buffer */
-			if (free_space >= current_usage + size)
-				gpu_slc_migrate_region(kctx, reg, &dirty_reg);
-
-			/* This may be true, even if the space calculation above returned false,
-			 * as a previous call to this function may have migrated the region.
-			 * In such a scenario, the current_usage may exceed the available free_space
-			 * and we will be oversubscribed to the SLC partition.
-			 * We could migrate the region back to the non-SLC group, but this would
-			 * require an SLC flush, so for now we do nothing.
-			 */
-			if (gpu_slc_in_group(reg)) {
-				current_usage += size;
-				kctx_pd->slc.peak_usage = max(kctx_pd->slc.peak_usage, current_usage);
-			}
-			break;
-		case KBASE_PIXEL_GPU_LIVE_RANGE_END:
-			current_demand -= size;
-			if (gpu_slc_in_group(reg))
-				current_usage -= size;
-			break;
-		}
-	}
-	/* Perform single page table flush */
-	gpu_slc_flush_dirty_region(kctx, &dirty_reg);
-
-	/* Indicates a missing live range end marker */
-	WARN_ON_ONCE(current_demand != 0 || current_usage != 0);
-
-	/* Update the total usage and demand */
-	pc->slc.demand += kctx_pd->slc.peak_demand;
-	pc->slc.usage += kctx_pd->slc.peak_usage;
-
-	dev_dbg(kbdev->dev,
-	        "pixel: kctx_%d, peak_demand: %llu, peak_usage: %llu",
-	        kctx->id,
-	        kctx_pd->slc.peak_demand,
-	        kctx_pd->slc.peak_usage);
-	dev_dbg(kbdev->dev, "pixel: kbdev, demand: %llu, usage: %llu", pc->slc.demand, pc->slc.usage);
-
-	/* Trigger partition resize based on the new demand */
-	gpu_slc_resize_partition(kctx->kbdev);
-
-	mutex_unlock(&pc->slc.lock);
-	gpu_slc_unlock_as(kctx);
-}
-
-static void gpu_slc_kctx_idle_worker(struct work_struct *work)
-{
-	struct pixel_platform_data *pd =
-		container_of(work, struct pixel_platform_data, slc.idle_work);
-	struct kbase_context *kctx = pd->kctx;
-	struct kbase_device *kbdev = kctx->kbdev;
-	struct pixel_context *pc = kbdev->platform_context;
-
-	if (atomic_read(&pd->slc.idle_work_cancelled))
-		return;
-
-	mutex_lock(&pc->slc.lock);
-
-	pc->slc.demand -= pd->slc.peak_demand;
-	pc->slc.usage -= pd->slc.peak_usage;
-
-	pd->slc.peak_demand = 0;
-	pd->slc.peak_usage = 0;
-
-	gpu_slc_resize_partition(kctx->kbdev);
-
-	mutex_unlock(&pc->slc.lock);
-}
-
 /**
  * gpu_pixel_handle_buffer_liveness_update_ioctl() - See gpu_slc_liveness_update
  *
@@ -345,80 +30,8 @@ static void gpu_slc_kctx_idle_worker(struct work_struct *work)
 int gpu_pixel_handle_buffer_liveness_update_ioctl(struct kbase_context* kctx,
                                                   struct kbase_ioctl_buffer_liveness_update* update)
 {
-	int err = -EINVAL;
-	struct gpu_slc_liveness_update_info info;
-	u64* buff = NULL;
-	u64 total_buff_size;
-
-	/* Compute the sizes of the user space arrays that we need to copy */
-	u64 const buffer_info_size = sizeof(u64) * update->buffer_count;
-	u64 const live_ranges_size =
-	    sizeof(struct kbase_pixel_gpu_slc_liveness_mark) * update->live_ranges_count;
-
-	/* Guard against overflows and empty sizes */
-	if (!buffer_info_size || !live_ranges_size)
-		goto done;
-	if (U64_MAX / sizeof(u64) < update->buffer_count)
-		goto done;
-	if (U64_MAX / sizeof(struct kbase_pixel_gpu_slc_liveness_mark) < update->live_ranges_count)
-		goto done;
-	/* Guard against nullptr */
-	if (!update->live_ranges_address || !update->buffer_va_address || !update->buffer_sizes_address)
-		goto done;
-	/* Calculate the total buffer size required and detect overflows */
-	if ((U64_MAX - live_ranges_size) / 2 < buffer_info_size)
-		goto done;
-
-	total_buff_size = buffer_info_size * 2 + live_ranges_size;
-
-	/* Allocate the memory we require to copy from user space */
-	buff = kmalloc(total_buff_size, GFP_KERNEL);
-	if (buff == NULL) {
-		dev_err(kctx->kbdev->dev, "pixel: failed to allocate buffer for liveness update");
-		err = -ENOMEM;
-		goto done;
-	}
-
-	/* Set up the info struct by pointing into the allocation. All 8 byte aligned */
-	info = (struct gpu_slc_liveness_update_info){
-	    .buffer_va = buff,
-	    .buffer_sizes = buff + update->buffer_count,
-	    .buffer_count = update->buffer_count,
-	    .live_ranges = (struct kbase_pixel_gpu_slc_liveness_mark*)(buff + update->buffer_count * 2),
-	    .live_ranges_count = update->live_ranges_count,
-	};
-
-	/* Copy the data from user space */
-	err =
-	    copy_from_user(info.live_ranges, u64_to_user_ptr(update->live_ranges_address), live_ranges_size);
-	if (err) {
-		dev_err(kctx->kbdev->dev, "pixel: failed to copy live ranges");
-		err = -EFAULT;
-		goto done;
-	}
-
-	err = copy_from_user(
-	    info.buffer_sizes, u64_to_user_ptr(update->buffer_sizes_address), buffer_info_size);
-	if (err) {
-		dev_err(kctx->kbdev->dev, "pixel: failed to copy buffer sizes");
-		err = -EFAULT;
-		goto done;
-	}
-
-	err = copy_from_user(info.buffer_va, u64_to_user_ptr(update->buffer_va_address), buffer_info_size);
-	if (err) {
-		dev_err(kctx->kbdev->dev, "pixel: failed to copy buffer addresses");
-		err = -EFAULT;
-		goto done;
-	}
-
-	/* Execute an slc update */
-	gpu_slc_liveness_update(kctx, &info);
-
-done:
-	kfree(buff);
-
-	return err;
+	(void)kctx, (void)update;
+	return 0;
 }
 
 /**
@@ -433,10 +46,7 @@ done:
  */
 int gpu_slc_kctx_init(struct kbase_context *kctx)
 {
-	struct pixel_platform_data *pd = kctx->platform_data;
-
-	INIT_WORK(&pd->slc.idle_work, gpu_slc_kctx_idle_worker);
-
+	(void)kctx;
 	return 0;
 }
 
@@ -444,28 +54,10 @@ int gpu_slc_kctx_init(struct kbase_context *kctx)
  * gpu_slc_kctx_term() - Called when a kernel context is terminated
  *
  * @kctx: The &struct kbase_context that is being terminated
- *
- * Free up SLC space used by the buffers that this context owns.
  */
 void gpu_slc_kctx_term(struct kbase_context *kctx)
 {
-	struct kbase_device *kbdev = kctx->kbdev;
-	struct pixel_context *pc = kbdev->platform_context;
-	struct pixel_platform_data *kctx_pd = kctx->platform_data;
-
-	atomic_set(&kctx_pd->slc.idle_work_cancelled, 1);
-	cancel_work_sync(&kctx_pd->slc.idle_work);
-
-	mutex_lock(&pc->slc.lock);
-
-	/* Deduct the usage and demand, freeing that SLC space for the next update */
-	pc->slc.demand -= kctx_pd->slc.peak_demand;
-	pc->slc.usage -= kctx_pd->slc.peak_usage;
-
-	/* Trigger partition resize based on the new demand */
-	gpu_slc_resize_partition(kctx->kbdev);
-
-	mutex_unlock(&pc->slc.lock);
+	(void)kctx;
 }
 
 /**
@@ -475,19 +67,7 @@ void gpu_slc_kctx_term(struct kbase_context *kctx)
  */
 void gpu_slc_kctx_active(struct kbase_context *kctx)
 {
-	struct kbase_device *kbdev = kctx->kbdev;
-	struct pixel_platform_data *pd = kctx->platform_data;
-
-	lockdep_assert_held(&kbdev->hwaccess_lock);
-
-	/* Asynchronously cancel the idle work, since we're in atomic context.
-	 * The goal here is not to ensure that the idle_work doesn't run. Instead we need to ensure
-	 * that any  queued idle_work does not run *after* a liveness update for the now active kctx.
-	 * Either the idle_work is executing now, and beats the cancellation check, or it runs later
-	 * and early-exits at the cancellation check.
-	 * In neither scenario will a 'cancelled' idle_work interfere with a later liveness update.
-	 */
-	atomic_set(&pd->slc.idle_work_cancelled, 1);
+	(void)kctx;
 }
 
 /**
@@ -497,22 +77,11 @@ void gpu_slc_kctx_active(struct kbase_context *kctx)
  */
 void gpu_slc_kctx_idle(struct kbase_context *kctx)
 {
-	struct kbase_device *kbdev = kctx->kbdev;
-	struct pixel_platform_data *pd = kctx->platform_data;
-
-	lockdep_assert_held(&kbdev->hwaccess_lock);
-
-	/* In the event that this line 'un-cancels' the idle_work, and that idle_work is executing,
-	 * we will re-queue on the following line anyway, resulting in a unnecessary additional
-	 * execution of the worker.
-	 * While not optimal, it won't result in a correctness problem.
-	 */
-	atomic_set(&pd->slc.idle_work_cancelled, 0);
-	queue_work(system_highpri_wq, &pd->slc.idle_work);
+	(void)kctx;
 }
 
 /**
- * gpu_slc_init - Initialize the SLC partition for the GPU
+ * gpu_slc_init() - Initialize the SLC context for the GPU
  *
  * @kbdev: The &struct kbase_device for the GPU.
  *
@@ -520,15 +89,11 @@ void gpu_slc_kctx_idle(struct kbase_context *kctx)
  */
 int gpu_slc_init(struct kbase_device *kbdev)
 {
-	struct pixel_context *pc = kbdev->platform_context;
-
-	mutex_init(&pc->slc.lock);
-
 	return 0;
 }
 
 /**
- * gpu_slc_term() - Terminates the Pixel GPU SLC partition.
+ * gpu_slc_term() - Terminates the Pixel GPU SLC context.
  *
  * @kbdev: The &struct kbase_device for the GPU.
  */
-- 
cgit v1.2.3