From 1315948510c02a20fe94f27634c58f97803c1d29 Mon Sep 17 00:00:00 2001 From: Jack Diver Date: Thu, 18 Jan 2024 16:10:14 +0000 Subject: mali_kbase: platform: Remove liveness based SLC Bug: 313458962 Test: gfxbench Signed-off-by: Jack Diver (cherry picked from https://partner-android-review.googlesource.com/q/commit:89419b7e89dcd2bb9f5c0259e8b5c5a875610131) Merged-In: I74a81b54764da315510ce988d0aa450b2761725f Change-Id: I74a81b54764da315510ce988d0aa450b2761725f --- .../platform/pixel/mali_kbase_config_platform.h | 21 - mali_kbase/platform/pixel/pixel_gpu_slc.c | 451 +-------------------- 2 files changed, 8 insertions(+), 464 deletions(-) diff --git a/mali_kbase/platform/pixel/mali_kbase_config_platform.h b/mali_kbase/platform/pixel/mali_kbase_config_platform.h index a0bf623..06b76ea 100644 --- a/mali_kbase/platform/pixel/mali_kbase_config_platform.h +++ b/mali_kbase/platform/pixel/mali_kbase_config_platform.h @@ -302,10 +302,6 @@ struct gpu_dvfs_metrics_uid_stats; * @dvfs.qos.bts.threshold: The G3D shader stack clock at which BTS will be enabled. Set via DT. * @dvfs.qos.bts.scenario: The index of the BTS scenario to be used. Set via DT. * - * @slc.lock: Synchronize updates to the SLC partition accounting variables. - * @slc.demand: The total demand for SLC space, an aggregation of each kctx's demand. - * @slc.usage: The total amount of SLC space used, an aggregation of each kctx's usage. - * * @itmon.wq: A workqueue for ITMON page table search. * @itmon.work: The work item for the above. * @itmon.nb: The ITMON notifier block. @@ -418,12 +414,6 @@ struct pixel_context { } dvfs; #endif /* CONFIG_MALI_MIDGARD_DVFS */ - struct { - struct mutex lock; - u64 demand; - u64 usage; - } slc; - #if IS_ENABLED(CONFIG_EXYNOS_ITMON) struct { struct workqueue_struct *wq; @@ -440,21 +430,10 @@ struct pixel_context { * * @kctx: Handle to the parent kctx * @stats: Tracks the dvfs metrics for the UID associated with this context - * - * @slc.peak_demand: The parent context's maximum demand for SLC space - * @slc.peak_usage: The parent context's maximum use of SLC space - * @slc.idle_work: Work item used to queue SLC partition shrink upon context idle - * @slc.idle_work_cancelled: Flag for async cancellation of idle_work */ struct pixel_platform_data { struct kbase_context *kctx; struct gpu_dvfs_metrics_uid_stats* stats; - struct { - u64 peak_demand; - u64 peak_usage; - struct work_struct idle_work; - atomic_t idle_work_cancelled; - } slc; }; #endif /* _KBASE_CONFIG_PLATFORM_H_ */ diff --git a/mali_kbase/platform/pixel/pixel_gpu_slc.c b/mali_kbase/platform/pixel/pixel_gpu_slc.c index d6cb131..8e46be1 100644 --- a/mali_kbase/platform/pixel/pixel_gpu_slc.c +++ b/mali_kbase/platform/pixel/pixel_gpu_slc.c @@ -17,321 +17,6 @@ #include "mali_kbase_config_platform.h" #include "pixel_gpu_slc.h" -struct dirty_region { - u64 first_vpfn; - u64 last_vpfn; - u64 dirty_pgds; -}; - -/** - * struct gpu_slc_liveness_update_info - Buffer info, and live ranges - * - * @buffer_va: Array of buffer base virtual addresses - * @buffer_sizes: Array of buffer sizes - * @buffer_count: Number of elements in the va and sizes buffers - * @live_ranges: Array of &struct kbase_pixel_gpu_slc_liveness_mark denoting live ranges for - * each buffer - * @live_ranges_count: Number of elements in the live ranges buffer - */ -struct gpu_slc_liveness_update_info { - u64* buffer_va; - u64* buffer_sizes; - u64 buffer_count; - struct kbase_pixel_gpu_slc_liveness_mark* live_ranges; - u64 live_ranges_count; -}; - -/** - * gpu_slc_lock_as - Lock the current process address space - * - * @kctx: The &struct kbase_context - */ -static void gpu_slc_lock_as(struct kbase_context *kctx) -{ - down_write(kbase_mem_get_process_mmap_lock()); - kbase_gpu_vm_lock(kctx); -} - -/** - * gpu_slc_unlock_as - Unlock the current process address space - * - * @kctx: The &struct kbase_context - */ -static void gpu_slc_unlock_as(struct kbase_context *kctx) -{ - kbase_gpu_vm_unlock(kctx); - up_write(kbase_mem_get_process_mmap_lock()); -} - -/** - * gpu_slc_in_group - Check whether the region is SLC cacheable - * - * @reg: The gpu memory region to check for an SLC cacheable memory group. - */ -static bool gpu_slc_in_group(struct kbase_va_region* reg) -{ - return reg->gpu_alloc->group_id == MGM_SLC_GROUP_ID; -} - -/** - * gpu_slc_get_region - Find the gpu memory region from a virtual address - * - * @kctx: The &struct kbase_context - * @va: The base gpu virtual address of the region - * - * Return: On success, returns a valid memory region. On failure NULL is returned. - */ -static struct kbase_va_region* gpu_slc_get_region(struct kbase_context *kctx, u64 va) -{ - struct kbase_va_region *reg; - - if (!va) - goto invalid; - - if ((va & ~PAGE_MASK) && (va >= PAGE_SIZE)) - goto invalid; - - /* Find the region that the virtual address belongs to */ - reg = kbase_region_tracker_find_region_base_address(kctx, va); - - /* Validate the region */ - if (kbase_is_region_invalid_or_free(reg)) - goto invalid; - - return reg; - -invalid: - dev_dbg(kctx->kbdev->dev, "pixel: failed to find valid region for gpu_va: %llu", va); - return NULL; -} - -/** - * gpu_slc_migrate_region - Add PBHA that will make the pages SLC cacheable - * - * @kctx: The &struct kbase_context - * @reg: The gpu memory region migrate to an SLC cacheable memory group - * @dirty_reg: The &struct dirty_region containing the extent of the dirty page table entries - */ -static void gpu_slc_migrate_region(struct kbase_context *kctx, struct kbase_va_region *reg, struct dirty_region *dirty_reg) -{ - int err; - u64 vpfn; - size_t page_nr; - - KBASE_DEBUG_ASSERT(kctx); - KBASE_DEBUG_ASSERT(reg); - - if (gpu_slc_in_group(reg)) { - return; - } - - vpfn = reg->start_pfn; - page_nr = kbase_reg_current_backed_size(reg); - - err = kbase_mmu_update_pages_no_flush(kctx->kbdev, &kctx->mmu, vpfn, - kbase_get_gpu_phy_pages(reg), - page_nr, - reg->flags, - MGM_SLC_GROUP_ID, - &dirty_reg->dirty_pgds); - - /* Track the dirty region */ - dirty_reg->first_vpfn = min(dirty_reg->first_vpfn, vpfn); - dirty_reg->last_vpfn = max(dirty_reg->last_vpfn, vpfn + page_nr); - - if (err) - dev_warn(kctx->kbdev->dev, "pixel: failed to move region to SLC: %d", err); - else - /* If everything is good, then set the new group on the region. */ - reg->gpu_alloc->group_id = MGM_SLC_GROUP_ID; -} - -/** - * gpu_slc_flush_dirty_region - Perform an MMU flush for a dirty page region - * - * @kctx: The &struct kbase_context - * @dirty_reg: The &struct dirty_region containing the extent of the dirty page table entries - */ -static void gpu_slc_flush_dirty_region(struct kbase_context *kctx, struct dirty_region *dirty_reg) -{ - size_t const dirty_page_nr = - (dirty_reg->last_vpfn - min(dirty_reg->first_vpfn, dirty_reg->last_vpfn)); - - if (!dirty_page_nr) - return; - - kbase_mmu_flush_invalidate_update_pages( - kctx->kbdev, kctx, dirty_reg->first_vpfn, dirty_page_nr, dirty_reg->dirty_pgds); -} - -/** - * gpu_slc_resize_partition - Attempt to resize the GPU's SLC partition to meet demand. - * - * @kbdev: The &struct kbase_device for the GPU. - */ -static void gpu_slc_resize_partition(struct kbase_device* kbdev) -{ - struct pixel_context *pc = kbdev->platform_context; - - /* Request that the mgm select an SLC partition that fits our demand */ - pixel_mgm_resize_group_to_fit(kbdev->mgm_dev, MGM_SLC_GROUP_ID, pc->slc.demand); - - dev_dbg(kbdev->dev, "pixel: resized GPU SLC partition to meet demand: %llu", pc->slc.demand); -} - -/** - * gpu_slc_get_partition_size - Query the current size of the GPU's SLC partition. - * - * @kbdev: The &struct kbase_device for the GPU. - * - * Returns the size of the GPU's SLC partition. - */ -static u64 gpu_slc_get_partition_size(struct kbase_device* kbdev) -{ - u64 const partition_size = pixel_mgm_query_group_size(kbdev->mgm_dev, MGM_SLC_GROUP_ID); - - dev_dbg(kbdev->dev, "pixel: GPU SLC partition partition size: %llu", partition_size); - - return partition_size; -} - -/** - * gpu_slc_liveness_update - Respond to a liveness update by trying to put the new buffers into free - * SLC space, and resizing the partition to meet demand. - * - * @kctx: The &struct kbase_context corresponding to a user space context which sent the liveness - * update - * @info: See struct gpu_slc_liveness_update_info - */ -static void gpu_slc_liveness_update(struct kbase_context* kctx, - struct gpu_slc_liveness_update_info* info) -{ - struct kbase_device* kbdev = kctx->kbdev; - struct pixel_context *pc = kbdev->platform_context; - struct pixel_platform_data *kctx_pd = kctx->platform_data; - struct dirty_region dirty_reg = { - .first_vpfn = U64_MAX, - .last_vpfn = 0, - .dirty_pgds = 0, - }; - u64 current_usage = 0; - u64 current_demand = 0; - u64 free_space; - int i; - - /* Lock the process address space before modifying ATE's */ - gpu_slc_lock_as(kctx); - - /* Synchronize updates to the partition size and usage */ - mutex_lock(&pc->slc.lock); - - dev_dbg(kbdev->dev, "pixel: buffer liveness update received"); - - /* Remove the usage and demand from the previous liveness update */ - pc->slc.demand -= kctx_pd->slc.peak_demand; - pc->slc.usage -= kctx_pd->slc.peak_usage; - kctx_pd->slc.peak_demand = 0; - kctx_pd->slc.peak_usage = 0; - - /* Calculate the remaining free space in the SLC partition (floored at 0) */ - free_space = gpu_slc_get_partition_size(kbdev); - free_space -= min(free_space, pc->slc.usage); - - for (i = 0; i < info->live_ranges_count; ++i) - { - struct kbase_va_region *reg; - u64 size; - u64 va; - u32 index = info->live_ranges[i].index; - - if (unlikely(index >= info->buffer_count)) - continue; - - size = info->buffer_sizes[index]; - va = info->buffer_va[index]; - - reg = gpu_slc_get_region(kctx, va); - if(!reg) - continue; - - switch (info->live_ranges[i].type) - { - case KBASE_PIXEL_GPU_LIVE_RANGE_BEGIN: - /* Update demand as though there's no size limit */ - current_demand += size; - kctx_pd->slc.peak_demand = max(kctx_pd->slc.peak_demand, current_demand); - - /* Check whether there's free space in the partition to store the buffer */ - if (free_space >= current_usage + size) - gpu_slc_migrate_region(kctx, reg, &dirty_reg); - - /* This may be true, even if the space calculation above returned false, - * as a previous call to this function may have migrated the region. - * In such a scenario, the current_usage may exceed the available free_space - * and we will be oversubscribed to the SLC partition. - * We could migrate the region back to the non-SLC group, but this would - * require an SLC flush, so for now we do nothing. - */ - if (gpu_slc_in_group(reg)) { - current_usage += size; - kctx_pd->slc.peak_usage = max(kctx_pd->slc.peak_usage, current_usage); - } - break; - case KBASE_PIXEL_GPU_LIVE_RANGE_END: - current_demand -= size; - if (gpu_slc_in_group(reg)) - current_usage -= size; - break; - } - } - /* Perform single page table flush */ - gpu_slc_flush_dirty_region(kctx, &dirty_reg); - - /* Indicates a missing live range end marker */ - WARN_ON_ONCE(current_demand != 0 || current_usage != 0); - - /* Update the total usage and demand */ - pc->slc.demand += kctx_pd->slc.peak_demand; - pc->slc.usage += kctx_pd->slc.peak_usage; - - dev_dbg(kbdev->dev, - "pixel: kctx_%d, peak_demand: %llu, peak_usage: %llu", - kctx->id, - kctx_pd->slc.peak_demand, - kctx_pd->slc.peak_usage); - dev_dbg(kbdev->dev, "pixel: kbdev, demand: %llu, usage: %llu", pc->slc.demand, pc->slc.usage); - - /* Trigger partition resize based on the new demand */ - gpu_slc_resize_partition(kctx->kbdev); - - mutex_unlock(&pc->slc.lock); - gpu_slc_unlock_as(kctx); -} - -static void gpu_slc_kctx_idle_worker(struct work_struct *work) -{ - struct pixel_platform_data *pd = - container_of(work, struct pixel_platform_data, slc.idle_work); - struct kbase_context *kctx = pd->kctx; - struct kbase_device *kbdev = kctx->kbdev; - struct pixel_context *pc = kbdev->platform_context; - - if (atomic_read(&pd->slc.idle_work_cancelled)) - return; - - mutex_lock(&pc->slc.lock); - - pc->slc.demand -= pd->slc.peak_demand; - pc->slc.usage -= pd->slc.peak_usage; - - pd->slc.peak_demand = 0; - pd->slc.peak_usage = 0; - - gpu_slc_resize_partition(kctx->kbdev); - - mutex_unlock(&pc->slc.lock); -} - /** * gpu_pixel_handle_buffer_liveness_update_ioctl() - See gpu_slc_liveness_update * @@ -345,80 +30,8 @@ static void gpu_slc_kctx_idle_worker(struct work_struct *work) int gpu_pixel_handle_buffer_liveness_update_ioctl(struct kbase_context* kctx, struct kbase_ioctl_buffer_liveness_update* update) { - int err = -EINVAL; - struct gpu_slc_liveness_update_info info; - u64* buff = NULL; - u64 total_buff_size; - - /* Compute the sizes of the user space arrays that we need to copy */ - u64 const buffer_info_size = sizeof(u64) * update->buffer_count; - u64 const live_ranges_size = - sizeof(struct kbase_pixel_gpu_slc_liveness_mark) * update->live_ranges_count; - - /* Guard against overflows and empty sizes */ - if (!buffer_info_size || !live_ranges_size) - goto done; - if (U64_MAX / sizeof(u64) < update->buffer_count) - goto done; - if (U64_MAX / sizeof(struct kbase_pixel_gpu_slc_liveness_mark) < update->live_ranges_count) - goto done; - /* Guard against nullptr */ - if (!update->live_ranges_address || !update->buffer_va_address || !update->buffer_sizes_address) - goto done; - /* Calculate the total buffer size required and detect overflows */ - if ((U64_MAX - live_ranges_size) / 2 < buffer_info_size) - goto done; - - total_buff_size = buffer_info_size * 2 + live_ranges_size; - - /* Allocate the memory we require to copy from user space */ - buff = kmalloc(total_buff_size, GFP_KERNEL); - if (buff == NULL) { - dev_err(kctx->kbdev->dev, "pixel: failed to allocate buffer for liveness update"); - err = -ENOMEM; - goto done; - } - - /* Set up the info struct by pointing into the allocation. All 8 byte aligned */ - info = (struct gpu_slc_liveness_update_info){ - .buffer_va = buff, - .buffer_sizes = buff + update->buffer_count, - .buffer_count = update->buffer_count, - .live_ranges = (struct kbase_pixel_gpu_slc_liveness_mark*)(buff + update->buffer_count * 2), - .live_ranges_count = update->live_ranges_count, - }; - - /* Copy the data from user space */ - err = - copy_from_user(info.live_ranges, u64_to_user_ptr(update->live_ranges_address), live_ranges_size); - if (err) { - dev_err(kctx->kbdev->dev, "pixel: failed to copy live ranges"); - err = -EFAULT; - goto done; - } - - err = copy_from_user( - info.buffer_sizes, u64_to_user_ptr(update->buffer_sizes_address), buffer_info_size); - if (err) { - dev_err(kctx->kbdev->dev, "pixel: failed to copy buffer sizes"); - err = -EFAULT; - goto done; - } - - err = copy_from_user(info.buffer_va, u64_to_user_ptr(update->buffer_va_address), buffer_info_size); - if (err) { - dev_err(kctx->kbdev->dev, "pixel: failed to copy buffer addresses"); - err = -EFAULT; - goto done; - } - - /* Execute an slc update */ - gpu_slc_liveness_update(kctx, &info); - -done: - kfree(buff); - - return err; + (void)kctx, (void)update; + return 0; } /** @@ -433,10 +46,7 @@ done: */ int gpu_slc_kctx_init(struct kbase_context *kctx) { - struct pixel_platform_data *pd = kctx->platform_data; - - INIT_WORK(&pd->slc.idle_work, gpu_slc_kctx_idle_worker); - + (void)kctx; return 0; } @@ -444,28 +54,10 @@ int gpu_slc_kctx_init(struct kbase_context *kctx) * gpu_slc_kctx_term() - Called when a kernel context is terminated * * @kctx: The &struct kbase_context that is being terminated - * - * Free up SLC space used by the buffers that this context owns. */ void gpu_slc_kctx_term(struct kbase_context *kctx) { - struct kbase_device *kbdev = kctx->kbdev; - struct pixel_context *pc = kbdev->platform_context; - struct pixel_platform_data *kctx_pd = kctx->platform_data; - - atomic_set(&kctx_pd->slc.idle_work_cancelled, 1); - cancel_work_sync(&kctx_pd->slc.idle_work); - - mutex_lock(&pc->slc.lock); - - /* Deduct the usage and demand, freeing that SLC space for the next update */ - pc->slc.demand -= kctx_pd->slc.peak_demand; - pc->slc.usage -= kctx_pd->slc.peak_usage; - - /* Trigger partition resize based on the new demand */ - gpu_slc_resize_partition(kctx->kbdev); - - mutex_unlock(&pc->slc.lock); + (void)kctx; } /** @@ -475,19 +67,7 @@ void gpu_slc_kctx_term(struct kbase_context *kctx) */ void gpu_slc_kctx_active(struct kbase_context *kctx) { - struct kbase_device *kbdev = kctx->kbdev; - struct pixel_platform_data *pd = kctx->platform_data; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - /* Asynchronously cancel the idle work, since we're in atomic context. - * The goal here is not to ensure that the idle_work doesn't run. Instead we need to ensure - * that any queued idle_work does not run *after* a liveness update for the now active kctx. - * Either the idle_work is executing now, and beats the cancellation check, or it runs later - * and early-exits at the cancellation check. - * In neither scenario will a 'cancelled' idle_work interfere with a later liveness update. - */ - atomic_set(&pd->slc.idle_work_cancelled, 1); + (void)kctx; } /** @@ -497,22 +77,11 @@ void gpu_slc_kctx_active(struct kbase_context *kctx) */ void gpu_slc_kctx_idle(struct kbase_context *kctx) { - struct kbase_device *kbdev = kctx->kbdev; - struct pixel_platform_data *pd = kctx->platform_data; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - /* In the event that this line 'un-cancels' the idle_work, and that idle_work is executing, - * we will re-queue on the following line anyway, resulting in a unnecessary additional - * execution of the worker. - * While not optimal, it won't result in a correctness problem. - */ - atomic_set(&pd->slc.idle_work_cancelled, 0); - queue_work(system_highpri_wq, &pd->slc.idle_work); + (void)kctx; } /** - * gpu_slc_init - Initialize the SLC partition for the GPU + * gpu_slc_init - Initialize the SLC context for the GPU * * @kbdev: The &struct kbase_device for the GPU. * @@ -520,15 +89,11 @@ void gpu_slc_kctx_idle(struct kbase_context *kctx) */ int gpu_slc_init(struct kbase_device *kbdev) { - struct pixel_context *pc = kbdev->platform_context; - - mutex_init(&pc->slc.lock); - return 0; } /** - * gpu_slc_term() - Terminates the Pixel GPU SLC partition. + * gpu_slc_term() - Terminates the Pixel GPU SLC context. * * @kbdev: The &struct kbase_device for the GPU. */ -- cgit v1.2.3