-rw-r--r-- | mali_kbase/platform/pixel/mali_kbase_config_platform.h |  21
-rw-r--r-- | mali_kbase/platform/pixel/pixel_gpu_slc.c              | 451
2 files changed, 464 insertions(+), 8 deletions(-)
diff --git a/mali_kbase/platform/pixel/mali_kbase_config_platform.h b/mali_kbase/platform/pixel/mali_kbase_config_platform.h
index 06b76ea..a0bf623 100644
--- a/mali_kbase/platform/pixel/mali_kbase_config_platform.h
+++ b/mali_kbase/platform/pixel/mali_kbase_config_platform.h
@@ -302,6 +302,10 @@ struct gpu_dvfs_metrics_uid_stats;
 * @dvfs.qos.bts.threshold: The G3D shader stack clock at which BTS will be enabled. Set via DT.
 * @dvfs.qos.bts.scenario: The index of the BTS scenario to be used. Set via DT.
 *
+ * @slc.lock: Synchronize updates to the SLC partition accounting variables.
+ * @slc.demand: The total demand for SLC space, an aggregation of each kctx's demand.
+ * @slc.usage: The total amount of SLC space used, an aggregation of each kctx's usage.
+ *
 * @itmon.wq: A workqueue for ITMON page table search.
 * @itmon.work: The work item for the above.
 * @itmon.nb: The ITMON notifier block.
@@ -414,6 +418,12 @@ struct pixel_context {
 	} dvfs;
 #endif /* CONFIG_MALI_MIDGARD_DVFS */
 
+	struct {
+		struct mutex lock;
+		u64 demand;
+		u64 usage;
+	} slc;
+
 #if IS_ENABLED(CONFIG_EXYNOS_ITMON)
 	struct {
 		struct workqueue_struct *wq;
@@ -430,10 +440,21 @@ struct pixel_context {
 *
 * @kctx: Handle to the parent kctx
 * @stats: Tracks the dvfs metrics for the UID associated with this context
+ *
+ * @slc.peak_demand: The parent context's maximum demand for SLC space
+ * @slc.peak_usage: The parent context's maximum use of SLC space
+ * @slc.idle_work: Work item used to queue an SLC partition shrink upon context idle
+ * @slc.idle_work_cancelled: Flag for async cancellation of idle_work
 */
 struct pixel_platform_data {
 	struct kbase_context *kctx;
 	struct gpu_dvfs_metrics_uid_stats* stats;
+
+	struct {
+		u64 peak_demand;
+		u64 peak_usage;
+		struct work_struct idle_work;
+		atomic_t idle_work_cancelled;
+	} slc;
 };
 
 #endif /* _KBASE_CONFIG_PLATFORM_H_ */
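
Taken together, these fields encode a simple invariant: the device-wide demand and usage in struct pixel_context are the sums of the per-context peaks in struct pixel_platform_data, and every update first subtracts a context's previous contribution before adding the recomputed one. A minimal userspace sketch of that accounting, with all struct and function names hypothetical rather than taken from the driver:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the pixel_context / pixel_platform_data fields. */
struct device_slc { uint64_t demand, usage; };
struct kctx_slc   { uint64_t peak_demand, peak_usage; };

/* Replace a context's contribution with freshly computed peaks. */
static void update_contribution(struct device_slc *dev, struct kctx_slc *ctx,
                                uint64_t new_peak_demand, uint64_t new_peak_usage)
{
	dev->demand -= ctx->peak_demand; /* remove the previous contribution */
	dev->usage  -= ctx->peak_usage;
	ctx->peak_demand = new_peak_demand;
	ctx->peak_usage  = new_peak_usage;
	dev->demand += ctx->peak_demand; /* add the new one */
	dev->usage  += ctx->peak_usage;
}

int main(void)
{
	struct device_slc dev = {0};
	struct kctx_slc a = {0}, b = {0};

	update_contribution(&dev, &a, 4096, 4096);
	update_contribution(&dev, &b, 8192, 0);
	update_contribution(&dev, &a, 2048, 2048); /* a's demand shrank */
	printf("demand=%llu usage=%llu\n",
	       (unsigned long long)dev.demand, (unsigned long long)dev.usage);
	return 0; /* prints demand=10240 usage=2048 */
}

In the driver the subtract-then-add sequence is what slc.lock serializes; without it two contexts updating concurrently could tear the device-wide totals.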
diff --git a/mali_kbase/platform/pixel/pixel_gpu_slc.c b/mali_kbase/platform/pixel/pixel_gpu_slc.c
index 8e46be1..d6cb131 100644
--- a/mali_kbase/platform/pixel/pixel_gpu_slc.c
+++ b/mali_kbase/platform/pixel/pixel_gpu_slc.c
@@ -17,6 +17,321 @@
 #include "mali_kbase_config_platform.h"
 #include "pixel_gpu_slc.h"
 
+struct dirty_region {
+	u64 first_vpfn;
+	u64 last_vpfn;
+	u64 dirty_pgds;
+};
+
+/**
+ * struct gpu_slc_liveness_update_info - Buffer info, and live ranges
+ *
+ * @buffer_va: Array of buffer base virtual addresses
+ * @buffer_sizes: Array of buffer sizes
+ * @buffer_count: Number of elements in the va and sizes buffers
+ * @live_ranges: Array of &struct kbase_pixel_gpu_slc_liveness_mark denoting live ranges for
+ *               each buffer
+ * @live_ranges_count: Number of elements in the live ranges buffer
+ */
+struct gpu_slc_liveness_update_info {
+	u64* buffer_va;
+	u64* buffer_sizes;
+	u64 buffer_count;
+	struct kbase_pixel_gpu_slc_liveness_mark* live_ranges;
+	u64 live_ranges_count;
+};
+
+/**
+ * gpu_slc_lock_as - Lock the current process address space
+ *
+ * @kctx: The &struct kbase_context
+ */
+static void gpu_slc_lock_as(struct kbase_context *kctx)
+{
+	down_write(kbase_mem_get_process_mmap_lock());
+	kbase_gpu_vm_lock(kctx);
+}
+
+/**
+ * gpu_slc_unlock_as - Unlock the current process address space
+ *
+ * @kctx: The &struct kbase_context
+ */
+static void gpu_slc_unlock_as(struct kbase_context *kctx)
+{
+	kbase_gpu_vm_unlock(kctx);
+	up_write(kbase_mem_get_process_mmap_lock());
+}
+
+/**
+ * gpu_slc_in_group - Check whether the region is SLC cacheable
+ *
+ * @reg: The gpu memory region to check for an SLC cacheable memory group.
+ */
+static bool gpu_slc_in_group(struct kbase_va_region* reg)
+{
+	return reg->gpu_alloc->group_id == MGM_SLC_GROUP_ID;
+}
+
+/**
+ * gpu_slc_get_region - Find the gpu memory region from a virtual address
+ *
+ * @kctx: The &struct kbase_context
+ * @va: The base gpu virtual address of the region
+ *
+ * Return: On success, returns a valid memory region. On failure, NULL is returned.
+ */
+static struct kbase_va_region* gpu_slc_get_region(struct kbase_context *kctx, u64 va)
+{
+	struct kbase_va_region *reg;
+
+	if (!va)
+		goto invalid;
+
+	if ((va & ~PAGE_MASK) && (va >= PAGE_SIZE))
+		goto invalid;
+
+	/* Find the region that the virtual address belongs to */
+	reg = kbase_region_tracker_find_region_base_address(kctx, va);
+
+	/* Validate the region */
+	if (kbase_is_region_invalid_or_free(reg))
+		goto invalid;
+
+	return reg;
+
+invalid:
+	dev_dbg(kctx->kbdev->dev, "pixel: failed to find valid region for gpu_va: %llu", va);
+	return NULL;
+}
+
+/**
+ * gpu_slc_migrate_region - Add PBHA that will make the pages SLC cacheable
+ *
+ * @kctx: The &struct kbase_context
+ * @reg: The gpu memory region to migrate to an SLC cacheable memory group
+ * @dirty_reg: The &struct dirty_region containing the extent of the dirty page table entries
+ */
+static void gpu_slc_migrate_region(struct kbase_context *kctx, struct kbase_va_region *reg, struct dirty_region *dirty_reg)
+{
+	int err;
+	u64 vpfn;
+	size_t page_nr;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(reg);
+
+	if (gpu_slc_in_group(reg)) {
+		return;
+	}
+
+	vpfn = reg->start_pfn;
+	page_nr = kbase_reg_current_backed_size(reg);
+
+	err = kbase_mmu_update_pages_no_flush(kctx->kbdev, &kctx->mmu, vpfn,
+					      kbase_get_gpu_phy_pages(reg),
+					      page_nr,
+					      reg->flags,
+					      MGM_SLC_GROUP_ID,
+					      &dirty_reg->dirty_pgds);
+
+	/* Track the dirty region */
+	dirty_reg->first_vpfn = min(dirty_reg->first_vpfn, vpfn);
+	dirty_reg->last_vpfn = max(dirty_reg->last_vpfn, vpfn + page_nr);
+
+	if (err)
+		dev_warn(kctx->kbdev->dev, "pixel: failed to move region to SLC: %d", err);
+	else
+		/* If everything is good, then set the new group on the region. */
+		reg->gpu_alloc->group_id = MGM_SLC_GROUP_ID;
+}
+
+/**
+ * gpu_slc_flush_dirty_region - Perform an MMU flush for a dirty page region
+ *
+ * @kctx: The &struct kbase_context
+ * @dirty_reg: The &struct dirty_region containing the extent of the dirty page table entries
+ */
+static void gpu_slc_flush_dirty_region(struct kbase_context *kctx, struct dirty_region *dirty_reg)
+{
+	size_t const dirty_page_nr =
+		(dirty_reg->last_vpfn - min(dirty_reg->first_vpfn, dirty_reg->last_vpfn));
+
+	if (!dirty_page_nr)
+		return;
+
+	kbase_mmu_flush_invalidate_update_pages(
+		kctx->kbdev, kctx, dirty_reg->first_vpfn, dirty_page_nr, dirty_reg->dirty_pgds);
+}
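
The dirty_region bookkeeping in the two functions above reduces to accumulating a single [first, last) pfn interval across every migrated region, so that one MMU flush at the end covers them all; initializing first_vpfn to U64_MAX and applying min() at flush time make the empty interval a no-op. A standalone sketch of that interval arithmetic (names hypothetical):

#include <stdint.h>
#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

struct extent { uint64_t first, last; }; /* [first, last); empty while first > last */

/* Widen the extent to cover a region of 'pages' pages starting at 'start'. */
static void extent_add(struct extent *e, uint64_t start, uint64_t pages)
{
	e->first = MIN(e->first, start);
	e->last  = MAX(e->last, start + pages);
}

/* Mirrors the flush-time computation: returns 0 when nothing was added. */
static uint64_t extent_pages(const struct extent *e)
{
	return e->last - MIN(e->first, e->last);
}

int main(void)
{
	struct extent e = { UINT64_MAX, 0 };

	printf("empty: %llu pages\n", (unsigned long long)extent_pages(&e));
	extent_add(&e, 100, 16); /* region at pfn 100, 16 pages */
	extent_add(&e, 50, 8);   /* an earlier region widens the extent */
	printf("flush [%llu, %llu): %llu pages\n",
	       (unsigned long long)e.first, (unsigned long long)e.last,
	       (unsigned long long)extent_pages(&e));
	return 0; /* flush [50, 116): 66 pages */
}

The trade-off is that one wide flush may cover pages that were never dirtied, in exchange for issuing a single invalidate instead of one per region.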
+
+/**
+ * gpu_slc_resize_partition - Attempt to resize the GPU's SLC partition to meet demand.
+ *
+ * @kbdev: The &struct kbase_device for the GPU.
+ */
+static void gpu_slc_resize_partition(struct kbase_device* kbdev)
+{
+	struct pixel_context *pc = kbdev->platform_context;
+
+	/* Request that the mgm select an SLC partition that fits our demand */
+	pixel_mgm_resize_group_to_fit(kbdev->mgm_dev, MGM_SLC_GROUP_ID, pc->slc.demand);
+
+	dev_dbg(kbdev->dev, "pixel: resized GPU SLC partition to meet demand: %llu", pc->slc.demand);
+}
+
+/**
+ * gpu_slc_get_partition_size - Query the current size of the GPU's SLC partition.
+ *
+ * @kbdev: The &struct kbase_device for the GPU.
+ *
+ * Return: The size of the GPU's SLC partition.
+ */
+static u64 gpu_slc_get_partition_size(struct kbase_device* kbdev)
+{
+	u64 const partition_size = pixel_mgm_query_group_size(kbdev->mgm_dev, MGM_SLC_GROUP_ID);
+
+	dev_dbg(kbdev->dev, "pixel: GPU SLC partition size: %llu", partition_size);
+
+	return partition_size;
+}
+
+/**
+ * gpu_slc_liveness_update - Respond to a liveness update by trying to put the new buffers into
+ *                           free SLC space, and resizing the partition to meet demand.
+ *
+ * @kctx: The &struct kbase_context corresponding to a user space context which sent the liveness
+ *        update
+ * @info: See struct gpu_slc_liveness_update_info
+ */
+static void gpu_slc_liveness_update(struct kbase_context* kctx,
+				    struct gpu_slc_liveness_update_info* info)
+{
+	struct kbase_device* kbdev = kctx->kbdev;
+	struct pixel_context *pc = kbdev->platform_context;
+	struct pixel_platform_data *kctx_pd = kctx->platform_data;
+	struct dirty_region dirty_reg = {
+		.first_vpfn = U64_MAX,
+		.last_vpfn = 0,
+		.dirty_pgds = 0,
+	};
+	u64 current_usage = 0;
+	u64 current_demand = 0;
+	u64 free_space;
+	int i;
+
+	/* Lock the process address space before modifying ATEs */
+	gpu_slc_lock_as(kctx);
+
+	/* Synchronize updates to the partition size and usage */
+	mutex_lock(&pc->slc.lock);
+
+	dev_dbg(kbdev->dev, "pixel: buffer liveness update received");
+
+	/* Remove the usage and demand from the previous liveness update */
+	pc->slc.demand -= kctx_pd->slc.peak_demand;
+	pc->slc.usage -= kctx_pd->slc.peak_usage;
+	kctx_pd->slc.peak_demand = 0;
+	kctx_pd->slc.peak_usage = 0;
+
+	/* Calculate the remaining free space in the SLC partition (floored at 0) */
+	free_space = gpu_slc_get_partition_size(kbdev);
+	free_space -= min(free_space, pc->slc.usage);
+
+	for (i = 0; i < info->live_ranges_count; ++i) {
+		struct kbase_va_region *reg;
+		u64 size;
+		u64 va;
+		u32 index = info->live_ranges[i].index;
+
+		if (unlikely(index >= info->buffer_count))
+			continue;
+
+		size = info->buffer_sizes[index];
+		va = info->buffer_va[index];
+
+		reg = gpu_slc_get_region(kctx, va);
+		if (!reg)
+			continue;
+
+		switch (info->live_ranges[i].type) {
+		case KBASE_PIXEL_GPU_LIVE_RANGE_BEGIN:
+			/* Update demand as though there's no size limit */
+			current_demand += size;
+			kctx_pd->slc.peak_demand = max(kctx_pd->slc.peak_demand, current_demand);
+
+			/* Check whether there's free space in the partition to store the buffer */
+			if (free_space >= current_usage + size)
+				gpu_slc_migrate_region(kctx, reg, &dirty_reg);
+
+			/* This may be true even if the space check above failed, as a previous
+			 * call to this function may have migrated the region. In that scenario
+			 * current_usage may exceed the available free_space and we will be
+			 * oversubscribed to the SLC partition. We could migrate the region back
+			 * to the non-SLC group, but that would require an SLC flush, so for now
+			 * we do nothing.
+			 */
+			if (gpu_slc_in_group(reg)) {
+				current_usage += size;
+				kctx_pd->slc.peak_usage = max(kctx_pd->slc.peak_usage, current_usage);
+			}
+			break;
+		case KBASE_PIXEL_GPU_LIVE_RANGE_END:
+			current_demand -= size;
+			if (gpu_slc_in_group(reg))
+				current_usage -= size;
+			break;
+		}
+	}
+
+	/* Perform a single page table flush */
+	gpu_slc_flush_dirty_region(kctx, &dirty_reg);
+
+	/* A non-zero value here indicates a missing live range end marker */
+	WARN_ON_ONCE(current_demand != 0 || current_usage != 0);
+
+	/* Update the total usage and demand */
+	pc->slc.demand += kctx_pd->slc.peak_demand;
+	pc->slc.usage += kctx_pd->slc.peak_usage;
+
+	dev_dbg(kbdev->dev,
+		"pixel: kctx_%d, peak_demand: %llu, peak_usage: %llu",
+		kctx->id,
+		kctx_pd->slc.peak_demand,
+		kctx_pd->slc.peak_usage);
+	dev_dbg(kbdev->dev, "pixel: kbdev, demand: %llu, usage: %llu", pc->slc.demand, pc->slc.usage);
+
+	/* Trigger partition resize based on the new demand */
+	gpu_slc_resize_partition(kctx->kbdev);
+
+	mutex_unlock(&pc->slc.lock);
+	gpu_slc_unlock_as(kctx);
+}
+
+static void gpu_slc_kctx_idle_worker(struct work_struct *work)
+{
+	struct pixel_platform_data *pd =
+		container_of(work, struct pixel_platform_data, slc.idle_work);
+	struct kbase_context *kctx = pd->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct pixel_context *pc = kbdev->platform_context;
+
+	if (atomic_read(&pd->slc.idle_work_cancelled))
+		return;
+
+	mutex_lock(&pc->slc.lock);
+
+	pc->slc.demand -= pd->slc.peak_demand;
+	pc->slc.usage -= pd->slc.peak_usage;
+
+	pd->slc.peak_demand = 0;
+	pd->slc.peak_usage = 0;
+
+	gpu_slc_resize_partition(kctx->kbdev);
+
+	mutex_unlock(&pc->slc.lock);
+}
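
The BEGIN/END loop above is a running-sum scan over the live-range marks: demand rises at each BEGIN, falls at each END, and the peak of the running sum is what the context charges against the partition. A runnable model of the demand side of that scan, with the mark layout simplified from the driver's struct (all names here hypothetical):

#include <stdint.h>
#include <stdio.h>

enum mark_type { LIVE_BEGIN, LIVE_END };

struct mark { enum mark_type type; uint32_t index; };

/* Scan the marks and return the peak concurrent demand, as in the BEGIN/END
 * accounting of gpu_slc_liveness_update(). */
static uint64_t peak_demand(const struct mark *marks, size_t mark_count,
                            const uint64_t *sizes, size_t buffer_count)
{
	uint64_t current = 0, peak = 0;

	for (size_t i = 0; i < mark_count; ++i) {
		if (marks[i].index >= buffer_count)
			continue; /* out-of-range indices are skipped, as in the driver */
		if (marks[i].type == LIVE_BEGIN) {
			current += sizes[marks[i].index];
			if (current > peak)
				peak = current;
		} else {
			current -= sizes[marks[i].index];
		}
	}
	return peak; /* 'current' ends at 0 if every BEGIN has a matching END */
}

int main(void)
{
	uint64_t sizes[] = { 100, 200, 300 };
	struct mark marks[] = {
		{ LIVE_BEGIN, 0 }, { LIVE_BEGIN, 1 }, /* buffers 0 and 1 overlap */
		{ LIVE_END, 0 },
		{ LIVE_BEGIN, 2 },                    /* buffer 2 overlaps only 1 */
		{ LIVE_END, 2 }, { LIVE_END, 1 },
	};

	printf("peak = %llu\n",
	       (unsigned long long)peak_demand(marks, 6, sizes, 3));
	return 0; /* peak = 500: buffers 1 and 2 are live together */
}

The driver runs the same scan twice over, in effect: once unconditionally for peak_demand, and once gated on whether each region actually made it into the SLC group for peak_usage.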
+
 /**
  * gpu_pixel_handle_buffer_liveness_update_ioctl() - See gpu_slc_liveness_update
  *
@@ -30,8 +345,80 @@
 int gpu_pixel_handle_buffer_liveness_update_ioctl(struct kbase_context* kctx,
                                                   struct kbase_ioctl_buffer_liveness_update* update)
 {
-	(void)kctx, (void)update;
-	return 0;
+	int err = -EINVAL;
+	struct gpu_slc_liveness_update_info info;
+	u64* buff = NULL;
+	u64 total_buff_size;
+
+	/* Compute the sizes of the user space arrays that we need to copy */
+	u64 const buffer_info_size = sizeof(u64) * update->buffer_count;
+	u64 const live_ranges_size =
+		sizeof(struct kbase_pixel_gpu_slc_liveness_mark) * update->live_ranges_count;
+
+	/* Guard against overflows and empty sizes */
+	if (!buffer_info_size || !live_ranges_size)
+		goto done;
+	if (U64_MAX / sizeof(u64) < update->buffer_count)
+		goto done;
+	if (U64_MAX / sizeof(struct kbase_pixel_gpu_slc_liveness_mark) < update->live_ranges_count)
+		goto done;
+	/* Guard against NULL pointers */
+	if (!update->live_ranges_address || !update->buffer_va_address || !update->buffer_sizes_address)
+		goto done;
+	/* Calculate the total buffer size required and detect overflows */
+	if ((U64_MAX - live_ranges_size) / 2 < buffer_info_size)
+		goto done;
+
+	total_buff_size = buffer_info_size * 2 + live_ranges_size;
+
+	/* Allocate the memory we require to copy from user space */
+	buff = kmalloc(total_buff_size, GFP_KERNEL);
+	if (buff == NULL) {
+		dev_err(kctx->kbdev->dev, "pixel: failed to allocate buffer for liveness update");
+		err = -ENOMEM;
+		goto done;
+	}
+
+	/* Set up the info struct by pointing into the allocation. All fields are 8 byte aligned */
+	info = (struct gpu_slc_liveness_update_info){
+		.buffer_va = buff,
+		.buffer_sizes = buff + update->buffer_count,
+		.buffer_count = update->buffer_count,
+		.live_ranges = (struct kbase_pixel_gpu_slc_liveness_mark*)(buff + update->buffer_count * 2),
+		.live_ranges_count = update->live_ranges_count,
+	};
+
+	/* Copy the data from user space */
+	err = copy_from_user(info.live_ranges, u64_to_user_ptr(update->live_ranges_address),
+			     live_ranges_size);
+	if (err) {
+		dev_err(kctx->kbdev->dev, "pixel: failed to copy live ranges");
+		err = -EFAULT;
+		goto done;
+	}
+
+	err = copy_from_user(info.buffer_sizes, u64_to_user_ptr(update->buffer_sizes_address),
+			     buffer_info_size);
+	if (err) {
+		dev_err(kctx->kbdev->dev, "pixel: failed to copy buffer sizes");
+		err = -EFAULT;
+		goto done;
+	}
+
+	err = copy_from_user(info.buffer_va, u64_to_user_ptr(update->buffer_va_address),
+			     buffer_info_size);
+	if (err) {
+		dev_err(kctx->kbdev->dev, "pixel: failed to copy buffer addresses");
+		err = -EFAULT;
+		goto done;
+	}
+
+	/* Execute an SLC update */
+	gpu_slc_liveness_update(kctx, &info);
+
+done:
+	kfree(buff);
+
+	return err;
 }
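
The validation pattern in this handler — bounding each element count by U64_MAX / element_size before multiplying, then carving one kmalloc allocation into the three arrays — can be exercised in isolation. A userspace sketch of the same size checks and pointer layout (helper and type names hypothetical):

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct mark { uint32_t data; }; /* stand-in for struct kbase_pixel_gpu_slc_liveness_mark */

/* Return one buffer laid out as [va[count] | sizes[count] | marks[mark_count]],
 * or NULL when a size is zero or any of the u64 computations would overflow. */
static uint64_t *alloc_update_buffers(uint64_t count, uint64_t mark_count,
                                      uint64_t **sizes_out, struct mark **marks_out)
{
	uint64_t info_size, marks_size;
	uint64_t *buff;

	if (!count || !mark_count)
		return NULL;
	if (count > UINT64_MAX / sizeof(uint64_t))
		return NULL; /* count * sizeof(u64) would overflow */
	if (mark_count > UINT64_MAX / sizeof(struct mark))
		return NULL;
	info_size = count * sizeof(uint64_t);
	marks_size = mark_count * sizeof(struct mark);
	if (info_size > (UINT64_MAX - marks_size) / 2)
		return NULL; /* 2 * info_size + marks_size would overflow */

	buff = malloc(2 * info_size + marks_size);
	if (!buff)
		return NULL;

	/* Three arrays share one allocation; each slice stays 8-byte aligned. */
	*sizes_out = buff + count;
	*marks_out = (struct mark *)(buff + 2 * count);
	return buff; /* the buffer_va slice starts at the base */
}

int main(void)
{
	uint64_t *sizes;
	struct mark *marks;
	uint64_t *va = alloc_update_buffers(4, 8, &sizes, &marks);

	if (va) {
		memset(va, 0, 4 * sizeof(*va)); /* the ioctl copy_from_user()s into these */
		free(va);
	}
	return 0;
}

Dividing the limit rather than multiplying the count is the key move: the comparison itself can never overflow, so a hostile count is rejected before any arithmetic that could wrap.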
 
 /**
@@ -46,7 +433,10 @@ int gpu_pixel_handle_buffer_liveness_update_ioctl(struct kbase_context* kctx,
  */
 int gpu_slc_kctx_init(struct kbase_context *kctx)
 {
-	(void)kctx;
+	struct pixel_platform_data *pd = kctx->platform_data;
+
+	INIT_WORK(&pd->slc.idle_work, gpu_slc_kctx_idle_worker);
+
 	return 0;
 }
 
@@ -54,10 +444,28 @@ int gpu_slc_kctx_init(struct kbase_context *kctx)
  * gpu_slc_kctx_term() - Called when a kernel context is terminated
  *
  * @kctx: The &struct kbase_context that is being terminated
+ *
+ * Free up SLC space used by the buffers that this context owns.
  */
 void gpu_slc_kctx_term(struct kbase_context *kctx)
 {
-	(void)kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct pixel_context *pc = kbdev->platform_context;
+	struct pixel_platform_data *kctx_pd = kctx->platform_data;
+
+	atomic_set(&kctx_pd->slc.idle_work_cancelled, 1);
+	cancel_work_sync(&kctx_pd->slc.idle_work);
+
+	mutex_lock(&pc->slc.lock);
+
+	/* Deduct the usage and demand, freeing that SLC space for the next update */
+	pc->slc.demand -= kctx_pd->slc.peak_demand;
+	pc->slc.usage -= kctx_pd->slc.peak_usage;
+
+	/* Trigger partition resize based on the new demand */
+	gpu_slc_resize_partition(kctx->kbdev);
+
+	mutex_unlock(&pc->slc.lock);
 }
 
 /**
@@ -67,7 +475,19 @@ void gpu_slc_kctx_term(struct kbase_context *kctx)
  */
 void gpu_slc_kctx_active(struct kbase_context *kctx)
 {
-	(void)kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct pixel_platform_data *pd = kctx->platform_data;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Asynchronously cancel the idle work, since we're in atomic context.
+	 * The goal here is not to ensure that the idle_work doesn't run. Instead, we need to
+	 * ensure that any queued idle_work does not run *after* a liveness update for the now
+	 * active kctx. Either the idle_work is executing now and beats the cancellation check,
+	 * or it runs later and early-exits at the cancellation check.
+	 * In neither scenario will a 'cancelled' idle_work interfere with a later liveness
+	 * update.
+	 */
+	atomic_set(&pd->slc.idle_work_cancelled, 1);
 }
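
The cancel-flag handshake used by gpu_slc_kctx_active() and the idle worker can be modeled with C11 atomics and a thread standing in for the workqueue: setting the flag never blocks (which is what the atomic context requires), and a worker that has already passed its check simply finishes one harmless run. A rough sketch (compile with -pthread; deliberately racy in the same benign way the comment above describes):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int cancelled;

/* Stands in for gpu_slc_kctx_idle_worker(): check the flag, then do the work. */
static void *idle_worker(void *arg)
{
	(void)arg;
	if (atomic_load(&cancelled))
		return NULL; /* early exit: the context went active again */
	puts("worker: shrinking SLC partition");
	return NULL;
}

int main(void)
{
	pthread_t t;

	/* kctx idle: un-cancel the flag, then queue the worker */
	atomic_store(&cancelled, 0);
	pthread_create(&t, NULL, idle_worker, NULL);

	/* kctx active: cancel asynchronously, never blocking on the worker */
	atomic_store(&cancelled, 1);

	/* Whether the message prints depends on the race; correctness does not. */
	pthread_join(t, NULL);
	return 0;
}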
 
 /**
@@ -77,11 +497,22 @@ void gpu_slc_kctx_active(struct kbase_context *kctx)
  */
 void gpu_slc_kctx_idle(struct kbase_context *kctx)
 {
-	(void)kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct pixel_platform_data *pd = kctx->platform_data;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* In the event that this line 'un-cancels' an idle_work that is currently executing, we
+	 * will re-queue the work on the following line anyway, resulting in an unnecessary
+	 * additional execution of the worker. While not optimal, this won't result in a
+	 * correctness problem.
+	 */
+	atomic_set(&pd->slc.idle_work_cancelled, 0);
+	queue_work(system_highpri_wq, &pd->slc.idle_work);
 }
 
 /**
- * gpu_slc_init - Initialize the SLC context for the GPU
+ * gpu_slc_init - Initialize the SLC partition for the GPU
  *
  * @kbdev: The &struct kbase_device for the GPU.
  *
@@ -89,11 +520,15 @@ void gpu_slc_kctx_idle(struct kbase_context *kctx)
  */
 int gpu_slc_init(struct kbase_device *kbdev)
 {
+	struct pixel_context *pc = kbdev->platform_context;
+
+	mutex_init(&pc->slc.lock);
+
 	return 0;
 }
 
 /**
- * gpu_slc_term() - Terminates the Pixel GPU SLC context.
+ * gpu_slc_term() - Terminates the Pixel GPU SLC partition.
  *
  * @kbdev: The &struct kbase_device for the GPU.
  */