path: root/mali_kbase/platform/pixel/pixel_gpu_slc.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright 2022-2023 Google LLC.
 *
 * Author: Jack Diver <diverj@google.com>
 */

/* Mali core includes */
#include <mali_kbase.h>

/* UAPI includes */
#include <uapi/gpu/arm/midgard/platform/pixel/pixel_gpu_common_slc.h>
/* Back-door mali_pixel include */
#include <uapi/gpu/arm/midgard/platform/pixel/pixel_memory_group_manager.h>

/* Pixel integration includes */
#include "mali_kbase_config_platform.h"
#include "pixel_gpu_slc.h"

struct dirty_region {
	u64 first_vpfn;
	u64 last_vpfn;
	u64 dirty_pgds;
};

/**
 * struct gpu_slc_liveness_update_info - Buffer info and live ranges
 *
 * @buffer_va:         Array of buffer base virtual addresses
 * @buffer_sizes:      Array of buffer sizes
 * @buffer_count:      Number of elements in the va and sizes buffers
 * @live_ranges:       Array of &struct kbase_pixel_gpu_slc_liveness_mark denoting live ranges for
 *                     each buffer
 * @live_ranges_count: Number of elements in the live ranges buffer
 */
struct gpu_slc_liveness_update_info {
	u64* buffer_va;
	u64* buffer_sizes;
	u64 buffer_count;
	struct kbase_pixel_gpu_slc_liveness_mark* live_ranges;
	u64 live_ranges_count;
};
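
/*
 * Illustrative example (not part of the driver): for two buffers A (index 0)
 * and B (index 1) with overlapping lifetimes, the mark stream would be
 *
 *   BEGIN(0), BEGIN(1), END(0), END(1)
 *
 * where each index selects an entry in buffer_va[] and buffer_sizes[]. Every
 * BEGIN mark is expected to be matched by an END mark; see the WARN_ON_ONCE()
 * in gpu_slc_liveness_update().
 */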

/**
 * gpu_slc_lock_as - Lock the current process address space
 *
 * @kctx:  The &struct kbase_context
 */
static void gpu_slc_lock_as(struct kbase_context *kctx)
{
	down_write(kbase_mem_get_process_mmap_lock());
	kbase_gpu_vm_lock_with_pmode_sync(kctx);
}

/**
 * gpu_slc_unlock_as - Unlock the current process address space
 *
 * @kctx:  The &struct kbase_context
 */
static void gpu_slc_unlock_as(struct kbase_context *kctx)
{
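	/* Release in the reverse order that gpu_slc_lock_as() acquired. */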
	kbase_gpu_vm_unlock_with_pmode_sync(kctx);
	up_write(kbase_mem_get_process_mmap_lock());
}

/**
 * gpu_slc_in_group - Check whether the region is SLC cacheable
 *
 * @reg:   The gpu memory region to check for an SLC cacheable memory group.
 *
 * Return: True if @reg belongs to the SLC cacheable memory group, false otherwise.
 */
static bool gpu_slc_in_group(struct kbase_va_region* reg)
{
	return reg->gpu_alloc->group_id == MGM_SLC_GROUP_ID;
}

/**
 * gpu_slc_get_region - Find the gpu memory region from a virtual address
 *
 * @kctx:  The &struct kbase_context
 * @va:    The base gpu virtual address of the region
 *
 * Return: On success, returns a valid memory region. On failure NULL is returned.
 */
static struct kbase_va_region* gpu_slc_get_region(struct kbase_context *kctx, u64 va)
{
	struct kbase_va_region *reg;

	if (!va)
		goto invalid;

	if ((va & ~PAGE_MASK) && (va >= PAGE_SIZE))
		goto invalid;

	/* Find the region that the virtual address belongs to */
	reg = kbase_region_tracker_find_region_base_address(kctx, va);

	/* Validate the region */
	if (kbase_is_region_invalid_or_free(reg))
		goto invalid;
	/* Might be shrunk */
	if (kbase_is_region_shrinkable(reg))
		goto invalid;
	/* Driver internal alloc */
	if (kbase_va_region_is_no_user_free(reg))
		goto invalid;

	return reg;

invalid:
	dev_dbg(kctx->kbdev->dev, "pixel: failed to find valid region for gpu_va: 0x%llx", va);
	return NULL;
}

/**
 * gpu_slc_migrate_region - Add PBHA that will make the pages SLC cacheable
 *
 * @kctx:      The &struct kbase_context
 * @reg:       The gpu memory region migrate to an SLC cacheable memory group
 * @dirty_reg: The &struct dirty_region containing the extent of the dirty page table entries
 */
static void gpu_slc_migrate_region(struct kbase_context *kctx, struct kbase_va_region *reg, struct dirty_region *dirty_reg)
{
	int err;
	u64 vpfn;
	size_t page_nr;

	KBASE_DEBUG_ASSERT(kctx);
	KBASE_DEBUG_ASSERT(reg);

	if (gpu_slc_in_group(reg))
		return;

	vpfn = reg->start_pfn;
	page_nr = kbase_reg_current_backed_size(reg);

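	/* Re-write the ATEs so the pages pick up the SLC group's PBHA bits. No
	 * flush is issued here; the caller batches a single flush for all
	 * migrated regions via gpu_slc_flush_dirty_region().
	 */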
	err = kbase_mmu_update_pages_no_flush(kctx->kbdev, &kctx->mmu, vpfn,
			kbase_get_gpu_phy_pages(reg),
			page_nr,
			reg->flags,
			MGM_SLC_GROUP_ID,
			&dirty_reg->dirty_pgds);

	/* Track the dirty region */
	dirty_reg->first_vpfn = min(dirty_reg->first_vpfn, vpfn);
	dirty_reg->last_vpfn = max(dirty_reg->last_vpfn, vpfn + page_nr);

	if (err)
		dev_warn(kctx->kbdev->dev, "pixel: failed to move region to SLC: %d", err);
	else
		/* If everything is good, then set the new group on the region. */
		reg->gpu_alloc->group_id = MGM_SLC_GROUP_ID;
}

/**
 * gpu_slc_flush_dirty_region - Perform an MMU flush for a dirty page region
 *
 * @kctx:      The &struct kbase_context
 * @dirty_reg: The &struct dirty_region containing the extent of the dirty page table entries
 */
static void gpu_slc_flush_dirty_region(struct kbase_context *kctx, struct dirty_region *dirty_reg)
{
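	/* When no region was migrated, first_vpfn (U64_MAX) exceeds last_vpfn
	 * (0); the min() below then yields zero dirty pages and the flush is
	 * skipped.
	 */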
	size_t const dirty_page_nr =
	    (dirty_reg->last_vpfn - min(dirty_reg->first_vpfn, dirty_reg->last_vpfn));

	if (!dirty_page_nr)
		return;

	kbase_mmu_flush_invalidate_update_pages(
	    kctx->kbdev, kctx, dirty_reg->first_vpfn, dirty_page_nr, dirty_reg->dirty_pgds);
}

/**
 * gpu_slc_resize_partition - Attempt to resize the GPU's SLC partition to meet demand.
 *
 * @kbdev: The &struct kbase_device for the GPU.
 */
static void gpu_slc_resize_partition(struct kbase_device* kbdev)
{
	struct pixel_context *pc = kbdev->platform_context;

	/* Request that the mgm select an SLC partition that fits our demand */
	pixel_mgm_resize_group_to_fit(kbdev->mgm_dev, MGM_SLC_GROUP_ID, pc->slc.demand);

	dev_dbg(kbdev->dev, "pixel: resized GPU SLC partition to meet demand: %llu", pc->slc.demand);
}

/**
 * gpu_slc_get_partition_size - Query the current size of the GPU's SLC partition.
 *
 * @kbdev: The &struct kbase_device for the GPU.
 *
 * Return: The size of the GPU's SLC partition.
 */
static u64 gpu_slc_get_partition_size(struct kbase_device* kbdev)
{
	u64 const partition_size = pixel_mgm_query_group_size(kbdev->mgm_dev, MGM_SLC_GROUP_ID);

	dev_dbg(kbdev->dev, "pixel: GPU SLC partition partition size: %llu", partition_size);

	return partition_size;
}

/**
 * gpu_slc_liveness_update - Respond to a liveness update by trying to put the new buffers into free
 *                           SLC space, and resizing the partition to meet demand.
 *
 * @kctx:   The &struct kbase_context corresponding to a user space context which sent the liveness
 *          update
 * @info:   See struct gpu_slc_liveness_update_info
 */
static void gpu_slc_liveness_update(struct kbase_context* kctx,
                                    struct gpu_slc_liveness_update_info* info)
{
	struct kbase_device* kbdev = kctx->kbdev;
	struct pixel_context *pc = kbdev->platform_context;
	struct pixel_platform_data *kctx_pd = kctx->platform_data;
	struct dirty_region dirty_reg = {
		.first_vpfn = U64_MAX,
		.last_vpfn = 0,
		.dirty_pgds = 0,
	};
	u64 current_usage = 0;
	u64 current_demand = 0;
	u64 free_space;
	u64 i;

	/* Lock the process address space before modifying ATEs */
	gpu_slc_lock_as(kctx);

	/* Synchronize updates to the partition size and usage */
	mutex_lock(&pc->slc.lock);

	dev_dbg(kbdev->dev, "pixel: buffer liveness update received");

	/* Remove the usage and demand from the previous liveness update */
	pc->slc.demand -= kctx_pd->slc.peak_demand;
	pc->slc.usage -= kctx_pd->slc.peak_usage;
	kctx_pd->slc.peak_demand = 0;
	kctx_pd->slc.peak_usage = 0;

	/* Calculate the remaining free space in the SLC partition (floored at 0) */
	free_space = gpu_slc_get_partition_size(kbdev);
	free_space -= min(free_space, pc->slc.usage);

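	/* Sweep the ordered liveness marks, tracking the running demand (all
	 * live buffers) and usage (the subset resident in the SLC group), and
	 * recording this update's peak of each.
	 */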
	for (i = 0; i < info->live_ranges_count; ++i) {
		struct kbase_va_region *reg;
		u64 size;
		u64 va;
		u32 index = info->live_ranges[i].index;

		if (unlikely(index >= info->buffer_count))
			continue;

		size = info->buffer_sizes[index];
		va = info->buffer_va[index];

		reg = gpu_slc_get_region(kctx, va);
		if (!reg)
			continue;

		switch (info->live_ranges[i].type) {
		case KBASE_PIXEL_GPU_LIVE_RANGE_BEGIN:
			/* Update demand as though there's no size limit */
			current_demand += size;
			kctx_pd->slc.peak_demand = max(kctx_pd->slc.peak_demand, current_demand);

			/* Check whether there's free space in the partition to store the buffer */
			if (free_space >= current_usage + size)
				gpu_slc_migrate_region(kctx, reg, &dirty_reg);

			/* The region may already be in the SLC group even if the space
			 * check above failed, as a previous call to this function may
			 * have migrated the region.
			 * In such a scenario, the current_usage may exceed the available free_space
			 * and we will be oversubscribed to the SLC partition.
			 * We could migrate the region back to the non-SLC group, but this would
			 * require an SLC flush, so for now we do nothing.
			 */
			if (gpu_slc_in_group(reg)) {
				current_usage += size;
				kctx_pd->slc.peak_usage = max(kctx_pd->slc.peak_usage, current_usage);
			}
			break;
		case KBASE_PIXEL_GPU_LIVE_RANGE_END:
			current_demand -= size;
			if (gpu_slc_in_group(reg))
				current_usage -= size;
			break;
		}
	}
	/* Perform single page table flush */
	gpu_slc_flush_dirty_region(kctx, &dirty_reg);

	/* Indicates a missing live range end marker */
	WARN_ON_ONCE(current_demand != 0 || current_usage != 0);

	/* Update the total usage and demand */
	pc->slc.demand += kctx_pd->slc.peak_demand;
	pc->slc.usage += kctx_pd->slc.peak_usage;

	dev_dbg(kbdev->dev,
	        "pixel: kctx_%d, peak_demand: %llu, peak_usage: %llu",
	        kctx->id,
	        kctx_pd->slc.peak_demand,
	        kctx_pd->slc.peak_usage);
	dev_dbg(kbdev->dev, "pixel: kbdev, demand: %llu, usage: %llu", pc->slc.demand, pc->slc.usage);

	/* Trigger partition resize based on the new demand */
	gpu_slc_resize_partition(kctx->kbdev);

	mutex_unlock(&pc->slc.lock);
	gpu_slc_unlock_as(kctx);
}

/**
 * gpu_pixel_handle_buffer_liveness_update_ioctl() - See gpu_slc_liveness_update
 *
 * @kctx:   The &struct kbase_context corresponding to a user space context which sent the liveness
 *          update
 * @update: See struct kbase_ioctl_buffer_liveness_update
 *
 * Context: Process context. Takes and releases the process mmap lock and the kbase VM lock, and
 *          holds the SLC partition lock while updating usage and demand.
 *
 * Return: 0 on success, or a negative error code on failure.
 */
int gpu_pixel_handle_buffer_liveness_update_ioctl(struct kbase_context* kctx,
                                                  struct kbase_ioctl_buffer_liveness_update* update)
{
	int err = -EINVAL;
	struct gpu_slc_liveness_update_info info;
	u64* buff = NULL;
	u64 total_buff_size;

	/* Compute the sizes of the user space arrays that we need to copy */
	u64 const buffer_info_size = sizeof(u64) * update->buffer_count;
	u64 const live_ranges_size =
	    sizeof(struct kbase_pixel_gpu_slc_liveness_mark) * update->live_ranges_count;

	/* Guard against overflows and empty sizes */
	if (!buffer_info_size || !live_ranges_size)
		goto done;
	if (U64_MAX / sizeof(u64) < update->buffer_count)
		goto done;
	if (U64_MAX / sizeof(struct kbase_pixel_gpu_slc_liveness_mark) < update->live_ranges_count)
		goto done;
	/* Guard against nullptr */
	if (!update->live_ranges_address || !update->buffer_va_address || !update->buffer_sizes_address)
		goto done;
	/* Calculate the total buffer size required and detect overflows */
	if ((U64_MAX - live_ranges_size) / 2 < buffer_info_size)
		goto done;

	total_buff_size = buffer_info_size * 2 + live_ranges_size;

	/* Allocate the memory we require to copy from user space */
	buff = kmalloc(total_buff_size, GFP_KERNEL);
	if (buff == NULL) {
		dev_err(kctx->kbdev->dev, "pixel: failed to allocate buffer for liveness update");
		err = -ENOMEM;
		goto done;
	}

	/* Set up the info struct by pointing into the allocation. All parts are 8-byte aligned. */
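	/*
	 * Layout of the single allocation, in u64 units:
	 *   [0, buffer_count)                -> buffer_va
	 *   [buffer_count, 2 * buffer_count) -> buffer_sizes
	 *   [2 * buffer_count, ...)          -> live_ranges
	 */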
	info = (struct gpu_slc_liveness_update_info){
	    .buffer_va = buff,
	    .buffer_sizes = buff + update->buffer_count,
	    .buffer_count = update->buffer_count,
	    .live_ranges = (struct kbase_pixel_gpu_slc_liveness_mark*)(buff + update->buffer_count * 2),
	    .live_ranges_count = update->live_ranges_count,
	};

	/* Copy the data from user space */
	err =
	    copy_from_user(info.live_ranges, u64_to_user_ptr(update->live_ranges_address), live_ranges_size);
	if (err) {
		dev_err(kctx->kbdev->dev, "pixel: failed to copy live ranges");
		err = -EFAULT;
		goto done;
	}

	err = copy_from_user(
	    info.buffer_sizes, u64_to_user_ptr(update->buffer_sizes_address), buffer_info_size);
	if (err) {
		dev_err(kctx->kbdev->dev, "pixel: failed to copy buffer sizes");
		err = -EFAULT;
		goto done;
	}

	err = copy_from_user(info.buffer_va, u64_to_user_ptr(update->buffer_va_address), buffer_info_size);
	if (err) {
		dev_err(kctx->kbdev->dev, "pixel: failed to copy buffer addresses");
		err = -EFAULT;
		goto done;
	}

	/* Execute an SLC update */
	gpu_slc_liveness_update(kctx, &info);

done:
	kfree(buff);

	return err;
}
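
/*
 * Illustrative userspace usage (a sketch, not part of the driver). The ioctl
 * command name below is assumed to be the one exposed by the kbase UAPI
 * headers; two overlapping buffers a and b:
 *
 *   u64 va[2]    = { a_gpu_va, b_gpu_va };
 *   u64 sizes[2] = { a_size, b_size };
 *   struct kbase_pixel_gpu_slc_liveness_mark marks[4] = {
 *       { .type = KBASE_PIXEL_GPU_LIVE_RANGE_BEGIN, .index = 0 },
 *       { .type = KBASE_PIXEL_GPU_LIVE_RANGE_BEGIN, .index = 1 },
 *       { .type = KBASE_PIXEL_GPU_LIVE_RANGE_END,   .index = 0 },
 *       { .type = KBASE_PIXEL_GPU_LIVE_RANGE_END,   .index = 1 },
 *   };
 *   struct kbase_ioctl_buffer_liveness_update update = {
 *       .live_ranges_address  = (u64)(uintptr_t)marks,
 *       .live_ranges_count    = 4,
 *       .buffer_va_address    = (u64)(uintptr_t)va,
 *       .buffer_sizes_address = (u64)(uintptr_t)sizes,
 *       .buffer_count         = 2,
 *   };
 *   ioctl(kbase_fd, KBASE_IOCTL_BUFFER_LIVENESS_UPDATE, &update);
 */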

/**
 * gpu_slc_kctx_init() - Called when a kernel context is created
 *
 * @kctx: The &struct kbase_context that is being initialized
 *
 * This function is called when the GPU driver is initializing a new kernel context. This event is
 * used to set up data structures that will be used to track this context's usage of the SLC.
 *
 * Return: Returns 0 on success, or an error code on failure.
 */
int gpu_slc_kctx_init(struct kbase_context *kctx)
{
	(void)kctx;
	return 0;
}

/**
 * gpu_slc_kctx_term() - Called when a kernel context is terminated
 *
 * @kctx: The &struct kbase_context that is being terminated
 *
 * Free up SLC space used by the buffers that this context owns.
 */
void gpu_slc_kctx_term(struct kbase_context *kctx)
{
	struct kbase_device* kbdev = kctx->kbdev;
	struct pixel_context *pc = kbdev->platform_context;
	struct pixel_platform_data *kctx_pd = kctx->platform_data;

	mutex_lock(&pc->slc.lock);

	/* Deduct the usage and demand, freeing that SLC space for the next update */
	pc->slc.demand -= kctx_pd->slc.peak_demand;
	pc->slc.usage -= kctx_pd->slc.peak_usage;

	/* Trigger partition resize based on the new demand */
	gpu_slc_resize_partition(kctx->kbdev);

	mutex_unlock(&pc->slc.lock);
}

/**
 * gpu_slc_init - Initialize the SLC partition for the GPU
 *
 * @kbdev: The &struct kbase_device for the GPU.
 *
 * Return: On success, returns 0. On failure an error code is returned.
 */
int gpu_slc_init(struct kbase_device *kbdev)
{
	struct pixel_context *pc = kbdev->platform_context;

	mutex_init(&pc->slc.lock);

	return 0;
}

/**
 * gpu_slc_term() - Terminates the Pixel GPU SLC partition.
 *
 * @kbdev: The &struct kbase_device for the GPU.
 */
void gpu_slc_term(struct kbase_device *kbdev)
{
	(void)kbdev;
}