1 // SPDX-License-Identifier: GPL-2.0 or MIT
2 /* Copyright 2023 Collabora ltd. */
3
4 #include <linux/iosys-map.h>
5 #include <linux/rwsem.h>
6
7 #include <drm/drm_print.h>
8 #include <drm/panthor_drm.h>
9
10 #include "panthor_device.h"
11 #include "panthor_gem.h"
12 #include "panthor_heap.h"
13 #include "panthor_mmu.h"
14 #include "panthor_regs.h"
15
16 /*
17 * The GPU heap context is an opaque structure used by the GPU to track the
18 * heap allocations. The driver should only touch it to initialize it (zero all
19 * fields). Because the CPU and GPU can both access this structure it is
20 * required to be GPU cache line aligned.
21 */
22 #define HEAP_CONTEXT_SIZE 32
23
/**
 * struct panthor_heap_chunk_header - Heap chunk header
 *
 * Written by the driver when a chunk is allocated (zeroed, then @next is
 * filled for initial chunks); the rest of the chunk belongs to the GPU.
 */
struct panthor_heap_chunk_header {
	/**
	 * @next: Next heap chunk in the list.
	 *
	 * This is a GPU VA. The low 12 bits encode the chunk size in 4k
	 * units (see panthor_alloc_heap_chunk()).
	 */
	u64 next;

	/** @unknown: MBZ. */
	u32 unknown[14];
};
38
/**
 * struct panthor_heap_chunk - Structure used to keep track of allocated heap chunks.
 */
struct panthor_heap_chunk {
	/** @node: Used to insert the heap chunk in panthor_heap::chunks. */
	struct list_head node;

	/** @bo: Buffer object backing the heap chunk. */
	struct panthor_kernel_bo *bo;
};
49
/**
 * struct panthor_heap - Structure used to manage tiler heap contexts.
 */
struct panthor_heap {
	/** @chunks: List containing all heap chunks allocated so far. */
	struct list_head chunks;

	/** @lock: Lock protecting insertion in the chunks list. */
	struct mutex lock;

	/** @chunk_size: Size of each chunk. */
	u32 chunk_size;

	/** @max_chunks: Maximum number of chunks. */
	u32 max_chunks;

	/**
	 * @target_in_flight: Number of in-flight render passes after which
	 * we'd let the FW wait for fragment job to finish instead of allocating new chunks.
	 */
	u32 target_in_flight;

	/** @chunk_count: Number of heap chunks currently allocated. */
	u32 chunk_count;
};
75
76 #define MAX_HEAPS_PER_POOL 128
77
/**
 * struct panthor_heap_pool - Pool of heap contexts
 *
 * The pool is attached to a panthor_file and can't be shared across processes.
 */
struct panthor_heap_pool {
	/** @refcount: Reference count. */
	struct kref refcount;

	/** @ptdev: Device. */
	struct panthor_device *ptdev;

	/**
	 * @vm: VM this pool is bound to.
	 *
	 * Weak reference; set to NULL by panthor_heap_pool_destroy() to flag
	 * the pool as dead.
	 */
	struct panthor_vm *vm;

	/** @lock: Lock protecting access to @xa. */
	struct rw_semaphore lock;

	/** @xa: Array storing panthor_heap objects. */
	struct xarray xa;

	/** @gpu_contexts: Buffer object containing the GPU heap contexts. */
	struct panthor_kernel_bo *gpu_contexts;

	/** @size: Size of all chunks across all heaps in the pool. */
	atomic_t size;
};
105
panthor_heap_ctx_stride(struct panthor_device * ptdev)106 static int panthor_heap_ctx_stride(struct panthor_device *ptdev)
107 {
108 u32 l2_features = ptdev->gpu_info.l2_features;
109 u32 gpu_cache_line_size = GPU_L2_FEATURES_LINE_SIZE(l2_features);
110
111 return ALIGN(HEAP_CONTEXT_SIZE, gpu_cache_line_size);
112 }
113
panthor_get_heap_ctx_offset(struct panthor_heap_pool * pool,int id)114 static int panthor_get_heap_ctx_offset(struct panthor_heap_pool *pool, int id)
115 {
116 return panthor_heap_ctx_stride(pool->ptdev) * id;
117 }
118
panthor_get_heap_ctx(struct panthor_heap_pool * pool,int id)119 static void *panthor_get_heap_ctx(struct panthor_heap_pool *pool, int id)
120 {
121 return pool->gpu_contexts->kmap +
122 panthor_get_heap_ctx_offset(pool, id);
123 }
124
panthor_free_heap_chunk(struct panthor_heap_pool * pool,struct panthor_heap * heap,struct panthor_heap_chunk * chunk)125 static void panthor_free_heap_chunk(struct panthor_heap_pool *pool,
126 struct panthor_heap *heap,
127 struct panthor_heap_chunk *chunk)
128 {
129 mutex_lock(&heap->lock);
130 list_del(&chunk->node);
131 heap->chunk_count--;
132 mutex_unlock(&heap->lock);
133
134 atomic_sub(heap->chunk_size, &pool->size);
135
136 panthor_kernel_bo_destroy(chunk->bo);
137 kfree(chunk);
138 }
139
/**
 * panthor_alloc_heap_chunk() - Allocate a chunk and attach it to @heap.
 * @pool: Pool owning the heap.
 * @heap: Heap to extend.
 * @initial_chunk: True when called at heap creation time. Initial chunks get
 * linked together through their headers; chunks allocated later on growth are
 * left unlinked (hdr->next == 0).
 *
 * Return: 0 on success, a negative error code otherwise.
 */
static int panthor_alloc_heap_chunk(struct panthor_heap_pool *pool,
				    struct panthor_heap *heap,
				    bool initial_chunk)
{
	struct panthor_heap_chunk_header *hdr;
	struct panthor_heap_chunk *chunk;
	int ret;

	chunk = kmalloc_obj(*chunk);
	if (!chunk)
		return -ENOMEM;

	chunk->bo = panthor_kernel_bo_create(pool->ptdev, pool->vm, heap->chunk_size,
					     DRM_PANTHOR_BO_NO_MMAP,
					     DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
					     PANTHOR_VM_KERNEL_AUTO_VA,
					     "Tiler heap chunk");
	if (IS_ERR(chunk->bo)) {
		ret = PTR_ERR(chunk->bo);
		goto err_free_chunk;
	}

	/* Map the chunk just long enough to initialize its header. */
	ret = panthor_kernel_bo_vmap(chunk->bo);
	if (ret)
		goto err_destroy_bo;

	hdr = chunk->bo->kmap;
	memset(hdr, 0, sizeof(*hdr));

	if (initial_chunk && !list_empty(&heap->chunks)) {
		struct panthor_heap_chunk *prev =
			list_first_entry(&heap->chunks,
					 struct panthor_heap_chunk,
					 node);
		u64 prev_gpuva = panthor_kernel_bo_gpuva(prev->bo);

		/* next packs the previous chunk VA (bits [63:12]) with the
		 * chunk size in 4k units (low bits).
		 */
		hdr->next = (prev_gpuva & GENMASK_ULL(63, 12)) |
			    (heap->chunk_size >> 12);
	}

	panthor_kernel_bo_vunmap(chunk->bo);

	mutex_lock(&heap->lock);
	list_add(&chunk->node, &heap->chunks);
	heap->chunk_count++;
	mutex_unlock(&heap->lock);

	atomic_add(heap->chunk_size, &pool->size);

	return 0;

err_destroy_bo:
	panthor_kernel_bo_destroy(chunk->bo);

err_free_chunk:
	kfree(chunk);

	return ret;
}
201
panthor_free_heap_chunks(struct panthor_heap_pool * pool,struct panthor_heap * heap)202 static void panthor_free_heap_chunks(struct panthor_heap_pool *pool,
203 struct panthor_heap *heap)
204 {
205 struct panthor_heap_chunk *chunk, *tmp;
206
207 list_for_each_entry_safe(chunk, tmp, &heap->chunks, node)
208 panthor_free_heap_chunk(pool, heap, chunk);
209 }
210
/* Allocate @chunk_count initial chunks for @heap, stopping on first error. */
static int panthor_alloc_heap_chunks(struct panthor_heap_pool *pool,
				     struct panthor_heap *heap,
				     u32 chunk_count)
{
	u32 i;

	for (i = 0; i < chunk_count; i++) {
		int ret = panthor_alloc_heap_chunk(pool, heap, true);

		if (ret)
			return ret;
	}

	return 0;
}
226
227 static int
panthor_heap_destroy_locked(struct panthor_heap_pool * pool,u32 handle)228 panthor_heap_destroy_locked(struct panthor_heap_pool *pool, u32 handle)
229 {
230 struct panthor_heap *heap;
231
232 heap = xa_erase(&pool->xa, handle);
233 if (!heap)
234 return -EINVAL;
235
236 panthor_free_heap_chunks(pool, heap);
237 mutex_destroy(&heap->lock);
238 kfree(heap);
239 return 0;
240 }
241
242 /**
243 * panthor_heap_destroy() - Destroy a heap context
244 * @pool: Pool this context belongs to.
245 * @handle: Handle returned by panthor_heap_create().
246 */
panthor_heap_destroy(struct panthor_heap_pool * pool,u32 handle)247 int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle)
248 {
249 int ret;
250
251 down_write(&pool->lock);
252 ret = panthor_heap_destroy_locked(pool, handle);
253 up_write(&pool->lock);
254
255 return ret;
256 }
257
/**
 * panthor_heap_create() - Create a heap context
 * @pool: Pool to instantiate the heap context from.
 * @initial_chunk_count: Number of chunk allocated at initialization time.
 * Must be at least 1.
 * @chunk_size: The size of each chunk. Must be page-aligned and lie in the
 * [128k:8M] range.
 * @max_chunks: Maximum number of chunks that can be allocated.
 * @target_in_flight: Maximum number of in-flight render passes.
 * @heap_ctx_gpu_va: Pointer holding the GPU address of the allocated heap
 * context.
 * @first_chunk_gpu_va: Pointer holding the GPU address of the first chunk
 * assigned to the heap context.
 *
 * Return: a positive handle on success, a negative error otherwise.
 */
int panthor_heap_create(struct panthor_heap_pool *pool,
			u32 initial_chunk_count,
			u32 chunk_size,
			u32 max_chunks,
			u32 target_in_flight,
			u64 *heap_ctx_gpu_va,
			u64 *first_chunk_gpu_va)
{
	struct panthor_heap *heap;
	struct panthor_heap_chunk *first_chunk;
	struct panthor_vm *vm;
	int ret = 0;
	u32 id;

	if (initial_chunk_count == 0)
		return -EINVAL;

	if (initial_chunk_count > max_chunks)
		return -EINVAL;

	if (!IS_ALIGNED(chunk_size, PAGE_SIZE) ||
	    chunk_size < SZ_128K || chunk_size > SZ_8M)
		return -EINVAL;

	/* Take a VM ref under the pool lock so the VM stays alive while we
	 * allocate chunks without holding the lock.
	 */
	down_read(&pool->lock);
	vm = panthor_vm_get(pool->vm);
	up_read(&pool->lock);

	/* The pool has been destroyed, we can't create a new heap. */
	if (!vm)
		return -EINVAL;

	heap = kzalloc_obj(*heap);
	if (!heap) {
		ret = -ENOMEM;
		goto err_put_vm;
	}

	mutex_init(&heap->lock);
	INIT_LIST_HEAD(&heap->chunks);
	heap->chunk_size = chunk_size;
	heap->max_chunks = max_chunks;
	heap->target_in_flight = target_in_flight;

	ret = panthor_alloc_heap_chunks(pool, heap, initial_chunk_count);
	if (ret)
		goto err_free_heap;

	/* Chunks are prepended, so the first list entry is the most recently
	 * allocated one.
	 */
	first_chunk = list_first_entry(&heap->chunks,
				       struct panthor_heap_chunk,
				       node);
	*first_chunk_gpu_va = panthor_kernel_bo_gpuva(first_chunk->bo);

	/* Re-check the pool state under the write lock before publishing the
	 * heap in the xarray: the pool might have been destroyed while we
	 * were allocating chunks.
	 */
	down_write(&pool->lock);
	/* The pool has been destroyed, we can't create a new heap. */
	if (!pool->vm) {
		ret = -EINVAL;
	} else {
		ret = xa_alloc(&pool->xa, &id, heap,
			       XA_LIMIT(0, MAX_HEAPS_PER_POOL - 1), GFP_KERNEL);
		if (!ret) {
			/* The GPU context just needs to be zero-initialized. */
			void *gpu_ctx = panthor_get_heap_ctx(pool, id);

			memset(gpu_ctx, 0, panthor_heap_ctx_stride(pool->ptdev));
			*heap_ctx_gpu_va = panthor_kernel_bo_gpuva(pool->gpu_contexts) +
					   panthor_get_heap_ctx_offset(pool, id);
		}
	}
	up_write(&pool->lock);

	if (ret)
		goto err_free_heap;

	panthor_vm_put(vm);
	return id;

err_free_heap:
	panthor_free_heap_chunks(pool, heap);
	mutex_destroy(&heap->lock);
	kfree(heap);

err_put_vm:
	panthor_vm_put(vm);
	return ret;
}
359
360 /**
361 * panthor_heap_return_chunk() - Return an unused heap chunk
362 * @pool: The pool this heap belongs to.
363 * @heap_gpu_va: The GPU address of the heap context.
364 * @chunk_gpu_va: The chunk VA to return.
365 *
366 * This function is used when a chunk allocated with panthor_heap_grow()
367 * couldn't be linked to the heap context through the FW interface because
368 * the group requesting the allocation was scheduled out in the meantime.
369 */
panthor_heap_return_chunk(struct panthor_heap_pool * pool,u64 heap_gpu_va,u64 chunk_gpu_va)370 int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
371 u64 heap_gpu_va,
372 u64 chunk_gpu_va)
373 {
374 u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts);
375 u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev);
376 struct panthor_heap_chunk *chunk, *tmp, *removed = NULL;
377 struct panthor_heap *heap;
378 int ret;
379
380 if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL)
381 return -EINVAL;
382
383 down_read(&pool->lock);
384 heap = xa_load(&pool->xa, heap_id);
385 if (!heap) {
386 ret = -EINVAL;
387 goto out_unlock;
388 }
389
390 chunk_gpu_va &= GENMASK_ULL(63, 12);
391
392 mutex_lock(&heap->lock);
393 list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) {
394 if (panthor_kernel_bo_gpuva(chunk->bo) == chunk_gpu_va) {
395 removed = chunk;
396 list_del(&chunk->node);
397 heap->chunk_count--;
398 atomic_sub(heap->chunk_size, &pool->size);
399 break;
400 }
401 }
402 mutex_unlock(&heap->lock);
403
404 if (removed) {
405 panthor_kernel_bo_destroy(chunk->bo);
406 kfree(chunk);
407 ret = 0;
408 } else {
409 ret = -EINVAL;
410 }
411
412 out_unlock:
413 up_read(&pool->lock);
414 return ret;
415 }
416
417 /**
418 * panthor_heap_grow() - Make a heap context grow.
419 * @pool: The pool this heap belongs to.
420 * @heap_gpu_va: The GPU address of the heap context.
421 * @renderpasses_in_flight: Number of render passes currently in-flight.
422 * @pending_frag_count: Number of fragment jobs waiting for execution/completion.
423 * @new_chunk_gpu_va: Pointer used to return the chunk VA.
424 *
425 * Return:
426 * - 0 if a new heap was allocated
427 * - -ENOMEM if the tiler context reached the maximum number of chunks
428 * or if too many render passes are in-flight
429 * or if the allocation failed
430 * - -EINVAL if any of the arguments passed to panthor_heap_grow() is invalid
431 */
panthor_heap_grow(struct panthor_heap_pool * pool,u64 heap_gpu_va,u32 renderpasses_in_flight,u32 pending_frag_count,u64 * new_chunk_gpu_va)432 int panthor_heap_grow(struct panthor_heap_pool *pool,
433 u64 heap_gpu_va,
434 u32 renderpasses_in_flight,
435 u32 pending_frag_count,
436 u64 *new_chunk_gpu_va)
437 {
438 u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts);
439 u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev);
440 struct panthor_heap_chunk *chunk;
441 struct panthor_heap *heap;
442 int ret;
443
444 if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL)
445 return -EINVAL;
446
447 down_read(&pool->lock);
448 heap = xa_load(&pool->xa, heap_id);
449 if (!heap) {
450 ret = -EINVAL;
451 goto out_unlock;
452 }
453
454 /* If we reached the target in-flight render passes, or if we
455 * reached the maximum number of chunks, let the FW figure another way to
456 * find some memory (wait for render passes to finish, or call the exception
457 * handler provided by the userspace driver, if any).
458 */
459 if (renderpasses_in_flight > heap->target_in_flight ||
460 heap->chunk_count >= heap->max_chunks) {
461 ret = -ENOMEM;
462 goto out_unlock;
463 }
464
465 /* FIXME: panthor_alloc_heap_chunk() triggers a kernel BO creation,
466 * which goes through the blocking allocation path. Ultimately, we
467 * want a non-blocking allocation, so we can immediately report to the
468 * FW when the system is running out of memory. In that case, the FW
469 * can call a user-provided exception handler, which might try to free
470 * some tiler memory by issuing an intermediate fragment job. If the
471 * exception handler can't do anything, it will flag the queue as
472 * faulty so the job that triggered this tiler chunk allocation and all
473 * further jobs in this queue fail immediately instead of having to
474 * wait for the job timeout.
475 */
476 ret = panthor_alloc_heap_chunk(pool, heap, false);
477 if (ret)
478 goto out_unlock;
479
480 chunk = list_first_entry(&heap->chunks,
481 struct panthor_heap_chunk,
482 node);
483 *new_chunk_gpu_va = (panthor_kernel_bo_gpuva(chunk->bo) & GENMASK_ULL(63, 12)) |
484 (heap->chunk_size >> 12);
485 ret = 0;
486
487 out_unlock:
488 up_read(&pool->lock);
489 return ret;
490 }
491
/* Called by kref_put() when the last pool reference is dropped.
 * The xarray is expected to be empty at this point (heaps are torn down in
 * panthor_heap_pool_destroy()).
 */
static void panthor_heap_pool_release(struct kref *refcount)
{
	struct panthor_heap_pool *pool =
		container_of(refcount, struct panthor_heap_pool, refcount);

	xa_destroy(&pool->xa);
	kfree(pool);
}
500
501 /**
502 * panthor_heap_pool_put() - Release a heap pool reference
503 * @pool: Pool to release the reference on. Can be NULL.
504 */
panthor_heap_pool_put(struct panthor_heap_pool * pool)505 void panthor_heap_pool_put(struct panthor_heap_pool *pool)
506 {
507 if (pool)
508 kref_put(&pool->refcount, panthor_heap_pool_release);
509 }
510
511 /**
512 * panthor_heap_pool_get() - Get a heap pool reference
513 * @pool: Pool to get the reference on. Can be NULL.
514 *
515 * Return: @pool.
516 */
517 struct panthor_heap_pool *
panthor_heap_pool_get(struct panthor_heap_pool * pool)518 panthor_heap_pool_get(struct panthor_heap_pool *pool)
519 {
520 if (pool)
521 kref_get(&pool->refcount);
522
523 return pool;
524 }
525
526 /**
527 * panthor_heap_pool_create() - Create a heap pool
528 * @ptdev: Device.
529 * @vm: The VM this heap pool will be attached to.
530 *
531 * Heap pools might contain up to 128 heap contexts, and are per-VM.
532 *
533 * Return: A valid pointer on success, a negative error code otherwise.
534 */
535 struct panthor_heap_pool *
panthor_heap_pool_create(struct panthor_device * ptdev,struct panthor_vm * vm)536 panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm)
537 {
538 size_t bosize = ALIGN(MAX_HEAPS_PER_POOL *
539 panthor_heap_ctx_stride(ptdev),
540 4096);
541 struct panthor_heap_pool *pool;
542 int ret = 0;
543
544 pool = kzalloc_obj(*pool);
545 if (!pool)
546 return ERR_PTR(-ENOMEM);
547
548 /* We want a weak ref here: the heap pool belongs to the VM, so we're
549 * sure that, as long as the heap pool exists, the VM exists too.
550 */
551 pool->vm = vm;
552 pool->ptdev = ptdev;
553 init_rwsem(&pool->lock);
554 xa_init_flags(&pool->xa, XA_FLAGS_ALLOC);
555 kref_init(&pool->refcount);
556
557 pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, bosize,
558 DRM_PANTHOR_BO_NO_MMAP,
559 DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
560 PANTHOR_VM_KERNEL_AUTO_VA,
561 "Heap pool");
562 if (IS_ERR(pool->gpu_contexts)) {
563 ret = PTR_ERR(pool->gpu_contexts);
564 goto err_destroy_pool;
565 }
566
567 ret = panthor_kernel_bo_vmap(pool->gpu_contexts);
568 if (ret)
569 goto err_destroy_pool;
570
571 atomic_add(pool->gpu_contexts->obj->size, &pool->size);
572
573 return pool;
574
575 err_destroy_pool:
576 panthor_heap_pool_destroy(pool);
577 return ERR_PTR(ret);
578 }
579
/**
 * panthor_heap_pool_destroy() - Destroy a heap pool.
 * @pool: Pool to destroy.
 *
 * This function destroys all heap contexts and their resources. Thus
 * preventing any use of the heap context or the chunk attached to them
 * after that point.
 *
 * If the GPU still has access to some heap contexts, a fault should be
 * triggered, which should flag the command stream groups using these
 * context as faulty.
 *
 * The heap pool object is only released when all references to this pool
 * are released.
 */
void panthor_heap_pool_destroy(struct panthor_heap_pool *pool)
{
	struct panthor_heap *heap;
	unsigned long i;

	if (!pool)
		return;

	down_write(&pool->lock);
	/* Tear down every heap still registered; none of these calls is
	 * expected to fail since the handles come from the xarray itself.
	 */
	xa_for_each(&pool->xa, i, heap)
		drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i));

	/* gpu_contexts may be NULL or an ERR_PTR when destroy is called from
	 * the panthor_heap_pool_create() error path.
	 */
	if (!IS_ERR_OR_NULL(pool->gpu_contexts)) {
		atomic_sub(pool->gpu_contexts->obj->size, &pool->size);
		panthor_kernel_bo_destroy(pool->gpu_contexts);
	}

	/* Reflects the fact the pool has been destroyed. */
	pool->vm = NULL;
	up_write(&pool->lock);

	/* Drop the reference taken at creation time; the pool memory itself
	 * is freed once all other references are gone.
	 */
	panthor_heap_pool_put(pool);
}
618
619 /**
620 * panthor_heap_pool_size() - Get a heap pool's total size
621 * @pool: Pool whose total chunks size to return
622 *
623 * Returns the aggregated size of all chunks for all heaps in the pool
624 *
625 */
panthor_heap_pool_size(struct panthor_heap_pool * pool)626 size_t panthor_heap_pool_size(struct panthor_heap_pool *pool)
627 {
628 if (!pool)
629 return 0;
630
631 return atomic_read(&pool->size);
632 }
633