xref: /linux/drivers/gpu/drm/panthor/panthor_heap.c (revision 3027ce13e04eee76539ca65c2cb1028a01c8c508)
// SPDX-License-Identifier: GPL-2.0 or MIT
/* Copyright 2023 Collabora ltd. */

#include <linux/iosys-map.h>
#include <linux/rwsem.h>

#include <drm/panthor_drm.h>

#include "panthor_device.h"
#include "panthor_gem.h"
#include "panthor_heap.h"
#include "panthor_mmu.h"
#include "panthor_regs.h"

/*
 * The GPU heap context is an opaque structure used by the GPU to track the
 * heap allocations. The driver should only touch it to initialize it (zero all
 * fields). Because the CPU and GPU can both access this structure, it is
 * required to be GPU cache line aligned.
 */
#define HEAP_CONTEXT_SIZE	32

/**
 * struct panthor_heap_chunk_header - Heap chunk header
 */
struct panthor_heap_chunk_header {
	/**
	 * @next: Next heap chunk in the list.
	 *
	 * This is a GPU VA.
	 */
	u64 next;

	/** @unknown: MBZ. */
	u32 unknown[14];
};
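
/*
 * Illustrative sketch, not part of the driver: panthor_alloc_heap_chunk()
 * below links chunks by packing the previous chunk's GPU VA (bits 63:12)
 * together with the chunk size expressed in 4 KiB units (bits 11:0) into
 * @next. The hypothetical helper below only shows that encoding in isolation.
 */
static inline u64 panthor_heap_encode_chunk_link(u64 prev_chunk_gpuva,
						 u32 chunk_size)
{
	/* Chunk GPU VAs are at least 4 KiB aligned, so the low 12 bits are
	 * free to carry the chunk size in 4 KiB units.
	 */
	return (prev_chunk_gpuva & GENMASK_ULL(63, 12)) | (chunk_size >> 12);
}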

/**
 * struct panthor_heap_chunk - Structure used to keep track of allocated heap chunks.
 */
struct panthor_heap_chunk {
	/** @node: Used to insert the heap chunk in panthor_heap::chunks. */
	struct list_head node;

	/** @bo: Buffer object backing the heap chunk. */
	struct panthor_kernel_bo *bo;
};

/**
 * struct panthor_heap - Structure used to manage tiler heap contexts.
 */
struct panthor_heap {
	/** @chunks: List containing all heap chunks allocated so far. */
	struct list_head chunks;

	/** @lock: Lock protecting insertion in the chunks list. */
	struct mutex lock;

	/** @chunk_size: Size of each chunk. */
	u32 chunk_size;

	/** @max_chunks: Maximum number of chunks. */
	u32 max_chunks;

	/**
	 * @target_in_flight: Number of in-flight render passes after which
	 * we'd let the FW wait for fragment jobs to finish instead of
	 * allocating new chunks.
	 */
	u32 target_in_flight;

	/** @chunk_count: Number of heap chunks currently allocated. */
	u32 chunk_count;
};

#define MAX_HEAPS_PER_POOL    128

/**
 * struct panthor_heap_pool - Pool of heap contexts
 *
 * The pool is attached to a panthor_file and can't be shared across processes.
 */
struct panthor_heap_pool {
	/** @refcount: Reference count. */
	struct kref refcount;

	/** @ptdev: Device. */
	struct panthor_device *ptdev;

	/** @vm: VM this pool is bound to. */
	struct panthor_vm *vm;

	/** @lock: Lock protecting access to @xa. */
	struct rw_semaphore lock;

	/** @xa: Array storing panthor_heap objects. */
	struct xarray xa;

	/** @gpu_contexts: Buffer object containing the GPU heap contexts. */
	struct panthor_kernel_bo *gpu_contexts;
};

static int panthor_heap_ctx_stride(struct panthor_device *ptdev)
{
	u32 l2_features = ptdev->gpu_info.l2_features;
	u32 gpu_cache_line_size = GPU_L2_FEATURES_LINE_SIZE(l2_features);

	return ALIGN(HEAP_CONTEXT_SIZE, gpu_cache_line_size);
}

static int panthor_get_heap_ctx_offset(struct panthor_heap_pool *pool, int id)
{
	return panthor_heap_ctx_stride(pool->ptdev) * id;
}

static void *panthor_get_heap_ctx(struct panthor_heap_pool *pool, int id)
{
	return pool->gpu_contexts->kmap +
	       panthor_get_heap_ctx_offset(pool, id);
}
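
/*
 * Worked example (illustrative, assuming a 64-byte GPU L2 cache line):
 * panthor_heap_ctx_stride() returns ALIGN(32, 64) = 64, so heap context N
 * starts at offset N * 64 in pool->gpu_contexts, and all MAX_HEAPS_PER_POOL
 * contexts fit in ALIGN(128 * 64, 4096) = 8192 bytes (see
 * panthor_heap_pool_create() below).
 */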

static void panthor_free_heap_chunk(struct panthor_vm *vm,
				    struct panthor_heap *heap,
				    struct panthor_heap_chunk *chunk)
{
	mutex_lock(&heap->lock);
	list_del(&chunk->node);
	heap->chunk_count--;
	mutex_unlock(&heap->lock);

	panthor_kernel_bo_destroy(vm, chunk->bo);
	kfree(chunk);
}

static int panthor_alloc_heap_chunk(struct panthor_device *ptdev,
				    struct panthor_vm *vm,
				    struct panthor_heap *heap,
				    bool initial_chunk)
{
	struct panthor_heap_chunk *chunk;
	struct panthor_heap_chunk_header *hdr;
	int ret;

	chunk = kmalloc(sizeof(*chunk), GFP_KERNEL);
	if (!chunk)
		return -ENOMEM;

	chunk->bo = panthor_kernel_bo_create(ptdev, vm, heap->chunk_size,
					     DRM_PANTHOR_BO_NO_MMAP,
					     DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
					     PANTHOR_VM_KERNEL_AUTO_VA);
	if (IS_ERR(chunk->bo)) {
		ret = PTR_ERR(chunk->bo);
		goto err_free_chunk;
	}

	ret = panthor_kernel_bo_vmap(chunk->bo);
	if (ret)
		goto err_destroy_bo;

	hdr = chunk->bo->kmap;
	memset(hdr, 0, sizeof(*hdr));

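	/* Only the initial chunks are linked by the driver: the new chunk
	 * points to the previous head of the list, so initial chunks end up
	 * chained newest to oldest. Chunks allocated later through
	 * panthor_heap_grow() keep next = 0 and are linked to the heap
	 * context by the FW.
	 */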
	if (initial_chunk && !list_empty(&heap->chunks)) {
		struct panthor_heap_chunk *prev_chunk;
		u64 prev_gpuva;

		prev_chunk = list_first_entry(&heap->chunks,
					      struct panthor_heap_chunk,
					      node);

		prev_gpuva = panthor_kernel_bo_gpuva(prev_chunk->bo);
		hdr->next = (prev_gpuva & GENMASK_ULL(63, 12)) |
			    (heap->chunk_size >> 12);
	}

	panthor_kernel_bo_vunmap(chunk->bo);

	mutex_lock(&heap->lock);
	list_add(&chunk->node, &heap->chunks);
	heap->chunk_count++;
	mutex_unlock(&heap->lock);

	return 0;

err_destroy_bo:
	panthor_kernel_bo_destroy(vm, chunk->bo);

err_free_chunk:
	kfree(chunk);

	return ret;
}

static void panthor_free_heap_chunks(struct panthor_vm *vm,
				     struct panthor_heap *heap)
{
	struct panthor_heap_chunk *chunk, *tmp;

	list_for_each_entry_safe(chunk, tmp, &heap->chunks, node)
		panthor_free_heap_chunk(vm, heap, chunk);
}

static int panthor_alloc_heap_chunks(struct panthor_device *ptdev,
				     struct panthor_vm *vm,
				     struct panthor_heap *heap,
				     u32 chunk_count)
{
	int ret;
	u32 i;

	for (i = 0; i < chunk_count; i++) {
		ret = panthor_alloc_heap_chunk(ptdev, vm, heap, true);
		if (ret)
			return ret;
	}

	return 0;
}

static int
panthor_heap_destroy_locked(struct panthor_heap_pool *pool, u32 handle)
{
	struct panthor_heap *heap;

	heap = xa_erase(&pool->xa, handle);
	if (!heap)
		return -EINVAL;

	panthor_free_heap_chunks(pool->vm, heap);
	mutex_destroy(&heap->lock);
	kfree(heap);
	return 0;
}

/**
 * panthor_heap_destroy() - Destroy a heap context
 * @pool: Pool this context belongs to.
 * @handle: Handle returned by panthor_heap_create().
 *
 * Return: 0 on success, a negative error code otherwise.
 */
int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle)
{
	int ret;

	down_write(&pool->lock);
	ret = panthor_heap_destroy_locked(pool, handle);
	up_write(&pool->lock);

	return ret;
}

/**
 * panthor_heap_create() - Create a heap context
 * @pool: Pool to instantiate the heap context from.
 * @initial_chunk_count: Number of chunks allocated at initialization time.
 * Must be at least 1.
 * @chunk_size: The size of each chunk. Must be a power of two between 256k
 * and 2M.
 * @max_chunks: Maximum number of chunks that can be allocated.
 * @target_in_flight: Maximum number of in-flight render passes.
 * @heap_ctx_gpu_va: Pointer holding the GPU address of the allocated heap
 * context.
 * @first_chunk_gpu_va: Pointer holding the GPU address of the first chunk
 * assigned to the heap context.
 *
 * Return: a positive handle on success, a negative error otherwise.
 */
int panthor_heap_create(struct panthor_heap_pool *pool,
			u32 initial_chunk_count,
			u32 chunk_size,
			u32 max_chunks,
			u32 target_in_flight,
			u64 *heap_ctx_gpu_va,
			u64 *first_chunk_gpu_va)
{
	struct panthor_heap *heap;
	struct panthor_heap_chunk *first_chunk;
	struct panthor_vm *vm;
	int ret = 0;
	u32 id;

	if (initial_chunk_count == 0)
		return -EINVAL;

	if (hweight32(chunk_size) != 1 ||
	    chunk_size < SZ_256K || chunk_size > SZ_2M)
		return -EINVAL;

	down_read(&pool->lock);
	vm = panthor_vm_get(pool->vm);
	up_read(&pool->lock);

	/* The pool has been destroyed, we can't create a new heap. */
	if (!vm)
		return -EINVAL;

	heap = kzalloc(sizeof(*heap), GFP_KERNEL);
	if (!heap) {
		ret = -ENOMEM;
		goto err_put_vm;
	}

	mutex_init(&heap->lock);
	INIT_LIST_HEAD(&heap->chunks);
	heap->chunk_size = chunk_size;
	heap->max_chunks = max_chunks;
	heap->target_in_flight = target_in_flight;

	ret = panthor_alloc_heap_chunks(pool->ptdev, vm, heap,
					initial_chunk_count);
	if (ret)
		goto err_free_heap;

	first_chunk = list_first_entry(&heap->chunks,
				       struct panthor_heap_chunk,
				       node);
	*first_chunk_gpu_va = panthor_kernel_bo_gpuva(first_chunk->bo);

	down_write(&pool->lock);
	/* The pool has been destroyed, we can't create a new heap. */
	if (!pool->vm) {
		ret = -EINVAL;
	} else {
		ret = xa_alloc(&pool->xa, &id, heap, XA_LIMIT(1, MAX_HEAPS_PER_POOL), GFP_KERNEL);
		if (!ret) {
			void *gpu_ctx = panthor_get_heap_ctx(pool, id);

			memset(gpu_ctx, 0, panthor_heap_ctx_stride(pool->ptdev));
			*heap_ctx_gpu_va = panthor_kernel_bo_gpuva(pool->gpu_contexts) +
					   panthor_get_heap_ctx_offset(pool, id);
		}
	}
	up_write(&pool->lock);

	if (ret)
		goto err_free_heap;

	panthor_vm_put(vm);
	return id;

err_free_heap:
	panthor_free_heap_chunks(pool->vm, heap);
	mutex_destroy(&heap->lock);
	kfree(heap);

err_put_vm:
	panthor_vm_put(vm);
	return ret;
}
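
/*
 * Illustrative usage sketch, not part of the driver: a caller (such as the
 * scheduler's group creation path) would typically create a heap context as
 * below. The chunk size and limits are made-up example values.
 */
static int __maybe_unused panthor_heap_create_example(struct panthor_heap_pool *pool)
{
	u64 heap_ctx_gpu_va, first_chunk_gpu_va;
	int handle;

	handle = panthor_heap_create(pool,
				     1,		/* one initial chunk */
				     SZ_2M,	/* 2 MiB per chunk */
				     64,	/* at most 64 chunks */
				     16,	/* target in-flight render passes */
				     &heap_ctx_gpu_va,
				     &first_chunk_gpu_va);
	if (handle < 0)
		return handle;

	/* heap_ctx_gpu_va and first_chunk_gpu_va are handed to the FW when
	 * setting up the tiler heap; the handle is used later with
	 * panthor_heap_destroy().
	 */
	return panthor_heap_destroy(pool, handle);
}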

/**
 * panthor_heap_return_chunk() - Return an unused heap chunk
 * @pool: The pool this heap belongs to.
 * @heap_gpu_va: The GPU address of the heap context.
 * @chunk_gpu_va: The chunk VA to return.
 *
 * This function is used when a chunk allocated with panthor_heap_grow()
 * couldn't be linked to the heap context through the FW interface because
 * the group requesting the allocation was scheduled out in the meantime.
 *
 * Return: 0 on success, a negative error code otherwise.
 */
int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
			      u64 heap_gpu_va,
			      u64 chunk_gpu_va)
{
	u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts);
	u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev);
	struct panthor_heap_chunk *chunk, *tmp, *removed = NULL;
	struct panthor_heap *heap;
	int ret;

	if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL)
		return -EINVAL;

	down_read(&pool->lock);
	heap = xa_load(&pool->xa, heap_id);
	if (!heap) {
		ret = -EINVAL;
		goto out_unlock;
	}

	chunk_gpu_va &= GENMASK_ULL(63, 12);

	mutex_lock(&heap->lock);
	list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) {
		if (panthor_kernel_bo_gpuva(chunk->bo) == chunk_gpu_va) {
			removed = chunk;
			list_del(&chunk->node);
			heap->chunk_count--;
			break;
		}
	}
	mutex_unlock(&heap->lock);

	if (removed) {
		panthor_kernel_bo_destroy(pool->vm, chunk->bo);
		kfree(chunk);
		ret = 0;
	} else {
		ret = -EINVAL;
	}

out_unlock:
	up_read(&pool->lock);
	return ret;
}

/**
 * panthor_heap_grow() - Make a heap context grow.
 * @pool: The pool this heap belongs to.
 * @heap_gpu_va: The GPU address of the heap context.
 * @renderpasses_in_flight: Number of render passes currently in-flight.
 * @pending_frag_count: Number of fragment jobs waiting for execution/completion.
 * @new_chunk_gpu_va: Pointer used to return the chunk VA.
 *
 * Return: 0 on success, -EBUSY if the FW should wait for in-flight render
 * passes to finish instead, -ENOMEM if the heap reached its maximum chunk
 * count, or another negative error code if the chunk allocation failed.
 */
int panthor_heap_grow(struct panthor_heap_pool *pool,
		      u64 heap_gpu_va,
		      u32 renderpasses_in_flight,
		      u32 pending_frag_count,
		      u64 *new_chunk_gpu_va)
{
	u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts);
	u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev);
	struct panthor_heap_chunk *chunk;
	struct panthor_heap *heap;
	int ret;

	if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL)
		return -EINVAL;

	down_read(&pool->lock);
	heap = xa_load(&pool->xa, heap_id);
	if (!heap) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/* If we reached the target number of in-flight render passes, or if
	 * we reached the maximum number of chunks, let the FW figure out
	 * another way to find some memory (wait for render passes to finish,
	 * or call the exception handler provided by the userspace driver,
	 * if any).
	 */
	if (renderpasses_in_flight > heap->target_in_flight ||
	    (pending_frag_count > 0 && heap->chunk_count >= heap->max_chunks)) {
		ret = -EBUSY;
		goto out_unlock;
	} else if (heap->chunk_count >= heap->max_chunks) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	/* FIXME: panthor_alloc_heap_chunk() triggers a kernel BO creation,
	 * which goes through the blocking allocation path. Ultimately, we
	 * want a non-blocking allocation, so we can immediately report to the
	 * FW when the system is running out of memory. In that case, the FW
	 * can call a user-provided exception handler, which might try to free
	 * some tiler memory by issuing an intermediate fragment job. If the
	 * exception handler can't do anything, it will flag the queue as
	 * faulty so the job that triggered this tiler chunk allocation and all
	 * further jobs in this queue fail immediately instead of having to
	 * wait for the job timeout.
	 */
	ret = panthor_alloc_heap_chunk(pool->ptdev, pool->vm, heap, false);
	if (ret)
		goto out_unlock;

	chunk = list_first_entry(&heap->chunks,
				 struct panthor_heap_chunk,
				 node);
	*new_chunk_gpu_va = (panthor_kernel_bo_gpuva(chunk->bo) & GENMASK_ULL(63, 12)) |
			    (heap->chunk_size >> 12);
	ret = 0;

out_unlock:
	up_read(&pool->lock);
	return ret;
}

static void panthor_heap_pool_release(struct kref *refcount)
{
	struct panthor_heap_pool *pool =
		container_of(refcount, struct panthor_heap_pool, refcount);

	xa_destroy(&pool->xa);
	kfree(pool);
}

/**
 * panthor_heap_pool_put() - Release a heap pool reference
 * @pool: Pool to release the reference on. Can be NULL.
 */
void panthor_heap_pool_put(struct panthor_heap_pool *pool)
{
	if (pool)
		kref_put(&pool->refcount, panthor_heap_pool_release);
}

/**
 * panthor_heap_pool_get() - Get a heap pool reference
 * @pool: Pool to get the reference on. Can be NULL.
 *
 * Return: @pool.
 */
struct panthor_heap_pool *
panthor_heap_pool_get(struct panthor_heap_pool *pool)
{
	if (pool)
		kref_get(&pool->refcount);

	return pool;
}

/**
 * panthor_heap_pool_create() - Create a heap pool
 * @ptdev: Device.
 * @vm: The VM this heap pool will be attached to.
 *
 * Heap pools might contain up to 128 heap contexts, and are per-VM.
 *
 * Return: A valid pointer on success, an ERR_PTR() encoding a negative error
 * code otherwise.
 */
struct panthor_heap_pool *
panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm)
{
	size_t bosize = ALIGN(MAX_HEAPS_PER_POOL *
			      panthor_heap_ctx_stride(ptdev),
			      4096);
	struct panthor_heap_pool *pool;
	int ret = 0;

	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
	if (!pool)
		return ERR_PTR(-ENOMEM);

	/* We want a weak ref here: the heap pool belongs to the VM, so we're
	 * sure that, as long as the heap pool exists, the VM exists too.
	 */
	pool->vm = vm;
	pool->ptdev = ptdev;
	init_rwsem(&pool->lock);
	xa_init_flags(&pool->xa, XA_FLAGS_ALLOC1);
	kref_init(&pool->refcount);

	pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, bosize,
						      DRM_PANTHOR_BO_NO_MMAP,
						      DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
						      PANTHOR_VM_KERNEL_AUTO_VA);
	if (IS_ERR(pool->gpu_contexts)) {
		ret = PTR_ERR(pool->gpu_contexts);
		goto err_destroy_pool;
	}

	ret = panthor_kernel_bo_vmap(pool->gpu_contexts);
	if (ret)
		goto err_destroy_pool;

	return pool;

err_destroy_pool:
	panthor_heap_pool_destroy(pool);
	return ERR_PTR(ret);
}
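
/*
 * Illustrative lifecycle sketch, not part of the driver: a pool is created
 * per-VM by the VM management code and torn down with
 * panthor_heap_pool_destroy(). Note the ERR_PTR() convention on the create
 * path.
 */
static int __maybe_unused panthor_heap_pool_example(struct panthor_device *ptdev,
						    struct panthor_vm *vm)
{
	struct panthor_heap_pool *pool;

	pool = panthor_heap_pool_create(ptdev, vm);
	if (IS_ERR(pool))
		return PTR_ERR(pool);

	/* ... create heaps with panthor_heap_create() and grow them from the
	 * FW tiler-OOM path with panthor_heap_grow() ...
	 */

	/* Drops the pool's resources; the struct itself only goes away once
	 * the last reference is released.
	 */
	panthor_heap_pool_destroy(pool);
	return 0;
}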

/**
 * panthor_heap_pool_destroy() - Destroy a heap pool.
 * @pool: Pool to destroy.
 *
 * This function destroys all heap contexts and their resources, preventing
 * any further use of the heap contexts or the chunks attached to them.
 *
 * If the GPU still has access to some heap contexts, a fault should be
 * triggered, which should flag the command stream groups using these
 * contexts as faulty.
 *
 * The heap pool object is only released when all references to this pool
 * are released.
 */
void panthor_heap_pool_destroy(struct panthor_heap_pool *pool)
{
	struct panthor_heap *heap;
	unsigned long i;

	if (!pool)
		return;

	down_write(&pool->lock);
	xa_for_each(&pool->xa, i, heap)
		drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i));

	if (!IS_ERR_OR_NULL(pool->gpu_contexts))
		panthor_kernel_bo_destroy(pool->vm, pool->gpu_contexts);

	/* Reflects the fact the pool has been destroyed. */
	pool->vm = NULL;
	up_write(&pool->lock);

	panthor_heap_pool_put(pool);
}
598