// SPDX-License-Identifier: GPL-2.0 or MIT
/* Copyright 2023 Collabora ltd. */

#include <linux/iosys-map.h>
#include <linux/rwsem.h>

#include <drm/panthor_drm.h>

#include "panthor_device.h"
#include "panthor_gem.h"
#include "panthor_heap.h"
#include "panthor_mmu.h"
#include "panthor_regs.h"

/*
 * The GPU heap context is an opaque structure used by the GPU to track the
 * heap allocations. The driver should only touch it to initialize it (zero all
 * fields). Because the CPU and GPU can both access this structure it is
 * required to be GPU cache line aligned.
 */
#define HEAP_CONTEXT_SIZE 32

/**
 * struct panthor_heap_chunk_header - Heap chunk header
 */
struct panthor_heap_chunk_header {
	/**
	 * @next: Next heap chunk in the list.
	 *
	 * This is a GPU VA.
	 */
	u64 next;

	/** @unknown: MBZ. */
	u32 unknown[14];
};

/**
 * struct panthor_heap_chunk - Structure used to keep track of allocated heap chunks.
 */
struct panthor_heap_chunk {
	/** @node: Used to insert the heap chunk in panthor_heap::chunks. */
	struct list_head node;

	/** @bo: Buffer object backing the heap chunk. */
	struct panthor_kernel_bo *bo;
};

/**
 * struct panthor_heap - Structure used to manage tiler heap contexts.
 */
struct panthor_heap {
	/** @chunks: List containing all heap chunks allocated so far. */
	struct list_head chunks;

	/** @lock: Lock protecting insertion in the chunks list. */
	struct mutex lock;

	/** @chunk_size: Size of each chunk. */
	u32 chunk_size;

	/** @max_chunks: Maximum number of chunks. */
	u32 max_chunks;

	/**
	 * @target_in_flight: Number of in-flight render passes after which
	 * we'd let the FW wait for fragment jobs to finish instead of
	 * allocating new chunks.
	 */
	u32 target_in_flight;

	/** @chunk_count: Number of heap chunks currently allocated. */
	u32 chunk_count;
};

#define MAX_HEAPS_PER_POOL 128

/**
 * struct panthor_heap_pool - Pool of heap contexts
 *
 * The pool is attached to a panthor_file and can't be shared across processes.
 */
struct panthor_heap_pool {
	/** @refcount: Reference count. */
	struct kref refcount;

	/** @ptdev: Device. */
	struct panthor_device *ptdev;

	/** @vm: VM this pool is bound to. */
	struct panthor_vm *vm;

	/** @lock: Lock protecting access to @xa. */
	struct rw_semaphore lock;

	/** @xa: Array storing panthor_heap objects. */
	struct xarray xa;

	/** @gpu_contexts: Buffer object containing the GPU heap contexts. */
	struct panthor_kernel_bo *gpu_contexts;

	/** @size: Size of all chunks across all heaps in the pool. */
	atomic_t size;
};

static int panthor_heap_ctx_stride(struct panthor_device *ptdev)
{
	u32 l2_features = ptdev->gpu_info.l2_features;
	u32 gpu_cache_line_size = GPU_L2_FEATURES_LINE_SIZE(l2_features);

	return ALIGN(HEAP_CONTEXT_SIZE, gpu_cache_line_size);
}

static int panthor_get_heap_ctx_offset(struct panthor_heap_pool *pool, int id)
{
	return panthor_heap_ctx_stride(pool->ptdev) * id;
}

static void *panthor_get_heap_ctx(struct panthor_heap_pool *pool, int id)
{
	return pool->gpu_contexts->kmap +
	       panthor_get_heap_ctx_offset(pool, id);
}
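
/*
 * Worked example of the context layout, assuming (purely for illustration)
 * a 64-byte GPU cache line reported by GPU_L2_FEATURES_LINE_SIZE():
 *
 *	stride = ALIGN(HEAP_CONTEXT_SIZE, 64) = 64
 *	offset(id = 0) = 0
 *	offset(id = 5) = 5 * 64 = 320
 *
 * i.e. each heap context occupies one GPU cache line inside @gpu_contexts,
 * and the heap handle doubles as an index into that array of contexts.
 */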
static void panthor_free_heap_chunk(struct panthor_heap_pool *pool,
				    struct panthor_heap *heap,
				    struct panthor_heap_chunk *chunk)
{
	mutex_lock(&heap->lock);
	list_del(&chunk->node);
	heap->chunk_count--;
	mutex_unlock(&heap->lock);

	atomic_sub(heap->chunk_size, &pool->size);

	panthor_kernel_bo_destroy(chunk->bo);
	kfree(chunk);
}

static int panthor_alloc_heap_chunk(struct panthor_heap_pool *pool,
				    struct panthor_heap *heap,
				    bool initial_chunk)
{
	struct panthor_heap_chunk *chunk;
	struct panthor_heap_chunk_header *hdr;
	int ret;

	chunk = kmalloc(sizeof(*chunk), GFP_KERNEL);
	if (!chunk)
		return -ENOMEM;

	chunk->bo = panthor_kernel_bo_create(pool->ptdev, pool->vm, heap->chunk_size,
					     DRM_PANTHOR_BO_NO_MMAP,
					     DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
					     PANTHOR_VM_KERNEL_AUTO_VA,
					     "Tiler heap chunk");
	if (IS_ERR(chunk->bo)) {
		ret = PTR_ERR(chunk->bo);
		goto err_free_chunk;
	}

	ret = panthor_kernel_bo_vmap(chunk->bo);
	if (ret)
		goto err_destroy_bo;

	hdr = chunk->bo->kmap;
	memset(hdr, 0, sizeof(*hdr));

	if (initial_chunk && !list_empty(&heap->chunks)) {
		struct panthor_heap_chunk *prev_chunk;
		u64 prev_gpuva;

		prev_chunk = list_first_entry(&heap->chunks,
					      struct panthor_heap_chunk,
					      node);

		prev_gpuva = panthor_kernel_bo_gpuva(prev_chunk->bo);
		hdr->next = (prev_gpuva & GENMASK_ULL(63, 12)) |
			    (heap->chunk_size >> 12);
	}

	panthor_kernel_bo_vunmap(chunk->bo);

	mutex_lock(&heap->lock);
	list_add(&chunk->node, &heap->chunks);
	heap->chunk_count++;
	mutex_unlock(&heap->lock);

	atomic_add(heap->chunk_size, &pool->size);

	return 0;

err_destroy_bo:
	panthor_kernel_bo_destroy(chunk->bo);

err_free_chunk:
	kfree(chunk);

	return ret;
}
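
/*
 * The chunk "pointers" handed to the GPU (hdr->next above, and the value
 * returned by panthor_heap_grow()) pack two things into one u64: the
 * 4k-aligned chunk GPU VA in bits [63:12] and the chunk size in units of
 * 4k pages in the low bits. Purely illustrative example, with made-up
 * addresses and a 2MiB chunk size:
 *
 *	gpuva      = 0x0000004000300000
 *	chunk_size = SZ_2M
 *	encoded    = (gpuva & GENMASK_ULL(63, 12)) | (chunk_size >> 12)
 *	           = 0x0000004000300200
 */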
static void panthor_free_heap_chunks(struct panthor_heap_pool *pool,
				     struct panthor_heap *heap)
{
	struct panthor_heap_chunk *chunk, *tmp;

	list_for_each_entry_safe(chunk, tmp, &heap->chunks, node)
		panthor_free_heap_chunk(pool, heap, chunk);
}

static int panthor_alloc_heap_chunks(struct panthor_heap_pool *pool,
				     struct panthor_heap *heap,
				     u32 chunk_count)
{
	int ret;
	u32 i;

	for (i = 0; i < chunk_count; i++) {
		ret = panthor_alloc_heap_chunk(pool, heap, true);
		if (ret)
			return ret;
	}

	return 0;
}

static int
panthor_heap_destroy_locked(struct panthor_heap_pool *pool, u32 handle)
{
	struct panthor_heap *heap;

	heap = xa_erase(&pool->xa, handle);
	if (!heap)
		return -EINVAL;

	panthor_free_heap_chunks(pool, heap);
	mutex_destroy(&heap->lock);
	kfree(heap);
	return 0;
}

/**
 * panthor_heap_destroy() - Destroy a heap context
 * @pool: Pool this context belongs to.
 * @handle: Handle returned by panthor_heap_create().
 */
int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle)
{
	int ret;

	down_write(&pool->lock);
	ret = panthor_heap_destroy_locked(pool, handle);
	up_write(&pool->lock);

	return ret;
}

/**
 * panthor_heap_create() - Create a heap context
 * @pool: Pool to instantiate the heap context from.
 * @initial_chunk_count: Number of chunks allocated at initialization time.
 * Must be at least 1.
 * @chunk_size: The size of each chunk. Must be page-aligned and lie in the
 * [128k:8M] range.
 * @max_chunks: Maximum number of chunks that can be allocated.
 * @target_in_flight: Maximum number of in-flight render passes.
 * @heap_ctx_gpu_va: Pointer holding the GPU address of the allocated heap
 * context.
 * @first_chunk_gpu_va: Pointer holding the GPU address of the first chunk
 * assigned to the heap context.
 *
 * Return: a valid heap handle on success, a negative error code otherwise.
 */
int panthor_heap_create(struct panthor_heap_pool *pool,
			u32 initial_chunk_count,
			u32 chunk_size,
			u32 max_chunks,
			u32 target_in_flight,
			u64 *heap_ctx_gpu_va,
			u64 *first_chunk_gpu_va)
{
	struct panthor_heap *heap;
	struct panthor_heap_chunk *first_chunk;
	struct panthor_vm *vm;
	int ret = 0;
	u32 id;

	if (initial_chunk_count == 0)
		return -EINVAL;

	if (initial_chunk_count > max_chunks)
		return -EINVAL;

	if (!IS_ALIGNED(chunk_size, PAGE_SIZE) ||
	    chunk_size < SZ_128K || chunk_size > SZ_8M)
		return -EINVAL;

	down_read(&pool->lock);
	vm = panthor_vm_get(pool->vm);
	up_read(&pool->lock);

	/* The pool has been destroyed, we can't create a new heap. */
	if (!vm)
		return -EINVAL;

	heap = kzalloc(sizeof(*heap), GFP_KERNEL);
	if (!heap) {
		ret = -ENOMEM;
		goto err_put_vm;
	}

	mutex_init(&heap->lock);
	INIT_LIST_HEAD(&heap->chunks);
	heap->chunk_size = chunk_size;
	heap->max_chunks = max_chunks;
	heap->target_in_flight = target_in_flight;

	ret = panthor_alloc_heap_chunks(pool, heap, initial_chunk_count);
	if (ret)
		goto err_free_heap;

	first_chunk = list_first_entry(&heap->chunks,
				       struct panthor_heap_chunk,
				       node);
	*first_chunk_gpu_va = panthor_kernel_bo_gpuva(first_chunk->bo);

	down_write(&pool->lock);
	/* The pool has been destroyed, we can't create a new heap. */
	if (!pool->vm) {
		ret = -EINVAL;
	} else {
		ret = xa_alloc(&pool->xa, &id, heap,
			       XA_LIMIT(0, MAX_HEAPS_PER_POOL - 1), GFP_KERNEL);
		if (!ret) {
			void *gpu_ctx = panthor_get_heap_ctx(pool, id);

			memset(gpu_ctx, 0, panthor_heap_ctx_stride(pool->ptdev));
			*heap_ctx_gpu_va = panthor_kernel_bo_gpuva(pool->gpu_contexts) +
					   panthor_get_heap_ctx_offset(pool, id);
		}
	}
	up_write(&pool->lock);

	if (ret)
		goto err_free_heap;

	panthor_vm_put(vm);
	return id;

err_free_heap:
	panthor_free_heap_chunks(pool, heap);
	mutex_destroy(&heap->lock);
	kfree(heap);

err_put_vm:
	panthor_vm_put(vm);
	return ret;
}
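
/*
 * Rough usage sketch for panthor_heap_create() (not the actual ioctl
 * handler; the caller-side variable names and parameter values below are
 * made up, but stay within the documented constraints):
 *
 *	u64 heap_ctx_gpu_va, first_chunk_gpu_va;
 *	int handle;
 *
 *	handle = panthor_heap_create(pool, 1, SZ_2M, 64, 8,
 *				     &heap_ctx_gpu_va, &first_chunk_gpu_va);
 *	if (handle < 0)
 *		return handle;
 *
 * heap_ctx_gpu_va points at the per-heap context inside pool->gpu_contexts,
 * first_chunk_gpu_va at the first tiler chunk, and the handle is what
 * panthor_heap_destroy() expects later on.
 */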
/**
 * panthor_heap_return_chunk() - Return an unused heap chunk
 * @pool: The pool this heap belongs to.
 * @heap_gpu_va: The GPU address of the heap context.
 * @chunk_gpu_va: The chunk VA to return.
 *
 * This function is used when a chunk allocated with panthor_heap_grow()
 * couldn't be linked to the heap context through the FW interface because
 * the group requesting the allocation was scheduled out in the meantime.
 *
 * Return: 0 on success, a negative error code otherwise.
 */
int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
			      u64 heap_gpu_va,
			      u64 chunk_gpu_va)
{
	u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts);
	u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev);
	struct panthor_heap_chunk *chunk, *tmp, *removed = NULL;
	struct panthor_heap *heap;
	int ret;

	if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL)
		return -EINVAL;

	down_read(&pool->lock);
	heap = xa_load(&pool->xa, heap_id);
	if (!heap) {
		ret = -EINVAL;
		goto out_unlock;
	}

	chunk_gpu_va &= GENMASK_ULL(63, 12);

	mutex_lock(&heap->lock);
	list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) {
		if (panthor_kernel_bo_gpuva(chunk->bo) == chunk_gpu_va) {
			removed = chunk;
			list_del(&chunk->node);
			heap->chunk_count--;
			atomic_sub(heap->chunk_size, &pool->size);
			break;
		}
	}
	mutex_unlock(&heap->lock);

	if (removed) {
		panthor_kernel_bo_destroy(chunk->bo);
		kfree(chunk);
		ret = 0;
	} else {
		ret = -EINVAL;
	}

out_unlock:
	up_read(&pool->lock);
	return ret;
}

/**
 * panthor_heap_grow() - Make a heap context grow.
 * @pool: The pool this heap belongs to.
 * @heap_gpu_va: The GPU address of the heap context.
 * @renderpasses_in_flight: Number of render passes currently in-flight.
 * @pending_frag_count: Number of fragment jobs waiting for execution/completion.
 * @new_chunk_gpu_va: Pointer used to return the chunk VA.
 *
 * Return:
 * - 0 if a new heap chunk was allocated
 * - -ENOMEM if the tiler context reached the maximum number of chunks
 *   or if too many render passes are in-flight
 *   or if the allocation failed
 * - -EINVAL if any of the arguments passed to panthor_heap_grow() is invalid
 */
int panthor_heap_grow(struct panthor_heap_pool *pool,
		      u64 heap_gpu_va,
		      u32 renderpasses_in_flight,
		      u32 pending_frag_count,
		      u64 *new_chunk_gpu_va)
{
	u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts);
	u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev);
	struct panthor_heap_chunk *chunk;
	struct panthor_heap *heap;
	int ret;

	if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL)
		return -EINVAL;

	down_read(&pool->lock);
	heap = xa_load(&pool->xa, heap_id);
	if (!heap) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/* If we reached the target in-flight render passes, or if we
	 * reached the maximum number of chunks, let the FW figure another way to
	 * find some memory (wait for render passes to finish, or call the exception
	 * handler provided by the userspace driver, if any).
	 */
	if (renderpasses_in_flight > heap->target_in_flight ||
	    heap->chunk_count >= heap->max_chunks) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	/* FIXME: panthor_alloc_heap_chunk() triggers a kernel BO creation,
	 * which goes through the blocking allocation path. Ultimately, we
	 * want a non-blocking allocation, so we can immediately report to the
	 * FW when the system is running out of memory. In that case, the FW
	 * can call a user-provided exception handler, which might try to free
	 * some tiler memory by issuing an intermediate fragment job. If the
	 * exception handler can't do anything, it will flag the queue as
	 * faulty so the job that triggered this tiler chunk allocation and all
	 * further jobs in this queue fail immediately instead of having to
	 * wait for the job timeout.
	 */
	ret = panthor_alloc_heap_chunk(pool, heap, false);
	if (ret)
		goto out_unlock;

	chunk = list_first_entry(&heap->chunks,
				 struct panthor_heap_chunk,
				 node);
	*new_chunk_gpu_va = (panthor_kernel_bo_gpuva(chunk->bo) & GENMASK_ULL(63, 12)) |
			    (heap->chunk_size >> 12);
	ret = 0;

out_unlock:
	up_read(&pool->lock);
	return ret;
}
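
/*
 * Sketch of how the two functions above fit together. This is a simplified
 * pseudo-caller (the real one is the scheduler's tiler-OOM handling, and
 * all identifiers below are placeholders):
 *
 *	u64 new_chunk;
 *	int ret;
 *
 *	ret = panthor_heap_grow(pool, heap_ctx_gpu_va,
 *				renderpasses_in_flight, pending_frag_count,
 *				&new_chunk);
 *	if (ret == -ENOMEM) {
 *		// No new chunk: let the FW wait for render passes to
 *		// complete, or call the userspace exception handler.
 *	} else if (!ret) {
 *		// Pass new_chunk to the FW. If the group was scheduled out
 *		// before the FW could link it to the heap context, hand the
 *		// chunk back:
 *		if (group_scheduled_out)
 *			panthor_heap_return_chunk(pool, heap_ctx_gpu_va,
 *						  new_chunk);
 *	}
 */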
static void panthor_heap_pool_release(struct kref *refcount)
{
	struct panthor_heap_pool *pool =
		container_of(refcount, struct panthor_heap_pool, refcount);

	xa_destroy(&pool->xa);
	kfree(pool);
}

/**
 * panthor_heap_pool_put() - Release a heap pool reference
 * @pool: Pool to release the reference on. Can be NULL.
 */
void panthor_heap_pool_put(struct panthor_heap_pool *pool)
{
	if (pool)
		kref_put(&pool->refcount, panthor_heap_pool_release);
}

/**
 * panthor_heap_pool_get() - Get a heap pool reference
 * @pool: Pool to get the reference on. Can be NULL.
 *
 * Return: @pool.
 */
struct panthor_heap_pool *
panthor_heap_pool_get(struct panthor_heap_pool *pool)
{
	if (pool)
		kref_get(&pool->refcount);

	return pool;
}

/**
 * panthor_heap_pool_create() - Create a heap pool
 * @ptdev: Device.
 * @vm: The VM this heap pool will be attached to.
 *
 * Heap pools might contain up to 128 heap contexts, and are per-VM.
 *
 * Return: A valid pointer on success, an ERR_PTR() otherwise.
 */
struct panthor_heap_pool *
panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm)
{
	size_t bosize = ALIGN(MAX_HEAPS_PER_POOL *
			      panthor_heap_ctx_stride(ptdev),
			      4096);
	struct panthor_heap_pool *pool;
	int ret = 0;

	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
	if (!pool)
		return ERR_PTR(-ENOMEM);

	/* We want a weak ref here: the heap pool belongs to the VM, so we're
	 * sure that, as long as the heap pool exists, the VM exists too.
	 */
	pool->vm = vm;
	pool->ptdev = ptdev;
	init_rwsem(&pool->lock);
	xa_init_flags(&pool->xa, XA_FLAGS_ALLOC);
	kref_init(&pool->refcount);

	pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, bosize,
						      DRM_PANTHOR_BO_NO_MMAP,
						      DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
						      PANTHOR_VM_KERNEL_AUTO_VA,
						      "Heap pool");
	if (IS_ERR(pool->gpu_contexts)) {
		ret = PTR_ERR(pool->gpu_contexts);
		goto err_destroy_pool;
	}

	ret = panthor_kernel_bo_vmap(pool->gpu_contexts);
	if (ret)
		goto err_destroy_pool;

	atomic_add(pool->gpu_contexts->obj->size, &pool->size);

	return pool;

err_destroy_pool:
	panthor_heap_pool_destroy(pool);
	return ERR_PTR(ret);
}

/**
 * panthor_heap_pool_destroy() - Destroy a heap pool.
 * @pool: Pool to destroy.
 *
 * This function destroys all heap contexts and their resources, thus
 * preventing any use of the heap contexts or the chunks attached to them
 * after that point.
 *
 * If the GPU still has access to some heap contexts, a fault should be
 * triggered, which should flag the command stream groups using these
 * contexts as faulty.
 *
 * The heap pool object is only released when all references to this pool
 * are released.
 */
void panthor_heap_pool_destroy(struct panthor_heap_pool *pool)
{
	struct panthor_heap *heap;
	unsigned long i;

	if (!pool)
		return;

	down_write(&pool->lock);
	xa_for_each(&pool->xa, i, heap)
		drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i));

	if (!IS_ERR_OR_NULL(pool->gpu_contexts)) {
		atomic_sub(pool->gpu_contexts->obj->size, &pool->size);
		panthor_kernel_bo_destroy(pool->gpu_contexts);
	}

	/* Reflects the fact the pool has been destroyed. */
	pool->vm = NULL;
	up_write(&pool->lock);

	panthor_heap_pool_put(pool);
}
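
/*
 * Lifetime summary (a condensed reading of the code above, not additional
 * API): panthor_heap_pool_create() returns a pool holding a weak pointer to
 * its VM; panthor_heap_pool_destroy() tears down the heap contexts, releases
 * the GPU context BO, clears pool->vm and drops the creator's reference; the
 * struct itself is only freed by panthor_heap_pool_release() once the last
 * panthor_heap_pool_put() runs. A panthor_heap_create() call racing with
 * destruction sees pool->vm == NULL and fails with -EINVAL.
 */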
586 * 587 * If the GPU still has access to some heap contexts, a fault should be 588 * triggered, which should flag the command stream groups using these 589 * context as faulty. 590 * 591 * The heap pool object is only released when all references to this pool 592 * are released. 593 */ 594 void panthor_heap_pool_destroy(struct panthor_heap_pool *pool) 595 { 596 struct panthor_heap *heap; 597 unsigned long i; 598 599 if (!pool) 600 return; 601 602 down_write(&pool->lock); 603 xa_for_each(&pool->xa, i, heap) 604 drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i)); 605 606 if (!IS_ERR_OR_NULL(pool->gpu_contexts)) { 607 atomic_sub(pool->gpu_contexts->obj->size, &pool->size); 608 panthor_kernel_bo_destroy(pool->gpu_contexts); 609 } 610 611 /* Reflects the fact the pool has been destroyed. */ 612 pool->vm = NULL; 613 up_write(&pool->lock); 614 615 panthor_heap_pool_put(pool); 616 } 617 618 /** 619 * panthor_heap_pool_size() - Get a heap pool's total size 620 * @pool: Pool whose total chunks size to return 621 * 622 * Returns the aggregated size of all chunks for all heaps in the pool 623 * 624 */ 625 size_t panthor_heap_pool_size(struct panthor_heap_pool *pool) 626 { 627 if (!pool) 628 return 0; 629 630 return atomic_read(&pool->size); 631 } 632