// SPDX-License-Identifier: GPL-2.0 or MIT
/* Copyright 2023 Collabora ltd. */

#include <linux/iosys-map.h>
#include <linux/rwsem.h>

#include <drm/panthor_drm.h>

#include "panthor_device.h"
#include "panthor_gem.h"
#include "panthor_heap.h"
#include "panthor_mmu.h"
#include "panthor_regs.h"

/*
 * The GPU heap context is an opaque structure used by the GPU to track the
 * heap allocations. The driver should only touch it to initialize it (zero all
 * fields). Because the CPU and GPU can both access this structure, it is
 * required to be GPU cache line aligned.
 */
#define HEAP_CONTEXT_SIZE	32

/**
 * struct panthor_heap_chunk_header - Heap chunk header
 */
struct panthor_heap_chunk_header {
	/**
	 * @next: Next heap chunk in the list.
	 *
	 * This is a GPU VA.
	 */
	u64 next;

	/** @unknown: MBZ. */
	u32 unknown[14];
};

/**
 * struct panthor_heap_chunk - Structure used to keep track of allocated heap chunks.
 */
struct panthor_heap_chunk {
	/** @node: Used to insert the heap chunk in panthor_heap::chunks. */
	struct list_head node;

	/** @bo: Buffer object backing the heap chunk. */
	struct panthor_kernel_bo *bo;
};

/**
 * struct panthor_heap - Structure used to manage tiler heap contexts.
 */
struct panthor_heap {
	/** @chunks: List containing all heap chunks allocated so far. */
	struct list_head chunks;

	/** @lock: Lock protecting insertion in the chunks list. */
	struct mutex lock;

	/** @chunk_size: Size of each chunk. */
	u32 chunk_size;

	/** @max_chunks: Maximum number of chunks. */
	u32 max_chunks;

	/**
	 * @target_in_flight: Number of in-flight render passes after which
	 * we'd let the FW wait for fragment jobs to finish instead of
	 * allocating new chunks.
	 */
	u32 target_in_flight;

	/** @chunk_count: Number of heap chunks currently allocated. */
	u32 chunk_count;
};

#define MAX_HEAPS_PER_POOL	128

/**
 * struct panthor_heap_pool - Pool of heap contexts
 *
 * The pool is attached to a panthor_file and can't be shared across processes.
 */
struct panthor_heap_pool {
	/** @refcount: Reference count. */
	struct kref refcount;

	/** @ptdev: Device. */
	struct panthor_device *ptdev;

	/** @vm: VM this pool is bound to. */
	struct panthor_vm *vm;

	/** @lock: Lock protecting access to @xa. */
	struct rw_semaphore lock;

	/** @xa: Array storing panthor_heap objects. */
	struct xarray xa;

	/** @gpu_contexts: Buffer object containing the GPU heap contexts. */
	struct panthor_kernel_bo *gpu_contexts;
};
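
/*
 * Layout of the heap context array (derived from the helpers below):
 * @gpu_contexts is a single kernel BO holding MAX_HEAPS_PER_POOL GPU heap
 * contexts back to back. Each slot is HEAP_CONTEXT_SIZE bytes rounded up to
 * the GPU L2 cache line size, and the heap handle doubles as the slot index,
 * so context N lives at offset N * panthor_heap_ctx_stride(). For instance,
 * assuming a 64-byte GPU cache line, the stride would be 64 bytes and
 * handle 3 would map to offset 192 within the BO.
 */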

static int panthor_heap_ctx_stride(struct panthor_device *ptdev)
{
	u32 l2_features = ptdev->gpu_info.l2_features;
	u32 gpu_cache_line_size = GPU_L2_FEATURES_LINE_SIZE(l2_features);

	return ALIGN(HEAP_CONTEXT_SIZE, gpu_cache_line_size);
}

static int panthor_get_heap_ctx_offset(struct panthor_heap_pool *pool, int id)
{
	return panthor_heap_ctx_stride(pool->ptdev) * id;
}

static void *panthor_get_heap_ctx(struct panthor_heap_pool *pool, int id)
{
	return pool->gpu_contexts->kmap +
	       panthor_get_heap_ctx_offset(pool, id);
}

static void panthor_free_heap_chunk(struct panthor_vm *vm,
				    struct panthor_heap *heap,
				    struct panthor_heap_chunk *chunk)
{
	mutex_lock(&heap->lock);
	list_del(&chunk->node);
	heap->chunk_count--;
	mutex_unlock(&heap->lock);

	panthor_kernel_bo_destroy(chunk->bo);
	kfree(chunk);
}

static int panthor_alloc_heap_chunk(struct panthor_device *ptdev,
				    struct panthor_vm *vm,
				    struct panthor_heap *heap,
				    bool initial_chunk)
{
	struct panthor_heap_chunk *chunk;
	struct panthor_heap_chunk_header *hdr;
	int ret;

	chunk = kmalloc(sizeof(*chunk), GFP_KERNEL);
	if (!chunk)
		return -ENOMEM;

	chunk->bo = panthor_kernel_bo_create(ptdev, vm, heap->chunk_size,
					     DRM_PANTHOR_BO_NO_MMAP,
					     DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
					     PANTHOR_VM_KERNEL_AUTO_VA);
	if (IS_ERR(chunk->bo)) {
		ret = PTR_ERR(chunk->bo);
		goto err_free_chunk;
	}

	ret = panthor_kernel_bo_vmap(chunk->bo);
	if (ret)
		goto err_destroy_bo;

	hdr = chunk->bo->kmap;
	memset(hdr, 0, sizeof(*hdr));

	if (initial_chunk && !list_empty(&heap->chunks)) {
		struct panthor_heap_chunk *prev_chunk;
		u64 prev_gpuva;

		prev_chunk = list_first_entry(&heap->chunks,
					      struct panthor_heap_chunk,
					      node);

		prev_gpuva = panthor_kernel_bo_gpuva(prev_chunk->bo);
		hdr->next = (prev_gpuva & GENMASK_ULL(63, 12)) |
			    (heap->chunk_size >> 12);
	}

	panthor_kernel_bo_vunmap(chunk->bo);

	mutex_lock(&heap->lock);
	list_add(&chunk->node, &heap->chunks);
	heap->chunk_count++;
	mutex_unlock(&heap->lock);

	return 0;

err_destroy_bo:
	panthor_kernel_bo_destroy(chunk->bo);

err_free_chunk:
	kfree(chunk);

	return ret;
}

static void panthor_free_heap_chunks(struct panthor_vm *vm,
				     struct panthor_heap *heap)
{
	struct panthor_heap_chunk *chunk, *tmp;

	list_for_each_entry_safe(chunk, tmp, &heap->chunks, node)
		panthor_free_heap_chunk(vm, heap, chunk);
}

static int panthor_alloc_heap_chunks(struct panthor_device *ptdev,
				     struct panthor_vm *vm,
				     struct panthor_heap *heap,
				     u32 chunk_count)
{
	int ret;
	u32 i;

	for (i = 0; i < chunk_count; i++) {
		ret = panthor_alloc_heap_chunk(ptdev, vm, heap, true);
		if (ret)
			return ret;
	}

	return 0;
}

static int
panthor_heap_destroy_locked(struct panthor_heap_pool *pool, u32 handle)
{
	struct panthor_heap *heap;

	heap = xa_erase(&pool->xa, handle);
	if (!heap)
		return -EINVAL;

	panthor_free_heap_chunks(pool->vm, heap);
	mutex_destroy(&heap->lock);
	kfree(heap);
	return 0;
}
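
/*
 * Locking overview: pool->lock (an rwsem) protects the handle xarray and the
 * pool->vm back-pointer, while each heap's mutex only protects its chunk
 * list. panthor_heap_destroy_locked() above therefore expects the caller to
 * hold pool->lock for writing, which is what panthor_heap_destroy() and
 * panthor_heap_pool_destroy() do.
 */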

/**
 * panthor_heap_destroy() - Destroy a heap context
 * @pool: Pool this context belongs to.
 * @handle: Handle returned by panthor_heap_create().
 */
int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle)
{
	int ret;

	down_write(&pool->lock);
	ret = panthor_heap_destroy_locked(pool, handle);
	up_write(&pool->lock);

	return ret;
}

/**
 * panthor_heap_create() - Create a heap context
 * @pool: Pool to instantiate the heap context from.
 * @initial_chunk_count: Number of chunks allocated at initialization time.
 * Must be at least 1.
 * @chunk_size: The size of each chunk. Must be page-aligned and lie in the
 * [128k:8M] range.
 * @max_chunks: Maximum number of chunks that can be allocated.
 * @target_in_flight: Maximum number of in-flight render passes.
 * @heap_ctx_gpu_va: Pointer holding the GPU address of the allocated heap
 * context.
 * @first_chunk_gpu_va: Pointer holding the GPU address of the first chunk
 * assigned to the heap context.
 *
 * Return: a valid handle on success, a negative error otherwise.
 */
int panthor_heap_create(struct panthor_heap_pool *pool,
			u32 initial_chunk_count,
			u32 chunk_size,
			u32 max_chunks,
			u32 target_in_flight,
			u64 *heap_ctx_gpu_va,
			u64 *first_chunk_gpu_va)
{
	struct panthor_heap *heap;
	struct panthor_heap_chunk *first_chunk;
	struct panthor_vm *vm;
	int ret = 0;
	u32 id;

	if (initial_chunk_count == 0)
		return -EINVAL;

	if (initial_chunk_count > max_chunks)
		return -EINVAL;

	if (!IS_ALIGNED(chunk_size, PAGE_SIZE) ||
	    chunk_size < SZ_128K || chunk_size > SZ_8M)
		return -EINVAL;

	down_read(&pool->lock);
	vm = panthor_vm_get(pool->vm);
	up_read(&pool->lock);

	/* The pool has been destroyed, we can't create a new heap. */
	if (!vm)
		return -EINVAL;

	heap = kzalloc(sizeof(*heap), GFP_KERNEL);
	if (!heap) {
		ret = -ENOMEM;
		goto err_put_vm;
	}

	mutex_init(&heap->lock);
	INIT_LIST_HEAD(&heap->chunks);
	heap->chunk_size = chunk_size;
	heap->max_chunks = max_chunks;
	heap->target_in_flight = target_in_flight;

	ret = panthor_alloc_heap_chunks(pool->ptdev, vm, heap,
					initial_chunk_count);
	if (ret)
		goto err_free_heap;

	first_chunk = list_first_entry(&heap->chunks,
				       struct panthor_heap_chunk,
				       node);
	*first_chunk_gpu_va = panthor_kernel_bo_gpuva(first_chunk->bo);

	down_write(&pool->lock);
	/* The pool has been destroyed, we can't create a new heap. */
	if (!pool->vm) {
		ret = -EINVAL;
	} else {
		ret = xa_alloc(&pool->xa, &id, heap,
			       XA_LIMIT(0, MAX_HEAPS_PER_POOL - 1), GFP_KERNEL);
		if (!ret) {
			void *gpu_ctx = panthor_get_heap_ctx(pool, id);

			memset(gpu_ctx, 0, panthor_heap_ctx_stride(pool->ptdev));
			*heap_ctx_gpu_va = panthor_kernel_bo_gpuva(pool->gpu_contexts) +
					   panthor_get_heap_ctx_offset(pool, id);
		}
	}
	up_write(&pool->lock);

	if (ret)
		goto err_free_heap;

	panthor_vm_put(vm);
	return id;

err_free_heap:
	panthor_free_heap_chunks(pool->vm, heap);
	mutex_destroy(&heap->lock);
	kfree(heap);

err_put_vm:
	panthor_vm_put(vm);
	return ret;
}
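
/*
 * Both panthor_heap_return_chunk() and panthor_heap_grow() identify the heap
 * from the context GPU VA handed back by the FW: the offset from the
 * gpu_contexts BO base, divided by the context stride, recovers the xarray
 * handle that panthor_heap_create() returned.
 */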

/**
 * panthor_heap_return_chunk() - Return an unused heap chunk
 * @pool: The pool this heap belongs to.
 * @heap_gpu_va: The GPU address of the heap context.
 * @chunk_gpu_va: The chunk VA to return.
 *
 * This function is used when a chunk allocated with panthor_heap_grow()
 * couldn't be linked to the heap context through the FW interface because
 * the group requesting the allocation was scheduled out in the meantime.
 */
int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
			      u64 heap_gpu_va,
			      u64 chunk_gpu_va)
{
	u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts);
	u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev);
	struct panthor_heap_chunk *chunk, *tmp, *removed = NULL;
	struct panthor_heap *heap;
	int ret;

	if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL)
		return -EINVAL;

	down_read(&pool->lock);
	heap = xa_load(&pool->xa, heap_id);
	if (!heap) {
		ret = -EINVAL;
		goto out_unlock;
	}

	chunk_gpu_va &= GENMASK_ULL(63, 12);

	mutex_lock(&heap->lock);
	list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) {
		if (panthor_kernel_bo_gpuva(chunk->bo) == chunk_gpu_va) {
			removed = chunk;
			list_del(&chunk->node);
			heap->chunk_count--;
			break;
		}
	}
	mutex_unlock(&heap->lock);

	if (removed) {
		panthor_kernel_bo_destroy(chunk->bo);
		kfree(chunk);
		ret = 0;
	} else {
		ret = -EINVAL;
	}

out_unlock:
	up_read(&pool->lock);
	return ret;
}
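
/*
 * Chunk pointers exchanged with the FW pack two fields into one u64: bits
 * [63:12] hold the page-aligned GPU VA of the chunk, and bits [11:0] hold
 * heap->chunk_size >> 12 (presumably the chunk size expressed in 4K pages,
 * given the shift). This is the encoding used for both
 * panthor_heap_chunk_header::next and the value returned through
 * @new_chunk_gpu_va below.
 */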

/**
 * panthor_heap_grow() - Make a heap context grow.
 * @pool: The pool this heap belongs to.
 * @heap_gpu_va: The GPU address of the heap context.
 * @renderpasses_in_flight: Number of render passes currently in-flight.
 * @pending_frag_count: Number of fragment jobs waiting for execution/completion.
 * @new_chunk_gpu_va: Pointer used to return the chunk VA.
 *
 * Return:
 * - 0 if a new chunk was allocated
 * - -ENOMEM if the tiler context reached the maximum number of chunks,
 *   if too many render passes are in-flight, or if the allocation failed
 * - -EINVAL if any of the arguments passed to panthor_heap_grow() is invalid
 */
int panthor_heap_grow(struct panthor_heap_pool *pool,
		      u64 heap_gpu_va,
		      u32 renderpasses_in_flight,
		      u32 pending_frag_count,
		      u64 *new_chunk_gpu_va)
{
	u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts);
	u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev);
	struct panthor_heap_chunk *chunk;
	struct panthor_heap *heap;
	int ret;

	if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL)
		return -EINVAL;

	down_read(&pool->lock);
	heap = xa_load(&pool->xa, heap_id);
	if (!heap) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/* If we reached the target in-flight render passes, or if we
	 * reached the maximum number of chunks, let the FW figure out another
	 * way to find some memory (wait for render passes to finish, or call
	 * the exception handler provided by the userspace driver, if any).
	 */
	if (renderpasses_in_flight > heap->target_in_flight ||
	    heap->chunk_count >= heap->max_chunks) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	/* FIXME: panthor_alloc_heap_chunk() triggers a kernel BO creation,
	 * which goes through the blocking allocation path. Ultimately, we
	 * want a non-blocking allocation, so we can immediately report to the
	 * FW when the system is running out of memory. In that case, the FW
	 * can call a user-provided exception handler, which might try to free
	 * some tiler memory by issuing an intermediate fragment job. If the
	 * exception handler can't do anything, it will flag the queue as
	 * faulty so the job that triggered this tiler chunk allocation and all
	 * further jobs in this queue fail immediately instead of having to
	 * wait for the job timeout.
	 */
	ret = panthor_alloc_heap_chunk(pool->ptdev, pool->vm, heap, false);
	if (ret)
		goto out_unlock;

	chunk = list_first_entry(&heap->chunks,
				 struct panthor_heap_chunk,
				 node);
	*new_chunk_gpu_va = (panthor_kernel_bo_gpuva(chunk->bo) & GENMASK_ULL(63, 12)) |
			    (heap->chunk_size >> 12);
	ret = 0;

out_unlock:
	up_read(&pool->lock);
	return ret;
}

static void panthor_heap_pool_release(struct kref *refcount)
{
	struct panthor_heap_pool *pool =
		container_of(refcount, struct panthor_heap_pool, refcount);

	xa_destroy(&pool->xa);
	kfree(pool);
}

/**
 * panthor_heap_pool_put() - Release a heap pool reference
 * @pool: Pool to release the reference on. Can be NULL.
 */
void panthor_heap_pool_put(struct panthor_heap_pool *pool)
{
	if (pool)
		kref_put(&pool->refcount, panthor_heap_pool_release);
}

/**
 * panthor_heap_pool_get() - Get a heap pool reference
 * @pool: Pool to get the reference on. Can be NULL.
 *
 * Return: @pool.
 */
struct panthor_heap_pool *
panthor_heap_pool_get(struct panthor_heap_pool *pool)
{
	if (pool)
		kref_get(&pool->refcount);

	return pool;
}

/**
 * panthor_heap_pool_create() - Create a heap pool
 * @ptdev: Device.
 * @vm: The VM this heap pool will be attached to.
 *
 * Heap pools might contain up to 128 heap contexts, and are per-VM.
 *
 * Return: A valid pointer on success, an ERR_PTR()-encoded error otherwise.
 */
struct panthor_heap_pool *
panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm)
{
	size_t bosize = ALIGN(MAX_HEAPS_PER_POOL *
			      panthor_heap_ctx_stride(ptdev),
			      4096);
	struct panthor_heap_pool *pool;
	int ret = 0;

	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
	if (!pool)
		return ERR_PTR(-ENOMEM);

	/* We want a weak ref here: the heap pool belongs to the VM, so we're
	 * sure that, as long as the heap pool exists, the VM exists too.
	 */
	pool->vm = vm;
	pool->ptdev = ptdev;
	init_rwsem(&pool->lock);
	xa_init_flags(&pool->xa, XA_FLAGS_ALLOC);
	kref_init(&pool->refcount);

	pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, bosize,
						      DRM_PANTHOR_BO_NO_MMAP,
						      DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
						      PANTHOR_VM_KERNEL_AUTO_VA);
	if (IS_ERR(pool->gpu_contexts)) {
		ret = PTR_ERR(pool->gpu_contexts);
		goto err_destroy_pool;
	}

	ret = panthor_kernel_bo_vmap(pool->gpu_contexts);
	if (ret)
		goto err_destroy_pool;

	return pool;

err_destroy_pool:
	panthor_heap_pool_destroy(pool);
	return ERR_PTR(ret);
}
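
/*
 * Lifetime note: the pool only holds a weak reference to its VM (see
 * panthor_heap_pool_create() above). panthor_heap_pool_destroy() clears
 * pool->vm under the write lock, which is how panthor_heap_create() detects
 * that the pool has been torn down and refuses to create new heaps.
 */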

/**
 * panthor_heap_pool_destroy() - Destroy a heap pool.
 * @pool: Pool to destroy.
 *
 * This function destroys all heap contexts and their resources, thus
 * preventing any further use of the heap contexts or the chunks attached
 * to them.
 *
 * If the GPU still has access to some heap contexts, a fault should be
 * triggered, which should flag the command stream groups using these
 * contexts as faulty.
 *
 * The heap pool object is only released when all references to this pool
 * are released.
 */
void panthor_heap_pool_destroy(struct panthor_heap_pool *pool)
{
	struct panthor_heap *heap;
	unsigned long i;

	if (!pool)
		return;

	down_write(&pool->lock);
	xa_for_each(&pool->xa, i, heap)
		drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i));

	if (!IS_ERR_OR_NULL(pool->gpu_contexts))
		panthor_kernel_bo_destroy(pool->gpu_contexts);

	/* Reflects the fact the pool has been destroyed. */
	pool->vm = NULL;
	up_write(&pool->lock);

	panthor_heap_pool_put(pool);
}
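
/*
 * Typical usage of this API (a sketch based on the kernel-doc above, not on
 * the callers outside this file): create a per-VM pool with
 * panthor_heap_pool_create(), create heap contexts on demand with
 * panthor_heap_create(), service FW tiler-OOM requests with
 * panthor_heap_grow() (handing back unused chunks through
 * panthor_heap_return_chunk() when a grown chunk couldn't be linked), and
 * finally tear everything down with panthor_heap_destroy() and
 * panthor_heap_pool_destroy().
 */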