// SPDX-License-Identifier: MIT
/*
 * Copyright 2023 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/dma-fence-unwrap.h>

#include <drm/drm_exec.h>
#include <drm/drm_syncobj.h>

#include "amdgpu.h"
#include "amdgpu_userq_fence.h"

static const struct dma_fence_ops amdgpu_userq_fence_ops;
static struct kmem_cache *amdgpu_userq_fence_slab;

int amdgpu_userq_fence_slab_init(void)
{
	amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence",
						    sizeof(struct amdgpu_userq_fence),
						    0,
						    SLAB_HWCACHE_ALIGN,
						    NULL);
	if (!amdgpu_userq_fence_slab)
		return -ENOMEM;

	return 0;
}

void amdgpu_userq_fence_slab_fini(void)
{
	rcu_barrier();
	kmem_cache_destroy(amdgpu_userq_fence_slab);
}

static inline struct amdgpu_userq_fence *to_amdgpu_userq_fence(struct dma_fence *f)
{
	if (!f || f->ops != &amdgpu_userq_fence_ops)
		return NULL;

	return container_of(f, struct amdgpu_userq_fence, base);
}

static u64 amdgpu_userq_fence_read(struct amdgpu_userq_fence_driver *fence_drv)
{
	return le64_to_cpu(*fence_drv->cpu_addr);
}

int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
				    struct amdgpu_usermode_queue *userq)
{
	struct amdgpu_userq_fence_driver *fence_drv;
	unsigned long flags;
	int r;

	fence_drv = kzalloc(sizeof(*fence_drv), GFP_KERNEL);
	if (!fence_drv) {
		DRM_ERROR("Failed to allocate memory for fence driver\n");
		return -ENOMEM;
	}

	/* Acquire seq64 memory */
	r = amdgpu_seq64_alloc(adev, &fence_drv->va, &fence_drv->gpu_addr,
			       &fence_drv->cpu_addr);
	if (r)
		goto free_fence_drv;

	memset(fence_drv->cpu_addr, 0, sizeof(u64));

	kref_init(&fence_drv->refcount);
	INIT_LIST_HEAD(&fence_drv->fences);
	spin_lock_init(&fence_drv->fence_list_lock);

	fence_drv->adev = adev;
	fence_drv->fence_drv_xa_ptr = &userq->fence_drv_xa;
	fence_drv->context = dma_fence_context_alloc(1);
	get_task_comm(fence_drv->timeline_name, current);

	xa_lock_irqsave(&adev->userq_xa, flags);
	r = xa_err(__xa_store(&adev->userq_xa, userq->doorbell_index,
			      fence_drv, GFP_KERNEL));
	xa_unlock_irqrestore(&adev->userq_xa, flags);
	if (r)
		goto free_seq64;

	userq->fence_drv = fence_drv;

	return 0;

free_seq64:
	amdgpu_seq64_free(adev, fence_drv->va);
free_fence_drv:
	kfree(fence_drv);

	return r;
}

void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv)
{
	struct amdgpu_userq_fence *userq_fence, *tmp;
	struct dma_fence *fence;
	u64 rptr;
	int i;

	if (!fence_drv)
		return;

	rptr = amdgpu_userq_fence_read(fence_drv);

	spin_lock(&fence_drv->fence_list_lock);
	list_for_each_entry_safe(userq_fence, tmp, &fence_drv->fences, link) {
		fence = &userq_fence->base;

		/*
		 * Fences are kept in submission (seqno) order, so stop at the
		 * first one the hardware has not reached yet.
		 */
		if (rptr < fence->seqno)
			break;

		dma_fence_signal(fence);

		for (i = 0; i < userq_fence->fence_drv_array_count; i++)
			amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]);

		list_del(&userq_fence->link);
		dma_fence_put(fence);
	}
	spin_unlock(&fence_drv->fence_list_lock);
}

void amdgpu_userq_fence_driver_destroy(struct kref *ref)
{
	struct amdgpu_userq_fence_driver *fence_drv = container_of(ref,
					struct amdgpu_userq_fence_driver,
					refcount);
	struct amdgpu_userq_fence_driver *xa_fence_drv;
	struct amdgpu_device *adev = fence_drv->adev;
	struct amdgpu_userq_fence *fence, *tmp;
	struct xarray *xa = &adev->userq_xa;
	unsigned long index, flags;
	struct dma_fence *f;

	spin_lock(&fence_drv->fence_list_lock);
	list_for_each_entry_safe(fence, tmp, &fence_drv->fences, link) {
		f = &fence->base;

		if (!dma_fence_is_signaled(f)) {
			dma_fence_set_error(f, -ECANCELED);
			dma_fence_signal(f);
		}

		list_del(&fence->link);
		dma_fence_put(f);
	}
	spin_unlock(&fence_drv->fence_list_lock);

	xa_lock_irqsave(xa, flags);
	xa_for_each(xa, index, xa_fence_drv)
		if (xa_fence_drv == fence_drv)
			__xa_erase(xa, index);
	xa_unlock_irqrestore(xa, flags);

	/* Free seq64 memory */
	amdgpu_seq64_free(adev, fence_drv->va);
	kfree(fence_drv);
}

void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv)
{
	kref_get(&fence_drv->refcount);
}

void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv)
{
	kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy);
}

#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ
static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence)
{
	*userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC);
	return *userq_fence ? 0 : -ENOMEM;
}

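/*
 * amdgpu_userq_fence_create - initialize a fence for a user queue submission
 *
 * The fence seqno is the queue wptr at submission time; it signals once the
 * value written to the queue's seq64 slot reaches the seqno. Fence driver
 * references stashed in userq->fence_drv_xa (see the wait IOCTL below) are
 * moved into fence_drv_array here and released when the fence is processed.
 */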
static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
				     struct amdgpu_userq_fence *userq_fence,
				     u64 seq, struct dma_fence **f)
{
	struct amdgpu_userq_fence_driver *fence_drv;
	struct dma_fence *fence;
	unsigned long flags;

	fence_drv = userq->fence_drv;
	if (!fence_drv)
		return -EINVAL;

	spin_lock_init(&userq_fence->lock);
	INIT_LIST_HEAD(&userq_fence->link);
	fence = &userq_fence->base;
	userq_fence->fence_drv = fence_drv;

	dma_fence_init(fence, &amdgpu_userq_fence_ops, &userq_fence->lock,
		       fence_drv->context, seq);

	amdgpu_userq_fence_driver_get(fence_drv);
	dma_fence_get(fence);

	if (!xa_empty(&userq->fence_drv_xa)) {
		struct amdgpu_userq_fence_driver *stored_fence_drv;
		unsigned long index, count = 0;
		int i = 0;

		xa_lock(&userq->fence_drv_xa);
		xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv)
			count++;

		userq_fence->fence_drv_array =
			kvmalloc_array(count,
				       sizeof(struct amdgpu_userq_fence_driver *),
				       GFP_ATOMIC);

		if (userq_fence->fence_drv_array) {
			xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) {
				userq_fence->fence_drv_array[i] = stored_fence_drv;
				__xa_erase(&userq->fence_drv_xa, index);
				i++;
			}
		}

		userq_fence->fence_drv_array_count = i;
		xa_unlock(&userq->fence_drv_xa);
	} else {
		userq_fence->fence_drv_array = NULL;
		userq_fence->fence_drv_array_count = 0;
	}

	/* Check if hardware has already processed the job */
	spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
	if (!dma_fence_is_signaled_locked(fence))
		list_add_tail(&userq_fence->link, &fence_drv->fences);
	else
		dma_fence_put(fence);
	spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);

	*f = fence;

	return 0;
}
#endif

static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f)
{
	return "amdgpu_userqueue_fence";
}

static const char *amdgpu_userq_fence_get_timeline_name(struct dma_fence *f)
{
	struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);

	return fence->fence_drv->timeline_name;
}

static bool amdgpu_userq_fence_signaled(struct dma_fence *f)
{
	struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);
	struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv;
	u64 rptr, wptr;

	rptr = amdgpu_userq_fence_read(fence_drv);
	wptr = fence->base.seqno;

	if (rptr >= wptr)
		return true;

	return false;
}

static void amdgpu_userq_fence_free(struct rcu_head *rcu)
{
	struct dma_fence *fence = container_of(rcu, struct dma_fence, rcu);
	struct amdgpu_userq_fence *userq_fence = to_amdgpu_userq_fence(fence);
	struct amdgpu_userq_fence_driver *fence_drv = userq_fence->fence_drv;

	/* Release the fence driver reference */
	amdgpu_userq_fence_driver_put(fence_drv);

	kvfree(userq_fence->fence_drv_array);
	kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
}

static void amdgpu_userq_fence_release(struct dma_fence *f)
{
	call_rcu(&f->rcu, amdgpu_userq_fence_free);
}

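/*
 * User queue fences use the 64-bit seq64 value as their sequence space:
 * .signaled compares the current value in seq64 memory against the fence
 * seqno (the wptr at submission), and .release defers freeing to an RCU
 * callback so concurrent RCU readers of the fence remain safe.
 */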
static const struct dma_fence_ops amdgpu_userq_fence_ops = {
	.use_64bit_seqno = true,
	.get_driver_name = amdgpu_userq_fence_get_driver_name,
	.get_timeline_name = amdgpu_userq_fence_get_timeline_name,
	.signaled = amdgpu_userq_fence_signaled,
	.release = amdgpu_userq_fence_release,
};

#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ
/**
 * amdgpu_userq_fence_read_wptr - Read the userq wptr value
 *
 * @queue: user mode queue structure pointer
 * @wptr: write pointer value
 *
 * Read the wptr value from userq's MQD. The userq signal IOCTL
 * creates a dma_fence for the shared buffers that expects the
 * RPTR value written to seq64 memory >= WPTR.
 *
 * Returns wptr value on success, error on failure.
 */
static int amdgpu_userq_fence_read_wptr(struct amdgpu_usermode_queue *queue,
					u64 *wptr)
{
	struct amdgpu_bo_va_mapping *mapping;
	struct amdgpu_bo *bo;
	u64 addr, *ptr;
	int r;

	r = amdgpu_bo_reserve(queue->vm->root.bo, false);
	if (r)
		return r;

	addr = queue->userq_prop->wptr_gpu_addr;
	addr &= AMDGPU_GMC_HOLE_MASK;

	mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, addr >> PAGE_SHIFT);
	if (!mapping) {
		amdgpu_bo_unreserve(queue->vm->root.bo);
		DRM_ERROR("Failed to lookup amdgpu_bo_va_mapping\n");
		return -EINVAL;
	}

	bo = amdgpu_bo_ref(mapping->bo_va->base.bo);
	amdgpu_bo_unreserve(queue->vm->root.bo);
	r = amdgpu_bo_reserve(bo, true);
	if (r) {
		DRM_ERROR("Failed to reserve userqueue wptr bo\n");
		amdgpu_bo_unref(&bo);
		return r;
	}

	r = amdgpu_bo_kmap(bo, (void **)&ptr);
	if (r) {
		DRM_ERROR("Failed mapping the userqueue wptr bo\n");
		goto map_error;
	}

	*wptr = le64_to_cpu(*ptr);

	amdgpu_bo_kunmap(bo);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);

	return 0;

map_error:
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);

	return r;
}

static void amdgpu_userq_fence_cleanup(struct dma_fence *fence)
{
	dma_fence_put(fence);
}

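/*
 * amdgpu_userq_signal_ioctl - attach a user queue fence to BOs and syncobjs
 *
 * Looks up the syncobj and GEM handles supplied by userspace, reads the
 * queue's current wptr, creates a user queue fence with that wptr as seqno,
 * adds it to the reservation objects of the read/write BOs and installs it
 * in the syncobjs.
 */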
int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
			      struct drm_file *filp)
{
	struct amdgpu_fpriv *fpriv = filp->driver_priv;
	struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
	struct drm_amdgpu_userq_signal *args = data;
	struct drm_gem_object **gobj_write = NULL;
	struct drm_gem_object **gobj_read = NULL;
	struct amdgpu_usermode_queue *queue;
	struct amdgpu_userq_fence *userq_fence;
	struct drm_syncobj **syncobj = NULL;
	u32 *bo_handles_write, num_write_bo_handles;
	u32 *syncobj_handles, num_syncobj_handles;
	u32 *bo_handles_read, num_read_bo_handles;
	int r, i, entry, rentry, wentry;
	struct dma_fence *fence;
	struct drm_exec exec;
	u64 wptr;

	num_syncobj_handles = args->num_syncobj_handles;
	syncobj_handles = memdup_user(u64_to_user_ptr(args->syncobj_handles),
				      sizeof(u32) * num_syncobj_handles);
	if (IS_ERR(syncobj_handles))
		return PTR_ERR(syncobj_handles);

	/* Array of pointers to the looked up syncobjs */
	syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), GFP_KERNEL);
	if (!syncobj) {
		r = -ENOMEM;
		goto free_syncobj_handles;
	}

	for (entry = 0; entry < num_syncobj_handles; entry++) {
		syncobj[entry] = drm_syncobj_find(filp, syncobj_handles[entry]);
		if (!syncobj[entry]) {
			r = -ENOENT;
			goto free_syncobj;
		}
	}

	num_read_bo_handles = args->num_bo_read_handles;
	bo_handles_read = memdup_user(u64_to_user_ptr(args->bo_read_handles),
				      sizeof(u32) * num_read_bo_handles);
	if (IS_ERR(bo_handles_read)) {
		r = PTR_ERR(bo_handles_read);
		goto free_syncobj;
	}

	/* Array of pointers to the GEM read objects */
	gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL);
	if (!gobj_read) {
		r = -ENOMEM;
		goto free_bo_handles_read;
	}

	for (rentry = 0; rentry < num_read_bo_handles; rentry++) {
		gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]);
		if (!gobj_read[rentry]) {
			r = -ENOENT;
			goto put_gobj_read;
		}
	}

	num_write_bo_handles = args->num_bo_write_handles;
	bo_handles_write = memdup_user(u64_to_user_ptr(args->bo_write_handles),
				       sizeof(u32) * num_write_bo_handles);
	if (IS_ERR(bo_handles_write)) {
		r = PTR_ERR(bo_handles_write);
		goto put_gobj_read;
	}

	/* Array of pointers to the GEM write objects */
	gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL);
	if (!gobj_write) {
		r = -ENOMEM;
		goto free_bo_handles_write;
	}

	for (wentry = 0; wentry < num_write_bo_handles; wentry++) {
		gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]);
		if (!gobj_write[wentry]) {
			r = -ENOENT;
			goto put_gobj_write;
		}
	}

	/* Retrieve the user queue */
	queue = idr_find(&userq_mgr->userq_idr, args->queue_id);
	if (!queue) {
		r = -ENOENT;
		goto put_gobj_write;
	}

	r = amdgpu_userq_fence_read_wptr(queue, &wptr);
	if (r)
		goto put_gobj_write;

	r = amdgpu_userq_fence_alloc(&userq_fence);
	if (r)
		goto put_gobj_write;

	/*
	 * We are here means UQ is active, make sure the eviction fence is
	 * valid. This call is expected to return with userq_mgr->userq_mutex
	 * held (there is no explicit lock here), hence the unlocks below.
	 */
	amdgpu_userqueue_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);

	/* Create a new fence */
	r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence);
	if (r) {
		mutex_unlock(&userq_mgr->userq_mutex);
		kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
		goto put_gobj_write;
	}

	dma_fence_put(queue->last_fence);
	queue->last_fence = dma_fence_get(fence);
	mutex_unlock(&userq_mgr->userq_mutex);

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
		      (num_read_bo_handles + num_write_bo_handles));

	/* Lock all BOs with retry handling */
	drm_exec_until_all_locked(&exec) {
		r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
		drm_exec_retry_on_contention(&exec);
		if (r) {
			amdgpu_userq_fence_cleanup(fence);
			goto exec_fini;
		}

		r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
		drm_exec_retry_on_contention(&exec);
		if (r) {
			amdgpu_userq_fence_cleanup(fence);
			goto exec_fini;
		}
	}

	for (i = 0; i < num_read_bo_handles; i++) {
		if (!gobj_read || !gobj_read[i]->resv)
			continue;

		dma_resv_add_fence(gobj_read[i]->resv, fence,
				   DMA_RESV_USAGE_READ);
	}

	for (i = 0; i < num_write_bo_handles; i++) {
		if (!gobj_write || !gobj_write[i]->resv)
			continue;

		dma_resv_add_fence(gobj_write[i]->resv, fence,
				   DMA_RESV_USAGE_WRITE);
	}

	/* Add the created fence to syncobj/BO's */
	for (i = 0; i < num_syncobj_handles; i++)
		drm_syncobj_replace_fence(syncobj[i], fence);

	/* Drop the reference acquired in the fence creation function */
	dma_fence_put(fence);

exec_fini:
	drm_exec_fini(&exec);
put_gobj_write:
	while (wentry-- > 0)
		drm_gem_object_put(gobj_write[wentry]);
	kfree(gobj_write);
free_bo_handles_write:
	kfree(bo_handles_write);
put_gobj_read:
	while (rentry-- > 0)
		drm_gem_object_put(gobj_read[rentry]);
	kfree(gobj_read);
free_bo_handles_read:
	kfree(bo_handles_read);
free_syncobj:
	while (entry-- > 0)
		if (syncobj[entry])
			drm_syncobj_put(syncobj[entry]);
	kfree(syncobj);
free_syncobj_handles:
	kfree(syncobj_handles);

	return r;
}
#else
int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
			      struct drm_file *filp)
{
	return -ENOTSUPP;
}
#endif

#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ
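/*
 * amdgpu_userq_wait_ioctl - gather the fences a user queue has to wait for
 *
 * Collects the fences attached to the supplied BOs, syncobjs and timeline
 * points. If userspace passes num_fences == 0, only the fence count is
 * returned so buffers can be sized. Otherwise, foreign fences are waited on
 * inline, while for user queue fences the seq64 GPU address and expected
 * value are copied back so the queue can wait on them directly.
 */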
int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *filp)
{
	u32 *syncobj_handles, *timeline_points, *timeline_handles, *bo_handles_read, *bo_handles_write;
	u32 num_syncobj, num_read_bo_handles, num_write_bo_handles;
	struct drm_amdgpu_userq_fence_info *fence_info = NULL;
	struct drm_amdgpu_userq_wait *wait_info = data;
	struct drm_gem_object **gobj_write;
	struct drm_gem_object **gobj_read;
	struct dma_fence **fences = NULL;
	u16 num_points, num_fences = 0;
	int r, i, rentry, wentry, cnt;
	struct drm_exec exec;

	num_read_bo_handles = wait_info->num_bo_read_handles;
	bo_handles_read = memdup_user(u64_to_user_ptr(wait_info->bo_read_handles),
				      sizeof(u32) * num_read_bo_handles);
	if (IS_ERR(bo_handles_read))
		return PTR_ERR(bo_handles_read);

	num_write_bo_handles = wait_info->num_bo_write_handles;
	bo_handles_write = memdup_user(u64_to_user_ptr(wait_info->bo_write_handles),
				       sizeof(u32) * num_write_bo_handles);
	if (IS_ERR(bo_handles_write)) {
		r = PTR_ERR(bo_handles_write);
		goto free_bo_handles_read;
	}

	num_syncobj = wait_info->num_syncobj_handles;
	syncobj_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_handles),
				      sizeof(u32) * num_syncobj);
	if (IS_ERR(syncobj_handles)) {
		r = PTR_ERR(syncobj_handles);
		goto free_bo_handles_write;
	}

	num_points = wait_info->num_syncobj_timeline_handles;
	timeline_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_handles),
				       sizeof(u32) * num_points);
	if (IS_ERR(timeline_handles)) {
		r = PTR_ERR(timeline_handles);
		goto free_syncobj_handles;
	}

	timeline_points = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_points),
				      sizeof(u32) * num_points);
	if (IS_ERR(timeline_points)) {
		r = PTR_ERR(timeline_points);
		goto free_timeline_handles;
	}

	gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL);
	if (!gobj_read) {
		r = -ENOMEM;
		goto free_timeline_points;
	}

	for (rentry = 0; rentry < num_read_bo_handles; rentry++) {
		gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]);
		if (!gobj_read[rentry]) {
			r = -ENOENT;
			goto put_gobj_read;
		}
	}

	gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL);
	if (!gobj_write) {
		r = -ENOMEM;
		goto put_gobj_read;
	}

	for (wentry = 0; wentry < num_write_bo_handles; wentry++) {
		gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]);
		if (!gobj_write[wentry]) {
			r = -ENOENT;
			goto put_gobj_write;
		}
	}

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
		      (num_read_bo_handles + num_write_bo_handles));

	/* Lock all BOs with retry handling */
	drm_exec_until_all_locked(&exec) {
		r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
		drm_exec_retry_on_contention(&exec);
		if (r) {
			drm_exec_fini(&exec);
			goto put_gobj_write;
		}

		r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
		drm_exec_retry_on_contention(&exec);
		if (r) {
			drm_exec_fini(&exec);
			goto put_gobj_write;
		}
	}

	if (!wait_info->num_fences) {
		/* First pass: only count the fences so userspace can size its buffers */
		if (num_points) {
			struct dma_fence_unwrap iter;
			struct dma_fence *fence;
			struct dma_fence *f;

			for (i = 0; i < num_points; i++) {
				r = drm_syncobj_find_fence(filp, timeline_handles[i],
							   timeline_points[i],
							   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
							   &fence);
				if (r)
					goto exec_fini;

				dma_fence_unwrap_for_each(f, &iter, fence)
					num_fences++;

				dma_fence_put(fence);
			}
		}

		/* Count syncobj's fence */
		for (i = 0; i < num_syncobj; i++) {
			struct dma_fence *fence;

			r = drm_syncobj_find_fence(filp, syncobj_handles[i],
						   0,
						   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
						   &fence);
			if (r)
				goto exec_fini;

			num_fences++;
			dma_fence_put(fence);
		}

		/* Count GEM objects fence */
		for (i = 0; i < num_read_bo_handles; i++) {
			struct dma_resv_iter resv_cursor;
			struct dma_fence *fence;

			dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
						DMA_RESV_USAGE_READ, fence)
				num_fences++;
		}

		for (i = 0; i < num_write_bo_handles; i++) {
			struct dma_resv_iter resv_cursor;
			struct dma_fence *fence;

			dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
						DMA_RESV_USAGE_WRITE, fence)
				num_fences++;
		}

		/*
		 * Passing num_fences = 0 means that userspace doesn't want to
		 * retrieve userq_fence_info. If num_fences = 0 we skip filling
		 * userq_fence_info and return the actual number of fences on
		 * args->num_fences.
		 */
		wait_info->num_fences = num_fences;
	} else {
		/* Array of fence info */
		fence_info = kmalloc_array(wait_info->num_fences, sizeof(*fence_info), GFP_KERNEL);
		if (!fence_info) {
			r = -ENOMEM;
			goto exec_fini;
		}

		/* Array of fences */
		fences = kmalloc_array(wait_info->num_fences, sizeof(*fences), GFP_KERNEL);
		if (!fences) {
			r = -ENOMEM;
			goto free_fence_info;
		}

		/* Retrieve GEM read objects fence */
		for (i = 0; i < num_read_bo_handles; i++) {
			struct dma_resv_iter resv_cursor;
			struct dma_fence *fence;

			dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
						DMA_RESV_USAGE_READ, fence) {
				if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
					r = -EINVAL;
					goto free_fences;
				}

				fences[num_fences++] = fence;
				dma_fence_get(fence);
			}
		}

		/* Retrieve GEM write objects fence */
		for (i = 0; i < num_write_bo_handles; i++) {
			struct dma_resv_iter resv_cursor;
			struct dma_fence *fence;

			dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
						DMA_RESV_USAGE_WRITE, fence) {
				if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
					r = -EINVAL;
					goto free_fences;
				}

				fences[num_fences++] = fence;
				dma_fence_get(fence);
			}
		}

		if (num_points) {
			struct dma_fence_unwrap iter;
			struct dma_fence *fence;
			struct dma_fence *f;

			for (i = 0; i < num_points; i++) {
				r = drm_syncobj_find_fence(filp, timeline_handles[i],
							   timeline_points[i],
							   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
							   &fence);
				if (r)
					goto free_fences;

				dma_fence_unwrap_for_each(f, &iter, fence) {
					if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
						r = -EINVAL;
						goto free_fences;
					}

					dma_fence_get(f);
					fences[num_fences++] = f;
				}

				dma_fence_put(fence);
			}
		}

		/* Retrieve syncobj's fence */
		for (i = 0; i < num_syncobj; i++) {
			struct dma_fence *fence;

			r = drm_syncobj_find_fence(filp, syncobj_handles[i],
						   0,
						   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
						   &fence);
			if (r)
				goto free_fences;

			if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
				r = -EINVAL;
				goto free_fences;
			}

			fences[num_fences++] = fence;
		}

		for (i = 0, cnt = 0; i < num_fences; i++) {
			struct amdgpu_userq_fence_driver *fence_drv;
			struct amdgpu_userq_fence *userq_fence;
			u32 index;

			userq_fence = to_amdgpu_userq_fence(fences[i]);
			if (!userq_fence) {
				/*
				 * Just waiting on other driver fences should
				 * be good for now
				 */
				r = dma_fence_wait(fences[i], true);
				dma_fence_put(fences[i]);
				/* Clear the slot so the error path doesn't drop this reference again */
				fences[i] = NULL;
				if (r)
					goto free_fences;

				continue;
			}

			fence_drv = userq_fence->fence_drv;
			/*
			 * We need to make sure the user queue release their reference
			 * to the fence drivers at some point before queue destruction.
			 * Otherwise, we would gather those references until we don't
			 * have any more space left and crash.
			 */
			if (fence_drv->fence_drv_xa_ptr) {
				r = xa_alloc(fence_drv->fence_drv_xa_ptr, &index, fence_drv,
					     xa_limit_32b, GFP_KERNEL);
				if (r)
					goto free_fences;

				amdgpu_userq_fence_driver_get(fence_drv);
			}

			/* Store drm syncobj's gpu va address and value */
			fence_info[cnt].va = fence_drv->va;
			fence_info[cnt].value = fences[i]->seqno;

			dma_fence_put(fences[i]);
			fences[i] = NULL;
			/* Increment the actual userq fence count */
			cnt++;
		}

		wait_info->num_fences = cnt;
		/* Copy userq fence info to user space */
		if (copy_to_user(u64_to_user_ptr(wait_info->out_fences),
				 fence_info, wait_info->num_fences * sizeof(*fence_info))) {
			r = -EFAULT;
			goto free_fences;
		}

		kfree(fences);
		kfree(fence_info);
	}

	drm_exec_fini(&exec);
	for (i = 0; i < num_read_bo_handles; i++)
		drm_gem_object_put(gobj_read[i]);
	kfree(gobj_read);

	for (i = 0; i < num_write_bo_handles; i++)
		drm_gem_object_put(gobj_write[i]);
	kfree(gobj_write);

	kfree(timeline_points);
	kfree(timeline_handles);
	kfree(syncobj_handles);
	kfree(bo_handles_write);
	kfree(bo_handles_read);

	return 0;

free_fences:
	/* Already-consumed entries were cleared to NULL above; dma_fence_put() skips them */
	while (num_fences-- > 0)
		dma_fence_put(fences[num_fences]);
	kfree(fences);
free_fence_info:
	kfree(fence_info);
exec_fini:
	drm_exec_fini(&exec);
put_gobj_write:
	while (wentry-- > 0)
		drm_gem_object_put(gobj_write[wentry]);
	kfree(gobj_write);
put_gobj_read:
	while (rentry-- > 0)
		drm_gem_object_put(gobj_read[rentry]);
	kfree(gobj_read);
free_timeline_points:
	kfree(timeline_points);
free_timeline_handles:
	kfree(timeline_handles);
free_syncobj_handles:
	kfree(syncobj_handles);
free_bo_handles_write:
	kfree(bo_handles_write);
free_bo_handles_read:
	kfree(bo_handles_read);

	return r;
}
#else
int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *filp)
{
	return -ENOTSUPP;
}
#endif