// SPDX-License-Identifier: MIT
/*
 * Copyright 2023 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/dma-fence-unwrap.h>

#include <drm/drm_exec.h>
#include <drm/drm_syncobj.h>

#include "amdgpu.h"
#include "amdgpu_userq_fence.h"

static const struct dma_fence_ops amdgpu_userq_fence_ops;
static struct kmem_cache *amdgpu_userq_fence_slab;

int amdgpu_userq_fence_slab_init(void)
{
	amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence",
						    sizeof(struct amdgpu_userq_fence),
						    0,
						    SLAB_HWCACHE_ALIGN,
						    NULL);
	if (!amdgpu_userq_fence_slab)
		return -ENOMEM;

	return 0;
}

void amdgpu_userq_fence_slab_fini(void)
{
	rcu_barrier();
	kmem_cache_destroy(amdgpu_userq_fence_slab);
}

static inline struct amdgpu_userq_fence *to_amdgpu_userq_fence(struct dma_fence *f)
{
	if (!f || f->ops != &amdgpu_userq_fence_ops)
		return NULL;

	return container_of(f, struct amdgpu_userq_fence, base);
}

static u64 amdgpu_userq_fence_read(struct amdgpu_userq_fence_driver *fence_drv)
{
	return le64_to_cpu(*fence_drv->cpu_addr);
}

static void
amdgpu_userq_fence_write(struct amdgpu_userq_fence_driver *fence_drv,
			 u64 seq)
{
	if (fence_drv->cpu_addr)
		*fence_drv->cpu_addr = cpu_to_le64(seq);
}

int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
				    struct amdgpu_usermode_queue *userq)
{
	struct amdgpu_userq_fence_driver *fence_drv;
	unsigned long flags;
	int r;

	fence_drv = kzalloc(sizeof(*fence_drv), GFP_KERNEL);
	if (!fence_drv)
		return -ENOMEM;

	/* Acquire seq64 memory */
	r = amdgpu_seq64_alloc(adev, &fence_drv->va, &fence_drv->gpu_addr,
			       &fence_drv->cpu_addr);
	if (r)
		goto free_fence_drv;

	memset(fence_drv->cpu_addr, 0, sizeof(u64));

	kref_init(&fence_drv->refcount);
	INIT_LIST_HEAD(&fence_drv->fences);
	spin_lock_init(&fence_drv->fence_list_lock);

	fence_drv->adev = adev;
	fence_drv->context = dma_fence_context_alloc(1);
	get_task_comm(fence_drv->timeline_name, current);

	xa_lock_irqsave(&adev->userq_xa, flags);
	r = xa_err(__xa_store(&adev->userq_xa, userq->doorbell_index,
			      fence_drv, GFP_KERNEL));
	xa_unlock_irqrestore(&adev->userq_xa, flags);
	if (r)
		goto free_seq64;

	userq->fence_drv = fence_drv;

	return 0;

free_seq64:
	amdgpu_seq64_free(adev, fence_drv->va);
free_fence_drv:
	kfree(fence_drv);

	return r;
}

static void amdgpu_userq_walk_and_drop_fence_drv(struct xarray *xa)
{
	struct amdgpu_userq_fence_driver *fence_drv;
	unsigned long index;

	if (xa_empty(xa))
		return;

	xa_lock(xa);
	xa_for_each(xa, index, fence_drv) {
		__xa_erase(xa, index);
		amdgpu_userq_fence_driver_put(fence_drv);
	}

	xa_unlock(xa);
}

void
amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq)
{
	amdgpu_userq_walk_and_drop_fence_drv(&userq->fence_drv_xa);
	xa_destroy(&userq->fence_drv_xa);
	/* Drop the fence_drv reference held by user queue */
	amdgpu_userq_fence_driver_put(userq->fence_drv);
}

void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv)
{
	struct amdgpu_userq_fence *userq_fence, *tmp;
	struct dma_fence *fence;
	u64 rptr;
	int i;

	if (!fence_drv)
		return;

	/* Latest sequence number the GPU has written back to seq64 memory */
	rptr = amdgpu_userq_fence_read(fence_drv);

	spin_lock(&fence_drv->fence_list_lock);
	list_for_each_entry_safe(userq_fence, tmp, &fence_drv->fences, link) {
		fence = &userq_fence->base;

		if (rptr < fence->seqno)
			break;

		dma_fence_signal(fence);

		for (i = 0; i < userq_fence->fence_drv_array_count; i++)
			amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]);

		list_del(&userq_fence->link);
		dma_fence_put(fence);
	}
	spin_unlock(&fence_drv->fence_list_lock);
}

void amdgpu_userq_fence_driver_destroy(struct kref *ref)
{
	struct amdgpu_userq_fence_driver *fence_drv = container_of(ref,
					 struct amdgpu_userq_fence_driver,
					 refcount);
	struct amdgpu_userq_fence_driver *xa_fence_drv;
	struct amdgpu_device *adev = fence_drv->adev;
	struct amdgpu_userq_fence *fence, *tmp;
	struct xarray *xa = &adev->userq_xa;
	unsigned long index, flags;
	struct dma_fence *f;

	spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
	list_for_each_entry_safe(fence, tmp, &fence_drv->fences, link) {
		f = &fence->base;

		if (!dma_fence_is_signaled(f)) {
			dma_fence_set_error(f, -ECANCELED);
			dma_fence_signal(f);
		}

		list_del(&fence->link);
		dma_fence_put(f);
	}
	spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);

	xa_lock_irqsave(xa, flags);
	xa_for_each(xa, index, xa_fence_drv)
		if (xa_fence_drv == fence_drv)
			__xa_erase(xa, index);
	xa_unlock_irqrestore(xa, flags);

	/* Free seq64 memory */
	amdgpu_seq64_free(adev, fence_drv->va);
	kfree(fence_drv);
}

void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv)
{
	kref_get(&fence_drv->refcount);
}

void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv)
{
	kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy);
}

static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence)
{
	*userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC);
	return *userq_fence ? 0 : -ENOMEM;
}

static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
				     struct amdgpu_userq_fence *userq_fence,
				     u64 seq, struct dma_fence **f)
{
	struct amdgpu_userq_fence_driver *fence_drv;
	struct dma_fence *fence;
	unsigned long flags;

	fence_drv = userq->fence_drv;
	if (!fence_drv)
		return -EINVAL;

	spin_lock_init(&userq_fence->lock);
	INIT_LIST_HEAD(&userq_fence->link);
	fence = &userq_fence->base;
	userq_fence->fence_drv = fence_drv;

	dma_fence_init64(fence, &amdgpu_userq_fence_ops, &userq_fence->lock,
			 fence_drv->context, seq);

	amdgpu_userq_fence_driver_get(fence_drv);
	dma_fence_get(fence);

	/*
	 * Take over the fence_drv references stored on this queue (by the
	 * wait IOCTL), so they are dropped once this fence signals.
	 */
	if (!xa_empty(&userq->fence_drv_xa)) {
		struct amdgpu_userq_fence_driver *stored_fence_drv;
		unsigned long index, count = 0;
		int i = 0;

		xa_lock(&userq->fence_drv_xa);
		xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv)
			count++;

		userq_fence->fence_drv_array =
			kvmalloc_array(count,
				       sizeof(struct amdgpu_userq_fence_driver *),
				       GFP_ATOMIC);

		if (userq_fence->fence_drv_array) {
			xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) {
				userq_fence->fence_drv_array[i] = stored_fence_drv;
				__xa_erase(&userq->fence_drv_xa, index);
				i++;
			}
		}

		userq_fence->fence_drv_array_count = i;
		xa_unlock(&userq->fence_drv_xa);
	} else {
		userq_fence->fence_drv_array = NULL;
		userq_fence->fence_drv_array_count = 0;
	}

	/* Check if hardware has already processed the job */
	spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
	if (!dma_fence_is_signaled_locked(fence))
		list_add_tail(&userq_fence->link, &fence_drv->fences);
	else
		dma_fence_put(fence);

	spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);

	*f = fence;

	return 0;
}

static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f)
{
	return "amdgpu_userq_fence";
}

static const char *amdgpu_userq_fence_get_timeline_name(struct dma_fence *f)
{
	struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);

	return fence->fence_drv->timeline_name;
}

static bool amdgpu_userq_fence_signaled(struct dma_fence *f)
{
	struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);
	struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv;
	u64 rptr, wptr;

	rptr = amdgpu_userq_fence_read(fence_drv);
	wptr = fence->base.seqno;

	if (rptr >= wptr)
		return true;

	return false;
}

static void amdgpu_userq_fence_free(struct rcu_head *rcu)
{
	struct dma_fence *fence = container_of(rcu, struct dma_fence, rcu);
	struct amdgpu_userq_fence *userq_fence = to_amdgpu_userq_fence(fence);
	struct amdgpu_userq_fence_driver *fence_drv = userq_fence->fence_drv;

	/* Release the fence driver reference */
	amdgpu_userq_fence_driver_put(fence_drv);

	kvfree(userq_fence->fence_drv_array);
	kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
}

static void amdgpu_userq_fence_release(struct dma_fence *f)
{
	call_rcu(&f->rcu, amdgpu_userq_fence_free);
}

static const struct dma_fence_ops amdgpu_userq_fence_ops = {
	.get_driver_name = amdgpu_userq_fence_get_driver_name,
	.get_timeline_name = amdgpu_userq_fence_get_timeline_name,
	.signaled = amdgpu_userq_fence_signaled,
	.release = amdgpu_userq_fence_release,
};

/**
 * amdgpu_userq_fence_read_wptr - Read the userq wptr value
 *
 * @queue: user mode queue structure pointer
 * @wptr: write pointer value
 *
 * Read the wptr value from userq's MQD. The userq signal IOCTL
 * creates a dma_fence for the shared buffers that expects the
 * RPTR value written to seq64 memory >= WPTR.
 *
 * Returns 0 on success (with the wptr value stored in @wptr), error on failure.
 */
static int amdgpu_userq_fence_read_wptr(struct amdgpu_usermode_queue *queue,
					u64 *wptr)
{
	struct amdgpu_bo_va_mapping *mapping;
	struct amdgpu_bo *bo;
	u64 addr, *ptr;
	int r;

	r = amdgpu_bo_reserve(queue->vm->root.bo, false);
	if (r)
		return r;

	addr = queue->userq_prop->wptr_gpu_addr;
	addr &= AMDGPU_GMC_HOLE_MASK;

	mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, addr >> PAGE_SHIFT);
	if (!mapping) {
		amdgpu_bo_unreserve(queue->vm->root.bo);
		DRM_ERROR("Failed to lookup amdgpu_bo_va_mapping\n");
		return -EINVAL;
	}

	bo = amdgpu_bo_ref(mapping->bo_va->base.bo);
	amdgpu_bo_unreserve(queue->vm->root.bo);
	r = amdgpu_bo_reserve(bo, true);
	if (r) {
		DRM_ERROR("Failed to reserve userqueue wptr bo\n");
		return r;
	}

	r = amdgpu_bo_kmap(bo, (void **)&ptr);
	if (r) {
		DRM_ERROR("Failed mapping the userqueue wptr bo\n");
		goto map_error;
	}

	*wptr = le64_to_cpu(*ptr);

	amdgpu_bo_kunmap(bo);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);

	return 0;

map_error:
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);

	return r;
}

static void amdgpu_userq_fence_cleanup(struct dma_fence *fence)
{
	dma_fence_put(fence);
}

static void
amdgpu_userq_fence_driver_set_error(struct amdgpu_userq_fence *fence,
				    int error)
{
	struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv;
	unsigned long flags;
	struct dma_fence *f;

	spin_lock_irqsave(&fence_drv->fence_list_lock, flags);

	f = rcu_dereference_protected(&fence->base,
				      lockdep_is_held(&fence_drv->fence_list_lock));
	if (f && !dma_fence_is_signaled_locked(f))
		dma_fence_set_error(f, error);
	spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);
}

void
amdgpu_userq_fence_driver_force_completion(struct amdgpu_usermode_queue *userq)
{
	struct dma_fence *f = userq->last_fence;

	if (f) {
		struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);
		struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv;
		u64 wptr = fence->base.seqno;

		amdgpu_userq_fence_driver_set_error(fence, -ECANCELED);
		amdgpu_userq_fence_write(fence_drv, wptr);
		amdgpu_userq_fence_driver_process(fence_drv);
	}
}

int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
			      struct drm_file *filp)
{
	struct amdgpu_fpriv *fpriv = filp->driver_priv;
	struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
	struct drm_amdgpu_userq_signal *args = data;
	struct drm_gem_object **gobj_write = NULL;
	struct drm_gem_object **gobj_read = NULL;
	struct amdgpu_usermode_queue *queue;
	struct amdgpu_userq_fence *userq_fence;
	struct drm_syncobj **syncobj = NULL;
	u32 *bo_handles_write, num_write_bo_handles;
	u32 *syncobj_handles, num_syncobj_handles;
	u32 *bo_handles_read, num_read_bo_handles;
	int r, i, entry, rentry, wentry;
	struct dma_fence *fence;
	struct drm_exec exec;
	u64 wptr;

	num_syncobj_handles = args->num_syncobj_handles;
	syncobj_handles = memdup_user(u64_to_user_ptr(args->syncobj_handles),
				      size_mul(sizeof(u32), num_syncobj_handles));
	if (IS_ERR(syncobj_handles))
		return PTR_ERR(syncobj_handles);

	/* Array of pointers to the looked up syncobjs */
	syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), GFP_KERNEL);
	if (!syncobj) {
		r = -ENOMEM;
		goto free_syncobj_handles;
	}

	for (entry = 0; entry < num_syncobj_handles; entry++) {
		syncobj[entry] = drm_syncobj_find(filp, syncobj_handles[entry]);
		if (!syncobj[entry]) {
			r = -ENOENT;
			goto free_syncobj;
		}
	}

	num_read_bo_handles = args->num_bo_read_handles;
	bo_handles_read = memdup_user(u64_to_user_ptr(args->bo_read_handles),
				      sizeof(u32) * num_read_bo_handles);
	if (IS_ERR(bo_handles_read)) {
		r = PTR_ERR(bo_handles_read);
		goto free_syncobj;
	}

	/* Array of pointers to the GEM read objects */
	gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL);
	if (!gobj_read) {
		r = -ENOMEM;
		goto free_bo_handles_read;
	}

	for (rentry = 0; rentry < num_read_bo_handles; rentry++) {
		gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]);
		if (!gobj_read[rentry]) {
			r = -ENOENT;
			goto put_gobj_read;
		}
	}

	num_write_bo_handles = args->num_bo_write_handles;
	bo_handles_write = memdup_user(u64_to_user_ptr(args->bo_write_handles),
				       sizeof(u32) * num_write_bo_handles);
	if (IS_ERR(bo_handles_write)) {
		r = PTR_ERR(bo_handles_write);
		goto put_gobj_read;
	}

	/* Array of pointers to the GEM write objects */
	gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL);
	if (!gobj_write) {
		r = -ENOMEM;
		goto free_bo_handles_write;
	}

	for (wentry = 0; wentry < num_write_bo_handles; wentry++) {
		gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]);
		if (!gobj_write[wentry]) {
			r = -ENOENT;
			goto put_gobj_write;
		}
	}

	/* Retrieve the user queue */
	queue = idr_find(&userq_mgr->userq_idr, args->queue_id);
	if (!queue) {
		r = -ENOENT;
		goto put_gobj_write;
	}

	r = amdgpu_userq_fence_read_wptr(queue, &wptr);
	if (r)
		goto put_gobj_write;

	r = amdgpu_userq_fence_alloc(&userq_fence);
	if (r)
		goto put_gobj_write;

	/* Reaching this point means the UQ is active, make sure the eviction fence is valid */
	amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);

	/* Create a new fence */
	r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence);
	if (r) {
		mutex_unlock(&userq_mgr->userq_mutex);
		kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
		goto put_gobj_write;
	}

	dma_fence_put(queue->last_fence);
	queue->last_fence = dma_fence_get(fence);
	mutex_unlock(&userq_mgr->userq_mutex);

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
		      (num_read_bo_handles + num_write_bo_handles));

	/* Lock all BOs with retry handling */
	drm_exec_until_all_locked(&exec) {
		r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
		drm_exec_retry_on_contention(&exec);
		if (r) {
			amdgpu_userq_fence_cleanup(fence);
			goto exec_fini;
		}

		r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
		drm_exec_retry_on_contention(&exec);
		if (r) {
			amdgpu_userq_fence_cleanup(fence);
			goto exec_fini;
		}
	}

	for (i = 0; i < num_read_bo_handles; i++) {
		if (!gobj_read || !gobj_read[i]->resv)
			continue;

		dma_resv_add_fence(gobj_read[i]->resv, fence,
				   DMA_RESV_USAGE_READ);
	}

	for (i = 0; i < num_write_bo_handles; i++) {
		if (!gobj_write || !gobj_write[i]->resv)
			continue;

		dma_resv_add_fence(gobj_write[i]->resv, fence,
				   DMA_RESV_USAGE_WRITE);
	}

	/* Add the created fence to syncobj/BO's */
	for (i = 0; i < num_syncobj_handles; i++)
		drm_syncobj_replace_fence(syncobj[i], fence);

	/* drop the reference acquired in fence creation function */
	dma_fence_put(fence);

exec_fini:
	drm_exec_fini(&exec);
put_gobj_write:
	while (wentry-- > 0)
		drm_gem_object_put(gobj_write[wentry]);
	kfree(gobj_write);
free_bo_handles_write:
	kfree(bo_handles_write);
put_gobj_read:
	while (rentry-- > 0)
		drm_gem_object_put(gobj_read[rentry]);
	kfree(gobj_read);
free_bo_handles_read:
	kfree(bo_handles_read);
free_syncobj:
	while (entry-- > 0)
		if (syncobj[entry])
			drm_syncobj_put(syncobj[entry]);
	kfree(syncobj);
free_syncobj_handles:
	kfree(syncobj_handles);

	return r;
}

int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *filp)
{
	u32 *syncobj_handles, *timeline_points, *timeline_handles, *bo_handles_read, *bo_handles_write;
	u32 num_syncobj, num_read_bo_handles, num_write_bo_handles;
	struct drm_amdgpu_userq_fence_info *fence_info = NULL;
	struct drm_amdgpu_userq_wait *wait_info = data;
	struct amdgpu_fpriv *fpriv = filp->driver_priv;
	struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
	struct amdgpu_usermode_queue *waitq;
	struct drm_gem_object **gobj_write;
	struct drm_gem_object **gobj_read;
	struct dma_fence **fences = NULL;
	u16 num_points, num_fences = 0;
	int r, i, rentry, wentry, cnt;
	struct drm_exec exec;

	num_read_bo_handles = wait_info->num_bo_read_handles;
	bo_handles_read = memdup_user(u64_to_user_ptr(wait_info->bo_read_handles),
				      size_mul(sizeof(u32), num_read_bo_handles));
	if (IS_ERR(bo_handles_read))
		return PTR_ERR(bo_handles_read);

	num_write_bo_handles = wait_info->num_bo_write_handles;
	bo_handles_write = memdup_user(u64_to_user_ptr(wait_info->bo_write_handles),
				       size_mul(sizeof(u32), num_write_bo_handles));
	if (IS_ERR(bo_handles_write)) {
		r = PTR_ERR(bo_handles_write);
		goto free_bo_handles_read;
	}

	num_syncobj = wait_info->num_syncobj_handles;
	syncobj_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_handles),
				      size_mul(sizeof(u32), num_syncobj));
	if (IS_ERR(syncobj_handles)) {
		r = PTR_ERR(syncobj_handles);
		goto free_bo_handles_write;
	}

	num_points = wait_info->num_syncobj_timeline_handles;
	timeline_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_handles),
				       sizeof(u32) * num_points);
	if (IS_ERR(timeline_handles)) {
		r = PTR_ERR(timeline_handles);
		goto free_syncobj_handles;
	}

	timeline_points = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_points),
				      sizeof(u32) * num_points);
	if (IS_ERR(timeline_points)) {
		r = PTR_ERR(timeline_points);
		goto free_timeline_handles;
	}

	gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL);
	if (!gobj_read) {
		r = -ENOMEM;
		goto free_timeline_points;
	}

	for (rentry = 0; rentry < num_read_bo_handles; rentry++) {
		gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]);
		if (!gobj_read[rentry]) {
			r = -ENOENT;
			goto put_gobj_read;
		}
	}

	gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL);
	if (!gobj_write) {
		r = -ENOMEM;
		goto put_gobj_read;
	}

	for (wentry = 0; wentry < num_write_bo_handles; wentry++) {
		gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]);
		if (!gobj_write[wentry]) {
			r = -ENOENT;
			goto put_gobj_write;
		}
	}

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
		      (num_read_bo_handles + num_write_bo_handles));

	/* Lock all BOs with retry handling */
	drm_exec_until_all_locked(&exec) {
		r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
		drm_exec_retry_on_contention(&exec);
		if (r) {
			drm_exec_fini(&exec);
			goto put_gobj_write;
		}

		r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
		drm_exec_retry_on_contention(&exec);
		if (r) {
			drm_exec_fini(&exec);
			goto put_gobj_write;
		}
	}

	if (!wait_info->num_fences) {
		if (num_points) {
			struct dma_fence_unwrap iter;
			struct dma_fence *fence;
			struct dma_fence *f;

			for (i = 0; i < num_points; i++) {
				r = drm_syncobj_find_fence(filp, timeline_handles[i],
							   timeline_points[i],
							   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
							   &fence);
				if (r)
					goto exec_fini;

				dma_fence_unwrap_for_each(f, &iter, fence)
					num_fences++;

				dma_fence_put(fence);
			}
		}

		/* Count syncobj's fence */
		for (i = 0; i < num_syncobj; i++) {
			struct dma_fence *fence;

			r = drm_syncobj_find_fence(filp, syncobj_handles[i],
						   0,
						   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
						   &fence);
			if (r)
				goto exec_fini;

			num_fences++;
			dma_fence_put(fence);
		}

		/* Count GEM objects fence */
		for (i = 0; i < num_read_bo_handles; i++) {
			struct dma_resv_iter resv_cursor;
			struct dma_fence *fence;

			dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
						DMA_RESV_USAGE_READ, fence)
				num_fences++;
		}

		for (i = 0; i < num_write_bo_handles; i++) {
			struct dma_resv_iter resv_cursor;
			struct dma_fence *fence;

			dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
						DMA_RESV_USAGE_WRITE, fence)
				num_fences++;
		}

		/*
		 * Passing num_fences = 0 means that userspace doesn't want to
		 * retrieve userq_fence_info. If num_fences = 0 we skip filling
		 * userq_fence_info and return the actual number of fences on
		 * args->num_fences.
		 */
		wait_info->num_fences = num_fences;
	} else {
		/* Array of fence info */
		fence_info = kmalloc_array(wait_info->num_fences, sizeof(*fence_info), GFP_KERNEL);
		if (!fence_info) {
			r = -ENOMEM;
			goto exec_fini;
		}

		/* Array of fences */
		fences = kmalloc_array(wait_info->num_fences, sizeof(*fences), GFP_KERNEL);
		if (!fences) {
			r = -ENOMEM;
			goto free_fence_info;
		}

		/* Retrieve GEM read objects fence */
		for (i = 0; i < num_read_bo_handles; i++) {
			struct dma_resv_iter resv_cursor;
			struct dma_fence *fence;

			dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
						DMA_RESV_USAGE_READ, fence) {
				if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
					r = -EINVAL;
					goto free_fences;
				}

				fences[num_fences++] = fence;
				dma_fence_get(fence);
			}
		}

		/* Retrieve GEM write objects fence */
		for (i = 0; i < num_write_bo_handles; i++) {
			struct dma_resv_iter resv_cursor;
			struct dma_fence *fence;

			dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
						DMA_RESV_USAGE_WRITE, fence) {
				if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
					r = -EINVAL;
					goto free_fences;
				}

				fences[num_fences++] = fence;
				dma_fence_get(fence);
			}
		}

		if (num_points) {
			struct dma_fence_unwrap iter;
			struct dma_fence *fence;
			struct dma_fence *f;

			for (i = 0; i < num_points; i++) {
				r = drm_syncobj_find_fence(filp, timeline_handles[i],
							   timeline_points[i],
							   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
							   &fence);
				if (r)
					goto free_fences;

				dma_fence_unwrap_for_each(f, &iter, fence) {
					if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
						r = -EINVAL;
						goto free_fences;
					}

					dma_fence_get(f);
					fences[num_fences++] = f;
				}

				dma_fence_put(fence);
			}
		}

		/* Retrieve syncobj's fence */
		for (i = 0; i < num_syncobj; i++) {
			struct dma_fence *fence;

			r = drm_syncobj_find_fence(filp, syncobj_handles[i],
						   0,
						   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
						   &fence);
			if (r)
				goto free_fences;

			if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
				r = -EINVAL;
				goto free_fences;
			}

			fences[num_fences++] = fence;
		}

		/*
		 * Keep only the latest fences to reduce the number of values
		 * given back to userspace.
		 */
		num_fences = dma_fence_dedup_array(fences, num_fences);

		waitq = idr_find(&userq_mgr->userq_idr, wait_info->waitq_id);
		if (!waitq) {
			r = -EINVAL;
			goto free_fences;
		}

		for (i = 0, cnt = 0; i < num_fences; i++) {
			struct amdgpu_userq_fence_driver *fence_drv;
			struct amdgpu_userq_fence *userq_fence;
			u32 index;

			userq_fence = to_amdgpu_userq_fence(fences[i]);
			if (!userq_fence) {
				/*
				 * Just waiting on other driver fences should
				 * be good for now
				 */
				r = dma_fence_wait(fences[i], true);
				if (r) {
					dma_fence_put(fences[i]);
					goto free_fences;
				}

				dma_fence_put(fences[i]);
				continue;
			}

			fence_drv = userq_fence->fence_drv;
			/*
			 * We need to make sure the user queue releases its
			 * references to the fence drivers at some point before
			 * queue destruction. Otherwise, we would keep gathering
			 * those references until we run out of space and crash.
			 */
			r = xa_alloc(&waitq->fence_drv_xa, &index, fence_drv,
				     xa_limit_32b, GFP_KERNEL);
			if (r)
				goto free_fences;

			amdgpu_userq_fence_driver_get(fence_drv);

			/* Store drm syncobj's gpu va address and value */
			fence_info[cnt].va = fence_drv->va;
			fence_info[cnt].value = fences[i]->seqno;

			dma_fence_put(fences[i]);
			/* Increment the actual userq fence count */
			cnt++;
		}

		wait_info->num_fences = cnt;
		/* Copy userq fence info to user space */
		if (copy_to_user(u64_to_user_ptr(wait_info->out_fences),
				 fence_info, wait_info->num_fences * sizeof(*fence_info))) {
			r = -EFAULT;
			goto free_fences;
		}

		kfree(fences);
		kfree(fence_info);
	}

	drm_exec_fini(&exec);
	for (i = 0; i < num_read_bo_handles; i++)
		drm_gem_object_put(gobj_read[i]);
	kfree(gobj_read);

	for (i = 0; i < num_write_bo_handles; i++)
		drm_gem_object_put(gobj_write[i]);
	kfree(gobj_write);

	kfree(timeline_points);
	kfree(timeline_handles);
	kfree(syncobj_handles);
	kfree(bo_handles_write);
	kfree(bo_handles_read);

	return 0;

free_fences:
	while (num_fences-- > 0)
		dma_fence_put(fences[num_fences]);
	kfree(fences);
free_fence_info:
	kfree(fence_info);
exec_fini:
	drm_exec_fini(&exec);
put_gobj_write:
	while (wentry-- > 0)
		drm_gem_object_put(gobj_write[wentry]);
	kfree(gobj_write);
put_gobj_read:
	while (rentry-- > 0)
		drm_gem_object_put(gobj_read[rentry]);
	kfree(gobj_read);
free_timeline_points:
	kfree(timeline_points);
free_timeline_handles:
	kfree(timeline_handles);
free_syncobj_handles:
	kfree(syncobj_handles);
free_bo_handles_write:
	kfree(bo_handles_write);
free_bo_handles_read:
	kfree(bo_handles_read);

	return r;
}