// SPDX-License-Identifier: MIT
/*
 * Copyright 2023 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/dma-fence-unwrap.h>

#include <drm/drm_exec.h>
#include <drm/drm_syncobj.h>

#include "amdgpu.h"
#include "amdgpu_userq_fence.h"

static const struct dma_fence_ops amdgpu_userq_fence_ops;
static struct kmem_cache *amdgpu_userq_fence_slab;

int amdgpu_userq_fence_slab_init(void)
{
	amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence",
						    sizeof(struct amdgpu_userq_fence),
						    0,
						    SLAB_HWCACHE_ALIGN,
						    NULL);
	if (!amdgpu_userq_fence_slab)
		return -ENOMEM;

	return 0;
}

void amdgpu_userq_fence_slab_fini(void)
{
	rcu_barrier();
	kmem_cache_destroy(amdgpu_userq_fence_slab);
}

static inline struct amdgpu_userq_fence *to_amdgpu_userq_fence(struct dma_fence *f)
{
	if (!f || f->ops != &amdgpu_userq_fence_ops)
		return NULL;

	return container_of(f, struct amdgpu_userq_fence, base);
}

static u64 amdgpu_userq_fence_read(struct amdgpu_userq_fence_driver *fence_drv)
{
	return le64_to_cpu(*fence_drv->cpu_addr);
}

int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
				    struct amdgpu_usermode_queue *userq)
{
	struct amdgpu_userq_fence_driver *fence_drv;
	unsigned long flags;
	int r;

	fence_drv = kzalloc(sizeof(*fence_drv), GFP_KERNEL);
	if (!fence_drv)
		return -ENOMEM;

	/* Acquire seq64 memory */
	r = amdgpu_seq64_alloc(adev, &fence_drv->va, &fence_drv->gpu_addr,
			       &fence_drv->cpu_addr);
	if (r)
		goto free_fence_drv;

	memset(fence_drv->cpu_addr, 0, sizeof(u64));

	kref_init(&fence_drv->refcount);
	INIT_LIST_HEAD(&fence_drv->fences);
	spin_lock_init(&fence_drv->fence_list_lock);

	fence_drv->adev = adev;
	fence_drv->context = dma_fence_context_alloc(1);
	get_task_comm(fence_drv->timeline_name, current);

	xa_lock_irqsave(&adev->userq_xa, flags);
	r = xa_err(__xa_store(&adev->userq_xa, userq->doorbell_index,
			      fence_drv, GFP_KERNEL));
	xa_unlock_irqrestore(&adev->userq_xa, flags);
	if (r)
		goto free_seq64;

	userq->fence_drv = fence_drv;

	return 0;

free_seq64:
	amdgpu_seq64_free(adev, fence_drv->va);
free_fence_drv:
	kfree(fence_drv);

	return r;
}

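/*
 * Fence driver reference handling, as implemented in this file: the user
 * queue holds one reference from amdgpu_userq_fence_driver_alloc(), every
 * unsignaled fence holds one, and the wait IOCTL parks additional
 * references in the queue's fence_drv_xa. The helper below drops whatever
 * is still left in such an xarray when the queue is torn down.
 */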
static void amdgpu_userq_walk_and_drop_fence_drv(struct xarray *xa)
{
	struct amdgpu_userq_fence_driver *fence_drv;
	unsigned long index;

	if (xa_empty(xa))
		return;

	xa_lock(xa);
	xa_for_each(xa, index, fence_drv) {
		__xa_erase(xa, index);
		amdgpu_userq_fence_driver_put(fence_drv);
	}
	xa_unlock(xa);
}

void
amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq)
{
	amdgpu_userq_walk_and_drop_fence_drv(&userq->fence_drv_xa);
	xa_destroy(&userq->fence_drv_xa);
	/* Drop the fence_drv reference held by user queue */
	amdgpu_userq_fence_driver_put(userq->fence_drv);
}

void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv)
{
	struct amdgpu_userq_fence *userq_fence, *tmp;
	struct dma_fence *fence;
	u64 rptr;
	int i;

	if (!fence_drv)
		return;

	rptr = amdgpu_userq_fence_read(fence_drv);

	spin_lock(&fence_drv->fence_list_lock);
	list_for_each_entry_safe(userq_fence, tmp, &fence_drv->fences, link) {
		fence = &userq_fence->base;

		if (rptr < fence->seqno)
			break;

		dma_fence_signal(fence);

		for (i = 0; i < userq_fence->fence_drv_array_count; i++)
			amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]);

		list_del(&userq_fence->link);
		dma_fence_put(fence);
	}
	spin_unlock(&fence_drv->fence_list_lock);
}

void amdgpu_userq_fence_driver_destroy(struct kref *ref)
{
	struct amdgpu_userq_fence_driver *fence_drv = container_of(ref,
					struct amdgpu_userq_fence_driver,
					refcount);
	struct amdgpu_userq_fence_driver *xa_fence_drv;
	struct amdgpu_device *adev = fence_drv->adev;
	struct amdgpu_userq_fence *fence, *tmp;
	struct xarray *xa = &adev->userq_xa;
	unsigned long index, flags;
	struct dma_fence *f;

	spin_lock(&fence_drv->fence_list_lock);
	list_for_each_entry_safe(fence, tmp, &fence_drv->fences, link) {
		f = &fence->base;

		if (!dma_fence_is_signaled(f)) {
			dma_fence_set_error(f, -ECANCELED);
			dma_fence_signal(f);
		}

		list_del(&fence->link);
		dma_fence_put(f);
	}
	spin_unlock(&fence_drv->fence_list_lock);

	xa_lock_irqsave(xa, flags);
	xa_for_each(xa, index, xa_fence_drv)
		if (xa_fence_drv == fence_drv)
			__xa_erase(xa, index);
	xa_unlock_irqrestore(xa, flags);

	/* Free seq64 memory */
	amdgpu_seq64_free(adev, fence_drv->va);
	kfree(fence_drv);
}

void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv)
{
	kref_get(&fence_drv->refcount);
}

void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv)
{
	kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy);
}

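/*
 * Fence creation and signaling protocol: a user queue fence takes the queue
 * wptr captured at signal time as its 64-bit seqno, and it is considered
 * signaled once the rptr value the hardware writes into the seq64 slot has
 * caught up, i.e. roughly
 *
 *	signaled = (amdgpu_userq_fence_read(fence_drv) >= fence->seqno);
 *
 * which is the check done by amdgpu_userq_fence_driver_process() above and
 * amdgpu_userq_fence_signaled() below.
 */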
#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ
static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence)
{
	*userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC);
	return *userq_fence ? 0 : -ENOMEM;
}

static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
				     struct amdgpu_userq_fence *userq_fence,
				     u64 seq, struct dma_fence **f)
{
	struct amdgpu_userq_fence_driver *fence_drv;
	struct dma_fence *fence;
	unsigned long flags;

	fence_drv = userq->fence_drv;
	if (!fence_drv)
		return -EINVAL;

	spin_lock_init(&userq_fence->lock);
	INIT_LIST_HEAD(&userq_fence->link);
	fence = &userq_fence->base;
	userq_fence->fence_drv = fence_drv;

	dma_fence_init(fence, &amdgpu_userq_fence_ops, &userq_fence->lock,
		       fence_drv->context, seq);

	amdgpu_userq_fence_driver_get(fence_drv);
	dma_fence_get(fence);

	if (!xa_empty(&userq->fence_drv_xa)) {
		struct amdgpu_userq_fence_driver *stored_fence_drv;
		unsigned long index, count = 0;
		int i = 0;

		xa_lock(&userq->fence_drv_xa);
		xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv)
			count++;

		userq_fence->fence_drv_array =
			kvmalloc_array(count,
				       sizeof(struct amdgpu_userq_fence_driver *),
				       GFP_ATOMIC);

		if (userq_fence->fence_drv_array) {
			xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) {
				userq_fence->fence_drv_array[i] = stored_fence_drv;
				__xa_erase(&userq->fence_drv_xa, index);
				i++;
			}
		}

		userq_fence->fence_drv_array_count = i;
		xa_unlock(&userq->fence_drv_xa);
	} else {
		userq_fence->fence_drv_array = NULL;
		userq_fence->fence_drv_array_count = 0;
	}

	/* Check if hardware has already processed the job */
	spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
	if (!dma_fence_is_signaled_locked(fence))
		list_add_tail(&userq_fence->link, &fence_drv->fences);
	else
		dma_fence_put(fence);

	spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);

	*f = fence;

	return 0;
}
#endif

static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f)
{
	return "amdgpu_userq_fence";
}

static const char *amdgpu_userq_fence_get_timeline_name(struct dma_fence *f)
{
	struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);

	return fence->fence_drv->timeline_name;
}

static bool amdgpu_userq_fence_signaled(struct dma_fence *f)
{
	struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);
	struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv;
	u64 rptr, wptr;

	rptr = amdgpu_userq_fence_read(fence_drv);
	wptr = fence->base.seqno;

	if (rptr >= wptr)
		return true;

	return false;
}

static void amdgpu_userq_fence_free(struct rcu_head *rcu)
{
	struct dma_fence *fence = container_of(rcu, struct dma_fence, rcu);
	struct amdgpu_userq_fence *userq_fence = to_amdgpu_userq_fence(fence);
	struct amdgpu_userq_fence_driver *fence_drv = userq_fence->fence_drv;

	/* Release the fence driver reference */
	amdgpu_userq_fence_driver_put(fence_drv);

	kvfree(userq_fence->fence_drv_array);
	kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
}

static void amdgpu_userq_fence_release(struct dma_fence *f)
{
	call_rcu(&f->rcu, amdgpu_userq_fence_free);
}

static const struct dma_fence_ops amdgpu_userq_fence_ops = {
	.use_64bit_seqno = true,
	.get_driver_name = amdgpu_userq_fence_get_driver_name,
	.get_timeline_name = amdgpu_userq_fence_get_timeline_name,
	.signaled = amdgpu_userq_fence_signaled,
	.release = amdgpu_userq_fence_release,
};

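/*
 * Note on the ops above: use_64bit_seqno means seqno comparisons are done on
 * the full 64-bit value (no 32-bit wraparound handling), and .release defers
 * freeing through call_rcu() so RCU readers of the fence stay safe, which is
 * also why amdgpu_userq_fence_slab_fini() issues an rcu_barrier() before
 * destroying the slab.
 */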
#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ
/**
 * amdgpu_userq_fence_read_wptr - Read the userq wptr value
 *
 * @queue: user mode queue structure pointer
 * @wptr: write pointer value
 *
 * Read the wptr value from userq's MQD. The userq signal IOCTL
 * creates a dma_fence for the shared buffers that expects the
 * RPTR value written to seq64 memory >= WPTR.
 *
 * Returns wptr value on success, error on failure.
 */
static int amdgpu_userq_fence_read_wptr(struct amdgpu_usermode_queue *queue,
					u64 *wptr)
{
	struct amdgpu_bo_va_mapping *mapping;
	struct amdgpu_bo *bo;
	u64 addr, *ptr;
	int r;

	r = amdgpu_bo_reserve(queue->vm->root.bo, false);
	if (r)
		return r;

	addr = queue->userq_prop->wptr_gpu_addr;
	addr &= AMDGPU_GMC_HOLE_MASK;

	mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, addr >> PAGE_SHIFT);
	if (!mapping) {
		amdgpu_bo_unreserve(queue->vm->root.bo);
		DRM_ERROR("Failed to lookup amdgpu_bo_va_mapping\n");
		return -EINVAL;
	}

	bo = amdgpu_bo_ref(mapping->bo_va->base.bo);
	amdgpu_bo_unreserve(queue->vm->root.bo);
	r = amdgpu_bo_reserve(bo, true);
	if (r) {
		DRM_ERROR("Failed to reserve userqueue wptr bo\n");
		return r;
	}

	r = amdgpu_bo_kmap(bo, (void **)&ptr);
	if (r) {
		DRM_ERROR("Failed mapping the userqueue wptr bo\n");
		goto map_error;
	}

	*wptr = le64_to_cpu(*ptr);

	amdgpu_bo_kunmap(bo);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);

	return 0;

map_error:
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);

	return r;
}

static void amdgpu_userq_fence_cleanup(struct dma_fence *fence)
{
	dma_fence_put(fence);
}

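/**
 * amdgpu_userq_signal_ioctl - Attach a user queue fence to BOs and syncobjs
 *
 * @dev: drm device pointer
 * @data: ioctl args (struct drm_amdgpu_userq_signal)
 * @filp: drm file pointer
 *
 * Read the queue's current wptr, create a user queue fence with that value
 * as seqno, add it to the reservation objects of the supplied read/write
 * BOs and install it in the supplied syncobjs.
 *
 * Returns 0 on success, negative error code on failure.
 */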
int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
			      struct drm_file *filp)
{
	struct amdgpu_fpriv *fpriv = filp->driver_priv;
	struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
	struct drm_amdgpu_userq_signal *args = data;
	struct drm_gem_object **gobj_write = NULL;
	struct drm_gem_object **gobj_read = NULL;
	struct amdgpu_usermode_queue *queue;
	struct amdgpu_userq_fence *userq_fence;
	struct drm_syncobj **syncobj = NULL;
	u32 *bo_handles_write, num_write_bo_handles;
	u32 *syncobj_handles, num_syncobj_handles;
	u32 *bo_handles_read, num_read_bo_handles;
	int r, i, entry, rentry, wentry;
	struct dma_fence *fence;
	struct drm_exec exec;
	u64 wptr;

	num_syncobj_handles = args->num_syncobj_handles;
	syncobj_handles = memdup_user(u64_to_user_ptr(args->syncobj_handles),
				      sizeof(u32) * num_syncobj_handles);
	if (IS_ERR(syncobj_handles))
		return PTR_ERR(syncobj_handles);

	/* Array of pointers to the looked up syncobjs */
	syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), GFP_KERNEL);
	if (!syncobj) {
		r = -ENOMEM;
		goto free_syncobj_handles;
	}

	for (entry = 0; entry < num_syncobj_handles; entry++) {
		syncobj[entry] = drm_syncobj_find(filp, syncobj_handles[entry]);
		if (!syncobj[entry]) {
			r = -ENOENT;
			goto free_syncobj;
		}
	}

	num_read_bo_handles = args->num_bo_read_handles;
	bo_handles_read = memdup_user(u64_to_user_ptr(args->bo_read_handles),
				      sizeof(u32) * num_read_bo_handles);
	if (IS_ERR(bo_handles_read)) {
		r = PTR_ERR(bo_handles_read);
		goto free_syncobj;
	}

	/* Array of pointers to the GEM read objects */
	gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL);
	if (!gobj_read) {
		r = -ENOMEM;
		goto free_bo_handles_read;
	}

	for (rentry = 0; rentry < num_read_bo_handles; rentry++) {
		gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]);
		if (!gobj_read[rentry]) {
			r = -ENOENT;
			goto put_gobj_read;
		}
	}

	num_write_bo_handles = args->num_bo_write_handles;
	bo_handles_write = memdup_user(u64_to_user_ptr(args->bo_write_handles),
				       sizeof(u32) * num_write_bo_handles);
	if (IS_ERR(bo_handles_write)) {
		r = PTR_ERR(bo_handles_write);
		goto put_gobj_read;
	}

	/* Array of pointers to the GEM write objects */
	gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL);
	if (!gobj_write) {
		r = -ENOMEM;
		goto free_bo_handles_write;
	}

	for (wentry = 0; wentry < num_write_bo_handles; wentry++) {
		gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]);
		if (!gobj_write[wentry]) {
			r = -ENOENT;
			goto put_gobj_write;
		}
	}

	/* Retrieve the user queue */
	queue = idr_find(&userq_mgr->userq_idr, args->queue_id);
	if (!queue) {
		r = -ENOENT;
		goto put_gobj_write;
	}

	r = amdgpu_userq_fence_read_wptr(queue, &wptr);
	if (r)
		goto put_gobj_write;

	r = amdgpu_userq_fence_alloc(&userq_fence);
	if (r)
		goto put_gobj_write;

	/* We are here means UQ is active, make sure the eviction fence is valid */
	amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);

	/* Create a new fence */
	r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence);
	if (r) {
		mutex_unlock(&userq_mgr->userq_mutex);
		kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
		goto put_gobj_write;
	}

	dma_fence_put(queue->last_fence);
	queue->last_fence = dma_fence_get(fence);
	mutex_unlock(&userq_mgr->userq_mutex);

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
		      (num_read_bo_handles + num_write_bo_handles));

	/* Lock all BOs with retry handling */
	drm_exec_until_all_locked(&exec) {
		r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
		drm_exec_retry_on_contention(&exec);
		if (r) {
			amdgpu_userq_fence_cleanup(fence);
			goto exec_fini;
		}

		r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
		drm_exec_retry_on_contention(&exec);
		if (r) {
			amdgpu_userq_fence_cleanup(fence);
			goto exec_fini;
		}
	}

	for (i = 0; i < num_read_bo_handles; i++) {
		if (!gobj_read || !gobj_read[i]->resv)
			continue;

		dma_resv_add_fence(gobj_read[i]->resv, fence,
				   DMA_RESV_USAGE_READ);
	}

	for (i = 0; i < num_write_bo_handles; i++) {
		if (!gobj_write || !gobj_write[i]->resv)
			continue;

		dma_resv_add_fence(gobj_write[i]->resv, fence,
				   DMA_RESV_USAGE_WRITE);
	}

	/* Add the created fence to syncobj/BO's */
	for (i = 0; i < num_syncobj_handles; i++)
		drm_syncobj_replace_fence(syncobj[i], fence);

	/* drop the reference acquired in fence creation function */
	dma_fence_put(fence);

exec_fini:
	drm_exec_fini(&exec);
put_gobj_write:
	while (wentry-- > 0)
		drm_gem_object_put(gobj_write[wentry]);
	kfree(gobj_write);
free_bo_handles_write:
	kfree(bo_handles_write);
put_gobj_read:
	while (rentry-- > 0)
		drm_gem_object_put(gobj_read[rentry]);
	kfree(gobj_read);
free_bo_handles_read:
	kfree(bo_handles_read);
free_syncobj:
	while (entry-- > 0)
		if (syncobj[entry])
			drm_syncobj_put(syncobj[entry]);
	kfree(syncobj);
free_syncobj_handles:
	kfree(syncobj_handles);

	return r;
}
#else
int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
			      struct drm_file *filp)
{
	return -ENOTSUPP;
}
#endif

#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ
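/**
 * amdgpu_userq_wait_ioctl - Collect the fences a user queue has to wait on
 *
 * @dev: drm device pointer
 * @data: ioctl args (struct drm_amdgpu_userq_wait)
 * @filp: drm file pointer
 *
 * Gather the fences attached to the supplied BOs, syncobjs and timeline
 * points. Userspace is expected to call this twice: first with
 * num_fences == 0 to learn how many entries it needs, then again with an
 * out_fences buffer of that size to receive the seq64 GPU address/value
 * pairs of the user queue fences (fences from other drivers are waited on
 * in the kernel instead). A rough sketch of the expected userspace sequence
 * (illustrative only, ioctl plumbing omitted):
 *
 *	wait.num_fences = 0;
 *	userq_wait(&wait);                  // returns the required count
 *	wait.out_fences = (uintptr_t)infos; // array sized from first call
 *	userq_wait(&wait);                  // fills the fence info entries
 *
 * Returns 0 on success, negative error code on failure.
 */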
int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *filp)
{
	u32 *syncobj_handles, *timeline_points, *timeline_handles, *bo_handles_read, *bo_handles_write;
	u32 num_syncobj, num_read_bo_handles, num_write_bo_handles;
	struct drm_amdgpu_userq_fence_info *fence_info = NULL;
	struct drm_amdgpu_userq_wait *wait_info = data;
	struct amdgpu_fpriv *fpriv = filp->driver_priv;
	struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
	struct amdgpu_usermode_queue *waitq;
	struct drm_gem_object **gobj_write;
	struct drm_gem_object **gobj_read;
	struct dma_fence **fences = NULL;
	u16 num_points, num_fences = 0;
	int r, i, rentry, wentry, cnt;
	struct drm_exec exec;

	num_read_bo_handles = wait_info->num_bo_read_handles;
	bo_handles_read = memdup_user(u64_to_user_ptr(wait_info->bo_read_handles),
				      sizeof(u32) * num_read_bo_handles);
	if (IS_ERR(bo_handles_read))
		return PTR_ERR(bo_handles_read);

	num_write_bo_handles = wait_info->num_bo_write_handles;
	bo_handles_write = memdup_user(u64_to_user_ptr(wait_info->bo_write_handles),
				       sizeof(u32) * num_write_bo_handles);
	if (IS_ERR(bo_handles_write)) {
		r = PTR_ERR(bo_handles_write);
		goto free_bo_handles_read;
	}

	num_syncobj = wait_info->num_syncobj_handles;
	syncobj_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_handles),
				      sizeof(u32) * num_syncobj);
	if (IS_ERR(syncobj_handles)) {
		r = PTR_ERR(syncobj_handles);
		goto free_bo_handles_write;
	}

	num_points = wait_info->num_syncobj_timeline_handles;
	timeline_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_handles),
				       sizeof(u32) * num_points);
	if (IS_ERR(timeline_handles)) {
		r = PTR_ERR(timeline_handles);
		goto free_syncobj_handles;
	}

	timeline_points = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_points),
				      sizeof(u32) * num_points);
	if (IS_ERR(timeline_points)) {
		r = PTR_ERR(timeline_points);
		goto free_timeline_handles;
	}

	gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL);
	if (!gobj_read) {
		r = -ENOMEM;
		goto free_timeline_points;
	}

	for (rentry = 0; rentry < num_read_bo_handles; rentry++) {
		gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]);
		if (!gobj_read[rentry]) {
			r = -ENOENT;
			goto put_gobj_read;
		}
	}

	gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL);
	if (!gobj_write) {
		r = -ENOMEM;
		goto put_gobj_read;
	}

	for (wentry = 0; wentry < num_write_bo_handles; wentry++) {
		gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]);
		if (!gobj_write[wentry]) {
			r = -ENOENT;
			goto put_gobj_write;
		}
	}

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
		      (num_read_bo_handles + num_write_bo_handles));

	/* Lock all BOs with retry handling */
	drm_exec_until_all_locked(&exec) {
		r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
		drm_exec_retry_on_contention(&exec);
		if (r) {
			drm_exec_fini(&exec);
			goto put_gobj_write;
		}

		r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
		drm_exec_retry_on_contention(&exec);
		if (r) {
			drm_exec_fini(&exec);
			goto put_gobj_write;
		}
	}

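	/*
	 * All BOs are locked at this point, so their reservation objects can
	 * be walked consistently by both the counting pass and the collection
	 * pass below.
	 */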
	if (!wait_info->num_fences) {
		if (num_points) {
			struct dma_fence_unwrap iter;
			struct dma_fence *fence;
			struct dma_fence *f;

			for (i = 0; i < num_points; i++) {
				r = drm_syncobj_find_fence(filp, timeline_handles[i],
							   timeline_points[i],
							   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
							   &fence);
				if (r)
					goto exec_fini;

				dma_fence_unwrap_for_each(f, &iter, fence)
					num_fences++;

				dma_fence_put(fence);
			}
		}

		/* Count syncobj's fence */
		for (i = 0; i < num_syncobj; i++) {
			struct dma_fence *fence;

			r = drm_syncobj_find_fence(filp, syncobj_handles[i],
						   0,
						   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
						   &fence);
			if (r)
				goto exec_fini;

			num_fences++;
			dma_fence_put(fence);
		}

		/* Count GEM objects fence */
		for (i = 0; i < num_read_bo_handles; i++) {
			struct dma_resv_iter resv_cursor;
			struct dma_fence *fence;

			dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
						DMA_RESV_USAGE_READ, fence)
				num_fences++;
		}

		for (i = 0; i < num_write_bo_handles; i++) {
			struct dma_resv_iter resv_cursor;
			struct dma_fence *fence;

			dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
						DMA_RESV_USAGE_WRITE, fence)
				num_fences++;
		}

		/*
		 * Passing num_fences = 0 means that userspace doesn't want to
		 * retrieve userq_fence_info. If num_fences = 0 we skip filling
		 * userq_fence_info and return the actual number of fences on
		 * args->num_fences.
		 */
		wait_info->num_fences = num_fences;
	} else {
		/* Array of fence info */
		fence_info = kmalloc_array(wait_info->num_fences, sizeof(*fence_info), GFP_KERNEL);
		if (!fence_info) {
			r = -ENOMEM;
			goto exec_fini;
		}

		/* Array of fences */
		fences = kmalloc_array(wait_info->num_fences, sizeof(*fences), GFP_KERNEL);
		if (!fences) {
			r = -ENOMEM;
			goto free_fence_info;
		}

		/* Retrieve GEM read objects fence */
		for (i = 0; i < num_read_bo_handles; i++) {
			struct dma_resv_iter resv_cursor;
			struct dma_fence *fence;

			dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
						DMA_RESV_USAGE_READ, fence) {
				if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
					r = -EINVAL;
					goto free_fences;
				}

				fences[num_fences++] = fence;
				dma_fence_get(fence);
			}
		}

		/* Retrieve GEM write objects fence */
		for (i = 0; i < num_write_bo_handles; i++) {
			struct dma_resv_iter resv_cursor;
			struct dma_fence *fence;

			dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
						DMA_RESV_USAGE_WRITE, fence) {
				if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
					r = -EINVAL;
					goto free_fences;
				}

				fences[num_fences++] = fence;
				dma_fence_get(fence);
			}
		}

		if (num_points) {
			struct dma_fence_unwrap iter;
			struct dma_fence *fence;
			struct dma_fence *f;

			for (i = 0; i < num_points; i++) {
				r = drm_syncobj_find_fence(filp, timeline_handles[i],
							   timeline_points[i],
							   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
							   &fence);
				if (r)
					goto free_fences;

				dma_fence_unwrap_for_each(f, &iter, fence) {
					if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
						r = -EINVAL;
						goto free_fences;
					}

					dma_fence_get(f);
					fences[num_fences++] = f;
				}

				dma_fence_put(fence);
			}
		}

		/* Retrieve syncobj's fence */
		for (i = 0; i < num_syncobj; i++) {
			struct dma_fence *fence;

			r = drm_syncobj_find_fence(filp, syncobj_handles[i],
						   0,
						   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
						   &fence);
			if (r)
				goto free_fences;

			if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
				r = -EINVAL;
				goto free_fences;
			}

			fences[num_fences++] = fence;
		}

		waitq = idr_find(&userq_mgr->userq_idr, wait_info->waitq_id);
		if (!waitq) {
			r = -EINVAL;
			goto free_fences;
		}

		for (i = 0, cnt = 0; i < num_fences; i++) {
			struct amdgpu_userq_fence_driver *fence_drv;
			struct amdgpu_userq_fence *userq_fence;
			u32 index;

			userq_fence = to_amdgpu_userq_fence(fences[i]);
			if (!userq_fence) {
				/*
				 * Just waiting on other driver fences should
				 * be good for now
				 */
				r = dma_fence_wait(fences[i], true);
				if (r) {
					dma_fence_put(fences[i]);
					goto free_fences;
				}

				dma_fence_put(fences[i]);
				continue;
			}

			fence_drv = userq_fence->fence_drv;
			/*
			 * We need to make sure the user queue releases its
			 * references to the fence drivers at some point before
			 * queue destruction. Otherwise, we would gather those
			 * references until we run out of space and crash.
			 */
			r = xa_alloc(&waitq->fence_drv_xa, &index, fence_drv,
				     xa_limit_32b, GFP_KERNEL);
			if (r)
				goto free_fences;

			amdgpu_userq_fence_driver_get(fence_drv);

			/* Store drm syncobj's gpu va address and value */
			fence_info[cnt].va = fence_drv->va;
			fence_info[cnt].value = fences[i]->seqno;

			dma_fence_put(fences[i]);
			/* Increment the actual userq fence count */
			cnt++;
		}

		wait_info->num_fences = cnt;
		/* Copy userq fence info to user space */
		if (copy_to_user(u64_to_user_ptr(wait_info->out_fences),
				 fence_info, wait_info->num_fences * sizeof(*fence_info))) {
			r = -EFAULT;
			goto free_fences;
		}

		kfree(fences);
		kfree(fence_info);
	}

	drm_exec_fini(&exec);
	for (i = 0; i < num_read_bo_handles; i++)
		drm_gem_object_put(gobj_read[i]);
	kfree(gobj_read);

	for (i = 0; i < num_write_bo_handles; i++)
		drm_gem_object_put(gobj_write[i]);
	kfree(gobj_write);

	kfree(timeline_points);
	kfree(timeline_handles);
	kfree(syncobj_handles);
	kfree(bo_handles_write);
	kfree(bo_handles_read);

	return 0;

free_fences:
	while (num_fences-- > 0)
		dma_fence_put(fences[num_fences]);
	kfree(fences);
free_fence_info:
	kfree(fence_info);
exec_fini:
	drm_exec_fini(&exec);
put_gobj_write:
	while (wentry-- > 0)
		drm_gem_object_put(gobj_write[wentry]);
	kfree(gobj_write);
put_gobj_read:
	while (rentry-- > 0)
		drm_gem_object_put(gobj_read[rentry]);
	kfree(gobj_read);
free_timeline_points:
	kfree(timeline_points);
free_timeline_handles:
	kfree(timeline_handles);
free_syncobj_handles:
	kfree(syncobj_handles);
free_bo_handles_write:
	kfree(bo_handles_write);
free_bo_handles_read:
	kfree(bo_handles_read);

	return r;
}
#else
int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *filp)
{
	return -ENOTSUPP;
}
#endif