1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright 2023 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 * 23 */ 24 25 #include <linux/kref.h> 26 #include <linux/slab.h> 27 #include <linux/dma-fence-unwrap.h> 28 29 #include <drm/drm_exec.h> 30 #include <drm/drm_syncobj.h> 31 32 #include "amdgpu.h" 33 #include "amdgpu_userq_fence.h" 34 35 #define AMDGPU_USERQ_MAX_HANDLES (1U << 16) 36 37 static const struct dma_fence_ops amdgpu_userq_fence_ops; 38 39 static inline struct amdgpu_userq_fence *to_amdgpu_userq_fence(struct dma_fence *f) 40 { 41 if (!f || f->ops != &amdgpu_userq_fence_ops) 42 return NULL; 43 44 return container_of(f, struct amdgpu_userq_fence, base); 45 } 46 47 static u64 amdgpu_userq_fence_read(struct amdgpu_userq_fence_driver *fence_drv) 48 { 49 return le64_to_cpu(*fence_drv->cpu_addr); 50 } 51 52 static void 53 amdgpu_userq_fence_write(struct amdgpu_userq_fence_driver *fence_drv, 54 u64 seq) 55 { 56 if (fence_drv->cpu_addr) 57 *fence_drv->cpu_addr = cpu_to_le64(seq); 58 } 59 60 int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, 61 struct amdgpu_userq_fence_driver **fence_drv_req) 62 { 63 struct amdgpu_userq_fence_driver *fence_drv; 64 int r; 65 66 if (!fence_drv_req) 67 return -EINVAL; 68 *fence_drv_req = NULL; 69 70 fence_drv = kzalloc_obj(*fence_drv); 71 if (!fence_drv) 72 return -ENOMEM; 73 74 /* Acquire seq64 memory */ 75 r = amdgpu_seq64_alloc(adev, &fence_drv->va, &fence_drv->gpu_addr, 76 &fence_drv->cpu_addr); 77 if (r) 78 goto free_fence_drv; 79 80 memset(fence_drv->cpu_addr, 0, sizeof(u64)); 81 82 kref_init(&fence_drv->refcount); 83 INIT_LIST_HEAD(&fence_drv->fences); 84 spin_lock_init(&fence_drv->fence_list_lock); 85 86 fence_drv->adev = adev; 87 fence_drv->context = dma_fence_context_alloc(1); 88 get_task_comm(fence_drv->timeline_name, current); 89 90 *fence_drv_req = fence_drv; 91 92 return 0; 93 94 free_fence_drv: 95 kfree(fence_drv); 96 97 return r; 98 } 99 100 static void amdgpu_userq_walk_and_drop_fence_drv(struct xarray *xa) 101 { 102 struct amdgpu_userq_fence_driver *fence_drv; 103 unsigned long index; 104 105 if (xa_empty(xa)) 106 return; 107 108 xa_lock(xa); 109 xa_for_each(xa, index, fence_drv) { 110 __xa_erase(xa, index); 111 amdgpu_userq_fence_driver_put(fence_drv); 112 } 113 114 xa_unlock(xa); 115 } 116 117 void 118 amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq) 119 { 120 dma_fence_put(userq->last_fence); 121 userq->last_fence = NULL; 122 amdgpu_userq_walk_and_drop_fence_drv(&userq->fence_drv_xa); 123 xa_destroy(&userq->fence_drv_xa); 124 mutex_destroy(&userq->fence_drv_lock); 125 /* Drop the queue's ownership reference to fence_drv explicitly */ 126 amdgpu_userq_fence_driver_put(userq->fence_drv); 127 } 128 129 static void 130 amdgpu_userq_fence_put_fence_drv_array(struct amdgpu_userq_fence *userq_fence) 131 { 132 unsigned long i; 133 for (i = 0; i < userq_fence->fence_drv_array_count; i++) 134 amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]); 135 userq_fence->fence_drv_array_count = 0; 136 } 137 138 /* 139 * Returns: 140 * -ENOENT when no fences were processes 141 * 1 when more fences are pending 142 * 0 when no fences are pending any more 143 */ 144 int 145 amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv) 146 { 147 struct amdgpu_userq_fence *userq_fence, *tmp; 148 LIST_HEAD(to_be_signaled); 149 struct dma_fence *fence; 150 unsigned long flags; 151 u64 rptr; 152 153 spin_lock_irqsave(&fence_drv->fence_list_lock, flags); 154 rptr = amdgpu_userq_fence_read(fence_drv); 155 156 list_for_each_entry(userq_fence, &fence_drv->fences, link) { 157 if (rptr < userq_fence->base.seqno) 158 break; 159 } 160 161 list_cut_before(&to_be_signaled, &fence_drv->fences, 162 &userq_fence->link); 163 spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); 164 165 if (list_empty(&to_be_signaled)) 166 return -ENOENT; 167 168 list_for_each_entry_safe(userq_fence, tmp, &to_be_signaled, link) { 169 fence = &userq_fence->base; 170 list_del_init(&userq_fence->link); 171 dma_fence_signal(fence); 172 /* Drop fence_drv_array outside fence_list_lock 173 * to avoid the recursion lock. 174 */ 175 amdgpu_userq_fence_put_fence_drv_array(userq_fence); 176 dma_fence_put(fence); 177 } 178 179 /* That doesn't need to be accurate so no locking */ 180 return list_empty(&fence_drv->fences) ? 0 : 1; 181 } 182 183 void amdgpu_userq_fence_driver_destroy(struct kref *ref) 184 { 185 struct amdgpu_userq_fence_driver *fence_drv = container_of(ref, 186 struct amdgpu_userq_fence_driver, 187 refcount); 188 struct amdgpu_device *adev = fence_drv->adev; 189 struct amdgpu_userq_fence *fence, *tmp; 190 unsigned long flags; 191 struct dma_fence *f; 192 193 spin_lock_irqsave(&fence_drv->fence_list_lock, flags); 194 list_for_each_entry_safe(fence, tmp, &fence_drv->fences, link) { 195 f = &fence->base; 196 197 if (!dma_fence_is_signaled(f)) { 198 dma_fence_set_error(f, -ECANCELED); 199 dma_fence_signal(f); 200 } 201 202 list_del(&fence->link); 203 dma_fence_put(f); 204 } 205 spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); 206 207 /* Free seq64 memory */ 208 amdgpu_seq64_free(adev, fence_drv->va); 209 kfree(fence_drv); 210 } 211 212 void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv) 213 { 214 kref_get(&fence_drv->refcount); 215 } 216 217 void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv) 218 { 219 kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy); 220 } 221 222 static int amdgpu_userq_fence_alloc(struct amdgpu_usermode_queue *userq, 223 struct amdgpu_userq_fence **pfence) 224 { 225 struct amdgpu_userq_fence_driver *fence_drv = userq->fence_drv; 226 struct amdgpu_userq_fence *userq_fence; 227 void *entry; 228 229 userq_fence = kmalloc(sizeof(*userq_fence), GFP_KERNEL); 230 if (!userq_fence) 231 return -ENOMEM; 232 233 /* 234 * Get the next unused entry, since we fill from the start this can be 235 * used as size to allocate the array. 236 */ 237 mutex_lock(&userq->fence_drv_lock); 238 XA_STATE(xas, &userq->fence_drv_xa, 0); 239 240 rcu_read_lock(); 241 do { 242 entry = xas_find_marked(&xas, ULONG_MAX, XA_FREE_MARK); 243 } while (xas_retry(&xas, entry)); 244 rcu_read_unlock(); 245 246 userq_fence->fence_drv_array = kvmalloc_array(xas.xa_index, 247 sizeof(fence_drv), 248 GFP_KERNEL); 249 if (!userq_fence->fence_drv_array) { 250 mutex_unlock(&userq->fence_drv_lock); 251 kfree(userq_fence); 252 return -ENOMEM; 253 } 254 255 userq_fence->fence_drv_array_count = xas.xa_index; 256 xa_extract(&userq->fence_drv_xa, (void **)userq_fence->fence_drv_array, 257 0, ULONG_MAX, xas.xa_index, XA_PRESENT); 258 xa_destroy(&userq->fence_drv_xa); 259 260 mutex_unlock(&userq->fence_drv_lock); 261 262 amdgpu_userq_fence_driver_get(fence_drv); 263 userq_fence->fence_drv = fence_drv; 264 265 *pfence = userq_fence; 266 return 0; 267 } 268 269 static void amdgpu_userq_fence_init(struct amdgpu_usermode_queue *userq, 270 struct amdgpu_userq_fence *fence, 271 u64 seq) 272 { 273 struct amdgpu_userq_fence_driver *fence_drv = userq->fence_drv; 274 unsigned long flags; 275 bool signaled = false; 276 277 spin_lock_init(&fence->lock); 278 dma_fence_init64(&fence->base, &amdgpu_userq_fence_ops, &fence->lock, 279 fence_drv->context, seq); 280 281 /* Make sure the fence is visible to the hang detect worker */ 282 dma_fence_put(userq->last_fence); 283 userq->last_fence = dma_fence_get(&fence->base); 284 285 /* Check if hardware has already processed the fence */ 286 spin_lock_irqsave(&fence_drv->fence_list_lock, flags); 287 if (!dma_fence_is_signaled(&fence->base)) { 288 dma_fence_get(&fence->base); 289 list_add_tail(&fence->link, &fence_drv->fences); 290 } else { 291 INIT_LIST_HEAD(&fence->link); 292 signaled = true; 293 } 294 spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); 295 296 if (signaled) 297 amdgpu_userq_fence_put_fence_drv_array(fence); 298 else 299 amdgpu_userq_start_hang_detect_work(userq); 300 } 301 302 static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f) 303 { 304 return "amdgpu_userq_fence"; 305 } 306 307 static const char *amdgpu_userq_fence_get_timeline_name(struct dma_fence *f) 308 { 309 struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f); 310 311 return fence->fence_drv->timeline_name; 312 } 313 314 static bool amdgpu_userq_fence_signaled(struct dma_fence *f) 315 { 316 struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f); 317 struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv; 318 u64 rptr, wptr; 319 320 rptr = amdgpu_userq_fence_read(fence_drv); 321 wptr = fence->base.seqno; 322 323 if (rptr >= wptr) 324 return true; 325 326 return false; 327 } 328 329 static void amdgpu_userq_fence_free(struct rcu_head *rcu) 330 { 331 struct dma_fence *fence = container_of(rcu, struct dma_fence, rcu); 332 struct amdgpu_userq_fence *userq_fence = to_amdgpu_userq_fence(fence); 333 struct amdgpu_userq_fence_driver *fence_drv = userq_fence->fence_drv; 334 335 /* Release the fence driver reference */ 336 amdgpu_userq_fence_driver_put(fence_drv); 337 338 kvfree(userq_fence->fence_drv_array); 339 kfree(userq_fence); 340 } 341 342 static void amdgpu_userq_fence_release(struct dma_fence *f) 343 { 344 call_rcu(&f->rcu, amdgpu_userq_fence_free); 345 } 346 347 static const struct dma_fence_ops amdgpu_userq_fence_ops = { 348 .get_driver_name = amdgpu_userq_fence_get_driver_name, 349 .get_timeline_name = amdgpu_userq_fence_get_timeline_name, 350 .signaled = amdgpu_userq_fence_signaled, 351 .release = amdgpu_userq_fence_release, 352 }; 353 354 /** 355 * amdgpu_userq_fence_read_wptr - Read the userq wptr value 356 * 357 * @adev: amdgpu_device pointer 358 * @queue: user mode queue structure pointer 359 * @wptr: write pointer value 360 * 361 * Read the wptr value from userq's MQD. The userq signal IOCTL 362 * creates a dma_fence for the shared buffers that expects the 363 * RPTR value written to seq64 memory >= WPTR. 364 * 365 * Returns wptr value on success, error on failure. 366 */ 367 static int amdgpu_userq_fence_read_wptr(struct amdgpu_device *adev, 368 struct amdgpu_usermode_queue *queue, 369 u64 *wptr) 370 { 371 struct amdgpu_bo_va_mapping *mapping; 372 struct amdgpu_bo *bo; 373 struct drm_exec exec; 374 u64 addr, *ptr; 375 int ret; 376 377 addr = queue->userq_prop->wptr_gpu_addr; 378 addr &= AMDGPU_GMC_HOLE_MASK; 379 380 drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 2); 381 drm_exec_until_all_locked(&exec) { 382 ret = amdgpu_vm_lock_pd(queue->vm, &exec, 1); 383 drm_exec_retry_on_contention(&exec); 384 if (unlikely(ret)) 385 goto lock_error; 386 387 mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, addr >> PAGE_SHIFT); 388 if (!mapping) { 389 ret = -EINVAL; 390 goto lock_error; 391 } 392 393 ret = drm_exec_lock_obj(&exec, &mapping->bo_va->base.bo->tbo.base); 394 drm_exec_retry_on_contention(&exec); 395 if (unlikely(ret)) 396 goto lock_error; 397 } 398 399 bo = mapping->bo_va->base.bo; 400 ret = amdgpu_bo_kmap(bo, (void **)&ptr); 401 if (ret) { 402 DRM_ERROR("Failed mapping the userqueue wptr bo"); 403 goto lock_error; 404 } 405 406 *wptr = le64_to_cpu(*ptr); 407 408 amdgpu_bo_kunmap(bo); 409 drm_exec_fini(&exec); 410 return 0; 411 412 lock_error: 413 drm_exec_fini(&exec); 414 return ret; 415 } 416 417 static void 418 amdgpu_userq_fence_driver_set_error(struct amdgpu_userq_fence *fence, 419 int error) 420 { 421 struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv; 422 unsigned long flags; 423 struct dma_fence *f; 424 425 spin_lock_irqsave(&fence_drv->fence_list_lock, flags); 426 427 f = rcu_dereference_protected(&fence->base, 428 lockdep_is_held(&fence_drv->fence_list_lock)); 429 if (f && !dma_fence_is_signaled_locked(f)) 430 dma_fence_set_error(f, error); 431 spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); 432 } 433 434 void 435 amdgpu_userq_fence_driver_force_completion(struct amdgpu_usermode_queue *userq) 436 { 437 struct dma_fence *f = userq->last_fence; 438 439 if (f) { 440 struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f); 441 struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv; 442 u64 wptr = fence->base.seqno; 443 444 amdgpu_userq_fence_driver_set_error(fence, -ECANCELED); 445 amdgpu_userq_fence_write(fence_drv, wptr); 446 amdgpu_userq_fence_driver_process(fence_drv); 447 448 } 449 } 450 451 int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, 452 struct drm_file *filp) 453 { 454 struct amdgpu_device *adev = drm_to_adev(dev); 455 struct drm_amdgpu_userq_signal *args = data; 456 const unsigned int num_write_bo_handles = args->num_bo_write_handles; 457 const unsigned int num_read_bo_handles = args->num_bo_read_handles; 458 struct amdgpu_fpriv *fpriv = filp->driver_priv; 459 struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr; 460 461 struct drm_gem_object **gobj_write, **gobj_read; 462 u32 *syncobj_handles, num_syncobj_handles; 463 struct amdgpu_usermode_queue *queue; 464 struct amdgpu_userq_fence *fence; 465 struct drm_syncobj **syncobj; 466 struct drm_exec exec; 467 void __user *ptr; 468 int r, i, entry; 469 u64 wptr; 470 471 if (!amdgpu_userq_enabled(dev)) 472 return -ENOTSUPP; 473 474 if (args->num_bo_write_handles > AMDGPU_USERQ_MAX_HANDLES || 475 args->num_bo_read_handles > AMDGPU_USERQ_MAX_HANDLES) 476 return -EINVAL; 477 478 num_syncobj_handles = args->num_syncobj_handles; 479 ptr = u64_to_user_ptr(args->syncobj_handles); 480 syncobj_handles = memdup_array_user(ptr, num_syncobj_handles, 481 sizeof(u32)); 482 if (IS_ERR(syncobj_handles)) 483 return PTR_ERR(syncobj_handles); 484 485 syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), 486 GFP_KERNEL); 487 if (!syncobj) { 488 r = -ENOMEM; 489 goto free_syncobj_handles; 490 } 491 492 for (entry = 0; entry < num_syncobj_handles; entry++) { 493 syncobj[entry] = drm_syncobj_find(filp, syncobj_handles[entry]); 494 if (!syncobj[entry]) { 495 r = -ENOENT; 496 goto free_syncobj; 497 } 498 } 499 500 ptr = u64_to_user_ptr(args->bo_read_handles); 501 r = drm_gem_objects_lookup(filp, ptr, num_read_bo_handles, &gobj_read); 502 if (r) 503 goto free_syncobj; 504 505 ptr = u64_to_user_ptr(args->bo_write_handles); 506 r = drm_gem_objects_lookup(filp, ptr, num_write_bo_handles, 507 &gobj_write); 508 if (r) 509 goto put_gobj_read; 510 511 queue = amdgpu_userq_get(userq_mgr, args->queue_id); 512 if (!queue) { 513 r = -ENOENT; 514 goto put_gobj_write; 515 } 516 517 r = amdgpu_userq_fence_read_wptr(adev, queue, &wptr); 518 if (r) 519 goto put_queue; 520 521 r = amdgpu_userq_fence_alloc(queue, &fence); 522 if (r) 523 goto put_queue; 524 525 /* We are here means UQ is active, make sure the eviction fence is valid */ 526 amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); 527 528 /* Create the new fence */ 529 amdgpu_userq_fence_init(queue, fence, wptr); 530 531 mutex_unlock(&userq_mgr->userq_mutex); 532 533 /* 534 * This needs to come after the fence is created since 535 * amdgpu_userq_ensure_ev_fence() can't be called while holding the resv 536 * locks. 537 */ 538 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 539 (num_read_bo_handles + num_write_bo_handles)); 540 541 drm_exec_until_all_locked(&exec) { 542 r = drm_exec_prepare_array(&exec, gobj_read, 543 num_read_bo_handles, 1); 544 drm_exec_retry_on_contention(&exec); 545 if (r) 546 goto exec_fini; 547 548 r = drm_exec_prepare_array(&exec, gobj_write, 549 num_write_bo_handles, 1); 550 drm_exec_retry_on_contention(&exec); 551 if (r) 552 goto exec_fini; 553 } 554 555 /* And publish the new fence in the BOs and syncobj */ 556 for (i = 0; i < num_read_bo_handles; i++) 557 dma_resv_add_fence(gobj_read[i]->resv, &fence->base, 558 DMA_RESV_USAGE_READ); 559 560 for (i = 0; i < num_write_bo_handles; i++) 561 dma_resv_add_fence(gobj_write[i]->resv, &fence->base, 562 DMA_RESV_USAGE_WRITE); 563 564 for (i = 0; i < num_syncobj_handles; i++) 565 drm_syncobj_replace_fence(syncobj[i], &fence->base); 566 567 exec_fini: 568 /* drop the reference acquired in fence creation function */ 569 dma_fence_put(&fence->base); 570 571 drm_exec_fini(&exec); 572 put_queue: 573 amdgpu_userq_put(queue); 574 put_gobj_write: 575 for (i = 0; i < num_write_bo_handles; i++) 576 drm_gem_object_put(gobj_write[i]); 577 kvfree(gobj_write); 578 put_gobj_read: 579 for (i = 0; i < num_read_bo_handles; i++) 580 drm_gem_object_put(gobj_read[i]); 581 kvfree(gobj_read); 582 free_syncobj: 583 while (entry-- > 0) 584 drm_syncobj_put(syncobj[entry]); 585 kfree(syncobj); 586 free_syncobj_handles: 587 kfree(syncobj_handles); 588 589 return r; 590 } 591 592 /* Count the number of expected fences so userspace can alloc a buffer */ 593 static int 594 amdgpu_userq_wait_count_fences(struct drm_file *filp, 595 struct drm_amdgpu_userq_wait *wait_info, 596 u32 *syncobj_handles, u32 *timeline_points, 597 u32 *timeline_handles, 598 struct drm_gem_object **gobj_write, 599 struct drm_gem_object **gobj_read) 600 { 601 int num_read_bo_handles, num_write_bo_handles; 602 struct dma_fence_unwrap iter; 603 struct dma_fence *fence, *f; 604 unsigned int num_fences = 0; 605 struct drm_exec exec; 606 int i, r; 607 608 /* 609 * This needs to be outside of the lock provided by drm_exec for 610 * DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT to work correctly. 611 */ 612 613 /* Count timeline fences */ 614 for (i = 0; i < wait_info->num_syncobj_timeline_handles; i++) { 615 r = drm_syncobj_find_fence(filp, timeline_handles[i], 616 timeline_points[i], 617 DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, 618 &fence); 619 if (r) 620 return r; 621 622 dma_fence_unwrap_for_each(f, &iter, fence) 623 num_fences++; 624 625 dma_fence_put(fence); 626 } 627 628 /* Count boolean fences */ 629 for (i = 0; i < wait_info->num_syncobj_handles; i++) { 630 r = drm_syncobj_find_fence(filp, syncobj_handles[i], 0, 631 DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, 632 &fence); 633 if (r) 634 return r; 635 636 num_fences++; 637 dma_fence_put(fence); 638 } 639 640 /* Lock all the GEM objects */ 641 /* TODO: It is actually not necessary to lock them */ 642 num_read_bo_handles = wait_info->num_bo_read_handles; 643 num_write_bo_handles = wait_info->num_bo_write_handles; 644 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 645 num_read_bo_handles + num_write_bo_handles); 646 647 drm_exec_until_all_locked(&exec) { 648 r = drm_exec_prepare_array(&exec, gobj_read, 649 num_read_bo_handles, 1); 650 drm_exec_retry_on_contention(&exec); 651 if (r) 652 goto error_unlock; 653 654 r = drm_exec_prepare_array(&exec, gobj_write, 655 num_write_bo_handles, 1); 656 drm_exec_retry_on_contention(&exec); 657 if (r) 658 goto error_unlock; 659 } 660 661 /* Count read fences */ 662 for (i = 0; i < num_read_bo_handles; i++) { 663 struct dma_resv_iter resv_cursor; 664 struct dma_fence *fence; 665 666 dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv, 667 DMA_RESV_USAGE_READ, fence) 668 num_fences++; 669 } 670 671 /* Count write fences */ 672 for (i = 0; i < num_write_bo_handles; i++) { 673 struct dma_resv_iter resv_cursor; 674 struct dma_fence *fence; 675 676 dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv, 677 DMA_RESV_USAGE_WRITE, fence) 678 num_fences++; 679 } 680 681 wait_info->num_fences = min(num_fences, USHRT_MAX); 682 r = 0; 683 684 error_unlock: 685 /* Unlock all the GEM objects */ 686 drm_exec_fini(&exec); 687 return r; 688 } 689 690 static int 691 amdgpu_userq_wait_add_fence(struct drm_amdgpu_userq_wait *wait_info, 692 struct dma_fence **fences, unsigned int *num_fences, 693 struct dma_fence *fence) 694 { 695 /* As fallback shouldn't userspace allocate enough space */ 696 if (*num_fences >= wait_info->num_fences) 697 return dma_fence_wait(fence, true); 698 699 fences[(*num_fences)++] = dma_fence_get(fence); 700 return 0; 701 } 702 703 static int 704 amdgpu_userq_wait_return_fence_info(struct drm_file *filp, 705 struct drm_amdgpu_userq_wait *wait_info, 706 u32 *syncobj_handles, u32 *timeline_points, 707 u32 *timeline_handles, 708 struct drm_gem_object **gobj_write, 709 struct drm_gem_object **gobj_read) 710 { 711 struct amdgpu_fpriv *fpriv = filp->driver_priv; 712 struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr; 713 struct drm_amdgpu_userq_fence_info *fence_info; 714 int num_read_bo_handles, num_write_bo_handles; 715 struct amdgpu_usermode_queue *waitq; 716 struct dma_fence **fences, *fence, *f; 717 struct dma_fence_unwrap iter; 718 int num_points, num_syncobj; 719 unsigned int num_fences = 0; 720 struct drm_exec exec; 721 int i, cnt, r; 722 723 fence_info = kmalloc_array(wait_info->num_fences, sizeof(*fence_info), 724 GFP_KERNEL); 725 if (!fence_info) 726 return -ENOMEM; 727 728 fences = kmalloc_array(wait_info->num_fences, sizeof(*fences), 729 GFP_KERNEL); 730 if (!fences) { 731 r = -ENOMEM; 732 goto free_fence_info; 733 } 734 735 /* Retrieve timeline fences */ 736 num_points = wait_info->num_syncobj_timeline_handles; 737 for (i = 0; i < num_points; i++) { 738 r = drm_syncobj_find_fence(filp, timeline_handles[i], 739 timeline_points[i], 740 DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, 741 &fence); 742 if (r) 743 goto free_fences; 744 745 dma_fence_unwrap_for_each(f, &iter, fence) { 746 r = amdgpu_userq_wait_add_fence(wait_info, fences, 747 &num_fences, f); 748 if (r) { 749 dma_fence_put(fence); 750 goto free_fences; 751 } 752 } 753 754 dma_fence_put(fence); 755 } 756 757 /* Retrieve boolean fences */ 758 num_syncobj = wait_info->num_syncobj_handles; 759 for (i = 0; i < num_syncobj; i++) { 760 struct dma_fence *fence; 761 762 r = drm_syncobj_find_fence(filp, syncobj_handles[i], 0, 763 DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, 764 &fence); 765 if (r) 766 goto free_fences; 767 768 r = amdgpu_userq_wait_add_fence(wait_info, fences, 769 &num_fences, fence); 770 dma_fence_put(fence); 771 if (r) 772 goto free_fences; 773 774 } 775 776 /* Lock all the GEM objects */ 777 num_read_bo_handles = wait_info->num_bo_read_handles; 778 num_write_bo_handles = wait_info->num_bo_write_handles; 779 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 780 num_read_bo_handles + num_write_bo_handles); 781 782 drm_exec_until_all_locked(&exec) { 783 r = drm_exec_prepare_array(&exec, gobj_read, 784 num_read_bo_handles, 1); 785 drm_exec_retry_on_contention(&exec); 786 if (r) 787 goto error_unlock; 788 789 r = drm_exec_prepare_array(&exec, gobj_write, 790 num_write_bo_handles, 1); 791 drm_exec_retry_on_contention(&exec); 792 if (r) 793 goto error_unlock; 794 } 795 796 /* Retrieve GEM read objects fence */ 797 for (i = 0; i < num_read_bo_handles; i++) { 798 struct dma_resv_iter resv_cursor; 799 struct dma_fence *fence; 800 801 dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv, 802 DMA_RESV_USAGE_READ, fence) { 803 r = amdgpu_userq_wait_add_fence(wait_info, fences, 804 &num_fences, fence); 805 if (r) 806 goto error_unlock; 807 } 808 } 809 810 /* Retrieve GEM write objects fence */ 811 for (i = 0; i < num_write_bo_handles; i++) { 812 struct dma_resv_iter resv_cursor; 813 struct dma_fence *fence; 814 815 dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv, 816 DMA_RESV_USAGE_WRITE, fence) { 817 r = amdgpu_userq_wait_add_fence(wait_info, fences, 818 &num_fences, fence); 819 if (r) 820 goto error_unlock; 821 } 822 } 823 824 drm_exec_fini(&exec); 825 826 /* 827 * Keep only the latest fences to reduce the number of values 828 * given back to userspace. 829 */ 830 num_fences = dma_fence_dedup_array(fences, num_fences); 831 832 waitq = amdgpu_userq_get(userq_mgr, wait_info->waitq_id); 833 if (!waitq) { 834 r = -EINVAL; 835 goto free_fences; 836 } 837 838 for (i = 0, cnt = 0; i < num_fences; i++) { 839 struct amdgpu_userq_fence_driver *fence_drv; 840 struct amdgpu_userq_fence *userq_fence; 841 u32 index; 842 843 userq_fence = to_amdgpu_userq_fence(fences[i]); 844 if (!userq_fence) { 845 /* 846 * Just waiting on other driver fences should 847 * be good for now 848 */ 849 r = dma_fence_wait(fences[i], true); 850 if (r) 851 goto put_waitq; 852 853 continue; 854 } 855 856 fence_drv = userq_fence->fence_drv; 857 /* 858 * We need to make sure the user queue release their reference 859 * to the fence drivers at some point before queue destruction. 860 * Otherwise, we would gather those references until we don't 861 * have any more space left and crash. 862 */ 863 mutex_lock(&waitq->fence_drv_lock); 864 r = xa_alloc(&waitq->fence_drv_xa, &index, fence_drv, 865 xa_limit_32b, GFP_KERNEL); 866 mutex_unlock(&waitq->fence_drv_lock); 867 if (r) 868 goto put_waitq; 869 870 amdgpu_userq_fence_driver_get(fence_drv); 871 872 /* Store drm syncobj's gpu va address and value */ 873 fence_info[cnt].va = fence_drv->va; 874 fence_info[cnt].value = fences[i]->seqno; 875 876 /* Increment the actual userq fence count */ 877 cnt++; 878 } 879 wait_info->num_fences = cnt; 880 881 /* Copy userq fence info to user space */ 882 if (copy_to_user(u64_to_user_ptr(wait_info->out_fences), 883 fence_info, cnt * sizeof(*fence_info))) 884 r = -EFAULT; 885 else 886 r = 0; 887 888 put_waitq: 889 amdgpu_userq_put(waitq); 890 891 free_fences: 892 while (num_fences--) 893 dma_fence_put(fences[num_fences]); 894 kfree(fences); 895 896 free_fence_info: 897 kfree(fence_info); 898 return r; 899 900 error_unlock: 901 drm_exec_fini(&exec); 902 goto free_fences; 903 } 904 905 int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, 906 struct drm_file *filp) 907 { 908 int num_points, num_syncobj, num_read_bo_handles, num_write_bo_handles; 909 u32 *syncobj_handles, *timeline_points, *timeline_handles; 910 struct drm_amdgpu_userq_wait *wait_info = data; 911 struct drm_gem_object **gobj_write; 912 struct drm_gem_object **gobj_read; 913 void __user *ptr; 914 int r; 915 916 if (!amdgpu_userq_enabled(dev)) 917 return -ENOTSUPP; 918 919 if (wait_info->num_bo_write_handles > AMDGPU_USERQ_MAX_HANDLES || 920 wait_info->num_bo_read_handles > AMDGPU_USERQ_MAX_HANDLES) 921 return -EINVAL; 922 923 num_syncobj = wait_info->num_syncobj_handles; 924 ptr = u64_to_user_ptr(wait_info->syncobj_handles); 925 syncobj_handles = memdup_array_user(ptr, num_syncobj, sizeof(u32)); 926 if (IS_ERR(syncobj_handles)) 927 return PTR_ERR(syncobj_handles); 928 929 num_points = wait_info->num_syncobj_timeline_handles; 930 ptr = u64_to_user_ptr(wait_info->syncobj_timeline_handles); 931 timeline_handles = memdup_array_user(ptr, num_points, sizeof(u32)); 932 if (IS_ERR(timeline_handles)) { 933 r = PTR_ERR(timeline_handles); 934 goto free_syncobj_handles; 935 } 936 937 ptr = u64_to_user_ptr(wait_info->syncobj_timeline_points); 938 timeline_points = memdup_array_user(ptr, num_points, sizeof(u32)); 939 if (IS_ERR(timeline_points)) { 940 r = PTR_ERR(timeline_points); 941 goto free_timeline_handles; 942 } 943 944 num_read_bo_handles = wait_info->num_bo_read_handles; 945 ptr = u64_to_user_ptr(wait_info->bo_read_handles); 946 r = drm_gem_objects_lookup(filp, ptr, num_read_bo_handles, &gobj_read); 947 if (r) 948 goto free_timeline_points; 949 950 num_write_bo_handles = wait_info->num_bo_write_handles; 951 ptr = u64_to_user_ptr(wait_info->bo_write_handles); 952 r = drm_gem_objects_lookup(filp, ptr, num_write_bo_handles, 953 &gobj_write); 954 if (r) 955 goto put_gobj_read; 956 957 /* 958 * Passing num_fences = 0 means that userspace doesn't want to 959 * retrieve userq_fence_info. If num_fences = 0 we skip filling 960 * userq_fence_info and return the actual number of fences on 961 * args->num_fences. 962 */ 963 if (!wait_info->num_fences) { 964 r = amdgpu_userq_wait_count_fences(filp, wait_info, 965 syncobj_handles, 966 timeline_points, 967 timeline_handles, 968 gobj_write, 969 gobj_read); 970 } else { 971 r = amdgpu_userq_wait_return_fence_info(filp, wait_info, 972 syncobj_handles, 973 timeline_points, 974 timeline_handles, 975 gobj_write, 976 gobj_read); 977 } 978 979 while (num_write_bo_handles--) 980 drm_gem_object_put(gobj_write[num_write_bo_handles]); 981 kvfree(gobj_write); 982 983 put_gobj_read: 984 while (num_read_bo_handles--) 985 drm_gem_object_put(gobj_read[num_read_bo_handles]); 986 kvfree(gobj_read); 987 988 free_timeline_points: 989 kfree(timeline_points); 990 free_timeline_handles: 991 kfree(timeline_handles); 992 free_syncobj_handles: 993 kfree(syncobj_handles); 994 return r; 995 } 996