1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright 2023 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 * 23 */ 24 25 #include <linux/kref.h> 26 #include <linux/slab.h> 27 #include <linux/dma-fence-unwrap.h> 28 29 #include <drm/drm_exec.h> 30 #include <drm/drm_syncobj.h> 31 32 #include "amdgpu.h" 33 #include "amdgpu_userq_fence.h" 34 35 #define AMDGPU_USERQ_MAX_HANDLES (1U << 16) 36 37 static const struct dma_fence_ops amdgpu_userq_fence_ops; 38 39 static inline struct amdgpu_userq_fence *to_amdgpu_userq_fence(struct dma_fence *f) 40 { 41 if (!f || f->ops != &amdgpu_userq_fence_ops) 42 return NULL; 43 44 return container_of(f, struct amdgpu_userq_fence, base); 45 } 46 47 static u64 amdgpu_userq_fence_read(struct amdgpu_userq_fence_driver *fence_drv) 48 { 49 return le64_to_cpu(*fence_drv->cpu_addr); 50 } 51 52 static void 53 amdgpu_userq_fence_write(struct amdgpu_userq_fence_driver *fence_drv, 54 u64 seq) 55 { 56 if (fence_drv->cpu_addr) 57 *fence_drv->cpu_addr = cpu_to_le64(seq); 58 } 59 60 int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, 61 struct amdgpu_userq_fence_driver **fence_drv_req) 62 { 63 struct amdgpu_userq_fence_driver *fence_drv; 64 int r; 65 66 if (!fence_drv_req) 67 return -EINVAL; 68 *fence_drv_req = NULL; 69 70 fence_drv = kzalloc_obj(*fence_drv); 71 if (!fence_drv) 72 return -ENOMEM; 73 74 /* Acquire seq64 memory */ 75 r = amdgpu_seq64_alloc(adev, &fence_drv->va, &fence_drv->gpu_addr, 76 &fence_drv->cpu_addr); 77 if (r) 78 goto free_fence_drv; 79 80 memset(fence_drv->cpu_addr, 0, sizeof(u64)); 81 82 kref_init(&fence_drv->refcount); 83 INIT_LIST_HEAD(&fence_drv->fences); 84 spin_lock_init(&fence_drv->fence_list_lock); 85 86 fence_drv->adev = adev; 87 fence_drv->context = dma_fence_context_alloc(1); 88 get_task_comm(fence_drv->timeline_name, current); 89 90 *fence_drv_req = fence_drv; 91 92 return 0; 93 94 free_fence_drv: 95 kfree(fence_drv); 96 97 return r; 98 } 99 100 static void amdgpu_userq_walk_and_drop_fence_drv(struct xarray *xa) 101 { 102 struct amdgpu_userq_fence_driver *fence_drv; 103 unsigned long index; 104 105 if (xa_empty(xa)) 106 return; 107 108 xa_lock(xa); 109 xa_for_each(xa, index, fence_drv) { 110 __xa_erase(xa, index); 111 amdgpu_userq_fence_driver_put(fence_drv); 112 } 113 114 xa_unlock(xa); 115 } 116 117 void 118 amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq) 119 { 120 dma_fence_put(userq->last_fence); 121 userq->last_fence = NULL; 122 amdgpu_userq_walk_and_drop_fence_drv(&userq->fence_drv_xa); 123 xa_destroy(&userq->fence_drv_xa); 124 mutex_destroy(&userq->fence_drv_lock); 125 /* Drop the queue's ownership reference to fence_drv explicitly */ 126 amdgpu_userq_fence_driver_put(userq->fence_drv); 127 } 128 129 static void 130 amdgpu_userq_fence_put_fence_drv_array(struct amdgpu_userq_fence *userq_fence) 131 { 132 unsigned long i; 133 for (i = 0; i < userq_fence->fence_drv_array_count; i++) 134 amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]); 135 userq_fence->fence_drv_array_count = 0; 136 } 137 138 /* 139 * Returns: 140 * -ENOENT when no fences were processes 141 * 1 when more fences are pending 142 * 0 when no fences are pending any more 143 */ 144 int 145 amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv) 146 { 147 struct amdgpu_userq_fence *userq_fence, *tmp; 148 LIST_HEAD(to_be_signaled); 149 struct dma_fence *fence; 150 unsigned long flags; 151 u64 rptr; 152 153 spin_lock_irqsave(&fence_drv->fence_list_lock, flags); 154 rptr = amdgpu_userq_fence_read(fence_drv); 155 156 list_for_each_entry(userq_fence, &fence_drv->fences, link) { 157 if (rptr < userq_fence->base.seqno) 158 break; 159 } 160 161 list_cut_before(&to_be_signaled, &fence_drv->fences, 162 &userq_fence->link); 163 spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); 164 165 if (list_empty(&to_be_signaled)) 166 return -ENOENT; 167 168 list_for_each_entry_safe(userq_fence, tmp, &to_be_signaled, link) { 169 fence = &userq_fence->base; 170 list_del_init(&userq_fence->link); 171 dma_fence_signal(fence); 172 /* Drop fence_drv_array outside fence_list_lock 173 * to avoid the recursion lock. 174 */ 175 amdgpu_userq_fence_put_fence_drv_array(userq_fence); 176 dma_fence_put(fence); 177 } 178 179 /* That doesn't need to be accurate so no locking */ 180 return list_empty(&fence_drv->fences) ? 0 : 1; 181 } 182 183 void amdgpu_userq_fence_driver_destroy(struct kref *ref) 184 { 185 struct amdgpu_userq_fence_driver *fence_drv = container_of(ref, 186 struct amdgpu_userq_fence_driver, 187 refcount); 188 struct amdgpu_device *adev = fence_drv->adev; 189 struct amdgpu_userq_fence *fence, *tmp; 190 unsigned long flags; 191 struct dma_fence *f; 192 193 spin_lock_irqsave(&fence_drv->fence_list_lock, flags); 194 list_for_each_entry_safe(fence, tmp, &fence_drv->fences, link) { 195 f = &fence->base; 196 197 if (!dma_fence_is_signaled(f)) { 198 dma_fence_set_error(f, -ECANCELED); 199 dma_fence_signal(f); 200 } 201 202 list_del(&fence->link); 203 dma_fence_put(f); 204 } 205 spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); 206 207 /* Free seq64 memory */ 208 amdgpu_seq64_free(adev, fence_drv->va); 209 kfree(fence_drv); 210 } 211 212 void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv) 213 { 214 kref_get(&fence_drv->refcount); 215 } 216 217 void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv) 218 { 219 kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy); 220 } 221 222 static int amdgpu_userq_fence_alloc(struct amdgpu_usermode_queue *userq, 223 struct amdgpu_userq_fence **pfence) 224 { 225 struct amdgpu_userq_fence_driver *fence_drv = userq->fence_drv; 226 struct amdgpu_userq_fence *userq_fence; 227 void *entry; 228 229 userq_fence = kmalloc(sizeof(*userq_fence), GFP_KERNEL); 230 if (!userq_fence) 231 return -ENOMEM; 232 233 /* 234 * Get the next unused entry, since we fill from the start this can be 235 * used as size to allocate the array. 236 */ 237 mutex_lock(&userq->fence_drv_lock); 238 XA_STATE(xas, &userq->fence_drv_xa, 0); 239 240 rcu_read_lock(); 241 do { 242 entry = xas_find_marked(&xas, ULONG_MAX, XA_FREE_MARK); 243 } while (xas_retry(&xas, entry)); 244 rcu_read_unlock(); 245 246 userq_fence->fence_drv_array = kvmalloc_array(xas.xa_index, 247 sizeof(fence_drv), 248 GFP_KERNEL); 249 if (!userq_fence->fence_drv_array) { 250 mutex_unlock(&userq->fence_drv_lock); 251 kfree(userq_fence); 252 return -ENOMEM; 253 } 254 255 userq_fence->fence_drv_array_count = xas.xa_index; 256 xa_extract(&userq->fence_drv_xa, (void **)userq_fence->fence_drv_array, 257 0, ULONG_MAX, xas.xa_index, XA_PRESENT); 258 xa_destroy(&userq->fence_drv_xa); 259 260 mutex_unlock(&userq->fence_drv_lock); 261 262 amdgpu_userq_fence_driver_get(fence_drv); 263 userq_fence->fence_drv = fence_drv; 264 265 *pfence = userq_fence; 266 return 0; 267 } 268 269 static void amdgpu_userq_fence_init(struct amdgpu_usermode_queue *userq, 270 struct amdgpu_userq_fence *fence, 271 u64 seq) 272 { 273 struct amdgpu_userq_fence_driver *fence_drv = userq->fence_drv; 274 unsigned long flags; 275 bool signaled = false; 276 277 spin_lock_init(&fence->lock); 278 dma_fence_init64(&fence->base, &amdgpu_userq_fence_ops, &fence->lock, 279 fence_drv->context, seq); 280 281 /* Make sure the fence is visible to the hang detect worker */ 282 dma_fence_put(userq->last_fence); 283 userq->last_fence = dma_fence_get(&fence->base); 284 285 /* Check if hardware has already processed the fence */ 286 spin_lock_irqsave(&fence_drv->fence_list_lock, flags); 287 if (!dma_fence_is_signaled(&fence->base)) { 288 dma_fence_get(&fence->base); 289 list_add_tail(&fence->link, &fence_drv->fences); 290 } else { 291 INIT_LIST_HEAD(&fence->link); 292 signaled = true; 293 } 294 spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); 295 296 if (signaled) 297 amdgpu_userq_fence_put_fence_drv_array(fence); 298 else 299 amdgpu_userq_start_hang_detect_work(userq); 300 } 301 302 static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f) 303 { 304 return "amdgpu_userq_fence"; 305 } 306 307 static const char *amdgpu_userq_fence_get_timeline_name(struct dma_fence *f) 308 { 309 struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f); 310 311 return fence->fence_drv->timeline_name; 312 } 313 314 static bool amdgpu_userq_fence_signaled(struct dma_fence *f) 315 { 316 struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f); 317 struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv; 318 u64 rptr, wptr; 319 320 rptr = amdgpu_userq_fence_read(fence_drv); 321 wptr = fence->base.seqno; 322 323 if (rptr >= wptr) 324 return true; 325 326 return false; 327 } 328 329 static void amdgpu_userq_fence_free(struct rcu_head *rcu) 330 { 331 struct dma_fence *fence = container_of(rcu, struct dma_fence, rcu); 332 struct amdgpu_userq_fence *userq_fence = to_amdgpu_userq_fence(fence); 333 struct amdgpu_userq_fence_driver *fence_drv = userq_fence->fence_drv; 334 335 /* Release the fence driver reference */ 336 amdgpu_userq_fence_driver_put(fence_drv); 337 338 kvfree(userq_fence->fence_drv_array); 339 kfree(userq_fence); 340 } 341 342 static void amdgpu_userq_fence_release(struct dma_fence *f) 343 { 344 call_rcu(&f->rcu, amdgpu_userq_fence_free); 345 } 346 347 static const struct dma_fence_ops amdgpu_userq_fence_ops = { 348 .get_driver_name = amdgpu_userq_fence_get_driver_name, 349 .get_timeline_name = amdgpu_userq_fence_get_timeline_name, 350 .signaled = amdgpu_userq_fence_signaled, 351 .release = amdgpu_userq_fence_release, 352 }; 353 354 /** 355 * amdgpu_userq_fence_read_wptr - Read the userq wptr value 356 * 357 * @adev: amdgpu_device pointer 358 * @queue: user mode queue structure pointer 359 * @wptr: write pointer value 360 * 361 * Read the wptr value from userq's MQD. The userq signal IOCTL 362 * creates a dma_fence for the shared buffers that expects the 363 * RPTR value written to seq64 memory >= WPTR. 364 * 365 * Returns wptr value on success, error on failure. 366 */ 367 static int amdgpu_userq_fence_read_wptr(struct amdgpu_device *adev, 368 struct amdgpu_usermode_queue *queue, 369 u64 *wptr) 370 { 371 struct amdgpu_bo_va_mapping *mapping; 372 struct amdgpu_bo *bo; 373 u64 addr, *ptr; 374 int r; 375 376 r = amdgpu_bo_reserve(queue->vm->root.bo, false); 377 if (r) 378 return r; 379 380 addr = queue->userq_prop->wptr_gpu_addr; 381 addr &= AMDGPU_GMC_HOLE_MASK; 382 383 mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, addr >> PAGE_SHIFT); 384 if (!mapping) { 385 amdgpu_bo_unreserve(queue->vm->root.bo); 386 DRM_ERROR("Failed to lookup amdgpu_bo_va_mapping\n"); 387 return -EINVAL; 388 } 389 390 bo = amdgpu_bo_ref(mapping->bo_va->base.bo); 391 amdgpu_bo_unreserve(queue->vm->root.bo); 392 r = amdgpu_bo_reserve(bo, true); 393 if (r) { 394 amdgpu_bo_unref(&bo); 395 DRM_ERROR("Failed to reserve userqueue wptr bo"); 396 return r; 397 } 398 399 r = amdgpu_bo_kmap(bo, (void **)&ptr); 400 if (r) { 401 DRM_ERROR("Failed mapping the userqueue wptr bo"); 402 goto map_error; 403 } 404 405 *wptr = le64_to_cpu(*ptr); 406 407 amdgpu_bo_kunmap(bo); 408 amdgpu_bo_unreserve(bo); 409 amdgpu_bo_unref(&bo); 410 411 return 0; 412 413 map_error: 414 amdgpu_bo_unreserve(bo); 415 amdgpu_bo_unref(&bo); 416 417 return r; 418 } 419 420 static void 421 amdgpu_userq_fence_driver_set_error(struct amdgpu_userq_fence *fence, 422 int error) 423 { 424 struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv; 425 unsigned long flags; 426 struct dma_fence *f; 427 428 spin_lock_irqsave(&fence_drv->fence_list_lock, flags); 429 430 f = rcu_dereference_protected(&fence->base, 431 lockdep_is_held(&fence_drv->fence_list_lock)); 432 if (f && !dma_fence_is_signaled_locked(f)) 433 dma_fence_set_error(f, error); 434 spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); 435 } 436 437 void 438 amdgpu_userq_fence_driver_force_completion(struct amdgpu_usermode_queue *userq) 439 { 440 struct dma_fence *f = userq->last_fence; 441 442 if (f) { 443 struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f); 444 struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv; 445 u64 wptr = fence->base.seqno; 446 447 amdgpu_userq_fence_driver_set_error(fence, -ECANCELED); 448 amdgpu_userq_fence_write(fence_drv, wptr); 449 amdgpu_userq_fence_driver_process(fence_drv); 450 451 } 452 } 453 454 int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, 455 struct drm_file *filp) 456 { 457 struct amdgpu_device *adev = drm_to_adev(dev); 458 struct drm_amdgpu_userq_signal *args = data; 459 const unsigned int num_write_bo_handles = args->num_bo_write_handles; 460 const unsigned int num_read_bo_handles = args->num_bo_read_handles; 461 struct amdgpu_fpriv *fpriv = filp->driver_priv; 462 struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr; 463 464 struct drm_gem_object **gobj_write, **gobj_read; 465 u32 *syncobj_handles, num_syncobj_handles; 466 struct amdgpu_usermode_queue *queue; 467 struct amdgpu_userq_fence *fence; 468 struct drm_syncobj **syncobj; 469 struct drm_exec exec; 470 void __user *ptr; 471 int r, i, entry; 472 u64 wptr; 473 474 if (!amdgpu_userq_enabled(dev)) 475 return -ENOTSUPP; 476 477 if (args->num_bo_write_handles > AMDGPU_USERQ_MAX_HANDLES || 478 args->num_bo_read_handles > AMDGPU_USERQ_MAX_HANDLES) 479 return -EINVAL; 480 481 num_syncobj_handles = args->num_syncobj_handles; 482 ptr = u64_to_user_ptr(args->syncobj_handles); 483 syncobj_handles = memdup_array_user(ptr, num_syncobj_handles, 484 sizeof(u32)); 485 if (IS_ERR(syncobj_handles)) 486 return PTR_ERR(syncobj_handles); 487 488 syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), 489 GFP_KERNEL); 490 if (!syncobj) { 491 r = -ENOMEM; 492 goto free_syncobj_handles; 493 } 494 495 for (entry = 0; entry < num_syncobj_handles; entry++) { 496 syncobj[entry] = drm_syncobj_find(filp, syncobj_handles[entry]); 497 if (!syncobj[entry]) { 498 r = -ENOENT; 499 goto free_syncobj; 500 } 501 } 502 503 ptr = u64_to_user_ptr(args->bo_read_handles); 504 r = drm_gem_objects_lookup(filp, ptr, num_read_bo_handles, &gobj_read); 505 if (r) 506 goto free_syncobj; 507 508 ptr = u64_to_user_ptr(args->bo_write_handles); 509 r = drm_gem_objects_lookup(filp, ptr, num_write_bo_handles, 510 &gobj_write); 511 if (r) 512 goto put_gobj_read; 513 514 queue = amdgpu_userq_get(userq_mgr, args->queue_id); 515 if (!queue) { 516 r = -ENOENT; 517 goto put_gobj_write; 518 } 519 520 r = amdgpu_userq_fence_read_wptr(adev, queue, &wptr); 521 if (r) 522 goto put_queue; 523 524 r = amdgpu_userq_fence_alloc(queue, &fence); 525 if (r) 526 goto put_queue; 527 528 /* We are here means UQ is active, make sure the eviction fence is valid */ 529 amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); 530 531 /* Create the new fence */ 532 amdgpu_userq_fence_init(queue, fence, wptr); 533 534 mutex_unlock(&userq_mgr->userq_mutex); 535 536 /* 537 * This needs to come after the fence is created since 538 * amdgpu_userq_ensure_ev_fence() can't be called while holding the resv 539 * locks. 540 */ 541 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 542 (num_read_bo_handles + num_write_bo_handles)); 543 544 drm_exec_until_all_locked(&exec) { 545 r = drm_exec_prepare_array(&exec, gobj_read, 546 num_read_bo_handles, 1); 547 drm_exec_retry_on_contention(&exec); 548 if (r) 549 goto exec_fini; 550 551 r = drm_exec_prepare_array(&exec, gobj_write, 552 num_write_bo_handles, 1); 553 drm_exec_retry_on_contention(&exec); 554 if (r) 555 goto exec_fini; 556 } 557 558 /* And publish the new fence in the BOs and syncobj */ 559 for (i = 0; i < num_read_bo_handles; i++) 560 dma_resv_add_fence(gobj_read[i]->resv, &fence->base, 561 DMA_RESV_USAGE_READ); 562 563 for (i = 0; i < num_write_bo_handles; i++) 564 dma_resv_add_fence(gobj_write[i]->resv, &fence->base, 565 DMA_RESV_USAGE_WRITE); 566 567 for (i = 0; i < num_syncobj_handles; i++) 568 drm_syncobj_replace_fence(syncobj[i], &fence->base); 569 570 exec_fini: 571 /* drop the reference acquired in fence creation function */ 572 dma_fence_put(&fence->base); 573 574 drm_exec_fini(&exec); 575 put_queue: 576 amdgpu_userq_put(queue); 577 put_gobj_write: 578 for (i = 0; i < num_write_bo_handles; i++) 579 drm_gem_object_put(gobj_write[i]); 580 kvfree(gobj_write); 581 put_gobj_read: 582 for (i = 0; i < num_read_bo_handles; i++) 583 drm_gem_object_put(gobj_read[i]); 584 kvfree(gobj_read); 585 free_syncobj: 586 while (entry-- > 0) 587 drm_syncobj_put(syncobj[entry]); 588 kfree(syncobj); 589 free_syncobj_handles: 590 kfree(syncobj_handles); 591 592 return r; 593 } 594 595 /* Count the number of expected fences so userspace can alloc a buffer */ 596 static int 597 amdgpu_userq_wait_count_fences(struct drm_file *filp, 598 struct drm_amdgpu_userq_wait *wait_info, 599 u32 *syncobj_handles, u32 *timeline_points, 600 u32 *timeline_handles, 601 struct drm_gem_object **gobj_write, 602 struct drm_gem_object **gobj_read) 603 { 604 int num_read_bo_handles, num_write_bo_handles; 605 struct dma_fence_unwrap iter; 606 struct dma_fence *fence, *f; 607 unsigned int num_fences = 0; 608 struct drm_exec exec; 609 int i, r; 610 611 /* 612 * This needs to be outside of the lock provided by drm_exec for 613 * DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT to work correctly. 614 */ 615 616 /* Count timeline fences */ 617 for (i = 0; i < wait_info->num_syncobj_timeline_handles; i++) { 618 r = drm_syncobj_find_fence(filp, timeline_handles[i], 619 timeline_points[i], 620 DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, 621 &fence); 622 if (r) 623 return r; 624 625 dma_fence_unwrap_for_each(f, &iter, fence) 626 num_fences++; 627 628 dma_fence_put(fence); 629 } 630 631 /* Count boolean fences */ 632 for (i = 0; i < wait_info->num_syncobj_handles; i++) { 633 r = drm_syncobj_find_fence(filp, syncobj_handles[i], 0, 634 DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, 635 &fence); 636 if (r) 637 return r; 638 639 num_fences++; 640 dma_fence_put(fence); 641 } 642 643 /* Lock all the GEM objects */ 644 /* TODO: It is actually not necessary to lock them */ 645 num_read_bo_handles = wait_info->num_bo_read_handles; 646 num_write_bo_handles = wait_info->num_bo_write_handles; 647 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 648 num_read_bo_handles + num_write_bo_handles); 649 650 drm_exec_until_all_locked(&exec) { 651 r = drm_exec_prepare_array(&exec, gobj_read, 652 num_read_bo_handles, 1); 653 drm_exec_retry_on_contention(&exec); 654 if (r) 655 goto error_unlock; 656 657 r = drm_exec_prepare_array(&exec, gobj_write, 658 num_write_bo_handles, 1); 659 drm_exec_retry_on_contention(&exec); 660 if (r) 661 goto error_unlock; 662 } 663 664 /* Count read fences */ 665 for (i = 0; i < num_read_bo_handles; i++) { 666 struct dma_resv_iter resv_cursor; 667 struct dma_fence *fence; 668 669 dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv, 670 DMA_RESV_USAGE_READ, fence) 671 num_fences++; 672 } 673 674 /* Count write fences */ 675 for (i = 0; i < num_write_bo_handles; i++) { 676 struct dma_resv_iter resv_cursor; 677 struct dma_fence *fence; 678 679 dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv, 680 DMA_RESV_USAGE_WRITE, fence) 681 num_fences++; 682 } 683 684 wait_info->num_fences = min(num_fences, USHRT_MAX); 685 r = 0; 686 687 error_unlock: 688 /* Unlock all the GEM objects */ 689 drm_exec_fini(&exec); 690 return r; 691 } 692 693 static int 694 amdgpu_userq_wait_add_fence(struct drm_amdgpu_userq_wait *wait_info, 695 struct dma_fence **fences, unsigned int *num_fences, 696 struct dma_fence *fence) 697 { 698 /* As fallback shouldn't userspace allocate enough space */ 699 if (*num_fences >= wait_info->num_fences) 700 return dma_fence_wait(fence, true); 701 702 fences[(*num_fences)++] = dma_fence_get(fence); 703 return 0; 704 } 705 706 static int 707 amdgpu_userq_wait_return_fence_info(struct drm_file *filp, 708 struct drm_amdgpu_userq_wait *wait_info, 709 u32 *syncobj_handles, u32 *timeline_points, 710 u32 *timeline_handles, 711 struct drm_gem_object **gobj_write, 712 struct drm_gem_object **gobj_read) 713 { 714 struct amdgpu_fpriv *fpriv = filp->driver_priv; 715 struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr; 716 struct drm_amdgpu_userq_fence_info *fence_info; 717 int num_read_bo_handles, num_write_bo_handles; 718 struct amdgpu_usermode_queue *waitq; 719 struct dma_fence **fences, *fence, *f; 720 struct dma_fence_unwrap iter; 721 int num_points, num_syncobj; 722 unsigned int num_fences = 0; 723 struct drm_exec exec; 724 int i, cnt, r; 725 726 fence_info = kmalloc_array(wait_info->num_fences, sizeof(*fence_info), 727 GFP_KERNEL); 728 if (!fence_info) 729 return -ENOMEM; 730 731 fences = kmalloc_array(wait_info->num_fences, sizeof(*fences), 732 GFP_KERNEL); 733 if (!fences) { 734 r = -ENOMEM; 735 goto free_fence_info; 736 } 737 738 /* Retrieve timeline fences */ 739 num_points = wait_info->num_syncobj_timeline_handles; 740 for (i = 0; i < num_points; i++) { 741 r = drm_syncobj_find_fence(filp, timeline_handles[i], 742 timeline_points[i], 743 DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, 744 &fence); 745 if (r) 746 goto free_fences; 747 748 dma_fence_unwrap_for_each(f, &iter, fence) { 749 r = amdgpu_userq_wait_add_fence(wait_info, fences, 750 &num_fences, f); 751 if (r) { 752 dma_fence_put(fence); 753 goto free_fences; 754 } 755 } 756 757 dma_fence_put(fence); 758 } 759 760 /* Retrieve boolean fences */ 761 num_syncobj = wait_info->num_syncobj_handles; 762 for (i = 0; i < num_syncobj; i++) { 763 struct dma_fence *fence; 764 765 r = drm_syncobj_find_fence(filp, syncobj_handles[i], 0, 766 DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, 767 &fence); 768 if (r) 769 goto free_fences; 770 771 r = amdgpu_userq_wait_add_fence(wait_info, fences, 772 &num_fences, fence); 773 dma_fence_put(fence); 774 if (r) 775 goto free_fences; 776 777 } 778 779 /* Lock all the GEM objects */ 780 num_read_bo_handles = wait_info->num_bo_read_handles; 781 num_write_bo_handles = wait_info->num_bo_write_handles; 782 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 783 num_read_bo_handles + num_write_bo_handles); 784 785 drm_exec_until_all_locked(&exec) { 786 r = drm_exec_prepare_array(&exec, gobj_read, 787 num_read_bo_handles, 1); 788 drm_exec_retry_on_contention(&exec); 789 if (r) 790 goto error_unlock; 791 792 r = drm_exec_prepare_array(&exec, gobj_write, 793 num_write_bo_handles, 1); 794 drm_exec_retry_on_contention(&exec); 795 if (r) 796 goto error_unlock; 797 } 798 799 /* Retrieve GEM read objects fence */ 800 for (i = 0; i < num_read_bo_handles; i++) { 801 struct dma_resv_iter resv_cursor; 802 struct dma_fence *fence; 803 804 dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv, 805 DMA_RESV_USAGE_READ, fence) { 806 r = amdgpu_userq_wait_add_fence(wait_info, fences, 807 &num_fences, fence); 808 if (r) 809 goto error_unlock; 810 } 811 } 812 813 /* Retrieve GEM write objects fence */ 814 for (i = 0; i < num_write_bo_handles; i++) { 815 struct dma_resv_iter resv_cursor; 816 struct dma_fence *fence; 817 818 dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv, 819 DMA_RESV_USAGE_WRITE, fence) { 820 r = amdgpu_userq_wait_add_fence(wait_info, fences, 821 &num_fences, fence); 822 if (r) 823 goto error_unlock; 824 } 825 } 826 827 drm_exec_fini(&exec); 828 829 /* 830 * Keep only the latest fences to reduce the number of values 831 * given back to userspace. 832 */ 833 num_fences = dma_fence_dedup_array(fences, num_fences); 834 835 waitq = amdgpu_userq_get(userq_mgr, wait_info->waitq_id); 836 if (!waitq) { 837 r = -EINVAL; 838 goto free_fences; 839 } 840 841 for (i = 0, cnt = 0; i < num_fences; i++) { 842 struct amdgpu_userq_fence_driver *fence_drv; 843 struct amdgpu_userq_fence *userq_fence; 844 u32 index; 845 846 userq_fence = to_amdgpu_userq_fence(fences[i]); 847 if (!userq_fence) { 848 /* 849 * Just waiting on other driver fences should 850 * be good for now 851 */ 852 r = dma_fence_wait(fences[i], true); 853 if (r) 854 goto put_waitq; 855 856 continue; 857 } 858 859 fence_drv = userq_fence->fence_drv; 860 /* 861 * We need to make sure the user queue release their reference 862 * to the fence drivers at some point before queue destruction. 863 * Otherwise, we would gather those references until we don't 864 * have any more space left and crash. 865 */ 866 mutex_lock(&waitq->fence_drv_lock); 867 r = xa_alloc(&waitq->fence_drv_xa, &index, fence_drv, 868 xa_limit_32b, GFP_KERNEL); 869 mutex_unlock(&waitq->fence_drv_lock); 870 if (r) 871 goto put_waitq; 872 873 amdgpu_userq_fence_driver_get(fence_drv); 874 875 /* Store drm syncobj's gpu va address and value */ 876 fence_info[cnt].va = fence_drv->va; 877 fence_info[cnt].value = fences[i]->seqno; 878 879 /* Increment the actual userq fence count */ 880 cnt++; 881 } 882 wait_info->num_fences = cnt; 883 884 /* Copy userq fence info to user space */ 885 if (copy_to_user(u64_to_user_ptr(wait_info->out_fences), 886 fence_info, cnt * sizeof(*fence_info))) 887 r = -EFAULT; 888 else 889 r = 0; 890 891 put_waitq: 892 amdgpu_userq_put(waitq); 893 894 free_fences: 895 while (num_fences--) 896 dma_fence_put(fences[num_fences]); 897 kfree(fences); 898 899 free_fence_info: 900 kfree(fence_info); 901 return r; 902 903 error_unlock: 904 drm_exec_fini(&exec); 905 goto free_fences; 906 } 907 908 int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, 909 struct drm_file *filp) 910 { 911 int num_points, num_syncobj, num_read_bo_handles, num_write_bo_handles; 912 u32 *syncobj_handles, *timeline_points, *timeline_handles; 913 struct drm_amdgpu_userq_wait *wait_info = data; 914 struct drm_gem_object **gobj_write; 915 struct drm_gem_object **gobj_read; 916 void __user *ptr; 917 int r; 918 919 if (!amdgpu_userq_enabled(dev)) 920 return -ENOTSUPP; 921 922 if (wait_info->num_bo_write_handles > AMDGPU_USERQ_MAX_HANDLES || 923 wait_info->num_bo_read_handles > AMDGPU_USERQ_MAX_HANDLES) 924 return -EINVAL; 925 926 num_syncobj = wait_info->num_syncobj_handles; 927 ptr = u64_to_user_ptr(wait_info->syncobj_handles); 928 syncobj_handles = memdup_array_user(ptr, num_syncobj, sizeof(u32)); 929 if (IS_ERR(syncobj_handles)) 930 return PTR_ERR(syncobj_handles); 931 932 num_points = wait_info->num_syncobj_timeline_handles; 933 ptr = u64_to_user_ptr(wait_info->syncobj_timeline_handles); 934 timeline_handles = memdup_array_user(ptr, num_points, sizeof(u32)); 935 if (IS_ERR(timeline_handles)) { 936 r = PTR_ERR(timeline_handles); 937 goto free_syncobj_handles; 938 } 939 940 ptr = u64_to_user_ptr(wait_info->syncobj_timeline_points); 941 timeline_points = memdup_array_user(ptr, num_points, sizeof(u32)); 942 if (IS_ERR(timeline_points)) { 943 r = PTR_ERR(timeline_points); 944 goto free_timeline_handles; 945 } 946 947 num_read_bo_handles = wait_info->num_bo_read_handles; 948 ptr = u64_to_user_ptr(wait_info->bo_read_handles); 949 r = drm_gem_objects_lookup(filp, ptr, num_read_bo_handles, &gobj_read); 950 if (r) 951 goto free_timeline_points; 952 953 num_write_bo_handles = wait_info->num_bo_write_handles; 954 ptr = u64_to_user_ptr(wait_info->bo_write_handles); 955 r = drm_gem_objects_lookup(filp, ptr, num_write_bo_handles, 956 &gobj_write); 957 if (r) 958 goto put_gobj_read; 959 960 /* 961 * Passing num_fences = 0 means that userspace doesn't want to 962 * retrieve userq_fence_info. If num_fences = 0 we skip filling 963 * userq_fence_info and return the actual number of fences on 964 * args->num_fences. 965 */ 966 if (!wait_info->num_fences) { 967 r = amdgpu_userq_wait_count_fences(filp, wait_info, 968 syncobj_handles, 969 timeline_points, 970 timeline_handles, 971 gobj_write, 972 gobj_read); 973 } else { 974 r = amdgpu_userq_wait_return_fence_info(filp, wait_info, 975 syncobj_handles, 976 timeline_points, 977 timeline_handles, 978 gobj_write, 979 gobj_read); 980 } 981 982 while (num_write_bo_handles--) 983 drm_gem_object_put(gobj_write[num_write_bo_handles]); 984 kvfree(gobj_write); 985 986 put_gobj_read: 987 while (num_read_bo_handles--) 988 drm_gem_object_put(gobj_read[num_read_bo_handles]); 989 kvfree(gobj_read); 990 991 free_timeline_points: 992 kfree(timeline_points); 993 free_timeline_handles: 994 kfree(timeline_handles); 995 free_syncobj_handles: 996 kfree(syncobj_handles); 997 return r; 998 } 999