// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_vm.h"

#include <linux/dma-fence-array.h>
#include <linux/nospec.h>

#include <drm/drm_exec.h>
#include <drm/drm_print.h>
#include <drm/ttm/ttm_execbuf_util.h>
#include <drm/ttm/ttm_tt.h>
#include <drm/xe_drm.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/swap.h>

#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue.h"
#include "xe_gt.h"
#include "xe_gt_pagefault.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_migrate.h"
#include "xe_pat.h"
#include "xe_pm.h"
#include "xe_preempt_fence.h"
#include "xe_pt.h"
#include "xe_res_cursor.h"
#include "xe_sync.h"
#include "xe_trace.h"
#include "generated/xe_wa_oob.h"
#include "xe_wa.h"

#define TEST_VM_ASYNC_OPS_ERROR

static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
{
	return vm->gpuvm.r_obj;
}

/**
 * xe_vma_userptr_check_repin() - Advisory check for repin needed
 * @uvma: The userptr vma
 *
 * Check if the userptr vma has been invalidated since last successful
 * repin. The check is advisory only and the function can be called
 * without the vm->userptr.notifier_lock held. There is no guarantee that the
 * vma userptr will remain valid after a lockless check, so typically
 * the call needs to be followed by a proper check under the notifier_lock.
 *
 * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
 */
int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
{
	return mmu_interval_check_retry(&uvma->userptr.notifier,
					uvma->userptr.notifier_seq) ?
		-EAGAIN : 0;
}

int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
{
	struct xe_userptr *userptr = &uvma->userptr;
	struct xe_vma *vma = &uvma->vma;
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_device *xe = vm->xe;
	const unsigned long num_pages = xe_vma_size(vma) >> PAGE_SHIFT;
	struct page **pages;
	bool in_kthread = !current->mm;
	unsigned long notifier_seq;
	int pinned, ret, i;
	bool read_only = xe_vma_read_only(vma);

	lockdep_assert_held(&vm->lock);
	xe_assert(xe, xe_vma_is_userptr(vma));
retry:
	if (vma->gpuva.flags & XE_VMA_DESTROYED)
		return 0;

	notifier_seq = mmu_interval_read_begin(&userptr->notifier);
	if (notifier_seq == userptr->notifier_seq)
		return 0;

	pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	if (userptr->sg) {
		dma_unmap_sgtable(xe->drm.dev,
				  userptr->sg,
				  read_only ? DMA_TO_DEVICE :
				  DMA_BIDIRECTIONAL, 0);
		sg_free_table(userptr->sg);
		userptr->sg = NULL;
	}

	pinned = ret = 0;
	if (in_kthread) {
		if (!mmget_not_zero(userptr->notifier.mm)) {
			ret = -EFAULT;
			goto mm_closed;
		}
		kthread_use_mm(userptr->notifier.mm);
	}

	while (pinned < num_pages) {
		ret = get_user_pages_fast(xe_vma_userptr(vma) +
					  pinned * PAGE_SIZE,
					  num_pages - pinned,
					  read_only ?
0 : FOLL_WRITE, 116 &pages[pinned]); 117 if (ret < 0) { 118 if (in_kthread) 119 ret = 0; 120 break; 121 } 122 123 pinned += ret; 124 ret = 0; 125 } 126 127 if (in_kthread) { 128 kthread_unuse_mm(userptr->notifier.mm); 129 mmput(userptr->notifier.mm); 130 } 131 mm_closed: 132 if (ret) 133 goto out; 134 135 ret = sg_alloc_table_from_pages_segment(&userptr->sgt, pages, 136 pinned, 0, 137 (u64)pinned << PAGE_SHIFT, 138 xe_sg_segment_size(xe->drm.dev), 139 GFP_KERNEL); 140 if (ret) { 141 userptr->sg = NULL; 142 goto out; 143 } 144 userptr->sg = &userptr->sgt; 145 146 ret = dma_map_sgtable(xe->drm.dev, userptr->sg, 147 read_only ? DMA_TO_DEVICE : 148 DMA_BIDIRECTIONAL, 149 DMA_ATTR_SKIP_CPU_SYNC | 150 DMA_ATTR_NO_KERNEL_MAPPING); 151 if (ret) { 152 sg_free_table(userptr->sg); 153 userptr->sg = NULL; 154 goto out; 155 } 156 157 for (i = 0; i < pinned; ++i) { 158 if (!read_only) { 159 lock_page(pages[i]); 160 set_page_dirty(pages[i]); 161 unlock_page(pages[i]); 162 } 163 164 mark_page_accessed(pages[i]); 165 } 166 167 out: 168 release_pages(pages, pinned); 169 kvfree(pages); 170 171 if (!(ret < 0)) { 172 userptr->notifier_seq = notifier_seq; 173 if (xe_vma_userptr_check_repin(uvma) == -EAGAIN) 174 goto retry; 175 } 176 177 return ret < 0 ? ret : 0; 178 } 179 180 static bool preempt_fences_waiting(struct xe_vm *vm) 181 { 182 struct xe_exec_queue *q; 183 184 lockdep_assert_held(&vm->lock); 185 xe_vm_assert_held(vm); 186 187 list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { 188 if (!q->compute.pfence || 189 (q->compute.pfence && test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, 190 &q->compute.pfence->flags))) { 191 return true; 192 } 193 } 194 195 return false; 196 } 197 198 static void free_preempt_fences(struct list_head *list) 199 { 200 struct list_head *link, *next; 201 202 list_for_each_safe(link, next, list) 203 xe_preempt_fence_free(to_preempt_fence_from_link(link)); 204 } 205 206 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list, 207 unsigned int *count) 208 { 209 lockdep_assert_held(&vm->lock); 210 xe_vm_assert_held(vm); 211 212 if (*count >= vm->preempt.num_exec_queues) 213 return 0; 214 215 for (; *count < vm->preempt.num_exec_queues; ++(*count)) { 216 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc(); 217 218 if (IS_ERR(pfence)) 219 return PTR_ERR(pfence); 220 221 list_move_tail(xe_preempt_fence_link(pfence), list); 222 } 223 224 return 0; 225 } 226 227 static int wait_for_existing_preempt_fences(struct xe_vm *vm) 228 { 229 struct xe_exec_queue *q; 230 231 xe_vm_assert_held(vm); 232 233 list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { 234 if (q->compute.pfence) { 235 long timeout = dma_fence_wait(q->compute.pfence, false); 236 237 if (timeout < 0) 238 return -ETIME; 239 dma_fence_put(q->compute.pfence); 240 q->compute.pfence = NULL; 241 } 242 } 243 244 return 0; 245 } 246 247 static bool xe_vm_is_idle(struct xe_vm *vm) 248 { 249 struct xe_exec_queue *q; 250 251 xe_vm_assert_held(vm); 252 list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { 253 if (!xe_exec_queue_is_idle(q)) 254 return false; 255 } 256 257 return true; 258 } 259 260 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list) 261 { 262 struct list_head *link; 263 struct xe_exec_queue *q; 264 265 list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { 266 struct dma_fence *fence; 267 268 link = list->next; 269 xe_assert(vm->xe, link != list); 270 271 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link), 272 q, 
q->compute.context, 273 ++q->compute.seqno); 274 dma_fence_put(q->compute.pfence); 275 q->compute.pfence = fence; 276 } 277 } 278 279 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) 280 { 281 struct xe_exec_queue *q; 282 int err; 283 284 if (!vm->preempt.num_exec_queues) 285 return 0; 286 287 err = xe_bo_lock(bo, true); 288 if (err) 289 return err; 290 291 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues); 292 if (err) 293 goto out_unlock; 294 295 list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) 296 if (q->compute.pfence) { 297 dma_resv_add_fence(bo->ttm.base.resv, 298 q->compute.pfence, 299 DMA_RESV_USAGE_BOOKKEEP); 300 } 301 302 out_unlock: 303 xe_bo_unlock(bo); 304 return err; 305 } 306 307 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm, 308 struct drm_exec *exec) 309 { 310 struct xe_exec_queue *q; 311 312 lockdep_assert_held(&vm->lock); 313 xe_vm_assert_held(vm); 314 315 list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { 316 q->ops->resume(q); 317 318 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->compute.pfence, 319 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); 320 } 321 } 322 323 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 324 { 325 struct drm_gpuvm_exec vm_exec = { 326 .vm = &vm->gpuvm, 327 .flags = DRM_EXEC_INTERRUPTIBLE_WAIT, 328 .num_fences = 1, 329 }; 330 struct drm_exec *exec = &vm_exec.exec; 331 struct dma_fence *pfence; 332 int err; 333 bool wait; 334 335 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); 336 337 down_write(&vm->lock); 338 err = drm_gpuvm_exec_lock(&vm_exec); 339 if (err) 340 goto out_up_write; 341 342 pfence = xe_preempt_fence_create(q, q->compute.context, 343 ++q->compute.seqno); 344 if (!pfence) { 345 err = -ENOMEM; 346 goto out_fini; 347 } 348 349 list_add(&q->compute.link, &vm->preempt.exec_queues); 350 ++vm->preempt.num_exec_queues; 351 q->compute.pfence = pfence; 352 353 down_read(&vm->userptr.notifier_lock); 354 355 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence, 356 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); 357 358 /* 359 * Check to see if a preemption on VM is in flight or userptr 360 * invalidation, if so trigger this preempt fence to sync state with 361 * other preempt fences on the VM. 362 */ 363 wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm); 364 if (wait) 365 dma_fence_enable_sw_signaling(pfence); 366 367 up_read(&vm->userptr.notifier_lock); 368 369 out_fini: 370 drm_exec_fini(exec); 371 out_up_write: 372 up_write(&vm->lock); 373 374 return err; 375 } 376 377 /** 378 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM 379 * @vm: The VM. 380 * @q: The exec_queue 381 */ 382 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 383 { 384 if (!xe_vm_in_preempt_fence_mode(vm)) 385 return; 386 387 down_write(&vm->lock); 388 list_del(&q->compute.link); 389 --vm->preempt.num_exec_queues; 390 if (q->compute.pfence) { 391 dma_fence_enable_sw_signaling(q->compute.pfence); 392 dma_fence_put(q->compute.pfence); 393 q->compute.pfence = NULL; 394 } 395 up_write(&vm->lock); 396 } 397 398 /** 399 * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs 400 * that need repinning. 401 * @vm: The VM. 402 * 403 * This function checks for whether the VM has userptrs that need repinning, 404 * and provides a release-type barrier on the userptr.notifier_lock after 405 * checking. 
 *
 * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
 */
int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
{
	lockdep_assert_held_read(&vm->userptr.notifier_lock);

	return (list_empty(&vm->userptr.repin_list) &&
		list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
}

#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000

static void xe_vm_kill(struct xe_vm *vm)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);

	xe_vm_lock(vm, false);
	vm->flags |= XE_VM_FLAG_BANNED;
	trace_xe_vm_kill(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
		q->ops->kill(q);
	xe_vm_unlock(vm);

	/* TODO: Inform user the VM is banned */
}

/**
 * xe_vm_validate_should_retry() - Whether to retry after a validate error.
 * @exec: The drm_exec object used for locking before validation.
 * @err: The error returned from ttm_bo_validate().
 * @end: A ktime_t cookie that should be set to 0 before first use and
 * that should be reused on subsequent calls.
 *
 * With multiple active VMs, under memory pressure, it is possible that
 * ttm_bo_validate() runs into -EDEADLK and in such a case returns -ENOMEM.
 * Until ttm properly handles locking in such scenarios, the best thing the
 * driver can do is retry with a timeout. Check if that is necessary, and
 * if so unlock the drm_exec's objects while keeping the ticket to prepare
 * for a rerun.
 *
 * Return: true if a retry after drm_exec_init() is recommended;
 * false otherwise.
 */
bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
{
	ktime_t cur;

	if (err != -ENOMEM)
		return false;

	cur = ktime_get();
	*end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
	if (!ktime_before(cur, *end))
		return false;

	msleep(20);
	return true;
}
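/*
 * Intended usage (an illustrative sketch modeled on the rebind worker
 * further below, not a complete function): keep a ktime_t cookie
 * initialized to zero across retries of a drm_exec locking/validation
 * loop, and restart the loop for as long as this helper returns true:
 *
 *	struct drm_exec exec;
 *	ktime_t end = 0;
 *	int err;
 *
 * retry:
 *	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 *	drm_exec_until_all_locked(&exec) {
 *		err = drm_gpuvm_validate(&vm->gpuvm, &exec);
 *		drm_exec_retry_on_contention(&exec);
 *	}
 *	drm_exec_fini(&exec);
 *	if (err && xe_vm_validate_should_retry(&exec, err, &end))
 *		goto retry;
 */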
static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
{
	struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
	struct drm_gpuva *gpuva;
	int ret;

	lockdep_assert_held(&vm->lock);
	drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
		list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
			       &vm->rebind_list);

	ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
	if (ret)
		return ret;

	vm_bo->evicted = false;
	return 0;
}

static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
				 bool *done)
{
	int err;

	/*
	 * 1 fence for each preempt fence plus a fence for each tile from a
	 * possible rebind
	 */
	err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, vm->preempt.num_exec_queues +
				   vm->xe->info.tile_count);
	if (err)
		return err;

	if (xe_vm_is_idle(vm)) {
		vm->preempt.rebind_deactivated = true;
		*done = true;
		return 0;
	}

	if (!preempt_fences_waiting(vm)) {
		*done = true;
		return 0;
	}

	err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, vm->preempt.num_exec_queues);
	if (err)
		return err;

	err = wait_for_existing_preempt_fences(vm);
	if (err)
		return err;

	return drm_gpuvm_validate(&vm->gpuvm, exec);
}

static void preempt_rebind_work_func(struct work_struct *w)
{
	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
	struct drm_exec exec;
	struct dma_fence *rebind_fence;
	unsigned int fence_count = 0;
	LIST_HEAD(preempt_fences);
	ktime_t end = 0;
	int err = 0;
	long wait;
	int __maybe_unused tries = 0;

	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
	trace_xe_vm_rebind_worker_enter(vm);

	down_write(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm)) {
		up_write(&vm->lock);
		trace_xe_vm_rebind_worker_exit(vm);
		return;
	}

retry:
	if (xe_vm_userptr_check_repin(vm)) {
		err = xe_vm_userptr_pin(vm);
		if (err)
			goto out_unlock_outer;
	}

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);

	drm_exec_until_all_locked(&exec) {
		bool done = false;

		err = xe_preempt_work_begin(&exec, vm, &done);
		drm_exec_retry_on_contention(&exec);
		if (err || done) {
			drm_exec_fini(&exec);
			if (err && xe_vm_validate_should_retry(&exec, err, &end))
				err = -EAGAIN;

			goto out_unlock_outer;
		}
	}

	err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
	if (err)
		goto out_unlock;

	rebind_fence = xe_vm_rebind(vm, true);
	if (IS_ERR(rebind_fence)) {
		err = PTR_ERR(rebind_fence);
		goto out_unlock;
	}

	if (rebind_fence) {
		dma_fence_wait(rebind_fence, false);
		dma_fence_put(rebind_fence);
	}

	/* Wait on munmap style VM unbinds */
	wait = dma_resv_wait_timeout(xe_vm_resv(vm),
				     DMA_RESV_USAGE_KERNEL,
				     false, MAX_SCHEDULE_TIMEOUT);
	if (wait <= 0) {
		err = -ETIME;
		goto out_unlock;
	}
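/*
 * With CONFIG_DRM_XE_USERPTR_INVAL_INJECT enabled, the macro below forces
 * one artificial retry on the first pass (via the !(__tries)++ term) so the
 * -EAGAIN path of this worker gets exercised even without a real userptr
 * invalidation racing with it.
 */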
#define retry_required(__tries, __vm) \
	(IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
	 (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
	 __xe_vm_userptr_needs_repin(__vm))

	down_read(&vm->userptr.notifier_lock);
	if (retry_required(tries, vm)) {
		up_read(&vm->userptr.notifier_lock);
		err = -EAGAIN;
		goto out_unlock;
	}

#undef retry_required

	spin_lock(&vm->xe->ttm.lru_lock);
	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
	spin_unlock(&vm->xe->ttm.lru_lock);

	/* Point of no return. */
	arm_preempt_fences(vm, &preempt_fences);
	resume_and_reinstall_preempt_fences(vm, &exec);
	up_read(&vm->userptr.notifier_lock);

out_unlock:
	drm_exec_fini(&exec);
out_unlock_outer:
	if (err == -EAGAIN) {
		trace_xe_vm_rebind_worker_retry(vm);
		goto retry;
	}

	if (err) {
		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
		xe_vm_kill(vm);
	}
	up_write(&vm->lock);

	free_preempt_fences(&preempt_fences);

	trace_xe_vm_rebind_worker_exit(vm);
}

static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
				   const struct mmu_notifier_range *range,
				   unsigned long cur_seq)
{
	struct xe_userptr *userptr = container_of(mni, typeof(*userptr), notifier);
	struct xe_userptr_vma *uvma = container_of(userptr, typeof(*uvma), userptr);
	struct xe_vma *vma = &uvma->vma;
	struct xe_vm *vm = xe_vma_vm(vma);
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	long err;

	xe_assert(vm->xe, xe_vma_is_userptr(vma));
	trace_xe_vma_userptr_invalidate(vma);

	if (!mmu_notifier_range_blockable(range))
		return false;

	down_write(&vm->userptr.notifier_lock);
	mmu_interval_set_seq(mni, cur_seq);

	/* No need to stop gpu access if the userptr is not yet bound. */
	if (!userptr->initial_bind) {
		up_write(&vm->userptr.notifier_lock);
		return true;
	}

	/*
	 * Tell exec and rebind worker they need to repin and rebind this
	 * userptr.
	 */
	if (!xe_vm_in_fault_mode(vm) &&
	    !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) {
		spin_lock(&vm->userptr.invalidated_lock);
		list_move_tail(&userptr->invalidate_link,
			       &vm->userptr.invalidated);
		spin_unlock(&vm->userptr.invalidated_lock);
	}

	up_write(&vm->userptr.notifier_lock);

	/*
	 * Preempt fences turn into schedule disables, pipeline these.
	 * Note that even in fault mode, we need to wait for binds and
	 * unbinds to complete, and those are attached as BOOKKEEP fences
	 * to the vm.
	 */
	dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
			    DMA_RESV_USAGE_BOOKKEEP);
	dma_resv_for_each_fence_unlocked(&cursor, fence)
		dma_fence_enable_sw_signaling(fence);
	dma_resv_iter_end(&cursor);

	err = dma_resv_wait_timeout(xe_vm_resv(vm),
				    DMA_RESV_USAGE_BOOKKEEP,
				    false, MAX_SCHEDULE_TIMEOUT);
	XE_WARN_ON(err <= 0);

	if (xe_vm_in_fault_mode(vm)) {
		err = xe_vm_invalidate_vma(vma);
		XE_WARN_ON(err);
	}

	trace_xe_vma_userptr_invalidate_complete(vma);

	return true;
}

static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
	.invalidate = vma_userptr_invalidate,
};
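/*
 * The seq number published by mmu_interval_set_seq() in the callback above
 * (under the write side of vm->userptr.notifier_lock) pairs with the
 * lockless begin/check pattern on the pinning side, see
 * xe_vma_userptr_pin_pages() and xe_vma_userptr_check_repin(). A rough
 * sketch of that consumer side, using the names found elsewhere in this
 * file:
 *
 *	seq = mmu_interval_read_begin(&userptr->notifier);
 *	// pin the pages and rebuild the sg table
 *	down_read(&vm->userptr.notifier_lock);
 *	if (mmu_interval_read_retry(&userptr->notifier, seq)) {
 *		// an invalidation raced with us: unlock and repin
 *	}
 *	up_read(&vm->userptr.notifier_lock);
 */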
int xe_vm_userptr_pin(struct xe_vm *vm)
{
	struct xe_userptr_vma *uvma, *next;
	int err = 0;
	LIST_HEAD(tmp_evict);

	lockdep_assert_held_write(&vm->lock);

	/* Collect invalidated userptrs */
	spin_lock(&vm->userptr.invalidated_lock);
	list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
				 userptr.invalidate_link) {
		list_del_init(&uvma->userptr.invalidate_link);
		list_move_tail(&uvma->userptr.repin_link,
			       &vm->userptr.repin_list);
	}
	spin_unlock(&vm->userptr.invalidated_lock);

	/* Pin and move to temporary list */
	list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
				 userptr.repin_link) {
		err = xe_vma_userptr_pin_pages(uvma);
		if (err < 0)
			return err;

		list_del_init(&uvma->userptr.repin_link);
		list_move_tail(&uvma->vma.combined_links.rebind, &vm->rebind_list);
	}

	return 0;
}

/**
 * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
 * that need repinning.
 * @vm: The VM.
 *
 * This function does an advisory check for whether the VM has userptrs that
 * need repinning.
 *
 * Return: 0 if there are no indications of userptrs needing repinning,
 * -EAGAIN if there are.
 */
int xe_vm_userptr_check_repin(struct xe_vm *vm)
{
	return (list_empty_careful(&vm->userptr.repin_list) &&
		list_empty_careful(&vm->userptr.invalidated)) ?
0 : -EAGAIN; 755 } 756 757 static struct dma_fence * 758 xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q, 759 struct xe_sync_entry *syncs, u32 num_syncs, 760 bool first_op, bool last_op); 761 762 struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) 763 { 764 struct dma_fence *fence = NULL; 765 struct xe_vma *vma, *next; 766 767 lockdep_assert_held(&vm->lock); 768 if (xe_vm_in_lr_mode(vm) && !rebind_worker) 769 return NULL; 770 771 xe_vm_assert_held(vm); 772 list_for_each_entry_safe(vma, next, &vm->rebind_list, 773 combined_links.rebind) { 774 xe_assert(vm->xe, vma->tile_present); 775 776 list_del_init(&vma->combined_links.rebind); 777 dma_fence_put(fence); 778 if (rebind_worker) 779 trace_xe_vma_rebind_worker(vma); 780 else 781 trace_xe_vma_rebind_exec(vma); 782 fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false); 783 if (IS_ERR(fence)) 784 return fence; 785 } 786 787 return fence; 788 } 789 790 static void xe_vma_free(struct xe_vma *vma) 791 { 792 if (xe_vma_is_userptr(vma)) 793 kfree(to_userptr_vma(vma)); 794 else 795 kfree(vma); 796 } 797 798 #define VMA_CREATE_FLAG_READ_ONLY BIT(0) 799 #define VMA_CREATE_FLAG_IS_NULL BIT(1) 800 801 static struct xe_vma *xe_vma_create(struct xe_vm *vm, 802 struct xe_bo *bo, 803 u64 bo_offset_or_userptr, 804 u64 start, u64 end, 805 u16 pat_index, unsigned int flags) 806 { 807 struct xe_vma *vma; 808 struct xe_tile *tile; 809 u8 id; 810 bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY); 811 bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL); 812 813 xe_assert(vm->xe, start < end); 814 xe_assert(vm->xe, end < vm->size); 815 816 /* 817 * Allocate and ensure that the xe_vma_is_userptr() return 818 * matches what was allocated. 819 */ 820 if (!bo && !is_null) { 821 struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL); 822 823 if (!uvma) 824 return ERR_PTR(-ENOMEM); 825 826 vma = &uvma->vma; 827 } else { 828 vma = kzalloc(sizeof(*vma), GFP_KERNEL); 829 if (!vma) 830 return ERR_PTR(-ENOMEM); 831 832 if (is_null) 833 vma->gpuva.flags |= DRM_GPUVA_SPARSE; 834 if (bo) 835 vma->gpuva.gem.obj = &bo->ttm.base; 836 } 837 838 INIT_LIST_HEAD(&vma->combined_links.rebind); 839 840 INIT_LIST_HEAD(&vma->gpuva.gem.entry); 841 vma->gpuva.vm = &vm->gpuvm; 842 vma->gpuva.va.addr = start; 843 vma->gpuva.va.range = end - start + 1; 844 if (read_only) 845 vma->gpuva.flags |= XE_VMA_READ_ONLY; 846 847 for_each_tile(tile, vm->xe, id) 848 vma->tile_mask |= 0x1 << id; 849 850 if (GRAPHICS_VER(vm->xe) >= 20 || vm->xe->info.platform == XE_PVC) 851 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; 852 853 vma->pat_index = pat_index; 854 855 if (bo) { 856 struct drm_gpuvm_bo *vm_bo; 857 858 xe_bo_assert_held(bo); 859 860 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base); 861 if (IS_ERR(vm_bo)) { 862 xe_vma_free(vma); 863 return ERR_CAST(vm_bo); 864 } 865 866 drm_gpuvm_bo_extobj_add(vm_bo); 867 drm_gem_object_get(&bo->ttm.base); 868 vma->gpuva.gem.offset = bo_offset_or_userptr; 869 drm_gpuva_link(&vma->gpuva, vm_bo); 870 drm_gpuvm_bo_put(vm_bo); 871 } else /* userptr or null */ { 872 if (!is_null) { 873 struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; 874 u64 size = end - start + 1; 875 int err; 876 877 INIT_LIST_HEAD(&userptr->invalidate_link); 878 INIT_LIST_HEAD(&userptr->repin_link); 879 vma->gpuva.gem.offset = bo_offset_or_userptr; 880 881 err = mmu_interval_notifier_insert(&userptr->notifier, 882 current->mm, 883 xe_vma_userptr(vma), size, 884 &vma_userptr_notifier_ops); 885 if (err) { 886 xe_vma_free(vma); 887 return 
ERR_PTR(err);
			}

			userptr->notifier_seq = LONG_MAX;
		}

		xe_vm_get(vm);
	}

	return vma;
}

static void xe_vma_destroy_late(struct xe_vma *vma)
{
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_device *xe = vm->xe;
	bool read_only = xe_vma_read_only(vma);

	if (xe_vma_is_userptr(vma)) {
		struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;

		if (userptr->sg) {
			dma_unmap_sgtable(xe->drm.dev,
					  userptr->sg,
					  read_only ? DMA_TO_DEVICE :
					  DMA_BIDIRECTIONAL, 0);
			sg_free_table(userptr->sg);
			userptr->sg = NULL;
		}

		/*
		 * Since userptr pages are not pinned, we can't remove
		 * the notifier until we're sure the GPU is not accessing
		 * them anymore.
		 */
		mmu_interval_notifier_remove(&userptr->notifier);
		xe_vm_put(vm);
	} else if (xe_vma_is_null(vma)) {
		xe_vm_put(vm);
	} else {
		xe_bo_put(xe_vma_bo(vma));
	}

	xe_vma_free(vma);
}

static void vma_destroy_work_func(struct work_struct *w)
{
	struct xe_vma *vma =
		container_of(w, struct xe_vma, destroy_work);

	xe_vma_destroy_late(vma);
}

static void vma_destroy_cb(struct dma_fence *fence,
			   struct dma_fence_cb *cb)
{
	struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);

	INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
	queue_work(system_unbound_wq, &vma->destroy_work);
}

static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
{
	struct xe_vm *vm = xe_vma_vm(vma);

	lockdep_assert_held_write(&vm->lock);
	xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));

	if (xe_vma_is_userptr(vma)) {
		xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);

		spin_lock(&vm->userptr.invalidated_lock);
		list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
		spin_unlock(&vm->userptr.invalidated_lock);
	} else if (!xe_vma_is_null(vma)) {
		xe_bo_assert_held(xe_vma_bo(vma));

		drm_gpuva_unlink(&vma->gpuva);
	}

	xe_vm_assert_held(vm);
	if (fence) {
		int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
						 vma_destroy_cb);

		if (ret) {
			XE_WARN_ON(ret != -ENOENT);
			xe_vma_destroy_late(vma);
		}
	} else {
		xe_vma_destroy_late(vma);
	}
}

/**
 * xe_vm_prepare_vma() - drm_exec utility to lock a vma
 * @exec: The drm_exec object we're currently locking for.
 * @vma: The vma for which we want to lock the vm resv and any attached
 * object's resv.
 * @num_shared: The number of dma-fence slots to pre-allocate in the
 * objects' reservation objects.
 *
 * Return: 0 on success, negative error code on error. In particular
 * may return -EDEADLK on WW transaction contention and -EINTR if
 * an interruptible wait is terminated by a signal.
994 */ 995 int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma, 996 unsigned int num_shared) 997 { 998 struct xe_vm *vm = xe_vma_vm(vma); 999 struct xe_bo *bo = xe_vma_bo(vma); 1000 int err; 1001 1002 XE_WARN_ON(!vm); 1003 err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared); 1004 if (!err && bo && !bo->vm) 1005 err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared); 1006 1007 return err; 1008 } 1009 1010 static void xe_vma_destroy_unlocked(struct xe_vma *vma) 1011 { 1012 struct drm_exec exec; 1013 int err; 1014 1015 drm_exec_init(&exec, 0, 0); 1016 drm_exec_until_all_locked(&exec) { 1017 err = xe_vm_prepare_vma(&exec, vma, 0); 1018 drm_exec_retry_on_contention(&exec); 1019 if (XE_WARN_ON(err)) 1020 break; 1021 } 1022 1023 xe_vma_destroy(vma, NULL); 1024 1025 drm_exec_fini(&exec); 1026 } 1027 1028 struct xe_vma * 1029 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range) 1030 { 1031 struct drm_gpuva *gpuva; 1032 1033 lockdep_assert_held(&vm->lock); 1034 1035 if (xe_vm_is_closed_or_banned(vm)) 1036 return NULL; 1037 1038 xe_assert(vm->xe, start + range <= vm->size); 1039 1040 gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range); 1041 1042 return gpuva ? gpuva_to_vma(gpuva) : NULL; 1043 } 1044 1045 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma) 1046 { 1047 int err; 1048 1049 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1050 lockdep_assert_held(&vm->lock); 1051 1052 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva); 1053 XE_WARN_ON(err); /* Shouldn't be possible */ 1054 1055 return err; 1056 } 1057 1058 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma) 1059 { 1060 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1061 lockdep_assert_held(&vm->lock); 1062 1063 drm_gpuva_remove(&vma->gpuva); 1064 if (vm->usm.last_fault_vma == vma) 1065 vm->usm.last_fault_vma = NULL; 1066 } 1067 1068 static struct drm_gpuva_op *xe_vm_op_alloc(void) 1069 { 1070 struct xe_vma_op *op; 1071 1072 op = kzalloc(sizeof(*op), GFP_KERNEL); 1073 1074 if (unlikely(!op)) 1075 return NULL; 1076 1077 return &op->base; 1078 } 1079 1080 static void xe_vm_free(struct drm_gpuvm *gpuvm); 1081 1082 static struct drm_gpuvm_ops gpuvm_ops = { 1083 .op_alloc = xe_vm_op_alloc, 1084 .vm_bo_validate = xe_gpuvm_validate, 1085 .vm_free = xe_vm_free, 1086 }; 1087 1088 static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index) 1089 { 1090 u64 pte = 0; 1091 1092 if (pat_index & BIT(0)) 1093 pte |= XE_PPGTT_PTE_PAT0; 1094 1095 if (pat_index & BIT(1)) 1096 pte |= XE_PPGTT_PTE_PAT1; 1097 1098 return pte; 1099 } 1100 1101 static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index, 1102 u32 pt_level) 1103 { 1104 u64 pte = 0; 1105 1106 if (pat_index & BIT(0)) 1107 pte |= XE_PPGTT_PTE_PAT0; 1108 1109 if (pat_index & BIT(1)) 1110 pte |= XE_PPGTT_PTE_PAT1; 1111 1112 if (pat_index & BIT(2)) { 1113 if (pt_level) 1114 pte |= XE_PPGTT_PDE_PDPE_PAT2; 1115 else 1116 pte |= XE_PPGTT_PTE_PAT2; 1117 } 1118 1119 if (pat_index & BIT(3)) 1120 pte |= XELPG_PPGTT_PTE_PAT3; 1121 1122 if (pat_index & (BIT(4))) 1123 pte |= XE2_PPGTT_PTE_PAT4; 1124 1125 return pte; 1126 } 1127 1128 static u64 pte_encode_ps(u32 pt_level) 1129 { 1130 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL); 1131 1132 if (pt_level == 1) 1133 return XE_PDE_PS_2M; 1134 else if (pt_level == 2) 1135 return XE_PDPE_PS_1G; 1136 1137 return 0; 1138 } 1139 1140 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset, 1141 const u16 pat_index) 1142 { 1143 struct xe_device *xe = xe_bo_device(bo); 1144 u64 pde; 1145 1146 pde = 
xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1147 pde |= XE_PAGE_PRESENT | XE_PAGE_RW; 1148 pde |= pde_encode_pat_index(xe, pat_index); 1149 1150 return pde; 1151 } 1152 1153 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, 1154 u16 pat_index, u32 pt_level) 1155 { 1156 struct xe_device *xe = xe_bo_device(bo); 1157 u64 pte; 1158 1159 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1160 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1161 pte |= pte_encode_pat_index(xe, pat_index, pt_level); 1162 pte |= pte_encode_ps(pt_level); 1163 1164 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) 1165 pte |= XE_PPGTT_PTE_DM; 1166 1167 return pte; 1168 } 1169 1170 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, 1171 u16 pat_index, u32 pt_level) 1172 { 1173 struct xe_device *xe = xe_vma_vm(vma)->xe; 1174 1175 pte |= XE_PAGE_PRESENT; 1176 1177 if (likely(!xe_vma_read_only(vma))) 1178 pte |= XE_PAGE_RW; 1179 1180 pte |= pte_encode_pat_index(xe, pat_index, pt_level); 1181 pte |= pte_encode_ps(pt_level); 1182 1183 if (unlikely(xe_vma_is_null(vma))) 1184 pte |= XE_PTE_NULL; 1185 1186 return pte; 1187 } 1188 1189 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, 1190 u16 pat_index, 1191 u32 pt_level, bool devmem, u64 flags) 1192 { 1193 u64 pte; 1194 1195 /* Avoid passing random bits directly as flags */ 1196 xe_assert(xe, !(flags & ~XE_PTE_PS64)); 1197 1198 pte = addr; 1199 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1200 pte |= pte_encode_pat_index(xe, pat_index, pt_level); 1201 pte |= pte_encode_ps(pt_level); 1202 1203 if (devmem) 1204 pte |= XE_PPGTT_PTE_DM; 1205 1206 pte |= flags; 1207 1208 return pte; 1209 } 1210 1211 static const struct xe_pt_ops xelp_pt_ops = { 1212 .pte_encode_bo = xelp_pte_encode_bo, 1213 .pte_encode_vma = xelp_pte_encode_vma, 1214 .pte_encode_addr = xelp_pte_encode_addr, 1215 .pde_encode_bo = xelp_pde_encode_bo, 1216 }; 1217 1218 static void vm_destroy_work_func(struct work_struct *w); 1219 1220 /** 1221 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the 1222 * given tile and vm. 1223 * @xe: xe device. 1224 * @tile: tile to set up for. 1225 * @vm: vm to set up for. 1226 * 1227 * Sets up a pagetable tree with one page-table per level and a single 1228 * leaf PTE. All pagetable entries point to the single page-table or, 1229 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and 1230 * writes become NOPs. 1231 * 1232 * Return: 0 on success, negative error code on error. 
1233 */ 1234 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, 1235 struct xe_vm *vm) 1236 { 1237 u8 id = tile->id; 1238 int i; 1239 1240 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) { 1241 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i); 1242 if (IS_ERR(vm->scratch_pt[id][i])) 1243 return PTR_ERR(vm->scratch_pt[id][i]); 1244 1245 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); 1246 } 1247 1248 return 0; 1249 } 1250 1251 static void xe_vm_free_scratch(struct xe_vm *vm) 1252 { 1253 struct xe_tile *tile; 1254 u8 id; 1255 1256 if (!xe_vm_has_scratch(vm)) 1257 return; 1258 1259 for_each_tile(tile, vm->xe, id) { 1260 u32 i; 1261 1262 if (!vm->pt_root[id]) 1263 continue; 1264 1265 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i) 1266 if (vm->scratch_pt[id][i]) 1267 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL); 1268 } 1269 } 1270 1271 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) 1272 { 1273 struct drm_gem_object *vm_resv_obj; 1274 struct xe_vm *vm; 1275 int err, number_tiles = 0; 1276 struct xe_tile *tile; 1277 u8 id; 1278 1279 vm = kzalloc(sizeof(*vm), GFP_KERNEL); 1280 if (!vm) 1281 return ERR_PTR(-ENOMEM); 1282 1283 vm->xe = xe; 1284 1285 vm->size = 1ull << xe->info.va_bits; 1286 1287 vm->flags = flags; 1288 1289 init_rwsem(&vm->lock); 1290 1291 INIT_LIST_HEAD(&vm->rebind_list); 1292 1293 INIT_LIST_HEAD(&vm->userptr.repin_list); 1294 INIT_LIST_HEAD(&vm->userptr.invalidated); 1295 init_rwsem(&vm->userptr.notifier_lock); 1296 spin_lock_init(&vm->userptr.invalidated_lock); 1297 1298 INIT_WORK(&vm->destroy_work, vm_destroy_work_func); 1299 1300 INIT_LIST_HEAD(&vm->preempt.exec_queues); 1301 vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */ 1302 1303 for_each_tile(tile, xe, id) 1304 xe_range_fence_tree_init(&vm->rftree[id]); 1305 1306 vm->pt_ops = &xelp_pt_ops; 1307 1308 if (!(flags & XE_VM_FLAG_MIGRATION)) 1309 xe_device_mem_access_get(xe); 1310 1311 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); 1312 if (!vm_resv_obj) { 1313 err = -ENOMEM; 1314 goto err_no_resv; 1315 } 1316 1317 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm, 1318 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops); 1319 1320 drm_gem_object_put(vm_resv_obj); 1321 1322 err = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); 1323 if (err) 1324 goto err_close; 1325 1326 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) 1327 vm->flags |= XE_VM_FLAG_64K; 1328 1329 for_each_tile(tile, xe, id) { 1330 if (flags & XE_VM_FLAG_MIGRATION && 1331 tile->id != XE_VM_FLAG_TILE_ID(flags)) 1332 continue; 1333 1334 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level); 1335 if (IS_ERR(vm->pt_root[id])) { 1336 err = PTR_ERR(vm->pt_root[id]); 1337 vm->pt_root[id] = NULL; 1338 goto err_unlock_close; 1339 } 1340 } 1341 1342 if (xe_vm_has_scratch(vm)) { 1343 for_each_tile(tile, xe, id) { 1344 if (!vm->pt_root[id]) 1345 continue; 1346 1347 err = xe_vm_create_scratch(xe, tile, vm); 1348 if (err) 1349 goto err_unlock_close; 1350 } 1351 vm->batch_invalidate_tlb = true; 1352 } 1353 1354 if (flags & XE_VM_FLAG_LR_MODE) { 1355 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); 1356 vm->flags |= XE_VM_FLAG_LR_MODE; 1357 vm->batch_invalidate_tlb = false; 1358 } 1359 1360 /* Fill pt_root after allocating scratch tables */ 1361 for_each_tile(tile, xe, id) { 1362 if (!vm->pt_root[id]) 1363 continue; 1364 1365 xe_pt_populate_empty(tile, vm, vm->pt_root[id]); 1366 } 1367 dma_resv_unlock(xe_vm_resv(vm)); 1368 
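	/*
	 * The per-tile copy-class exec queues created below become the VM's
	 * default bind queues (vm->q[id]): page-table bind/unbind jobs fall
	 * back to them whenever the caller does not supply an exec queue.
	 */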
1369 /* Kernel migration VM shouldn't have a circular loop.. */ 1370 if (!(flags & XE_VM_FLAG_MIGRATION)) { 1371 for_each_tile(tile, xe, id) { 1372 struct xe_gt *gt = tile->primary_gt; 1373 struct xe_vm *migrate_vm; 1374 struct xe_exec_queue *q; 1375 u32 create_flags = EXEC_QUEUE_FLAG_VM; 1376 1377 if (!vm->pt_root[id]) 1378 continue; 1379 1380 migrate_vm = xe_migrate_get_vm(tile->migrate); 1381 q = xe_exec_queue_create_class(xe, gt, migrate_vm, 1382 XE_ENGINE_CLASS_COPY, 1383 create_flags); 1384 xe_vm_put(migrate_vm); 1385 if (IS_ERR(q)) { 1386 err = PTR_ERR(q); 1387 goto err_close; 1388 } 1389 vm->q[id] = q; 1390 number_tiles++; 1391 } 1392 } 1393 1394 if (number_tiles > 1) 1395 vm->composite_fence_ctx = dma_fence_context_alloc(1); 1396 1397 mutex_lock(&xe->usm.lock); 1398 if (flags & XE_VM_FLAG_FAULT_MODE) 1399 xe->usm.num_vm_in_fault_mode++; 1400 else if (!(flags & XE_VM_FLAG_MIGRATION)) 1401 xe->usm.num_vm_in_non_fault_mode++; 1402 mutex_unlock(&xe->usm.lock); 1403 1404 trace_xe_vm_create(vm); 1405 1406 return vm; 1407 1408 err_unlock_close: 1409 dma_resv_unlock(xe_vm_resv(vm)); 1410 err_close: 1411 xe_vm_close_and_put(vm); 1412 return ERR_PTR(err); 1413 1414 err_no_resv: 1415 for_each_tile(tile, xe, id) 1416 xe_range_fence_tree_fini(&vm->rftree[id]); 1417 kfree(vm); 1418 if (!(flags & XE_VM_FLAG_MIGRATION)) 1419 xe_device_mem_access_put(xe); 1420 return ERR_PTR(err); 1421 } 1422 1423 static void xe_vm_close(struct xe_vm *vm) 1424 { 1425 down_write(&vm->lock); 1426 vm->size = 0; 1427 up_write(&vm->lock); 1428 } 1429 1430 void xe_vm_close_and_put(struct xe_vm *vm) 1431 { 1432 LIST_HEAD(contested); 1433 struct xe_device *xe = vm->xe; 1434 struct xe_tile *tile; 1435 struct xe_vma *vma, *next_vma; 1436 struct drm_gpuva *gpuva, *next; 1437 u8 id; 1438 1439 xe_assert(xe, !vm->preempt.num_exec_queues); 1440 1441 xe_vm_close(vm); 1442 if (xe_vm_in_preempt_fence_mode(vm)) 1443 flush_work(&vm->preempt.rebind_work); 1444 1445 down_write(&vm->lock); 1446 for_each_tile(tile, xe, id) { 1447 if (vm->q[id]) 1448 xe_exec_queue_last_fence_put(vm->q[id], vm); 1449 } 1450 up_write(&vm->lock); 1451 1452 for_each_tile(tile, xe, id) { 1453 if (vm->q[id]) { 1454 xe_exec_queue_kill(vm->q[id]); 1455 xe_exec_queue_put(vm->q[id]); 1456 vm->q[id] = NULL; 1457 } 1458 } 1459 1460 down_write(&vm->lock); 1461 xe_vm_lock(vm, false); 1462 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) { 1463 vma = gpuva_to_vma(gpuva); 1464 1465 if (xe_vma_has_no_bo(vma)) { 1466 down_read(&vm->userptr.notifier_lock); 1467 vma->gpuva.flags |= XE_VMA_DESTROYED; 1468 up_read(&vm->userptr.notifier_lock); 1469 } 1470 1471 xe_vm_remove_vma(vm, vma); 1472 1473 /* easy case, remove from VMA? */ 1474 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) { 1475 list_del_init(&vma->combined_links.rebind); 1476 xe_vma_destroy(vma, NULL); 1477 continue; 1478 } 1479 1480 list_move_tail(&vma->combined_links.destroy, &contested); 1481 vma->gpuva.flags |= XE_VMA_DESTROYED; 1482 } 1483 1484 /* 1485 * All vm operations will add shared fences to resv. 1486 * The only exception is eviction for a shared object, 1487 * but even so, the unbind when evicted would still 1488 * install a fence to resv. Hence it's safe to 1489 * destroy the pagetables immediately. 
1490 */ 1491 xe_vm_free_scratch(vm); 1492 1493 for_each_tile(tile, xe, id) { 1494 if (vm->pt_root[id]) { 1495 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); 1496 vm->pt_root[id] = NULL; 1497 } 1498 } 1499 xe_vm_unlock(vm); 1500 1501 /* 1502 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL 1503 * Since we hold a refcount to the bo, we can remove and free 1504 * the members safely without locking. 1505 */ 1506 list_for_each_entry_safe(vma, next_vma, &contested, 1507 combined_links.destroy) { 1508 list_del_init(&vma->combined_links.destroy); 1509 xe_vma_destroy_unlocked(vma); 1510 } 1511 1512 up_write(&vm->lock); 1513 1514 mutex_lock(&xe->usm.lock); 1515 if (vm->flags & XE_VM_FLAG_FAULT_MODE) 1516 xe->usm.num_vm_in_fault_mode--; 1517 else if (!(vm->flags & XE_VM_FLAG_MIGRATION)) 1518 xe->usm.num_vm_in_non_fault_mode--; 1519 mutex_unlock(&xe->usm.lock); 1520 1521 for_each_tile(tile, xe, id) 1522 xe_range_fence_tree_fini(&vm->rftree[id]); 1523 1524 xe_vm_put(vm); 1525 } 1526 1527 static void vm_destroy_work_func(struct work_struct *w) 1528 { 1529 struct xe_vm *vm = 1530 container_of(w, struct xe_vm, destroy_work); 1531 struct xe_device *xe = vm->xe; 1532 struct xe_tile *tile; 1533 u8 id; 1534 void *lookup; 1535 1536 /* xe_vm_close_and_put was not called? */ 1537 xe_assert(xe, !vm->size); 1538 1539 if (!(vm->flags & XE_VM_FLAG_MIGRATION)) { 1540 xe_device_mem_access_put(xe); 1541 1542 if (xe->info.has_asid && vm->usm.asid) { 1543 mutex_lock(&xe->usm.lock); 1544 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); 1545 xe_assert(xe, lookup == vm); 1546 mutex_unlock(&xe->usm.lock); 1547 } 1548 } 1549 1550 for_each_tile(tile, xe, id) 1551 XE_WARN_ON(vm->pt_root[id]); 1552 1553 trace_xe_vm_free(vm); 1554 dma_fence_put(vm->rebind_fence); 1555 kfree(vm); 1556 } 1557 1558 static void xe_vm_free(struct drm_gpuvm *gpuvm) 1559 { 1560 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); 1561 1562 /* To destroy the VM we need to be able to sleep */ 1563 queue_work(system_unbound_wq, &vm->destroy_work); 1564 } 1565 1566 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) 1567 { 1568 struct xe_vm *vm; 1569 1570 mutex_lock(&xef->vm.lock); 1571 vm = xa_load(&xef->vm.xa, id); 1572 if (vm) 1573 xe_vm_get(vm); 1574 mutex_unlock(&xef->vm.lock); 1575 1576 return vm; 1577 } 1578 1579 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile) 1580 { 1581 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0, 1582 tile_to_xe(tile)->pat.idx[XE_CACHE_WB]); 1583 } 1584 1585 static struct xe_exec_queue * 1586 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 1587 { 1588 return q ? 
q : vm->q[0]; 1589 } 1590 1591 static struct dma_fence * 1592 xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q, 1593 struct xe_sync_entry *syncs, u32 num_syncs, 1594 bool first_op, bool last_op) 1595 { 1596 struct xe_vm *vm = xe_vma_vm(vma); 1597 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); 1598 struct xe_tile *tile; 1599 struct dma_fence *fence = NULL; 1600 struct dma_fence **fences = NULL; 1601 struct dma_fence_array *cf = NULL; 1602 int cur_fence = 0, i; 1603 int number_tiles = hweight8(vma->tile_present); 1604 int err; 1605 u8 id; 1606 1607 trace_xe_vma_unbind(vma); 1608 1609 if (number_tiles > 1) { 1610 fences = kmalloc_array(number_tiles, sizeof(*fences), 1611 GFP_KERNEL); 1612 if (!fences) 1613 return ERR_PTR(-ENOMEM); 1614 } 1615 1616 for_each_tile(tile, vm->xe, id) { 1617 if (!(vma->tile_present & BIT(id))) 1618 goto next; 1619 1620 fence = __xe_pt_unbind_vma(tile, vma, q ? q : vm->q[id], 1621 first_op ? syncs : NULL, 1622 first_op ? num_syncs : 0); 1623 if (IS_ERR(fence)) { 1624 err = PTR_ERR(fence); 1625 goto err_fences; 1626 } 1627 1628 if (fences) 1629 fences[cur_fence++] = fence; 1630 1631 next: 1632 if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list)) 1633 q = list_next_entry(q, multi_gt_list); 1634 } 1635 1636 if (fences) { 1637 cf = dma_fence_array_create(number_tiles, fences, 1638 vm->composite_fence_ctx, 1639 vm->composite_fence_seqno++, 1640 false); 1641 if (!cf) { 1642 --vm->composite_fence_seqno; 1643 err = -ENOMEM; 1644 goto err_fences; 1645 } 1646 } 1647 1648 fence = cf ? &cf->base : !fence ? 1649 xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence; 1650 if (last_op) { 1651 for (i = 0; i < num_syncs; i++) 1652 xe_sync_entry_signal(&syncs[i], NULL, fence); 1653 } 1654 1655 return fence; 1656 1657 err_fences: 1658 if (fences) { 1659 while (cur_fence) 1660 dma_fence_put(fences[--cur_fence]); 1661 kfree(fences); 1662 } 1663 1664 return ERR_PTR(err); 1665 } 1666 1667 static struct dma_fence * 1668 xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q, 1669 struct xe_sync_entry *syncs, u32 num_syncs, 1670 bool first_op, bool last_op) 1671 { 1672 struct xe_tile *tile; 1673 struct dma_fence *fence; 1674 struct dma_fence **fences = NULL; 1675 struct dma_fence_array *cf = NULL; 1676 struct xe_vm *vm = xe_vma_vm(vma); 1677 int cur_fence = 0, i; 1678 int number_tiles = hweight8(vma->tile_mask); 1679 int err; 1680 u8 id; 1681 1682 trace_xe_vma_bind(vma); 1683 1684 if (number_tiles > 1) { 1685 fences = kmalloc_array(number_tiles, sizeof(*fences), 1686 GFP_KERNEL); 1687 if (!fences) 1688 return ERR_PTR(-ENOMEM); 1689 } 1690 1691 for_each_tile(tile, vm->xe, id) { 1692 if (!(vma->tile_mask & BIT(id))) 1693 goto next; 1694 1695 fence = __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id], 1696 first_op ? syncs : NULL, 1697 first_op ? num_syncs : 0, 1698 vma->tile_present & BIT(id)); 1699 if (IS_ERR(fence)) { 1700 err = PTR_ERR(fence); 1701 goto err_fences; 1702 } 1703 1704 if (fences) 1705 fences[cur_fence++] = fence; 1706 1707 next: 1708 if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list)) 1709 q = list_next_entry(q, multi_gt_list); 1710 } 1711 1712 if (fences) { 1713 cf = dma_fence_array_create(number_tiles, fences, 1714 vm->composite_fence_ctx, 1715 vm->composite_fence_seqno++, 1716 false); 1717 if (!cf) { 1718 --vm->composite_fence_seqno; 1719 err = -ENOMEM; 1720 goto err_fences; 1721 } 1722 } 1723 1724 if (last_op) { 1725 for (i = 0; i < num_syncs; i++) 1726 xe_sync_entry_signal(&syncs[i], NULL, 1727 cf ? 
&cf->base : fence); 1728 } 1729 1730 return cf ? &cf->base : fence; 1731 1732 err_fences: 1733 if (fences) { 1734 while (cur_fence) 1735 dma_fence_put(fences[--cur_fence]); 1736 kfree(fences); 1737 } 1738 1739 return ERR_PTR(err); 1740 } 1741 1742 static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, 1743 struct xe_exec_queue *q, struct xe_sync_entry *syncs, 1744 u32 num_syncs, bool immediate, bool first_op, 1745 bool last_op) 1746 { 1747 struct dma_fence *fence; 1748 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); 1749 1750 xe_vm_assert_held(vm); 1751 1752 if (immediate) { 1753 fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op, 1754 last_op); 1755 if (IS_ERR(fence)) 1756 return PTR_ERR(fence); 1757 } else { 1758 int i; 1759 1760 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 1761 1762 fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm); 1763 if (last_op) { 1764 for (i = 0; i < num_syncs; i++) 1765 xe_sync_entry_signal(&syncs[i], NULL, fence); 1766 } 1767 } 1768 1769 if (last_op) 1770 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); 1771 dma_fence_put(fence); 1772 1773 return 0; 1774 } 1775 1776 static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, 1777 struct xe_bo *bo, struct xe_sync_entry *syncs, 1778 u32 num_syncs, bool immediate, bool first_op, 1779 bool last_op) 1780 { 1781 int err; 1782 1783 xe_vm_assert_held(vm); 1784 xe_bo_assert_held(bo); 1785 1786 if (bo && immediate) { 1787 err = xe_bo_validate(bo, vm, true); 1788 if (err) 1789 return err; 1790 } 1791 1792 return __xe_vm_bind(vm, vma, q, syncs, num_syncs, immediate, first_op, 1793 last_op); 1794 } 1795 1796 static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, 1797 struct xe_exec_queue *q, struct xe_sync_entry *syncs, 1798 u32 num_syncs, bool first_op, bool last_op) 1799 { 1800 struct dma_fence *fence; 1801 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); 1802 1803 xe_vm_assert_held(vm); 1804 xe_bo_assert_held(xe_vma_bo(vma)); 1805 1806 fence = xe_vm_unbind_vma(vma, q, syncs, num_syncs, first_op, last_op); 1807 if (IS_ERR(fence)) 1808 return PTR_ERR(fence); 1809 1810 xe_vma_destroy(vma, fence); 1811 if (last_op) 1812 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); 1813 dma_fence_put(fence); 1814 1815 return 0; 1816 } 1817 1818 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ 1819 DRM_XE_VM_CREATE_FLAG_LR_MODE | \ 1820 DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 1821 1822 int xe_vm_create_ioctl(struct drm_device *dev, void *data, 1823 struct drm_file *file) 1824 { 1825 struct xe_device *xe = to_xe_device(dev); 1826 struct xe_file *xef = to_xe_file(file); 1827 struct drm_xe_vm_create *args = data; 1828 struct xe_tile *tile; 1829 struct xe_vm *vm; 1830 u32 id, asid; 1831 int err; 1832 u32 flags = 0; 1833 1834 if (XE_IOCTL_DBG(xe, args->extensions)) 1835 return -EINVAL; 1836 1837 if (XE_WA(xe_root_mmio_gt(xe), 14016763929)) 1838 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE; 1839 1840 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && 1841 !xe->info.has_usm)) 1842 return -EINVAL; 1843 1844 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 1845 return -EINVAL; 1846 1847 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS)) 1848 return -EINVAL; 1849 1850 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE && 1851 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) 1852 return -EINVAL; 1853 1854 if (XE_IOCTL_DBG(xe, !(args->flags & 
DRM_XE_VM_CREATE_FLAG_LR_MODE) && 1855 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) 1856 return -EINVAL; 1857 1858 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && 1859 xe_device_in_non_fault_mode(xe))) 1860 return -EINVAL; 1861 1862 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) && 1863 xe_device_in_fault_mode(xe))) 1864 return -EINVAL; 1865 1866 if (XE_IOCTL_DBG(xe, args->extensions)) 1867 return -EINVAL; 1868 1869 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE) 1870 flags |= XE_VM_FLAG_SCRATCH_PAGE; 1871 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) 1872 flags |= XE_VM_FLAG_LR_MODE; 1873 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 1874 flags |= XE_VM_FLAG_FAULT_MODE; 1875 1876 vm = xe_vm_create(xe, flags); 1877 if (IS_ERR(vm)) 1878 return PTR_ERR(vm); 1879 1880 mutex_lock(&xef->vm.lock); 1881 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); 1882 mutex_unlock(&xef->vm.lock); 1883 if (err) 1884 goto err_close_and_put; 1885 1886 if (xe->info.has_asid) { 1887 mutex_lock(&xe->usm.lock); 1888 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, 1889 XA_LIMIT(1, XE_MAX_ASID - 1), 1890 &xe->usm.next_asid, GFP_KERNEL); 1891 mutex_unlock(&xe->usm.lock); 1892 if (err < 0) 1893 goto err_free_id; 1894 1895 vm->usm.asid = asid; 1896 } 1897 1898 args->vm_id = id; 1899 vm->xef = xef; 1900 1901 /* Record BO memory for VM pagetable created against client */ 1902 for_each_tile(tile, xe, id) 1903 if (vm->pt_root[id]) 1904 xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo); 1905 1906 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) 1907 /* Warning: Security issue - never enable by default */ 1908 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); 1909 #endif 1910 1911 return 0; 1912 1913 err_free_id: 1914 mutex_lock(&xef->vm.lock); 1915 xa_erase(&xef->vm.xa, id); 1916 mutex_unlock(&xef->vm.lock); 1917 err_close_and_put: 1918 xe_vm_close_and_put(vm); 1919 1920 return err; 1921 } 1922 1923 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, 1924 struct drm_file *file) 1925 { 1926 struct xe_device *xe = to_xe_device(dev); 1927 struct xe_file *xef = to_xe_file(file); 1928 struct drm_xe_vm_destroy *args = data; 1929 struct xe_vm *vm; 1930 int err = 0; 1931 1932 if (XE_IOCTL_DBG(xe, args->pad) || 1933 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 1934 return -EINVAL; 1935 1936 mutex_lock(&xef->vm.lock); 1937 vm = xa_load(&xef->vm.xa, args->vm_id); 1938 if (XE_IOCTL_DBG(xe, !vm)) 1939 err = -ENOENT; 1940 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues)) 1941 err = -EBUSY; 1942 else 1943 xa_erase(&xef->vm.xa, args->vm_id); 1944 mutex_unlock(&xef->vm.lock); 1945 1946 if (!err) 1947 xe_vm_close_and_put(vm); 1948 1949 return err; 1950 } 1951 1952 static const u32 region_to_mem_type[] = { 1953 XE_PL_TT, 1954 XE_PL_VRAM0, 1955 XE_PL_VRAM1, 1956 }; 1957 1958 static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, 1959 struct xe_exec_queue *q, u32 region, 1960 struct xe_sync_entry *syncs, u32 num_syncs, 1961 bool first_op, bool last_op) 1962 { 1963 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); 1964 int err; 1965 1966 xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type)); 1967 1968 if (!xe_vma_has_no_bo(vma)) { 1969 err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]); 1970 if (err) 1971 return err; 1972 } 1973 1974 if (vma->tile_mask != (vma->tile_present & ~vma->usm.tile_invalidated)) { 1975 return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, 
num_syncs, 1976 true, first_op, last_op); 1977 } else { 1978 int i; 1979 1980 /* Nothing to do, signal fences now */ 1981 if (last_op) { 1982 for (i = 0; i < num_syncs; i++) { 1983 struct dma_fence *fence = 1984 xe_exec_queue_last_fence_get(wait_exec_queue, vm); 1985 1986 xe_sync_entry_signal(&syncs[i], NULL, fence); 1987 } 1988 } 1989 1990 return 0; 1991 } 1992 } 1993 1994 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma, 1995 bool post_commit) 1996 { 1997 down_read(&vm->userptr.notifier_lock); 1998 vma->gpuva.flags |= XE_VMA_DESTROYED; 1999 up_read(&vm->userptr.notifier_lock); 2000 if (post_commit) 2001 xe_vm_remove_vma(vm, vma); 2002 } 2003 2004 #undef ULL 2005 #define ULL unsigned long long 2006 2007 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) 2008 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 2009 { 2010 struct xe_vma *vma; 2011 2012 switch (op->op) { 2013 case DRM_GPUVA_OP_MAP: 2014 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx", 2015 (ULL)op->map.va.addr, (ULL)op->map.va.range); 2016 break; 2017 case DRM_GPUVA_OP_REMAP: 2018 vma = gpuva_to_vma(op->remap.unmap->va); 2019 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 2020 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 2021 op->remap.unmap->keep ? 1 : 0); 2022 if (op->remap.prev) 2023 vm_dbg(&xe->drm, 2024 "REMAP:PREV: addr=0x%016llx, range=0x%016llx", 2025 (ULL)op->remap.prev->va.addr, 2026 (ULL)op->remap.prev->va.range); 2027 if (op->remap.next) 2028 vm_dbg(&xe->drm, 2029 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx", 2030 (ULL)op->remap.next->va.addr, 2031 (ULL)op->remap.next->va.range); 2032 break; 2033 case DRM_GPUVA_OP_UNMAP: 2034 vma = gpuva_to_vma(op->unmap.va); 2035 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 2036 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 2037 op->unmap.keep ? 1 : 0); 2038 break; 2039 case DRM_GPUVA_OP_PREFETCH: 2040 vma = gpuva_to_vma(op->prefetch.va); 2041 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx", 2042 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma)); 2043 break; 2044 default: 2045 drm_warn(&xe->drm, "NOT POSSIBLE"); 2046 } 2047 } 2048 #else 2049 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 2050 { 2051 } 2052 #endif 2053 2054 /* 2055 * Create operations list from IOCTL arguments, setup operations fields so parse 2056 * and commit steps are decoupled from IOCTL arguments. This step can fail. 2057 */ 2058 static struct drm_gpuva_ops * 2059 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, 2060 u64 bo_offset_or_userptr, u64 addr, u64 range, 2061 u32 operation, u32 flags, 2062 u32 prefetch_region, u16 pat_index) 2063 { 2064 struct drm_gem_object *obj = bo ? 
&bo->ttm.base : NULL; 2065 struct drm_gpuva_ops *ops; 2066 struct drm_gpuva_op *__op; 2067 struct xe_vma_op *op; 2068 struct drm_gpuvm_bo *vm_bo; 2069 int err; 2070 2071 lockdep_assert_held_write(&vm->lock); 2072 2073 vm_dbg(&vm->xe->drm, 2074 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx", 2075 operation, (ULL)addr, (ULL)range, 2076 (ULL)bo_offset_or_userptr); 2077 2078 switch (operation) { 2079 case DRM_XE_VM_BIND_OP_MAP: 2080 case DRM_XE_VM_BIND_OP_MAP_USERPTR: 2081 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range, 2082 obj, bo_offset_or_userptr); 2083 break; 2084 case DRM_XE_VM_BIND_OP_UNMAP: 2085 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range); 2086 break; 2087 case DRM_XE_VM_BIND_OP_PREFETCH: 2088 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range); 2089 break; 2090 case DRM_XE_VM_BIND_OP_UNMAP_ALL: 2091 xe_assert(vm->xe, bo); 2092 2093 err = xe_bo_lock(bo, true); 2094 if (err) 2095 return ERR_PTR(err); 2096 2097 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj); 2098 if (IS_ERR(vm_bo)) { 2099 xe_bo_unlock(bo); 2100 return ERR_CAST(vm_bo); 2101 } 2102 2103 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo); 2104 drm_gpuvm_bo_put(vm_bo); 2105 xe_bo_unlock(bo); 2106 break; 2107 default: 2108 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2109 ops = ERR_PTR(-EINVAL); 2110 } 2111 if (IS_ERR(ops)) 2112 return ops; 2113 2114 #ifdef TEST_VM_ASYNC_OPS_ERROR 2115 if (operation & FORCE_ASYNC_OP_ERROR) { 2116 op = list_first_entry_or_null(&ops->list, struct xe_vma_op, 2117 base.entry); 2118 if (op) 2119 op->inject_error = true; 2120 } 2121 #endif 2122 2123 drm_gpuva_for_each_op(__op, ops) { 2124 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2125 2126 if (__op->op == DRM_GPUVA_OP_MAP) { 2127 op->map.immediate = 2128 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; 2129 op->map.read_only = 2130 flags & DRM_XE_VM_BIND_FLAG_READONLY; 2131 op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 2132 op->map.pat_index = pat_index; 2133 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { 2134 op->prefetch.region = prefetch_region; 2135 } 2136 2137 print_op(vm->xe, __op); 2138 } 2139 2140 return ops; 2141 } 2142 2143 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, 2144 u16 pat_index, unsigned int flags) 2145 { 2146 struct xe_bo *bo = op->gem.obj ? 

static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
			      u16 pat_index, unsigned int flags)
{
	struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
	struct drm_exec exec;
	struct xe_vma *vma;
	int err;

	lockdep_assert_held_write(&vm->lock);

	if (bo) {
		drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
		drm_exec_until_all_locked(&exec) {
			err = 0;
			if (!bo->vm) {
				err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
				drm_exec_retry_on_contention(&exec);
			}
			if (!err) {
				err = drm_exec_lock_obj(&exec, &bo->ttm.base);
				drm_exec_retry_on_contention(&exec);
			}
			if (err) {
				drm_exec_fini(&exec);
				return ERR_PTR(err);
			}
		}
	}
	vma = xe_vma_create(vm, bo, op->gem.offset,
			    op->va.addr, op->va.addr +
			    op->va.range - 1, pat_index, flags);
	if (bo)
		drm_exec_fini(&exec);

	if (xe_vma_is_userptr(vma)) {
		err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
		if (err) {
			prep_vma_destroy(vm, vma, false);
			xe_vma_destroy_unlocked(vma);
			return ERR_PTR(err);
		}
	} else if (!xe_vma_has_no_bo(vma) && !bo->vm) {
		err = add_preempt_fences(vm, bo);
		if (err) {
			prep_vma_destroy(vm, vma, false);
			xe_vma_destroy_unlocked(vma);
			return ERR_PTR(err);
		}
	}

	return vma;
}

static u64 xe_vma_max_pte_size(struct xe_vma *vma)
{
	if (vma->gpuva.flags & XE_VMA_PTE_1G)
		return SZ_1G;
	else if (vma->gpuva.flags & XE_VMA_PTE_2M)
		return SZ_2M;

	return SZ_4K;
}

static u64 xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
{
	switch (size) {
	case SZ_1G:
		vma->gpuva.flags |= XE_VMA_PTE_1G;
		break;
	case SZ_2M:
		vma->gpuva.flags |= XE_VMA_PTE_2M;
		break;
	}

	return SZ_4K;
}

static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
{
	int err = 0;

	lockdep_assert_held_write(&vm->lock);

	switch (op->base.op) {
	case DRM_GPUVA_OP_MAP:
		err |= xe_vm_insert_vma(vm, op->map.vma);
		if (!err)
			op->flags |= XE_VMA_OP_COMMITTED;
		break;
	case DRM_GPUVA_OP_REMAP:
	{
		u8 tile_present =
			gpuva_to_vma(op->base.remap.unmap->va)->tile_present;

		prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
				 true);
		op->flags |= XE_VMA_OP_COMMITTED;

		if (op->remap.prev) {
			err |= xe_vm_insert_vma(vm, op->remap.prev);
			if (!err)
				op->flags |= XE_VMA_OP_PREV_COMMITTED;
			if (!err && op->remap.skip_prev) {
				op->remap.prev->tile_present =
					tile_present;
				op->remap.prev = NULL;
			}
		}
		if (op->remap.next) {
			err |= xe_vm_insert_vma(vm, op->remap.next);
			if (!err)
				op->flags |= XE_VMA_OP_NEXT_COMMITTED;
			if (!err && op->remap.skip_next) {
				op->remap.next->tile_present =
					tile_present;
				op->remap.next = NULL;
			}
		}

		/* Adjust for partial unbind after removing VMA from VM */
		if (!err) {
			op->base.remap.unmap->va->va.addr = op->remap.start;
			op->base.remap.unmap->va->va.range = op->remap.range;
		}
		break;
	}
	case DRM_GPUVA_OP_UNMAP:
		prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
		op->flags |= XE_VMA_OP_COMMITTED;
		break;
	case DRM_GPUVA_OP_PREFETCH:
		op->flags |= XE_VMA_OP_COMMITTED;
		break;
	default:
		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
	}

	return err;
}
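
/*
 * Note (added for clarity): vm_bind_ioctl_ops_parse() walks the GPUVA ops
 * produced by vm_bind_ioctl_ops_create(), materializes the new VMAs (MAP ops
 * and the REMAP prev/next remains), commits every op into the VM via
 * xe_vma_op_commit(), and tags the first and last ops of the IOCTL
 * (XE_VMA_OP_FIRST / XE_VMA_OP_LAST) with the sync entries so they are
 * handled once per IOCTL rather than once per GPUVA op.
 */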

static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
				   struct drm_gpuva_ops *ops,
				   struct xe_sync_entry *syncs, u32 num_syncs,
				   struct list_head *ops_list, bool last)
{
	struct xe_vma_op *last_op = NULL;
	struct drm_gpuva_op *__op;
	int err = 0;

	lockdep_assert_held_write(&vm->lock);

	drm_gpuva_for_each_op(__op, ops) {
		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
		struct xe_vma *vma;
		bool first = list_empty(ops_list);
		unsigned int flags = 0;

		INIT_LIST_HEAD(&op->link);
		list_add_tail(&op->link, ops_list);

		if (first) {
			op->flags |= XE_VMA_OP_FIRST;
			op->num_syncs = num_syncs;
			op->syncs = syncs;
		}

		op->q = q;

		switch (op->base.op) {
		case DRM_GPUVA_OP_MAP:
		{
			flags |= op->map.read_only ?
				VMA_CREATE_FLAG_READ_ONLY : 0;
			flags |= op->map.is_null ?
				VMA_CREATE_FLAG_IS_NULL : 0;

			vma = new_vma(vm, &op->base.map, op->map.pat_index,
				      flags);
			if (IS_ERR(vma))
				return PTR_ERR(vma);

			op->map.vma = vma;
			break;
		}
		case DRM_GPUVA_OP_REMAP:
		{
			struct xe_vma *old =
				gpuva_to_vma(op->base.remap.unmap->va);

			op->remap.start = xe_vma_start(old);
			op->remap.range = xe_vma_size(old);

			if (op->base.remap.prev) {
				flags |= op->base.remap.unmap->va->flags &
					XE_VMA_READ_ONLY ?
					VMA_CREATE_FLAG_READ_ONLY : 0;
				flags |= op->base.remap.unmap->va->flags &
					DRM_GPUVA_SPARSE ?
					VMA_CREATE_FLAG_IS_NULL : 0;

				vma = new_vma(vm, op->base.remap.prev,
					      old->pat_index, flags);
				if (IS_ERR(vma))
					return PTR_ERR(vma);

				op->remap.prev = vma;

				/*
				 * Userptr creates a new SG mapping so
				 * we must also rebind.
				 */
				op->remap.skip_prev = !xe_vma_is_userptr(old) &&
					IS_ALIGNED(xe_vma_end(vma),
						   xe_vma_max_pte_size(old));
				if (op->remap.skip_prev) {
					xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
					op->remap.range -=
						xe_vma_end(vma) -
						xe_vma_start(old);
					op->remap.start = xe_vma_end(vma);
				}
			}

			if (op->base.remap.next) {
				flags |= op->base.remap.unmap->va->flags &
					XE_VMA_READ_ONLY ?
					VMA_CREATE_FLAG_READ_ONLY : 0;
				flags |= op->base.remap.unmap->va->flags &
					DRM_GPUVA_SPARSE ?
					VMA_CREATE_FLAG_IS_NULL : 0;

				vma = new_vma(vm, op->base.remap.next,
					      old->pat_index, flags);
				if (IS_ERR(vma))
					return PTR_ERR(vma);

				op->remap.next = vma;

				/*
				 * Userptr creates a new SG mapping so
				 * we must also rebind.
				 */
				op->remap.skip_next = !xe_vma_is_userptr(old) &&
					IS_ALIGNED(xe_vma_start(vma),
						   xe_vma_max_pte_size(old));
				if (op->remap.skip_next) {
					xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
					op->remap.range -=
						xe_vma_end(old) -
						xe_vma_start(vma);
				}
			}
			break;
		}
		case DRM_GPUVA_OP_UNMAP:
		case DRM_GPUVA_OP_PREFETCH:
			/* Nothing to do */
			break;
		default:
			drm_warn(&vm->xe->drm, "NOT POSSIBLE");
		}

		last_op = op;

		err = xe_vma_op_commit(vm, op);
		if (err)
			return err;
	}

	/* FIXME: Unhandled corner case */
	XE_WARN_ON(!last_op && last && !list_empty(ops_list));

	if (!last_op)
		return 0;

	last_op->ops = ops;
	if (last) {
		last_op->flags |= XE_VMA_OP_LAST;
		last_op->num_syncs = num_syncs;
		last_op->syncs = syncs;
	}

	return 0;
}

static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
		      struct xe_vma *vma, struct xe_vma_op *op)
{
	int err;

	lockdep_assert_held_write(&vm->lock);

	err = xe_vm_prepare_vma(exec, vma, 1);
	if (err)
		return err;

	xe_vm_assert_held(vm);
	xe_bo_assert_held(xe_vma_bo(vma));

	switch (op->base.op) {
	case DRM_GPUVA_OP_MAP:
		err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
				 op->syncs, op->num_syncs,
				 op->map.immediate || !xe_vm_in_fault_mode(vm),
				 op->flags & XE_VMA_OP_FIRST,
				 op->flags & XE_VMA_OP_LAST);
		break;
	case DRM_GPUVA_OP_REMAP:
	{
		bool prev = !!op->remap.prev;
		bool next = !!op->remap.next;

		if (!op->remap.unmap_done) {
			if (prev || next)
				vma->gpuva.flags |= XE_VMA_FIRST_REBIND;
			err = xe_vm_unbind(vm, vma, op->q, op->syncs,
					   op->num_syncs,
					   op->flags & XE_VMA_OP_FIRST,
					   op->flags & XE_VMA_OP_LAST &&
					   !prev && !next);
			if (err)
				break;
			op->remap.unmap_done = true;
		}

		if (prev) {
			op->remap.prev->gpuva.flags |= XE_VMA_LAST_REBIND;
			err = xe_vm_bind(vm, op->remap.prev, op->q,
					 xe_vma_bo(op->remap.prev), op->syncs,
					 op->num_syncs, true, false,
					 op->flags & XE_VMA_OP_LAST && !next);
			op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND;
			if (err)
				break;
			op->remap.prev = NULL;
		}

		if (next) {
			op->remap.next->gpuva.flags |= XE_VMA_LAST_REBIND;
			err = xe_vm_bind(vm, op->remap.next, op->q,
					 xe_vma_bo(op->remap.next),
					 op->syncs, op->num_syncs,
					 true, false,
					 op->flags & XE_VMA_OP_LAST);
			op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND;
			if (err)
				break;
			op->remap.next = NULL;
		}

		break;
	}
	case DRM_GPUVA_OP_UNMAP:
		err = xe_vm_unbind(vm, vma, op->q, op->syncs,
				   op->num_syncs, op->flags & XE_VMA_OP_FIRST,
				   op->flags & XE_VMA_OP_LAST);
		break;
	case DRM_GPUVA_OP_PREFETCH:
		err = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region,
				     op->syncs, op->num_syncs,
				     op->flags & XE_VMA_OP_FIRST,
				     op->flags & XE_VMA_OP_LAST);
		break;
	default:
		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
	}

	if (err)
		trace_xe_vma_fail(vma);

	return err;
}
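
/*
 * Illustrative note (added for clarity, not driver code): __xe_vma_op_execute()
 * below uses the standard drm_exec retry pattern. The general shape of that
 * pattern is:
 *
 *	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 *	drm_exec_until_all_locked(&exec) {
 *		err = lock_objects_and_do_work(&exec);	// placeholder name
 *		drm_exec_retry_on_contention(&exec);
 *		if (err)
 *			break;
 *	}
 *	drm_exec_fini(&exec);
 *
 * On ww-mutex contention the loop body is simply re-run with the contended
 * object locked first; an -EAGAIN from a userptr op additionally triggers a
 * repin and a full retry (see retry_userptr below).
 */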

static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
			       struct xe_vma_op *op)
{
	struct drm_exec exec;
	int err;

retry_userptr:
	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
	drm_exec_until_all_locked(&exec) {
		err = op_execute(&exec, vm, vma, op);
		drm_exec_retry_on_contention(&exec);
		if (err)
			break;
	}
	drm_exec_fini(&exec);

	if (err == -EAGAIN && xe_vma_is_userptr(vma)) {
		lockdep_assert_held_write(&vm->lock);
		err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
		if (!err)
			goto retry_userptr;

		trace_xe_vma_fail(vma);
	}

	return err;
}

static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
{
	int ret = 0;

	lockdep_assert_held_write(&vm->lock);

#ifdef TEST_VM_ASYNC_OPS_ERROR
	if (op->inject_error) {
		op->inject_error = false;
		return -ENOMEM;
	}
#endif

	switch (op->base.op) {
	case DRM_GPUVA_OP_MAP:
		ret = __xe_vma_op_execute(vm, op->map.vma, op);
		break;
	case DRM_GPUVA_OP_REMAP:
	{
		struct xe_vma *vma;

		if (!op->remap.unmap_done)
			vma = gpuva_to_vma(op->base.remap.unmap->va);
		else if (op->remap.prev)
			vma = op->remap.prev;
		else
			vma = op->remap.next;

		ret = __xe_vma_op_execute(vm, vma, op);
		break;
	}
	case DRM_GPUVA_OP_UNMAP:
		ret = __xe_vma_op_execute(vm, gpuva_to_vma(op->base.unmap.va),
					  op);
		break;
	case DRM_GPUVA_OP_PREFETCH:
		ret = __xe_vma_op_execute(vm,
					  gpuva_to_vma(op->base.prefetch.va),
					  op);
		break;
	default:
		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
	}

	return ret;
}

static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op)
{
	bool last = op->flags & XE_VMA_OP_LAST;

	if (last) {
		while (op->num_syncs--)
			xe_sync_entry_cleanup(&op->syncs[op->num_syncs]);
		kfree(op->syncs);
		if (op->q)
			xe_exec_queue_put(op->q);
	}
	if (!list_empty(&op->link))
		list_del(&op->link);
	if (op->ops)
		drm_gpuva_ops_free(&vm->gpuvm, op->ops);
	if (last)
		xe_vm_put(vm);
}

static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
			     bool post_commit, bool prev_post_commit,
			     bool next_post_commit)
{
	lockdep_assert_held_write(&vm->lock);

	switch (op->base.op) {
	case DRM_GPUVA_OP_MAP:
		if (op->map.vma) {
			prep_vma_destroy(vm, op->map.vma, post_commit);
			xe_vma_destroy_unlocked(op->map.vma);
		}
		break;
	case DRM_GPUVA_OP_UNMAP:
	{
		struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);

		if (vma) {
			down_read(&vm->userptr.notifier_lock);
			vma->gpuva.flags &= ~XE_VMA_DESTROYED;
			up_read(&vm->userptr.notifier_lock);
			if (post_commit)
				xe_vm_insert_vma(vm, vma);
		}
		break;
	}
	case DRM_GPUVA_OP_REMAP:
	{
		struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);

		if (op->remap.prev) {
			prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
			xe_vma_destroy_unlocked(op->remap.prev);
		}
		if (op->remap.next) {
			prep_vma_destroy(vm, op->remap.next, next_post_commit);
			xe_vma_destroy_unlocked(op->remap.next);
		}
		if (vma) {
			down_read(&vm->userptr.notifier_lock);
			vma->gpuva.flags &= ~XE_VMA_DESTROYED;
			up_read(&vm->userptr.notifier_lock);
			if (post_commit)
				xe_vm_insert_vma(vm, vma);
		}
		break;
	}
	case DRM_GPUVA_OP_PREFETCH:
		/* Nothing to do */
		break;
	default:
		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
	}
}
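
/*
 * Note (added for clarity): unwinding walks the per-bind ops arrays in
 * reverse creation order, and each list is itself walked with
 * drm_gpuva_for_each_op_reverse(), so partially committed state is rolled
 * back in exactly the opposite order it was committed by
 * vm_bind_ioctl_ops_parse().
 */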

static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
				     struct drm_gpuva_ops **ops,
				     int num_ops_list)
{
	int i;

	for (i = num_ops_list - 1; i >= 0; --i) {
		struct drm_gpuva_ops *__ops = ops[i];
		struct drm_gpuva_op *__op;

		if (!__ops)
			continue;

		drm_gpuva_for_each_op_reverse(__op, __ops) {
			struct xe_vma_op *op = gpuva_op_to_vma_op(__op);

			xe_vma_op_unwind(vm, op,
					 op->flags & XE_VMA_OP_COMMITTED,
					 op->flags & XE_VMA_OP_PREV_COMMITTED,
					 op->flags & XE_VMA_OP_NEXT_COMMITTED);
		}

		drm_gpuva_ops_free(&vm->gpuvm, __ops);
	}
}

static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
				     struct list_head *ops_list)
{
	struct xe_vma_op *op, *next;
	int err;

	lockdep_assert_held_write(&vm->lock);

	list_for_each_entry_safe(op, next, ops_list, link) {
		err = xe_vma_op_execute(vm, op);
		if (err) {
			drm_warn(&vm->xe->drm, "VM op(%d) failed with %d",
				 op->base.op, err);
			/*
			 * FIXME: Killing VM rather than proper error handling
			 */
			xe_vm_kill(vm);
			return -ENOSPC;
		}
		xe_vma_op_cleanup(vm, op);
	}

	return 0;
}

#ifdef TEST_VM_ASYNC_OPS_ERROR
#define SUPPORTED_FLAGS	\
	(FORCE_ASYNC_OP_ERROR | DRM_XE_VM_BIND_FLAG_READONLY | \
	 DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | 0xffff)
#else
#define SUPPORTED_FLAGS	\
	(DRM_XE_VM_BIND_FLAG_READONLY | \
	 DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | \
	 0xffff)
#endif
#define XE_64K_PAGE_MASK 0xffffull
#define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)

#define MAX_BINDS	512	/* FIXME: Picking random upper limit */

static int vm_bind_ioctl_check_args(struct xe_device *xe,
				    struct drm_xe_vm_bind *args,
				    struct drm_xe_vm_bind_op **bind_ops)
{
	int err;
	int i;

	if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->extensions) ||
	    XE_IOCTL_DBG(xe, args->num_binds > MAX_BINDS))
		return -EINVAL;

	if (args->num_binds > 1) {
		u64 __user *bind_user =
			u64_to_user_ptr(args->vector_of_binds);

		*bind_ops = kmalloc(sizeof(struct drm_xe_vm_bind_op) *
				    args->num_binds, GFP_KERNEL);
		if (!*bind_ops)
			return -ENOMEM;

		err = __copy_from_user(*bind_ops, bind_user,
				       sizeof(struct drm_xe_vm_bind_op) *
				       args->num_binds);
		if (XE_IOCTL_DBG(xe, err)) {
			err = -EFAULT;
			goto free_bind_ops;
		}
	} else {
		*bind_ops = &args->bind;
	}

	for (i = 0; i < args->num_binds; ++i) {
		u64 range = (*bind_ops)[i].range;
		u64 addr = (*bind_ops)[i].addr;
		u32 op = (*bind_ops)[i].op;
		u32 flags = (*bind_ops)[i].flags;
		u32 obj = (*bind_ops)[i].obj;
		u64 obj_offset = (*bind_ops)[i].obj_offset;
		u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
		bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
		u16 pat_index = (*bind_ops)[i].pat_index;
		u16 coh_mode;

		if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
			err = -EINVAL;
			goto free_bind_ops;
		}

		pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
		(*bind_ops)[i].pat_index = pat_index;
		coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
		if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
			err = -EINVAL;
			goto free_bind_ops;
		}

		if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) {
			err = -EINVAL;
			goto free_bind_ops;
		}

		if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
		    XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
		    XE_IOCTL_DBG(xe, obj && is_null) ||
		    XE_IOCTL_DBG(xe, obj_offset && is_null) ||
		    XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
				 is_null) ||
		    XE_IOCTL_DBG(xe, !obj &&
				 op == DRM_XE_VM_BIND_OP_MAP &&
				 !is_null) ||
		    XE_IOCTL_DBG(xe, !obj &&
				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
		    XE_IOCTL_DBG(xe, addr &&
				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
		    XE_IOCTL_DBG(xe, range &&
				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
		    XE_IOCTL_DBG(xe, obj &&
				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
		    XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
		    XE_IOCTL_DBG(xe, obj &&
				 op == DRM_XE_VM_BIND_OP_PREFETCH) ||
		    XE_IOCTL_DBG(xe, prefetch_region &&
				 op != DRM_XE_VM_BIND_OP_PREFETCH) ||
		    XE_IOCTL_DBG(xe, !(BIT(prefetch_region) &
				       xe->info.mem_region_mask)) ||
		    XE_IOCTL_DBG(xe, obj &&
				 op == DRM_XE_VM_BIND_OP_UNMAP)) {
			err = -EINVAL;
			goto free_bind_ops;
		}

		if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, !range &&
				 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
			err = -EINVAL;
			goto free_bind_ops;
		}
	}

	return 0;

free_bind_ops:
	if (args->num_binds > 1)
		kfree(*bind_ops);
	return err;
}

static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
				       struct xe_exec_queue *q,
				       struct xe_sync_entry *syncs,
				       int num_syncs)
{
	struct dma_fence *fence;
	int i, err = 0;

	fence = xe_sync_in_fence_get(syncs, num_syncs,
				     to_wait_exec_queue(vm, q), vm);
	if (IS_ERR(fence))
		return PTR_ERR(fence);

	for (i = 0; i < num_syncs; i++)
		xe_sync_entry_signal(&syncs[i], NULL, fence);

	xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
				     fence);
	dma_fence_put(fence);

	return err;
}
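
/*
 * Illustrative sketch (userspace view, not driver code): a minimal
 * DRM_IOCTL_XE_VM_BIND call mapping one BO, using only fields validated by
 * vm_bind_ioctl_check_args() above. The field values are examples.
 *
 *	struct drm_xe_vm_bind bind = {
 *		.vm_id = vm_id,
 *		.num_binds = 1,
 *		.bind = {
 *			.obj = bo_handle,
 *			.obj_offset = 0,
 *			.addr = 0x100000,
 *			.range = 0x10000,
 *			.op = DRM_XE_VM_BIND_OP_MAP,
 *			.pat_index = pat_index,
 *		},
 *	};
 *	ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);
 *
 * With num_binds > 1 the ops are instead passed through vector_of_binds as a
 * user pointer to an array of struct drm_xe_vm_bind_op.
 */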

int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_vm_bind *args = data;
	struct drm_xe_sync __user *syncs_user;
	struct xe_bo **bos = NULL;
	struct drm_gpuva_ops **ops = NULL;
	struct xe_vm *vm;
	struct xe_exec_queue *q = NULL;
	u32 num_syncs, num_ufence = 0;
	struct xe_sync_entry *syncs = NULL;
	struct drm_xe_vm_bind_op *bind_ops;
	LIST_HEAD(ops_list);
	int err;
	int i;

	err = vm_bind_ioctl_check_args(xe, args, &bind_ops);
	if (err)
		return err;

	if (args->exec_queue_id) {
		q = xe_exec_queue_lookup(xef, args->exec_queue_id);
		if (XE_IOCTL_DBG(xe, !q)) {
			err = -ENOENT;
			goto free_objs;
		}

		if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
			err = -EINVAL;
			goto put_exec_queue;
		}
	}

	vm = xe_vm_lookup(xef, args->vm_id);
	if (XE_IOCTL_DBG(xe, !vm)) {
		err = -EINVAL;
		goto put_exec_queue;
	}

	err = down_write_killable(&vm->lock);
	if (err)
		goto put_vm;

	if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
		err = -ENOENT;
		goto release_vm_lock;
	}

	for (i = 0; i < args->num_binds; ++i) {
		u64 range = bind_ops[i].range;
		u64 addr = bind_ops[i].addr;

		if (XE_IOCTL_DBG(xe, range > vm->size) ||
		    XE_IOCTL_DBG(xe, addr > vm->size - range)) {
			err = -EINVAL;
			goto release_vm_lock;
		}
	}

	if (args->num_binds) {
		bos = kcalloc(args->num_binds, sizeof(*bos), GFP_KERNEL);
		if (!bos) {
			err = -ENOMEM;
			goto release_vm_lock;
		}

		ops = kcalloc(args->num_binds, sizeof(*ops), GFP_KERNEL);
		if (!ops) {
			err = -ENOMEM;
			goto release_vm_lock;
		}
	}

	for (i = 0; i < args->num_binds; ++i) {
		struct drm_gem_object *gem_obj;
		u64 range = bind_ops[i].range;
		u64 addr = bind_ops[i].addr;
		u32 obj = bind_ops[i].obj;
		u64 obj_offset = bind_ops[i].obj_offset;
		u16 pat_index = bind_ops[i].pat_index;
		u16 coh_mode;

		if (!obj)
			continue;

		gem_obj = drm_gem_object_lookup(file, obj);
		if (XE_IOCTL_DBG(xe, !gem_obj)) {
			err = -ENOENT;
			goto put_obj;
		}
		bos[i] = gem_to_xe_bo(gem_obj);

		if (XE_IOCTL_DBG(xe, range > bos[i]->size) ||
		    XE_IOCTL_DBG(xe, obj_offset >
				 bos[i]->size - range)) {
			err = -EINVAL;
			goto put_obj;
		}

		if (bos[i]->flags & XE_BO_INTERNAL_64K) {
			if (XE_IOCTL_DBG(xe, obj_offset &
					 XE_64K_PAGE_MASK) ||
			    XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
			    XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
				err = -EINVAL;
				goto put_obj;
			}
		}

		coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
		if (bos[i]->cpu_caching) {
			if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
					 bos[i]->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
				err = -EINVAL;
				goto put_obj;
			}
		} else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
			/*
			 * Imported dma-buf from a different device should
			 * require 1way or 2way coherency since we don't know
			 * how it was mapped on the CPU. Just assume it is
			 * potentially cached on the CPU side.
			 */
			err = -EINVAL;
			goto put_obj;
		}
	}

	if (args->num_syncs) {
		syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
		if (!syncs) {
			err = -ENOMEM;
			goto put_obj;
		}
	}

	syncs_user = u64_to_user_ptr(args->syncs);
	for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
					  &syncs_user[num_syncs],
					  (xe_vm_in_lr_mode(vm) ?
					   SYNC_PARSE_FLAG_LR_MODE : 0) |
					  (!args->num_binds ?
					   SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
		if (err)
			goto free_syncs;

		if (xe_sync_is_ufence(&syncs[num_syncs]))
			num_ufence++;
	}

	if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
		err = -EINVAL;
		goto free_syncs;
	}

	if (!args->num_binds) {
		err = -ENODATA;
		goto free_syncs;
	}

	for (i = 0; i < args->num_binds; ++i) {
		u64 range = bind_ops[i].range;
		u64 addr = bind_ops[i].addr;
		u32 op = bind_ops[i].op;
		u32 flags = bind_ops[i].flags;
		u64 obj_offset = bind_ops[i].obj_offset;
		u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
		u16 pat_index = bind_ops[i].pat_index;

		ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
						  addr, range, op, flags,
						  prefetch_region, pat_index);
		if (IS_ERR(ops[i])) {
			err = PTR_ERR(ops[i]);
			ops[i] = NULL;
			goto unwind_ops;
		}

		err = vm_bind_ioctl_ops_parse(vm, q, ops[i], syncs, num_syncs,
					      &ops_list,
					      i == args->num_binds - 1);
		if (err)
			goto unwind_ops;
	}

	/* Nothing to do */
	if (list_empty(&ops_list)) {
		err = -ENODATA;
		goto unwind_ops;
	}

	xe_vm_get(vm);
	if (q)
		xe_exec_queue_get(q);

	err = vm_bind_ioctl_ops_execute(vm, &ops_list);

	up_write(&vm->lock);

	if (q)
		xe_exec_queue_put(q);
	xe_vm_put(vm);

	for (i = 0; bos && i < args->num_binds; ++i)
		xe_bo_put(bos[i]);

	kfree(bos);
	kfree(ops);
	if (args->num_binds > 1)
		kfree(bind_ops);

	return err;

unwind_ops:
	vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
free_syncs:
	if (err == -ENODATA)
		err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
	while (num_syncs--)
		xe_sync_entry_cleanup(&syncs[num_syncs]);

	kfree(syncs);
put_obj:
	for (i = 0; i < args->num_binds; ++i)
		xe_bo_put(bos[i]);
release_vm_lock:
	up_write(&vm->lock);
put_vm:
	xe_vm_put(vm);
put_exec_queue:
	if (q)
		xe_exec_queue_put(q);
free_objs:
	kfree(bos);
	kfree(ops);
	if (args->num_binds > 1)
		kfree(bind_ops);
	return err;
}

/**
 * xe_vm_lock() - Lock the vm's dma_resv object
 * @vm: The struct xe_vm whose lock is to be locked
 * @intr: Whether to perform any wait interruptible
 *
 * Return: 0 on success, -EINTR if @intr is true and the wait for a
 * contended lock was interrupted. If @intr is false, the function
 * always returns 0.
 */
int xe_vm_lock(struct xe_vm *vm, bool intr)
{
	if (intr)
		return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);

	return dma_resv_lock(xe_vm_resv(vm), NULL);
}

/**
 * xe_vm_unlock() - Unlock the vm's dma_resv object
 * @vm: The struct xe_vm whose lock is to be released.
 *
 * Unlock a buffer object lock that was locked by xe_vm_lock().
 */
void xe_vm_unlock(struct xe_vm *vm)
{
	dma_resv_unlock(xe_vm_resv(vm));
}
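
/*
 * Illustrative sketch (added for clarity, not driver code): typical use of
 * the two helpers above, taking the VM's dma_resv interruptibly around work
 * that needs it held:
 *
 *	int err = xe_vm_lock(vm, true);
 *
 *	if (err)
 *		return err;
 *
 *	... operate with xe_vm_resv(vm) held ...
 *
 *	xe_vm_unlock(vm);
 */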

/**
 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
 * @vma: VMA to invalidate
 *
 * Walks the list of page-table leaves, zeroes the entries owned by this VMA,
 * invalidates the TLBs, and blocks until the TLB invalidation is complete.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_vm_invalidate_vma(struct xe_vma *vma)
{
	struct xe_device *xe = xe_vma_vm(vma)->xe;
	struct xe_tile *tile;
	u32 tile_needs_invalidate = 0;
	int seqno[XE_MAX_TILES_PER_DEVICE];
	u8 id;
	int ret;

	xe_assert(xe, xe_vm_in_fault_mode(xe_vma_vm(vma)));
	xe_assert(xe, !xe_vma_is_null(vma));
	trace_xe_vma_usm_invalidate(vma);

	/* Check that we don't race with page-table updates */
	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
		if (xe_vma_is_userptr(vma)) {
			WARN_ON_ONCE(!mmu_interval_check_retry
				     (&to_userptr_vma(vma)->userptr.notifier,
				      to_userptr_vma(vma)->userptr.notifier_seq));
			WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)),
							     DMA_RESV_USAGE_BOOKKEEP));
		} else {
			xe_bo_assert_held(xe_vma_bo(vma));
		}
	}

	for_each_tile(tile, xe, id) {
		if (xe_pt_zap_ptes(tile, vma)) {
			tile_needs_invalidate |= BIT(id);
			xe_device_wmb(xe);
			/*
			 * FIXME: We potentially need to invalidate multiple
			 * GTs within the tile
			 */
			seqno[id] = xe_gt_tlb_invalidation_vma(tile->primary_gt, NULL, vma);
			if (seqno[id] < 0)
				return seqno[id];
		}
	}

	for_each_tile(tile, xe, id) {
		if (tile_needs_invalidate & BIT(id)) {
			ret = xe_gt_tlb_invalidation_wait(tile->primary_gt, seqno[id]);
			if (ret < 0)
				return ret;
		}
	}

	vma->usm.tile_invalidated = vma->tile_mask;

	return 0;
}

int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
{
	struct drm_gpuva *gpuva;
	bool is_vram;
	uint64_t addr;

	if (!down_read_trylock(&vm->lock)) {
		drm_printf(p, " Failed to acquire VM lock to dump capture");
		return 0;
	}
	if (vm->pt_root[gt_id]) {
		addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, XE_PAGE_SIZE);
		is_vram = xe_bo_is_vram(vm->pt_root[gt_id]->bo);
		drm_printf(p, " VM root: A:0x%llx %s\n", addr,
			   is_vram ? "VRAM" : "SYS");
	}

	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);
		bool is_userptr = xe_vma_is_userptr(vma);
		bool is_null = xe_vma_is_null(vma);

		if (is_null) {
			addr = 0;
		} else if (is_userptr) {
			struct sg_table *sg = to_userptr_vma(vma)->userptr.sg;
			struct xe_res_cursor cur;

			if (sg) {
				xe_res_first_sg(sg, 0, XE_PAGE_SIZE, &cur);
				addr = xe_res_dma(&cur);
			} else {
				addr = 0;
			}
		} else {
			addr = __xe_bo_addr(xe_vma_bo(vma), 0, XE_PAGE_SIZE);
			is_vram = xe_bo_is_vram(xe_vma_bo(vma));
		}
		drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n",
			   xe_vma_start(vma), xe_vma_end(vma) - 1,
			   xe_vma_size(vma),
			   addr, is_null ? "NULL" : is_userptr ? "USR" :
			   is_vram ? "VRAM" : "SYS");
	}
	up_read(&vm->lock);

	return 0;
}