// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_vm.h"

#include <linux/dma-fence-array.h>
#include <linux/nospec.h>

#include <drm/drm_drv.h>
#include <drm/drm_exec.h>
#include <drm/drm_print.h>
#include <drm/ttm/ttm_tt.h>
#include <uapi/drm/xe_drm.h>
#include <linux/ascii85.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/swap.h>

#include <generated/xe_wa_oob.h>

#include "regs/xe_gtt_defs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue.h"
#include "xe_gt_pagefault.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_migrate.h"
#include "xe_pat.h"
#include "xe_pm.h"
#include "xe_preempt_fence.h"
#include "xe_pt.h"
#include "xe_pxp.h"
#include "xe_res_cursor.h"
#include "xe_svm.h"
#include "xe_sync.h"
#include "xe_trace_bo.h"
#include "xe_wa.h"
#include "xe_hmm.h"

static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
{
	return vm->gpuvm.r_obj;
}

/**
 * xe_vma_userptr_check_repin() - Advisory check for repin needed
 * @uvma: The userptr vma
 *
 * Check if the userptr vma has been invalidated since last successful
 * repin. The check is advisory only and the function can be called
 * without the vm->userptr.notifier_lock held. There is no guarantee that the
 * vma userptr will remain valid after a lockless check, so typically
 * the call needs to be followed by a proper check under the notifier_lock.
 *
 * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
 */
int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
{
	return mmu_interval_check_retry(&uvma->userptr.notifier,
					uvma->userptr.notifier_seq) ?
66 -EAGAIN : 0; 67 } 68 69 int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) 70 { 71 struct xe_vma *vma = &uvma->vma; 72 struct xe_vm *vm = xe_vma_vm(vma); 73 struct xe_device *xe = vm->xe; 74 75 lockdep_assert_held(&vm->lock); 76 xe_assert(xe, xe_vma_is_userptr(vma)); 77 78 return xe_hmm_userptr_populate_range(uvma, false); 79 } 80 81 static bool preempt_fences_waiting(struct xe_vm *vm) 82 { 83 struct xe_exec_queue *q; 84 85 lockdep_assert_held(&vm->lock); 86 xe_vm_assert_held(vm); 87 88 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 89 if (!q->lr.pfence || 90 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, 91 &q->lr.pfence->flags)) { 92 return true; 93 } 94 } 95 96 return false; 97 } 98 99 static void free_preempt_fences(struct list_head *list) 100 { 101 struct list_head *link, *next; 102 103 list_for_each_safe(link, next, list) 104 xe_preempt_fence_free(to_preempt_fence_from_link(link)); 105 } 106 107 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list, 108 unsigned int *count) 109 { 110 lockdep_assert_held(&vm->lock); 111 xe_vm_assert_held(vm); 112 113 if (*count >= vm->preempt.num_exec_queues) 114 return 0; 115 116 for (; *count < vm->preempt.num_exec_queues; ++(*count)) { 117 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc(); 118 119 if (IS_ERR(pfence)) 120 return PTR_ERR(pfence); 121 122 list_move_tail(xe_preempt_fence_link(pfence), list); 123 } 124 125 return 0; 126 } 127 128 static int wait_for_existing_preempt_fences(struct xe_vm *vm) 129 { 130 struct xe_exec_queue *q; 131 132 xe_vm_assert_held(vm); 133 134 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 135 if (q->lr.pfence) { 136 long timeout = dma_fence_wait(q->lr.pfence, false); 137 138 /* Only -ETIME on fence indicates VM needs to be killed */ 139 if (timeout < 0 || q->lr.pfence->error == -ETIME) 140 return -ETIME; 141 142 dma_fence_put(q->lr.pfence); 143 q->lr.pfence = NULL; 144 } 145 } 146 147 return 0; 148 } 149 150 static bool xe_vm_is_idle(struct xe_vm *vm) 151 { 152 struct xe_exec_queue *q; 153 154 xe_vm_assert_held(vm); 155 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 156 if (!xe_exec_queue_is_idle(q)) 157 return false; 158 } 159 160 return true; 161 } 162 163 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list) 164 { 165 struct list_head *link; 166 struct xe_exec_queue *q; 167 168 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 169 struct dma_fence *fence; 170 171 link = list->next; 172 xe_assert(vm->xe, link != list); 173 174 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link), 175 q, q->lr.context, 176 ++q->lr.seqno); 177 dma_fence_put(q->lr.pfence); 178 q->lr.pfence = fence; 179 } 180 } 181 182 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) 183 { 184 struct xe_exec_queue *q; 185 int err; 186 187 xe_bo_assert_held(bo); 188 189 if (!vm->preempt.num_exec_queues) 190 return 0; 191 192 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues); 193 if (err) 194 return err; 195 196 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) 197 if (q->lr.pfence) { 198 dma_resv_add_fence(bo->ttm.base.resv, 199 q->lr.pfence, 200 DMA_RESV_USAGE_BOOKKEEP); 201 } 202 203 return 0; 204 } 205 206 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm, 207 struct drm_exec *exec) 208 { 209 struct xe_exec_queue *q; 210 211 lockdep_assert_held(&vm->lock); 212 xe_vm_assert_held(vm); 213 214 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 215 q->ops->resume(q); 

		drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
					 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
	}
}

int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	struct drm_gpuvm_exec vm_exec = {
		.vm = &vm->gpuvm,
		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
		.num_fences = 1,
	};
	struct drm_exec *exec = &vm_exec.exec;
	struct dma_fence *pfence;
	int err;
	bool wait;

	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));

	down_write(&vm->lock);
	err = drm_gpuvm_exec_lock(&vm_exec);
	if (err)
		goto out_up_write;

	pfence = xe_preempt_fence_create(q, q->lr.context,
					 ++q->lr.seqno);
	if (IS_ERR(pfence)) {
		err = PTR_ERR(pfence);
		goto out_fini;
	}

	list_add(&q->lr.link, &vm->preempt.exec_queues);
	++vm->preempt.num_exec_queues;
	q->lr.pfence = pfence;

	down_read(&vm->userptr.notifier_lock);

	drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
				 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);

	/*
	 * Check to see if a preemption on VM is in flight or userptr
	 * invalidation, if so trigger this preempt fence to sync state with
	 * other preempt fences on the VM.
	 */
	wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
	if (wait)
		dma_fence_enable_sw_signaling(pfence);

	up_read(&vm->userptr.notifier_lock);

out_fini:
	drm_exec_fini(exec);
out_up_write:
	up_write(&vm->lock);

	return err;
}
ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO);

/**
 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
 * @vm: The VM.
 * @q: The exec_queue
 *
 * Note that this function might be called multiple times on the same queue.
 */
void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	if (!xe_vm_in_preempt_fence_mode(vm))
		return;

	down_write(&vm->lock);
	if (!list_empty(&q->lr.link)) {
		list_del_init(&q->lr.link);
		--vm->preempt.num_exec_queues;
	}
	if (q->lr.pfence) {
		dma_fence_enable_sw_signaling(q->lr.pfence);
		dma_fence_put(q->lr.pfence);
		q->lr.pfence = NULL;
	}
	up_write(&vm->lock);
}

/**
 * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
 * that need repinning.
 * @vm: The VM.
 *
 * This function checks for whether the VM has userptrs that need repinning,
 * and provides a release-type barrier on the userptr.notifier_lock after
 * checking.
 *
 * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
 */
int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
{
	lockdep_assert_held_read(&vm->userptr.notifier_lock);

	return (list_empty(&vm->userptr.repin_list) &&
		list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
}

#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000

/**
 * xe_vm_kill() - VM Kill
 * @vm: The VM.
 * @unlocked: Flag indicating the VM's dma-resv is not held
 *
 * Kill the VM by setting the banned flag, indicating the VM is no longer
 * available for use. If in preempt fence mode, also kill all exec queues
 * attached to the VM.
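 *
 * A typical caller is the preempt rebind worker below, which on an
 * unrecoverable error calls xe_vm_kill(vm, true) because it does not hold
 * the VM's dma-resv at that point.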
 */
void xe_vm_kill(struct xe_vm *vm, bool unlocked)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);

	if (unlocked)
		xe_vm_lock(vm, false);

	vm->flags |= XE_VM_FLAG_BANNED;
	trace_xe_vm_kill(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
		q->ops->kill(q);

	if (unlocked)
		xe_vm_unlock(vm);

	/* TODO: Inform user the VM is banned */
}

/**
 * xe_vm_validate_should_retry() - Whether to retry after a validate error.
 * @exec: The drm_exec object used for locking before validation.
 * @err: The error returned from ttm_bo_validate().
 * @end: A ktime_t cookie that should be set to 0 before first use and
 * that should be reused on subsequent calls.
 *
 * With multiple active VMs, under memory pressure, it is possible that
 * ttm_bo_validate() runs into -EDEADLK and in such a case returns -ENOMEM.
 * Until ttm properly handles locking in such scenarios, the best thing the
 * driver can do is retry with a timeout. Check if that is necessary, and
 * if so unlock the drm_exec's objects while keeping the ticket to prepare
 * for a rerun.
 *
 * Return: true if a retry after drm_exec_init() is recommended;
 * false otherwise.
 */
bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
{
	ktime_t cur;

	if (err != -ENOMEM)
		return false;

	cur = ktime_get();
	*end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
	if (!ktime_before(cur, *end))
		return false;

	msleep(20);
	return true;
}

static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
{
	struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
	struct drm_gpuva *gpuva;
	int ret;

	lockdep_assert_held(&vm->lock);
	drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
		list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
			       &vm->rebind_list);

	if (!try_wait_for_completion(&vm->xe->pm_block))
		return -EAGAIN;

	ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
	if (ret)
		return ret;

	vm_bo->evicted = false;
	return 0;
}

/**
 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
 * @vm: The vm for which we are rebinding.
 * @exec: The struct drm_exec with the locked GEM objects.
 * @num_fences: The number of fences to reserve for the operation, not
 * including rebinds and validations.
 *
 * Validates all evicted gem objects and rebinds their vmas. Note that
 * rebindings may cause evictions and hence the validation-rebind
 * sequence is rerun until there are no more objects to validate.
 *
 * Return: 0 on success, negative error code on error. In particular,
 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
 * the drm_exec transaction needs to be restarted.
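 *
 * A rough sketch of the expected calling pattern, modelled on
 * xe_preempt_work_begin() below (error handling trimmed):
 *
 *	drm_exec_until_all_locked(&exec) {
 *		err = drm_gpuvm_prepare_vm(&vm->gpuvm, &exec, 0);
 *		if (!err)
 *			err = drm_gpuvm_prepare_objects(&vm->gpuvm, &exec, 0);
 *		if (!err)
 *			err = xe_vm_validate_rebind(vm, &exec, num_fences);
 *		drm_exec_retry_on_contention(&exec);
 *	}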
 */
int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
			  unsigned int num_fences)
{
	struct drm_gem_object *obj;
	unsigned long index;
	int ret;

	do {
		ret = drm_gpuvm_validate(&vm->gpuvm, exec);
		if (ret)
			return ret;

		ret = xe_vm_rebind(vm, false);
		if (ret)
			return ret;
	} while (!list_empty(&vm->gpuvm.evict.list));

	drm_exec_for_each_locked_object(exec, index, obj) {
		ret = dma_resv_reserve_fences(obj->resv, num_fences);
		if (ret)
			return ret;
	}

	return 0;
}

static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
				 bool *done)
{
	int err;

	err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	if (xe_vm_is_idle(vm)) {
		vm->preempt.rebind_deactivated = true;
		*done = true;
		return 0;
	}

	if (!preempt_fences_waiting(vm)) {
		*done = true;
		return 0;
	}

	err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	err = wait_for_existing_preempt_fences(vm);
	if (err)
		return err;

	/*
	 * Add validation and rebinding to the locking loop since both can
	 * cause evictions which may require blocking dma_resv locks.
	 * The fence reservation here is intended for the new preempt fences
	 * we attach at the end of the rebind work.
	 */
	return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
}

static bool vm_suspend_rebind_worker(struct xe_vm *vm)
{
	struct xe_device *xe = vm->xe;
	bool ret = false;

	mutex_lock(&xe->rebind_resume_lock);
	if (!try_wait_for_completion(&vm->xe->pm_block)) {
		ret = true;
		list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list);
	}
	mutex_unlock(&xe->rebind_resume_lock);

	return ret;
}

/**
 * xe_vm_resume_rebind_worker() - Resume the rebind worker.
 * @vm: The vm whose preempt worker to resume.
 *
 * Resume a preempt worker that was previously suspended by
 * vm_suspend_rebind_worker().
506 */ 507 void xe_vm_resume_rebind_worker(struct xe_vm *vm) 508 { 509 queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work); 510 } 511 512 static void preempt_rebind_work_func(struct work_struct *w) 513 { 514 struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work); 515 struct drm_exec exec; 516 unsigned int fence_count = 0; 517 LIST_HEAD(preempt_fences); 518 ktime_t end = 0; 519 int err = 0; 520 long wait; 521 int __maybe_unused tries = 0; 522 523 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); 524 trace_xe_vm_rebind_worker_enter(vm); 525 526 down_write(&vm->lock); 527 528 if (xe_vm_is_closed_or_banned(vm)) { 529 up_write(&vm->lock); 530 trace_xe_vm_rebind_worker_exit(vm); 531 return; 532 } 533 534 retry: 535 if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) { 536 up_write(&vm->lock); 537 return; 538 } 539 540 if (xe_vm_userptr_check_repin(vm)) { 541 err = xe_vm_userptr_pin(vm); 542 if (err) 543 goto out_unlock_outer; 544 } 545 546 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); 547 548 drm_exec_until_all_locked(&exec) { 549 bool done = false; 550 551 err = xe_preempt_work_begin(&exec, vm, &done); 552 drm_exec_retry_on_contention(&exec); 553 if (err || done) { 554 drm_exec_fini(&exec); 555 if (err && xe_vm_validate_should_retry(&exec, err, &end)) 556 err = -EAGAIN; 557 558 goto out_unlock_outer; 559 } 560 } 561 562 err = alloc_preempt_fences(vm, &preempt_fences, &fence_count); 563 if (err) 564 goto out_unlock; 565 566 err = xe_vm_rebind(vm, true); 567 if (err) 568 goto out_unlock; 569 570 /* Wait on rebinds and munmap style VM unbinds */ 571 wait = dma_resv_wait_timeout(xe_vm_resv(vm), 572 DMA_RESV_USAGE_KERNEL, 573 false, MAX_SCHEDULE_TIMEOUT); 574 if (wait <= 0) { 575 err = -ETIME; 576 goto out_unlock; 577 } 578 579 #define retry_required(__tries, __vm) \ 580 (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \ 581 (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \ 582 __xe_vm_userptr_needs_repin(__vm)) 583 584 down_read(&vm->userptr.notifier_lock); 585 if (retry_required(tries, vm)) { 586 up_read(&vm->userptr.notifier_lock); 587 err = -EAGAIN; 588 goto out_unlock; 589 } 590 591 #undef retry_required 592 593 spin_lock(&vm->xe->ttm.lru_lock); 594 ttm_lru_bulk_move_tail(&vm->lru_bulk_move); 595 spin_unlock(&vm->xe->ttm.lru_lock); 596 597 /* Point of no return. */ 598 arm_preempt_fences(vm, &preempt_fences); 599 resume_and_reinstall_preempt_fences(vm, &exec); 600 up_read(&vm->userptr.notifier_lock); 601 602 out_unlock: 603 drm_exec_fini(&exec); 604 out_unlock_outer: 605 if (err == -EAGAIN) { 606 trace_xe_vm_rebind_worker_retry(vm); 607 goto retry; 608 } 609 610 if (err) { 611 drm_warn(&vm->xe->drm, "VM worker error: %d\n", err); 612 xe_vm_kill(vm, true); 613 } 614 up_write(&vm->lock); 615 616 free_preempt_fences(&preempt_fences); 617 618 trace_xe_vm_rebind_worker_exit(vm); 619 } 620 621 static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma) 622 { 623 struct xe_userptr *userptr = &uvma->userptr; 624 struct xe_vma *vma = &uvma->vma; 625 struct dma_resv_iter cursor; 626 struct dma_fence *fence; 627 long err; 628 629 /* 630 * Tell exec and rebind worker they need to repin and rebind this 631 * userptr. 
	 */
	if (!xe_vm_in_fault_mode(vm) &&
	    !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
		spin_lock(&vm->userptr.invalidated_lock);
		list_move_tail(&userptr->invalidate_link,
			       &vm->userptr.invalidated);
		spin_unlock(&vm->userptr.invalidated_lock);
	}

	/*
	 * Preempt fences turn into schedule disables, pipeline these.
	 * Note that even in fault mode, we need to wait for binds and
	 * unbinds to complete, and those are attached as BOOKKEEP fences
	 * to the vm.
	 */
	dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
			    DMA_RESV_USAGE_BOOKKEEP);
	dma_resv_for_each_fence_unlocked(&cursor, fence)
		dma_fence_enable_sw_signaling(fence);
	dma_resv_iter_end(&cursor);

	err = dma_resv_wait_timeout(xe_vm_resv(vm),
				    DMA_RESV_USAGE_BOOKKEEP,
				    false, MAX_SCHEDULE_TIMEOUT);
	XE_WARN_ON(err <= 0);

	if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) {
		err = xe_vm_invalidate_vma(vma);
		XE_WARN_ON(err);
	}

	xe_hmm_userptr_unmap(uvma);
}

static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
				   const struct mmu_notifier_range *range,
				   unsigned long cur_seq)
{
	struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier);
	struct xe_vma *vma = &uvma->vma;
	struct xe_vm *vm = xe_vma_vm(vma);

	xe_assert(vm->xe, xe_vma_is_userptr(vma));
	trace_xe_vma_userptr_invalidate(vma);

	if (!mmu_notifier_range_blockable(range))
		return false;

	vm_dbg(&xe_vma_vm(vma)->xe->drm,
	       "NOTIFIER: addr=0x%016llx, range=0x%016llx",
	       xe_vma_start(vma), xe_vma_size(vma));

	down_write(&vm->userptr.notifier_lock);
	mmu_interval_set_seq(mni, cur_seq);

	__vma_userptr_invalidate(vm, uvma);
	up_write(&vm->userptr.notifier_lock);
	trace_xe_vma_userptr_invalidate_complete(vma);

	return true;
}

static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
	.invalidate = vma_userptr_invalidate,
};

#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
/**
 * xe_vma_userptr_force_invalidate() - force invalidate a userptr
 * @uvma: The userptr vma to invalidate
 *
 * Perform a forced userptr invalidation for testing purposes.
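 *
 * The caller must hold vm->lock, the userptr notifier_lock and the VM's
 * dma-resv; see the lockdep assertions in the function body.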
 */
void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
{
	struct xe_vm *vm = xe_vma_vm(&uvma->vma);

	/* Protect against concurrent userptr pinning */
	lockdep_assert_held(&vm->lock);
	/* Protect against concurrent notifiers */
	lockdep_assert_held(&vm->userptr.notifier_lock);
	/*
	 * Protect against concurrent instances of this function and
	 * the critical exec sections
	 */
	xe_vm_assert_held(vm);

	if (!mmu_interval_read_retry(&uvma->userptr.notifier,
				     uvma->userptr.notifier_seq))
		uvma->userptr.notifier_seq -= 2;
	__vma_userptr_invalidate(vm, uvma);
}
#endif

int xe_vm_userptr_pin(struct xe_vm *vm)
{
	struct xe_userptr_vma *uvma, *next;
	int err = 0;

	xe_assert(vm->xe, !xe_vm_in_fault_mode(vm));
	lockdep_assert_held_write(&vm->lock);

	/* Collect invalidated userptrs */
	spin_lock(&vm->userptr.invalidated_lock);
	xe_assert(vm->xe, list_empty(&vm->userptr.repin_list));
	list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
				 userptr.invalidate_link) {
		list_del_init(&uvma->userptr.invalidate_link);
		list_add_tail(&uvma->userptr.repin_link,
			      &vm->userptr.repin_list);
	}
	spin_unlock(&vm->userptr.invalidated_lock);

	/* Pin and move to bind list */
	list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
				 userptr.repin_link) {
		err = xe_vma_userptr_pin_pages(uvma);
		if (err == -EFAULT) {
			list_del_init(&uvma->userptr.repin_link);
			/*
			 * We might have already done the pin once, but then
			 * had to retry before the re-bind happened, due to
			 * some other condition in the caller. In the
			 * meantime the userptr got dinged by the notifier
			 * such that we need to revalidate here, but this
			 * time we hit the EFAULT. In such a case make sure
			 * we remove ourselves from the rebind list to avoid
			 * going down in flames.
			 */
			if (!list_empty(&uvma->vma.combined_links.rebind))
				list_del_init(&uvma->vma.combined_links.rebind);

			/* Wait for pending binds */
			xe_vm_lock(vm, false);
			dma_resv_wait_timeout(xe_vm_resv(vm),
					      DMA_RESV_USAGE_BOOKKEEP,
					      false, MAX_SCHEDULE_TIMEOUT);

			down_read(&vm->userptr.notifier_lock);
			err = xe_vm_invalidate_vma(&uvma->vma);
			up_read(&vm->userptr.notifier_lock);
			xe_vm_unlock(vm);
			if (err)
				break;
		} else {
			if (err)
				break;

			list_del_init(&uvma->userptr.repin_link);
			list_move_tail(&uvma->vma.combined_links.rebind,
				       &vm->rebind_list);
		}
	}

	if (err) {
		down_write(&vm->userptr.notifier_lock);
		spin_lock(&vm->userptr.invalidated_lock);
		list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
					 userptr.repin_link) {
			list_del_init(&uvma->userptr.repin_link);
			list_move_tail(&uvma->userptr.invalidate_link,
				       &vm->userptr.invalidated);
		}
		spin_unlock(&vm->userptr.invalidated_lock);
		up_write(&vm->userptr.notifier_lock);
	}
	return err;
}

/**
 * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
 * that need repinning.
 * @vm: The VM.
 *
 * This function does an advisory check for whether the VM has userptrs that
 * need repinning.
 *
 * Return: 0 if there are no indications of userptrs needing repinning,
 * -EAGAIN if there are.
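 *
 * A minimal sketch of the check-then-pin pattern used by the rebind worker
 * (see preempt_rebind_work_func()):
 *
 *	if (xe_vm_userptr_check_repin(vm)) {
 *		err = xe_vm_userptr_pin(vm);
 *		if (err)
 *			goto out;
 *	}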
811 */ 812 int xe_vm_userptr_check_repin(struct xe_vm *vm) 813 { 814 return (list_empty_careful(&vm->userptr.repin_list) && 815 list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN; 816 } 817 818 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) 819 { 820 int i; 821 822 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) { 823 if (!vops->pt_update_ops[i].num_ops) 824 continue; 825 826 vops->pt_update_ops[i].ops = 827 kmalloc_array(vops->pt_update_ops[i].num_ops, 828 sizeof(*vops->pt_update_ops[i].ops), 829 GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 830 if (!vops->pt_update_ops[i].ops) 831 return array_of_binds ? -ENOBUFS : -ENOMEM; 832 } 833 834 return 0; 835 } 836 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO); 837 838 static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op) 839 { 840 struct xe_vma *vma; 841 842 vma = gpuva_to_vma(op->base.prefetch.va); 843 844 if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma)) 845 xa_destroy(&op->prefetch_range.range); 846 } 847 848 static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops) 849 { 850 struct xe_vma_op *op; 851 852 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) 853 return; 854 855 list_for_each_entry(op, &vops->list, link) 856 xe_vma_svm_prefetch_op_fini(op); 857 } 858 859 static void xe_vma_ops_fini(struct xe_vma_ops *vops) 860 { 861 int i; 862 863 xe_vma_svm_prefetch_ops_fini(vops); 864 865 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 866 kfree(vops->pt_update_ops[i].ops); 867 } 868 869 static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val) 870 { 871 int i; 872 873 if (!inc_val) 874 return; 875 876 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 877 if (BIT(i) & tile_mask) 878 vops->pt_update_ops[i].num_ops += inc_val; 879 } 880 881 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, 882 u8 tile_mask) 883 { 884 INIT_LIST_HEAD(&op->link); 885 op->tile_mask = tile_mask; 886 op->base.op = DRM_GPUVA_OP_MAP; 887 op->base.map.va.addr = vma->gpuva.va.addr; 888 op->base.map.va.range = vma->gpuva.va.range; 889 op->base.map.gem.obj = vma->gpuva.gem.obj; 890 op->base.map.gem.offset = vma->gpuva.gem.offset; 891 op->map.vma = vma; 892 op->map.immediate = true; 893 op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE; 894 op->map.is_null = xe_vma_is_null(vma); 895 } 896 897 static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, 898 u8 tile_mask) 899 { 900 struct xe_vma_op *op; 901 902 op = kzalloc(sizeof(*op), GFP_KERNEL); 903 if (!op) 904 return -ENOMEM; 905 906 xe_vm_populate_rebind(op, vma, tile_mask); 907 list_add_tail(&op->link, &vops->list); 908 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); 909 910 return 0; 911 } 912 913 static struct dma_fence *ops_execute(struct xe_vm *vm, 914 struct xe_vma_ops *vops); 915 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 916 struct xe_exec_queue *q, 917 struct xe_sync_entry *syncs, u32 num_syncs); 918 919 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) 920 { 921 struct dma_fence *fence; 922 struct xe_vma *vma, *next; 923 struct xe_vma_ops vops; 924 struct xe_vma_op *op, *next_op; 925 int err, i; 926 927 lockdep_assert_held(&vm->lock); 928 if ((xe_vm_in_lr_mode(vm) && !rebind_worker) || 929 list_empty(&vm->rebind_list)) 930 return 0; 931 932 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 933 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 934 vops.pt_update_ops[i].wait_vm_bookkeep = true; 935 936 xe_vm_assert_held(vm); 937 
list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { 938 xe_assert(vm->xe, vma->tile_present); 939 940 if (rebind_worker) 941 trace_xe_vma_rebind_worker(vma); 942 else 943 trace_xe_vma_rebind_exec(vma); 944 945 err = xe_vm_ops_add_rebind(&vops, vma, 946 vma->tile_present); 947 if (err) 948 goto free_ops; 949 } 950 951 err = xe_vma_ops_alloc(&vops, false); 952 if (err) 953 goto free_ops; 954 955 fence = ops_execute(vm, &vops); 956 if (IS_ERR(fence)) { 957 err = PTR_ERR(fence); 958 } else { 959 dma_fence_put(fence); 960 list_for_each_entry_safe(vma, next, &vm->rebind_list, 961 combined_links.rebind) 962 list_del_init(&vma->combined_links.rebind); 963 } 964 free_ops: 965 list_for_each_entry_safe(op, next_op, &vops.list, link) { 966 list_del(&op->link); 967 kfree(op); 968 } 969 xe_vma_ops_fini(&vops); 970 971 return err; 972 } 973 974 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask) 975 { 976 struct dma_fence *fence = NULL; 977 struct xe_vma_ops vops; 978 struct xe_vma_op *op, *next_op; 979 struct xe_tile *tile; 980 u8 id; 981 int err; 982 983 lockdep_assert_held(&vm->lock); 984 xe_vm_assert_held(vm); 985 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 986 987 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 988 for_each_tile(tile, vm->xe, id) { 989 vops.pt_update_ops[id].wait_vm_bookkeep = true; 990 vops.pt_update_ops[tile->id].q = 991 xe_tile_migrate_exec_queue(tile); 992 } 993 994 err = xe_vm_ops_add_rebind(&vops, vma, tile_mask); 995 if (err) 996 return ERR_PTR(err); 997 998 err = xe_vma_ops_alloc(&vops, false); 999 if (err) { 1000 fence = ERR_PTR(err); 1001 goto free_ops; 1002 } 1003 1004 fence = ops_execute(vm, &vops); 1005 1006 free_ops: 1007 list_for_each_entry_safe(op, next_op, &vops.list, link) { 1008 list_del(&op->link); 1009 kfree(op); 1010 } 1011 xe_vma_ops_fini(&vops); 1012 1013 return fence; 1014 } 1015 1016 static void xe_vm_populate_range_rebind(struct xe_vma_op *op, 1017 struct xe_vma *vma, 1018 struct xe_svm_range *range, 1019 u8 tile_mask) 1020 { 1021 INIT_LIST_HEAD(&op->link); 1022 op->tile_mask = tile_mask; 1023 op->base.op = DRM_GPUVA_OP_DRIVER; 1024 op->subop = XE_VMA_SUBOP_MAP_RANGE; 1025 op->map_range.vma = vma; 1026 op->map_range.range = range; 1027 } 1028 1029 static int 1030 xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops, 1031 struct xe_vma *vma, 1032 struct xe_svm_range *range, 1033 u8 tile_mask) 1034 { 1035 struct xe_vma_op *op; 1036 1037 op = kzalloc(sizeof(*op), GFP_KERNEL); 1038 if (!op) 1039 return -ENOMEM; 1040 1041 xe_vm_populate_range_rebind(op, vma, range, tile_mask); 1042 list_add_tail(&op->link, &vops->list); 1043 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); 1044 1045 return 0; 1046 } 1047 1048 /** 1049 * xe_vm_range_rebind() - VM range (re)bind 1050 * @vm: The VM which the range belongs to. 1051 * @vma: The VMA which the range belongs to. 1052 * @range: SVM range to rebind. 1053 * @tile_mask: Tile mask to bind the range to. 1054 * 1055 * (re)bind SVM range setting up GPU page tables for the range. 
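 * Expects vm->lock and the VM's dma-resv to be held and the VM to be in
 * fault mode (see the assertions below); page-table updates are issued
 * through the per-tile migrate exec queues.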
 *
 * Return: dma fence for rebind to signal completion on success, ERR_PTR on
 * failure
 */
struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
				     struct xe_vma *vma,
				     struct xe_svm_range *range,
				     u8 tile_mask)
{
	struct dma_fence *fence = NULL;
	struct xe_vma_ops vops;
	struct xe_vma_op *op, *next_op;
	struct xe_tile *tile;
	u8 id;
	int err;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);
	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
	xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	for_each_tile(tile, vm->xe, id) {
		vops.pt_update_ops[id].wait_vm_bookkeep = true;
		vops.pt_update_ops[tile->id].q =
			xe_tile_migrate_exec_queue(tile);
	}

	err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask);
	if (err)
		return ERR_PTR(err);

	err = xe_vma_ops_alloc(&vops, false);
	if (err) {
		fence = ERR_PTR(err);
		goto free_ops;
	}

	fence = ops_execute(vm, &vops);

free_ops:
	list_for_each_entry_safe(op, next_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return fence;
}

static void xe_vm_populate_range_unbind(struct xe_vma_op *op,
					struct xe_svm_range *range)
{
	INIT_LIST_HEAD(&op->link);
	op->tile_mask = range->tile_present;
	op->base.op = DRM_GPUVA_OP_DRIVER;
	op->subop = XE_VMA_SUBOP_UNMAP_RANGE;
	op->unmap_range.range = range;
}

static int
xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops,
			   struct xe_svm_range *range)
{
	struct xe_vma_op *op;

	op = kzalloc(sizeof(*op), GFP_KERNEL);
	if (!op)
		return -ENOMEM;

	xe_vm_populate_range_unbind(op, range);
	list_add_tail(&op->link, &vops->list);
	xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1);

	return 0;
}

/**
 * xe_vm_range_unbind() - VM range unbind
 * @vm: The VM which the range belongs to.
 * @range: SVM range to unbind.
 *
 * Unbind SVM range removing the GPU page tables for the range.
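 * If the range is not bound on any tile, a signalled stub fence is returned
 * and no page-table update is issued.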
1139 * 1140 * Return: dma fence for unbind to signal completion on succees, ERR_PTR on 1141 * failure 1142 */ 1143 struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm, 1144 struct xe_svm_range *range) 1145 { 1146 struct dma_fence *fence = NULL; 1147 struct xe_vma_ops vops; 1148 struct xe_vma_op *op, *next_op; 1149 struct xe_tile *tile; 1150 u8 id; 1151 int err; 1152 1153 lockdep_assert_held(&vm->lock); 1154 xe_vm_assert_held(vm); 1155 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 1156 1157 if (!range->tile_present) 1158 return dma_fence_get_stub(); 1159 1160 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 1161 for_each_tile(tile, vm->xe, id) { 1162 vops.pt_update_ops[id].wait_vm_bookkeep = true; 1163 vops.pt_update_ops[tile->id].q = 1164 xe_tile_migrate_exec_queue(tile); 1165 } 1166 1167 err = xe_vm_ops_add_range_unbind(&vops, range); 1168 if (err) 1169 return ERR_PTR(err); 1170 1171 err = xe_vma_ops_alloc(&vops, false); 1172 if (err) { 1173 fence = ERR_PTR(err); 1174 goto free_ops; 1175 } 1176 1177 fence = ops_execute(vm, &vops); 1178 1179 free_ops: 1180 list_for_each_entry_safe(op, next_op, &vops.list, link) { 1181 list_del(&op->link); 1182 kfree(op); 1183 } 1184 xe_vma_ops_fini(&vops); 1185 1186 return fence; 1187 } 1188 1189 static void xe_vma_free(struct xe_vma *vma) 1190 { 1191 if (xe_vma_is_userptr(vma)) 1192 kfree(to_userptr_vma(vma)); 1193 else 1194 kfree(vma); 1195 } 1196 1197 #define VMA_CREATE_FLAG_READ_ONLY BIT(0) 1198 #define VMA_CREATE_FLAG_IS_NULL BIT(1) 1199 #define VMA_CREATE_FLAG_DUMPABLE BIT(2) 1200 #define VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR BIT(3) 1201 1202 static struct xe_vma *xe_vma_create(struct xe_vm *vm, 1203 struct xe_bo *bo, 1204 u64 bo_offset_or_userptr, 1205 u64 start, u64 end, 1206 u16 pat_index, unsigned int flags) 1207 { 1208 struct xe_vma *vma; 1209 struct xe_tile *tile; 1210 u8 id; 1211 bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY); 1212 bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL); 1213 bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE); 1214 bool is_cpu_addr_mirror = 1215 (flags & VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR); 1216 1217 xe_assert(vm->xe, start < end); 1218 xe_assert(vm->xe, end < vm->size); 1219 1220 /* 1221 * Allocate and ensure that the xe_vma_is_userptr() return 1222 * matches what was allocated. 
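	 * Only a VMA backed by user memory (no GEM object, not a NULL/sparse
	 * mapping and not a CPU address mirror) needs the larger
	 * struct xe_userptr_vma; every other kind uses a plain struct xe_vma.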
1223 */ 1224 if (!bo && !is_null && !is_cpu_addr_mirror) { 1225 struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL); 1226 1227 if (!uvma) 1228 return ERR_PTR(-ENOMEM); 1229 1230 vma = &uvma->vma; 1231 } else { 1232 vma = kzalloc(sizeof(*vma), GFP_KERNEL); 1233 if (!vma) 1234 return ERR_PTR(-ENOMEM); 1235 1236 if (is_cpu_addr_mirror) 1237 vma->gpuva.flags |= XE_VMA_SYSTEM_ALLOCATOR; 1238 if (is_null) 1239 vma->gpuva.flags |= DRM_GPUVA_SPARSE; 1240 if (bo) 1241 vma->gpuva.gem.obj = &bo->ttm.base; 1242 } 1243 1244 INIT_LIST_HEAD(&vma->combined_links.rebind); 1245 1246 INIT_LIST_HEAD(&vma->gpuva.gem.entry); 1247 vma->gpuva.vm = &vm->gpuvm; 1248 vma->gpuva.va.addr = start; 1249 vma->gpuva.va.range = end - start + 1; 1250 if (read_only) 1251 vma->gpuva.flags |= XE_VMA_READ_ONLY; 1252 if (dumpable) 1253 vma->gpuva.flags |= XE_VMA_DUMPABLE; 1254 1255 for_each_tile(tile, vm->xe, id) 1256 vma->tile_mask |= 0x1 << id; 1257 1258 if (vm->xe->info.has_atomic_enable_pte_bit) 1259 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; 1260 1261 vma->pat_index = pat_index; 1262 1263 if (bo) { 1264 struct drm_gpuvm_bo *vm_bo; 1265 1266 xe_bo_assert_held(bo); 1267 1268 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base); 1269 if (IS_ERR(vm_bo)) { 1270 xe_vma_free(vma); 1271 return ERR_CAST(vm_bo); 1272 } 1273 1274 drm_gpuvm_bo_extobj_add(vm_bo); 1275 drm_gem_object_get(&bo->ttm.base); 1276 vma->gpuva.gem.offset = bo_offset_or_userptr; 1277 drm_gpuva_link(&vma->gpuva, vm_bo); 1278 drm_gpuvm_bo_put(vm_bo); 1279 } else /* userptr or null */ { 1280 if (!is_null && !is_cpu_addr_mirror) { 1281 struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; 1282 u64 size = end - start + 1; 1283 int err; 1284 1285 INIT_LIST_HEAD(&userptr->invalidate_link); 1286 INIT_LIST_HEAD(&userptr->repin_link); 1287 vma->gpuva.gem.offset = bo_offset_or_userptr; 1288 mutex_init(&userptr->unmap_mutex); 1289 1290 err = mmu_interval_notifier_insert(&userptr->notifier, 1291 current->mm, 1292 xe_vma_userptr(vma), size, 1293 &vma_userptr_notifier_ops); 1294 if (err) { 1295 xe_vma_free(vma); 1296 return ERR_PTR(err); 1297 } 1298 1299 userptr->notifier_seq = LONG_MAX; 1300 } 1301 1302 xe_vm_get(vm); 1303 } 1304 1305 return vma; 1306 } 1307 1308 static void xe_vma_destroy_late(struct xe_vma *vma) 1309 { 1310 struct xe_vm *vm = xe_vma_vm(vma); 1311 1312 if (vma->ufence) { 1313 xe_sync_ufence_put(vma->ufence); 1314 vma->ufence = NULL; 1315 } 1316 1317 if (xe_vma_is_userptr(vma)) { 1318 struct xe_userptr_vma *uvma = to_userptr_vma(vma); 1319 struct xe_userptr *userptr = &uvma->userptr; 1320 1321 if (userptr->sg) 1322 xe_hmm_userptr_free_sg(uvma); 1323 1324 /* 1325 * Since userptr pages are not pinned, we can't remove 1326 * the notifier until we're sure the GPU is not accessing 1327 * them anymore 1328 */ 1329 mmu_interval_notifier_remove(&userptr->notifier); 1330 mutex_destroy(&userptr->unmap_mutex); 1331 xe_vm_put(vm); 1332 } else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) { 1333 xe_vm_put(vm); 1334 } else { 1335 xe_bo_put(xe_vma_bo(vma)); 1336 } 1337 1338 xe_vma_free(vma); 1339 } 1340 1341 static void vma_destroy_work_func(struct work_struct *w) 1342 { 1343 struct xe_vma *vma = 1344 container_of(w, struct xe_vma, destroy_work); 1345 1346 xe_vma_destroy_late(vma); 1347 } 1348 1349 static void vma_destroy_cb(struct dma_fence *fence, 1350 struct dma_fence_cb *cb) 1351 { 1352 struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb); 1353 1354 INIT_WORK(&vma->destroy_work, vma_destroy_work_func); 1355 
	queue_work(system_unbound_wq, &vma->destroy_work);
}

static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
{
	struct xe_vm *vm = xe_vma_vm(vma);

	lockdep_assert_held_write(&vm->lock);
	xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));

	if (xe_vma_is_userptr(vma)) {
		xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);

		spin_lock(&vm->userptr.invalidated_lock);
		xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link));
		list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
		spin_unlock(&vm->userptr.invalidated_lock);
	} else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) {
		xe_bo_assert_held(xe_vma_bo(vma));

		drm_gpuva_unlink(&vma->gpuva);
	}

	xe_vm_assert_held(vm);
	if (fence) {
		int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
						 vma_destroy_cb);

		if (ret) {
			XE_WARN_ON(ret != -ENOENT);
			xe_vma_destroy_late(vma);
		}
	} else {
		xe_vma_destroy_late(vma);
	}
}

/**
 * xe_vm_lock_vma() - drm_exec utility to lock a vma
 * @exec: The drm_exec object we're currently locking for.
 * @vma: The vma for which we want to lock the vm resv and any attached
 * object's resv.
 *
 * Return: 0 on success, negative error code on error. In particular
 * may return -EDEADLK on WW transaction contention and -EINTR if
 * an interruptible wait is terminated by a signal.
 */
int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
{
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_bo *bo = xe_vma_bo(vma);
	int err;

	XE_WARN_ON(!vm);

	err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
	if (!err && bo && !bo->vm)
		err = drm_exec_lock_obj(exec, &bo->ttm.base);

	return err;
}

static void xe_vma_destroy_unlocked(struct xe_vma *vma)
{
	struct drm_exec exec;
	int err;

	drm_exec_init(&exec, 0, 0);
	drm_exec_until_all_locked(&exec) {
		err = xe_vm_lock_vma(&exec, vma);
		drm_exec_retry_on_contention(&exec);
		if (XE_WARN_ON(err))
			break;
	}

	xe_vma_destroy(vma, NULL);

	drm_exec_fini(&exec);
}

struct xe_vma *
xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
{
	struct drm_gpuva *gpuva;

	lockdep_assert_held(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm))
		return NULL;

	xe_assert(vm->xe, start + range <= vm->size);

	gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);

	return gpuva ?
gpuva_to_vma(gpuva) : NULL; 1450 } 1451 1452 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma) 1453 { 1454 int err; 1455 1456 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1457 lockdep_assert_held(&vm->lock); 1458 1459 mutex_lock(&vm->snap_mutex); 1460 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva); 1461 mutex_unlock(&vm->snap_mutex); 1462 XE_WARN_ON(err); /* Shouldn't be possible */ 1463 1464 return err; 1465 } 1466 1467 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma) 1468 { 1469 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1470 lockdep_assert_held(&vm->lock); 1471 1472 mutex_lock(&vm->snap_mutex); 1473 drm_gpuva_remove(&vma->gpuva); 1474 mutex_unlock(&vm->snap_mutex); 1475 if (vm->usm.last_fault_vma == vma) 1476 vm->usm.last_fault_vma = NULL; 1477 } 1478 1479 static struct drm_gpuva_op *xe_vm_op_alloc(void) 1480 { 1481 struct xe_vma_op *op; 1482 1483 op = kzalloc(sizeof(*op), GFP_KERNEL); 1484 1485 if (unlikely(!op)) 1486 return NULL; 1487 1488 return &op->base; 1489 } 1490 1491 static void xe_vm_free(struct drm_gpuvm *gpuvm); 1492 1493 static const struct drm_gpuvm_ops gpuvm_ops = { 1494 .op_alloc = xe_vm_op_alloc, 1495 .vm_bo_validate = xe_gpuvm_validate, 1496 .vm_free = xe_vm_free, 1497 }; 1498 1499 static u64 pde_encode_pat_index(u16 pat_index) 1500 { 1501 u64 pte = 0; 1502 1503 if (pat_index & BIT(0)) 1504 pte |= XE_PPGTT_PTE_PAT0; 1505 1506 if (pat_index & BIT(1)) 1507 pte |= XE_PPGTT_PTE_PAT1; 1508 1509 return pte; 1510 } 1511 1512 static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level) 1513 { 1514 u64 pte = 0; 1515 1516 if (pat_index & BIT(0)) 1517 pte |= XE_PPGTT_PTE_PAT0; 1518 1519 if (pat_index & BIT(1)) 1520 pte |= XE_PPGTT_PTE_PAT1; 1521 1522 if (pat_index & BIT(2)) { 1523 if (pt_level) 1524 pte |= XE_PPGTT_PDE_PDPE_PAT2; 1525 else 1526 pte |= XE_PPGTT_PTE_PAT2; 1527 } 1528 1529 if (pat_index & BIT(3)) 1530 pte |= XELPG_PPGTT_PTE_PAT3; 1531 1532 if (pat_index & (BIT(4))) 1533 pte |= XE2_PPGTT_PTE_PAT4; 1534 1535 return pte; 1536 } 1537 1538 static u64 pte_encode_ps(u32 pt_level) 1539 { 1540 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL); 1541 1542 if (pt_level == 1) 1543 return XE_PDE_PS_2M; 1544 else if (pt_level == 2) 1545 return XE_PDPE_PS_1G; 1546 1547 return 0; 1548 } 1549 1550 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset, 1551 const u16 pat_index) 1552 { 1553 u64 pde; 1554 1555 pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1556 pde |= XE_PAGE_PRESENT | XE_PAGE_RW; 1557 pde |= pde_encode_pat_index(pat_index); 1558 1559 return pde; 1560 } 1561 1562 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, 1563 u16 pat_index, u32 pt_level) 1564 { 1565 u64 pte; 1566 1567 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1568 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1569 pte |= pte_encode_pat_index(pat_index, pt_level); 1570 pte |= pte_encode_ps(pt_level); 1571 1572 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) 1573 pte |= XE_PPGTT_PTE_DM; 1574 1575 return pte; 1576 } 1577 1578 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, 1579 u16 pat_index, u32 pt_level) 1580 { 1581 pte |= XE_PAGE_PRESENT; 1582 1583 if (likely(!xe_vma_read_only(vma))) 1584 pte |= XE_PAGE_RW; 1585 1586 pte |= pte_encode_pat_index(pat_index, pt_level); 1587 pte |= pte_encode_ps(pt_level); 1588 1589 if (unlikely(xe_vma_is_null(vma))) 1590 pte |= XE_PTE_NULL; 1591 1592 return pte; 1593 } 1594 1595 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, 1596 u16 pat_index, 1597 u32 pt_level, bool devmem, u64 flags) 1598 { 1599 u64 pte; 1600 
1601 /* Avoid passing random bits directly as flags */ 1602 xe_assert(xe, !(flags & ~XE_PTE_PS64)); 1603 1604 pte = addr; 1605 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1606 pte |= pte_encode_pat_index(pat_index, pt_level); 1607 pte |= pte_encode_ps(pt_level); 1608 1609 if (devmem) 1610 pte |= XE_PPGTT_PTE_DM; 1611 1612 pte |= flags; 1613 1614 return pte; 1615 } 1616 1617 static const struct xe_pt_ops xelp_pt_ops = { 1618 .pte_encode_bo = xelp_pte_encode_bo, 1619 .pte_encode_vma = xelp_pte_encode_vma, 1620 .pte_encode_addr = xelp_pte_encode_addr, 1621 .pde_encode_bo = xelp_pde_encode_bo, 1622 }; 1623 1624 static void vm_destroy_work_func(struct work_struct *w); 1625 1626 /** 1627 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the 1628 * given tile and vm. 1629 * @xe: xe device. 1630 * @tile: tile to set up for. 1631 * @vm: vm to set up for. 1632 * 1633 * Sets up a pagetable tree with one page-table per level and a single 1634 * leaf PTE. All pagetable entries point to the single page-table or, 1635 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and 1636 * writes become NOPs. 1637 * 1638 * Return: 0 on success, negative error code on error. 1639 */ 1640 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, 1641 struct xe_vm *vm) 1642 { 1643 u8 id = tile->id; 1644 int i; 1645 1646 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) { 1647 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i); 1648 if (IS_ERR(vm->scratch_pt[id][i])) { 1649 int err = PTR_ERR(vm->scratch_pt[id][i]); 1650 1651 vm->scratch_pt[id][i] = NULL; 1652 return err; 1653 } 1654 1655 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); 1656 } 1657 1658 return 0; 1659 } 1660 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO); 1661 1662 static void xe_vm_free_scratch(struct xe_vm *vm) 1663 { 1664 struct xe_tile *tile; 1665 u8 id; 1666 1667 if (!xe_vm_has_scratch(vm)) 1668 return; 1669 1670 for_each_tile(tile, vm->xe, id) { 1671 u32 i; 1672 1673 if (!vm->pt_root[id]) 1674 continue; 1675 1676 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i) 1677 if (vm->scratch_pt[id][i]) 1678 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL); 1679 } 1680 } 1681 1682 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) 1683 { 1684 struct drm_gem_object *vm_resv_obj; 1685 struct xe_vm *vm; 1686 int err, number_tiles = 0; 1687 struct xe_tile *tile; 1688 u8 id; 1689 1690 /* 1691 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to 1692 * ever be in faulting mode. 1693 */ 1694 xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE))); 1695 1696 vm = kzalloc(sizeof(*vm), GFP_KERNEL); 1697 if (!vm) 1698 return ERR_PTR(-ENOMEM); 1699 1700 vm->xe = xe; 1701 1702 vm->size = 1ull << xe->info.va_bits; 1703 vm->flags = flags; 1704 1705 if (xef) 1706 vm->xef = xe_file_get(xef); 1707 /** 1708 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be 1709 * manipulated under the PXP mutex. However, the PXP mutex can be taken 1710 * under a user-VM lock when the PXP session is started at exec_queue 1711 * creation time. Those are different VMs and therefore there is no risk 1712 * of deadlock, but we need to tell lockdep that this is the case or it 1713 * will print a warning. 
1714 */ 1715 if (flags & XE_VM_FLAG_GSC) { 1716 static struct lock_class_key gsc_vm_key; 1717 1718 __init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key); 1719 } else { 1720 init_rwsem(&vm->lock); 1721 } 1722 mutex_init(&vm->snap_mutex); 1723 1724 INIT_LIST_HEAD(&vm->rebind_list); 1725 1726 INIT_LIST_HEAD(&vm->userptr.repin_list); 1727 INIT_LIST_HEAD(&vm->userptr.invalidated); 1728 init_rwsem(&vm->userptr.notifier_lock); 1729 spin_lock_init(&vm->userptr.invalidated_lock); 1730 1731 ttm_lru_bulk_move_init(&vm->lru_bulk_move); 1732 1733 INIT_WORK(&vm->destroy_work, vm_destroy_work_func); 1734 1735 INIT_LIST_HEAD(&vm->preempt.exec_queues); 1736 vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */ 1737 1738 for_each_tile(tile, xe, id) 1739 xe_range_fence_tree_init(&vm->rftree[id]); 1740 1741 vm->pt_ops = &xelp_pt_ops; 1742 1743 /* 1744 * Long-running workloads are not protected by the scheduler references. 1745 * By design, run_job for long-running workloads returns NULL and the 1746 * scheduler drops all the references of it, hence protecting the VM 1747 * for this case is necessary. 1748 */ 1749 if (flags & XE_VM_FLAG_LR_MODE) { 1750 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); 1751 xe_pm_runtime_get_noresume(xe); 1752 INIT_LIST_HEAD(&vm->preempt.pm_activate_link); 1753 } 1754 1755 if (flags & XE_VM_FLAG_FAULT_MODE) { 1756 err = xe_svm_init(vm); 1757 if (err) 1758 goto err_no_resv; 1759 } 1760 1761 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); 1762 if (!vm_resv_obj) { 1763 err = -ENOMEM; 1764 goto err_svm_fini; 1765 } 1766 1767 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm, 1768 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops); 1769 1770 drm_gem_object_put(vm_resv_obj); 1771 1772 err = xe_vm_lock(vm, true); 1773 if (err) 1774 goto err_close; 1775 1776 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) 1777 vm->flags |= XE_VM_FLAG_64K; 1778 1779 for_each_tile(tile, xe, id) { 1780 if (flags & XE_VM_FLAG_MIGRATION && 1781 tile->id != XE_VM_FLAG_TILE_ID(flags)) 1782 continue; 1783 1784 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level); 1785 if (IS_ERR(vm->pt_root[id])) { 1786 err = PTR_ERR(vm->pt_root[id]); 1787 vm->pt_root[id] = NULL; 1788 goto err_unlock_close; 1789 } 1790 } 1791 1792 if (xe_vm_has_scratch(vm)) { 1793 for_each_tile(tile, xe, id) { 1794 if (!vm->pt_root[id]) 1795 continue; 1796 1797 err = xe_vm_create_scratch(xe, tile, vm); 1798 if (err) 1799 goto err_unlock_close; 1800 } 1801 vm->batch_invalidate_tlb = true; 1802 } 1803 1804 if (vm->flags & XE_VM_FLAG_LR_MODE) 1805 vm->batch_invalidate_tlb = false; 1806 1807 /* Fill pt_root after allocating scratch tables */ 1808 for_each_tile(tile, xe, id) { 1809 if (!vm->pt_root[id]) 1810 continue; 1811 1812 xe_pt_populate_empty(tile, vm, vm->pt_root[id]); 1813 } 1814 xe_vm_unlock(vm); 1815 1816 /* Kernel migration VM shouldn't have a circular loop.. 
*/ 1817 if (!(flags & XE_VM_FLAG_MIGRATION)) { 1818 for_each_tile(tile, xe, id) { 1819 struct xe_exec_queue *q; 1820 u32 create_flags = EXEC_QUEUE_FLAG_VM; 1821 1822 if (!vm->pt_root[id]) 1823 continue; 1824 1825 q = xe_exec_queue_create_bind(xe, tile, create_flags, 0); 1826 if (IS_ERR(q)) { 1827 err = PTR_ERR(q); 1828 goto err_close; 1829 } 1830 vm->q[id] = q; 1831 number_tiles++; 1832 } 1833 } 1834 1835 if (number_tiles > 1) 1836 vm->composite_fence_ctx = dma_fence_context_alloc(1); 1837 1838 if (xef && xe->info.has_asid) { 1839 u32 asid; 1840 1841 down_write(&xe->usm.lock); 1842 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, 1843 XA_LIMIT(1, XE_MAX_ASID - 1), 1844 &xe->usm.next_asid, GFP_KERNEL); 1845 up_write(&xe->usm.lock); 1846 if (err < 0) 1847 goto err_unlock_close; 1848 1849 vm->usm.asid = asid; 1850 } 1851 1852 trace_xe_vm_create(vm); 1853 1854 return vm; 1855 1856 err_unlock_close: 1857 xe_vm_unlock(vm); 1858 err_close: 1859 xe_vm_close_and_put(vm); 1860 return ERR_PTR(err); 1861 1862 err_svm_fini: 1863 if (flags & XE_VM_FLAG_FAULT_MODE) { 1864 vm->size = 0; /* close the vm */ 1865 xe_svm_fini(vm); 1866 } 1867 err_no_resv: 1868 mutex_destroy(&vm->snap_mutex); 1869 for_each_tile(tile, xe, id) 1870 xe_range_fence_tree_fini(&vm->rftree[id]); 1871 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 1872 if (vm->xef) 1873 xe_file_put(vm->xef); 1874 kfree(vm); 1875 if (flags & XE_VM_FLAG_LR_MODE) 1876 xe_pm_runtime_put(xe); 1877 return ERR_PTR(err); 1878 } 1879 1880 static void xe_vm_close(struct xe_vm *vm) 1881 { 1882 struct xe_device *xe = vm->xe; 1883 bool bound; 1884 int idx; 1885 1886 bound = drm_dev_enter(&xe->drm, &idx); 1887 1888 down_write(&vm->lock); 1889 if (xe_vm_in_fault_mode(vm)) 1890 xe_svm_notifier_lock(vm); 1891 1892 vm->size = 0; 1893 1894 if (!((vm->flags & XE_VM_FLAG_MIGRATION))) { 1895 struct xe_tile *tile; 1896 struct xe_gt *gt; 1897 u8 id; 1898 1899 /* Wait for pending binds */ 1900 dma_resv_wait_timeout(xe_vm_resv(vm), 1901 DMA_RESV_USAGE_BOOKKEEP, 1902 false, MAX_SCHEDULE_TIMEOUT); 1903 1904 if (bound) { 1905 for_each_tile(tile, xe, id) 1906 if (vm->pt_root[id]) 1907 xe_pt_clear(xe, vm->pt_root[id]); 1908 1909 for_each_gt(gt, xe, id) 1910 xe_gt_tlb_invalidation_vm(gt, vm); 1911 } 1912 } 1913 1914 if (xe_vm_in_fault_mode(vm)) 1915 xe_svm_notifier_unlock(vm); 1916 up_write(&vm->lock); 1917 1918 if (bound) 1919 drm_dev_exit(idx); 1920 } 1921 1922 void xe_vm_close_and_put(struct xe_vm *vm) 1923 { 1924 LIST_HEAD(contested); 1925 struct xe_device *xe = vm->xe; 1926 struct xe_tile *tile; 1927 struct xe_vma *vma, *next_vma; 1928 struct drm_gpuva *gpuva, *next; 1929 u8 id; 1930 1931 xe_assert(xe, !vm->preempt.num_exec_queues); 1932 1933 xe_vm_close(vm); 1934 if (xe_vm_in_preempt_fence_mode(vm)) { 1935 mutex_lock(&xe->rebind_resume_lock); 1936 list_del_init(&vm->preempt.pm_activate_link); 1937 mutex_unlock(&xe->rebind_resume_lock); 1938 flush_work(&vm->preempt.rebind_work); 1939 } 1940 if (xe_vm_in_fault_mode(vm)) 1941 xe_svm_close(vm); 1942 1943 down_write(&vm->lock); 1944 for_each_tile(tile, xe, id) { 1945 if (vm->q[id]) 1946 xe_exec_queue_last_fence_put(vm->q[id], vm); 1947 } 1948 up_write(&vm->lock); 1949 1950 for_each_tile(tile, xe, id) { 1951 if (vm->q[id]) { 1952 xe_exec_queue_kill(vm->q[id]); 1953 xe_exec_queue_put(vm->q[id]); 1954 vm->q[id] = NULL; 1955 } 1956 } 1957 1958 down_write(&vm->lock); 1959 xe_vm_lock(vm, false); 1960 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) { 1961 vma = gpuva_to_vma(gpuva); 1962 1963 if (xe_vma_has_no_bo(vma)) { 
1964 down_read(&vm->userptr.notifier_lock); 1965 vma->gpuva.flags |= XE_VMA_DESTROYED; 1966 up_read(&vm->userptr.notifier_lock); 1967 } 1968 1969 xe_vm_remove_vma(vm, vma); 1970 1971 /* easy case, remove from VMA? */ 1972 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) { 1973 list_del_init(&vma->combined_links.rebind); 1974 xe_vma_destroy(vma, NULL); 1975 continue; 1976 } 1977 1978 list_move_tail(&vma->combined_links.destroy, &contested); 1979 vma->gpuva.flags |= XE_VMA_DESTROYED; 1980 } 1981 1982 /* 1983 * All vm operations will add shared fences to resv. 1984 * The only exception is eviction for a shared object, 1985 * but even so, the unbind when evicted would still 1986 * install a fence to resv. Hence it's safe to 1987 * destroy the pagetables immediately. 1988 */ 1989 xe_vm_free_scratch(vm); 1990 1991 for_each_tile(tile, xe, id) { 1992 if (vm->pt_root[id]) { 1993 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); 1994 vm->pt_root[id] = NULL; 1995 } 1996 } 1997 xe_vm_unlock(vm); 1998 1999 /* 2000 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL 2001 * Since we hold a refcount to the bo, we can remove and free 2002 * the members safely without locking. 2003 */ 2004 list_for_each_entry_safe(vma, next_vma, &contested, 2005 combined_links.destroy) { 2006 list_del_init(&vma->combined_links.destroy); 2007 xe_vma_destroy_unlocked(vma); 2008 } 2009 2010 if (xe_vm_in_fault_mode(vm)) 2011 xe_svm_fini(vm); 2012 2013 up_write(&vm->lock); 2014 2015 down_write(&xe->usm.lock); 2016 if (vm->usm.asid) { 2017 void *lookup; 2018 2019 xe_assert(xe, xe->info.has_asid); 2020 xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION)); 2021 2022 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); 2023 xe_assert(xe, lookup == vm); 2024 } 2025 up_write(&xe->usm.lock); 2026 2027 for_each_tile(tile, xe, id) 2028 xe_range_fence_tree_fini(&vm->rftree[id]); 2029 2030 xe_vm_put(vm); 2031 } 2032 2033 static void vm_destroy_work_func(struct work_struct *w) 2034 { 2035 struct xe_vm *vm = 2036 container_of(w, struct xe_vm, destroy_work); 2037 struct xe_device *xe = vm->xe; 2038 struct xe_tile *tile; 2039 u8 id; 2040 2041 /* xe_vm_close_and_put was not called? */ 2042 xe_assert(xe, !vm->size); 2043 2044 if (xe_vm_in_preempt_fence_mode(vm)) 2045 flush_work(&vm->preempt.rebind_work); 2046 2047 mutex_destroy(&vm->snap_mutex); 2048 2049 if (vm->flags & XE_VM_FLAG_LR_MODE) 2050 xe_pm_runtime_put(xe); 2051 2052 for_each_tile(tile, xe, id) 2053 XE_WARN_ON(vm->pt_root[id]); 2054 2055 trace_xe_vm_free(vm); 2056 2057 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 2058 2059 if (vm->xef) 2060 xe_file_put(vm->xef); 2061 2062 kfree(vm); 2063 } 2064 2065 static void xe_vm_free(struct drm_gpuvm *gpuvm) 2066 { 2067 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); 2068 2069 /* To destroy the VM we need to be able to sleep */ 2070 queue_work(system_unbound_wq, &vm->destroy_work); 2071 } 2072 2073 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) 2074 { 2075 struct xe_vm *vm; 2076 2077 mutex_lock(&xef->vm.lock); 2078 vm = xa_load(&xef->vm.xa, id); 2079 if (vm) 2080 xe_vm_get(vm); 2081 mutex_unlock(&xef->vm.lock); 2082 2083 return vm; 2084 } 2085 2086 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile) 2087 { 2088 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0, 2089 tile_to_xe(tile)->pat.idx[XE_CACHE_WB]); 2090 } 2091 2092 static struct xe_exec_queue * 2093 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 2094 { 2095 return q ? 
q : vm->q[0]; 2096 } 2097 2098 static struct xe_user_fence * 2099 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) 2100 { 2101 unsigned int i; 2102 2103 for (i = 0; i < num_syncs; i++) { 2104 struct xe_sync_entry *e = &syncs[i]; 2105 2106 if (xe_sync_is_ufence(e)) 2107 return xe_sync_ufence_get(e); 2108 } 2109 2110 return NULL; 2111 } 2112 2113 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ 2114 DRM_XE_VM_CREATE_FLAG_LR_MODE | \ 2115 DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 2116 2117 int xe_vm_create_ioctl(struct drm_device *dev, void *data, 2118 struct drm_file *file) 2119 { 2120 struct xe_device *xe = to_xe_device(dev); 2121 struct xe_file *xef = to_xe_file(file); 2122 struct drm_xe_vm_create *args = data; 2123 struct xe_vm *vm; 2124 u32 id; 2125 int err; 2126 u32 flags = 0; 2127 2128 if (XE_IOCTL_DBG(xe, args->extensions)) 2129 return -EINVAL; 2130 2131 if (XE_WA(xe_root_mmio_gt(xe), 14016763929)) 2132 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE; 2133 2134 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && 2135 !xe->info.has_usm)) 2136 return -EINVAL; 2137 2138 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 2139 return -EINVAL; 2140 2141 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS)) 2142 return -EINVAL; 2143 2144 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE && 2145 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && 2146 !xe->info.needs_scratch)) 2147 return -EINVAL; 2148 2149 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) && 2150 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) 2151 return -EINVAL; 2152 2153 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE) 2154 flags |= XE_VM_FLAG_SCRATCH_PAGE; 2155 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) 2156 flags |= XE_VM_FLAG_LR_MODE; 2157 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 2158 flags |= XE_VM_FLAG_FAULT_MODE; 2159 2160 vm = xe_vm_create(xe, flags, xef); 2161 if (IS_ERR(vm)) 2162 return PTR_ERR(vm); 2163 2164 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) 2165 /* Warning: Security issue - never enable by default */ 2166 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); 2167 #endif 2168 2169 /* user id alloc must always be last in ioctl to prevent UAF */ 2170 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); 2171 if (err) 2172 goto err_close_and_put; 2173 2174 args->vm_id = id; 2175 2176 return 0; 2177 2178 err_close_and_put: 2179 xe_vm_close_and_put(vm); 2180 2181 return err; 2182 } 2183 2184 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, 2185 struct drm_file *file) 2186 { 2187 struct xe_device *xe = to_xe_device(dev); 2188 struct xe_file *xef = to_xe_file(file); 2189 struct drm_xe_vm_destroy *args = data; 2190 struct xe_vm *vm; 2191 int err = 0; 2192 2193 if (XE_IOCTL_DBG(xe, args->pad) || 2194 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 2195 return -EINVAL; 2196 2197 mutex_lock(&xef->vm.lock); 2198 vm = xa_load(&xef->vm.xa, args->vm_id); 2199 if (XE_IOCTL_DBG(xe, !vm)) 2200 err = -ENOENT; 2201 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues)) 2202 err = -EBUSY; 2203 else 2204 xa_erase(&xef->vm.xa, args->vm_id); 2205 mutex_unlock(&xef->vm.lock); 2206 2207 if (!err) 2208 xe_vm_close_and_put(vm); 2209 2210 return err; 2211 } 2212 2213 static bool vma_matches(struct xe_vma *vma, u64 page_addr) 2214 { 2215 if (page_addr > xe_vma_end(vma) - 1 || 2216 page_addr + SZ_4K - 1 < xe_vma_start(vma)) 2217 return false; 2218 2219 return 
true; 2220 } 2221 2222 /** 2223 * xe_vm_find_vma_by_addr() - Find a VMA by its address 2224 * 2225 * @vm: the xe_vm the vma belongs to 2226 * @page_addr: address to look up 2227 */ 2228 struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr) 2229 { 2230 struct xe_vma *vma = NULL; 2231 2232 if (vm->usm.last_fault_vma) { /* Fast lookup */ 2233 if (vma_matches(vm->usm.last_fault_vma, page_addr)) 2234 vma = vm->usm.last_fault_vma; 2235 } 2236 if (!vma) 2237 vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K); 2238 2239 return vma; 2240 } 2241 2242 static const u32 region_to_mem_type[] = { 2243 XE_PL_TT, 2244 XE_PL_VRAM0, 2245 XE_PL_VRAM1, 2246 }; 2247 2248 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma, 2249 bool post_commit) 2250 { 2251 down_read(&vm->userptr.notifier_lock); 2252 vma->gpuva.flags |= XE_VMA_DESTROYED; 2253 up_read(&vm->userptr.notifier_lock); 2254 if (post_commit) 2255 xe_vm_remove_vma(vm, vma); 2256 } 2257 2258 #undef ULL 2259 #define ULL unsigned long long 2260 2261 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) 2262 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 2263 { 2264 struct xe_vma *vma; 2265 2266 switch (op->op) { 2267 case DRM_GPUVA_OP_MAP: 2268 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx", 2269 (ULL)op->map.va.addr, (ULL)op->map.va.range); 2270 break; 2271 case DRM_GPUVA_OP_REMAP: 2272 vma = gpuva_to_vma(op->remap.unmap->va); 2273 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 2274 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 2275 op->remap.unmap->keep ? 1 : 0); 2276 if (op->remap.prev) 2277 vm_dbg(&xe->drm, 2278 "REMAP:PREV: addr=0x%016llx, range=0x%016llx", 2279 (ULL)op->remap.prev->va.addr, 2280 (ULL)op->remap.prev->va.range); 2281 if (op->remap.next) 2282 vm_dbg(&xe->drm, 2283 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx", 2284 (ULL)op->remap.next->va.addr, 2285 (ULL)op->remap.next->va.range); 2286 break; 2287 case DRM_GPUVA_OP_UNMAP: 2288 vma = gpuva_to_vma(op->unmap.va); 2289 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 2290 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 2291 op->unmap.keep ? 1 : 0); 2292 break; 2293 case DRM_GPUVA_OP_PREFETCH: 2294 vma = gpuva_to_vma(op->prefetch.va); 2295 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx", 2296 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma)); 2297 break; 2298 default: 2299 drm_warn(&xe->drm, "NOT POSSIBLE"); 2300 } 2301 } 2302 #else 2303 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 2304 { 2305 } 2306 #endif 2307 2308 static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags) 2309 { 2310 if (!xe_vm_in_fault_mode(vm)) 2311 return false; 2312 2313 if (!xe_vm_has_scratch(vm)) 2314 return false; 2315 2316 if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE) 2317 return false; 2318 2319 return true; 2320 } 2321 2322 static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops) 2323 { 2324 struct drm_gpuva_op *__op; 2325 2326 drm_gpuva_for_each_op(__op, ops) { 2327 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2328 2329 xe_vma_svm_prefetch_op_fini(op); 2330 } 2331 } 2332 2333 /* 2334 * Create operations list from IOCTL arguments, setup operations fields so parse 2335 * and commit steps are decoupled from IOCTL arguments. This step can fail. 
2336 */ 2337 static struct drm_gpuva_ops * 2338 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, 2339 struct xe_bo *bo, u64 bo_offset_or_userptr, 2340 u64 addr, u64 range, 2341 u32 operation, u32 flags, 2342 u32 prefetch_region, u16 pat_index) 2343 { 2344 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL; 2345 struct drm_gpuva_ops *ops; 2346 struct drm_gpuva_op *__op; 2347 struct drm_gpuvm_bo *vm_bo; 2348 u64 range_end = addr + range; 2349 int err; 2350 2351 lockdep_assert_held_write(&vm->lock); 2352 2353 vm_dbg(&vm->xe->drm, 2354 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx", 2355 operation, (ULL)addr, (ULL)range, 2356 (ULL)bo_offset_or_userptr); 2357 2358 switch (operation) { 2359 case DRM_XE_VM_BIND_OP_MAP: 2360 case DRM_XE_VM_BIND_OP_MAP_USERPTR: 2361 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range, 2362 obj, bo_offset_or_userptr); 2363 break; 2364 case DRM_XE_VM_BIND_OP_UNMAP: 2365 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range); 2366 break; 2367 case DRM_XE_VM_BIND_OP_PREFETCH: 2368 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range); 2369 break; 2370 case DRM_XE_VM_BIND_OP_UNMAP_ALL: 2371 xe_assert(vm->xe, bo); 2372 2373 err = xe_bo_lock(bo, true); 2374 if (err) 2375 return ERR_PTR(err); 2376 2377 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj); 2378 if (IS_ERR(vm_bo)) { 2379 xe_bo_unlock(bo); 2380 return ERR_CAST(vm_bo); 2381 } 2382 2383 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo); 2384 drm_gpuvm_bo_put(vm_bo); 2385 xe_bo_unlock(bo); 2386 break; 2387 default: 2388 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2389 ops = ERR_PTR(-EINVAL); 2390 } 2391 if (IS_ERR(ops)) 2392 return ops; 2393 2394 drm_gpuva_for_each_op(__op, ops) { 2395 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2396 2397 if (__op->op == DRM_GPUVA_OP_MAP) { 2398 op->map.immediate = 2399 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; 2400 op->map.read_only = 2401 flags & DRM_XE_VM_BIND_FLAG_READONLY; 2402 op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 2403 op->map.is_cpu_addr_mirror = flags & 2404 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR; 2405 op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; 2406 op->map.pat_index = pat_index; 2407 op->map.invalidate_on_bind = 2408 __xe_vm_needs_clear_scratch_pages(vm, flags); 2409 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { 2410 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2411 struct xe_svm_range *svm_range; 2412 struct drm_gpusvm_ctx ctx = {}; 2413 struct xe_tile *tile; 2414 u8 id, tile_mask = 0; 2415 u32 i; 2416 2417 if (!xe_vma_is_cpu_addr_mirror(vma)) { 2418 op->prefetch.region = prefetch_region; 2419 break; 2420 } 2421 2422 ctx.read_only = xe_vma_read_only(vma); 2423 ctx.devmem_possible = IS_DGFX(vm->xe) && 2424 IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); 2425 2426 for_each_tile(tile, vm->xe, id) 2427 tile_mask |= 0x1 << id; 2428 2429 xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC); 2430 op->prefetch_range.region = prefetch_region; 2431 op->prefetch_range.ranges_count = 0; 2432 alloc_next_range: 2433 svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx); 2434 2435 if (PTR_ERR(svm_range) == -ENOENT) { 2436 u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma); 2437 2438 addr = ret == ULONG_MAX ? 
0 : ret; 2439 if (addr) 2440 goto alloc_next_range; 2441 else 2442 goto print_op_label; 2443 } 2444 2445 if (IS_ERR(svm_range)) { 2446 err = PTR_ERR(svm_range); 2447 goto unwind_prefetch_ops; 2448 } 2449 2450 if (xe_svm_range_validate(vm, svm_range, tile_mask, !!prefetch_region)) { 2451 xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID"); 2452 goto check_next_range; 2453 } 2454 2455 err = xa_alloc(&op->prefetch_range.range, 2456 &i, svm_range, xa_limit_32b, 2457 GFP_KERNEL); 2458 2459 if (err) 2460 goto unwind_prefetch_ops; 2461 2462 op->prefetch_range.ranges_count++; 2463 vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH; 2464 xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED"); 2465 check_next_range: 2466 if (range_end > xe_svm_range_end(svm_range) && 2467 xe_svm_range_end(svm_range) < xe_vma_end(vma)) { 2468 addr = xe_svm_range_end(svm_range); 2469 goto alloc_next_range; 2470 } 2471 } 2472 print_op_label: 2473 print_op(vm->xe, __op); 2474 } 2475 2476 return ops; 2477 2478 unwind_prefetch_ops: 2479 xe_svm_prefetch_gpuva_ops_fini(ops); 2480 drm_gpuva_ops_free(&vm->gpuvm, ops); 2481 return ERR_PTR(err); 2482 } 2483 2484 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO); 2485 2486 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, 2487 u16 pat_index, unsigned int flags) 2488 { 2489 struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL; 2490 struct drm_exec exec; 2491 struct xe_vma *vma; 2492 int err = 0; 2493 2494 lockdep_assert_held_write(&vm->lock); 2495 2496 if (bo) { 2497 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); 2498 drm_exec_until_all_locked(&exec) { 2499 err = 0; 2500 if (!bo->vm) { 2501 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm)); 2502 drm_exec_retry_on_contention(&exec); 2503 } 2504 if (!err) { 2505 err = drm_exec_lock_obj(&exec, &bo->ttm.base); 2506 drm_exec_retry_on_contention(&exec); 2507 } 2508 if (err) { 2509 drm_exec_fini(&exec); 2510 return ERR_PTR(err); 2511 } 2512 } 2513 } 2514 vma = xe_vma_create(vm, bo, op->gem.offset, 2515 op->va.addr, op->va.addr + 2516 op->va.range - 1, pat_index, flags); 2517 if (IS_ERR(vma)) 2518 goto err_unlock; 2519 2520 if (xe_vma_is_userptr(vma)) 2521 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2522 else if (!xe_vma_has_no_bo(vma) && !bo->vm) 2523 err = add_preempt_fences(vm, bo); 2524 2525 err_unlock: 2526 if (bo) 2527 drm_exec_fini(&exec); 2528 2529 if (err) { 2530 prep_vma_destroy(vm, vma, false); 2531 xe_vma_destroy_unlocked(vma); 2532 vma = ERR_PTR(err); 2533 } 2534 2535 return vma; 2536 } 2537 2538 static u64 xe_vma_max_pte_size(struct xe_vma *vma) 2539 { 2540 if (vma->gpuva.flags & XE_VMA_PTE_1G) 2541 return SZ_1G; 2542 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT)) 2543 return SZ_2M; 2544 else if (vma->gpuva.flags & XE_VMA_PTE_64K) 2545 return SZ_64K; 2546 else if (vma->gpuva.flags & XE_VMA_PTE_4K) 2547 return SZ_4K; 2548 2549 return SZ_1G; /* Uninitialized, used max size */ 2550 } 2551 2552 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size) 2553 { 2554 switch (size) { 2555 case SZ_1G: 2556 vma->gpuva.flags |= XE_VMA_PTE_1G; 2557 break; 2558 case SZ_2M: 2559 vma->gpuva.flags |= XE_VMA_PTE_2M; 2560 break; 2561 case SZ_64K: 2562 vma->gpuva.flags |= XE_VMA_PTE_64K; 2563 break; 2564 case SZ_4K: 2565 vma->gpuva.flags |= XE_VMA_PTE_4K; 2566 break; 2567 } 2568 } 2569 2570 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) 2571 { 2572 int err = 0; 2573 2574 lockdep_assert_held_write(&vm->lock); 2575 2576 switch 
(op->base.op) { 2577 case DRM_GPUVA_OP_MAP: 2578 err |= xe_vm_insert_vma(vm, op->map.vma); 2579 if (!err) 2580 op->flags |= XE_VMA_OP_COMMITTED; 2581 break; 2582 case DRM_GPUVA_OP_REMAP: 2583 { 2584 u8 tile_present = 2585 gpuva_to_vma(op->base.remap.unmap->va)->tile_present; 2586 2587 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va), 2588 true); 2589 op->flags |= XE_VMA_OP_COMMITTED; 2590 2591 if (op->remap.prev) { 2592 err |= xe_vm_insert_vma(vm, op->remap.prev); 2593 if (!err) 2594 op->flags |= XE_VMA_OP_PREV_COMMITTED; 2595 if (!err && op->remap.skip_prev) { 2596 op->remap.prev->tile_present = 2597 tile_present; 2598 op->remap.prev = NULL; 2599 } 2600 } 2601 if (op->remap.next) { 2602 err |= xe_vm_insert_vma(vm, op->remap.next); 2603 if (!err) 2604 op->flags |= XE_VMA_OP_NEXT_COMMITTED; 2605 if (!err && op->remap.skip_next) { 2606 op->remap.next->tile_present = 2607 tile_present; 2608 op->remap.next = NULL; 2609 } 2610 } 2611 2612 /* Adjust for partial unbind after removing VMA from VM */ 2613 if (!err) { 2614 op->base.remap.unmap->va->va.addr = op->remap.start; 2615 op->base.remap.unmap->va->va.range = op->remap.range; 2616 } 2617 break; 2618 } 2619 case DRM_GPUVA_OP_UNMAP: 2620 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true); 2621 op->flags |= XE_VMA_OP_COMMITTED; 2622 break; 2623 case DRM_GPUVA_OP_PREFETCH: 2624 op->flags |= XE_VMA_OP_COMMITTED; 2625 break; 2626 default: 2627 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2628 } 2629 2630 return err; 2631 } 2632 2633 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, 2634 struct xe_vma_ops *vops) 2635 { 2636 struct xe_device *xe = vm->xe; 2637 struct drm_gpuva_op *__op; 2638 struct xe_tile *tile; 2639 u8 id, tile_mask = 0; 2640 int err = 0; 2641 2642 lockdep_assert_held_write(&vm->lock); 2643 2644 for_each_tile(tile, vm->xe, id) 2645 tile_mask |= 0x1 << id; 2646 2647 drm_gpuva_for_each_op(__op, ops) { 2648 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2649 struct xe_vma *vma; 2650 unsigned int flags = 0; 2651 2652 INIT_LIST_HEAD(&op->link); 2653 list_add_tail(&op->link, &vops->list); 2654 op->tile_mask = tile_mask; 2655 2656 switch (op->base.op) { 2657 case DRM_GPUVA_OP_MAP: 2658 { 2659 flags |= op->map.read_only ? 2660 VMA_CREATE_FLAG_READ_ONLY : 0; 2661 flags |= op->map.is_null ? 2662 VMA_CREATE_FLAG_IS_NULL : 0; 2663 flags |= op->map.dumpable ? 2664 VMA_CREATE_FLAG_DUMPABLE : 0; 2665 flags |= op->map.is_cpu_addr_mirror ? 
2666 VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0; 2667 2668 vma = new_vma(vm, &op->base.map, op->map.pat_index, 2669 flags); 2670 if (IS_ERR(vma)) 2671 return PTR_ERR(vma); 2672 2673 op->map.vma = vma; 2674 if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) && 2675 !op->map.is_cpu_addr_mirror) || 2676 op->map.invalidate_on_bind) 2677 xe_vma_ops_incr_pt_update_ops(vops, 2678 op->tile_mask, 1); 2679 break; 2680 } 2681 case DRM_GPUVA_OP_REMAP: 2682 { 2683 struct xe_vma *old = 2684 gpuva_to_vma(op->base.remap.unmap->va); 2685 bool skip = xe_vma_is_cpu_addr_mirror(old); 2686 u64 start = xe_vma_start(old), end = xe_vma_end(old); 2687 int num_remap_ops = 0; 2688 2689 if (op->base.remap.prev) 2690 start = op->base.remap.prev->va.addr + 2691 op->base.remap.prev->va.range; 2692 if (op->base.remap.next) 2693 end = op->base.remap.next->va.addr; 2694 2695 if (xe_vma_is_cpu_addr_mirror(old) && 2696 xe_svm_has_mapping(vm, start, end)) 2697 return -EBUSY; 2698 2699 op->remap.start = xe_vma_start(old); 2700 op->remap.range = xe_vma_size(old); 2701 2702 flags |= op->base.remap.unmap->va->flags & 2703 XE_VMA_READ_ONLY ? 2704 VMA_CREATE_FLAG_READ_ONLY : 0; 2705 flags |= op->base.remap.unmap->va->flags & 2706 DRM_GPUVA_SPARSE ? 2707 VMA_CREATE_FLAG_IS_NULL : 0; 2708 flags |= op->base.remap.unmap->va->flags & 2709 XE_VMA_DUMPABLE ? 2710 VMA_CREATE_FLAG_DUMPABLE : 0; 2711 flags |= xe_vma_is_cpu_addr_mirror(old) ? 2712 VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0; 2713 2714 if (op->base.remap.prev) { 2715 vma = new_vma(vm, op->base.remap.prev, 2716 old->pat_index, flags); 2717 if (IS_ERR(vma)) 2718 return PTR_ERR(vma); 2719 2720 op->remap.prev = vma; 2721 2722 /* 2723 * Userptr creates a new SG mapping so 2724 * we must also rebind. 2725 */ 2726 op->remap.skip_prev = skip || 2727 (!xe_vma_is_userptr(old) && 2728 IS_ALIGNED(xe_vma_end(vma), 2729 xe_vma_max_pte_size(old))); 2730 if (op->remap.skip_prev) { 2731 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2732 op->remap.range -= 2733 xe_vma_end(vma) - 2734 xe_vma_start(old); 2735 op->remap.start = xe_vma_end(vma); 2736 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx", 2737 (ULL)op->remap.start, 2738 (ULL)op->remap.range); 2739 } else { 2740 num_remap_ops++; 2741 } 2742 } 2743 2744 if (op->base.remap.next) { 2745 vma = new_vma(vm, op->base.remap.next, 2746 old->pat_index, flags); 2747 if (IS_ERR(vma)) 2748 return PTR_ERR(vma); 2749 2750 op->remap.next = vma; 2751 2752 /* 2753 * Userptr creates a new SG mapping so 2754 * we must also rebind. 
2755 */ 2756 op->remap.skip_next = skip || 2757 (!xe_vma_is_userptr(old) && 2758 IS_ALIGNED(xe_vma_start(vma), 2759 xe_vma_max_pte_size(old))); 2760 if (op->remap.skip_next) { 2761 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2762 op->remap.range -= 2763 xe_vma_end(old) - 2764 xe_vma_start(vma); 2765 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx", 2766 (ULL)op->remap.start, 2767 (ULL)op->remap.range); 2768 } else { 2769 num_remap_ops++; 2770 } 2771 } 2772 if (!skip) 2773 num_remap_ops++; 2774 2775 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops); 2776 break; 2777 } 2778 case DRM_GPUVA_OP_UNMAP: 2779 vma = gpuva_to_vma(op->base.unmap.va); 2780 2781 if (xe_vma_is_cpu_addr_mirror(vma) && 2782 xe_svm_has_mapping(vm, xe_vma_start(vma), 2783 xe_vma_end(vma))) 2784 return -EBUSY; 2785 2786 if (!xe_vma_is_cpu_addr_mirror(vma)) 2787 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2788 break; 2789 case DRM_GPUVA_OP_PREFETCH: 2790 vma = gpuva_to_vma(op->base.prefetch.va); 2791 2792 if (xe_vma_is_userptr(vma)) { 2793 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2794 if (err) 2795 return err; 2796 } 2797 2798 if (xe_vma_is_cpu_addr_mirror(vma)) 2799 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 2800 op->prefetch_range.ranges_count); 2801 else 2802 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2803 2804 break; 2805 default: 2806 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2807 } 2808 2809 err = xe_vma_op_commit(vm, op); 2810 if (err) 2811 return err; 2812 } 2813 2814 return 0; 2815 } 2816 2817 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, 2818 bool post_commit, bool prev_post_commit, 2819 bool next_post_commit) 2820 { 2821 lockdep_assert_held_write(&vm->lock); 2822 2823 switch (op->base.op) { 2824 case DRM_GPUVA_OP_MAP: 2825 if (op->map.vma) { 2826 prep_vma_destroy(vm, op->map.vma, post_commit); 2827 xe_vma_destroy_unlocked(op->map.vma); 2828 } 2829 break; 2830 case DRM_GPUVA_OP_UNMAP: 2831 { 2832 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va); 2833 2834 if (vma) { 2835 down_read(&vm->userptr.notifier_lock); 2836 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2837 up_read(&vm->userptr.notifier_lock); 2838 if (post_commit) 2839 xe_vm_insert_vma(vm, vma); 2840 } 2841 break; 2842 } 2843 case DRM_GPUVA_OP_REMAP: 2844 { 2845 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va); 2846 2847 if (op->remap.prev) { 2848 prep_vma_destroy(vm, op->remap.prev, prev_post_commit); 2849 xe_vma_destroy_unlocked(op->remap.prev); 2850 } 2851 if (op->remap.next) { 2852 prep_vma_destroy(vm, op->remap.next, next_post_commit); 2853 xe_vma_destroy_unlocked(op->remap.next); 2854 } 2855 if (vma) { 2856 down_read(&vm->userptr.notifier_lock); 2857 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2858 up_read(&vm->userptr.notifier_lock); 2859 if (post_commit) 2860 xe_vm_insert_vma(vm, vma); 2861 } 2862 break; 2863 } 2864 case DRM_GPUVA_OP_PREFETCH: 2865 /* Nothing to do */ 2866 break; 2867 default: 2868 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2869 } 2870 } 2871 2872 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, 2873 struct drm_gpuva_ops **ops, 2874 int num_ops_list) 2875 { 2876 int i; 2877 2878 for (i = num_ops_list - 1; i >= 0; --i) { 2879 struct drm_gpuva_ops *__ops = ops[i]; 2880 struct drm_gpuva_op *__op; 2881 2882 if (!__ops) 2883 continue; 2884 2885 drm_gpuva_for_each_op_reverse(__op, __ops) { 2886 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2887 2888 xe_vma_op_unwind(vm, op, 2889 op->flags & XE_VMA_OP_COMMITTED, 2890 op->flags 
& XE_VMA_OP_PREV_COMMITTED, 2891 op->flags & XE_VMA_OP_NEXT_COMMITTED); 2892 } 2893 } 2894 } 2895 2896 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, 2897 bool validate) 2898 { 2899 struct xe_bo *bo = xe_vma_bo(vma); 2900 struct xe_vm *vm = xe_vma_vm(vma); 2901 int err = 0; 2902 2903 if (bo) { 2904 if (!bo->vm) 2905 err = drm_exec_lock_obj(exec, &bo->ttm.base); 2906 if (!err && validate) 2907 err = xe_bo_validate(bo, vm, 2908 !xe_vm_in_preempt_fence_mode(vm)); 2909 } 2910 2911 return err; 2912 } 2913 2914 static int check_ufence(struct xe_vma *vma) 2915 { 2916 if (vma->ufence) { 2917 struct xe_user_fence * const f = vma->ufence; 2918 2919 if (!xe_sync_ufence_get_status(f)) 2920 return -EBUSY; 2921 2922 vma->ufence = NULL; 2923 xe_sync_ufence_put(f); 2924 } 2925 2926 return 0; 2927 } 2928 2929 static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) 2930 { 2931 bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); 2932 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2933 int err = 0; 2934 2935 struct xe_svm_range *svm_range; 2936 struct drm_gpusvm_ctx ctx = {}; 2937 struct xe_tile *tile; 2938 unsigned long i; 2939 u32 region; 2940 2941 if (!xe_vma_is_cpu_addr_mirror(vma)) 2942 return 0; 2943 2944 region = op->prefetch_range.region; 2945 2946 ctx.read_only = xe_vma_read_only(vma); 2947 ctx.devmem_possible = devmem_possible; 2948 ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0; 2949 2950 /* TODO: Threading the migration */ 2951 xa_for_each(&op->prefetch_range.range, i, svm_range) { 2952 if (!region) 2953 xe_svm_range_migrate_to_smem(vm, svm_range); 2954 2955 if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, region)) { 2956 tile = &vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0]; 2957 err = xe_svm_alloc_vram(tile, svm_range, &ctx); 2958 if (err) { 2959 drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n", 2960 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 2961 return -ENODATA; 2962 } 2963 xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM"); 2964 } 2965 2966 err = xe_svm_range_get_pages(vm, svm_range, &ctx); 2967 if (err) { 2968 drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n", 2969 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 2970 if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) 2971 err = -ENODATA; 2972 return err; 2973 } 2974 xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE"); 2975 } 2976 2977 return err; 2978 } 2979 2980 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, 2981 struct xe_vma_op *op) 2982 { 2983 int err = 0; 2984 2985 switch (op->base.op) { 2986 case DRM_GPUVA_OP_MAP: 2987 if (!op->map.invalidate_on_bind) 2988 err = vma_lock_and_validate(exec, op->map.vma, 2989 !xe_vm_in_fault_mode(vm) || 2990 op->map.immediate); 2991 break; 2992 case DRM_GPUVA_OP_REMAP: 2993 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va)); 2994 if (err) 2995 break; 2996 2997 err = vma_lock_and_validate(exec, 2998 gpuva_to_vma(op->base.remap.unmap->va), 2999 false); 3000 if (!err && op->remap.prev) 3001 err = vma_lock_and_validate(exec, op->remap.prev, true); 3002 if (!err && op->remap.next) 3003 err = vma_lock_and_validate(exec, op->remap.next, true); 3004 break; 3005 case DRM_GPUVA_OP_UNMAP: 3006 err = check_ufence(gpuva_to_vma(op->base.unmap.va)); 3007 if (err) 3008 break; 3009 3010 err = vma_lock_and_validate(exec, 3011 gpuva_to_vma(op->base.unmap.va), 3012 false); 3013 
break; 3014 case DRM_GPUVA_OP_PREFETCH: 3015 { 3016 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 3017 u32 region; 3018 3019 if (xe_vma_is_cpu_addr_mirror(vma)) 3020 region = op->prefetch_range.region; 3021 else 3022 region = op->prefetch.region; 3023 3024 xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type)); 3025 3026 err = vma_lock_and_validate(exec, 3027 gpuva_to_vma(op->base.prefetch.va), 3028 false); 3029 if (!err && !xe_vma_has_no_bo(vma)) 3030 err = xe_bo_migrate(xe_vma_bo(vma), 3031 region_to_mem_type[region]); 3032 break; 3033 } 3034 default: 3035 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 3036 } 3037 3038 return err; 3039 } 3040 3041 static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops) 3042 { 3043 struct xe_vma_op *op; 3044 int err; 3045 3046 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) 3047 return 0; 3048 3049 list_for_each_entry(op, &vops->list, link) { 3050 if (op->base.op == DRM_GPUVA_OP_PREFETCH) { 3051 err = prefetch_ranges(vm, op); 3052 if (err) 3053 return err; 3054 } 3055 } 3056 3057 return 0; 3058 } 3059 3060 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, 3061 struct xe_vm *vm, 3062 struct xe_vma_ops *vops) 3063 { 3064 struct xe_vma_op *op; 3065 int err; 3066 3067 err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 3068 if (err) 3069 return err; 3070 3071 list_for_each_entry(op, &vops->list, link) { 3072 err = op_lock_and_prep(exec, vm, op); 3073 if (err) 3074 return err; 3075 } 3076 3077 #ifdef TEST_VM_OPS_ERROR 3078 if (vops->inject_error && 3079 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK) 3080 return -ENOSPC; 3081 #endif 3082 3083 return 0; 3084 } 3085 3086 static void op_trace(struct xe_vma_op *op) 3087 { 3088 switch (op->base.op) { 3089 case DRM_GPUVA_OP_MAP: 3090 trace_xe_vma_bind(op->map.vma); 3091 break; 3092 case DRM_GPUVA_OP_REMAP: 3093 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va)); 3094 if (op->remap.prev) 3095 trace_xe_vma_bind(op->remap.prev); 3096 if (op->remap.next) 3097 trace_xe_vma_bind(op->remap.next); 3098 break; 3099 case DRM_GPUVA_OP_UNMAP: 3100 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va)); 3101 break; 3102 case DRM_GPUVA_OP_PREFETCH: 3103 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va)); 3104 break; 3105 case DRM_GPUVA_OP_DRIVER: 3106 break; 3107 default: 3108 XE_WARN_ON("NOT POSSIBLE"); 3109 } 3110 } 3111 3112 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops) 3113 { 3114 struct xe_vma_op *op; 3115 3116 list_for_each_entry(op, &vops->list, link) 3117 op_trace(op); 3118 } 3119 3120 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops) 3121 { 3122 struct xe_exec_queue *q = vops->q; 3123 struct xe_tile *tile; 3124 int number_tiles = 0; 3125 u8 id; 3126 3127 for_each_tile(tile, vm->xe, id) { 3128 if (vops->pt_update_ops[id].num_ops) 3129 ++number_tiles; 3130 3131 if (vops->pt_update_ops[id].q) 3132 continue; 3133 3134 if (q) { 3135 vops->pt_update_ops[id].q = q; 3136 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list)) 3137 q = list_next_entry(q, multi_gt_list); 3138 } else { 3139 vops->pt_update_ops[id].q = vm->q[id]; 3140 } 3141 } 3142 3143 return number_tiles; 3144 } 3145 3146 static struct dma_fence *ops_execute(struct xe_vm *vm, 3147 struct xe_vma_ops *vops) 3148 { 3149 struct xe_tile *tile; 3150 struct dma_fence *fence = NULL; 3151 struct dma_fence **fences = NULL; 3152 struct dma_fence_array *cf = NULL; 3153 int number_tiles = 0, current_fence = 0, err; 3154 u8 id; 3155 3156 number_tiles = 
vm_ops_setup_tile_args(vm, vops); 3157 if (number_tiles == 0) 3158 return ERR_PTR(-ENODATA); 3159 3160 if (number_tiles > 1) { 3161 fences = kmalloc_array(number_tiles, sizeof(*fences), 3162 GFP_KERNEL); 3163 if (!fences) { 3164 fence = ERR_PTR(-ENOMEM); 3165 goto err_trace; 3166 } 3167 } 3168 3169 for_each_tile(tile, vm->xe, id) { 3170 if (!vops->pt_update_ops[id].num_ops) 3171 continue; 3172 3173 err = xe_pt_update_ops_prepare(tile, vops); 3174 if (err) { 3175 fence = ERR_PTR(err); 3176 goto err_out; 3177 } 3178 } 3179 3180 trace_xe_vm_ops_execute(vops); 3181 3182 for_each_tile(tile, vm->xe, id) { 3183 if (!vops->pt_update_ops[id].num_ops) 3184 continue; 3185 3186 fence = xe_pt_update_ops_run(tile, vops); 3187 if (IS_ERR(fence)) 3188 goto err_out; 3189 3190 if (fences) 3191 fences[current_fence++] = fence; 3192 } 3193 3194 if (fences) { 3195 cf = dma_fence_array_create(number_tiles, fences, 3196 vm->composite_fence_ctx, 3197 vm->composite_fence_seqno++, 3198 false); 3199 if (!cf) { 3200 --vm->composite_fence_seqno; 3201 fence = ERR_PTR(-ENOMEM); 3202 goto err_out; 3203 } 3204 fence = &cf->base; 3205 } 3206 3207 for_each_tile(tile, vm->xe, id) { 3208 if (!vops->pt_update_ops[id].num_ops) 3209 continue; 3210 3211 xe_pt_update_ops_fini(tile, vops); 3212 } 3213 3214 return fence; 3215 3216 err_out: 3217 for_each_tile(tile, vm->xe, id) { 3218 if (!vops->pt_update_ops[id].num_ops) 3219 continue; 3220 3221 xe_pt_update_ops_abort(tile, vops); 3222 } 3223 while (current_fence) 3224 dma_fence_put(fences[--current_fence]); 3225 kfree(fences); 3226 kfree(cf); 3227 3228 err_trace: 3229 trace_xe_vm_ops_fail(vm); 3230 return fence; 3231 } 3232 3233 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence) 3234 { 3235 if (vma->ufence) 3236 xe_sync_ufence_put(vma->ufence); 3237 vma->ufence = __xe_sync_ufence_get(ufence); 3238 } 3239 3240 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op, 3241 struct xe_user_fence *ufence) 3242 { 3243 switch (op->base.op) { 3244 case DRM_GPUVA_OP_MAP: 3245 vma_add_ufence(op->map.vma, ufence); 3246 break; 3247 case DRM_GPUVA_OP_REMAP: 3248 if (op->remap.prev) 3249 vma_add_ufence(op->remap.prev, ufence); 3250 if (op->remap.next) 3251 vma_add_ufence(op->remap.next, ufence); 3252 break; 3253 case DRM_GPUVA_OP_UNMAP: 3254 break; 3255 case DRM_GPUVA_OP_PREFETCH: 3256 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence); 3257 break; 3258 default: 3259 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 3260 } 3261 } 3262 3263 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, 3264 struct dma_fence *fence) 3265 { 3266 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q); 3267 struct xe_user_fence *ufence; 3268 struct xe_vma_op *op; 3269 int i; 3270 3271 ufence = find_ufence_get(vops->syncs, vops->num_syncs); 3272 list_for_each_entry(op, &vops->list, link) { 3273 if (ufence) 3274 op_add_ufence(vm, op, ufence); 3275 3276 if (op->base.op == DRM_GPUVA_OP_UNMAP) 3277 xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence); 3278 else if (op->base.op == DRM_GPUVA_OP_REMAP) 3279 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), 3280 fence); 3281 } 3282 if (ufence) 3283 xe_sync_ufence_put(ufence); 3284 if (fence) { 3285 for (i = 0; i < vops->num_syncs; i++) 3286 xe_sync_entry_signal(vops->syncs + i, fence); 3287 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); 3288 } 3289 } 3290 3291 static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm, 3292 struct xe_vma_ops *vops) 3293 { 3294 struct 
drm_exec exec; 3295 struct dma_fence *fence; 3296 int err; 3297 3298 lockdep_assert_held_write(&vm->lock); 3299 3300 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | 3301 DRM_EXEC_IGNORE_DUPLICATES, 0); 3302 drm_exec_until_all_locked(&exec) { 3303 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops); 3304 drm_exec_retry_on_contention(&exec); 3305 if (err) { 3306 fence = ERR_PTR(err); 3307 goto unlock; 3308 } 3309 3310 fence = ops_execute(vm, vops); 3311 if (IS_ERR(fence)) { 3312 if (PTR_ERR(fence) == -ENODATA) 3313 vm_bind_ioctl_ops_fini(vm, vops, NULL); 3314 goto unlock; 3315 } 3316 3317 vm_bind_ioctl_ops_fini(vm, vops, fence); 3318 } 3319 3320 unlock: 3321 drm_exec_fini(&exec); 3322 return fence; 3323 } 3324 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); 3325 3326 #define SUPPORTED_FLAGS_STUB \ 3327 (DRM_XE_VM_BIND_FLAG_READONLY | \ 3328 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \ 3329 DRM_XE_VM_BIND_FLAG_NULL | \ 3330 DRM_XE_VM_BIND_FLAG_DUMPABLE | \ 3331 DRM_XE_VM_BIND_FLAG_CHECK_PXP | \ 3332 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) 3333 3334 #ifdef TEST_VM_OPS_ERROR 3335 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR) 3336 #else 3337 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB 3338 #endif 3339 3340 #define XE_64K_PAGE_MASK 0xffffull 3341 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) 3342 3343 static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, 3344 struct drm_xe_vm_bind *args, 3345 struct drm_xe_vm_bind_op **bind_ops) 3346 { 3347 int err; 3348 int i; 3349 3350 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) || 3351 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 3352 return -EINVAL; 3353 3354 if (XE_IOCTL_DBG(xe, args->extensions)) 3355 return -EINVAL; 3356 3357 if (args->num_binds > 1) { 3358 u64 __user *bind_user = 3359 u64_to_user_ptr(args->vector_of_binds); 3360 3361 *bind_ops = kvmalloc_array(args->num_binds, 3362 sizeof(struct drm_xe_vm_bind_op), 3363 GFP_KERNEL | __GFP_ACCOUNT | 3364 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3365 if (!*bind_ops) 3366 return args->num_binds > 1 ? 
-ENOBUFS : -ENOMEM; 3367 3368 err = copy_from_user(*bind_ops, bind_user, 3369 sizeof(struct drm_xe_vm_bind_op) * 3370 args->num_binds); 3371 if (XE_IOCTL_DBG(xe, err)) { 3372 err = -EFAULT; 3373 goto free_bind_ops; 3374 } 3375 } else { 3376 *bind_ops = &args->bind; 3377 } 3378 3379 for (i = 0; i < args->num_binds; ++i) { 3380 u64 range = (*bind_ops)[i].range; 3381 u64 addr = (*bind_ops)[i].addr; 3382 u32 op = (*bind_ops)[i].op; 3383 u32 flags = (*bind_ops)[i].flags; 3384 u32 obj = (*bind_ops)[i].obj; 3385 u64 obj_offset = (*bind_ops)[i].obj_offset; 3386 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance; 3387 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 3388 bool is_cpu_addr_mirror = flags & 3389 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR; 3390 u16 pat_index = (*bind_ops)[i].pat_index; 3391 u16 coh_mode; 3392 3393 if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror && 3394 (!xe_vm_in_fault_mode(vm) || 3395 !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) { 3396 err = -EINVAL; 3397 goto free_bind_ops; 3398 } 3399 3400 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) { 3401 err = -EINVAL; 3402 goto free_bind_ops; 3403 } 3404 3405 pat_index = array_index_nospec(pat_index, xe->pat.n_entries); 3406 (*bind_ops)[i].pat_index = pat_index; 3407 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); 3408 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */ 3409 err = -EINVAL; 3410 goto free_bind_ops; 3411 } 3412 3413 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) { 3414 err = -EINVAL; 3415 goto free_bind_ops; 3416 } 3417 3418 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) || 3419 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) || 3420 XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) || 3421 XE_IOCTL_DBG(xe, obj_offset && (is_null || 3422 is_cpu_addr_mirror)) || 3423 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP && 3424 (is_null || is_cpu_addr_mirror)) || 3425 XE_IOCTL_DBG(xe, !obj && 3426 op == DRM_XE_VM_BIND_OP_MAP && 3427 !is_null && !is_cpu_addr_mirror) || 3428 XE_IOCTL_DBG(xe, !obj && 3429 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3430 XE_IOCTL_DBG(xe, addr && 3431 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3432 XE_IOCTL_DBG(xe, range && 3433 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3434 XE_IOCTL_DBG(xe, obj && 3435 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 3436 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 3437 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 3438 XE_IOCTL_DBG(xe, obj && 3439 op == DRM_XE_VM_BIND_OP_PREFETCH) || 3440 XE_IOCTL_DBG(xe, prefetch_region && 3441 op != DRM_XE_VM_BIND_OP_PREFETCH) || 3442 XE_IOCTL_DBG(xe, !(BIT(prefetch_region) & 3443 xe->info.mem_region_mask)) || 3444 XE_IOCTL_DBG(xe, obj && 3445 op == DRM_XE_VM_BIND_OP_UNMAP)) { 3446 err = -EINVAL; 3447 goto free_bind_ops; 3448 } 3449 3450 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) || 3451 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) || 3452 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) || 3453 XE_IOCTL_DBG(xe, !range && 3454 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) { 3455 err = -EINVAL; 3456 goto free_bind_ops; 3457 } 3458 } 3459 3460 return 0; 3461 3462 free_bind_ops: 3463 if (args->num_binds > 1) 3464 kvfree(*bind_ops); 3465 *bind_ops = NULL; 3466 return err; 3467 } 3468 3469 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, 3470 struct xe_exec_queue *q, 3471 struct xe_sync_entry *syncs, 3472 int num_syncs) 3473 { 3474 struct dma_fence *fence; 3475 int i, err = 0; 3476 3477 fence = xe_sync_in_fence_get(syncs, num_syncs, 3478 to_wait_exec_queue(vm, q), vm); 3479 if (IS_ERR(fence)) 3480 return PTR_ERR(fence); 3481 3482 for (i = 0; i < num_syncs; i++) 
3483 xe_sync_entry_signal(&syncs[i], fence); 3484 3485 xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm, 3486 fence); 3487 dma_fence_put(fence); 3488 3489 return err; 3490 } 3491 3492 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 3493 struct xe_exec_queue *q, 3494 struct xe_sync_entry *syncs, u32 num_syncs) 3495 { 3496 memset(vops, 0, sizeof(*vops)); 3497 INIT_LIST_HEAD(&vops->list); 3498 vops->vm = vm; 3499 vops->q = q; 3500 vops->syncs = syncs; 3501 vops->num_syncs = num_syncs; 3502 vops->flags = 0; 3503 } 3504 3505 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, 3506 u64 addr, u64 range, u64 obj_offset, 3507 u16 pat_index, u32 op, u32 bind_flags) 3508 { 3509 u16 coh_mode; 3510 3511 if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) || 3512 XE_IOCTL_DBG(xe, obj_offset > 3513 xe_bo_size(bo) - range)) { 3514 return -EINVAL; 3515 } 3516 3517 /* 3518 * Some platforms require 64k VM_BIND alignment, 3519 * specifically those with XE_VRAM_FLAGS_NEED64K. 3520 * 3521 * Other platforms may have BOs set to 64k physical placement, 3522 * but can be mapped at 4k offsets anyway. This check is only 3523 * there for the former case. 3524 */ 3525 if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) && 3526 (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) { 3527 if (XE_IOCTL_DBG(xe, obj_offset & 3528 XE_64K_PAGE_MASK) || 3529 XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || 3530 XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) { 3531 return -EINVAL; 3532 } 3533 } 3534 3535 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); 3536 if (bo->cpu_caching) { 3537 if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 3538 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) { 3539 return -EINVAL; 3540 } 3541 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) { 3542 /* 3543 * Imported dma-buf from a different device should 3544 * require 1way or 2way coherency since we don't know 3545 * how it was mapped on the CPU. Just assume it is 3546 * potentially cached on the CPU side.
3547 */ 3548 return -EINVAL; 3549 } 3550 3551 /* If a BO is protected it can only be mapped if the key is still valid */ 3552 if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) && 3553 op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL) 3554 if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0)) 3555 return -ENOEXEC; 3556 3557 return 0; 3558 } 3559 3560 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3561 { 3562 struct xe_device *xe = to_xe_device(dev); 3563 struct xe_file *xef = to_xe_file(file); 3564 struct drm_xe_vm_bind *args = data; 3565 struct drm_xe_sync __user *syncs_user; 3566 struct xe_bo **bos = NULL; 3567 struct drm_gpuva_ops **ops = NULL; 3568 struct xe_vm *vm; 3569 struct xe_exec_queue *q = NULL; 3570 u32 num_syncs, num_ufence = 0; 3571 struct xe_sync_entry *syncs = NULL; 3572 struct drm_xe_vm_bind_op *bind_ops = NULL; 3573 struct xe_vma_ops vops; 3574 struct dma_fence *fence; 3575 int err; 3576 int i; 3577 3578 vm = xe_vm_lookup(xef, args->vm_id); 3579 if (XE_IOCTL_DBG(xe, !vm)) 3580 return -EINVAL; 3581 3582 err = vm_bind_ioctl_check_args(xe, vm, args, &bind_ops); 3583 if (err) 3584 goto put_vm; 3585 3586 if (args->exec_queue_id) { 3587 q = xe_exec_queue_lookup(xef, args->exec_queue_id); 3588 if (XE_IOCTL_DBG(xe, !q)) { 3589 err = -ENOENT; 3590 goto put_vm; 3591 } 3592 3593 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) { 3594 err = -EINVAL; 3595 goto put_exec_queue; 3596 } 3597 } 3598 3599 /* Ensure all UNMAPs visible */ 3600 xe_svm_flush(vm); 3601 3602 err = down_write_killable(&vm->lock); 3603 if (err) 3604 goto put_exec_queue; 3605 3606 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { 3607 err = -ENOENT; 3608 goto release_vm_lock; 3609 } 3610 3611 for (i = 0; i < args->num_binds; ++i) { 3612 u64 range = bind_ops[i].range; 3613 u64 addr = bind_ops[i].addr; 3614 3615 if (XE_IOCTL_DBG(xe, range > vm->size) || 3616 XE_IOCTL_DBG(xe, addr > vm->size - range)) { 3617 err = -EINVAL; 3618 goto release_vm_lock; 3619 } 3620 } 3621 3622 if (args->num_binds) { 3623 bos = kvcalloc(args->num_binds, sizeof(*bos), 3624 GFP_KERNEL | __GFP_ACCOUNT | 3625 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3626 if (!bos) { 3627 err = -ENOMEM; 3628 goto release_vm_lock; 3629 } 3630 3631 ops = kvcalloc(args->num_binds, sizeof(*ops), 3632 GFP_KERNEL | __GFP_ACCOUNT | 3633 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3634 if (!ops) { 3635 err = -ENOMEM; 3636 goto release_vm_lock; 3637 } 3638 } 3639 3640 for (i = 0; i < args->num_binds; ++i) { 3641 struct drm_gem_object *gem_obj; 3642 u64 range = bind_ops[i].range; 3643 u64 addr = bind_ops[i].addr; 3644 u32 obj = bind_ops[i].obj; 3645 u64 obj_offset = bind_ops[i].obj_offset; 3646 u16 pat_index = bind_ops[i].pat_index; 3647 u32 op = bind_ops[i].op; 3648 u32 bind_flags = bind_ops[i].flags; 3649 3650 if (!obj) 3651 continue; 3652 3653 gem_obj = drm_gem_object_lookup(file, obj); 3654 if (XE_IOCTL_DBG(xe, !gem_obj)) { 3655 err = -ENOENT; 3656 goto put_obj; 3657 } 3658 bos[i] = gem_to_xe_bo(gem_obj); 3659 3660 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range, 3661 obj_offset, pat_index, op, 3662 bind_flags); 3663 if (err) 3664 goto put_obj; 3665 } 3666 3667 if (args->num_syncs) { 3668 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); 3669 if (!syncs) { 3670 err = -ENOMEM; 3671 goto put_obj; 3672 } 3673 } 3674 3675 syncs_user = u64_to_user_ptr(args->syncs); 3676 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { 3677 err = xe_sync_entry_parse(xe, 
xef, &syncs[num_syncs], 3678 &syncs_user[num_syncs], 3679 (xe_vm_in_lr_mode(vm) ? 3680 SYNC_PARSE_FLAG_LR_MODE : 0) | 3681 (!args->num_binds ? 3682 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0)); 3683 if (err) 3684 goto free_syncs; 3685 3686 if (xe_sync_is_ufence(&syncs[num_syncs])) 3687 num_ufence++; 3688 } 3689 3690 if (XE_IOCTL_DBG(xe, num_ufence > 1)) { 3691 err = -EINVAL; 3692 goto free_syncs; 3693 } 3694 3695 if (!args->num_binds) { 3696 err = -ENODATA; 3697 goto free_syncs; 3698 } 3699 3700 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); 3701 for (i = 0; i < args->num_binds; ++i) { 3702 u64 range = bind_ops[i].range; 3703 u64 addr = bind_ops[i].addr; 3704 u32 op = bind_ops[i].op; 3705 u32 flags = bind_ops[i].flags; 3706 u64 obj_offset = bind_ops[i].obj_offset; 3707 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance; 3708 u16 pat_index = bind_ops[i].pat_index; 3709 3710 ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset, 3711 addr, range, op, flags, 3712 prefetch_region, pat_index); 3713 if (IS_ERR(ops[i])) { 3714 err = PTR_ERR(ops[i]); 3715 ops[i] = NULL; 3716 goto unwind_ops; 3717 } 3718 3719 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops); 3720 if (err) 3721 goto unwind_ops; 3722 3723 #ifdef TEST_VM_OPS_ERROR 3724 if (flags & FORCE_OP_ERROR) { 3725 vops.inject_error = true; 3726 vm->xe->vm_inject_error_position = 3727 (vm->xe->vm_inject_error_position + 1) % 3728 FORCE_OP_ERROR_COUNT; 3729 } 3730 #endif 3731 } 3732 3733 /* Nothing to do */ 3734 if (list_empty(&vops.list)) { 3735 err = -ENODATA; 3736 goto unwind_ops; 3737 } 3738 3739 err = xe_vma_ops_alloc(&vops, args->num_binds > 1); 3740 if (err) 3741 goto unwind_ops; 3742 3743 err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops); 3744 if (err) 3745 goto unwind_ops; 3746 3747 fence = vm_bind_ioctl_ops_execute(vm, &vops); 3748 if (IS_ERR(fence)) 3749 err = PTR_ERR(fence); 3750 else 3751 dma_fence_put(fence); 3752 3753 unwind_ops: 3754 if (err && err != -ENODATA) 3755 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); 3756 xe_vma_ops_fini(&vops); 3757 for (i = args->num_binds - 1; i >= 0; --i) 3758 if (ops[i]) 3759 drm_gpuva_ops_free(&vm->gpuvm, ops[i]); 3760 free_syncs: 3761 if (err == -ENODATA) 3762 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs); 3763 while (num_syncs--) 3764 xe_sync_entry_cleanup(&syncs[num_syncs]); 3765 3766 kfree(syncs); 3767 put_obj: 3768 for (i = 0; i < args->num_binds; ++i) 3769 xe_bo_put(bos[i]); 3770 release_vm_lock: 3771 up_write(&vm->lock); 3772 put_exec_queue: 3773 if (q) 3774 xe_exec_queue_put(q); 3775 put_vm: 3776 xe_vm_put(vm); 3777 kvfree(bos); 3778 kvfree(ops); 3779 if (args->num_binds > 1) 3780 kvfree(bind_ops); 3781 return err; 3782 } 3783 3784 /** 3785 * xe_vm_bind_kernel_bo - bind a kernel BO to a VM 3786 * @vm: VM to bind the BO to 3787 * @bo: BO to bind 3788 * @q: exec queue to use for the bind (optional) 3789 * @addr: address at which to bind the BO 3790 * @cache_lvl: PAT cache level to use 3791 * 3792 * Execute a VM bind map operation on a kernel-owned BO to bind it into a 3793 * kernel-owned VM. 3794 * 3795 * Returns a dma_fence to track the binding completion if the job to do so was 3796 * successfully submitted, an error pointer otherwise. 
3797 */ 3798 struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo, 3799 struct xe_exec_queue *q, u64 addr, 3800 enum xe_cache_level cache_lvl) 3801 { 3802 struct xe_vma_ops vops; 3803 struct drm_gpuva_ops *ops = NULL; 3804 struct dma_fence *fence; 3805 int err; 3806 3807 xe_bo_get(bo); 3808 xe_vm_get(vm); 3809 if (q) 3810 xe_exec_queue_get(q); 3811 3812 down_write(&vm->lock); 3813 3814 xe_vma_ops_init(&vops, vm, q, NULL, 0); 3815 3816 ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo), 3817 DRM_XE_VM_BIND_OP_MAP, 0, 0, 3818 vm->xe->pat.idx[cache_lvl]); 3819 if (IS_ERR(ops)) { 3820 err = PTR_ERR(ops); 3821 goto release_vm_lock; 3822 } 3823 3824 err = vm_bind_ioctl_ops_parse(vm, ops, &vops); 3825 if (err) 3826 goto release_vm_lock; 3827 3828 xe_assert(vm->xe, !list_empty(&vops.list)); 3829 3830 err = xe_vma_ops_alloc(&vops, false); 3831 if (err) 3832 goto unwind_ops; 3833 3834 fence = vm_bind_ioctl_ops_execute(vm, &vops); 3835 if (IS_ERR(fence)) 3836 err = PTR_ERR(fence); 3837 3838 unwind_ops: 3839 if (err && err != -ENODATA) 3840 vm_bind_ioctl_ops_unwind(vm, &ops, 1); 3841 3842 xe_vma_ops_fini(&vops); 3843 drm_gpuva_ops_free(&vm->gpuvm, ops); 3844 3845 release_vm_lock: 3846 up_write(&vm->lock); 3847 3848 if (q) 3849 xe_exec_queue_put(q); 3850 xe_vm_put(vm); 3851 xe_bo_put(bo); 3852 3853 if (err) 3854 fence = ERR_PTR(err); 3855 3856 return fence; 3857 } 3858 3859 /** 3860 * xe_vm_lock() - Lock the vm's dma_resv object 3861 * @vm: The struct xe_vm whose lock is to be locked 3862 * @intr: Whether to perform any wait interruptibly 3863 * 3864 * Return: 0 on success, -EINTR if @intr is true and the wait for a 3865 * contended lock was interrupted. If @intr is false, the function 3866 * always returns 0. 3867 */ 3868 int xe_vm_lock(struct xe_vm *vm, bool intr) 3869 { 3870 if (intr) 3871 return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); 3872 3873 return dma_resv_lock(xe_vm_resv(vm), NULL); 3874 } 3875 3876 /** 3877 * xe_vm_unlock() - Unlock the vm's dma_resv object 3878 * @vm: The struct xe_vm whose lock is to be released. 3879 * 3880 * Unlock the vm's dma_resv object that was locked by xe_vm_lock(). 3881 */ 3882 void xe_vm_unlock(struct xe_vm *vm) 3883 { 3884 dma_resv_unlock(xe_vm_resv(vm)); 3885 } 3886 3887 /** 3888 * xe_vm_range_tilemask_tlb_invalidation - Issue a TLB invalidation on this tilemask for an 3889 * address range 3890 * @vm: The VM 3891 * @start: start address 3892 * @end: end address 3893 * @tile_mask: mask of tiles whose GTs should receive the TLB invalidation 3894 * 3895 * Issue a range-based TLB invalidation for the GTs selected by @tile_mask 3896 * 3897 * Returns 0 for success, negative error code otherwise.
3898 */ 3899 int xe_vm_range_tilemask_tlb_invalidation(struct xe_vm *vm, u64 start, 3900 u64 end, u8 tile_mask) 3901 { 3902 struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; 3903 struct xe_tile *tile; 3904 u32 fence_id = 0; 3905 u8 id; 3906 int err; 3907 3908 if (!tile_mask) 3909 return 0; 3910 3911 for_each_tile(tile, vm->xe, id) { 3912 if (tile_mask & BIT(id)) { 3913 xe_gt_tlb_invalidation_fence_init(tile->primary_gt, 3914 &fence[fence_id], true); 3915 3916 err = xe_gt_tlb_invalidation_range(tile->primary_gt, 3917 &fence[fence_id], 3918 start, 3919 end, 3920 vm->usm.asid); 3921 if (err) 3922 goto wait; 3923 ++fence_id; 3924 3925 if (!tile->media_gt) 3926 continue; 3927 3928 xe_gt_tlb_invalidation_fence_init(tile->media_gt, 3929 &fence[fence_id], true); 3930 3931 err = xe_gt_tlb_invalidation_range(tile->media_gt, 3932 &fence[fence_id], 3933 start, 3934 end, 3935 vm->usm.asid); 3936 if (err) 3937 goto wait; 3938 ++fence_id; 3939 } 3940 } 3941 3942 wait: 3943 for (id = 0; id < fence_id; ++id) 3944 xe_gt_tlb_invalidation_fence_wait(&fence[id]); 3945 3946 return err; 3947 } 3948 3949 /** 3950 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock 3951 * @vma: VMA to invalidate 3952 * 3953 * Walks the page-table leaves and zeroes the entries owned by this VMA, 3954 * then issues a TLB invalidation and blocks until the invalidation has 3955 * completed. 3956 * 3957 * Returns 0 for success, negative error code otherwise. 3958 */ 3959 int xe_vm_invalidate_vma(struct xe_vma *vma) 3960 { 3961 struct xe_device *xe = xe_vma_vm(vma)->xe; 3962 struct xe_vm *vm = xe_vma_vm(vma); 3963 struct xe_tile *tile; 3964 u8 tile_mask = 0; 3965 int ret = 0; 3966 u8 id; 3967 3968 xe_assert(xe, !xe_vma_is_null(vma)); 3969 xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma)); 3970 trace_xe_vma_invalidate(vma); 3971 3972 vm_dbg(&vm->xe->drm, 3973 "INVALIDATE: addr=0x%016llx, range=0x%016llx", 3974 xe_vma_start(vma), xe_vma_size(vma)); 3975 3976 /* 3977 * Check that we don't race with page-table updates; the tile_invalidated 3978 * update is safe. 3979 */ 3980 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { 3981 if (xe_vma_is_userptr(vma)) { 3982 lockdep_assert(lockdep_is_held_type(&vm->userptr.notifier_lock, 0) || 3983 (lockdep_is_held_type(&vm->userptr.notifier_lock, 1) && 3984 lockdep_is_held(&xe_vm_resv(vm)->lock.base))); 3985 3986 WARN_ON_ONCE(!mmu_interval_check_retry 3987 (&to_userptr_vma(vma)->userptr.notifier, 3988 to_userptr_vma(vma)->userptr.notifier_seq)); 3989 WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm), 3990 DMA_RESV_USAGE_BOOKKEEP)); 3991 3992 } else { 3993 xe_bo_assert_held(xe_vma_bo(vma)); 3994 } 3995 } 3996 3997 for_each_tile(tile, xe, id) 3998 if (xe_pt_zap_ptes(tile, vma)) 3999 tile_mask |= BIT(id); 4000 4001 xe_device_wmb(xe); 4002 4003 ret = xe_vm_range_tilemask_tlb_invalidation(xe_vma_vm(vma), xe_vma_start(vma), 4004 xe_vma_end(vma), tile_mask); 4005 4006 /* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */ 4007 WRITE_ONCE(vma->tile_invalidated, vma->tile_mask); 4008 4009 return ret; 4010 } 4011 4012 int xe_vm_validate_protected(struct xe_vm *vm) 4013 { 4014 struct drm_gpuva *gpuva; 4015 int err = 0; 4016 4017 if (!vm) 4018 return -ENODEV; 4019 4020 mutex_lock(&vm->snap_mutex); 4021 4022 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 4023 struct xe_vma *vma = gpuva_to_vma(gpuva); 4024 struct xe_bo *bo = vma->gpuva.gem.obj ?
4025 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 4026 4027 if (!bo) 4028 continue; 4029 4030 if (xe_bo_is_protected(bo)) { 4031 err = xe_pxp_bo_key_check(vm->xe->pxp, bo); 4032 if (err) 4033 break; 4034 } 4035 } 4036 4037 mutex_unlock(&vm->snap_mutex); 4038 return err; 4039 } 4040 4041 struct xe_vm_snapshot { 4042 unsigned long num_snaps; 4043 struct { 4044 u64 ofs, bo_ofs; 4045 unsigned long len; 4046 struct xe_bo *bo; 4047 void *data; 4048 struct mm_struct *mm; 4049 } snap[]; 4050 }; 4051 4052 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm) 4053 { 4054 unsigned long num_snaps = 0, i; 4055 struct xe_vm_snapshot *snap = NULL; 4056 struct drm_gpuva *gpuva; 4057 4058 if (!vm) 4059 return NULL; 4060 4061 mutex_lock(&vm->snap_mutex); 4062 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 4063 if (gpuva->flags & XE_VMA_DUMPABLE) 4064 num_snaps++; 4065 } 4066 4067 if (num_snaps) 4068 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT); 4069 if (!snap) { 4070 snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV); 4071 goto out_unlock; 4072 } 4073 4074 snap->num_snaps = num_snaps; 4075 i = 0; 4076 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 4077 struct xe_vma *vma = gpuva_to_vma(gpuva); 4078 struct xe_bo *bo = vma->gpuva.gem.obj ? 4079 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 4080 4081 if (!(gpuva->flags & XE_VMA_DUMPABLE)) 4082 continue; 4083 4084 snap->snap[i].ofs = xe_vma_start(vma); 4085 snap->snap[i].len = xe_vma_size(vma); 4086 if (bo) { 4087 snap->snap[i].bo = xe_bo_get(bo); 4088 snap->snap[i].bo_ofs = xe_vma_bo_offset(vma); 4089 } else if (xe_vma_is_userptr(vma)) { 4090 struct mm_struct *mm = 4091 to_userptr_vma(vma)->userptr.notifier.mm; 4092 4093 if (mmget_not_zero(mm)) 4094 snap->snap[i].mm = mm; 4095 else 4096 snap->snap[i].data = ERR_PTR(-EFAULT); 4097 4098 snap->snap[i].bo_ofs = xe_vma_userptr(vma); 4099 } else { 4100 snap->snap[i].data = ERR_PTR(-ENOENT); 4101 } 4102 i++; 4103 } 4104 4105 out_unlock: 4106 mutex_unlock(&vm->snap_mutex); 4107 return snap; 4108 } 4109 4110 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) 4111 { 4112 if (IS_ERR_OR_NULL(snap)) 4113 return; 4114 4115 for (int i = 0; i < snap->num_snaps; i++) { 4116 struct xe_bo *bo = snap->snap[i].bo; 4117 int err; 4118 4119 if (IS_ERR(snap->snap[i].data)) 4120 continue; 4121 4122 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER); 4123 if (!snap->snap[i].data) { 4124 snap->snap[i].data = ERR_PTR(-ENOMEM); 4125 goto cleanup_bo; 4126 } 4127 4128 if (bo) { 4129 err = xe_bo_read(bo, snap->snap[i].bo_ofs, 4130 snap->snap[i].data, snap->snap[i].len); 4131 } else { 4132 void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs; 4133 4134 kthread_use_mm(snap->snap[i].mm); 4135 if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len)) 4136 err = 0; 4137 else 4138 err = -EFAULT; 4139 kthread_unuse_mm(snap->snap[i].mm); 4140 4141 mmput(snap->snap[i].mm); 4142 snap->snap[i].mm = NULL; 4143 } 4144 4145 if (err) { 4146 kvfree(snap->snap[i].data); 4147 snap->snap[i].data = ERR_PTR(err); 4148 } 4149 4150 cleanup_bo: 4151 xe_bo_put(bo); 4152 snap->snap[i].bo = NULL; 4153 } 4154 } 4155 4156 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) 4157 { 4158 unsigned long i, j; 4159 4160 if (IS_ERR_OR_NULL(snap)) { 4161 drm_printf(p, "[0].error: %li\n", PTR_ERR(snap)); 4162 return; 4163 } 4164 4165 for (i = 0; i < snap->num_snaps; i++) { 4166 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len); 4167 4168 if 
(IS_ERR(snap->snap[i].data)) { 4169 drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs, 4170 PTR_ERR(snap->snap[i].data)); 4171 continue; 4172 } 4173 4174 drm_printf(p, "[%llx].data: ", snap->snap[i].ofs); 4175 4176 for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) { 4177 u32 *val = snap->snap[i].data + j; 4178 char dumped[ASCII85_BUFSZ]; 4179 4180 drm_puts(p, ascii85_encode(*val, dumped)); 4181 } 4182 4183 drm_puts(p, "\n"); 4184 4185 if (drm_coredump_printer_is_full(p)) 4186 return; 4187 } 4188 } 4189 4190 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap) 4191 { 4192 unsigned long i; 4193 4194 if (IS_ERR_OR_NULL(snap)) 4195 return; 4196 4197 for (i = 0; i < snap->num_snaps; i++) { 4198 if (!IS_ERR(snap->snap[i].data)) 4199 kvfree(snap->snap[i].data); 4200 xe_bo_put(snap->snap[i].bo); 4201 if (snap->snap[i].mm) 4202 mmput(snap->snap[i].mm); 4203 } 4204 kvfree(snap); 4205 } 4206
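/*
 * Illustrative sketch (not part of the driver): a minimal example of how a
 * kernel-internal caller might use xe_vm_bind_kernel_bo(), defined earlier in
 * this file, and wait for the bind to land before relying on the mapping.
 * The helper name example_bind_kernel_bo_sync() is hypothetical, and
 * XE_CACHE_WB is assumed to be a suitable cache level for the caller's BO;
 * adapt both to the real call site.
 */
static int __maybe_unused example_bind_kernel_bo_sync(struct xe_vm *vm,
						       struct xe_bo *bo,
						       u64 addr)
{
	struct dma_fence *fence;
	long timeout;

	/* Submit the bind job; the default VM bind queue is used when q == NULL. */
	fence = xe_vm_bind_kernel_bo(vm, bo, NULL, addr, XE_CACHE_WB);
	if (IS_ERR(fence))
		return PTR_ERR(fence);

	/* Wait (uninterruptibly) for the bind to complete, with a 1s timeout. */
	timeout = dma_fence_wait_timeout(fence, false, HZ);
	dma_fence_put(fence);

	if (timeout < 0)
		return timeout;

	return timeout ? 0 : -ETIMEDOUT;
}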