// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_vm.h"

#include <linux/dma-fence-array.h>
#include <linux/nospec.h>

#include <drm/drm_drv.h>
#include <drm/drm_exec.h>
#include <drm/drm_print.h>
#include <drm/ttm/ttm_tt.h>
#include <uapi/drm/xe_drm.h>
#include <linux/ascii85.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/swap.h>

#include <generated/xe_wa_oob.h>

#include "regs/xe_gtt_defs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue.h"
#include "xe_gt_pagefault.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_migrate.h"
#include "xe_pat.h"
#include "xe_pm.h"
#include "xe_preempt_fence.h"
#include "xe_pt.h"
#include "xe_pxp.h"
#include "xe_res_cursor.h"
#include "xe_svm.h"
#include "xe_sync.h"
#include "xe_trace_bo.h"
#include "xe_wa.h"
#include "xe_hmm.h"

static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
{
	return vm->gpuvm.r_obj;
}

/**
 * xe_vma_userptr_check_repin() - Advisory check for repin needed
 * @uvma: The userptr vma
 *
 * Check if the userptr vma has been invalidated since last successful
 * repin. The check is advisory only and the function can be called
 * without the vm->userptr.notifier_lock held. There is no guarantee that the
 * vma userptr will remain valid after a lockless check, so typically
 * the call needs to be followed by a proper check under the notifier_lock.
 *
 * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
 */
int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
{
	return mmu_interval_check_retry(&uvma->userptr.notifier,
					uvma->userptr.notifier_seq) ?
66 -EAGAIN : 0; 67 } 68 69 int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) 70 { 71 struct xe_vma *vma = &uvma->vma; 72 struct xe_vm *vm = xe_vma_vm(vma); 73 struct xe_device *xe = vm->xe; 74 75 lockdep_assert_held(&vm->lock); 76 xe_assert(xe, xe_vma_is_userptr(vma)); 77 78 return xe_hmm_userptr_populate_range(uvma, false); 79 } 80 81 static bool preempt_fences_waiting(struct xe_vm *vm) 82 { 83 struct xe_exec_queue *q; 84 85 lockdep_assert_held(&vm->lock); 86 xe_vm_assert_held(vm); 87 88 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 89 if (!q->lr.pfence || 90 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, 91 &q->lr.pfence->flags)) { 92 return true; 93 } 94 } 95 96 return false; 97 } 98 99 static void free_preempt_fences(struct list_head *list) 100 { 101 struct list_head *link, *next; 102 103 list_for_each_safe(link, next, list) 104 xe_preempt_fence_free(to_preempt_fence_from_link(link)); 105 } 106 107 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list, 108 unsigned int *count) 109 { 110 lockdep_assert_held(&vm->lock); 111 xe_vm_assert_held(vm); 112 113 if (*count >= vm->preempt.num_exec_queues) 114 return 0; 115 116 for (; *count < vm->preempt.num_exec_queues; ++(*count)) { 117 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc(); 118 119 if (IS_ERR(pfence)) 120 return PTR_ERR(pfence); 121 122 list_move_tail(xe_preempt_fence_link(pfence), list); 123 } 124 125 return 0; 126 } 127 128 static int wait_for_existing_preempt_fences(struct xe_vm *vm) 129 { 130 struct xe_exec_queue *q; 131 132 xe_vm_assert_held(vm); 133 134 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 135 if (q->lr.pfence) { 136 long timeout = dma_fence_wait(q->lr.pfence, false); 137 138 /* Only -ETIME on fence indicates VM needs to be killed */ 139 if (timeout < 0 || q->lr.pfence->error == -ETIME) 140 return -ETIME; 141 142 dma_fence_put(q->lr.pfence); 143 q->lr.pfence = NULL; 144 } 145 } 146 147 return 0; 148 } 149 150 static bool xe_vm_is_idle(struct xe_vm *vm) 151 { 152 struct xe_exec_queue *q; 153 154 xe_vm_assert_held(vm); 155 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 156 if (!xe_exec_queue_is_idle(q)) 157 return false; 158 } 159 160 return true; 161 } 162 163 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list) 164 { 165 struct list_head *link; 166 struct xe_exec_queue *q; 167 168 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 169 struct dma_fence *fence; 170 171 link = list->next; 172 xe_assert(vm->xe, link != list); 173 174 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link), 175 q, q->lr.context, 176 ++q->lr.seqno); 177 dma_fence_put(q->lr.pfence); 178 q->lr.pfence = fence; 179 } 180 } 181 182 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) 183 { 184 struct xe_exec_queue *q; 185 int err; 186 187 xe_bo_assert_held(bo); 188 189 if (!vm->preempt.num_exec_queues) 190 return 0; 191 192 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues); 193 if (err) 194 return err; 195 196 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) 197 if (q->lr.pfence) { 198 dma_resv_add_fence(bo->ttm.base.resv, 199 q->lr.pfence, 200 DMA_RESV_USAGE_BOOKKEEP); 201 } 202 203 return 0; 204 } 205 206 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm, 207 struct drm_exec *exec) 208 { 209 struct xe_exec_queue *q; 210 211 lockdep_assert_held(&vm->lock); 212 xe_vm_assert_held(vm); 213 214 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 215 q->ops->resume(q); 
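		/*
		 * Resume the queue, then re-add its (already armed) preempt
		 * fence to the VM's dma-resv with BOOKKEEP usage, mirroring
		 * what xe_vm_add_compute_exec_queue() does for a newly added
		 * queue.
		 */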
		drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
					 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
	}
}

int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	struct drm_gpuvm_exec vm_exec = {
		.vm = &vm->gpuvm,
		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
		.num_fences = 1,
	};
	struct drm_exec *exec = &vm_exec.exec;
	struct dma_fence *pfence;
	int err;
	bool wait;

	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));

	down_write(&vm->lock);
	err = drm_gpuvm_exec_lock(&vm_exec);
	if (err)
		goto out_up_write;

	pfence = xe_preempt_fence_create(q, q->lr.context,
					 ++q->lr.seqno);
	if (!pfence) {
		err = -ENOMEM;
		goto out_fini;
	}

	list_add(&q->lr.link, &vm->preempt.exec_queues);
	++vm->preempt.num_exec_queues;
	q->lr.pfence = pfence;

	down_read(&vm->userptr.notifier_lock);

	drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
				 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);

	/*
	 * Check to see if a preemption on VM is in flight or userptr
	 * invalidation, if so trigger this preempt fence to sync state with
	 * other preempt fences on the VM.
	 */
	wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
	if (wait)
		dma_fence_enable_sw_signaling(pfence);

	up_read(&vm->userptr.notifier_lock);

out_fini:
	drm_exec_fini(exec);
out_up_write:
	up_write(&vm->lock);

	return err;
}
ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO);

/**
 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
 * @vm: The VM.
 * @q: The exec_queue
 *
 * Note that this function might be called multiple times on the same queue.
 */
void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	if (!xe_vm_in_preempt_fence_mode(vm))
		return;

	down_write(&vm->lock);
	if (!list_empty(&q->lr.link)) {
		list_del_init(&q->lr.link);
		--vm->preempt.num_exec_queues;
	}
	if (q->lr.pfence) {
		dma_fence_enable_sw_signaling(q->lr.pfence);
		dma_fence_put(q->lr.pfence);
		q->lr.pfence = NULL;
	}
	up_write(&vm->lock);
}

/**
 * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
 * that need repinning.
 * @vm: The VM.
 *
 * This function checks for whether the VM has userptrs that need repinning,
 * and provides a release-type barrier on the userptr.notifier_lock after
 * checking.
 *
 * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
 */
int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
{
	lockdep_assert_held_read(&vm->userptr.notifier_lock);

	return (list_empty(&vm->userptr.repin_list) &&
		list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
}

#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000

/**
 * xe_vm_kill() - VM Kill
 * @vm: The VM.
 * @unlocked: Flag indicating that the VM's dma-resv is not held
 *
 * Kill the VM by setting the banned flag, indicating the VM is no longer
 * available for use. If in preempt fence mode, also kill all exec queues
 * attached to the VM.
 */
void xe_vm_kill(struct xe_vm *vm, bool unlocked)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);

	if (unlocked)
		xe_vm_lock(vm, false);

	vm->flags |= XE_VM_FLAG_BANNED;
	trace_xe_vm_kill(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
		q->ops->kill(q);

	if (unlocked)
		xe_vm_unlock(vm);

	/* TODO: Inform user the VM is banned */
}

/**
 * xe_vm_validate_should_retry() - Whether to retry after a validate error.
 * @exec: The drm_exec object used for locking before validation.
 * @err: The error returned from ttm_bo_validate().
 * @end: A ktime_t cookie that should be set to 0 before first use and
 * that should be reused on subsequent calls.
 *
 * With multiple active VMs, under memory pressure, it is possible that
 * ttm_bo_validate() runs into -EDEADLK and in such a case returns -ENOMEM.
 * Until ttm properly handles locking in such scenarios, the best thing the
 * driver can do is retry with a timeout. Check if that is necessary, and
 * if so unlock the drm_exec's objects while keeping the ticket to prepare
 * for a rerun.
 *
 * Return: true if a retry after drm_exec_init() is recommended;
 * false otherwise.
 */
bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
{
	ktime_t cur;

	if (err != -ENOMEM)
		return false;

	cur = ktime_get();
	*end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
	if (!ktime_before(cur, *end))
		return false;

	msleep(20);
	return true;
}
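/*
 * Illustrative sketch (not part of the driver): the intended calling pattern
 * for xe_vm_validate_should_retry(), modelled on preempt_rebind_work_func()
 * below. "some_lock_and_validate()" is a placeholder for whatever drm_exec
 * locking/validation step produced @err.
 *
 *	ktime_t end = 0;
 *	int err;
 *
 *retry:
 *	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 *	drm_exec_until_all_locked(&exec) {
 *		err = some_lock_and_validate(&exec, vm);
 *		drm_exec_retry_on_contention(&exec);
 *	}
 *	drm_exec_fini(&exec);
 *	if (err && xe_vm_validate_should_retry(&exec, err, &end))
 *		goto retry;
 */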
static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
{
	struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
	struct drm_gpuva *gpuva;
	int ret;

	lockdep_assert_held(&vm->lock);
	drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
		list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
			       &vm->rebind_list);

	ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
	if (ret)
		return ret;

	vm_bo->evicted = false;
	return 0;
}

/**
 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
 * @vm: The vm for which we are rebinding.
 * @exec: The struct drm_exec with the locked GEM objects.
 * @num_fences: The number of fences to reserve for the operation, not
 * including rebinds and validations.
 *
 * Validates all evicted gem objects and rebinds their vmas. Note that
 * rebindings may cause evictions and hence the validation-rebind
 * sequence is rerun until there are no more objects to validate.
 *
 * Return: 0 on success, negative error code on error. In particular,
 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
 * the drm_exec transaction needs to be restarted.
 */
int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
			  unsigned int num_fences)
{
	struct drm_gem_object *obj;
	unsigned long index;
	int ret;

	do {
		ret = drm_gpuvm_validate(&vm->gpuvm, exec);
		if (ret)
			return ret;

		ret = xe_vm_rebind(vm, false);
		if (ret)
			return ret;
	} while (!list_empty(&vm->gpuvm.evict.list));

	drm_exec_for_each_locked_object(exec, index, obj) {
		ret = dma_resv_reserve_fences(obj->resv, num_fences);
		if (ret)
			return ret;
	}

	return 0;
}
static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
				 bool *done)
{
	int err;

	err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	if (xe_vm_is_idle(vm)) {
		vm->preempt.rebind_deactivated = true;
		*done = true;
		return 0;
	}

	if (!preempt_fences_waiting(vm)) {
		*done = true;
		return 0;
	}

	err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	err = wait_for_existing_preempt_fences(vm);
	if (err)
		return err;

	/*
	 * Add validation and rebinding to the locking loop since both can
	 * cause evictions which may require blocking dma_resv locks.
	 * The fence reservation here is intended for the new preempt fences
	 * we attach at the end of the rebind work.
	 */
	return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
}
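/*
 * Rebind worker for preempt-fence (long-running) VMs. Summarising the flow of
 * the function below: take vm->lock for write, repin any invalidated userptrs,
 * lock the VM dma-resv and external BOs with drm_exec, wait for the existing
 * preempt fences to signal, validate evicted BOs and rebind their VMAs, wait
 * for those (re)binds on the VM dma-resv, and finally, under the userptr
 * notifier lock, arm and re-install fresh preempt fences. An -EAGAIN at any
 * point restarts the whole sequence; any other error kills the VM.
 */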
static void preempt_rebind_work_func(struct work_struct *w)
{
	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
	struct drm_exec exec;
	unsigned int fence_count = 0;
	LIST_HEAD(preempt_fences);
	ktime_t end = 0;
	int err = 0;
	long wait;
	int __maybe_unused tries = 0;

	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
	trace_xe_vm_rebind_worker_enter(vm);

	down_write(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm)) {
		up_write(&vm->lock);
		trace_xe_vm_rebind_worker_exit(vm);
		return;
	}

retry:
	if (xe_vm_userptr_check_repin(vm)) {
		err = xe_vm_userptr_pin(vm);
		if (err)
			goto out_unlock_outer;
	}

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);

	drm_exec_until_all_locked(&exec) {
		bool done = false;

		err = xe_preempt_work_begin(&exec, vm, &done);
		drm_exec_retry_on_contention(&exec);
		if (err || done) {
			drm_exec_fini(&exec);
			if (err && xe_vm_validate_should_retry(&exec, err, &end))
				err = -EAGAIN;

			goto out_unlock_outer;
		}
	}

	err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
	if (err)
		goto out_unlock;

	err = xe_vm_rebind(vm, true);
	if (err)
		goto out_unlock;

	/* Wait on rebinds and munmap style VM unbinds */
	wait = dma_resv_wait_timeout(xe_vm_resv(vm),
				     DMA_RESV_USAGE_KERNEL,
				     false, MAX_SCHEDULE_TIMEOUT);
	if (wait <= 0) {
		err = -ETIME;
		goto out_unlock;
	}

#define retry_required(__tries, __vm) \
	(IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
	(!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
	__xe_vm_userptr_needs_repin(__vm))

	down_read(&vm->userptr.notifier_lock);
	if (retry_required(tries, vm)) {
		up_read(&vm->userptr.notifier_lock);
		err = -EAGAIN;
		goto out_unlock;
	}

#undef retry_required

	spin_lock(&vm->xe->ttm.lru_lock);
	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
	spin_unlock(&vm->xe->ttm.lru_lock);

	/* Point of no return. */
	arm_preempt_fences(vm, &preempt_fences);
	resume_and_reinstall_preempt_fences(vm, &exec);
	up_read(&vm->userptr.notifier_lock);

out_unlock:
	drm_exec_fini(&exec);
out_unlock_outer:
	if (err == -EAGAIN) {
		trace_xe_vm_rebind_worker_retry(vm);
		goto retry;
	}

	if (err) {
		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
		xe_vm_kill(vm, true);
	}
	up_write(&vm->lock);

	free_preempt_fences(&preempt_fences);

	trace_xe_vm_rebind_worker_exit(vm);
}

static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma)
{
	struct xe_userptr *userptr = &uvma->userptr;
	struct xe_vma *vma = &uvma->vma;
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	long err;

	/*
	 * Tell exec and rebind worker they need to repin and rebind this
	 * userptr.
	 */
	if (!xe_vm_in_fault_mode(vm) &&
	    !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
		spin_lock(&vm->userptr.invalidated_lock);
		list_move_tail(&userptr->invalidate_link,
			       &vm->userptr.invalidated);
		spin_unlock(&vm->userptr.invalidated_lock);
	}

	/*
	 * Preempt fences turn into schedule disables, pipeline these.
	 * Note that even in fault mode, we need to wait for binds and
	 * unbinds to complete, and those are attached as BOOKKEEP fences
	 * to the vm.
611 */ 612 dma_resv_iter_begin(&cursor, xe_vm_resv(vm), 613 DMA_RESV_USAGE_BOOKKEEP); 614 dma_resv_for_each_fence_unlocked(&cursor, fence) 615 dma_fence_enable_sw_signaling(fence); 616 dma_resv_iter_end(&cursor); 617 618 err = dma_resv_wait_timeout(xe_vm_resv(vm), 619 DMA_RESV_USAGE_BOOKKEEP, 620 false, MAX_SCHEDULE_TIMEOUT); 621 XE_WARN_ON(err <= 0); 622 623 if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) { 624 err = xe_vm_invalidate_vma(vma); 625 XE_WARN_ON(err); 626 } 627 628 xe_hmm_userptr_unmap(uvma); 629 } 630 631 static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, 632 const struct mmu_notifier_range *range, 633 unsigned long cur_seq) 634 { 635 struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier); 636 struct xe_vma *vma = &uvma->vma; 637 struct xe_vm *vm = xe_vma_vm(vma); 638 639 xe_assert(vm->xe, xe_vma_is_userptr(vma)); 640 trace_xe_vma_userptr_invalidate(vma); 641 642 if (!mmu_notifier_range_blockable(range)) 643 return false; 644 645 vm_dbg(&xe_vma_vm(vma)->xe->drm, 646 "NOTIFIER: addr=0x%016llx, range=0x%016llx", 647 xe_vma_start(vma), xe_vma_size(vma)); 648 649 down_write(&vm->userptr.notifier_lock); 650 mmu_interval_set_seq(mni, cur_seq); 651 652 __vma_userptr_invalidate(vm, uvma); 653 up_write(&vm->userptr.notifier_lock); 654 trace_xe_vma_userptr_invalidate_complete(vma); 655 656 return true; 657 } 658 659 static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { 660 .invalidate = vma_userptr_invalidate, 661 }; 662 663 #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) 664 /** 665 * xe_vma_userptr_force_invalidate() - force invalidate a userptr 666 * @uvma: The userptr vma to invalidate 667 * 668 * Perform a forced userptr invalidation for testing purposes. 669 */ 670 void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) 671 { 672 struct xe_vm *vm = xe_vma_vm(&uvma->vma); 673 674 /* Protect against concurrent userptr pinning */ 675 lockdep_assert_held(&vm->lock); 676 /* Protect against concurrent notifiers */ 677 lockdep_assert_held(&vm->userptr.notifier_lock); 678 /* 679 * Protect against concurrent instances of this function and 680 * the critical exec sections 681 */ 682 xe_vm_assert_held(vm); 683 684 if (!mmu_interval_read_retry(&uvma->userptr.notifier, 685 uvma->userptr.notifier_seq)) 686 uvma->userptr.notifier_seq -= 2; 687 __vma_userptr_invalidate(vm, uvma); 688 } 689 #endif 690 691 int xe_vm_userptr_pin(struct xe_vm *vm) 692 { 693 struct xe_userptr_vma *uvma, *next; 694 int err = 0; 695 696 xe_assert(vm->xe, !xe_vm_in_fault_mode(vm)); 697 lockdep_assert_held_write(&vm->lock); 698 699 /* Collect invalidated userptrs */ 700 spin_lock(&vm->userptr.invalidated_lock); 701 xe_assert(vm->xe, list_empty(&vm->userptr.repin_list)); 702 list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated, 703 userptr.invalidate_link) { 704 list_del_init(&uvma->userptr.invalidate_link); 705 list_add_tail(&uvma->userptr.repin_link, 706 &vm->userptr.repin_list); 707 } 708 spin_unlock(&vm->userptr.invalidated_lock); 709 710 /* Pin and move to bind list */ 711 list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, 712 userptr.repin_link) { 713 err = xe_vma_userptr_pin_pages(uvma); 714 if (err == -EFAULT) { 715 list_del_init(&uvma->userptr.repin_link); 716 /* 717 * We might have already done the pin once already, but 718 * then had to retry before the re-bind happened, due 719 * some other condition in the caller, but in the 720 * meantime the userptr got dinged by the notifier such 721 * that we need to 
revalidate here, but this time we hit 722 * the EFAULT. In such a case make sure we remove 723 * ourselves from the rebind list to avoid going down in 724 * flames. 725 */ 726 if (!list_empty(&uvma->vma.combined_links.rebind)) 727 list_del_init(&uvma->vma.combined_links.rebind); 728 729 /* Wait for pending binds */ 730 xe_vm_lock(vm, false); 731 dma_resv_wait_timeout(xe_vm_resv(vm), 732 DMA_RESV_USAGE_BOOKKEEP, 733 false, MAX_SCHEDULE_TIMEOUT); 734 735 down_read(&vm->userptr.notifier_lock); 736 err = xe_vm_invalidate_vma(&uvma->vma); 737 up_read(&vm->userptr.notifier_lock); 738 xe_vm_unlock(vm); 739 if (err) 740 break; 741 } else { 742 if (err) 743 break; 744 745 list_del_init(&uvma->userptr.repin_link); 746 list_move_tail(&uvma->vma.combined_links.rebind, 747 &vm->rebind_list); 748 } 749 } 750 751 if (err) { 752 down_write(&vm->userptr.notifier_lock); 753 spin_lock(&vm->userptr.invalidated_lock); 754 list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, 755 userptr.repin_link) { 756 list_del_init(&uvma->userptr.repin_link); 757 list_move_tail(&uvma->userptr.invalidate_link, 758 &vm->userptr.invalidated); 759 } 760 spin_unlock(&vm->userptr.invalidated_lock); 761 up_write(&vm->userptr.notifier_lock); 762 } 763 return err; 764 } 765 766 /** 767 * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs 768 * that need repinning. 769 * @vm: The VM. 770 * 771 * This function does an advisory check for whether the VM has userptrs that 772 * need repinning. 773 * 774 * Return: 0 if there are no indications of userptrs needing repinning, 775 * -EAGAIN if there are. 776 */ 777 int xe_vm_userptr_check_repin(struct xe_vm *vm) 778 { 779 return (list_empty_careful(&vm->userptr.repin_list) && 780 list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN; 781 } 782 783 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) 784 { 785 int i; 786 787 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) { 788 if (!vops->pt_update_ops[i].num_ops) 789 continue; 790 791 vops->pt_update_ops[i].ops = 792 kmalloc_array(vops->pt_update_ops[i].num_ops, 793 sizeof(*vops->pt_update_ops[i].ops), 794 GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 795 if (!vops->pt_update_ops[i].ops) 796 return array_of_binds ? 
-ENOBUFS : -ENOMEM; 797 } 798 799 return 0; 800 } 801 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO); 802 803 static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op) 804 { 805 struct xe_vma *vma; 806 807 vma = gpuva_to_vma(op->base.prefetch.va); 808 809 if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma)) 810 xa_destroy(&op->prefetch_range.range); 811 } 812 813 static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops) 814 { 815 struct xe_vma_op *op; 816 817 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) 818 return; 819 820 list_for_each_entry(op, &vops->list, link) 821 xe_vma_svm_prefetch_op_fini(op); 822 } 823 824 static void xe_vma_ops_fini(struct xe_vma_ops *vops) 825 { 826 int i; 827 828 xe_vma_svm_prefetch_ops_fini(vops); 829 830 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 831 kfree(vops->pt_update_ops[i].ops); 832 } 833 834 static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val) 835 { 836 int i; 837 838 if (!inc_val) 839 return; 840 841 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 842 if (BIT(i) & tile_mask) 843 vops->pt_update_ops[i].num_ops += inc_val; 844 } 845 846 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, 847 u8 tile_mask) 848 { 849 INIT_LIST_HEAD(&op->link); 850 op->tile_mask = tile_mask; 851 op->base.op = DRM_GPUVA_OP_MAP; 852 op->base.map.va.addr = vma->gpuva.va.addr; 853 op->base.map.va.range = vma->gpuva.va.range; 854 op->base.map.gem.obj = vma->gpuva.gem.obj; 855 op->base.map.gem.offset = vma->gpuva.gem.offset; 856 op->map.vma = vma; 857 op->map.immediate = true; 858 op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE; 859 op->map.is_null = xe_vma_is_null(vma); 860 } 861 862 static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, 863 u8 tile_mask) 864 { 865 struct xe_vma_op *op; 866 867 op = kzalloc(sizeof(*op), GFP_KERNEL); 868 if (!op) 869 return -ENOMEM; 870 871 xe_vm_populate_rebind(op, vma, tile_mask); 872 list_add_tail(&op->link, &vops->list); 873 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); 874 875 return 0; 876 } 877 878 static struct dma_fence *ops_execute(struct xe_vm *vm, 879 struct xe_vma_ops *vops); 880 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 881 struct xe_exec_queue *q, 882 struct xe_sync_entry *syncs, u32 num_syncs); 883 884 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) 885 { 886 struct dma_fence *fence; 887 struct xe_vma *vma, *next; 888 struct xe_vma_ops vops; 889 struct xe_vma_op *op, *next_op; 890 int err, i; 891 892 lockdep_assert_held(&vm->lock); 893 if ((xe_vm_in_lr_mode(vm) && !rebind_worker) || 894 list_empty(&vm->rebind_list)) 895 return 0; 896 897 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 898 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 899 vops.pt_update_ops[i].wait_vm_bookkeep = true; 900 901 xe_vm_assert_held(vm); 902 list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { 903 xe_assert(vm->xe, vma->tile_present); 904 905 if (rebind_worker) 906 trace_xe_vma_rebind_worker(vma); 907 else 908 trace_xe_vma_rebind_exec(vma); 909 910 err = xe_vm_ops_add_rebind(&vops, vma, 911 vma->tile_present); 912 if (err) 913 goto free_ops; 914 } 915 916 err = xe_vma_ops_alloc(&vops, false); 917 if (err) 918 goto free_ops; 919 920 fence = ops_execute(vm, &vops); 921 if (IS_ERR(fence)) { 922 err = PTR_ERR(fence); 923 } else { 924 dma_fence_put(fence); 925 list_for_each_entry_safe(vma, next, &vm->rebind_list, 926 combined_links.rebind) 927 
list_del_init(&vma->combined_links.rebind); 928 } 929 free_ops: 930 list_for_each_entry_safe(op, next_op, &vops.list, link) { 931 list_del(&op->link); 932 kfree(op); 933 } 934 xe_vma_ops_fini(&vops); 935 936 return err; 937 } 938 939 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask) 940 { 941 struct dma_fence *fence = NULL; 942 struct xe_vma_ops vops; 943 struct xe_vma_op *op, *next_op; 944 struct xe_tile *tile; 945 u8 id; 946 int err; 947 948 lockdep_assert_held(&vm->lock); 949 xe_vm_assert_held(vm); 950 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 951 952 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 953 for_each_tile(tile, vm->xe, id) { 954 vops.pt_update_ops[id].wait_vm_bookkeep = true; 955 vops.pt_update_ops[tile->id].q = 956 xe_migrate_exec_queue(tile->migrate); 957 } 958 959 err = xe_vm_ops_add_rebind(&vops, vma, tile_mask); 960 if (err) 961 return ERR_PTR(err); 962 963 err = xe_vma_ops_alloc(&vops, false); 964 if (err) { 965 fence = ERR_PTR(err); 966 goto free_ops; 967 } 968 969 fence = ops_execute(vm, &vops); 970 971 free_ops: 972 list_for_each_entry_safe(op, next_op, &vops.list, link) { 973 list_del(&op->link); 974 kfree(op); 975 } 976 xe_vma_ops_fini(&vops); 977 978 return fence; 979 } 980 981 static void xe_vm_populate_range_rebind(struct xe_vma_op *op, 982 struct xe_vma *vma, 983 struct xe_svm_range *range, 984 u8 tile_mask) 985 { 986 INIT_LIST_HEAD(&op->link); 987 op->tile_mask = tile_mask; 988 op->base.op = DRM_GPUVA_OP_DRIVER; 989 op->subop = XE_VMA_SUBOP_MAP_RANGE; 990 op->map_range.vma = vma; 991 op->map_range.range = range; 992 } 993 994 static int 995 xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops, 996 struct xe_vma *vma, 997 struct xe_svm_range *range, 998 u8 tile_mask) 999 { 1000 struct xe_vma_op *op; 1001 1002 op = kzalloc(sizeof(*op), GFP_KERNEL); 1003 if (!op) 1004 return -ENOMEM; 1005 1006 xe_vm_populate_range_rebind(op, vma, range, tile_mask); 1007 list_add_tail(&op->link, &vops->list); 1008 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); 1009 1010 return 0; 1011 } 1012 1013 /** 1014 * xe_vm_range_rebind() - VM range (re)bind 1015 * @vm: The VM which the range belongs to. 1016 * @vma: The VMA which the range belongs to. 1017 * @range: SVM range to rebind. 1018 * @tile_mask: Tile mask to bind the range to. 1019 * 1020 * (re)bind SVM range setting up GPU page tables for the range. 
 *
 * Return: dma fence for rebind to signal completion on success, ERR_PTR on
 * failure
 */
struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
				     struct xe_vma *vma,
				     struct xe_svm_range *range,
				     u8 tile_mask)
{
	struct dma_fence *fence = NULL;
	struct xe_vma_ops vops;
	struct xe_vma_op *op, *next_op;
	struct xe_tile *tile;
	u8 id;
	int err;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);
	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
	xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	for_each_tile(tile, vm->xe, id) {
		vops.pt_update_ops[id].wait_vm_bookkeep = true;
		vops.pt_update_ops[tile->id].q =
			xe_migrate_exec_queue(tile->migrate);
	}

	err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask);
	if (err)
		return ERR_PTR(err);

	err = xe_vma_ops_alloc(&vops, false);
	if (err) {
		fence = ERR_PTR(err);
		goto free_ops;
	}

	fence = ops_execute(vm, &vops);

free_ops:
	list_for_each_entry_safe(op, next_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return fence;
}
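/*
 * Illustrative sketch (not part of the driver): how a caller might consume the
 * fence returned by xe_vm_range_rebind() above, assuming vm->lock and the VM
 * dma-resv are already held as required by the asserts in that function. The
 * surrounding fault-servicing context is hypothetical.
 *
 *	fence = xe_vm_range_rebind(vm, vma, range, tile_mask);
 *	if (IS_ERR(fence))
 *		return PTR_ERR(fence);
 *	dma_fence_wait(fence, false);
 *	dma_fence_put(fence);
 */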
static void xe_vm_populate_range_unbind(struct xe_vma_op *op,
					struct xe_svm_range *range)
{
	INIT_LIST_HEAD(&op->link);
	op->tile_mask = range->tile_present;
	op->base.op = DRM_GPUVA_OP_DRIVER;
	op->subop = XE_VMA_SUBOP_UNMAP_RANGE;
	op->unmap_range.range = range;
}

static int
xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops,
			   struct xe_svm_range *range)
{
	struct xe_vma_op *op;

	op = kzalloc(sizeof(*op), GFP_KERNEL);
	if (!op)
		return -ENOMEM;

	xe_vm_populate_range_unbind(op, range);
	list_add_tail(&op->link, &vops->list);
	xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1);

	return 0;
}

/**
 * xe_vm_range_unbind() - VM range unbind
 * @vm: The VM which the range belongs to.
 * @range: SVM range to unbind.
 *
 * Unbind SVM range removing the GPU page tables for the range.
 *
 * Return: dma fence for unbind to signal completion on success, ERR_PTR on
 * failure
 */
struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
				     struct xe_svm_range *range)
{
	struct dma_fence *fence = NULL;
	struct xe_vma_ops vops;
	struct xe_vma_op *op, *next_op;
	struct xe_tile *tile;
	u8 id;
	int err;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);
	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));

	if (!range->tile_present)
		return dma_fence_get_stub();

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	for_each_tile(tile, vm->xe, id) {
		vops.pt_update_ops[id].wait_vm_bookkeep = true;
		vops.pt_update_ops[tile->id].q =
			xe_migrate_exec_queue(tile->migrate);
	}

	err = xe_vm_ops_add_range_unbind(&vops, range);
	if (err)
		return ERR_PTR(err);

	err = xe_vma_ops_alloc(&vops, false);
	if (err) {
		fence = ERR_PTR(err);
		goto free_ops;
	}

	fence = ops_execute(vm, &vops);

free_ops:
	list_for_each_entry_safe(op, next_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return fence;
}

static void xe_vma_free(struct xe_vma *vma)
{
	if (xe_vma_is_userptr(vma))
		kfree(to_userptr_vma(vma));
	else
		kfree(vma);
}

#define VMA_CREATE_FLAG_READ_ONLY		BIT(0)
#define VMA_CREATE_FLAG_IS_NULL			BIT(1)
#define VMA_CREATE_FLAG_DUMPABLE		BIT(2)
#define VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR	BIT(3)

static struct xe_vma *xe_vma_create(struct xe_vm *vm,
				    struct xe_bo *bo,
				    u64 bo_offset_or_userptr,
				    u64 start, u64 end,
				    u16 pat_index, unsigned int flags)
{
	struct xe_vma *vma;
	struct xe_tile *tile;
	u8 id;
	bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
	bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
	bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
	bool is_cpu_addr_mirror =
		(flags & VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR);

	xe_assert(vm->xe, start < end);
	xe_assert(vm->xe, end < vm->size);

	/*
	 * Allocate and ensure that the xe_vma_is_userptr() return
	 * matches what was allocated.
1188 */ 1189 if (!bo && !is_null && !is_cpu_addr_mirror) { 1190 struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL); 1191 1192 if (!uvma) 1193 return ERR_PTR(-ENOMEM); 1194 1195 vma = &uvma->vma; 1196 } else { 1197 vma = kzalloc(sizeof(*vma), GFP_KERNEL); 1198 if (!vma) 1199 return ERR_PTR(-ENOMEM); 1200 1201 if (is_cpu_addr_mirror) 1202 vma->gpuva.flags |= XE_VMA_SYSTEM_ALLOCATOR; 1203 if (is_null) 1204 vma->gpuva.flags |= DRM_GPUVA_SPARSE; 1205 if (bo) 1206 vma->gpuva.gem.obj = &bo->ttm.base; 1207 } 1208 1209 INIT_LIST_HEAD(&vma->combined_links.rebind); 1210 1211 INIT_LIST_HEAD(&vma->gpuva.gem.entry); 1212 vma->gpuva.vm = &vm->gpuvm; 1213 vma->gpuva.va.addr = start; 1214 vma->gpuva.va.range = end - start + 1; 1215 if (read_only) 1216 vma->gpuva.flags |= XE_VMA_READ_ONLY; 1217 if (dumpable) 1218 vma->gpuva.flags |= XE_VMA_DUMPABLE; 1219 1220 for_each_tile(tile, vm->xe, id) 1221 vma->tile_mask |= 0x1 << id; 1222 1223 if (vm->xe->info.has_atomic_enable_pte_bit) 1224 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; 1225 1226 vma->pat_index = pat_index; 1227 1228 if (bo) { 1229 struct drm_gpuvm_bo *vm_bo; 1230 1231 xe_bo_assert_held(bo); 1232 1233 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base); 1234 if (IS_ERR(vm_bo)) { 1235 xe_vma_free(vma); 1236 return ERR_CAST(vm_bo); 1237 } 1238 1239 drm_gpuvm_bo_extobj_add(vm_bo); 1240 drm_gem_object_get(&bo->ttm.base); 1241 vma->gpuva.gem.offset = bo_offset_or_userptr; 1242 drm_gpuva_link(&vma->gpuva, vm_bo); 1243 drm_gpuvm_bo_put(vm_bo); 1244 } else /* userptr or null */ { 1245 if (!is_null && !is_cpu_addr_mirror) { 1246 struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; 1247 u64 size = end - start + 1; 1248 int err; 1249 1250 INIT_LIST_HEAD(&userptr->invalidate_link); 1251 INIT_LIST_HEAD(&userptr->repin_link); 1252 vma->gpuva.gem.offset = bo_offset_or_userptr; 1253 mutex_init(&userptr->unmap_mutex); 1254 1255 err = mmu_interval_notifier_insert(&userptr->notifier, 1256 current->mm, 1257 xe_vma_userptr(vma), size, 1258 &vma_userptr_notifier_ops); 1259 if (err) { 1260 xe_vma_free(vma); 1261 return ERR_PTR(err); 1262 } 1263 1264 userptr->notifier_seq = LONG_MAX; 1265 } 1266 1267 xe_vm_get(vm); 1268 } 1269 1270 return vma; 1271 } 1272 1273 static void xe_vma_destroy_late(struct xe_vma *vma) 1274 { 1275 struct xe_vm *vm = xe_vma_vm(vma); 1276 1277 if (vma->ufence) { 1278 xe_sync_ufence_put(vma->ufence); 1279 vma->ufence = NULL; 1280 } 1281 1282 if (xe_vma_is_userptr(vma)) { 1283 struct xe_userptr_vma *uvma = to_userptr_vma(vma); 1284 struct xe_userptr *userptr = &uvma->userptr; 1285 1286 if (userptr->sg) 1287 xe_hmm_userptr_free_sg(uvma); 1288 1289 /* 1290 * Since userptr pages are not pinned, we can't remove 1291 * the notifier until we're sure the GPU is not accessing 1292 * them anymore 1293 */ 1294 mmu_interval_notifier_remove(&userptr->notifier); 1295 mutex_destroy(&userptr->unmap_mutex); 1296 xe_vm_put(vm); 1297 } else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) { 1298 xe_vm_put(vm); 1299 } else { 1300 xe_bo_put(xe_vma_bo(vma)); 1301 } 1302 1303 xe_vma_free(vma); 1304 } 1305 1306 static void vma_destroy_work_func(struct work_struct *w) 1307 { 1308 struct xe_vma *vma = 1309 container_of(w, struct xe_vma, destroy_work); 1310 1311 xe_vma_destroy_late(vma); 1312 } 1313 1314 static void vma_destroy_cb(struct dma_fence *fence, 1315 struct dma_fence_cb *cb) 1316 { 1317 struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb); 1318 1319 INIT_WORK(&vma->destroy_work, vma_destroy_work_func); 1320 
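	/*
	 * Deferred to a workqueue because fence callbacks may run in atomic
	 * context, while xe_vma_destroy_late() can sleep (it may remove the
	 * userptr mmu_interval notifier and drop BO/VM references).
	 */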
	queue_work(system_unbound_wq, &vma->destroy_work);
}

static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
{
	struct xe_vm *vm = xe_vma_vm(vma);

	lockdep_assert_held_write(&vm->lock);
	xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));

	if (xe_vma_is_userptr(vma)) {
		xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);

		spin_lock(&vm->userptr.invalidated_lock);
		xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link));
		list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
		spin_unlock(&vm->userptr.invalidated_lock);
	} else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) {
		xe_bo_assert_held(xe_vma_bo(vma));

		drm_gpuva_unlink(&vma->gpuva);
	}

	xe_vm_assert_held(vm);
	if (fence) {
		int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
						 vma_destroy_cb);

		if (ret) {
			XE_WARN_ON(ret != -ENOENT);
			xe_vma_destroy_late(vma);
		}
	} else {
		xe_vma_destroy_late(vma);
	}
}

/**
 * xe_vm_lock_vma() - drm_exec utility to lock a vma
 * @exec: The drm_exec object we're currently locking for.
 * @vma: The vma for which we want to lock the vm resv and any attached
 * object's resv.
 *
 * Return: 0 on success, negative error code on error. In particular
 * may return -EDEADLK on WW transaction contention and -EINTR if
 * an interruptible wait is terminated by a signal.
 */
int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
{
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_bo *bo = xe_vma_bo(vma);
	int err;

	XE_WARN_ON(!vm);

	err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
	if (!err && bo && !bo->vm)
		err = drm_exec_lock_obj(exec, &bo->ttm.base);

	return err;
}

static void xe_vma_destroy_unlocked(struct xe_vma *vma)
{
	struct drm_exec exec;
	int err;

	drm_exec_init(&exec, 0, 0);
	drm_exec_until_all_locked(&exec) {
		err = xe_vm_lock_vma(&exec, vma);
		drm_exec_retry_on_contention(&exec);
		if (XE_WARN_ON(err))
			break;
	}

	xe_vma_destroy(vma, NULL);

	drm_exec_fini(&exec);
}

struct xe_vma *
xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
{
	struct drm_gpuva *gpuva;

	lockdep_assert_held(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm))
		return NULL;

	xe_assert(vm->xe, start + range <= vm->size);

	gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);

	return gpuva ?
gpuva_to_vma(gpuva) : NULL; 1415 } 1416 1417 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma) 1418 { 1419 int err; 1420 1421 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1422 lockdep_assert_held(&vm->lock); 1423 1424 mutex_lock(&vm->snap_mutex); 1425 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva); 1426 mutex_unlock(&vm->snap_mutex); 1427 XE_WARN_ON(err); /* Shouldn't be possible */ 1428 1429 return err; 1430 } 1431 1432 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma) 1433 { 1434 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1435 lockdep_assert_held(&vm->lock); 1436 1437 mutex_lock(&vm->snap_mutex); 1438 drm_gpuva_remove(&vma->gpuva); 1439 mutex_unlock(&vm->snap_mutex); 1440 if (vm->usm.last_fault_vma == vma) 1441 vm->usm.last_fault_vma = NULL; 1442 } 1443 1444 static struct drm_gpuva_op *xe_vm_op_alloc(void) 1445 { 1446 struct xe_vma_op *op; 1447 1448 op = kzalloc(sizeof(*op), GFP_KERNEL); 1449 1450 if (unlikely(!op)) 1451 return NULL; 1452 1453 return &op->base; 1454 } 1455 1456 static void xe_vm_free(struct drm_gpuvm *gpuvm); 1457 1458 static const struct drm_gpuvm_ops gpuvm_ops = { 1459 .op_alloc = xe_vm_op_alloc, 1460 .vm_bo_validate = xe_gpuvm_validate, 1461 .vm_free = xe_vm_free, 1462 }; 1463 1464 static u64 pde_encode_pat_index(u16 pat_index) 1465 { 1466 u64 pte = 0; 1467 1468 if (pat_index & BIT(0)) 1469 pte |= XE_PPGTT_PTE_PAT0; 1470 1471 if (pat_index & BIT(1)) 1472 pte |= XE_PPGTT_PTE_PAT1; 1473 1474 return pte; 1475 } 1476 1477 static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level) 1478 { 1479 u64 pte = 0; 1480 1481 if (pat_index & BIT(0)) 1482 pte |= XE_PPGTT_PTE_PAT0; 1483 1484 if (pat_index & BIT(1)) 1485 pte |= XE_PPGTT_PTE_PAT1; 1486 1487 if (pat_index & BIT(2)) { 1488 if (pt_level) 1489 pte |= XE_PPGTT_PDE_PDPE_PAT2; 1490 else 1491 pte |= XE_PPGTT_PTE_PAT2; 1492 } 1493 1494 if (pat_index & BIT(3)) 1495 pte |= XELPG_PPGTT_PTE_PAT3; 1496 1497 if (pat_index & (BIT(4))) 1498 pte |= XE2_PPGTT_PTE_PAT4; 1499 1500 return pte; 1501 } 1502 1503 static u64 pte_encode_ps(u32 pt_level) 1504 { 1505 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL); 1506 1507 if (pt_level == 1) 1508 return XE_PDE_PS_2M; 1509 else if (pt_level == 2) 1510 return XE_PDPE_PS_1G; 1511 1512 return 0; 1513 } 1514 1515 static u16 pde_pat_index(struct xe_bo *bo) 1516 { 1517 struct xe_device *xe = xe_bo_device(bo); 1518 u16 pat_index; 1519 1520 /* 1521 * We only have two bits to encode the PAT index in non-leaf nodes, but 1522 * these only point to other paging structures so we only need a minimal 1523 * selection of options. The user PAT index is only for encoding leaf 1524 * nodes, where we have use of more bits to do the encoding. The 1525 * non-leaf nodes are instead under driver control so the chosen index 1526 * here should be distict from the user PAT index. Also the 1527 * corresponding coherency of the PAT index should be tied to the 1528 * allocation type of the page table (or at least we should pick 1529 * something which is always safe). 
1530 */ 1531 if (!xe_bo_is_vram(bo) && bo->ttm.ttm->caching == ttm_cached) 1532 pat_index = xe->pat.idx[XE_CACHE_WB]; 1533 else 1534 pat_index = xe->pat.idx[XE_CACHE_NONE]; 1535 1536 xe_assert(xe, pat_index <= 3); 1537 1538 return pat_index; 1539 } 1540 1541 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset) 1542 { 1543 u64 pde; 1544 1545 pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1546 pde |= XE_PAGE_PRESENT | XE_PAGE_RW; 1547 pde |= pde_encode_pat_index(pde_pat_index(bo)); 1548 1549 return pde; 1550 } 1551 1552 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, 1553 u16 pat_index, u32 pt_level) 1554 { 1555 u64 pte; 1556 1557 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1558 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1559 pte |= pte_encode_pat_index(pat_index, pt_level); 1560 pte |= pte_encode_ps(pt_level); 1561 1562 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) 1563 pte |= XE_PPGTT_PTE_DM; 1564 1565 return pte; 1566 } 1567 1568 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, 1569 u16 pat_index, u32 pt_level) 1570 { 1571 pte |= XE_PAGE_PRESENT; 1572 1573 if (likely(!xe_vma_read_only(vma))) 1574 pte |= XE_PAGE_RW; 1575 1576 pte |= pte_encode_pat_index(pat_index, pt_level); 1577 pte |= pte_encode_ps(pt_level); 1578 1579 if (unlikely(xe_vma_is_null(vma))) 1580 pte |= XE_PTE_NULL; 1581 1582 return pte; 1583 } 1584 1585 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, 1586 u16 pat_index, 1587 u32 pt_level, bool devmem, u64 flags) 1588 { 1589 u64 pte; 1590 1591 /* Avoid passing random bits directly as flags */ 1592 xe_assert(xe, !(flags & ~XE_PTE_PS64)); 1593 1594 pte = addr; 1595 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1596 pte |= pte_encode_pat_index(pat_index, pt_level); 1597 pte |= pte_encode_ps(pt_level); 1598 1599 if (devmem) 1600 pte |= XE_PPGTT_PTE_DM; 1601 1602 pte |= flags; 1603 1604 return pte; 1605 } 1606 1607 static const struct xe_pt_ops xelp_pt_ops = { 1608 .pte_encode_bo = xelp_pte_encode_bo, 1609 .pte_encode_vma = xelp_pte_encode_vma, 1610 .pte_encode_addr = xelp_pte_encode_addr, 1611 .pde_encode_bo = xelp_pde_encode_bo, 1612 }; 1613 1614 static void vm_destroy_work_func(struct work_struct *w); 1615 1616 /** 1617 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the 1618 * given tile and vm. 1619 * @xe: xe device. 1620 * @tile: tile to set up for. 1621 * @vm: vm to set up for. 1622 * 1623 * Sets up a pagetable tree with one page-table per level and a single 1624 * leaf PTE. All pagetable entries point to the single page-table or, 1625 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and 1626 * writes become NOPs. 1627 * 1628 * Return: 0 on success, negative error code on error. 
1629 */ 1630 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, 1631 struct xe_vm *vm) 1632 { 1633 u8 id = tile->id; 1634 int i; 1635 1636 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) { 1637 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i); 1638 if (IS_ERR(vm->scratch_pt[id][i])) 1639 return PTR_ERR(vm->scratch_pt[id][i]); 1640 1641 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); 1642 } 1643 1644 return 0; 1645 } 1646 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO); 1647 1648 static void xe_vm_free_scratch(struct xe_vm *vm) 1649 { 1650 struct xe_tile *tile; 1651 u8 id; 1652 1653 if (!xe_vm_has_scratch(vm)) 1654 return; 1655 1656 for_each_tile(tile, vm->xe, id) { 1657 u32 i; 1658 1659 if (!vm->pt_root[id]) 1660 continue; 1661 1662 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i) 1663 if (vm->scratch_pt[id][i]) 1664 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL); 1665 } 1666 } 1667 1668 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) 1669 { 1670 struct drm_gem_object *vm_resv_obj; 1671 struct xe_vm *vm; 1672 int err, number_tiles = 0; 1673 struct xe_tile *tile; 1674 u8 id; 1675 1676 /* 1677 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to 1678 * ever be in faulting mode. 1679 */ 1680 xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE))); 1681 1682 vm = kzalloc(sizeof(*vm), GFP_KERNEL); 1683 if (!vm) 1684 return ERR_PTR(-ENOMEM); 1685 1686 vm->xe = xe; 1687 1688 vm->size = 1ull << xe->info.va_bits; 1689 vm->flags = flags; 1690 1691 if (xef) 1692 vm->xef = xe_file_get(xef); 1693 /** 1694 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be 1695 * manipulated under the PXP mutex. However, the PXP mutex can be taken 1696 * under a user-VM lock when the PXP session is started at exec_queue 1697 * creation time. Those are different VMs and therefore there is no risk 1698 * of deadlock, but we need to tell lockdep that this is the case or it 1699 * will print a warning. 1700 */ 1701 if (flags & XE_VM_FLAG_GSC) { 1702 static struct lock_class_key gsc_vm_key; 1703 1704 __init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key); 1705 } else { 1706 init_rwsem(&vm->lock); 1707 } 1708 mutex_init(&vm->snap_mutex); 1709 1710 INIT_LIST_HEAD(&vm->rebind_list); 1711 1712 INIT_LIST_HEAD(&vm->userptr.repin_list); 1713 INIT_LIST_HEAD(&vm->userptr.invalidated); 1714 init_rwsem(&vm->userptr.notifier_lock); 1715 spin_lock_init(&vm->userptr.invalidated_lock); 1716 1717 ttm_lru_bulk_move_init(&vm->lru_bulk_move); 1718 1719 INIT_WORK(&vm->destroy_work, vm_destroy_work_func); 1720 1721 INIT_LIST_HEAD(&vm->preempt.exec_queues); 1722 vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */ 1723 1724 for_each_tile(tile, xe, id) 1725 xe_range_fence_tree_init(&vm->rftree[id]); 1726 1727 vm->pt_ops = &xelp_pt_ops; 1728 1729 /* 1730 * Long-running workloads are not protected by the scheduler references. 1731 * By design, run_job for long-running workloads returns NULL and the 1732 * scheduler drops all the references of it, hence protecting the VM 1733 * for this case is necessary. 
1734 */ 1735 if (flags & XE_VM_FLAG_LR_MODE) { 1736 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); 1737 xe_pm_runtime_get_noresume(xe); 1738 } 1739 1740 if (flags & XE_VM_FLAG_FAULT_MODE) { 1741 err = xe_svm_init(vm); 1742 if (err) 1743 goto err_no_resv; 1744 } 1745 1746 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); 1747 if (!vm_resv_obj) { 1748 err = -ENOMEM; 1749 goto err_svm_fini; 1750 } 1751 1752 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm, 1753 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops); 1754 1755 drm_gem_object_put(vm_resv_obj); 1756 1757 err = xe_vm_lock(vm, true); 1758 if (err) 1759 goto err_close; 1760 1761 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) 1762 vm->flags |= XE_VM_FLAG_64K; 1763 1764 for_each_tile(tile, xe, id) { 1765 if (flags & XE_VM_FLAG_MIGRATION && 1766 tile->id != XE_VM_FLAG_TILE_ID(flags)) 1767 continue; 1768 1769 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level); 1770 if (IS_ERR(vm->pt_root[id])) { 1771 err = PTR_ERR(vm->pt_root[id]); 1772 vm->pt_root[id] = NULL; 1773 goto err_unlock_close; 1774 } 1775 } 1776 1777 if (xe_vm_has_scratch(vm)) { 1778 for_each_tile(tile, xe, id) { 1779 if (!vm->pt_root[id]) 1780 continue; 1781 1782 err = xe_vm_create_scratch(xe, tile, vm); 1783 if (err) 1784 goto err_unlock_close; 1785 } 1786 vm->batch_invalidate_tlb = true; 1787 } 1788 1789 if (vm->flags & XE_VM_FLAG_LR_MODE) 1790 vm->batch_invalidate_tlb = false; 1791 1792 /* Fill pt_root after allocating scratch tables */ 1793 for_each_tile(tile, xe, id) { 1794 if (!vm->pt_root[id]) 1795 continue; 1796 1797 xe_pt_populate_empty(tile, vm, vm->pt_root[id]); 1798 } 1799 xe_vm_unlock(vm); 1800 1801 /* Kernel migration VM shouldn't have a circular loop.. */ 1802 if (!(flags & XE_VM_FLAG_MIGRATION)) { 1803 for_each_tile(tile, xe, id) { 1804 struct xe_exec_queue *q; 1805 u32 create_flags = EXEC_QUEUE_FLAG_VM; 1806 1807 if (!vm->pt_root[id]) 1808 continue; 1809 1810 q = xe_exec_queue_create_bind(xe, tile, create_flags, 0); 1811 if (IS_ERR(q)) { 1812 err = PTR_ERR(q); 1813 goto err_close; 1814 } 1815 vm->q[id] = q; 1816 number_tiles++; 1817 } 1818 } 1819 1820 if (number_tiles > 1) 1821 vm->composite_fence_ctx = dma_fence_context_alloc(1); 1822 1823 if (xef && xe->info.has_asid) { 1824 u32 asid; 1825 1826 down_write(&xe->usm.lock); 1827 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, 1828 XA_LIMIT(1, XE_MAX_ASID - 1), 1829 &xe->usm.next_asid, GFP_KERNEL); 1830 up_write(&xe->usm.lock); 1831 if (err < 0) 1832 goto err_unlock_close; 1833 1834 vm->usm.asid = asid; 1835 } 1836 1837 trace_xe_vm_create(vm); 1838 1839 return vm; 1840 1841 err_unlock_close: 1842 xe_vm_unlock(vm); 1843 err_close: 1844 xe_vm_close_and_put(vm); 1845 return ERR_PTR(err); 1846 1847 err_svm_fini: 1848 if (flags & XE_VM_FLAG_FAULT_MODE) { 1849 vm->size = 0; /* close the vm */ 1850 xe_svm_fini(vm); 1851 } 1852 err_no_resv: 1853 mutex_destroy(&vm->snap_mutex); 1854 for_each_tile(tile, xe, id) 1855 xe_range_fence_tree_fini(&vm->rftree[id]); 1856 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 1857 if (vm->xef) 1858 xe_file_put(vm->xef); 1859 kfree(vm); 1860 if (flags & XE_VM_FLAG_LR_MODE) 1861 xe_pm_runtime_put(xe); 1862 return ERR_PTR(err); 1863 } 1864 1865 static void xe_vm_close(struct xe_vm *vm) 1866 { 1867 struct xe_device *xe = vm->xe; 1868 bool bound; 1869 int idx; 1870 1871 bound = drm_dev_enter(&xe->drm, &idx); 1872 1873 down_write(&vm->lock); 1874 if (xe_vm_in_fault_mode(vm)) 1875 xe_svm_notifier_lock(vm); 1876 1877 
vm->size = 0; 1878 1879 if (!((vm->flags & XE_VM_FLAG_MIGRATION))) { 1880 struct xe_tile *tile; 1881 struct xe_gt *gt; 1882 u8 id; 1883 1884 /* Wait for pending binds */ 1885 dma_resv_wait_timeout(xe_vm_resv(vm), 1886 DMA_RESV_USAGE_BOOKKEEP, 1887 false, MAX_SCHEDULE_TIMEOUT); 1888 1889 if (bound) { 1890 for_each_tile(tile, xe, id) 1891 if (vm->pt_root[id]) 1892 xe_pt_clear(xe, vm->pt_root[id]); 1893 1894 for_each_gt(gt, xe, id) 1895 xe_gt_tlb_invalidation_vm(gt, vm); 1896 } 1897 } 1898 1899 if (xe_vm_in_fault_mode(vm)) 1900 xe_svm_notifier_unlock(vm); 1901 up_write(&vm->lock); 1902 1903 if (bound) 1904 drm_dev_exit(idx); 1905 } 1906 1907 void xe_vm_close_and_put(struct xe_vm *vm) 1908 { 1909 LIST_HEAD(contested); 1910 struct xe_device *xe = vm->xe; 1911 struct xe_tile *tile; 1912 struct xe_vma *vma, *next_vma; 1913 struct drm_gpuva *gpuva, *next; 1914 u8 id; 1915 1916 xe_assert(xe, !vm->preempt.num_exec_queues); 1917 1918 xe_vm_close(vm); 1919 if (xe_vm_in_preempt_fence_mode(vm)) 1920 flush_work(&vm->preempt.rebind_work); 1921 if (xe_vm_in_fault_mode(vm)) 1922 xe_svm_close(vm); 1923 1924 down_write(&vm->lock); 1925 for_each_tile(tile, xe, id) { 1926 if (vm->q[id]) 1927 xe_exec_queue_last_fence_put(vm->q[id], vm); 1928 } 1929 up_write(&vm->lock); 1930 1931 for_each_tile(tile, xe, id) { 1932 if (vm->q[id]) { 1933 xe_exec_queue_kill(vm->q[id]); 1934 xe_exec_queue_put(vm->q[id]); 1935 vm->q[id] = NULL; 1936 } 1937 } 1938 1939 down_write(&vm->lock); 1940 xe_vm_lock(vm, false); 1941 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) { 1942 vma = gpuva_to_vma(gpuva); 1943 1944 if (xe_vma_has_no_bo(vma)) { 1945 down_read(&vm->userptr.notifier_lock); 1946 vma->gpuva.flags |= XE_VMA_DESTROYED; 1947 up_read(&vm->userptr.notifier_lock); 1948 } 1949 1950 xe_vm_remove_vma(vm, vma); 1951 1952 /* easy case, remove from VMA? */ 1953 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) { 1954 list_del_init(&vma->combined_links.rebind); 1955 xe_vma_destroy(vma, NULL); 1956 continue; 1957 } 1958 1959 list_move_tail(&vma->combined_links.destroy, &contested); 1960 vma->gpuva.flags |= XE_VMA_DESTROYED; 1961 } 1962 1963 /* 1964 * All vm operations will add shared fences to resv. 1965 * The only exception is eviction for a shared object, 1966 * but even so, the unbind when evicted would still 1967 * install a fence to resv. Hence it's safe to 1968 * destroy the pagetables immediately. 1969 */ 1970 xe_vm_free_scratch(vm); 1971 1972 for_each_tile(tile, xe, id) { 1973 if (vm->pt_root[id]) { 1974 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); 1975 vm->pt_root[id] = NULL; 1976 } 1977 } 1978 xe_vm_unlock(vm); 1979 1980 /* 1981 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL 1982 * Since we hold a refcount to the bo, we can remove and free 1983 * the members safely without locking. 
1984 */ 1985 list_for_each_entry_safe(vma, next_vma, &contested, 1986 combined_links.destroy) { 1987 list_del_init(&vma->combined_links.destroy); 1988 xe_vma_destroy_unlocked(vma); 1989 } 1990 1991 if (xe_vm_in_fault_mode(vm)) 1992 xe_svm_fini(vm); 1993 1994 up_write(&vm->lock); 1995 1996 down_write(&xe->usm.lock); 1997 if (vm->usm.asid) { 1998 void *lookup; 1999 2000 xe_assert(xe, xe->info.has_asid); 2001 xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION)); 2002 2003 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); 2004 xe_assert(xe, lookup == vm); 2005 } 2006 up_write(&xe->usm.lock); 2007 2008 for_each_tile(tile, xe, id) 2009 xe_range_fence_tree_fini(&vm->rftree[id]); 2010 2011 xe_vm_put(vm); 2012 } 2013 2014 static void vm_destroy_work_func(struct work_struct *w) 2015 { 2016 struct xe_vm *vm = 2017 container_of(w, struct xe_vm, destroy_work); 2018 struct xe_device *xe = vm->xe; 2019 struct xe_tile *tile; 2020 u8 id; 2021 2022 /* xe_vm_close_and_put was not called? */ 2023 xe_assert(xe, !vm->size); 2024 2025 if (xe_vm_in_preempt_fence_mode(vm)) 2026 flush_work(&vm->preempt.rebind_work); 2027 2028 mutex_destroy(&vm->snap_mutex); 2029 2030 if (vm->flags & XE_VM_FLAG_LR_MODE) 2031 xe_pm_runtime_put(xe); 2032 2033 for_each_tile(tile, xe, id) 2034 XE_WARN_ON(vm->pt_root[id]); 2035 2036 trace_xe_vm_free(vm); 2037 2038 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 2039 2040 if (vm->xef) 2041 xe_file_put(vm->xef); 2042 2043 kfree(vm); 2044 } 2045 2046 static void xe_vm_free(struct drm_gpuvm *gpuvm) 2047 { 2048 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); 2049 2050 /* To destroy the VM we need to be able to sleep */ 2051 queue_work(system_unbound_wq, &vm->destroy_work); 2052 } 2053 2054 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) 2055 { 2056 struct xe_vm *vm; 2057 2058 mutex_lock(&xef->vm.lock); 2059 vm = xa_load(&xef->vm.xa, id); 2060 if (vm) 2061 xe_vm_get(vm); 2062 mutex_unlock(&xef->vm.lock); 2063 2064 return vm; 2065 } 2066 2067 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile) 2068 { 2069 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0); 2070 } 2071 2072 static struct xe_exec_queue * 2073 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 2074 { 2075 return q ? 
q : vm->q[0]; 2076 } 2077 2078 static struct xe_user_fence * 2079 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) 2080 { 2081 unsigned int i; 2082 2083 for (i = 0; i < num_syncs; i++) { 2084 struct xe_sync_entry *e = &syncs[i]; 2085 2086 if (xe_sync_is_ufence(e)) 2087 return xe_sync_ufence_get(e); 2088 } 2089 2090 return NULL; 2091 } 2092 2093 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ 2094 DRM_XE_VM_CREATE_FLAG_LR_MODE | \ 2095 DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 2096 2097 int xe_vm_create_ioctl(struct drm_device *dev, void *data, 2098 struct drm_file *file) 2099 { 2100 struct xe_device *xe = to_xe_device(dev); 2101 struct xe_file *xef = to_xe_file(file); 2102 struct drm_xe_vm_create *args = data; 2103 struct xe_vm *vm; 2104 u32 id; 2105 int err; 2106 u32 flags = 0; 2107 2108 if (XE_IOCTL_DBG(xe, args->extensions)) 2109 return -EINVAL; 2110 2111 if (XE_GT_WA(xe_root_mmio_gt(xe), 14016763929)) 2112 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE; 2113 2114 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && 2115 !xe->info.has_usm)) 2116 return -EINVAL; 2117 2118 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 2119 return -EINVAL; 2120 2121 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS)) 2122 return -EINVAL; 2123 2124 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE && 2125 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && 2126 !xe->info.needs_scratch)) 2127 return -EINVAL; 2128 2129 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) && 2130 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) 2131 return -EINVAL; 2132 2133 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE) 2134 flags |= XE_VM_FLAG_SCRATCH_PAGE; 2135 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) 2136 flags |= XE_VM_FLAG_LR_MODE; 2137 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 2138 flags |= XE_VM_FLAG_FAULT_MODE; 2139 2140 vm = xe_vm_create(xe, flags, xef); 2141 if (IS_ERR(vm)) 2142 return PTR_ERR(vm); 2143 2144 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) 2145 /* Warning: Security issue - never enable by default */ 2146 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); 2147 #endif 2148 2149 /* user id alloc must always be last in ioctl to prevent UAF */ 2150 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); 2151 if (err) 2152 goto err_close_and_put; 2153 2154 args->vm_id = id; 2155 2156 return 0; 2157 2158 err_close_and_put: 2159 xe_vm_close_and_put(vm); 2160 2161 return err; 2162 } 2163 2164 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, 2165 struct drm_file *file) 2166 { 2167 struct xe_device *xe = to_xe_device(dev); 2168 struct xe_file *xef = to_xe_file(file); 2169 struct drm_xe_vm_destroy *args = data; 2170 struct xe_vm *vm; 2171 int err = 0; 2172 2173 if (XE_IOCTL_DBG(xe, args->pad) || 2174 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 2175 return -EINVAL; 2176 2177 mutex_lock(&xef->vm.lock); 2178 vm = xa_load(&xef->vm.xa, args->vm_id); 2179 if (XE_IOCTL_DBG(xe, !vm)) 2180 err = -ENOENT; 2181 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues)) 2182 err = -EBUSY; 2183 else 2184 xa_erase(&xef->vm.xa, args->vm_id); 2185 mutex_unlock(&xef->vm.lock); 2186 2187 if (!err) 2188 xe_vm_close_and_put(vm); 2189 2190 return err; 2191 } 2192 2193 static bool vma_matches(struct xe_vma *vma, u64 page_addr) 2194 { 2195 if (page_addr > xe_vma_end(vma) - 1 || 2196 page_addr + SZ_4K - 1 < xe_vma_start(vma)) 2197 return false; 2198 2199 
return true; 2200 } 2201 2202 /** 2203 * xe_vm_find_vma_by_addr() - Find a VMA by its address 2204 * 2205 * @vm: the xe_vm the vma belongs to 2206 * @page_addr: address to look up 2207 */ 2208 struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr) 2209 { 2210 struct xe_vma *vma = NULL; 2211 2212 if (vm->usm.last_fault_vma) { /* Fast lookup */ 2213 if (vma_matches(vm->usm.last_fault_vma, page_addr)) 2214 vma = vm->usm.last_fault_vma; 2215 } 2216 if (!vma) 2217 vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K); 2218 2219 return vma; 2220 } 2221 2222 static const u32 region_to_mem_type[] = { 2223 XE_PL_TT, 2224 XE_PL_VRAM0, 2225 XE_PL_VRAM1, 2226 }; 2227 2228 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma, 2229 bool post_commit) 2230 { 2231 down_read(&vm->userptr.notifier_lock); 2232 vma->gpuva.flags |= XE_VMA_DESTROYED; 2233 up_read(&vm->userptr.notifier_lock); 2234 if (post_commit) 2235 xe_vm_remove_vma(vm, vma); 2236 } 2237 2238 #undef ULL 2239 #define ULL unsigned long long 2240 2241 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) 2242 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 2243 { 2244 struct xe_vma *vma; 2245 2246 switch (op->op) { 2247 case DRM_GPUVA_OP_MAP: 2248 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx", 2249 (ULL)op->map.va.addr, (ULL)op->map.va.range); 2250 break; 2251 case DRM_GPUVA_OP_REMAP: 2252 vma = gpuva_to_vma(op->remap.unmap->va); 2253 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 2254 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 2255 op->remap.unmap->keep ? 1 : 0); 2256 if (op->remap.prev) 2257 vm_dbg(&xe->drm, 2258 "REMAP:PREV: addr=0x%016llx, range=0x%016llx", 2259 (ULL)op->remap.prev->va.addr, 2260 (ULL)op->remap.prev->va.range); 2261 if (op->remap.next) 2262 vm_dbg(&xe->drm, 2263 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx", 2264 (ULL)op->remap.next->va.addr, 2265 (ULL)op->remap.next->va.range); 2266 break; 2267 case DRM_GPUVA_OP_UNMAP: 2268 vma = gpuva_to_vma(op->unmap.va); 2269 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 2270 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 2271 op->unmap.keep ? 1 : 0); 2272 break; 2273 case DRM_GPUVA_OP_PREFETCH: 2274 vma = gpuva_to_vma(op->prefetch.va); 2275 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx", 2276 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma)); 2277 break; 2278 default: 2279 drm_warn(&xe->drm, "NOT POSSIBLE"); 2280 } 2281 } 2282 #else 2283 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 2284 { 2285 } 2286 #endif 2287 2288 static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags) 2289 { 2290 if (!xe_vm_in_fault_mode(vm)) 2291 return false; 2292 2293 if (!xe_vm_has_scratch(vm)) 2294 return false; 2295 2296 if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE) 2297 return false; 2298 2299 return true; 2300 } 2301 2302 static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops) 2303 { 2304 struct drm_gpuva_op *__op; 2305 2306 drm_gpuva_for_each_op(__op, ops) { 2307 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2308 2309 xe_vma_svm_prefetch_op_fini(op); 2310 } 2311 } 2312 2313 /* 2314 * Create operations list from IOCTL arguments, setup operations fields so parse 2315 * and commit steps are decoupled from IOCTL arguments. This step can fail. 
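 * The ops created here are subsequently walked by vm_bind_ioctl_ops_parse()
 * and committed with xe_vma_op_commit(); on error the SVM prefetch ranges
 * built below are torn down again before the ops list is freed.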
2316 */ 2317 static struct drm_gpuva_ops * 2318 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, 2319 struct xe_bo *bo, u64 bo_offset_or_userptr, 2320 u64 addr, u64 range, 2321 u32 operation, u32 flags, 2322 u32 prefetch_region, u16 pat_index) 2323 { 2324 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL; 2325 struct drm_gpuva_ops *ops; 2326 struct drm_gpuva_op *__op; 2327 struct drm_gpuvm_bo *vm_bo; 2328 u64 range_end = addr + range; 2329 int err; 2330 2331 lockdep_assert_held_write(&vm->lock); 2332 2333 vm_dbg(&vm->xe->drm, 2334 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx", 2335 operation, (ULL)addr, (ULL)range, 2336 (ULL)bo_offset_or_userptr); 2337 2338 switch (operation) { 2339 case DRM_XE_VM_BIND_OP_MAP: 2340 case DRM_XE_VM_BIND_OP_MAP_USERPTR: 2341 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range, 2342 obj, bo_offset_or_userptr); 2343 break; 2344 case DRM_XE_VM_BIND_OP_UNMAP: 2345 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range); 2346 break; 2347 case DRM_XE_VM_BIND_OP_PREFETCH: 2348 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range); 2349 break; 2350 case DRM_XE_VM_BIND_OP_UNMAP_ALL: 2351 xe_assert(vm->xe, bo); 2352 2353 err = xe_bo_lock(bo, true); 2354 if (err) 2355 return ERR_PTR(err); 2356 2357 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj); 2358 if (IS_ERR(vm_bo)) { 2359 xe_bo_unlock(bo); 2360 return ERR_CAST(vm_bo); 2361 } 2362 2363 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo); 2364 drm_gpuvm_bo_put(vm_bo); 2365 xe_bo_unlock(bo); 2366 break; 2367 default: 2368 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2369 ops = ERR_PTR(-EINVAL); 2370 } 2371 if (IS_ERR(ops)) 2372 return ops; 2373 2374 drm_gpuva_for_each_op(__op, ops) { 2375 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2376 2377 if (__op->op == DRM_GPUVA_OP_MAP) { 2378 op->map.immediate = 2379 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; 2380 op->map.read_only = 2381 flags & DRM_XE_VM_BIND_FLAG_READONLY; 2382 op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 2383 op->map.is_cpu_addr_mirror = flags & 2384 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR; 2385 op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; 2386 op->map.pat_index = pat_index; 2387 op->map.invalidate_on_bind = 2388 __xe_vm_needs_clear_scratch_pages(vm, flags); 2389 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { 2390 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2391 struct xe_svm_range *svm_range; 2392 struct drm_gpusvm_ctx ctx = {}; 2393 struct xe_tile *tile; 2394 u8 id, tile_mask = 0; 2395 u32 i; 2396 2397 if (!xe_vma_is_cpu_addr_mirror(vma)) { 2398 op->prefetch.region = prefetch_region; 2399 break; 2400 } 2401 2402 ctx.read_only = xe_vma_read_only(vma); 2403 ctx.devmem_possible = IS_DGFX(vm->xe) && 2404 IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); 2405 2406 for_each_tile(tile, vm->xe, id) 2407 tile_mask |= 0x1 << id; 2408 2409 xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC); 2410 op->prefetch_range.region = prefetch_region; 2411 op->prefetch_range.ranges_count = 0; 2412 alloc_next_range: 2413 svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx); 2414 2415 if (PTR_ERR(svm_range) == -ENOENT) { 2416 u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma); 2417 2418 addr = ret == ULONG_MAX ? 
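/* ULONG_MAX from xe_svm_find_vma_start() means nothing left to prefetch in this range */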
0 : ret; 2419 if (addr) 2420 goto alloc_next_range; 2421 else 2422 goto print_op_label; 2423 } 2424 2425 if (IS_ERR(svm_range)) { 2426 err = PTR_ERR(svm_range); 2427 goto unwind_prefetch_ops; 2428 } 2429 2430 if (xe_svm_range_validate(vm, svm_range, tile_mask, !!prefetch_region)) { 2431 xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID"); 2432 goto check_next_range; 2433 } 2434 2435 err = xa_alloc(&op->prefetch_range.range, 2436 &i, svm_range, xa_limit_32b, 2437 GFP_KERNEL); 2438 2439 if (err) 2440 goto unwind_prefetch_ops; 2441 2442 op->prefetch_range.ranges_count++; 2443 vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH; 2444 xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED"); 2445 check_next_range: 2446 if (range_end > xe_svm_range_end(svm_range) && 2447 xe_svm_range_end(svm_range) < xe_vma_end(vma)) { 2448 addr = xe_svm_range_end(svm_range); 2449 goto alloc_next_range; 2450 } 2451 } 2452 print_op_label: 2453 print_op(vm->xe, __op); 2454 } 2455 2456 return ops; 2457 2458 unwind_prefetch_ops: 2459 xe_svm_prefetch_gpuva_ops_fini(ops); 2460 drm_gpuva_ops_free(&vm->gpuvm, ops); 2461 return ERR_PTR(err); 2462 } 2463 2464 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO); 2465 2466 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, 2467 u16 pat_index, unsigned int flags) 2468 { 2469 struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL; 2470 struct drm_exec exec; 2471 struct xe_vma *vma; 2472 int err = 0; 2473 2474 lockdep_assert_held_write(&vm->lock); 2475 2476 if (bo) { 2477 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); 2478 drm_exec_until_all_locked(&exec) { 2479 err = 0; 2480 if (!bo->vm) { 2481 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm)); 2482 drm_exec_retry_on_contention(&exec); 2483 } 2484 if (!err) { 2485 err = drm_exec_lock_obj(&exec, &bo->ttm.base); 2486 drm_exec_retry_on_contention(&exec); 2487 } 2488 if (err) { 2489 drm_exec_fini(&exec); 2490 return ERR_PTR(err); 2491 } 2492 } 2493 } 2494 vma = xe_vma_create(vm, bo, op->gem.offset, 2495 op->va.addr, op->va.addr + 2496 op->va.range - 1, pat_index, flags); 2497 if (IS_ERR(vma)) 2498 goto err_unlock; 2499 2500 if (xe_vma_is_userptr(vma)) 2501 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2502 else if (!xe_vma_has_no_bo(vma) && !bo->vm) 2503 err = add_preempt_fences(vm, bo); 2504 2505 err_unlock: 2506 if (bo) 2507 drm_exec_fini(&exec); 2508 2509 if (err) { 2510 prep_vma_destroy(vm, vma, false); 2511 xe_vma_destroy_unlocked(vma); 2512 vma = ERR_PTR(err); 2513 } 2514 2515 return vma; 2516 } 2517 2518 static u64 xe_vma_max_pte_size(struct xe_vma *vma) 2519 { 2520 if (vma->gpuva.flags & XE_VMA_PTE_1G) 2521 return SZ_1G; 2522 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT)) 2523 return SZ_2M; 2524 else if (vma->gpuva.flags & XE_VMA_PTE_64K) 2525 return SZ_64K; 2526 else if (vma->gpuva.flags & XE_VMA_PTE_4K) 2527 return SZ_4K; 2528 2529 return SZ_1G; /* Uninitialized, used max size */ 2530 } 2531 2532 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size) 2533 { 2534 switch (size) { 2535 case SZ_1G: 2536 vma->gpuva.flags |= XE_VMA_PTE_1G; 2537 break; 2538 case SZ_2M: 2539 vma->gpuva.flags |= XE_VMA_PTE_2M; 2540 break; 2541 case SZ_64K: 2542 vma->gpuva.flags |= XE_VMA_PTE_64K; 2543 break; 2544 case SZ_4K: 2545 vma->gpuva.flags |= XE_VMA_PTE_4K; 2546 break; 2547 } 2548 } 2549 2550 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) 2551 { 2552 int err = 0; 2553 2554 lockdep_assert_held_write(&vm->lock); 2555 2556 switch 
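/* Insert/remove VMAs in the VM tree; the *_COMMITTED flags set here drive unwind on error */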
(op->base.op) { 2557 case DRM_GPUVA_OP_MAP: 2558 err |= xe_vm_insert_vma(vm, op->map.vma); 2559 if (!err) 2560 op->flags |= XE_VMA_OP_COMMITTED; 2561 break; 2562 case DRM_GPUVA_OP_REMAP: 2563 { 2564 u8 tile_present = 2565 gpuva_to_vma(op->base.remap.unmap->va)->tile_present; 2566 2567 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va), 2568 true); 2569 op->flags |= XE_VMA_OP_COMMITTED; 2570 2571 if (op->remap.prev) { 2572 err |= xe_vm_insert_vma(vm, op->remap.prev); 2573 if (!err) 2574 op->flags |= XE_VMA_OP_PREV_COMMITTED; 2575 if (!err && op->remap.skip_prev) { 2576 op->remap.prev->tile_present = 2577 tile_present; 2578 op->remap.prev = NULL; 2579 } 2580 } 2581 if (op->remap.next) { 2582 err |= xe_vm_insert_vma(vm, op->remap.next); 2583 if (!err) 2584 op->flags |= XE_VMA_OP_NEXT_COMMITTED; 2585 if (!err && op->remap.skip_next) { 2586 op->remap.next->tile_present = 2587 tile_present; 2588 op->remap.next = NULL; 2589 } 2590 } 2591 2592 /* Adjust for partial unbind after removing VMA from VM */ 2593 if (!err) { 2594 op->base.remap.unmap->va->va.addr = op->remap.start; 2595 op->base.remap.unmap->va->va.range = op->remap.range; 2596 } 2597 break; 2598 } 2599 case DRM_GPUVA_OP_UNMAP: 2600 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true); 2601 op->flags |= XE_VMA_OP_COMMITTED; 2602 break; 2603 case DRM_GPUVA_OP_PREFETCH: 2604 op->flags |= XE_VMA_OP_COMMITTED; 2605 break; 2606 default: 2607 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2608 } 2609 2610 return err; 2611 } 2612 2613 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, 2614 struct xe_vma_ops *vops) 2615 { 2616 struct xe_device *xe = vm->xe; 2617 struct drm_gpuva_op *__op; 2618 struct xe_tile *tile; 2619 u8 id, tile_mask = 0; 2620 int err = 0; 2621 2622 lockdep_assert_held_write(&vm->lock); 2623 2624 for_each_tile(tile, vm->xe, id) 2625 tile_mask |= 0x1 << id; 2626 2627 drm_gpuva_for_each_op(__op, ops) { 2628 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2629 struct xe_vma *vma; 2630 unsigned int flags = 0; 2631 2632 INIT_LIST_HEAD(&op->link); 2633 list_add_tail(&op->link, &vops->list); 2634 op->tile_mask = tile_mask; 2635 2636 switch (op->base.op) { 2637 case DRM_GPUVA_OP_MAP: 2638 { 2639 flags |= op->map.read_only ? 2640 VMA_CREATE_FLAG_READ_ONLY : 0; 2641 flags |= op->map.is_null ? 2642 VMA_CREATE_FLAG_IS_NULL : 0; 2643 flags |= op->map.dumpable ? 2644 VMA_CREATE_FLAG_DUMPABLE : 0; 2645 flags |= op->map.is_cpu_addr_mirror ? 
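/* A CPU-address-mirror mapping is created as a system-allocator VMA */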
2646 VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0; 2647 2648 vma = new_vma(vm, &op->base.map, op->map.pat_index, 2649 flags); 2650 if (IS_ERR(vma)) 2651 return PTR_ERR(vma); 2652 2653 op->map.vma = vma; 2654 if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) && 2655 !op->map.is_cpu_addr_mirror) || 2656 op->map.invalidate_on_bind) 2657 xe_vma_ops_incr_pt_update_ops(vops, 2658 op->tile_mask, 1); 2659 break; 2660 } 2661 case DRM_GPUVA_OP_REMAP: 2662 { 2663 struct xe_vma *old = 2664 gpuva_to_vma(op->base.remap.unmap->va); 2665 bool skip = xe_vma_is_cpu_addr_mirror(old); 2666 u64 start = xe_vma_start(old), end = xe_vma_end(old); 2667 int num_remap_ops = 0; 2668 2669 if (op->base.remap.prev) 2670 start = op->base.remap.prev->va.addr + 2671 op->base.remap.prev->va.range; 2672 if (op->base.remap.next) 2673 end = op->base.remap.next->va.addr; 2674 2675 if (xe_vma_is_cpu_addr_mirror(old) && 2676 xe_svm_has_mapping(vm, start, end)) 2677 return -EBUSY; 2678 2679 op->remap.start = xe_vma_start(old); 2680 op->remap.range = xe_vma_size(old); 2681 2682 flags |= op->base.remap.unmap->va->flags & 2683 XE_VMA_READ_ONLY ? 2684 VMA_CREATE_FLAG_READ_ONLY : 0; 2685 flags |= op->base.remap.unmap->va->flags & 2686 DRM_GPUVA_SPARSE ? 2687 VMA_CREATE_FLAG_IS_NULL : 0; 2688 flags |= op->base.remap.unmap->va->flags & 2689 XE_VMA_DUMPABLE ? 2690 VMA_CREATE_FLAG_DUMPABLE : 0; 2691 flags |= xe_vma_is_cpu_addr_mirror(old) ? 2692 VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0; 2693 2694 if (op->base.remap.prev) { 2695 vma = new_vma(vm, op->base.remap.prev, 2696 old->pat_index, flags); 2697 if (IS_ERR(vma)) 2698 return PTR_ERR(vma); 2699 2700 op->remap.prev = vma; 2701 2702 /* 2703 * Userptr creates a new SG mapping so 2704 * we must also rebind. 2705 */ 2706 op->remap.skip_prev = skip || 2707 (!xe_vma_is_userptr(old) && 2708 IS_ALIGNED(xe_vma_end(vma), 2709 xe_vma_max_pte_size(old))); 2710 if (op->remap.skip_prev) { 2711 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2712 op->remap.range -= 2713 xe_vma_end(vma) - 2714 xe_vma_start(old); 2715 op->remap.start = xe_vma_end(vma); 2716 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx", 2717 (ULL)op->remap.start, 2718 (ULL)op->remap.range); 2719 } else { 2720 num_remap_ops++; 2721 } 2722 } 2723 2724 if (op->base.remap.next) { 2725 vma = new_vma(vm, op->base.remap.next, 2726 old->pat_index, flags); 2727 if (IS_ERR(vma)) 2728 return PTR_ERR(vma); 2729 2730 op->remap.next = vma; 2731 2732 /* 2733 * Userptr creates a new SG mapping so 2734 * we must also rebind. 
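					 * Never skip the rebind for userptr VMAs,
					 * even when PTE-size alignment would
					 * otherwise allow it.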
2735 */ 2736 op->remap.skip_next = skip || 2737 (!xe_vma_is_userptr(old) && 2738 IS_ALIGNED(xe_vma_start(vma), 2739 xe_vma_max_pte_size(old))); 2740 if (op->remap.skip_next) { 2741 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2742 op->remap.range -= 2743 xe_vma_end(old) - 2744 xe_vma_start(vma); 2745 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx", 2746 (ULL)op->remap.start, 2747 (ULL)op->remap.range); 2748 } else { 2749 num_remap_ops++; 2750 } 2751 } 2752 if (!skip) 2753 num_remap_ops++; 2754 2755 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops); 2756 break; 2757 } 2758 case DRM_GPUVA_OP_UNMAP: 2759 vma = gpuva_to_vma(op->base.unmap.va); 2760 2761 if (xe_vma_is_cpu_addr_mirror(vma) && 2762 xe_svm_has_mapping(vm, xe_vma_start(vma), 2763 xe_vma_end(vma))) 2764 return -EBUSY; 2765 2766 if (!xe_vma_is_cpu_addr_mirror(vma)) 2767 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2768 break; 2769 case DRM_GPUVA_OP_PREFETCH: 2770 vma = gpuva_to_vma(op->base.prefetch.va); 2771 2772 if (xe_vma_is_userptr(vma)) { 2773 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2774 if (err) 2775 return err; 2776 } 2777 2778 if (xe_vma_is_cpu_addr_mirror(vma)) 2779 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 2780 op->prefetch_range.ranges_count); 2781 else 2782 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2783 2784 break; 2785 default: 2786 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2787 } 2788 2789 err = xe_vma_op_commit(vm, op); 2790 if (err) 2791 return err; 2792 } 2793 2794 return 0; 2795 } 2796 2797 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, 2798 bool post_commit, bool prev_post_commit, 2799 bool next_post_commit) 2800 { 2801 lockdep_assert_held_write(&vm->lock); 2802 2803 switch (op->base.op) { 2804 case DRM_GPUVA_OP_MAP: 2805 if (op->map.vma) { 2806 prep_vma_destroy(vm, op->map.vma, post_commit); 2807 xe_vma_destroy_unlocked(op->map.vma); 2808 } 2809 break; 2810 case DRM_GPUVA_OP_UNMAP: 2811 { 2812 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va); 2813 2814 if (vma) { 2815 down_read(&vm->userptr.notifier_lock); 2816 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2817 up_read(&vm->userptr.notifier_lock); 2818 if (post_commit) 2819 xe_vm_insert_vma(vm, vma); 2820 } 2821 break; 2822 } 2823 case DRM_GPUVA_OP_REMAP: 2824 { 2825 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va); 2826 2827 if (op->remap.prev) { 2828 prep_vma_destroy(vm, op->remap.prev, prev_post_commit); 2829 xe_vma_destroy_unlocked(op->remap.prev); 2830 } 2831 if (op->remap.next) { 2832 prep_vma_destroy(vm, op->remap.next, next_post_commit); 2833 xe_vma_destroy_unlocked(op->remap.next); 2834 } 2835 if (vma) { 2836 down_read(&vm->userptr.notifier_lock); 2837 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2838 up_read(&vm->userptr.notifier_lock); 2839 if (post_commit) 2840 xe_vm_insert_vma(vm, vma); 2841 } 2842 break; 2843 } 2844 case DRM_GPUVA_OP_PREFETCH: 2845 /* Nothing to do */ 2846 break; 2847 default: 2848 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2849 } 2850 } 2851 2852 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, 2853 struct drm_gpuva_ops **ops, 2854 int num_ops_list) 2855 { 2856 int i; 2857 2858 for (i = num_ops_list - 1; i >= 0; --i) { 2859 struct drm_gpuva_ops *__ops = ops[i]; 2860 struct drm_gpuva_op *__op; 2861 2862 if (!__ops) 2863 continue; 2864 2865 drm_gpuva_for_each_op_reverse(__op, __ops) { 2866 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2867 2868 xe_vma_op_unwind(vm, op, 2869 op->flags & XE_VMA_OP_COMMITTED, 2870 op->flags 
& XE_VMA_OP_PREV_COMMITTED, 2871 op->flags & XE_VMA_OP_NEXT_COMMITTED); 2872 } 2873 } 2874 } 2875 2876 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, 2877 bool validate) 2878 { 2879 struct xe_bo *bo = xe_vma_bo(vma); 2880 struct xe_vm *vm = xe_vma_vm(vma); 2881 int err = 0; 2882 2883 if (bo) { 2884 if (!bo->vm) 2885 err = drm_exec_lock_obj(exec, &bo->ttm.base); 2886 if (!err && validate) 2887 err = xe_bo_validate(bo, vm, 2888 !xe_vm_in_preempt_fence_mode(vm)); 2889 } 2890 2891 return err; 2892 } 2893 2894 static int check_ufence(struct xe_vma *vma) 2895 { 2896 if (vma->ufence) { 2897 struct xe_user_fence * const f = vma->ufence; 2898 2899 if (!xe_sync_ufence_get_status(f)) 2900 return -EBUSY; 2901 2902 vma->ufence = NULL; 2903 xe_sync_ufence_put(f); 2904 } 2905 2906 return 0; 2907 } 2908 2909 static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) 2910 { 2911 bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); 2912 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2913 int err = 0; 2914 2915 struct xe_svm_range *svm_range; 2916 struct drm_gpusvm_ctx ctx = {}; 2917 struct xe_tile *tile; 2918 unsigned long i; 2919 u32 region; 2920 2921 if (!xe_vma_is_cpu_addr_mirror(vma)) 2922 return 0; 2923 2924 region = op->prefetch_range.region; 2925 2926 ctx.read_only = xe_vma_read_only(vma); 2927 ctx.devmem_possible = devmem_possible; 2928 ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0; 2929 2930 /* TODO: Threading the migration */ 2931 xa_for_each(&op->prefetch_range.range, i, svm_range) { 2932 if (!region) 2933 xe_svm_range_migrate_to_smem(vm, svm_range); 2934 2935 if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, region)) { 2936 tile = &vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0]; 2937 err = xe_svm_alloc_vram(tile, svm_range, &ctx); 2938 if (err) { 2939 drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n", 2940 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 2941 return -ENODATA; 2942 } 2943 xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM"); 2944 } 2945 2946 err = xe_svm_range_get_pages(vm, svm_range, &ctx); 2947 if (err) { 2948 drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n", 2949 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 2950 if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) 2951 err = -ENODATA; 2952 return err; 2953 } 2954 xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE"); 2955 } 2956 2957 return err; 2958 } 2959 2960 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, 2961 struct xe_vma_op *op) 2962 { 2963 int err = 0; 2964 2965 switch (op->base.op) { 2966 case DRM_GPUVA_OP_MAP: 2967 if (!op->map.invalidate_on_bind) 2968 err = vma_lock_and_validate(exec, op->map.vma, 2969 !xe_vm_in_fault_mode(vm) || 2970 op->map.immediate); 2971 break; 2972 case DRM_GPUVA_OP_REMAP: 2973 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va)); 2974 if (err) 2975 break; 2976 2977 err = vma_lock_and_validate(exec, 2978 gpuva_to_vma(op->base.remap.unmap->va), 2979 false); 2980 if (!err && op->remap.prev) 2981 err = vma_lock_and_validate(exec, op->remap.prev, true); 2982 if (!err && op->remap.next) 2983 err = vma_lock_and_validate(exec, op->remap.next, true); 2984 break; 2985 case DRM_GPUVA_OP_UNMAP: 2986 err = check_ufence(gpuva_to_vma(op->base.unmap.va)); 2987 if (err) 2988 break; 2989 2990 err = vma_lock_and_validate(exec, 2991 gpuva_to_vma(op->base.unmap.va), 2992 false); 2993 
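		/* Unmap only needs the lock; no revalidation of the backing BO */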
break; 2994 case DRM_GPUVA_OP_PREFETCH: 2995 { 2996 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2997 u32 region; 2998 2999 if (xe_vma_is_cpu_addr_mirror(vma)) 3000 region = op->prefetch_range.region; 3001 else 3002 region = op->prefetch.region; 3003 3004 xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type)); 3005 3006 err = vma_lock_and_validate(exec, 3007 gpuva_to_vma(op->base.prefetch.va), 3008 false); 3009 if (!err && !xe_vma_has_no_bo(vma)) 3010 err = xe_bo_migrate(xe_vma_bo(vma), 3011 region_to_mem_type[region]); 3012 break; 3013 } 3014 default: 3015 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 3016 } 3017 3018 return err; 3019 } 3020 3021 static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops) 3022 { 3023 struct xe_vma_op *op; 3024 int err; 3025 3026 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) 3027 return 0; 3028 3029 list_for_each_entry(op, &vops->list, link) { 3030 if (op->base.op == DRM_GPUVA_OP_PREFETCH) { 3031 err = prefetch_ranges(vm, op); 3032 if (err) 3033 return err; 3034 } 3035 } 3036 3037 return 0; 3038 } 3039 3040 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, 3041 struct xe_vm *vm, 3042 struct xe_vma_ops *vops) 3043 { 3044 struct xe_vma_op *op; 3045 int err; 3046 3047 err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 3048 if (err) 3049 return err; 3050 3051 list_for_each_entry(op, &vops->list, link) { 3052 err = op_lock_and_prep(exec, vm, op); 3053 if (err) 3054 return err; 3055 } 3056 3057 #ifdef TEST_VM_OPS_ERROR 3058 if (vops->inject_error && 3059 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK) 3060 return -ENOSPC; 3061 #endif 3062 3063 return 0; 3064 } 3065 3066 static void op_trace(struct xe_vma_op *op) 3067 { 3068 switch (op->base.op) { 3069 case DRM_GPUVA_OP_MAP: 3070 trace_xe_vma_bind(op->map.vma); 3071 break; 3072 case DRM_GPUVA_OP_REMAP: 3073 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va)); 3074 if (op->remap.prev) 3075 trace_xe_vma_bind(op->remap.prev); 3076 if (op->remap.next) 3077 trace_xe_vma_bind(op->remap.next); 3078 break; 3079 case DRM_GPUVA_OP_UNMAP: 3080 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va)); 3081 break; 3082 case DRM_GPUVA_OP_PREFETCH: 3083 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va)); 3084 break; 3085 case DRM_GPUVA_OP_DRIVER: 3086 break; 3087 default: 3088 XE_WARN_ON("NOT POSSIBLE"); 3089 } 3090 } 3091 3092 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops) 3093 { 3094 struct xe_vma_op *op; 3095 3096 list_for_each_entry(op, &vops->list, link) 3097 op_trace(op); 3098 } 3099 3100 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops) 3101 { 3102 struct xe_exec_queue *q = vops->q; 3103 struct xe_tile *tile; 3104 int number_tiles = 0; 3105 u8 id; 3106 3107 for_each_tile(tile, vm->xe, id) { 3108 if (vops->pt_update_ops[id].num_ops) 3109 ++number_tiles; 3110 3111 if (vops->pt_update_ops[id].q) 3112 continue; 3113 3114 if (q) { 3115 vops->pt_update_ops[id].q = q; 3116 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list)) 3117 q = list_next_entry(q, multi_gt_list); 3118 } else { 3119 vops->pt_update_ops[id].q = vm->q[id]; 3120 } 3121 } 3122 3123 return number_tiles; 3124 } 3125 3126 static struct dma_fence *ops_execute(struct xe_vm *vm, 3127 struct xe_vma_ops *vops) 3128 { 3129 struct xe_tile *tile; 3130 struct dma_fence *fence = NULL; 3131 struct dma_fence **fences = NULL; 3132 struct dma_fence_array *cf = NULL; 3133 int number_tiles = 0, current_fence = 0, err; 3134 u8 id; 3135 3136 number_tiles = 
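	/* Assign an exec queue to each tile's PT update ops before preparing them */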
vm_ops_setup_tile_args(vm, vops); 3137 if (number_tiles == 0) 3138 return ERR_PTR(-ENODATA); 3139 3140 if (number_tiles > 1) { 3141 fences = kmalloc_array(number_tiles, sizeof(*fences), 3142 GFP_KERNEL); 3143 if (!fences) { 3144 fence = ERR_PTR(-ENOMEM); 3145 goto err_trace; 3146 } 3147 } 3148 3149 for_each_tile(tile, vm->xe, id) { 3150 if (!vops->pt_update_ops[id].num_ops) 3151 continue; 3152 3153 err = xe_pt_update_ops_prepare(tile, vops); 3154 if (err) { 3155 fence = ERR_PTR(err); 3156 goto err_out; 3157 } 3158 } 3159 3160 trace_xe_vm_ops_execute(vops); 3161 3162 for_each_tile(tile, vm->xe, id) { 3163 if (!vops->pt_update_ops[id].num_ops) 3164 continue; 3165 3166 fence = xe_pt_update_ops_run(tile, vops); 3167 if (IS_ERR(fence)) 3168 goto err_out; 3169 3170 if (fences) 3171 fences[current_fence++] = fence; 3172 } 3173 3174 if (fences) { 3175 cf = dma_fence_array_create(number_tiles, fences, 3176 vm->composite_fence_ctx, 3177 vm->composite_fence_seqno++, 3178 false); 3179 if (!cf) { 3180 --vm->composite_fence_seqno; 3181 fence = ERR_PTR(-ENOMEM); 3182 goto err_out; 3183 } 3184 fence = &cf->base; 3185 } 3186 3187 for_each_tile(tile, vm->xe, id) { 3188 if (!vops->pt_update_ops[id].num_ops) 3189 continue; 3190 3191 xe_pt_update_ops_fini(tile, vops); 3192 } 3193 3194 return fence; 3195 3196 err_out: 3197 for_each_tile(tile, vm->xe, id) { 3198 if (!vops->pt_update_ops[id].num_ops) 3199 continue; 3200 3201 xe_pt_update_ops_abort(tile, vops); 3202 } 3203 while (current_fence) 3204 dma_fence_put(fences[--current_fence]); 3205 kfree(fences); 3206 kfree(cf); 3207 3208 err_trace: 3209 trace_xe_vm_ops_fail(vm); 3210 return fence; 3211 } 3212 3213 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence) 3214 { 3215 if (vma->ufence) 3216 xe_sync_ufence_put(vma->ufence); 3217 vma->ufence = __xe_sync_ufence_get(ufence); 3218 } 3219 3220 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op, 3221 struct xe_user_fence *ufence) 3222 { 3223 switch (op->base.op) { 3224 case DRM_GPUVA_OP_MAP: 3225 vma_add_ufence(op->map.vma, ufence); 3226 break; 3227 case DRM_GPUVA_OP_REMAP: 3228 if (op->remap.prev) 3229 vma_add_ufence(op->remap.prev, ufence); 3230 if (op->remap.next) 3231 vma_add_ufence(op->remap.next, ufence); 3232 break; 3233 case DRM_GPUVA_OP_UNMAP: 3234 break; 3235 case DRM_GPUVA_OP_PREFETCH: 3236 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence); 3237 break; 3238 default: 3239 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 3240 } 3241 } 3242 3243 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, 3244 struct dma_fence *fence) 3245 { 3246 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q); 3247 struct xe_user_fence *ufence; 3248 struct xe_vma_op *op; 3249 int i; 3250 3251 ufence = find_ufence_get(vops->syncs, vops->num_syncs); 3252 list_for_each_entry(op, &vops->list, link) { 3253 if (ufence) 3254 op_add_ufence(vm, op, ufence); 3255 3256 if (op->base.op == DRM_GPUVA_OP_UNMAP) 3257 xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence); 3258 else if (op->base.op == DRM_GPUVA_OP_REMAP) 3259 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), 3260 fence); 3261 } 3262 if (ufence) 3263 xe_sync_ufence_put(ufence); 3264 if (fence) { 3265 for (i = 0; i < vops->num_syncs; i++) 3266 xe_sync_entry_signal(vops->syncs + i, fence); 3267 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); 3268 } 3269 } 3270 3271 static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm, 3272 struct xe_vma_ops *vops) 3273 { 3274 struct 
drm_exec exec; 3275 struct dma_fence *fence; 3276 int err; 3277 3278 lockdep_assert_held_write(&vm->lock); 3279 3280 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | 3281 DRM_EXEC_IGNORE_DUPLICATES, 0); 3282 drm_exec_until_all_locked(&exec) { 3283 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops); 3284 drm_exec_retry_on_contention(&exec); 3285 if (err) { 3286 fence = ERR_PTR(err); 3287 goto unlock; 3288 } 3289 3290 fence = ops_execute(vm, vops); 3291 if (IS_ERR(fence)) { 3292 if (PTR_ERR(fence) == -ENODATA) 3293 vm_bind_ioctl_ops_fini(vm, vops, NULL); 3294 goto unlock; 3295 } 3296 3297 vm_bind_ioctl_ops_fini(vm, vops, fence); 3298 } 3299 3300 unlock: 3301 drm_exec_fini(&exec); 3302 return fence; 3303 } 3304 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); 3305 3306 #define SUPPORTED_FLAGS_STUB \ 3307 (DRM_XE_VM_BIND_FLAG_READONLY | \ 3308 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \ 3309 DRM_XE_VM_BIND_FLAG_NULL | \ 3310 DRM_XE_VM_BIND_FLAG_DUMPABLE | \ 3311 DRM_XE_VM_BIND_FLAG_CHECK_PXP | \ 3312 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) 3313 3314 #ifdef TEST_VM_OPS_ERROR 3315 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR) 3316 #else 3317 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB 3318 #endif 3319 3320 #define XE_64K_PAGE_MASK 0xffffull 3321 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) 3322 3323 static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, 3324 struct drm_xe_vm_bind *args, 3325 struct drm_xe_vm_bind_op **bind_ops) 3326 { 3327 int err; 3328 int i; 3329 3330 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) || 3331 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 3332 return -EINVAL; 3333 3334 if (XE_IOCTL_DBG(xe, args->extensions)) 3335 return -EINVAL; 3336 3337 if (args->num_binds > 1) { 3338 u64 __user *bind_user = 3339 u64_to_user_ptr(args->vector_of_binds); 3340 3341 *bind_ops = kvmalloc_array(args->num_binds, 3342 sizeof(struct drm_xe_vm_bind_op), 3343 GFP_KERNEL | __GFP_ACCOUNT | 3344 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3345 if (!*bind_ops) 3346 return args->num_binds > 1 ? 
-ENOBUFS : -ENOMEM; 3347 3348 err = copy_from_user(*bind_ops, bind_user, 3349 sizeof(struct drm_xe_vm_bind_op) * 3350 args->num_binds); 3351 if (XE_IOCTL_DBG(xe, err)) { 3352 err = -EFAULT; 3353 goto free_bind_ops; 3354 } 3355 } else { 3356 *bind_ops = &args->bind; 3357 } 3358 3359 for (i = 0; i < args->num_binds; ++i) { 3360 u64 range = (*bind_ops)[i].range; 3361 u64 addr = (*bind_ops)[i].addr; 3362 u32 op = (*bind_ops)[i].op; 3363 u32 flags = (*bind_ops)[i].flags; 3364 u32 obj = (*bind_ops)[i].obj; 3365 u64 obj_offset = (*bind_ops)[i].obj_offset; 3366 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance; 3367 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 3368 bool is_cpu_addr_mirror = flags & 3369 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR; 3370 u16 pat_index = (*bind_ops)[i].pat_index; 3371 u16 coh_mode; 3372 3373 if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror && 3374 (!xe_vm_in_fault_mode(vm) || 3375 !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) { 3376 err = -EINVAL; 3377 goto free_bind_ops; 3378 } 3379 3380 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) { 3381 err = -EINVAL; 3382 goto free_bind_ops; 3383 } 3384 3385 pat_index = array_index_nospec(pat_index, xe->pat.n_entries); 3386 (*bind_ops)[i].pat_index = pat_index; 3387 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); 3388 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */ 3389 err = -EINVAL; 3390 goto free_bind_ops; 3391 } 3392 3393 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) { 3394 err = -EINVAL; 3395 goto free_bind_ops; 3396 } 3397 3398 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) || 3399 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) || 3400 XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) || 3401 XE_IOCTL_DBG(xe, obj_offset && (is_null || 3402 is_cpu_addr_mirror)) || 3403 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP && 3404 (is_null || is_cpu_addr_mirror)) || 3405 XE_IOCTL_DBG(xe, !obj && 3406 op == DRM_XE_VM_BIND_OP_MAP && 3407 !is_null && !is_cpu_addr_mirror) || 3408 XE_IOCTL_DBG(xe, !obj && 3409 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3410 XE_IOCTL_DBG(xe, addr && 3411 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3412 XE_IOCTL_DBG(xe, range && 3413 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3414 XE_IOCTL_DBG(xe, obj && 3415 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 3416 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 3417 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 3418 XE_IOCTL_DBG(xe, obj && 3419 op == DRM_XE_VM_BIND_OP_PREFETCH) || 3420 XE_IOCTL_DBG(xe, prefetch_region && 3421 op != DRM_XE_VM_BIND_OP_PREFETCH) || 3422 XE_IOCTL_DBG(xe, !(BIT(prefetch_region) & 3423 xe->info.mem_region_mask)) || 3424 XE_IOCTL_DBG(xe, obj && 3425 op == DRM_XE_VM_BIND_OP_UNMAP)) { 3426 err = -EINVAL; 3427 goto free_bind_ops; 3428 } 3429 3430 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) || 3431 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) || 3432 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) || 3433 XE_IOCTL_DBG(xe, !range && 3434 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) { 3435 err = -EINVAL; 3436 goto free_bind_ops; 3437 } 3438 } 3439 3440 return 0; 3441 3442 free_bind_ops: 3443 if (args->num_binds > 1) 3444 kvfree(*bind_ops); 3445 *bind_ops = NULL; 3446 return err; 3447 } 3448 3449 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, 3450 struct xe_exec_queue *q, 3451 struct xe_sync_entry *syncs, 3452 int num_syncs) 3453 { 3454 struct dma_fence *fence; 3455 int i, err = 0; 3456 3457 fence = xe_sync_in_fence_get(syncs, num_syncs, 3458 to_wait_exec_queue(vm, q), vm); 3459 if (IS_ERR(fence)) 3460 return PTR_ERR(fence); 3461 3462 for (i = 0; i < num_syncs; i++) 
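		/* Still signal the out-syncs even though no bind work was submitted */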
3463 xe_sync_entry_signal(&syncs[i], fence); 3464 3465 xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm, 3466 fence); 3467 dma_fence_put(fence); 3468 3469 return err; 3470 } 3471 3472 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 3473 struct xe_exec_queue *q, 3474 struct xe_sync_entry *syncs, u32 num_syncs) 3475 { 3476 memset(vops, 0, sizeof(*vops)); 3477 INIT_LIST_HEAD(&vops->list); 3478 vops->vm = vm; 3479 vops->q = q; 3480 vops->syncs = syncs; 3481 vops->num_syncs = num_syncs; 3482 vops->flags = 0; 3483 } 3484 3485 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, 3486 u64 addr, u64 range, u64 obj_offset, 3487 u16 pat_index, u32 op, u32 bind_flags) 3488 { 3489 u16 coh_mode; 3490 3491 if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) || 3492 XE_IOCTL_DBG(xe, obj_offset > 3493 xe_bo_size(bo) - range)) { 3494 return -EINVAL; 3495 } 3496 3497 /* 3498 * Some platforms require 64k VM_BIND alignment, 3499 * specifically those with XE_VRAM_FLAGS_NEED64K. 3500 * 3501 * Other platforms may have BO's set to 64k physical placement, 3502 * but can be mapped at 4k offsets anyway. This check is only 3503 * there for the former case. 3504 */ 3505 if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) && 3506 (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) { 3507 if (XE_IOCTL_DBG(xe, obj_offset & 3508 XE_64K_PAGE_MASK) || 3509 XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || 3510 XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) { 3511 return -EINVAL; 3512 } 3513 } 3514 3515 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); 3516 if (bo->cpu_caching) { 3517 if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 3518 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) { 3519 return -EINVAL; 3520 } 3521 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) { 3522 /* 3523 * Imported dma-buf from a different device should 3524 * require 1way or 2way coherency since we don't know 3525 * how it was mapped on the CPU. Just assume is it 3526 * potentially cached on CPU side. 
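		 * Reject non-coherent PAT indices for such imported objects.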
3527 */ 3528 return -EINVAL; 3529 } 3530 3531 /* If a BO is protected it can only be mapped if the key is still valid */ 3532 if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) && 3533 op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL) 3534 if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0)) 3535 return -ENOEXEC; 3536 3537 return 0; 3538 } 3539 3540 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3541 { 3542 struct xe_device *xe = to_xe_device(dev); 3543 struct xe_file *xef = to_xe_file(file); 3544 struct drm_xe_vm_bind *args = data; 3545 struct drm_xe_sync __user *syncs_user; 3546 struct xe_bo **bos = NULL; 3547 struct drm_gpuva_ops **ops = NULL; 3548 struct xe_vm *vm; 3549 struct xe_exec_queue *q = NULL; 3550 u32 num_syncs, num_ufence = 0; 3551 struct xe_sync_entry *syncs = NULL; 3552 struct drm_xe_vm_bind_op *bind_ops = NULL; 3553 struct xe_vma_ops vops; 3554 struct dma_fence *fence; 3555 int err; 3556 int i; 3557 3558 vm = xe_vm_lookup(xef, args->vm_id); 3559 if (XE_IOCTL_DBG(xe, !vm)) 3560 return -EINVAL; 3561 3562 err = vm_bind_ioctl_check_args(xe, vm, args, &bind_ops); 3563 if (err) 3564 goto put_vm; 3565 3566 if (args->exec_queue_id) { 3567 q = xe_exec_queue_lookup(xef, args->exec_queue_id); 3568 if (XE_IOCTL_DBG(xe, !q)) { 3569 err = -ENOENT; 3570 goto free_bind_ops; 3571 } 3572 3573 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) { 3574 err = -EINVAL; 3575 goto put_exec_queue; 3576 } 3577 } 3578 3579 /* Ensure all UNMAPs visible */ 3580 xe_svm_flush(vm); 3581 3582 err = down_write_killable(&vm->lock); 3583 if (err) 3584 goto put_exec_queue; 3585 3586 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { 3587 err = -ENOENT; 3588 goto release_vm_lock; 3589 } 3590 3591 for (i = 0; i < args->num_binds; ++i) { 3592 u64 range = bind_ops[i].range; 3593 u64 addr = bind_ops[i].addr; 3594 3595 if (XE_IOCTL_DBG(xe, range > vm->size) || 3596 XE_IOCTL_DBG(xe, addr > vm->size - range)) { 3597 err = -EINVAL; 3598 goto release_vm_lock; 3599 } 3600 } 3601 3602 if (args->num_binds) { 3603 bos = kvcalloc(args->num_binds, sizeof(*bos), 3604 GFP_KERNEL | __GFP_ACCOUNT | 3605 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3606 if (!bos) { 3607 err = -ENOMEM; 3608 goto release_vm_lock; 3609 } 3610 3611 ops = kvcalloc(args->num_binds, sizeof(*ops), 3612 GFP_KERNEL | __GFP_ACCOUNT | 3613 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3614 if (!ops) { 3615 err = -ENOMEM; 3616 goto free_bos; 3617 } 3618 } 3619 3620 for (i = 0; i < args->num_binds; ++i) { 3621 struct drm_gem_object *gem_obj; 3622 u64 range = bind_ops[i].range; 3623 u64 addr = bind_ops[i].addr; 3624 u32 obj = bind_ops[i].obj; 3625 u64 obj_offset = bind_ops[i].obj_offset; 3626 u16 pat_index = bind_ops[i].pat_index; 3627 u32 op = bind_ops[i].op; 3628 u32 bind_flags = bind_ops[i].flags; 3629 3630 if (!obj) 3631 continue; 3632 3633 gem_obj = drm_gem_object_lookup(file, obj); 3634 if (XE_IOCTL_DBG(xe, !gem_obj)) { 3635 err = -ENOENT; 3636 goto put_obj; 3637 } 3638 bos[i] = gem_to_xe_bo(gem_obj); 3639 3640 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range, 3641 obj_offset, pat_index, op, 3642 bind_flags); 3643 if (err) 3644 goto put_obj; 3645 } 3646 3647 if (args->num_syncs) { 3648 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); 3649 if (!syncs) { 3650 err = -ENOMEM; 3651 goto put_obj; 3652 } 3653 } 3654 3655 syncs_user = u64_to_user_ptr(args->syncs); 3656 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { 3657 err = xe_sync_entry_parse(xe, 
xef, &syncs[num_syncs], 3658 &syncs_user[num_syncs], 3659 (xe_vm_in_lr_mode(vm) ? 3660 SYNC_PARSE_FLAG_LR_MODE : 0) | 3661 (!args->num_binds ? 3662 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0)); 3663 if (err) 3664 goto free_syncs; 3665 3666 if (xe_sync_is_ufence(&syncs[num_syncs])) 3667 num_ufence++; 3668 } 3669 3670 if (XE_IOCTL_DBG(xe, num_ufence > 1)) { 3671 err = -EINVAL; 3672 goto free_syncs; 3673 } 3674 3675 if (!args->num_binds) { 3676 err = -ENODATA; 3677 goto free_syncs; 3678 } 3679 3680 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); 3681 for (i = 0; i < args->num_binds; ++i) { 3682 u64 range = bind_ops[i].range; 3683 u64 addr = bind_ops[i].addr; 3684 u32 op = bind_ops[i].op; 3685 u32 flags = bind_ops[i].flags; 3686 u64 obj_offset = bind_ops[i].obj_offset; 3687 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance; 3688 u16 pat_index = bind_ops[i].pat_index; 3689 3690 ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset, 3691 addr, range, op, flags, 3692 prefetch_region, pat_index); 3693 if (IS_ERR(ops[i])) { 3694 err = PTR_ERR(ops[i]); 3695 ops[i] = NULL; 3696 goto unwind_ops; 3697 } 3698 3699 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops); 3700 if (err) 3701 goto unwind_ops; 3702 3703 #ifdef TEST_VM_OPS_ERROR 3704 if (flags & FORCE_OP_ERROR) { 3705 vops.inject_error = true; 3706 vm->xe->vm_inject_error_position = 3707 (vm->xe->vm_inject_error_position + 1) % 3708 FORCE_OP_ERROR_COUNT; 3709 } 3710 #endif 3711 } 3712 3713 /* Nothing to do */ 3714 if (list_empty(&vops.list)) { 3715 err = -ENODATA; 3716 goto unwind_ops; 3717 } 3718 3719 err = xe_vma_ops_alloc(&vops, args->num_binds > 1); 3720 if (err) 3721 goto unwind_ops; 3722 3723 err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops); 3724 if (err) 3725 goto unwind_ops; 3726 3727 fence = vm_bind_ioctl_ops_execute(vm, &vops); 3728 if (IS_ERR(fence)) 3729 err = PTR_ERR(fence); 3730 else 3731 dma_fence_put(fence); 3732 3733 unwind_ops: 3734 if (err && err != -ENODATA) 3735 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); 3736 xe_vma_ops_fini(&vops); 3737 for (i = args->num_binds - 1; i >= 0; --i) 3738 if (ops[i]) 3739 drm_gpuva_ops_free(&vm->gpuvm, ops[i]); 3740 free_syncs: 3741 if (err == -ENODATA) 3742 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs); 3743 while (num_syncs--) 3744 xe_sync_entry_cleanup(&syncs[num_syncs]); 3745 3746 kfree(syncs); 3747 put_obj: 3748 for (i = 0; i < args->num_binds; ++i) 3749 xe_bo_put(bos[i]); 3750 3751 kvfree(ops); 3752 free_bos: 3753 kvfree(bos); 3754 release_vm_lock: 3755 up_write(&vm->lock); 3756 put_exec_queue: 3757 if (q) 3758 xe_exec_queue_put(q); 3759 free_bind_ops: 3760 if (args->num_binds > 1) 3761 kvfree(bind_ops); 3762 put_vm: 3763 xe_vm_put(vm); 3764 return err; 3765 } 3766 3767 /** 3768 * xe_vm_bind_kernel_bo - bind a kernel BO to a VM 3769 * @vm: VM to bind the BO to 3770 * @bo: BO to bind 3771 * @q: exec queue to use for the bind (optional) 3772 * @addr: address at which to bind the BO 3773 * @cache_lvl: PAT cache level to use 3774 * 3775 * Execute a VM bind map operation on a kernel-owned BO to bind it into a 3776 * kernel-owned VM. 3777 * 3778 * Returns a dma_fence to track the binding completion if the job to do so was 3779 * successfully submitted, an error pointer otherwise. 
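 *
 * A minimal usage sketch (illustrative only; the BO, the address and the
 * choice of XE_CACHE_WB are assumptions of the example, not requirements of
 * this interface):
 *
 *	fence = xe_vm_bind_kernel_bo(vm, bo, NULL, addr, XE_CACHE_WB);
 *	if (IS_ERR(fence))
 *		return PTR_ERR(fence);
 *	dma_fence_wait(fence, false);
 *	dma_fence_put(fence);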
3780 */ 3781 struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo, 3782 struct xe_exec_queue *q, u64 addr, 3783 enum xe_cache_level cache_lvl) 3784 { 3785 struct xe_vma_ops vops; 3786 struct drm_gpuva_ops *ops = NULL; 3787 struct dma_fence *fence; 3788 int err; 3789 3790 xe_bo_get(bo); 3791 xe_vm_get(vm); 3792 if (q) 3793 xe_exec_queue_get(q); 3794 3795 down_write(&vm->lock); 3796 3797 xe_vma_ops_init(&vops, vm, q, NULL, 0); 3798 3799 ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo), 3800 DRM_XE_VM_BIND_OP_MAP, 0, 0, 3801 vm->xe->pat.idx[cache_lvl]); 3802 if (IS_ERR(ops)) { 3803 err = PTR_ERR(ops); 3804 goto release_vm_lock; 3805 } 3806 3807 err = vm_bind_ioctl_ops_parse(vm, ops, &vops); 3808 if (err) 3809 goto release_vm_lock; 3810 3811 xe_assert(vm->xe, !list_empty(&vops.list)); 3812 3813 err = xe_vma_ops_alloc(&vops, false); 3814 if (err) 3815 goto unwind_ops; 3816 3817 fence = vm_bind_ioctl_ops_execute(vm, &vops); 3818 if (IS_ERR(fence)) 3819 err = PTR_ERR(fence); 3820 3821 unwind_ops: 3822 if (err && err != -ENODATA) 3823 vm_bind_ioctl_ops_unwind(vm, &ops, 1); 3824 3825 xe_vma_ops_fini(&vops); 3826 drm_gpuva_ops_free(&vm->gpuvm, ops); 3827 3828 release_vm_lock: 3829 up_write(&vm->lock); 3830 3831 if (q) 3832 xe_exec_queue_put(q); 3833 xe_vm_put(vm); 3834 xe_bo_put(bo); 3835 3836 if (err) 3837 fence = ERR_PTR(err); 3838 3839 return fence; 3840 } 3841 3842 /** 3843 * xe_vm_lock() - Lock the vm's dma_resv object 3844 * @vm: The struct xe_vm whose lock is to be locked 3845 * @intr: Whether to perform any wait interruptible 3846 * 3847 * Return: 0 on success, -EINTR if @intr is true and the wait for a 3848 * contended lock was interrupted. If @intr is false, the function 3849 * always returns 0. 3850 */ 3851 int xe_vm_lock(struct xe_vm *vm, bool intr) 3852 { 3853 if (intr) 3854 return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); 3855 3856 return dma_resv_lock(xe_vm_resv(vm), NULL); 3857 } 3858 3859 /** 3860 * xe_vm_unlock() - Unlock the vm's dma_resv object 3861 * @vm: The struct xe_vm whose lock is to be released. 3862 * 3863 * Unlock a buffer object lock that was locked by xe_vm_lock(). 3864 */ 3865 void xe_vm_unlock(struct xe_vm *vm) 3866 { 3867 dma_resv_unlock(xe_vm_resv(vm)); 3868 } 3869 3870 /** 3871 * xe_vm_range_tilemask_tlb_invalidation - Issue a TLB invalidation on this tilemask for an 3872 * address range 3873 * @vm: The VM 3874 * @start: start address 3875 * @end: end address 3876 * @tile_mask: mask for which gt's issue tlb invalidation 3877 * 3878 * Issue a range based TLB invalidation for gt's in tilemask 3879 * 3880 * Returns 0 for success, negative error code otherwise. 
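 * The call is synchronous: it waits on every invalidation fence it issued
 * before returning.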
3881 */ 3882 int xe_vm_range_tilemask_tlb_invalidation(struct xe_vm *vm, u64 start, 3883 u64 end, u8 tile_mask) 3884 { 3885 struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; 3886 struct xe_tile *tile; 3887 u32 fence_id = 0; 3888 u8 id; 3889 int err; 3890 3891 if (!tile_mask) 3892 return 0; 3893 3894 for_each_tile(tile, vm->xe, id) { 3895 if (tile_mask & BIT(id)) { 3896 xe_gt_tlb_invalidation_fence_init(tile->primary_gt, 3897 &fence[fence_id], true); 3898 3899 err = xe_gt_tlb_invalidation_range(tile->primary_gt, 3900 &fence[fence_id], 3901 start, 3902 end, 3903 vm->usm.asid); 3904 if (err) 3905 goto wait; 3906 ++fence_id; 3907 3908 if (!tile->media_gt) 3909 continue; 3910 3911 xe_gt_tlb_invalidation_fence_init(tile->media_gt, 3912 &fence[fence_id], true); 3913 3914 err = xe_gt_tlb_invalidation_range(tile->media_gt, 3915 &fence[fence_id], 3916 start, 3917 end, 3918 vm->usm.asid); 3919 if (err) 3920 goto wait; 3921 ++fence_id; 3922 } 3923 } 3924 3925 wait: 3926 for (id = 0; id < fence_id; ++id) 3927 xe_gt_tlb_invalidation_fence_wait(&fence[id]); 3928 3929 return err; 3930 } 3931 3932 /** 3933 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock 3934 * @vma: VMA to invalidate 3935 * 3936 * Walks a list of page tables leaves which it memset the entries owned by this 3937 * VMA to zero, invalidates the TLBs, and block until TLBs invalidation is 3938 * complete. 3939 * 3940 * Returns 0 for success, negative error code otherwise. 3941 */ 3942 int xe_vm_invalidate_vma(struct xe_vma *vma) 3943 { 3944 struct xe_device *xe = xe_vma_vm(vma)->xe; 3945 struct xe_vm *vm = xe_vma_vm(vma); 3946 struct xe_tile *tile; 3947 u8 tile_mask = 0; 3948 int ret = 0; 3949 u8 id; 3950 3951 xe_assert(xe, !xe_vma_is_null(vma)); 3952 xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma)); 3953 trace_xe_vma_invalidate(vma); 3954 3955 vm_dbg(&vm->xe->drm, 3956 "INVALIDATE: addr=0x%016llx, range=0x%016llx", 3957 xe_vma_start(vma), xe_vma_size(vma)); 3958 3959 /* 3960 * Check that we don't race with page-table updates, tile_invalidated 3961 * update is safe 3962 */ 3963 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { 3964 if (xe_vma_is_userptr(vma)) { 3965 lockdep_assert(lockdep_is_held_type(&vm->userptr.notifier_lock, 0) || 3966 (lockdep_is_held_type(&vm->userptr.notifier_lock, 1) && 3967 lockdep_is_held(&xe_vm_resv(vm)->lock.base))); 3968 3969 WARN_ON_ONCE(!mmu_interval_check_retry 3970 (&to_userptr_vma(vma)->userptr.notifier, 3971 to_userptr_vma(vma)->userptr.notifier_seq)); 3972 WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm), 3973 DMA_RESV_USAGE_BOOKKEEP)); 3974 3975 } else { 3976 xe_bo_assert_held(xe_vma_bo(vma)); 3977 } 3978 } 3979 3980 for_each_tile(tile, xe, id) 3981 if (xe_pt_zap_ptes(tile, vma)) 3982 tile_mask |= BIT(id); 3983 3984 xe_device_wmb(xe); 3985 3986 ret = xe_vm_range_tilemask_tlb_invalidation(xe_vma_vm(vma), xe_vma_start(vma), 3987 xe_vma_end(vma), tile_mask); 3988 3989 /* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */ 3990 WRITE_ONCE(vma->tile_invalidated, vma->tile_mask); 3991 3992 return ret; 3993 } 3994 3995 int xe_vm_validate_protected(struct xe_vm *vm) 3996 { 3997 struct drm_gpuva *gpuva; 3998 int err = 0; 3999 4000 if (!vm) 4001 return -ENODEV; 4002 4003 mutex_lock(&vm->snap_mutex); 4004 4005 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 4006 struct xe_vma *vma = gpuva_to_vma(gpuva); 4007 struct xe_bo *bo = vma->gpuva.gem.obj ? 
4008 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 4009 4010 if (!bo) 4011 continue; 4012 4013 if (xe_bo_is_protected(bo)) { 4014 err = xe_pxp_bo_key_check(vm->xe->pxp, bo); 4015 if (err) 4016 break; 4017 } 4018 } 4019 4020 mutex_unlock(&vm->snap_mutex); 4021 return err; 4022 } 4023 4024 struct xe_vm_snapshot { 4025 unsigned long num_snaps; 4026 struct { 4027 u64 ofs, bo_ofs; 4028 unsigned long len; 4029 struct xe_bo *bo; 4030 void *data; 4031 struct mm_struct *mm; 4032 } snap[]; 4033 }; 4034 4035 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm) 4036 { 4037 unsigned long num_snaps = 0, i; 4038 struct xe_vm_snapshot *snap = NULL; 4039 struct drm_gpuva *gpuva; 4040 4041 if (!vm) 4042 return NULL; 4043 4044 mutex_lock(&vm->snap_mutex); 4045 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 4046 if (gpuva->flags & XE_VMA_DUMPABLE) 4047 num_snaps++; 4048 } 4049 4050 if (num_snaps) 4051 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT); 4052 if (!snap) { 4053 snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV); 4054 goto out_unlock; 4055 } 4056 4057 snap->num_snaps = num_snaps; 4058 i = 0; 4059 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 4060 struct xe_vma *vma = gpuva_to_vma(gpuva); 4061 struct xe_bo *bo = vma->gpuva.gem.obj ? 4062 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 4063 4064 if (!(gpuva->flags & XE_VMA_DUMPABLE)) 4065 continue; 4066 4067 snap->snap[i].ofs = xe_vma_start(vma); 4068 snap->snap[i].len = xe_vma_size(vma); 4069 if (bo) { 4070 snap->snap[i].bo = xe_bo_get(bo); 4071 snap->snap[i].bo_ofs = xe_vma_bo_offset(vma); 4072 } else if (xe_vma_is_userptr(vma)) { 4073 struct mm_struct *mm = 4074 to_userptr_vma(vma)->userptr.notifier.mm; 4075 4076 if (mmget_not_zero(mm)) 4077 snap->snap[i].mm = mm; 4078 else 4079 snap->snap[i].data = ERR_PTR(-EFAULT); 4080 4081 snap->snap[i].bo_ofs = xe_vma_userptr(vma); 4082 } else { 4083 snap->snap[i].data = ERR_PTR(-ENOENT); 4084 } 4085 i++; 4086 } 4087 4088 out_unlock: 4089 mutex_unlock(&vm->snap_mutex); 4090 return snap; 4091 } 4092 4093 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) 4094 { 4095 if (IS_ERR_OR_NULL(snap)) 4096 return; 4097 4098 for (int i = 0; i < snap->num_snaps; i++) { 4099 struct xe_bo *bo = snap->snap[i].bo; 4100 int err; 4101 4102 if (IS_ERR(snap->snap[i].data)) 4103 continue; 4104 4105 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER); 4106 if (!snap->snap[i].data) { 4107 snap->snap[i].data = ERR_PTR(-ENOMEM); 4108 goto cleanup_bo; 4109 } 4110 4111 if (bo) { 4112 err = xe_bo_read(bo, snap->snap[i].bo_ofs, 4113 snap->snap[i].data, snap->snap[i].len); 4114 } else { 4115 void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs; 4116 4117 kthread_use_mm(snap->snap[i].mm); 4118 if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len)) 4119 err = 0; 4120 else 4121 err = -EFAULT; 4122 kthread_unuse_mm(snap->snap[i].mm); 4123 4124 mmput(snap->snap[i].mm); 4125 snap->snap[i].mm = NULL; 4126 } 4127 4128 if (err) { 4129 kvfree(snap->snap[i].data); 4130 snap->snap[i].data = ERR_PTR(err); 4131 } 4132 4133 cleanup_bo: 4134 xe_bo_put(bo); 4135 snap->snap[i].bo = NULL; 4136 } 4137 } 4138 4139 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) 4140 { 4141 unsigned long i, j; 4142 4143 if (IS_ERR_OR_NULL(snap)) { 4144 drm_printf(p, "[0].error: %li\n", PTR_ERR(snap)); 4145 return; 4146 } 4147 4148 for (i = 0; i < snap->num_snaps; i++) { 4149 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len); 4150 4151 if 
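		/* A failed capture stores an ERR_PTR in .data; print the error instead of contents */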
(IS_ERR(snap->snap[i].data)) { 4152 drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs, 4153 PTR_ERR(snap->snap[i].data)); 4154 continue; 4155 } 4156 4157 drm_printf(p, "[%llx].data: ", snap->snap[i].ofs); 4158 4159 for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) { 4160 u32 *val = snap->snap[i].data + j; 4161 char dumped[ASCII85_BUFSZ]; 4162 4163 drm_puts(p, ascii85_encode(*val, dumped)); 4164 } 4165 4166 drm_puts(p, "\n"); 4167 4168 if (drm_coredump_printer_is_full(p)) 4169 return; 4170 } 4171 } 4172 4173 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap) 4174 { 4175 unsigned long i; 4176 4177 if (IS_ERR_OR_NULL(snap)) 4178 return; 4179 4180 for (i = 0; i < snap->num_snaps; i++) { 4181 if (!IS_ERR(snap->snap[i].data)) 4182 kvfree(snap->snap[i].data); 4183 xe_bo_put(snap->snap[i].bo); 4184 if (snap->snap[i].mm) 4185 mmput(snap->snap[i].mm); 4186 } 4187 kvfree(snap); 4188 } 4189