// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_vm.h"

#include <linux/dma-fence-array.h>
#include <linux/nospec.h>

#include <drm/drm_drv.h>
#include <drm/drm_exec.h>
#include <drm/drm_print.h>
#include <drm/ttm/ttm_tt.h>
#include <uapi/drm/xe_drm.h>
#include <linux/ascii85.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/swap.h>

#include <generated/xe_wa_oob.h>

#include "regs/xe_gtt_defs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue.h"
#include "xe_gt_pagefault.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_migrate.h"
#include "xe_pat.h"
#include "xe_pm.h"
#include "xe_preempt_fence.h"
#include "xe_pt.h"
#include "xe_pxp.h"
#include "xe_res_cursor.h"
#include "xe_svm.h"
#include "xe_sync.h"
#include "xe_trace_bo.h"
#include "xe_wa.h"
#include "xe_hmm.h"

static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
{
	return vm->gpuvm.r_obj;
}

/**
 * xe_vma_userptr_check_repin() - Advisory check for repin needed
 * @uvma: The userptr vma
 *
 * Check if the userptr vma has been invalidated since last successful
 * repin. The check is advisory only and the function can be called
 * without the vm->userptr.notifier_lock held. There is no guarantee that the
 * vma userptr will remain valid after a lockless check, so typically
 * the call needs to be followed by a proper check under the notifier_lock.
 *
 * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
 */
int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
{
	return mmu_interval_check_retry(&uvma->userptr.notifier,
					uvma->userptr.notifier_seq) ?
66 -EAGAIN : 0; 67 } 68 69 int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) 70 { 71 struct xe_vma *vma = &uvma->vma; 72 struct xe_vm *vm = xe_vma_vm(vma); 73 struct xe_device *xe = vm->xe; 74 75 lockdep_assert_held(&vm->lock); 76 xe_assert(xe, xe_vma_is_userptr(vma)); 77 78 return xe_hmm_userptr_populate_range(uvma, false); 79 } 80 81 static bool preempt_fences_waiting(struct xe_vm *vm) 82 { 83 struct xe_exec_queue *q; 84 85 lockdep_assert_held(&vm->lock); 86 xe_vm_assert_held(vm); 87 88 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 89 if (!q->lr.pfence || 90 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, 91 &q->lr.pfence->flags)) { 92 return true; 93 } 94 } 95 96 return false; 97 } 98 99 static void free_preempt_fences(struct list_head *list) 100 { 101 struct list_head *link, *next; 102 103 list_for_each_safe(link, next, list) 104 xe_preempt_fence_free(to_preempt_fence_from_link(link)); 105 } 106 107 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list, 108 unsigned int *count) 109 { 110 lockdep_assert_held(&vm->lock); 111 xe_vm_assert_held(vm); 112 113 if (*count >= vm->preempt.num_exec_queues) 114 return 0; 115 116 for (; *count < vm->preempt.num_exec_queues; ++(*count)) { 117 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc(); 118 119 if (IS_ERR(pfence)) 120 return PTR_ERR(pfence); 121 122 list_move_tail(xe_preempt_fence_link(pfence), list); 123 } 124 125 return 0; 126 } 127 128 static int wait_for_existing_preempt_fences(struct xe_vm *vm) 129 { 130 struct xe_exec_queue *q; 131 132 xe_vm_assert_held(vm); 133 134 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 135 if (q->lr.pfence) { 136 long timeout = dma_fence_wait(q->lr.pfence, false); 137 138 /* Only -ETIME on fence indicates VM needs to be killed */ 139 if (timeout < 0 || q->lr.pfence->error == -ETIME) 140 return -ETIME; 141 142 dma_fence_put(q->lr.pfence); 143 q->lr.pfence = NULL; 144 } 145 } 146 147 return 0; 148 } 149 150 static bool xe_vm_is_idle(struct xe_vm *vm) 151 { 152 struct xe_exec_queue *q; 153 154 xe_vm_assert_held(vm); 155 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 156 if (!xe_exec_queue_is_idle(q)) 157 return false; 158 } 159 160 return true; 161 } 162 163 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list) 164 { 165 struct list_head *link; 166 struct xe_exec_queue *q; 167 168 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 169 struct dma_fence *fence; 170 171 link = list->next; 172 xe_assert(vm->xe, link != list); 173 174 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link), 175 q, q->lr.context, 176 ++q->lr.seqno); 177 dma_fence_put(q->lr.pfence); 178 q->lr.pfence = fence; 179 } 180 } 181 182 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) 183 { 184 struct xe_exec_queue *q; 185 int err; 186 187 xe_bo_assert_held(bo); 188 189 if (!vm->preempt.num_exec_queues) 190 return 0; 191 192 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues); 193 if (err) 194 return err; 195 196 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) 197 if (q->lr.pfence) { 198 dma_resv_add_fence(bo->ttm.base.resv, 199 q->lr.pfence, 200 DMA_RESV_USAGE_BOOKKEEP); 201 } 202 203 return 0; 204 } 205 206 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm, 207 struct drm_exec *exec) 208 { 209 struct xe_exec_queue *q; 210 211 lockdep_assert_held(&vm->lock); 212 xe_vm_assert_held(vm); 213 214 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 215 q->ops->resume(q); 
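
		/*
		 * Re-attach the queue's preempt fence as a BOOKKEEP fence to
		 * the VM's dma-resv and to the external BOs locked by the
		 * exec transaction.
		 */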
		drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
					 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
	}
}

int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	struct drm_gpuvm_exec vm_exec = {
		.vm = &vm->gpuvm,
		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
		.num_fences = 1,
	};
	struct drm_exec *exec = &vm_exec.exec;
	struct dma_fence *pfence;
	int err;
	bool wait;

	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));

	down_write(&vm->lock);
	err = drm_gpuvm_exec_lock(&vm_exec);
	if (err)
		goto out_up_write;

	pfence = xe_preempt_fence_create(q, q->lr.context,
					 ++q->lr.seqno);
	if (!pfence) {
		err = -ENOMEM;
		goto out_fini;
	}

	list_add(&q->lr.link, &vm->preempt.exec_queues);
	++vm->preempt.num_exec_queues;
	q->lr.pfence = pfence;

	down_read(&vm->userptr.notifier_lock);

	drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
				 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);

	/*
	 * Check to see if a preemption on VM is in flight or userptr
	 * invalidation, if so trigger this preempt fence to sync state with
	 * other preempt fences on the VM.
	 */
	wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
	if (wait)
		dma_fence_enable_sw_signaling(pfence);

	up_read(&vm->userptr.notifier_lock);

out_fini:
	drm_exec_fini(exec);
out_up_write:
	up_write(&vm->lock);

	return err;
}
ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO);

/**
 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
 * @vm: The VM.
 * @q: The exec_queue
 *
 * Note that this function might be called multiple times on the same queue.
 */
void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	if (!xe_vm_in_preempt_fence_mode(vm))
		return;

	down_write(&vm->lock);
	if (!list_empty(&q->lr.link)) {
		list_del_init(&q->lr.link);
		--vm->preempt.num_exec_queues;
	}
	if (q->lr.pfence) {
		dma_fence_enable_sw_signaling(q->lr.pfence);
		dma_fence_put(q->lr.pfence);
		q->lr.pfence = NULL;
	}
	up_write(&vm->lock);
}

/**
 * __xe_vm_userptr_needs_repin() - Check whether the VM has userptrs
 * that need repinning.
 * @vm: The VM.
 *
 * This function checks for whether the VM has userptrs that need repinning,
 * and provides a release-type barrier on the userptr.notifier_lock after
 * checking.
 *
 * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
 */
int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
{
	lockdep_assert_held_read(&vm->userptr.notifier_lock);

	return (list_empty(&vm->userptr.repin_list) &&
		list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
}

#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000

/**
 * xe_vm_kill() - VM Kill
 * @vm: The VM.
 * @unlocked: Flag indicating the VM's dma-resv is not held
 *
 * Kill the VM by setting the banned flag, indicating the VM is no longer
 * available for use. If in preempt fence mode, also kill all exec queues
 * attached to the VM.
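 *
 * The caller must hold the vm->lock. If @unlocked is set, the VM's dma-resv
 * is locked and unlocked internally around the kill.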
 */
void xe_vm_kill(struct xe_vm *vm, bool unlocked)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);

	if (unlocked)
		xe_vm_lock(vm, false);

	vm->flags |= XE_VM_FLAG_BANNED;
	trace_xe_vm_kill(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
		q->ops->kill(q);

	if (unlocked)
		xe_vm_unlock(vm);

	/* TODO: Inform user the VM is banned */
}

/**
 * xe_vm_validate_should_retry() - Whether to retry after a validate error.
 * @exec: The drm_exec object used for locking before validation.
 * @err: The error returned from ttm_bo_validate().
 * @end: A ktime_t cookie that should be set to 0 before first use and
 * that should be reused on subsequent calls.
 *
 * With multiple active VMs, under memory pressure, it is possible that
 * ttm_bo_validate() runs into -EDEADLK and in such a case returns -ENOMEM.
 * Until ttm properly handles locking in such scenarios, the best the
 * driver can do is retry with a timeout. Check if that is necessary, and
 * if so unlock the drm_exec's objects while keeping the ticket to prepare
 * for a rerun.
 *
 * Return: true if a retry after drm_exec_init() is recommended;
 * false otherwise.
 */
bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
{
	ktime_t cur;

	if (err != -ENOMEM)
		return false;

	cur = ktime_get();
	*end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
	if (!ktime_before(cur, *end))
		return false;

	msleep(20);
	return true;
}

static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
{
	struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
	struct drm_gpuva *gpuva;
	int ret;

	lockdep_assert_held(&vm->lock);
	drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
		list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
			       &vm->rebind_list);

	ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
	if (ret)
		return ret;

	vm_bo->evicted = false;
	return 0;
}

/**
 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
 * @vm: The vm for which we are rebinding.
 * @exec: The struct drm_exec with the locked GEM objects.
 * @num_fences: The number of fences to reserve for the operation, not
 * including rebinds and validations.
 *
 * Validates all evicted gem objects and rebinds their vmas. Note that
 * rebindings may cause evictions and hence the validation-rebind
 * sequence is rerun until there are no more objects to validate.
 *
 * Return: 0 on success, negative error code on error. In particular,
 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
 * the drm_exec transaction needs to be restarted.
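 *
 * On success, @num_fences fence slots have also been reserved on each
 * GEM object locked by @exec.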
 */
int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
			  unsigned int num_fences)
{
	struct drm_gem_object *obj;
	unsigned long index;
	int ret;

	do {
		ret = drm_gpuvm_validate(&vm->gpuvm, exec);
		if (ret)
			return ret;

		ret = xe_vm_rebind(vm, false);
		if (ret)
			return ret;
	} while (!list_empty(&vm->gpuvm.evict.list));

	drm_exec_for_each_locked_object(exec, index, obj) {
		ret = dma_resv_reserve_fences(obj->resv, num_fences);
		if (ret)
			return ret;
	}

	return 0;
}

static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
				 bool *done)
{
	int err;

	err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	if (xe_vm_is_idle(vm)) {
		vm->preempt.rebind_deactivated = true;
		*done = true;
		return 0;
	}

	if (!preempt_fences_waiting(vm)) {
		*done = true;
		return 0;
	}

	err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	err = wait_for_existing_preempt_fences(vm);
	if (err)
		return err;

	/*
	 * Add validation and rebinding to the locking loop since both can
	 * cause evictions which may require blocking dma_resv locks.
	 * The fence reservation here is intended for the new preempt fences
	 * we attach at the end of the rebind work.
	 */
	return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
}

static void preempt_rebind_work_func(struct work_struct *w)
{
	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
	struct drm_exec exec;
	unsigned int fence_count = 0;
	LIST_HEAD(preempt_fences);
	ktime_t end = 0;
	int err = 0;
	long wait;
	int __maybe_unused tries = 0;

	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
	trace_xe_vm_rebind_worker_enter(vm);

	down_write(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm)) {
		up_write(&vm->lock);
		trace_xe_vm_rebind_worker_exit(vm);
		return;
	}

retry:
	if (xe_vm_userptr_check_repin(vm)) {
		err = xe_vm_userptr_pin(vm);
		if (err)
			goto out_unlock_outer;
	}

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);

	drm_exec_until_all_locked(&exec) {
		bool done = false;

		err = xe_preempt_work_begin(&exec, vm, &done);
		drm_exec_retry_on_contention(&exec);
		if (err || done) {
			drm_exec_fini(&exec);
			if (err && xe_vm_validate_should_retry(&exec, err, &end))
				err = -EAGAIN;

			goto out_unlock_outer;
		}
	}

	err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
	if (err)
		goto out_unlock;

	err = xe_vm_rebind(vm, true);
	if (err)
		goto out_unlock;

	/* Wait on rebinds and munmap style VM unbinds */
	wait = dma_resv_wait_timeout(xe_vm_resv(vm),
				     DMA_RESV_USAGE_KERNEL,
				     false, MAX_SCHEDULE_TIMEOUT);
	if (wait <= 0) {
		err = -ETIME;
		goto out_unlock;
	}

#define retry_required(__tries, __vm) \
	(IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
	 (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
	 __xe_vm_userptr_needs_repin(__vm))

	down_read(&vm->userptr.notifier_lock);
	if (retry_required(tries, vm)) {
		up_read(&vm->userptr.notifier_lock);
		err = -EAGAIN;
		goto out_unlock;
	}

#undef retry_required

	spin_lock(&vm->xe->ttm.lru_lock);
	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
	spin_unlock(&vm->xe->ttm.lru_lock);

	/* Point of no return. */
	arm_preempt_fences(vm, &preempt_fences);
	resume_and_reinstall_preempt_fences(vm, &exec);
	up_read(&vm->userptr.notifier_lock);

out_unlock:
	drm_exec_fini(&exec);
out_unlock_outer:
	if (err == -EAGAIN) {
		trace_xe_vm_rebind_worker_retry(vm);
		goto retry;
	}

	if (err) {
		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
		xe_vm_kill(vm, true);
	}
	up_write(&vm->lock);

	free_preempt_fences(&preempt_fences);

	trace_xe_vm_rebind_worker_exit(vm);
}

static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma)
{
	struct xe_userptr *userptr = &uvma->userptr;
	struct xe_vma *vma = &uvma->vma;
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	long err;

	/*
	 * Tell exec and rebind worker they need to repin and rebind this
	 * userptr.
	 */
	if (!xe_vm_in_fault_mode(vm) &&
	    !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
		spin_lock(&vm->userptr.invalidated_lock);
		list_move_tail(&userptr->invalidate_link,
			       &vm->userptr.invalidated);
		spin_unlock(&vm->userptr.invalidated_lock);
	}

	/*
	 * Preempt fences turn into schedule disables, pipeline these.
	 * Note that even in fault mode, we need to wait for binds and
	 * unbinds to complete, and those are attached as BOOKKEEP fences
	 * to the vm.
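	 * Enabling software signaling on the BOOKKEEP fences below kicks the
	 * preempt fences (schedule disables) so that the wait that follows
	 * can complete.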
611 */ 612 dma_resv_iter_begin(&cursor, xe_vm_resv(vm), 613 DMA_RESV_USAGE_BOOKKEEP); 614 dma_resv_for_each_fence_unlocked(&cursor, fence) 615 dma_fence_enable_sw_signaling(fence); 616 dma_resv_iter_end(&cursor); 617 618 err = dma_resv_wait_timeout(xe_vm_resv(vm), 619 DMA_RESV_USAGE_BOOKKEEP, 620 false, MAX_SCHEDULE_TIMEOUT); 621 XE_WARN_ON(err <= 0); 622 623 if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) { 624 err = xe_vm_invalidate_vma(vma); 625 XE_WARN_ON(err); 626 } 627 628 xe_hmm_userptr_unmap(uvma); 629 } 630 631 static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, 632 const struct mmu_notifier_range *range, 633 unsigned long cur_seq) 634 { 635 struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier); 636 struct xe_vma *vma = &uvma->vma; 637 struct xe_vm *vm = xe_vma_vm(vma); 638 639 xe_assert(vm->xe, xe_vma_is_userptr(vma)); 640 trace_xe_vma_userptr_invalidate(vma); 641 642 if (!mmu_notifier_range_blockable(range)) 643 return false; 644 645 vm_dbg(&xe_vma_vm(vma)->xe->drm, 646 "NOTIFIER: addr=0x%016llx, range=0x%016llx", 647 xe_vma_start(vma), xe_vma_size(vma)); 648 649 down_write(&vm->userptr.notifier_lock); 650 mmu_interval_set_seq(mni, cur_seq); 651 652 __vma_userptr_invalidate(vm, uvma); 653 up_write(&vm->userptr.notifier_lock); 654 trace_xe_vma_userptr_invalidate_complete(vma); 655 656 return true; 657 } 658 659 static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { 660 .invalidate = vma_userptr_invalidate, 661 }; 662 663 #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) 664 /** 665 * xe_vma_userptr_force_invalidate() - force invalidate a userptr 666 * @uvma: The userptr vma to invalidate 667 * 668 * Perform a forced userptr invalidation for testing purposes. 669 */ 670 void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) 671 { 672 struct xe_vm *vm = xe_vma_vm(&uvma->vma); 673 674 /* Protect against concurrent userptr pinning */ 675 lockdep_assert_held(&vm->lock); 676 /* Protect against concurrent notifiers */ 677 lockdep_assert_held(&vm->userptr.notifier_lock); 678 /* 679 * Protect against concurrent instances of this function and 680 * the critical exec sections 681 */ 682 xe_vm_assert_held(vm); 683 684 if (!mmu_interval_read_retry(&uvma->userptr.notifier, 685 uvma->userptr.notifier_seq)) 686 uvma->userptr.notifier_seq -= 2; 687 __vma_userptr_invalidate(vm, uvma); 688 } 689 #endif 690 691 int xe_vm_userptr_pin(struct xe_vm *vm) 692 { 693 struct xe_userptr_vma *uvma, *next; 694 int err = 0; 695 696 xe_assert(vm->xe, !xe_vm_in_fault_mode(vm)); 697 lockdep_assert_held_write(&vm->lock); 698 699 /* Collect invalidated userptrs */ 700 spin_lock(&vm->userptr.invalidated_lock); 701 xe_assert(vm->xe, list_empty(&vm->userptr.repin_list)); 702 list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated, 703 userptr.invalidate_link) { 704 list_del_init(&uvma->userptr.invalidate_link); 705 list_add_tail(&uvma->userptr.repin_link, 706 &vm->userptr.repin_list); 707 } 708 spin_unlock(&vm->userptr.invalidated_lock); 709 710 /* Pin and move to bind list */ 711 list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, 712 userptr.repin_link) { 713 err = xe_vma_userptr_pin_pages(uvma); 714 if (err == -EFAULT) { 715 list_del_init(&uvma->userptr.repin_link); 716 /* 717 * We might have already done the pin once already, but 718 * then had to retry before the re-bind happened, due 719 * some other condition in the caller, but in the 720 * meantime the userptr got dinged by the notifier such 721 * that we need to 
revalidate here, but this time we hit 722 * the EFAULT. In such a case make sure we remove 723 * ourselves from the rebind list to avoid going down in 724 * flames. 725 */ 726 if (!list_empty(&uvma->vma.combined_links.rebind)) 727 list_del_init(&uvma->vma.combined_links.rebind); 728 729 /* Wait for pending binds */ 730 xe_vm_lock(vm, false); 731 dma_resv_wait_timeout(xe_vm_resv(vm), 732 DMA_RESV_USAGE_BOOKKEEP, 733 false, MAX_SCHEDULE_TIMEOUT); 734 735 err = xe_vm_invalidate_vma(&uvma->vma); 736 xe_vm_unlock(vm); 737 if (err) 738 break; 739 } else { 740 if (err) 741 break; 742 743 list_del_init(&uvma->userptr.repin_link); 744 list_move_tail(&uvma->vma.combined_links.rebind, 745 &vm->rebind_list); 746 } 747 } 748 749 if (err) { 750 down_write(&vm->userptr.notifier_lock); 751 spin_lock(&vm->userptr.invalidated_lock); 752 list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, 753 userptr.repin_link) { 754 list_del_init(&uvma->userptr.repin_link); 755 list_move_tail(&uvma->userptr.invalidate_link, 756 &vm->userptr.invalidated); 757 } 758 spin_unlock(&vm->userptr.invalidated_lock); 759 up_write(&vm->userptr.notifier_lock); 760 } 761 return err; 762 } 763 764 /** 765 * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs 766 * that need repinning. 767 * @vm: The VM. 768 * 769 * This function does an advisory check for whether the VM has userptrs that 770 * need repinning. 771 * 772 * Return: 0 if there are no indications of userptrs needing repinning, 773 * -EAGAIN if there are. 774 */ 775 int xe_vm_userptr_check_repin(struct xe_vm *vm) 776 { 777 return (list_empty_careful(&vm->userptr.repin_list) && 778 list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN; 779 } 780 781 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) 782 { 783 int i; 784 785 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) { 786 if (!vops->pt_update_ops[i].num_ops) 787 continue; 788 789 vops->pt_update_ops[i].ops = 790 kmalloc_array(vops->pt_update_ops[i].num_ops, 791 sizeof(*vops->pt_update_ops[i].ops), 792 GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 793 if (!vops->pt_update_ops[i].ops) 794 return array_of_binds ? 
-ENOBUFS : -ENOMEM; 795 } 796 797 return 0; 798 } 799 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO); 800 801 static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op) 802 { 803 struct xe_vma *vma; 804 805 vma = gpuva_to_vma(op->base.prefetch.va); 806 807 if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma)) 808 xa_destroy(&op->prefetch_range.range); 809 } 810 811 static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops) 812 { 813 struct xe_vma_op *op; 814 815 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) 816 return; 817 818 list_for_each_entry(op, &vops->list, link) 819 xe_vma_svm_prefetch_op_fini(op); 820 } 821 822 static void xe_vma_ops_fini(struct xe_vma_ops *vops) 823 { 824 int i; 825 826 xe_vma_svm_prefetch_ops_fini(vops); 827 828 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 829 kfree(vops->pt_update_ops[i].ops); 830 } 831 832 static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val) 833 { 834 int i; 835 836 if (!inc_val) 837 return; 838 839 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 840 if (BIT(i) & tile_mask) 841 vops->pt_update_ops[i].num_ops += inc_val; 842 } 843 844 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, 845 u8 tile_mask) 846 { 847 INIT_LIST_HEAD(&op->link); 848 op->tile_mask = tile_mask; 849 op->base.op = DRM_GPUVA_OP_MAP; 850 op->base.map.va.addr = vma->gpuva.va.addr; 851 op->base.map.va.range = vma->gpuva.va.range; 852 op->base.map.gem.obj = vma->gpuva.gem.obj; 853 op->base.map.gem.offset = vma->gpuva.gem.offset; 854 op->map.vma = vma; 855 op->map.immediate = true; 856 op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE; 857 op->map.is_null = xe_vma_is_null(vma); 858 } 859 860 static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, 861 u8 tile_mask) 862 { 863 struct xe_vma_op *op; 864 865 op = kzalloc(sizeof(*op), GFP_KERNEL); 866 if (!op) 867 return -ENOMEM; 868 869 xe_vm_populate_rebind(op, vma, tile_mask); 870 list_add_tail(&op->link, &vops->list); 871 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); 872 873 return 0; 874 } 875 876 static struct dma_fence *ops_execute(struct xe_vm *vm, 877 struct xe_vma_ops *vops); 878 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 879 struct xe_exec_queue *q, 880 struct xe_sync_entry *syncs, u32 num_syncs); 881 882 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) 883 { 884 struct dma_fence *fence; 885 struct xe_vma *vma, *next; 886 struct xe_vma_ops vops; 887 struct xe_vma_op *op, *next_op; 888 int err, i; 889 890 lockdep_assert_held(&vm->lock); 891 if ((xe_vm_in_lr_mode(vm) && !rebind_worker) || 892 list_empty(&vm->rebind_list)) 893 return 0; 894 895 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 896 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 897 vops.pt_update_ops[i].wait_vm_bookkeep = true; 898 899 xe_vm_assert_held(vm); 900 list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { 901 xe_assert(vm->xe, vma->tile_present); 902 903 if (rebind_worker) 904 trace_xe_vma_rebind_worker(vma); 905 else 906 trace_xe_vma_rebind_exec(vma); 907 908 err = xe_vm_ops_add_rebind(&vops, vma, 909 vma->tile_present); 910 if (err) 911 goto free_ops; 912 } 913 914 err = xe_vma_ops_alloc(&vops, false); 915 if (err) 916 goto free_ops; 917 918 fence = ops_execute(vm, &vops); 919 if (IS_ERR(fence)) { 920 err = PTR_ERR(fence); 921 } else { 922 dma_fence_put(fence); 923 list_for_each_entry_safe(vma, next, &vm->rebind_list, 924 combined_links.rebind) 925 
list_del_init(&vma->combined_links.rebind); 926 } 927 free_ops: 928 list_for_each_entry_safe(op, next_op, &vops.list, link) { 929 list_del(&op->link); 930 kfree(op); 931 } 932 xe_vma_ops_fini(&vops); 933 934 return err; 935 } 936 937 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask) 938 { 939 struct dma_fence *fence = NULL; 940 struct xe_vma_ops vops; 941 struct xe_vma_op *op, *next_op; 942 struct xe_tile *tile; 943 u8 id; 944 int err; 945 946 lockdep_assert_held(&vm->lock); 947 xe_vm_assert_held(vm); 948 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 949 950 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 951 for_each_tile(tile, vm->xe, id) { 952 vops.pt_update_ops[id].wait_vm_bookkeep = true; 953 vops.pt_update_ops[tile->id].q = 954 xe_tile_migrate_exec_queue(tile); 955 } 956 957 err = xe_vm_ops_add_rebind(&vops, vma, tile_mask); 958 if (err) 959 return ERR_PTR(err); 960 961 err = xe_vma_ops_alloc(&vops, false); 962 if (err) { 963 fence = ERR_PTR(err); 964 goto free_ops; 965 } 966 967 fence = ops_execute(vm, &vops); 968 969 free_ops: 970 list_for_each_entry_safe(op, next_op, &vops.list, link) { 971 list_del(&op->link); 972 kfree(op); 973 } 974 xe_vma_ops_fini(&vops); 975 976 return fence; 977 } 978 979 static void xe_vm_populate_range_rebind(struct xe_vma_op *op, 980 struct xe_vma *vma, 981 struct xe_svm_range *range, 982 u8 tile_mask) 983 { 984 INIT_LIST_HEAD(&op->link); 985 op->tile_mask = tile_mask; 986 op->base.op = DRM_GPUVA_OP_DRIVER; 987 op->subop = XE_VMA_SUBOP_MAP_RANGE; 988 op->map_range.vma = vma; 989 op->map_range.range = range; 990 } 991 992 static int 993 xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops, 994 struct xe_vma *vma, 995 struct xe_svm_range *range, 996 u8 tile_mask) 997 { 998 struct xe_vma_op *op; 999 1000 op = kzalloc(sizeof(*op), GFP_KERNEL); 1001 if (!op) 1002 return -ENOMEM; 1003 1004 xe_vm_populate_range_rebind(op, vma, range, tile_mask); 1005 list_add_tail(&op->link, &vops->list); 1006 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); 1007 1008 return 0; 1009 } 1010 1011 /** 1012 * xe_vm_range_rebind() - VM range (re)bind 1013 * @vm: The VM which the range belongs to. 1014 * @vma: The VMA which the range belongs to. 1015 * @range: SVM range to rebind. 1016 * @tile_mask: Tile mask to bind the range to. 1017 * 1018 * (re)bind SVM range setting up GPU page tables for the range. 
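 * The caller must hold the vm->lock and the VM's dma-resv; the VM must be
 * in fault mode and @vma must be a CPU-address-mirror VMA.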
 *
 * Return: dma fence for rebind to signal completion on success, ERR_PTR on
 * failure
 */
struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
				     struct xe_vma *vma,
				     struct xe_svm_range *range,
				     u8 tile_mask)
{
	struct dma_fence *fence = NULL;
	struct xe_vma_ops vops;
	struct xe_vma_op *op, *next_op;
	struct xe_tile *tile;
	u8 id;
	int err;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);
	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
	xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	for_each_tile(tile, vm->xe, id) {
		vops.pt_update_ops[id].wait_vm_bookkeep = true;
		vops.pt_update_ops[tile->id].q =
			xe_tile_migrate_exec_queue(tile);
	}

	err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask);
	if (err)
		return ERR_PTR(err);

	err = xe_vma_ops_alloc(&vops, false);
	if (err) {
		fence = ERR_PTR(err);
		goto free_ops;
	}

	fence = ops_execute(vm, &vops);

free_ops:
	list_for_each_entry_safe(op, next_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return fence;
}

static void xe_vm_populate_range_unbind(struct xe_vma_op *op,
					struct xe_svm_range *range)
{
	INIT_LIST_HEAD(&op->link);
	op->tile_mask = range->tile_present;
	op->base.op = DRM_GPUVA_OP_DRIVER;
	op->subop = XE_VMA_SUBOP_UNMAP_RANGE;
	op->unmap_range.range = range;
}

static int
xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops,
			   struct xe_svm_range *range)
{
	struct xe_vma_op *op;

	op = kzalloc(sizeof(*op), GFP_KERNEL);
	if (!op)
		return -ENOMEM;

	xe_vm_populate_range_unbind(op, range);
	list_add_tail(&op->link, &vops->list);
	xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1);

	return 0;
}

/**
 * xe_vm_range_unbind() - VM range unbind
 * @vm: The VM which the range belongs to.
 * @range: SVM range to unbind.
 *
 * Unbind SVM range removing the GPU page tables for the range.
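 * The caller must hold the vm->lock and the VM's dma-resv, and the VM must
 * be in fault mode. If the range is not bound on any tile, a signaled stub
 * fence is returned.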
 *
 * Return: dma fence for unbind to signal completion on success, ERR_PTR on
 * failure
 */
struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
				     struct xe_svm_range *range)
{
	struct dma_fence *fence = NULL;
	struct xe_vma_ops vops;
	struct xe_vma_op *op, *next_op;
	struct xe_tile *tile;
	u8 id;
	int err;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);
	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));

	if (!range->tile_present)
		return dma_fence_get_stub();

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	for_each_tile(tile, vm->xe, id) {
		vops.pt_update_ops[id].wait_vm_bookkeep = true;
		vops.pt_update_ops[tile->id].q =
			xe_tile_migrate_exec_queue(tile);
	}

	err = xe_vm_ops_add_range_unbind(&vops, range);
	if (err)
		return ERR_PTR(err);

	err = xe_vma_ops_alloc(&vops, false);
	if (err) {
		fence = ERR_PTR(err);
		goto free_ops;
	}

	fence = ops_execute(vm, &vops);

free_ops:
	list_for_each_entry_safe(op, next_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return fence;
}

static void xe_vma_free(struct xe_vma *vma)
{
	if (xe_vma_is_userptr(vma))
		kfree(to_userptr_vma(vma));
	else
		kfree(vma);
}

#define VMA_CREATE_FLAG_READ_ONLY		BIT(0)
#define VMA_CREATE_FLAG_IS_NULL			BIT(1)
#define VMA_CREATE_FLAG_DUMPABLE		BIT(2)
#define VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR	BIT(3)

static struct xe_vma *xe_vma_create(struct xe_vm *vm,
				    struct xe_bo *bo,
				    u64 bo_offset_or_userptr,
				    u64 start, u64 end,
				    u16 pat_index, unsigned int flags)
{
	struct xe_vma *vma;
	struct xe_tile *tile;
	u8 id;
	bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
	bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
	bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
	bool is_cpu_addr_mirror =
		(flags & VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR);

	xe_assert(vm->xe, start < end);
	xe_assert(vm->xe, end < vm->size);

	/*
	 * Allocate and ensure that the xe_vma_is_userptr() return
	 * matches what was allocated.
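	 * Only VMAs without a backing BO that are neither NULL nor
	 * CPU-address-mirror VMAs are userptrs and need the larger
	 * struct xe_userptr_vma allocation.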
1186 */ 1187 if (!bo && !is_null && !is_cpu_addr_mirror) { 1188 struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL); 1189 1190 if (!uvma) 1191 return ERR_PTR(-ENOMEM); 1192 1193 vma = &uvma->vma; 1194 } else { 1195 vma = kzalloc(sizeof(*vma), GFP_KERNEL); 1196 if (!vma) 1197 return ERR_PTR(-ENOMEM); 1198 1199 if (is_cpu_addr_mirror) 1200 vma->gpuva.flags |= XE_VMA_SYSTEM_ALLOCATOR; 1201 if (is_null) 1202 vma->gpuva.flags |= DRM_GPUVA_SPARSE; 1203 if (bo) 1204 vma->gpuva.gem.obj = &bo->ttm.base; 1205 } 1206 1207 INIT_LIST_HEAD(&vma->combined_links.rebind); 1208 1209 INIT_LIST_HEAD(&vma->gpuva.gem.entry); 1210 vma->gpuva.vm = &vm->gpuvm; 1211 vma->gpuva.va.addr = start; 1212 vma->gpuva.va.range = end - start + 1; 1213 if (read_only) 1214 vma->gpuva.flags |= XE_VMA_READ_ONLY; 1215 if (dumpable) 1216 vma->gpuva.flags |= XE_VMA_DUMPABLE; 1217 1218 for_each_tile(tile, vm->xe, id) 1219 vma->tile_mask |= 0x1 << id; 1220 1221 if (vm->xe->info.has_atomic_enable_pte_bit) 1222 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; 1223 1224 vma->pat_index = pat_index; 1225 1226 if (bo) { 1227 struct drm_gpuvm_bo *vm_bo; 1228 1229 xe_bo_assert_held(bo); 1230 1231 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base); 1232 if (IS_ERR(vm_bo)) { 1233 xe_vma_free(vma); 1234 return ERR_CAST(vm_bo); 1235 } 1236 1237 drm_gpuvm_bo_extobj_add(vm_bo); 1238 drm_gem_object_get(&bo->ttm.base); 1239 vma->gpuva.gem.offset = bo_offset_or_userptr; 1240 drm_gpuva_link(&vma->gpuva, vm_bo); 1241 drm_gpuvm_bo_put(vm_bo); 1242 } else /* userptr or null */ { 1243 if (!is_null && !is_cpu_addr_mirror) { 1244 struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; 1245 u64 size = end - start + 1; 1246 int err; 1247 1248 INIT_LIST_HEAD(&userptr->invalidate_link); 1249 INIT_LIST_HEAD(&userptr->repin_link); 1250 vma->gpuva.gem.offset = bo_offset_or_userptr; 1251 mutex_init(&userptr->unmap_mutex); 1252 1253 err = mmu_interval_notifier_insert(&userptr->notifier, 1254 current->mm, 1255 xe_vma_userptr(vma), size, 1256 &vma_userptr_notifier_ops); 1257 if (err) { 1258 xe_vma_free(vma); 1259 return ERR_PTR(err); 1260 } 1261 1262 userptr->notifier_seq = LONG_MAX; 1263 } 1264 1265 xe_vm_get(vm); 1266 } 1267 1268 return vma; 1269 } 1270 1271 static void xe_vma_destroy_late(struct xe_vma *vma) 1272 { 1273 struct xe_vm *vm = xe_vma_vm(vma); 1274 1275 if (vma->ufence) { 1276 xe_sync_ufence_put(vma->ufence); 1277 vma->ufence = NULL; 1278 } 1279 1280 if (xe_vma_is_userptr(vma)) { 1281 struct xe_userptr_vma *uvma = to_userptr_vma(vma); 1282 struct xe_userptr *userptr = &uvma->userptr; 1283 1284 if (userptr->sg) 1285 xe_hmm_userptr_free_sg(uvma); 1286 1287 /* 1288 * Since userptr pages are not pinned, we can't remove 1289 * the notifier until we're sure the GPU is not accessing 1290 * them anymore 1291 */ 1292 mmu_interval_notifier_remove(&userptr->notifier); 1293 mutex_destroy(&userptr->unmap_mutex); 1294 xe_vm_put(vm); 1295 } else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) { 1296 xe_vm_put(vm); 1297 } else { 1298 xe_bo_put(xe_vma_bo(vma)); 1299 } 1300 1301 xe_vma_free(vma); 1302 } 1303 1304 static void vma_destroy_work_func(struct work_struct *w) 1305 { 1306 struct xe_vma *vma = 1307 container_of(w, struct xe_vma, destroy_work); 1308 1309 xe_vma_destroy_late(vma); 1310 } 1311 1312 static void vma_destroy_cb(struct dma_fence *fence, 1313 struct dma_fence_cb *cb) 1314 { 1315 struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb); 1316 1317 INIT_WORK(&vma->destroy_work, vma_destroy_work_func); 1318 
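	/*
	 * Final destruction may sleep (e.g. removing the userptr notifier),
	 * so defer it to a workqueue rather than doing it from the fence
	 * callback.
	 */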
	queue_work(system_unbound_wq, &vma->destroy_work);
}

static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
{
	struct xe_vm *vm = xe_vma_vm(vma);

	lockdep_assert_held_write(&vm->lock);
	xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));

	if (xe_vma_is_userptr(vma)) {
		xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);

		spin_lock(&vm->userptr.invalidated_lock);
		xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link));
		list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
		spin_unlock(&vm->userptr.invalidated_lock);
	} else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) {
		xe_bo_assert_held(xe_vma_bo(vma));

		drm_gpuva_unlink(&vma->gpuva);
	}

	xe_vm_assert_held(vm);
	if (fence) {
		int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
						 vma_destroy_cb);

		if (ret) {
			XE_WARN_ON(ret != -ENOENT);
			xe_vma_destroy_late(vma);
		}
	} else {
		xe_vma_destroy_late(vma);
	}
}

/**
 * xe_vm_lock_vma() - drm_exec utility to lock a vma
 * @exec: The drm_exec object we're currently locking for.
 * @vma: The vma for which we want to lock the vm resv and any attached
 * object's resv.
 *
 * Return: 0 on success, negative error code on error. In particular
 * may return -EDEADLK on WW transaction contention and -EINTR if
 * an interruptible wait is terminated by a signal.
 */
int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
{
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_bo *bo = xe_vma_bo(vma);
	int err;

	XE_WARN_ON(!vm);

	err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
	if (!err && bo && !bo->vm)
		err = drm_exec_lock_obj(exec, &bo->ttm.base);

	return err;
}

static void xe_vma_destroy_unlocked(struct xe_vma *vma)
{
	struct drm_exec exec;
	int err;

	drm_exec_init(&exec, 0, 0);
	drm_exec_until_all_locked(&exec) {
		err = xe_vm_lock_vma(&exec, vma);
		drm_exec_retry_on_contention(&exec);
		if (XE_WARN_ON(err))
			break;
	}

	xe_vma_destroy(vma, NULL);

	drm_exec_fini(&exec);
}

struct xe_vma *
xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
{
	struct drm_gpuva *gpuva;

	lockdep_assert_held(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm))
		return NULL;

	xe_assert(vm->xe, start + range <= vm->size);

	gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);

	return gpuva ?
gpuva_to_vma(gpuva) : NULL; 1413 } 1414 1415 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma) 1416 { 1417 int err; 1418 1419 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1420 lockdep_assert_held(&vm->lock); 1421 1422 mutex_lock(&vm->snap_mutex); 1423 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva); 1424 mutex_unlock(&vm->snap_mutex); 1425 XE_WARN_ON(err); /* Shouldn't be possible */ 1426 1427 return err; 1428 } 1429 1430 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma) 1431 { 1432 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1433 lockdep_assert_held(&vm->lock); 1434 1435 mutex_lock(&vm->snap_mutex); 1436 drm_gpuva_remove(&vma->gpuva); 1437 mutex_unlock(&vm->snap_mutex); 1438 if (vm->usm.last_fault_vma == vma) 1439 vm->usm.last_fault_vma = NULL; 1440 } 1441 1442 static struct drm_gpuva_op *xe_vm_op_alloc(void) 1443 { 1444 struct xe_vma_op *op; 1445 1446 op = kzalloc(sizeof(*op), GFP_KERNEL); 1447 1448 if (unlikely(!op)) 1449 return NULL; 1450 1451 return &op->base; 1452 } 1453 1454 static void xe_vm_free(struct drm_gpuvm *gpuvm); 1455 1456 static const struct drm_gpuvm_ops gpuvm_ops = { 1457 .op_alloc = xe_vm_op_alloc, 1458 .vm_bo_validate = xe_gpuvm_validate, 1459 .vm_free = xe_vm_free, 1460 }; 1461 1462 static u64 pde_encode_pat_index(u16 pat_index) 1463 { 1464 u64 pte = 0; 1465 1466 if (pat_index & BIT(0)) 1467 pte |= XE_PPGTT_PTE_PAT0; 1468 1469 if (pat_index & BIT(1)) 1470 pte |= XE_PPGTT_PTE_PAT1; 1471 1472 return pte; 1473 } 1474 1475 static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level) 1476 { 1477 u64 pte = 0; 1478 1479 if (pat_index & BIT(0)) 1480 pte |= XE_PPGTT_PTE_PAT0; 1481 1482 if (pat_index & BIT(1)) 1483 pte |= XE_PPGTT_PTE_PAT1; 1484 1485 if (pat_index & BIT(2)) { 1486 if (pt_level) 1487 pte |= XE_PPGTT_PDE_PDPE_PAT2; 1488 else 1489 pte |= XE_PPGTT_PTE_PAT2; 1490 } 1491 1492 if (pat_index & BIT(3)) 1493 pte |= XELPG_PPGTT_PTE_PAT3; 1494 1495 if (pat_index & (BIT(4))) 1496 pte |= XE2_PPGTT_PTE_PAT4; 1497 1498 return pte; 1499 } 1500 1501 static u64 pte_encode_ps(u32 pt_level) 1502 { 1503 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL); 1504 1505 if (pt_level == 1) 1506 return XE_PDE_PS_2M; 1507 else if (pt_level == 2) 1508 return XE_PDPE_PS_1G; 1509 1510 return 0; 1511 } 1512 1513 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset, 1514 const u16 pat_index) 1515 { 1516 u64 pde; 1517 1518 pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1519 pde |= XE_PAGE_PRESENT | XE_PAGE_RW; 1520 pde |= pde_encode_pat_index(pat_index); 1521 1522 return pde; 1523 } 1524 1525 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, 1526 u16 pat_index, u32 pt_level) 1527 { 1528 u64 pte; 1529 1530 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1531 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1532 pte |= pte_encode_pat_index(pat_index, pt_level); 1533 pte |= pte_encode_ps(pt_level); 1534 1535 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) 1536 pte |= XE_PPGTT_PTE_DM; 1537 1538 return pte; 1539 } 1540 1541 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, 1542 u16 pat_index, u32 pt_level) 1543 { 1544 pte |= XE_PAGE_PRESENT; 1545 1546 if (likely(!xe_vma_read_only(vma))) 1547 pte |= XE_PAGE_RW; 1548 1549 pte |= pte_encode_pat_index(pat_index, pt_level); 1550 pte |= pte_encode_ps(pt_level); 1551 1552 if (unlikely(xe_vma_is_null(vma))) 1553 pte |= XE_PTE_NULL; 1554 1555 return pte; 1556 } 1557 1558 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, 1559 u16 pat_index, 1560 u32 pt_level, bool devmem, u64 flags) 1561 { 1562 u64 pte; 1563 
1564 /* Avoid passing random bits directly as flags */ 1565 xe_assert(xe, !(flags & ~XE_PTE_PS64)); 1566 1567 pte = addr; 1568 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1569 pte |= pte_encode_pat_index(pat_index, pt_level); 1570 pte |= pte_encode_ps(pt_level); 1571 1572 if (devmem) 1573 pte |= XE_PPGTT_PTE_DM; 1574 1575 pte |= flags; 1576 1577 return pte; 1578 } 1579 1580 static const struct xe_pt_ops xelp_pt_ops = { 1581 .pte_encode_bo = xelp_pte_encode_bo, 1582 .pte_encode_vma = xelp_pte_encode_vma, 1583 .pte_encode_addr = xelp_pte_encode_addr, 1584 .pde_encode_bo = xelp_pde_encode_bo, 1585 }; 1586 1587 static void vm_destroy_work_func(struct work_struct *w); 1588 1589 /** 1590 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the 1591 * given tile and vm. 1592 * @xe: xe device. 1593 * @tile: tile to set up for. 1594 * @vm: vm to set up for. 1595 * 1596 * Sets up a pagetable tree with one page-table per level and a single 1597 * leaf PTE. All pagetable entries point to the single page-table or, 1598 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and 1599 * writes become NOPs. 1600 * 1601 * Return: 0 on success, negative error code on error. 1602 */ 1603 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, 1604 struct xe_vm *vm) 1605 { 1606 u8 id = tile->id; 1607 int i; 1608 1609 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) { 1610 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i); 1611 if (IS_ERR(vm->scratch_pt[id][i])) 1612 return PTR_ERR(vm->scratch_pt[id][i]); 1613 1614 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); 1615 } 1616 1617 return 0; 1618 } 1619 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO); 1620 1621 static void xe_vm_free_scratch(struct xe_vm *vm) 1622 { 1623 struct xe_tile *tile; 1624 u8 id; 1625 1626 if (!xe_vm_has_scratch(vm)) 1627 return; 1628 1629 for_each_tile(tile, vm->xe, id) { 1630 u32 i; 1631 1632 if (!vm->pt_root[id]) 1633 continue; 1634 1635 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i) 1636 if (vm->scratch_pt[id][i]) 1637 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL); 1638 } 1639 } 1640 1641 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) 1642 { 1643 struct drm_gem_object *vm_resv_obj; 1644 struct xe_vm *vm; 1645 int err, number_tiles = 0; 1646 struct xe_tile *tile; 1647 u8 id; 1648 1649 /* 1650 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to 1651 * ever be in faulting mode. 1652 */ 1653 xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE))); 1654 1655 vm = kzalloc(sizeof(*vm), GFP_KERNEL); 1656 if (!vm) 1657 return ERR_PTR(-ENOMEM); 1658 1659 vm->xe = xe; 1660 1661 vm->size = 1ull << xe->info.va_bits; 1662 1663 vm->flags = flags; 1664 1665 /** 1666 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be 1667 * manipulated under the PXP mutex. However, the PXP mutex can be taken 1668 * under a user-VM lock when the PXP session is started at exec_queue 1669 * creation time. Those are different VMs and therefore there is no risk 1670 * of deadlock, but we need to tell lockdep that this is the case or it 1671 * will print a warning. 
1672 */ 1673 if (flags & XE_VM_FLAG_GSC) { 1674 static struct lock_class_key gsc_vm_key; 1675 1676 __init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key); 1677 } else { 1678 init_rwsem(&vm->lock); 1679 } 1680 mutex_init(&vm->snap_mutex); 1681 1682 INIT_LIST_HEAD(&vm->rebind_list); 1683 1684 INIT_LIST_HEAD(&vm->userptr.repin_list); 1685 INIT_LIST_HEAD(&vm->userptr.invalidated); 1686 init_rwsem(&vm->userptr.notifier_lock); 1687 spin_lock_init(&vm->userptr.invalidated_lock); 1688 1689 ttm_lru_bulk_move_init(&vm->lru_bulk_move); 1690 1691 INIT_WORK(&vm->destroy_work, vm_destroy_work_func); 1692 1693 INIT_LIST_HEAD(&vm->preempt.exec_queues); 1694 vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */ 1695 1696 for_each_tile(tile, xe, id) 1697 xe_range_fence_tree_init(&vm->rftree[id]); 1698 1699 vm->pt_ops = &xelp_pt_ops; 1700 1701 /* 1702 * Long-running workloads are not protected by the scheduler references. 1703 * By design, run_job for long-running workloads returns NULL and the 1704 * scheduler drops all the references of it, hence protecting the VM 1705 * for this case is necessary. 1706 */ 1707 if (flags & XE_VM_FLAG_LR_MODE) 1708 xe_pm_runtime_get_noresume(xe); 1709 1710 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); 1711 if (!vm_resv_obj) { 1712 err = -ENOMEM; 1713 goto err_no_resv; 1714 } 1715 1716 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm, 1717 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops); 1718 1719 drm_gem_object_put(vm_resv_obj); 1720 1721 err = xe_vm_lock(vm, true); 1722 if (err) 1723 goto err_close; 1724 1725 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) 1726 vm->flags |= XE_VM_FLAG_64K; 1727 1728 for_each_tile(tile, xe, id) { 1729 if (flags & XE_VM_FLAG_MIGRATION && 1730 tile->id != XE_VM_FLAG_TILE_ID(flags)) 1731 continue; 1732 1733 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level); 1734 if (IS_ERR(vm->pt_root[id])) { 1735 err = PTR_ERR(vm->pt_root[id]); 1736 vm->pt_root[id] = NULL; 1737 goto err_unlock_close; 1738 } 1739 } 1740 1741 if (xe_vm_has_scratch(vm)) { 1742 for_each_tile(tile, xe, id) { 1743 if (!vm->pt_root[id]) 1744 continue; 1745 1746 err = xe_vm_create_scratch(xe, tile, vm); 1747 if (err) 1748 goto err_unlock_close; 1749 } 1750 vm->batch_invalidate_tlb = true; 1751 } 1752 1753 if (vm->flags & XE_VM_FLAG_LR_MODE) { 1754 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); 1755 vm->batch_invalidate_tlb = false; 1756 } 1757 1758 /* Fill pt_root after allocating scratch tables */ 1759 for_each_tile(tile, xe, id) { 1760 if (!vm->pt_root[id]) 1761 continue; 1762 1763 xe_pt_populate_empty(tile, vm, vm->pt_root[id]); 1764 } 1765 xe_vm_unlock(vm); 1766 1767 /* Kernel migration VM shouldn't have a circular loop.. 
*/ 1768 if (!(flags & XE_VM_FLAG_MIGRATION)) { 1769 for_each_tile(tile, xe, id) { 1770 struct xe_exec_queue *q; 1771 u32 create_flags = EXEC_QUEUE_FLAG_VM; 1772 1773 if (!vm->pt_root[id]) 1774 continue; 1775 1776 q = xe_exec_queue_create_bind(xe, tile, create_flags, 0); 1777 if (IS_ERR(q)) { 1778 err = PTR_ERR(q); 1779 goto err_close; 1780 } 1781 vm->q[id] = q; 1782 number_tiles++; 1783 } 1784 } 1785 1786 if (flags & XE_VM_FLAG_FAULT_MODE) { 1787 err = xe_svm_init(vm); 1788 if (err) 1789 goto err_close; 1790 } 1791 1792 if (number_tiles > 1) 1793 vm->composite_fence_ctx = dma_fence_context_alloc(1); 1794 1795 trace_xe_vm_create(vm); 1796 1797 return vm; 1798 1799 err_unlock_close: 1800 xe_vm_unlock(vm); 1801 err_close: 1802 xe_vm_close_and_put(vm); 1803 return ERR_PTR(err); 1804 1805 err_no_resv: 1806 mutex_destroy(&vm->snap_mutex); 1807 for_each_tile(tile, xe, id) 1808 xe_range_fence_tree_fini(&vm->rftree[id]); 1809 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 1810 kfree(vm); 1811 if (flags & XE_VM_FLAG_LR_MODE) 1812 xe_pm_runtime_put(xe); 1813 return ERR_PTR(err); 1814 } 1815 1816 static void xe_vm_close(struct xe_vm *vm) 1817 { 1818 struct xe_device *xe = vm->xe; 1819 bool bound; 1820 int idx; 1821 1822 bound = drm_dev_enter(&xe->drm, &idx); 1823 1824 down_write(&vm->lock); 1825 if (xe_vm_in_fault_mode(vm)) 1826 xe_svm_notifier_lock(vm); 1827 1828 vm->size = 0; 1829 1830 if (!((vm->flags & XE_VM_FLAG_MIGRATION))) { 1831 struct xe_tile *tile; 1832 struct xe_gt *gt; 1833 u8 id; 1834 1835 /* Wait for pending binds */ 1836 dma_resv_wait_timeout(xe_vm_resv(vm), 1837 DMA_RESV_USAGE_BOOKKEEP, 1838 false, MAX_SCHEDULE_TIMEOUT); 1839 1840 if (bound) { 1841 for_each_tile(tile, xe, id) 1842 if (vm->pt_root[id]) 1843 xe_pt_clear(xe, vm->pt_root[id]); 1844 1845 for_each_gt(gt, xe, id) 1846 xe_gt_tlb_invalidation_vm(gt, vm); 1847 } 1848 } 1849 1850 if (xe_vm_in_fault_mode(vm)) 1851 xe_svm_notifier_unlock(vm); 1852 up_write(&vm->lock); 1853 1854 if (bound) 1855 drm_dev_exit(idx); 1856 } 1857 1858 void xe_vm_close_and_put(struct xe_vm *vm) 1859 { 1860 LIST_HEAD(contested); 1861 struct xe_device *xe = vm->xe; 1862 struct xe_tile *tile; 1863 struct xe_vma *vma, *next_vma; 1864 struct drm_gpuva *gpuva, *next; 1865 u8 id; 1866 1867 xe_assert(xe, !vm->preempt.num_exec_queues); 1868 1869 xe_vm_close(vm); 1870 if (xe_vm_in_preempt_fence_mode(vm)) 1871 flush_work(&vm->preempt.rebind_work); 1872 if (xe_vm_in_fault_mode(vm)) 1873 xe_svm_close(vm); 1874 1875 down_write(&vm->lock); 1876 for_each_tile(tile, xe, id) { 1877 if (vm->q[id]) 1878 xe_exec_queue_last_fence_put(vm->q[id], vm); 1879 } 1880 up_write(&vm->lock); 1881 1882 for_each_tile(tile, xe, id) { 1883 if (vm->q[id]) { 1884 xe_exec_queue_kill(vm->q[id]); 1885 xe_exec_queue_put(vm->q[id]); 1886 vm->q[id] = NULL; 1887 } 1888 } 1889 1890 down_write(&vm->lock); 1891 xe_vm_lock(vm, false); 1892 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) { 1893 vma = gpuva_to_vma(gpuva); 1894 1895 if (xe_vma_has_no_bo(vma)) { 1896 down_read(&vm->userptr.notifier_lock); 1897 vma->gpuva.flags |= XE_VMA_DESTROYED; 1898 up_read(&vm->userptr.notifier_lock); 1899 } 1900 1901 xe_vm_remove_vma(vm, vma); 1902 1903 /* easy case, remove from VMA? 
*/ 1904 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) { 1905 list_del_init(&vma->combined_links.rebind); 1906 xe_vma_destroy(vma, NULL); 1907 continue; 1908 } 1909 1910 list_move_tail(&vma->combined_links.destroy, &contested); 1911 vma->gpuva.flags |= XE_VMA_DESTROYED; 1912 } 1913 1914 /* 1915 * All vm operations will add shared fences to resv. 1916 * The only exception is eviction for a shared object, 1917 * but even so, the unbind when evicted would still 1918 * install a fence to resv. Hence it's safe to 1919 * destroy the pagetables immediately. 1920 */ 1921 xe_vm_free_scratch(vm); 1922 1923 for_each_tile(tile, xe, id) { 1924 if (vm->pt_root[id]) { 1925 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); 1926 vm->pt_root[id] = NULL; 1927 } 1928 } 1929 xe_vm_unlock(vm); 1930 1931 /* 1932 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL 1933 * Since we hold a refcount to the bo, we can remove and free 1934 * the members safely without locking. 1935 */ 1936 list_for_each_entry_safe(vma, next_vma, &contested, 1937 combined_links.destroy) { 1938 list_del_init(&vma->combined_links.destroy); 1939 xe_vma_destroy_unlocked(vma); 1940 } 1941 1942 if (xe_vm_in_fault_mode(vm)) 1943 xe_svm_fini(vm); 1944 1945 up_write(&vm->lock); 1946 1947 down_write(&xe->usm.lock); 1948 if (vm->usm.asid) { 1949 void *lookup; 1950 1951 xe_assert(xe, xe->info.has_asid); 1952 xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION)); 1953 1954 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); 1955 xe_assert(xe, lookup == vm); 1956 } 1957 up_write(&xe->usm.lock); 1958 1959 for_each_tile(tile, xe, id) 1960 xe_range_fence_tree_fini(&vm->rftree[id]); 1961 1962 xe_vm_put(vm); 1963 } 1964 1965 static void vm_destroy_work_func(struct work_struct *w) 1966 { 1967 struct xe_vm *vm = 1968 container_of(w, struct xe_vm, destroy_work); 1969 struct xe_device *xe = vm->xe; 1970 struct xe_tile *tile; 1971 u8 id; 1972 1973 /* xe_vm_close_and_put was not called? */ 1974 xe_assert(xe, !vm->size); 1975 1976 if (xe_vm_in_preempt_fence_mode(vm)) 1977 flush_work(&vm->preempt.rebind_work); 1978 1979 mutex_destroy(&vm->snap_mutex); 1980 1981 if (vm->flags & XE_VM_FLAG_LR_MODE) 1982 xe_pm_runtime_put(xe); 1983 1984 for_each_tile(tile, xe, id) 1985 XE_WARN_ON(vm->pt_root[id]); 1986 1987 trace_xe_vm_free(vm); 1988 1989 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 1990 1991 if (vm->xef) 1992 xe_file_put(vm->xef); 1993 1994 kfree(vm); 1995 } 1996 1997 static void xe_vm_free(struct drm_gpuvm *gpuvm) 1998 { 1999 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); 2000 2001 /* To destroy the VM we need to be able to sleep */ 2002 queue_work(system_unbound_wq, &vm->destroy_work); 2003 } 2004 2005 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) 2006 { 2007 struct xe_vm *vm; 2008 2009 mutex_lock(&xef->vm.lock); 2010 vm = xa_load(&xef->vm.xa, id); 2011 if (vm) 2012 xe_vm_get(vm); 2013 mutex_unlock(&xef->vm.lock); 2014 2015 return vm; 2016 } 2017 2018 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile) 2019 { 2020 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0, 2021 tile_to_xe(tile)->pat.idx[XE_CACHE_WB]); 2022 } 2023 2024 static struct xe_exec_queue * 2025 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 2026 { 2027 return q ? 
q : vm->q[0]; 2028 } 2029 2030 static struct xe_user_fence * 2031 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) 2032 { 2033 unsigned int i; 2034 2035 for (i = 0; i < num_syncs; i++) { 2036 struct xe_sync_entry *e = &syncs[i]; 2037 2038 if (xe_sync_is_ufence(e)) 2039 return xe_sync_ufence_get(e); 2040 } 2041 2042 return NULL; 2043 } 2044 2045 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ 2046 DRM_XE_VM_CREATE_FLAG_LR_MODE | \ 2047 DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 2048 2049 int xe_vm_create_ioctl(struct drm_device *dev, void *data, 2050 struct drm_file *file) 2051 { 2052 struct xe_device *xe = to_xe_device(dev); 2053 struct xe_file *xef = to_xe_file(file); 2054 struct drm_xe_vm_create *args = data; 2055 struct xe_tile *tile; 2056 struct xe_vm *vm; 2057 u32 id, asid; 2058 int err; 2059 u32 flags = 0; 2060 2061 if (XE_IOCTL_DBG(xe, args->extensions)) 2062 return -EINVAL; 2063 2064 if (XE_WA(xe_root_mmio_gt(xe), 14016763929)) 2065 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE; 2066 2067 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && 2068 !xe->info.has_usm)) 2069 return -EINVAL; 2070 2071 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 2072 return -EINVAL; 2073 2074 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS)) 2075 return -EINVAL; 2076 2077 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE && 2078 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && 2079 !xe->info.needs_scratch)) 2080 return -EINVAL; 2081 2082 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) && 2083 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) 2084 return -EINVAL; 2085 2086 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE) 2087 flags |= XE_VM_FLAG_SCRATCH_PAGE; 2088 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) 2089 flags |= XE_VM_FLAG_LR_MODE; 2090 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 2091 flags |= XE_VM_FLAG_FAULT_MODE; 2092 2093 vm = xe_vm_create(xe, flags); 2094 if (IS_ERR(vm)) 2095 return PTR_ERR(vm); 2096 2097 if (xe->info.has_asid) { 2098 down_write(&xe->usm.lock); 2099 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, 2100 XA_LIMIT(1, XE_MAX_ASID - 1), 2101 &xe->usm.next_asid, GFP_KERNEL); 2102 up_write(&xe->usm.lock); 2103 if (err < 0) 2104 goto err_close_and_put; 2105 2106 vm->usm.asid = asid; 2107 } 2108 2109 vm->xef = xe_file_get(xef); 2110 2111 /* Record BO memory for VM pagetable created against client */ 2112 for_each_tile(tile, xe, id) 2113 if (vm->pt_root[id]) 2114 xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo); 2115 2116 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) 2117 /* Warning: Security issue - never enable by default */ 2118 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); 2119 #endif 2120 2121 /* user id alloc must always be last in ioctl to prevent UAF */ 2122 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); 2123 if (err) 2124 goto err_close_and_put; 2125 2126 args->vm_id = id; 2127 2128 return 0; 2129 2130 err_close_and_put: 2131 xe_vm_close_and_put(vm); 2132 2133 return err; 2134 } 2135 2136 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, 2137 struct drm_file *file) 2138 { 2139 struct xe_device *xe = to_xe_device(dev); 2140 struct xe_file *xef = to_xe_file(file); 2141 struct drm_xe_vm_destroy *args = data; 2142 struct xe_vm *vm; 2143 int err = 0; 2144 2145 if (XE_IOCTL_DBG(xe, args->pad) || 2146 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 2147 return -EINVAL; 2148 
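	/*
	 * Look up and erase the id under xef->vm.lock; the actual teardown
	 * via xe_vm_close_and_put() happens after the lock is dropped.
	 */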
2149 mutex_lock(&xef->vm.lock); 2150 vm = xa_load(&xef->vm.xa, args->vm_id); 2151 if (XE_IOCTL_DBG(xe, !vm)) 2152 err = -ENOENT; 2153 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues)) 2154 err = -EBUSY; 2155 else 2156 xa_erase(&xef->vm.xa, args->vm_id); 2157 mutex_unlock(&xef->vm.lock); 2158 2159 if (!err) 2160 xe_vm_close_and_put(vm); 2161 2162 return err; 2163 } 2164 2165 static bool vma_matches(struct xe_vma *vma, u64 page_addr) 2166 { 2167 if (page_addr > xe_vma_end(vma) - 1 || 2168 page_addr + SZ_4K - 1 < xe_vma_start(vma)) 2169 return false; 2170 2171 return true; 2172 } 2173 2174 /** 2175 * xe_vm_find_vma_by_addr() - Find a VMA by its address 2176 * 2177 * @vm: the xe_vm the vma belongs to 2178 * @page_addr: address to look up 2179 */ 2180 struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr) 2181 { 2182 struct xe_vma *vma = NULL; 2183 2184 if (vm->usm.last_fault_vma) { /* Fast lookup */ 2185 if (vma_matches(vm->usm.last_fault_vma, page_addr)) 2186 vma = vm->usm.last_fault_vma; 2187 } 2188 if (!vma) 2189 vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K); 2190 2191 return vma; 2192 } 2193 2194 static const u32 region_to_mem_type[] = { 2195 XE_PL_TT, 2196 XE_PL_VRAM0, 2197 XE_PL_VRAM1, 2198 }; 2199 2200 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma, 2201 bool post_commit) 2202 { 2203 down_read(&vm->userptr.notifier_lock); 2204 vma->gpuva.flags |= XE_VMA_DESTROYED; 2205 up_read(&vm->userptr.notifier_lock); 2206 if (post_commit) 2207 xe_vm_remove_vma(vm, vma); 2208 } 2209 2210 #undef ULL 2211 #define ULL unsigned long long 2212 2213 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) 2214 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 2215 { 2216 struct xe_vma *vma; 2217 2218 switch (op->op) { 2219 case DRM_GPUVA_OP_MAP: 2220 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx", 2221 (ULL)op->map.va.addr, (ULL)op->map.va.range); 2222 break; 2223 case DRM_GPUVA_OP_REMAP: 2224 vma = gpuva_to_vma(op->remap.unmap->va); 2225 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 2226 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 2227 op->remap.unmap->keep ? 1 : 0); 2228 if (op->remap.prev) 2229 vm_dbg(&xe->drm, 2230 "REMAP:PREV: addr=0x%016llx, range=0x%016llx", 2231 (ULL)op->remap.prev->va.addr, 2232 (ULL)op->remap.prev->va.range); 2233 if (op->remap.next) 2234 vm_dbg(&xe->drm, 2235 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx", 2236 (ULL)op->remap.next->va.addr, 2237 (ULL)op->remap.next->va.range); 2238 break; 2239 case DRM_GPUVA_OP_UNMAP: 2240 vma = gpuva_to_vma(op->unmap.va); 2241 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 2242 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 2243 op->unmap.keep ? 
1 : 0); 2244 break; 2245 case DRM_GPUVA_OP_PREFETCH: 2246 vma = gpuva_to_vma(op->prefetch.va); 2247 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx", 2248 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma)); 2249 break; 2250 default: 2251 drm_warn(&xe->drm, "NOT POSSIBLE"); 2252 } 2253 } 2254 #else 2255 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 2256 { 2257 } 2258 #endif 2259 2260 static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags) 2261 { 2262 if (!xe_vm_in_fault_mode(vm)) 2263 return false; 2264 2265 if (!xe_vm_has_scratch(vm)) 2266 return false; 2267 2268 if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE) 2269 return false; 2270 2271 return true; 2272 } 2273 2274 static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops) 2275 { 2276 struct drm_gpuva_op *__op; 2277 2278 drm_gpuva_for_each_op(__op, ops) { 2279 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2280 2281 xe_vma_svm_prefetch_op_fini(op); 2282 } 2283 } 2284 2285 /* 2286 * Create operations list from IOCTL arguments, setup operations fields so parse 2287 * and commit steps are decoupled from IOCTL arguments. This step can fail. 2288 */ 2289 static struct drm_gpuva_ops * 2290 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, 2291 struct xe_bo *bo, u64 bo_offset_or_userptr, 2292 u64 addr, u64 range, 2293 u32 operation, u32 flags, 2294 u32 prefetch_region, u16 pat_index) 2295 { 2296 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL; 2297 struct drm_gpuva_ops *ops; 2298 struct drm_gpuva_op *__op; 2299 struct drm_gpuvm_bo *vm_bo; 2300 u64 range_end = addr + range; 2301 int err; 2302 2303 lockdep_assert_held_write(&vm->lock); 2304 2305 vm_dbg(&vm->xe->drm, 2306 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx", 2307 operation, (ULL)addr, (ULL)range, 2308 (ULL)bo_offset_or_userptr); 2309 2310 switch (operation) { 2311 case DRM_XE_VM_BIND_OP_MAP: 2312 case DRM_XE_VM_BIND_OP_MAP_USERPTR: 2313 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range, 2314 obj, bo_offset_or_userptr); 2315 break; 2316 case DRM_XE_VM_BIND_OP_UNMAP: 2317 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range); 2318 break; 2319 case DRM_XE_VM_BIND_OP_PREFETCH: 2320 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range); 2321 break; 2322 case DRM_XE_VM_BIND_OP_UNMAP_ALL: 2323 xe_assert(vm->xe, bo); 2324 2325 err = xe_bo_lock(bo, true); 2326 if (err) 2327 return ERR_PTR(err); 2328 2329 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj); 2330 if (IS_ERR(vm_bo)) { 2331 xe_bo_unlock(bo); 2332 return ERR_CAST(vm_bo); 2333 } 2334 2335 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo); 2336 drm_gpuvm_bo_put(vm_bo); 2337 xe_bo_unlock(bo); 2338 break; 2339 default: 2340 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2341 ops = ERR_PTR(-EINVAL); 2342 } 2343 if (IS_ERR(ops)) 2344 return ops; 2345 2346 drm_gpuva_for_each_op(__op, ops) { 2347 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2348 2349 if (__op->op == DRM_GPUVA_OP_MAP) { 2350 op->map.immediate = 2351 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; 2352 op->map.read_only = 2353 flags & DRM_XE_VM_BIND_FLAG_READONLY; 2354 op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 2355 op->map.is_cpu_addr_mirror = flags & 2356 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR; 2357 op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; 2358 op->map.pat_index = pat_index; 2359 op->map.invalidate_on_bind = 2360 __xe_vm_needs_clear_scratch_pages(vm, flags); 2361 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { 2362 struct xe_vma *vma = 
gpuva_to_vma(op->base.prefetch.va); 2363 struct xe_svm_range *svm_range; 2364 struct drm_gpusvm_ctx ctx = {}; 2365 struct xe_tile *tile; 2366 u8 id, tile_mask = 0; 2367 u32 i; 2368 2369 if (!xe_vma_is_cpu_addr_mirror(vma)) { 2370 op->prefetch.region = prefetch_region; 2371 break; 2372 } 2373 2374 ctx.read_only = xe_vma_read_only(vma); 2375 ctx.devmem_possible = IS_DGFX(vm->xe) && 2376 IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR); 2377 2378 for_each_tile(tile, vm->xe, id) 2379 tile_mask |= 0x1 << id; 2380 2381 xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC); 2382 op->prefetch_range.region = prefetch_region; 2383 op->prefetch_range.ranges_count = 0; 2384 alloc_next_range: 2385 svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx); 2386 2387 if (PTR_ERR(svm_range) == -ENOENT) { 2388 u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma); 2389 2390 addr = ret == ULONG_MAX ? 0 : ret; 2391 if (addr) 2392 goto alloc_next_range; 2393 else 2394 goto print_op_label; 2395 } 2396 2397 if (IS_ERR(svm_range)) { 2398 err = PTR_ERR(svm_range); 2399 goto unwind_prefetch_ops; 2400 } 2401 2402 if (xe_svm_range_validate(vm, svm_range, tile_mask, !!prefetch_region)) { 2403 xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID"); 2404 goto check_next_range; 2405 } 2406 2407 err = xa_alloc(&op->prefetch_range.range, 2408 &i, svm_range, xa_limit_32b, 2409 GFP_KERNEL); 2410 2411 if (err) 2412 goto unwind_prefetch_ops; 2413 2414 op->prefetch_range.ranges_count++; 2415 vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH; 2416 xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED"); 2417 check_next_range: 2418 if (range_end > xe_svm_range_end(svm_range) && 2419 xe_svm_range_end(svm_range) < xe_vma_end(vma)) { 2420 addr = xe_svm_range_end(svm_range); 2421 goto alloc_next_range; 2422 } 2423 } 2424 print_op_label: 2425 print_op(vm->xe, __op); 2426 } 2427 2428 return ops; 2429 2430 unwind_prefetch_ops: 2431 xe_svm_prefetch_gpuva_ops_fini(ops); 2432 drm_gpuva_ops_free(&vm->gpuvm, ops); 2433 return ERR_PTR(err); 2434 } 2435 2436 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO); 2437 2438 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, 2439 u16 pat_index, unsigned int flags) 2440 { 2441 struct xe_bo *bo = op->gem.obj ? 
gem_to_xe_bo(op->gem.obj) : NULL; 2442 struct drm_exec exec; 2443 struct xe_vma *vma; 2444 int err = 0; 2445 2446 lockdep_assert_held_write(&vm->lock); 2447 2448 if (bo) { 2449 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); 2450 drm_exec_until_all_locked(&exec) { 2451 err = 0; 2452 if (!bo->vm) { 2453 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm)); 2454 drm_exec_retry_on_contention(&exec); 2455 } 2456 if (!err) { 2457 err = drm_exec_lock_obj(&exec, &bo->ttm.base); 2458 drm_exec_retry_on_contention(&exec); 2459 } 2460 if (err) { 2461 drm_exec_fini(&exec); 2462 return ERR_PTR(err); 2463 } 2464 } 2465 } 2466 vma = xe_vma_create(vm, bo, op->gem.offset, 2467 op->va.addr, op->va.addr + 2468 op->va.range - 1, pat_index, flags); 2469 if (IS_ERR(vma)) 2470 goto err_unlock; 2471 2472 if (xe_vma_is_userptr(vma)) 2473 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2474 else if (!xe_vma_has_no_bo(vma) && !bo->vm) 2475 err = add_preempt_fences(vm, bo); 2476 2477 err_unlock: 2478 if (bo) 2479 drm_exec_fini(&exec); 2480 2481 if (err) { 2482 prep_vma_destroy(vm, vma, false); 2483 xe_vma_destroy_unlocked(vma); 2484 vma = ERR_PTR(err); 2485 } 2486 2487 return vma; 2488 } 2489 2490 static u64 xe_vma_max_pte_size(struct xe_vma *vma) 2491 { 2492 if (vma->gpuva.flags & XE_VMA_PTE_1G) 2493 return SZ_1G; 2494 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT)) 2495 return SZ_2M; 2496 else if (vma->gpuva.flags & XE_VMA_PTE_64K) 2497 return SZ_64K; 2498 else if (vma->gpuva.flags & XE_VMA_PTE_4K) 2499 return SZ_4K; 2500 2501 return SZ_1G; /* Uninitialized, used max size */ 2502 } 2503 2504 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size) 2505 { 2506 switch (size) { 2507 case SZ_1G: 2508 vma->gpuva.flags |= XE_VMA_PTE_1G; 2509 break; 2510 case SZ_2M: 2511 vma->gpuva.flags |= XE_VMA_PTE_2M; 2512 break; 2513 case SZ_64K: 2514 vma->gpuva.flags |= XE_VMA_PTE_64K; 2515 break; 2516 case SZ_4K: 2517 vma->gpuva.flags |= XE_VMA_PTE_4K; 2518 break; 2519 } 2520 } 2521 2522 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) 2523 { 2524 int err = 0; 2525 2526 lockdep_assert_held_write(&vm->lock); 2527 2528 switch (op->base.op) { 2529 case DRM_GPUVA_OP_MAP: 2530 err |= xe_vm_insert_vma(vm, op->map.vma); 2531 if (!err) 2532 op->flags |= XE_VMA_OP_COMMITTED; 2533 break; 2534 case DRM_GPUVA_OP_REMAP: 2535 { 2536 u8 tile_present = 2537 gpuva_to_vma(op->base.remap.unmap->va)->tile_present; 2538 2539 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va), 2540 true); 2541 op->flags |= XE_VMA_OP_COMMITTED; 2542 2543 if (op->remap.prev) { 2544 err |= xe_vm_insert_vma(vm, op->remap.prev); 2545 if (!err) 2546 op->flags |= XE_VMA_OP_PREV_COMMITTED; 2547 if (!err && op->remap.skip_prev) { 2548 op->remap.prev->tile_present = 2549 tile_present; 2550 op->remap.prev = NULL; 2551 } 2552 } 2553 if (op->remap.next) { 2554 err |= xe_vm_insert_vma(vm, op->remap.next); 2555 if (!err) 2556 op->flags |= XE_VMA_OP_NEXT_COMMITTED; 2557 if (!err && op->remap.skip_next) { 2558 op->remap.next->tile_present = 2559 tile_present; 2560 op->remap.next = NULL; 2561 } 2562 } 2563 2564 /* Adjust for partial unbind after removing VMA from VM */ 2565 if (!err) { 2566 op->base.remap.unmap->va->va.addr = op->remap.start; 2567 op->base.remap.unmap->va->va.range = op->remap.range; 2568 } 2569 break; 2570 } 2571 case DRM_GPUVA_OP_UNMAP: 2572 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true); 2573 op->flags |= XE_VMA_OP_COMMITTED; 2574 break; 2575 case DRM_GPUVA_OP_PREFETCH: 2576 op->flags |= 
XE_VMA_OP_COMMITTED; 2577 break; 2578 default: 2579 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2580 } 2581 2582 return err; 2583 } 2584 2585 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, 2586 struct xe_vma_ops *vops) 2587 { 2588 struct xe_device *xe = vm->xe; 2589 struct drm_gpuva_op *__op; 2590 struct xe_tile *tile; 2591 u8 id, tile_mask = 0; 2592 int err = 0; 2593 2594 lockdep_assert_held_write(&vm->lock); 2595 2596 for_each_tile(tile, vm->xe, id) 2597 tile_mask |= 0x1 << id; 2598 2599 drm_gpuva_for_each_op(__op, ops) { 2600 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2601 struct xe_vma *vma; 2602 unsigned int flags = 0; 2603 2604 INIT_LIST_HEAD(&op->link); 2605 list_add_tail(&op->link, &vops->list); 2606 op->tile_mask = tile_mask; 2607 2608 switch (op->base.op) { 2609 case DRM_GPUVA_OP_MAP: 2610 { 2611 flags |= op->map.read_only ? 2612 VMA_CREATE_FLAG_READ_ONLY : 0; 2613 flags |= op->map.is_null ? 2614 VMA_CREATE_FLAG_IS_NULL : 0; 2615 flags |= op->map.dumpable ? 2616 VMA_CREATE_FLAG_DUMPABLE : 0; 2617 flags |= op->map.is_cpu_addr_mirror ? 2618 VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0; 2619 2620 vma = new_vma(vm, &op->base.map, op->map.pat_index, 2621 flags); 2622 if (IS_ERR(vma)) 2623 return PTR_ERR(vma); 2624 2625 op->map.vma = vma; 2626 if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) && 2627 !op->map.is_cpu_addr_mirror) || 2628 op->map.invalidate_on_bind) 2629 xe_vma_ops_incr_pt_update_ops(vops, 2630 op->tile_mask, 1); 2631 break; 2632 } 2633 case DRM_GPUVA_OP_REMAP: 2634 { 2635 struct xe_vma *old = 2636 gpuva_to_vma(op->base.remap.unmap->va); 2637 bool skip = xe_vma_is_cpu_addr_mirror(old); 2638 u64 start = xe_vma_start(old), end = xe_vma_end(old); 2639 int num_remap_ops = 0; 2640 2641 if (op->base.remap.prev) 2642 start = op->base.remap.prev->va.addr + 2643 op->base.remap.prev->va.range; 2644 if (op->base.remap.next) 2645 end = op->base.remap.next->va.addr; 2646 2647 if (xe_vma_is_cpu_addr_mirror(old) && 2648 xe_svm_has_mapping(vm, start, end)) 2649 return -EBUSY; 2650 2651 op->remap.start = xe_vma_start(old); 2652 op->remap.range = xe_vma_size(old); 2653 2654 flags |= op->base.remap.unmap->va->flags & 2655 XE_VMA_READ_ONLY ? 2656 VMA_CREATE_FLAG_READ_ONLY : 0; 2657 flags |= op->base.remap.unmap->va->flags & 2658 DRM_GPUVA_SPARSE ? 2659 VMA_CREATE_FLAG_IS_NULL : 0; 2660 flags |= op->base.remap.unmap->va->flags & 2661 XE_VMA_DUMPABLE ? 2662 VMA_CREATE_FLAG_DUMPABLE : 0; 2663 flags |= xe_vma_is_cpu_addr_mirror(old) ? 2664 VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0; 2665 2666 if (op->base.remap.prev) { 2667 vma = new_vma(vm, op->base.remap.prev, 2668 old->pat_index, flags); 2669 if (IS_ERR(vma)) 2670 return PTR_ERR(vma); 2671 2672 op->remap.prev = vma; 2673 2674 /* 2675 * Userptr creates a new SG mapping so 2676 * we must also rebind. 
2677 */ 2678 op->remap.skip_prev = skip || 2679 (!xe_vma_is_userptr(old) && 2680 IS_ALIGNED(xe_vma_end(vma), 2681 xe_vma_max_pte_size(old))); 2682 if (op->remap.skip_prev) { 2683 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2684 op->remap.range -= 2685 xe_vma_end(vma) - 2686 xe_vma_start(old); 2687 op->remap.start = xe_vma_end(vma); 2688 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx", 2689 (ULL)op->remap.start, 2690 (ULL)op->remap.range); 2691 } else { 2692 num_remap_ops++; 2693 } 2694 } 2695 2696 if (op->base.remap.next) { 2697 vma = new_vma(vm, op->base.remap.next, 2698 old->pat_index, flags); 2699 if (IS_ERR(vma)) 2700 return PTR_ERR(vma); 2701 2702 op->remap.next = vma; 2703 2704 /* 2705 * Userptr creates a new SG mapping so 2706 * we must also rebind. 2707 */ 2708 op->remap.skip_next = skip || 2709 (!xe_vma_is_userptr(old) && 2710 IS_ALIGNED(xe_vma_start(vma), 2711 xe_vma_max_pte_size(old))); 2712 if (op->remap.skip_next) { 2713 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2714 op->remap.range -= 2715 xe_vma_end(old) - 2716 xe_vma_start(vma); 2717 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx", 2718 (ULL)op->remap.start, 2719 (ULL)op->remap.range); 2720 } else { 2721 num_remap_ops++; 2722 } 2723 } 2724 if (!skip) 2725 num_remap_ops++; 2726 2727 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops); 2728 break; 2729 } 2730 case DRM_GPUVA_OP_UNMAP: 2731 vma = gpuva_to_vma(op->base.unmap.va); 2732 2733 if (xe_vma_is_cpu_addr_mirror(vma) && 2734 xe_svm_has_mapping(vm, xe_vma_start(vma), 2735 xe_vma_end(vma))) 2736 return -EBUSY; 2737 2738 if (!xe_vma_is_cpu_addr_mirror(vma)) 2739 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2740 break; 2741 case DRM_GPUVA_OP_PREFETCH: 2742 vma = gpuva_to_vma(op->base.prefetch.va); 2743 2744 if (xe_vma_is_userptr(vma)) { 2745 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2746 if (err) 2747 return err; 2748 } 2749 2750 if (xe_vma_is_cpu_addr_mirror(vma)) 2751 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 2752 op->prefetch_range.ranges_count); 2753 else 2754 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2755 2756 break; 2757 default: 2758 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2759 } 2760 2761 err = xe_vma_op_commit(vm, op); 2762 if (err) 2763 return err; 2764 } 2765 2766 return 0; 2767 } 2768 2769 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, 2770 bool post_commit, bool prev_post_commit, 2771 bool next_post_commit) 2772 { 2773 lockdep_assert_held_write(&vm->lock); 2774 2775 switch (op->base.op) { 2776 case DRM_GPUVA_OP_MAP: 2777 if (op->map.vma) { 2778 prep_vma_destroy(vm, op->map.vma, post_commit); 2779 xe_vma_destroy_unlocked(op->map.vma); 2780 } 2781 break; 2782 case DRM_GPUVA_OP_UNMAP: 2783 { 2784 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va); 2785 2786 if (vma) { 2787 down_read(&vm->userptr.notifier_lock); 2788 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2789 up_read(&vm->userptr.notifier_lock); 2790 if (post_commit) 2791 xe_vm_insert_vma(vm, vma); 2792 } 2793 break; 2794 } 2795 case DRM_GPUVA_OP_REMAP: 2796 { 2797 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va); 2798 2799 if (op->remap.prev) { 2800 prep_vma_destroy(vm, op->remap.prev, prev_post_commit); 2801 xe_vma_destroy_unlocked(op->remap.prev); 2802 } 2803 if (op->remap.next) { 2804 prep_vma_destroy(vm, op->remap.next, next_post_commit); 2805 xe_vma_destroy_unlocked(op->remap.next); 2806 } 2807 if (vma) { 2808 down_read(&vm->userptr.notifier_lock); 2809 
vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2810 up_read(&vm->userptr.notifier_lock); 2811 if (post_commit) 2812 xe_vm_insert_vma(vm, vma); 2813 } 2814 break; 2815 } 2816 case DRM_GPUVA_OP_PREFETCH: 2817 /* Nothing to do */ 2818 break; 2819 default: 2820 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2821 } 2822 } 2823 2824 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, 2825 struct drm_gpuva_ops **ops, 2826 int num_ops_list) 2827 { 2828 int i; 2829 2830 for (i = num_ops_list - 1; i >= 0; --i) { 2831 struct drm_gpuva_ops *__ops = ops[i]; 2832 struct drm_gpuva_op *__op; 2833 2834 if (!__ops) 2835 continue; 2836 2837 drm_gpuva_for_each_op_reverse(__op, __ops) { 2838 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2839 2840 xe_vma_op_unwind(vm, op, 2841 op->flags & XE_VMA_OP_COMMITTED, 2842 op->flags & XE_VMA_OP_PREV_COMMITTED, 2843 op->flags & XE_VMA_OP_NEXT_COMMITTED); 2844 } 2845 } 2846 } 2847 2848 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, 2849 bool validate) 2850 { 2851 struct xe_bo *bo = xe_vma_bo(vma); 2852 struct xe_vm *vm = xe_vma_vm(vma); 2853 int err = 0; 2854 2855 if (bo) { 2856 if (!bo->vm) 2857 err = drm_exec_lock_obj(exec, &bo->ttm.base); 2858 if (!err && validate) 2859 err = xe_bo_validate(bo, vm, 2860 !xe_vm_in_preempt_fence_mode(vm)); 2861 } 2862 2863 return err; 2864 } 2865 2866 static int check_ufence(struct xe_vma *vma) 2867 { 2868 if (vma->ufence) { 2869 struct xe_user_fence * const f = vma->ufence; 2870 2871 if (!xe_sync_ufence_get_status(f)) 2872 return -EBUSY; 2873 2874 vma->ufence = NULL; 2875 xe_sync_ufence_put(f); 2876 } 2877 2878 return 0; 2879 } 2880 2881 static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) 2882 { 2883 bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR); 2884 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2885 int err = 0; 2886 2887 struct xe_svm_range *svm_range; 2888 struct drm_gpusvm_ctx ctx = {}; 2889 struct xe_tile *tile; 2890 unsigned long i; 2891 u32 region; 2892 2893 if (!xe_vma_is_cpu_addr_mirror(vma)) 2894 return 0; 2895 2896 region = op->prefetch_range.region; 2897 2898 ctx.read_only = xe_vma_read_only(vma); 2899 ctx.devmem_possible = devmem_possible; 2900 ctx.check_pages_threshold = devmem_possible ? 
SZ_64K : 0; 2901 2902 /* TODO: Threading the migration */ 2903 xa_for_each(&op->prefetch_range.range, i, svm_range) { 2904 if (!region) 2905 xe_svm_range_migrate_to_smem(vm, svm_range); 2906 2907 if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, region)) { 2908 tile = &vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0]; 2909 err = xe_svm_alloc_vram(vm, tile, svm_range, &ctx); 2910 if (err) { 2911 drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n", 2912 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 2913 return -ENODATA; 2914 } 2915 xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM"); 2916 } 2917 2918 err = xe_svm_range_get_pages(vm, svm_range, &ctx); 2919 if (err) { 2920 if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) 2921 err = -ENODATA; 2922 drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n", 2923 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 2924 return err; 2925 } 2926 xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE"); 2927 } 2928 2929 return err; 2930 } 2931 2932 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, 2933 struct xe_vma_op *op) 2934 { 2935 int err = 0; 2936 2937 switch (op->base.op) { 2938 case DRM_GPUVA_OP_MAP: 2939 if (!op->map.invalidate_on_bind) 2940 err = vma_lock_and_validate(exec, op->map.vma, 2941 !xe_vm_in_fault_mode(vm) || 2942 op->map.immediate); 2943 break; 2944 case DRM_GPUVA_OP_REMAP: 2945 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va)); 2946 if (err) 2947 break; 2948 2949 err = vma_lock_and_validate(exec, 2950 gpuva_to_vma(op->base.remap.unmap->va), 2951 false); 2952 if (!err && op->remap.prev) 2953 err = vma_lock_and_validate(exec, op->remap.prev, true); 2954 if (!err && op->remap.next) 2955 err = vma_lock_and_validate(exec, op->remap.next, true); 2956 break; 2957 case DRM_GPUVA_OP_UNMAP: 2958 err = check_ufence(gpuva_to_vma(op->base.unmap.va)); 2959 if (err) 2960 break; 2961 2962 err = vma_lock_and_validate(exec, 2963 gpuva_to_vma(op->base.unmap.va), 2964 false); 2965 break; 2966 case DRM_GPUVA_OP_PREFETCH: 2967 { 2968 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2969 u32 region; 2970 2971 if (xe_vma_is_cpu_addr_mirror(vma)) 2972 region = op->prefetch_range.region; 2973 else 2974 region = op->prefetch.region; 2975 2976 xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type)); 2977 2978 err = vma_lock_and_validate(exec, 2979 gpuva_to_vma(op->base.prefetch.va), 2980 false); 2981 if (!err && !xe_vma_has_no_bo(vma)) 2982 err = xe_bo_migrate(xe_vma_bo(vma), 2983 region_to_mem_type[region]); 2984 break; 2985 } 2986 default: 2987 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2988 } 2989 2990 return err; 2991 } 2992 2993 static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops) 2994 { 2995 struct xe_vma_op *op; 2996 int err; 2997 2998 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) 2999 return 0; 3000 3001 list_for_each_entry(op, &vops->list, link) { 3002 if (op->base.op == DRM_GPUVA_OP_PREFETCH) { 3003 err = prefetch_ranges(vm, op); 3004 if (err) 3005 return err; 3006 } 3007 } 3008 3009 return 0; 3010 } 3011 3012 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, 3013 struct xe_vm *vm, 3014 struct xe_vma_ops *vops) 3015 { 3016 struct xe_vma_op *op; 3017 int err; 3018 3019 err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 3020 if (err) 3021 return err; 3022 3023 list_for_each_entry(op, &vops->list, link) { 3024 err = op_lock_and_prep(exec, vm, op); 3025 if 
(err) 3026 return err; 3027 } 3028 3029 #ifdef TEST_VM_OPS_ERROR 3030 if (vops->inject_error && 3031 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK) 3032 return -ENOSPC; 3033 #endif 3034 3035 return 0; 3036 } 3037 3038 static void op_trace(struct xe_vma_op *op) 3039 { 3040 switch (op->base.op) { 3041 case DRM_GPUVA_OP_MAP: 3042 trace_xe_vma_bind(op->map.vma); 3043 break; 3044 case DRM_GPUVA_OP_REMAP: 3045 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va)); 3046 if (op->remap.prev) 3047 trace_xe_vma_bind(op->remap.prev); 3048 if (op->remap.next) 3049 trace_xe_vma_bind(op->remap.next); 3050 break; 3051 case DRM_GPUVA_OP_UNMAP: 3052 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va)); 3053 break; 3054 case DRM_GPUVA_OP_PREFETCH: 3055 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va)); 3056 break; 3057 case DRM_GPUVA_OP_DRIVER: 3058 break; 3059 default: 3060 XE_WARN_ON("NOT POSSIBLE"); 3061 } 3062 } 3063 3064 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops) 3065 { 3066 struct xe_vma_op *op; 3067 3068 list_for_each_entry(op, &vops->list, link) 3069 op_trace(op); 3070 } 3071 3072 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops) 3073 { 3074 struct xe_exec_queue *q = vops->q; 3075 struct xe_tile *tile; 3076 int number_tiles = 0; 3077 u8 id; 3078 3079 for_each_tile(tile, vm->xe, id) { 3080 if (vops->pt_update_ops[id].num_ops) 3081 ++number_tiles; 3082 3083 if (vops->pt_update_ops[id].q) 3084 continue; 3085 3086 if (q) { 3087 vops->pt_update_ops[id].q = q; 3088 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list)) 3089 q = list_next_entry(q, multi_gt_list); 3090 } else { 3091 vops->pt_update_ops[id].q = vm->q[id]; 3092 } 3093 } 3094 3095 return number_tiles; 3096 } 3097 3098 static struct dma_fence *ops_execute(struct xe_vm *vm, 3099 struct xe_vma_ops *vops) 3100 { 3101 struct xe_tile *tile; 3102 struct dma_fence *fence = NULL; 3103 struct dma_fence **fences = NULL; 3104 struct dma_fence_array *cf = NULL; 3105 int number_tiles = 0, current_fence = 0, err; 3106 u8 id; 3107 3108 number_tiles = vm_ops_setup_tile_args(vm, vops); 3109 if (number_tiles == 0) 3110 return ERR_PTR(-ENODATA); 3111 3112 if (number_tiles > 1) { 3113 fences = kmalloc_array(number_tiles, sizeof(*fences), 3114 GFP_KERNEL); 3115 if (!fences) { 3116 fence = ERR_PTR(-ENOMEM); 3117 goto err_trace; 3118 } 3119 } 3120 3121 for_each_tile(tile, vm->xe, id) { 3122 if (!vops->pt_update_ops[id].num_ops) 3123 continue; 3124 3125 err = xe_pt_update_ops_prepare(tile, vops); 3126 if (err) { 3127 fence = ERR_PTR(err); 3128 goto err_out; 3129 } 3130 } 3131 3132 trace_xe_vm_ops_execute(vops); 3133 3134 for_each_tile(tile, vm->xe, id) { 3135 if (!vops->pt_update_ops[id].num_ops) 3136 continue; 3137 3138 fence = xe_pt_update_ops_run(tile, vops); 3139 if (IS_ERR(fence)) 3140 goto err_out; 3141 3142 if (fences) 3143 fences[current_fence++] = fence; 3144 } 3145 3146 if (fences) { 3147 cf = dma_fence_array_create(number_tiles, fences, 3148 vm->composite_fence_ctx, 3149 vm->composite_fence_seqno++, 3150 false); 3151 if (!cf) { 3152 --vm->composite_fence_seqno; 3153 fence = ERR_PTR(-ENOMEM); 3154 goto err_out; 3155 } 3156 fence = &cf->base; 3157 } 3158 3159 for_each_tile(tile, vm->xe, id) { 3160 if (!vops->pt_update_ops[id].num_ops) 3161 continue; 3162 3163 xe_pt_update_ops_fini(tile, vops); 3164 } 3165 3166 return fence; 3167 3168 err_out: 3169 for_each_tile(tile, vm->xe, id) { 3170 if (!vops->pt_update_ops[id].num_ops) 3171 continue; 3172 3173 xe_pt_update_ops_abort(tile, vops); 3174 } 3175 
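	/* Error path: drop any per-tile fences collected before the failure. */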
while (current_fence) 3176 dma_fence_put(fences[--current_fence]); 3177 kfree(fences); 3178 kfree(cf); 3179 3180 err_trace: 3181 trace_xe_vm_ops_fail(vm); 3182 return fence; 3183 } 3184 3185 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence) 3186 { 3187 if (vma->ufence) 3188 xe_sync_ufence_put(vma->ufence); 3189 vma->ufence = __xe_sync_ufence_get(ufence); 3190 } 3191 3192 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op, 3193 struct xe_user_fence *ufence) 3194 { 3195 switch (op->base.op) { 3196 case DRM_GPUVA_OP_MAP: 3197 vma_add_ufence(op->map.vma, ufence); 3198 break; 3199 case DRM_GPUVA_OP_REMAP: 3200 if (op->remap.prev) 3201 vma_add_ufence(op->remap.prev, ufence); 3202 if (op->remap.next) 3203 vma_add_ufence(op->remap.next, ufence); 3204 break; 3205 case DRM_GPUVA_OP_UNMAP: 3206 break; 3207 case DRM_GPUVA_OP_PREFETCH: 3208 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence); 3209 break; 3210 default: 3211 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 3212 } 3213 } 3214 3215 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, 3216 struct dma_fence *fence) 3217 { 3218 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q); 3219 struct xe_user_fence *ufence; 3220 struct xe_vma_op *op; 3221 int i; 3222 3223 ufence = find_ufence_get(vops->syncs, vops->num_syncs); 3224 list_for_each_entry(op, &vops->list, link) { 3225 if (ufence) 3226 op_add_ufence(vm, op, ufence); 3227 3228 if (op->base.op == DRM_GPUVA_OP_UNMAP) 3229 xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence); 3230 else if (op->base.op == DRM_GPUVA_OP_REMAP) 3231 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), 3232 fence); 3233 } 3234 if (ufence) 3235 xe_sync_ufence_put(ufence); 3236 if (fence) { 3237 for (i = 0; i < vops->num_syncs; i++) 3238 xe_sync_entry_signal(vops->syncs + i, fence); 3239 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); 3240 } 3241 } 3242 3243 static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm, 3244 struct xe_vma_ops *vops) 3245 { 3246 struct drm_exec exec; 3247 struct dma_fence *fence; 3248 int err; 3249 3250 lockdep_assert_held_write(&vm->lock); 3251 3252 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | 3253 DRM_EXEC_IGNORE_DUPLICATES, 0); 3254 drm_exec_until_all_locked(&exec) { 3255 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops); 3256 drm_exec_retry_on_contention(&exec); 3257 if (err) { 3258 fence = ERR_PTR(err); 3259 goto unlock; 3260 } 3261 3262 fence = ops_execute(vm, vops); 3263 if (IS_ERR(fence)) { 3264 if (PTR_ERR(fence) == -ENODATA) 3265 vm_bind_ioctl_ops_fini(vm, vops, NULL); 3266 goto unlock; 3267 } 3268 3269 vm_bind_ioctl_ops_fini(vm, vops, fence); 3270 } 3271 3272 unlock: 3273 drm_exec_fini(&exec); 3274 return fence; 3275 } 3276 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); 3277 3278 #define SUPPORTED_FLAGS_STUB \ 3279 (DRM_XE_VM_BIND_FLAG_READONLY | \ 3280 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \ 3281 DRM_XE_VM_BIND_FLAG_NULL | \ 3282 DRM_XE_VM_BIND_FLAG_DUMPABLE | \ 3283 DRM_XE_VM_BIND_FLAG_CHECK_PXP | \ 3284 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) 3285 3286 #ifdef TEST_VM_OPS_ERROR 3287 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR) 3288 #else 3289 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB 3290 #endif 3291 3292 #define XE_64K_PAGE_MASK 0xffffull 3293 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) 3294 3295 static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, 3296 struct drm_xe_vm_bind *args, 3297 struct 
drm_xe_vm_bind_op **bind_ops) 3298 { 3299 int err; 3300 int i; 3301 3302 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) || 3303 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 3304 return -EINVAL; 3305 3306 if (XE_IOCTL_DBG(xe, args->extensions)) 3307 return -EINVAL; 3308 3309 if (args->num_binds > 1) { 3310 u64 __user *bind_user = 3311 u64_to_user_ptr(args->vector_of_binds); 3312 3313 *bind_ops = kvmalloc_array(args->num_binds, 3314 sizeof(struct drm_xe_vm_bind_op), 3315 GFP_KERNEL | __GFP_ACCOUNT | 3316 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3317 if (!*bind_ops) 3318 return args->num_binds > 1 ? -ENOBUFS : -ENOMEM; 3319 3320 err = copy_from_user(*bind_ops, bind_user, 3321 sizeof(struct drm_xe_vm_bind_op) * 3322 args->num_binds); 3323 if (XE_IOCTL_DBG(xe, err)) { 3324 err = -EFAULT; 3325 goto free_bind_ops; 3326 } 3327 } else { 3328 *bind_ops = &args->bind; 3329 } 3330 3331 for (i = 0; i < args->num_binds; ++i) { 3332 u64 range = (*bind_ops)[i].range; 3333 u64 addr = (*bind_ops)[i].addr; 3334 u32 op = (*bind_ops)[i].op; 3335 u32 flags = (*bind_ops)[i].flags; 3336 u32 obj = (*bind_ops)[i].obj; 3337 u64 obj_offset = (*bind_ops)[i].obj_offset; 3338 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance; 3339 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 3340 bool is_cpu_addr_mirror = flags & 3341 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR; 3342 u16 pat_index = (*bind_ops)[i].pat_index; 3343 u16 coh_mode; 3344 3345 if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror && 3346 (!xe_vm_in_fault_mode(vm) || 3347 !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) { 3348 err = -EINVAL; 3349 goto free_bind_ops; 3350 } 3351 3352 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) { 3353 err = -EINVAL; 3354 goto free_bind_ops; 3355 } 3356 3357 pat_index = array_index_nospec(pat_index, xe->pat.n_entries); 3358 (*bind_ops)[i].pat_index = pat_index; 3359 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); 3360 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */ 3361 err = -EINVAL; 3362 goto free_bind_ops; 3363 } 3364 3365 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) { 3366 err = -EINVAL; 3367 goto free_bind_ops; 3368 } 3369 3370 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) || 3371 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) || 3372 XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) || 3373 XE_IOCTL_DBG(xe, obj_offset && (is_null || 3374 is_cpu_addr_mirror)) || 3375 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP && 3376 (is_null || is_cpu_addr_mirror)) || 3377 XE_IOCTL_DBG(xe, !obj && 3378 op == DRM_XE_VM_BIND_OP_MAP && 3379 !is_null && !is_cpu_addr_mirror) || 3380 XE_IOCTL_DBG(xe, !obj && 3381 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3382 XE_IOCTL_DBG(xe, addr && 3383 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3384 XE_IOCTL_DBG(xe, range && 3385 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3386 XE_IOCTL_DBG(xe, obj && 3387 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 3388 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 3389 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 3390 XE_IOCTL_DBG(xe, obj && 3391 op == DRM_XE_VM_BIND_OP_PREFETCH) || 3392 XE_IOCTL_DBG(xe, prefetch_region && 3393 op != DRM_XE_VM_BIND_OP_PREFETCH) || 3394 XE_IOCTL_DBG(xe, !(BIT(prefetch_region) & 3395 xe->info.mem_region_mask)) || 3396 XE_IOCTL_DBG(xe, obj && 3397 op == DRM_XE_VM_BIND_OP_UNMAP)) { 3398 err = -EINVAL; 3399 goto free_bind_ops; 3400 } 3401 3402 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) || 3403 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) || 3404 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) || 3405 XE_IOCTL_DBG(xe, !range && 3406 op != 
DRM_XE_VM_BIND_OP_UNMAP_ALL)) { 3407 err = -EINVAL; 3408 goto free_bind_ops; 3409 } 3410 } 3411 3412 return 0; 3413 3414 free_bind_ops: 3415 if (args->num_binds > 1) 3416 kvfree(*bind_ops); 3417 return err; 3418 } 3419 3420 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, 3421 struct xe_exec_queue *q, 3422 struct xe_sync_entry *syncs, 3423 int num_syncs) 3424 { 3425 struct dma_fence *fence; 3426 int i, err = 0; 3427 3428 fence = xe_sync_in_fence_get(syncs, num_syncs, 3429 to_wait_exec_queue(vm, q), vm); 3430 if (IS_ERR(fence)) 3431 return PTR_ERR(fence); 3432 3433 for (i = 0; i < num_syncs; i++) 3434 xe_sync_entry_signal(&syncs[i], fence); 3435 3436 xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm, 3437 fence); 3438 dma_fence_put(fence); 3439 3440 return err; 3441 } 3442 3443 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 3444 struct xe_exec_queue *q, 3445 struct xe_sync_entry *syncs, u32 num_syncs) 3446 { 3447 memset(vops, 0, sizeof(*vops)); 3448 INIT_LIST_HEAD(&vops->list); 3449 vops->vm = vm; 3450 vops->q = q; 3451 vops->syncs = syncs; 3452 vops->num_syncs = num_syncs; 3453 vops->flags = 0; 3454 } 3455 3456 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, 3457 u64 addr, u64 range, u64 obj_offset, 3458 u16 pat_index, u32 op, u32 bind_flags) 3459 { 3460 u16 coh_mode; 3461 3462 if (XE_IOCTL_DBG(xe, range > bo->size) || 3463 XE_IOCTL_DBG(xe, obj_offset > 3464 bo->size - range)) { 3465 return -EINVAL; 3466 } 3467 3468 /* 3469 * Some platforms require 64k VM_BIND alignment, 3470 * specifically those with XE_VRAM_FLAGS_NEED64K. 3471 * 3472 * Other platforms may have BOs set to 64k physical placement, 3473 * but can be mapped at 4k offsets anyway. This check is only 3474 * there for the former case. 3475 */ 3476 if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) && 3477 (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) { 3478 if (XE_IOCTL_DBG(xe, obj_offset & 3479 XE_64K_PAGE_MASK) || 3480 XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || 3481 XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) { 3482 return -EINVAL; 3483 } 3484 } 3485 3486 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); 3487 if (bo->cpu_caching) { 3488 if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 3489 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) { 3490 return -EINVAL; 3491 } 3492 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) { 3493 /* 3494 * Imported dma-buf from a different device should 3495 * require 1way or 2way coherency since we don't know 3496 * how it was mapped on the CPU. Just assume it is 3497 * potentially cached on the CPU side. 
3498 */ 3499 return -EINVAL; 3500 } 3501 3502 /* If a BO is protected it can only be mapped if the key is still valid */ 3503 if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) && 3504 op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL) 3505 if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0)) 3506 return -ENOEXEC; 3507 3508 return 0; 3509 } 3510 3511 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3512 { 3513 struct xe_device *xe = to_xe_device(dev); 3514 struct xe_file *xef = to_xe_file(file); 3515 struct drm_xe_vm_bind *args = data; 3516 struct drm_xe_sync __user *syncs_user; 3517 struct xe_bo **bos = NULL; 3518 struct drm_gpuva_ops **ops = NULL; 3519 struct xe_vm *vm; 3520 struct xe_exec_queue *q = NULL; 3521 u32 num_syncs, num_ufence = 0; 3522 struct xe_sync_entry *syncs = NULL; 3523 struct drm_xe_vm_bind_op *bind_ops; 3524 struct xe_vma_ops vops; 3525 struct dma_fence *fence; 3526 int err; 3527 int i; 3528 3529 vm = xe_vm_lookup(xef, args->vm_id); 3530 if (XE_IOCTL_DBG(xe, !vm)) 3531 return -EINVAL; 3532 3533 err = vm_bind_ioctl_check_args(xe, vm, args, &bind_ops); 3534 if (err) 3535 goto put_vm; 3536 3537 if (args->exec_queue_id) { 3538 q = xe_exec_queue_lookup(xef, args->exec_queue_id); 3539 if (XE_IOCTL_DBG(xe, !q)) { 3540 err = -ENOENT; 3541 goto put_vm; 3542 } 3543 3544 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) { 3545 err = -EINVAL; 3546 goto put_exec_queue; 3547 } 3548 } 3549 3550 /* Ensure all UNMAPs visible */ 3551 xe_svm_flush(vm); 3552 3553 err = down_write_killable(&vm->lock); 3554 if (err) 3555 goto put_exec_queue; 3556 3557 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { 3558 err = -ENOENT; 3559 goto release_vm_lock; 3560 } 3561 3562 for (i = 0; i < args->num_binds; ++i) { 3563 u64 range = bind_ops[i].range; 3564 u64 addr = bind_ops[i].addr; 3565 3566 if (XE_IOCTL_DBG(xe, range > vm->size) || 3567 XE_IOCTL_DBG(xe, addr > vm->size - range)) { 3568 err = -EINVAL; 3569 goto release_vm_lock; 3570 } 3571 } 3572 3573 if (args->num_binds) { 3574 bos = kvcalloc(args->num_binds, sizeof(*bos), 3575 GFP_KERNEL | __GFP_ACCOUNT | 3576 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3577 if (!bos) { 3578 err = -ENOMEM; 3579 goto release_vm_lock; 3580 } 3581 3582 ops = kvcalloc(args->num_binds, sizeof(*ops), 3583 GFP_KERNEL | __GFP_ACCOUNT | 3584 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3585 if (!ops) { 3586 err = -ENOMEM; 3587 goto release_vm_lock; 3588 } 3589 } 3590 3591 for (i = 0; i < args->num_binds; ++i) { 3592 struct drm_gem_object *gem_obj; 3593 u64 range = bind_ops[i].range; 3594 u64 addr = bind_ops[i].addr; 3595 u32 obj = bind_ops[i].obj; 3596 u64 obj_offset = bind_ops[i].obj_offset; 3597 u16 pat_index = bind_ops[i].pat_index; 3598 u32 op = bind_ops[i].op; 3599 u32 bind_flags = bind_ops[i].flags; 3600 3601 if (!obj) 3602 continue; 3603 3604 gem_obj = drm_gem_object_lookup(file, obj); 3605 if (XE_IOCTL_DBG(xe, !gem_obj)) { 3606 err = -ENOENT; 3607 goto put_obj; 3608 } 3609 bos[i] = gem_to_xe_bo(gem_obj); 3610 3611 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range, 3612 obj_offset, pat_index, op, 3613 bind_flags); 3614 if (err) 3615 goto put_obj; 3616 } 3617 3618 if (args->num_syncs) { 3619 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); 3620 if (!syncs) { 3621 err = -ENOMEM; 3622 goto put_obj; 3623 } 3624 } 3625 3626 syncs_user = u64_to_user_ptr(args->syncs); 3627 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { 3628 err = xe_sync_entry_parse(xe, xef, 
&syncs[num_syncs], 3629 &syncs_user[num_syncs], 3630 (xe_vm_in_lr_mode(vm) ? 3631 SYNC_PARSE_FLAG_LR_MODE : 0) | 3632 (!args->num_binds ? 3633 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0)); 3634 if (err) 3635 goto free_syncs; 3636 3637 if (xe_sync_is_ufence(&syncs[num_syncs])) 3638 num_ufence++; 3639 } 3640 3641 if (XE_IOCTL_DBG(xe, num_ufence > 1)) { 3642 err = -EINVAL; 3643 goto free_syncs; 3644 } 3645 3646 if (!args->num_binds) { 3647 err = -ENODATA; 3648 goto free_syncs; 3649 } 3650 3651 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); 3652 for (i = 0; i < args->num_binds; ++i) { 3653 u64 range = bind_ops[i].range; 3654 u64 addr = bind_ops[i].addr; 3655 u32 op = bind_ops[i].op; 3656 u32 flags = bind_ops[i].flags; 3657 u64 obj_offset = bind_ops[i].obj_offset; 3658 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance; 3659 u16 pat_index = bind_ops[i].pat_index; 3660 3661 ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset, 3662 addr, range, op, flags, 3663 prefetch_region, pat_index); 3664 if (IS_ERR(ops[i])) { 3665 err = PTR_ERR(ops[i]); 3666 ops[i] = NULL; 3667 goto unwind_ops; 3668 } 3669 3670 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops); 3671 if (err) 3672 goto unwind_ops; 3673 3674 #ifdef TEST_VM_OPS_ERROR 3675 if (flags & FORCE_OP_ERROR) { 3676 vops.inject_error = true; 3677 vm->xe->vm_inject_error_position = 3678 (vm->xe->vm_inject_error_position + 1) % 3679 FORCE_OP_ERROR_COUNT; 3680 } 3681 #endif 3682 } 3683 3684 /* Nothing to do */ 3685 if (list_empty(&vops.list)) { 3686 err = -ENODATA; 3687 goto unwind_ops; 3688 } 3689 3690 err = xe_vma_ops_alloc(&vops, args->num_binds > 1); 3691 if (err) 3692 goto unwind_ops; 3693 3694 err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops); 3695 if (err) 3696 goto unwind_ops; 3697 3698 fence = vm_bind_ioctl_ops_execute(vm, &vops); 3699 if (IS_ERR(fence)) 3700 err = PTR_ERR(fence); 3701 else 3702 dma_fence_put(fence); 3703 3704 unwind_ops: 3705 if (err && err != -ENODATA) 3706 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); 3707 xe_vma_ops_fini(&vops); 3708 for (i = args->num_binds - 1; i >= 0; --i) 3709 if (ops[i]) 3710 drm_gpuva_ops_free(&vm->gpuvm, ops[i]); 3711 free_syncs: 3712 if (err == -ENODATA) 3713 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs); 3714 while (num_syncs--) 3715 xe_sync_entry_cleanup(&syncs[num_syncs]); 3716 3717 kfree(syncs); 3718 put_obj: 3719 for (i = 0; i < args->num_binds; ++i) 3720 xe_bo_put(bos[i]); 3721 release_vm_lock: 3722 up_write(&vm->lock); 3723 put_exec_queue: 3724 if (q) 3725 xe_exec_queue_put(q); 3726 put_vm: 3727 xe_vm_put(vm); 3728 kvfree(bos); 3729 kvfree(ops); 3730 if (args->num_binds > 1) 3731 kvfree(bind_ops); 3732 return err; 3733 } 3734 3735 /** 3736 * xe_vm_bind_kernel_bo - bind a kernel BO to a VM 3737 * @vm: VM to bind the BO to 3738 * @bo: BO to bind 3739 * @q: exec queue to use for the bind (optional) 3740 * @addr: address at which to bind the BO 3741 * @cache_lvl: PAT cache level to use 3742 * 3743 * Execute a VM bind map operation on a kernel-owned BO to bind it into a 3744 * kernel-owned VM. 3745 * 3746 * Returns a dma_fence to track the binding completion if the job to do so was 3747 * successfully submitted, an error pointer otherwise. 
3748 */ 3749 struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo, 3750 struct xe_exec_queue *q, u64 addr, 3751 enum xe_cache_level cache_lvl) 3752 { 3753 struct xe_vma_ops vops; 3754 struct drm_gpuva_ops *ops = NULL; 3755 struct dma_fence *fence; 3756 int err; 3757 3758 xe_bo_get(bo); 3759 xe_vm_get(vm); 3760 if (q) 3761 xe_exec_queue_get(q); 3762 3763 down_write(&vm->lock); 3764 3765 xe_vma_ops_init(&vops, vm, q, NULL, 0); 3766 3767 ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, bo->size, 3768 DRM_XE_VM_BIND_OP_MAP, 0, 0, 3769 vm->xe->pat.idx[cache_lvl]); 3770 if (IS_ERR(ops)) { 3771 err = PTR_ERR(ops); 3772 goto release_vm_lock; 3773 } 3774 3775 err = vm_bind_ioctl_ops_parse(vm, ops, &vops); 3776 if (err) 3777 goto release_vm_lock; 3778 3779 xe_assert(vm->xe, !list_empty(&vops.list)); 3780 3781 err = xe_vma_ops_alloc(&vops, false); 3782 if (err) 3783 goto unwind_ops; 3784 3785 fence = vm_bind_ioctl_ops_execute(vm, &vops); 3786 if (IS_ERR(fence)) 3787 err = PTR_ERR(fence); 3788 3789 unwind_ops: 3790 if (err && err != -ENODATA) 3791 vm_bind_ioctl_ops_unwind(vm, &ops, 1); 3792 3793 xe_vma_ops_fini(&vops); 3794 drm_gpuva_ops_free(&vm->gpuvm, ops); 3795 3796 release_vm_lock: 3797 up_write(&vm->lock); 3798 3799 if (q) 3800 xe_exec_queue_put(q); 3801 xe_vm_put(vm); 3802 xe_bo_put(bo); 3803 3804 if (err) 3805 fence = ERR_PTR(err); 3806 3807 return fence; 3808 } 3809 3810 /** 3811 * xe_vm_lock() - Lock the vm's dma_resv object 3812 * @vm: The struct xe_vm whose lock is to be locked 3813 * @intr: Whether to perform any waits interruptibly 3814 * 3815 * Return: 0 on success, -EINTR if @intr is true and the wait for a 3816 * contended lock was interrupted. If @intr is false, the function 3817 * always returns 0. 3818 */ 3819 int xe_vm_lock(struct xe_vm *vm, bool intr) 3820 { 3821 if (intr) 3822 return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); 3823 3824 return dma_resv_lock(xe_vm_resv(vm), NULL); 3825 } 3826 3827 /** 3828 * xe_vm_unlock() - Unlock the vm's dma_resv object 3829 * @vm: The struct xe_vm whose lock is to be released. 3830 * 3831 * Unlock a buffer object lock that was locked by xe_vm_lock(). 3832 */ 3833 void xe_vm_unlock(struct xe_vm *vm) 3834 { 3835 dma_resv_unlock(xe_vm_resv(vm)); 3836 } 3837 3838 /** 3839 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock 3840 * @vma: VMA to invalidate 3841 * 3842 * Walks a list of page-table leaves, zeroing the entries owned by this 3843 * VMA, invalidates the TLBs, and blocks until the TLB invalidation is 3844 * complete. 3845 * 3846 * Returns 0 for success, negative error code otherwise. 
3847 */ 3848 int xe_vm_invalidate_vma(struct xe_vma *vma) 3849 { 3850 struct xe_device *xe = xe_vma_vm(vma)->xe; 3851 struct xe_tile *tile; 3852 struct xe_gt_tlb_invalidation_fence 3853 fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; 3854 u8 id; 3855 u32 fence_id = 0; 3856 int ret = 0; 3857 3858 xe_assert(xe, !xe_vma_is_null(vma)); 3859 xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma)); 3860 trace_xe_vma_invalidate(vma); 3861 3862 vm_dbg(&xe_vma_vm(vma)->xe->drm, 3863 "INVALIDATE: addr=0x%016llx, range=0x%016llx", 3864 xe_vma_start(vma), xe_vma_size(vma)); 3865 3866 /* Check that we don't race with page-table updates */ 3867 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { 3868 if (xe_vma_is_userptr(vma)) { 3869 WARN_ON_ONCE(!mmu_interval_check_retry 3870 (&to_userptr_vma(vma)->userptr.notifier, 3871 to_userptr_vma(vma)->userptr.notifier_seq)); 3872 WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)), 3873 DMA_RESV_USAGE_BOOKKEEP)); 3874 3875 } else { 3876 xe_bo_assert_held(xe_vma_bo(vma)); 3877 } 3878 } 3879 3880 for_each_tile(tile, xe, id) { 3881 if (xe_pt_zap_ptes(tile, vma)) { 3882 xe_device_wmb(xe); 3883 xe_gt_tlb_invalidation_fence_init(tile->primary_gt, 3884 &fence[fence_id], 3885 true); 3886 3887 ret = xe_gt_tlb_invalidation_vma(tile->primary_gt, 3888 &fence[fence_id], vma); 3889 if (ret) 3890 goto wait; 3891 ++fence_id; 3892 3893 if (!tile->media_gt) 3894 continue; 3895 3896 xe_gt_tlb_invalidation_fence_init(tile->media_gt, 3897 &fence[fence_id], 3898 true); 3899 3900 ret = xe_gt_tlb_invalidation_vma(tile->media_gt, 3901 &fence[fence_id], vma); 3902 if (ret) 3903 goto wait; 3904 ++fence_id; 3905 } 3906 } 3907 3908 wait: 3909 for (id = 0; id < fence_id; ++id) 3910 xe_gt_tlb_invalidation_fence_wait(&fence[id]); 3911 3912 vma->tile_invalidated = vma->tile_mask; 3913 3914 return ret; 3915 } 3916 3917 int xe_vm_validate_protected(struct xe_vm *vm) 3918 { 3919 struct drm_gpuva *gpuva; 3920 int err = 0; 3921 3922 if (!vm) 3923 return -ENODEV; 3924 3925 mutex_lock(&vm->snap_mutex); 3926 3927 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 3928 struct xe_vma *vma = gpuva_to_vma(gpuva); 3929 struct xe_bo *bo = vma->gpuva.gem.obj ? 3930 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 3931 3932 if (!bo) 3933 continue; 3934 3935 if (xe_bo_is_protected(bo)) { 3936 err = xe_pxp_bo_key_check(vm->xe->pxp, bo); 3937 if (err) 3938 break; 3939 } 3940 } 3941 3942 mutex_unlock(&vm->snap_mutex); 3943 return err; 3944 } 3945 3946 struct xe_vm_snapshot { 3947 unsigned long num_snaps; 3948 struct { 3949 u64 ofs, bo_ofs; 3950 unsigned long len; 3951 struct xe_bo *bo; 3952 void *data; 3953 struct mm_struct *mm; 3954 } snap[]; 3955 }; 3956 3957 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm) 3958 { 3959 unsigned long num_snaps = 0, i; 3960 struct xe_vm_snapshot *snap = NULL; 3961 struct drm_gpuva *gpuva; 3962 3963 if (!vm) 3964 return NULL; 3965 3966 mutex_lock(&vm->snap_mutex); 3967 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 3968 if (gpuva->flags & XE_VMA_DUMPABLE) 3969 num_snaps++; 3970 } 3971 3972 if (num_snaps) 3973 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT); 3974 if (!snap) { 3975 snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV); 3976 goto out_unlock; 3977 } 3978 3979 snap->num_snaps = num_snaps; 3980 i = 0; 3981 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 3982 struct xe_vma *vma = gpuva_to_vma(gpuva); 3983 struct xe_bo *bo = vma->gpuva.gem.obj ? 
3984 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 3985 3986 if (!(gpuva->flags & XE_VMA_DUMPABLE)) 3987 continue; 3988 3989 snap->snap[i].ofs = xe_vma_start(vma); 3990 snap->snap[i].len = xe_vma_size(vma); 3991 if (bo) { 3992 snap->snap[i].bo = xe_bo_get(bo); 3993 snap->snap[i].bo_ofs = xe_vma_bo_offset(vma); 3994 } else if (xe_vma_is_userptr(vma)) { 3995 struct mm_struct *mm = 3996 to_userptr_vma(vma)->userptr.notifier.mm; 3997 3998 if (mmget_not_zero(mm)) 3999 snap->snap[i].mm = mm; 4000 else 4001 snap->snap[i].data = ERR_PTR(-EFAULT); 4002 4003 snap->snap[i].bo_ofs = xe_vma_userptr(vma); 4004 } else { 4005 snap->snap[i].data = ERR_PTR(-ENOENT); 4006 } 4007 i++; 4008 } 4009 4010 out_unlock: 4011 mutex_unlock(&vm->snap_mutex); 4012 return snap; 4013 } 4014 4015 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) 4016 { 4017 if (IS_ERR_OR_NULL(snap)) 4018 return; 4019 4020 for (int i = 0; i < snap->num_snaps; i++) { 4021 struct xe_bo *bo = snap->snap[i].bo; 4022 int err; 4023 4024 if (IS_ERR(snap->snap[i].data)) 4025 continue; 4026 4027 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER); 4028 if (!snap->snap[i].data) { 4029 snap->snap[i].data = ERR_PTR(-ENOMEM); 4030 goto cleanup_bo; 4031 } 4032 4033 if (bo) { 4034 err = xe_bo_read(bo, snap->snap[i].bo_ofs, 4035 snap->snap[i].data, snap->snap[i].len); 4036 } else { 4037 void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs; 4038 4039 kthread_use_mm(snap->snap[i].mm); 4040 if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len)) 4041 err = 0; 4042 else 4043 err = -EFAULT; 4044 kthread_unuse_mm(snap->snap[i].mm); 4045 4046 mmput(snap->snap[i].mm); 4047 snap->snap[i].mm = NULL; 4048 } 4049 4050 if (err) { 4051 kvfree(snap->snap[i].data); 4052 snap->snap[i].data = ERR_PTR(err); 4053 } 4054 4055 cleanup_bo: 4056 xe_bo_put(bo); 4057 snap->snap[i].bo = NULL; 4058 } 4059 } 4060 4061 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) 4062 { 4063 unsigned long i, j; 4064 4065 if (IS_ERR_OR_NULL(snap)) { 4066 drm_printf(p, "[0].error: %li\n", PTR_ERR(snap)); 4067 return; 4068 } 4069 4070 for (i = 0; i < snap->num_snaps; i++) { 4071 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len); 4072 4073 if (IS_ERR(snap->snap[i].data)) { 4074 drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs, 4075 PTR_ERR(snap->snap[i].data)); 4076 continue; 4077 } 4078 4079 drm_printf(p, "[%llx].data: ", snap->snap[i].ofs); 4080 4081 for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) { 4082 u32 *val = snap->snap[i].data + j; 4083 char dumped[ASCII85_BUFSZ]; 4084 4085 drm_puts(p, ascii85_encode(*val, dumped)); 4086 } 4087 4088 drm_puts(p, "\n"); 4089 4090 if (drm_coredump_printer_is_full(p)) 4091 return; 4092 } 4093 } 4094 4095 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap) 4096 { 4097 unsigned long i; 4098 4099 if (IS_ERR_OR_NULL(snap)) 4100 return; 4101 4102 for (i = 0; i < snap->num_snaps; i++) { 4103 if (!IS_ERR(snap->snap[i].data)) 4104 kvfree(snap->snap[i].data); 4105 xe_bo_put(snap->snap[i].bo); 4106 if (snap->snap[i].mm) 4107 mmput(snap->snap[i].mm); 4108 } 4109 kvfree(snap); 4110 } 4111
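
/*
 * Illustrative sketch only (added, not part of the driver): one way an
 * in-kernel caller might combine the helpers above. The vm, bo and addr
 * variables are assumed to already exist, and XE_CACHE_WB is used purely
 * as an example cache level.
 *
 *	struct dma_fence *fence;
 *	int err;
 *
 *	fence = xe_vm_bind_kernel_bo(vm, bo, NULL, addr, XE_CACHE_WB);
 *	if (IS_ERR(fence))
 *		return PTR_ERR(fence);
 *	dma_fence_wait(fence, false);
 *	dma_fence_put(fence);
 *
 *	err = xe_vm_lock(vm, true);
 *	if (err)
 *		return err;
 *	... operate on objects protected by the VM's dma_resv ...
 *	xe_vm_unlock(vm);
 */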