// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_vm.h"

#include <linux/dma-fence-array.h>
#include <linux/nospec.h>

#include <drm/drm_exec.h>
#include <drm/drm_print.h>
#include <drm/ttm/ttm_tt.h>
#include <uapi/drm/xe_drm.h>
#include <linux/ascii85.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/swap.h>

#include <generated/xe_wa_oob.h>

#include "regs/xe_gtt_defs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue.h"
#include "xe_gt_pagefault.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_migrate.h"
#include "xe_pat.h"
#include "xe_pm.h"
#include "xe_preempt_fence.h"
#include "xe_pt.h"
#include "xe_res_cursor.h"
#include "xe_sync.h"
#include "xe_trace_bo.h"
#include "xe_wa.h"
#include "xe_hmm.h"

static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
{
	return vm->gpuvm.r_obj;
}

/**
 * xe_vma_userptr_check_repin() - Advisory check for repin needed
 * @uvma: The userptr vma
 *
 * Check if the userptr vma has been invalidated since last successful
 * repin. The check is advisory only and the function can be called
 * without the vm->userptr.notifier_lock held. There is no guarantee that the
 * vma userptr will remain valid after a lockless check, so typically
 * the call needs to be followed by a proper check under the notifier_lock.
 *
 * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
 */
int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
{
	return mmu_interval_check_retry(&uvma->userptr.notifier,
					uvma->userptr.notifier_seq) ?
63 -EAGAIN : 0; 64 } 65 66 int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) 67 { 68 struct xe_vma *vma = &uvma->vma; 69 struct xe_vm *vm = xe_vma_vm(vma); 70 struct xe_device *xe = vm->xe; 71 72 lockdep_assert_held(&vm->lock); 73 xe_assert(xe, xe_vma_is_userptr(vma)); 74 75 return xe_hmm_userptr_populate_range(uvma, false); 76 } 77 78 static bool preempt_fences_waiting(struct xe_vm *vm) 79 { 80 struct xe_exec_queue *q; 81 82 lockdep_assert_held(&vm->lock); 83 xe_vm_assert_held(vm); 84 85 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 86 if (!q->lr.pfence || 87 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, 88 &q->lr.pfence->flags)) { 89 return true; 90 } 91 } 92 93 return false; 94 } 95 96 static void free_preempt_fences(struct list_head *list) 97 { 98 struct list_head *link, *next; 99 100 list_for_each_safe(link, next, list) 101 xe_preempt_fence_free(to_preempt_fence_from_link(link)); 102 } 103 104 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list, 105 unsigned int *count) 106 { 107 lockdep_assert_held(&vm->lock); 108 xe_vm_assert_held(vm); 109 110 if (*count >= vm->preempt.num_exec_queues) 111 return 0; 112 113 for (; *count < vm->preempt.num_exec_queues; ++(*count)) { 114 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc(); 115 116 if (IS_ERR(pfence)) 117 return PTR_ERR(pfence); 118 119 list_move_tail(xe_preempt_fence_link(pfence), list); 120 } 121 122 return 0; 123 } 124 125 static int wait_for_existing_preempt_fences(struct xe_vm *vm) 126 { 127 struct xe_exec_queue *q; 128 129 xe_vm_assert_held(vm); 130 131 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 132 if (q->lr.pfence) { 133 long timeout = dma_fence_wait(q->lr.pfence, false); 134 135 /* Only -ETIME on fence indicates VM needs to be killed */ 136 if (timeout < 0 || q->lr.pfence->error == -ETIME) 137 return -ETIME; 138 139 dma_fence_put(q->lr.pfence); 140 q->lr.pfence = NULL; 141 } 142 } 143 144 return 0; 145 } 146 147 static bool xe_vm_is_idle(struct xe_vm *vm) 148 { 149 struct xe_exec_queue *q; 150 151 xe_vm_assert_held(vm); 152 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 153 if (!xe_exec_queue_is_idle(q)) 154 return false; 155 } 156 157 return true; 158 } 159 160 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list) 161 { 162 struct list_head *link; 163 struct xe_exec_queue *q; 164 165 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 166 struct dma_fence *fence; 167 168 link = list->next; 169 xe_assert(vm->xe, link != list); 170 171 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link), 172 q, q->lr.context, 173 ++q->lr.seqno); 174 dma_fence_put(q->lr.pfence); 175 q->lr.pfence = fence; 176 } 177 } 178 179 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) 180 { 181 struct xe_exec_queue *q; 182 int err; 183 184 xe_bo_assert_held(bo); 185 186 if (!vm->preempt.num_exec_queues) 187 return 0; 188 189 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues); 190 if (err) 191 return err; 192 193 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) 194 if (q->lr.pfence) { 195 dma_resv_add_fence(bo->ttm.base.resv, 196 q->lr.pfence, 197 DMA_RESV_USAGE_BOOKKEEP); 198 } 199 200 return 0; 201 } 202 203 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm, 204 struct drm_exec *exec) 205 { 206 struct xe_exec_queue *q; 207 208 lockdep_assert_held(&vm->lock); 209 xe_vm_assert_held(vm); 210 211 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 212 q->ops->resume(q); 213 
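		/*
		 * Re-attach the resumed queue's preempt fence to the VM's
		 * reservation object (and the external objects locked in this
		 * exec) with BOOKKEEP usage so later waiters observe it.
		 */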
		drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
					 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
	}
}

int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	struct drm_gpuvm_exec vm_exec = {
		.vm = &vm->gpuvm,
		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
		.num_fences = 1,
	};
	struct drm_exec *exec = &vm_exec.exec;
	struct dma_fence *pfence;
	int err;
	bool wait;

	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));

	down_write(&vm->lock);
	err = drm_gpuvm_exec_lock(&vm_exec);
	if (err)
		goto out_up_write;

	pfence = xe_preempt_fence_create(q, q->lr.context,
					 ++q->lr.seqno);
	if (!pfence) {
		err = -ENOMEM;
		goto out_fini;
	}

	list_add(&q->lr.link, &vm->preempt.exec_queues);
	++vm->preempt.num_exec_queues;
	q->lr.pfence = pfence;

	down_read(&vm->userptr.notifier_lock);

	drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
				 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);

	/*
	 * Check to see if a preemption on VM is in flight or userptr
	 * invalidation, if so trigger this preempt fence to sync state with
	 * other preempt fences on the VM.
	 */
	wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
	if (wait)
		dma_fence_enable_sw_signaling(pfence);

	up_read(&vm->userptr.notifier_lock);

out_fini:
	drm_exec_fini(exec);
out_up_write:
	up_write(&vm->lock);

	return err;
}

/**
 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
 * @vm: The VM.
 * @q: The exec_queue
 *
 * Note that this function might be called multiple times on the same queue.
 */
void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	if (!xe_vm_in_preempt_fence_mode(vm))
		return;

	down_write(&vm->lock);
	if (!list_empty(&q->lr.link)) {
		list_del_init(&q->lr.link);
		--vm->preempt.num_exec_queues;
	}
	if (q->lr.pfence) {
		dma_fence_enable_sw_signaling(q->lr.pfence);
		dma_fence_put(q->lr.pfence);
		q->lr.pfence = NULL;
	}
	up_write(&vm->lock);
}

/**
 * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
 * that need repinning.
 * @vm: The VM.
 *
 * This function checks for whether the VM has userptrs that need repinning,
 * and provides a release-type barrier on the userptr.notifier_lock after
 * checking.
 *
 * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
 */
int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
{
	lockdep_assert_held_read(&vm->userptr.notifier_lock);

	return (list_empty(&vm->userptr.repin_list) &&
		list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
}

#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000

/**
 * xe_vm_kill() - VM Kill
 * @vm: The VM.
 * @unlocked: Flag indicating the VM's dma-resv is not held
 *
 * Kill the VM by setting a banned flag indicating the VM is no longer
 * available for use. If in preempt fence mode, also kill all exec queues
 * attached to the VM.
 */
void xe_vm_kill(struct xe_vm *vm, bool unlocked)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);

	if (unlocked)
		xe_vm_lock(vm, false);

	vm->flags |= XE_VM_FLAG_BANNED;
	trace_xe_vm_kill(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
		q->ops->kill(q);

	if (unlocked)
		xe_vm_unlock(vm);

	/* TODO: Inform user the VM is banned */
}

/**
 * xe_vm_validate_should_retry() - Whether to retry after a validate error.
 * @exec: The drm_exec object used for locking before validation.
 * @err: The error returned from ttm_bo_validate().
 * @end: A ktime_t cookie that should be set to 0 before first use and
 * that should be reused on subsequent calls.
 *
 * With multiple active VMs, under memory pressure, it is possible that
 * ttm_bo_validate() runs into -EDEADLK and in such a case returns -ENOMEM.
 * Until ttm properly handles locking in such scenarios, the best thing the
 * driver can do is retry with a timeout. Check if that is necessary, and
 * if so unlock the drm_exec's objects while keeping the ticket to prepare
 * for a rerun.
 *
 * Return: true if a retry after drm_exec_init() is recommended;
 * false otherwise.
 */
bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
{
	ktime_t cur;

	if (err != -ENOMEM)
		return false;

	cur = ktime_get();
	*end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
	if (!ktime_before(cur, *end))
		return false;

	msleep(20);
	return true;
}

static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
{
	struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
	struct drm_gpuva *gpuva;
	int ret;

	lockdep_assert_held(&vm->lock);
	drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
		list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
			       &vm->rebind_list);

	ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
	if (ret)
		return ret;

	vm_bo->evicted = false;
	return 0;
}

/**
 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
 * @vm: The vm for which we are rebinding.
 * @exec: The struct drm_exec with the locked GEM objects.
 * @num_fences: The number of fences to reserve for the operation, not
 * including rebinds and validations.
 *
 * Validates all evicted gem objects and rebinds their vmas. Note that
 * rebindings may cause evictions and hence the validation-rebind
 * sequence is rerun until there are no more objects to validate.
 *
 * Return: 0 on success, negative error code on error. In particular,
 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
 * the drm_exec transaction needs to be restarted.
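 *
 * Intended to be called from inside a drm_exec_until_all_locked() loop once
 * the VM and its objects have been prepared; an illustrative sketch
 * (mirroring xe_preempt_work_begin(), error handling trimmed):
 *
 *	drm_exec_until_all_locked(&exec) {
 *		err = drm_gpuvm_prepare_vm(&vm->gpuvm, &exec, 0);
 *		if (!err)
 *			err = drm_gpuvm_prepare_objects(&vm->gpuvm, &exec, 0);
 *		if (!err)
 *			err = xe_vm_validate_rebind(vm, &exec, num_fences);
 *		drm_exec_retry_on_contention(&exec);
 *	}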
 */
int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
			  unsigned int num_fences)
{
	struct drm_gem_object *obj;
	unsigned long index;
	int ret;

	do {
		ret = drm_gpuvm_validate(&vm->gpuvm, exec);
		if (ret)
			return ret;

		ret = xe_vm_rebind(vm, false);
		if (ret)
			return ret;
	} while (!list_empty(&vm->gpuvm.evict.list));

	drm_exec_for_each_locked_object(exec, index, obj) {
		ret = dma_resv_reserve_fences(obj->resv, num_fences);
		if (ret)
			return ret;
	}

	return 0;
}

static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
				 bool *done)
{
	int err;

	err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	if (xe_vm_is_idle(vm)) {
		vm->preempt.rebind_deactivated = true;
		*done = true;
		return 0;
	}

	if (!preempt_fences_waiting(vm)) {
		*done = true;
		return 0;
	}

	err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	err = wait_for_existing_preempt_fences(vm);
	if (err)
		return err;

	/*
	 * Add validation and rebinding to the locking loop since both can
	 * cause evictions which may require blocking dma_resv locks.
	 * The fence reservation here is intended for the new preempt fences
	 * we attach at the end of the rebind work.
	 */
	return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
}

static void preempt_rebind_work_func(struct work_struct *w)
{
	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
	struct drm_exec exec;
	unsigned int fence_count = 0;
	LIST_HEAD(preempt_fences);
	ktime_t end = 0;
	int err = 0;
	long wait;
	int __maybe_unused tries = 0;

	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
	trace_xe_vm_rebind_worker_enter(vm);

	down_write(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm)) {
		up_write(&vm->lock);
		trace_xe_vm_rebind_worker_exit(vm);
		return;
	}

retry:
	if (xe_vm_userptr_check_repin(vm)) {
		err = xe_vm_userptr_pin(vm);
		if (err)
			goto out_unlock_outer;
	}

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);

	drm_exec_until_all_locked(&exec) {
		bool done = false;

		err = xe_preempt_work_begin(&exec, vm, &done);
		drm_exec_retry_on_contention(&exec);
		if (err || done) {
			drm_exec_fini(&exec);
			if (err && xe_vm_validate_should_retry(&exec, err, &end))
				err = -EAGAIN;

			goto out_unlock_outer;
		}
	}

	err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
	if (err)
		goto out_unlock;

	err = xe_vm_rebind(vm, true);
	if (err)
		goto out_unlock;

	/* Wait on rebinds and munmap style VM unbinds */
	wait = dma_resv_wait_timeout(xe_vm_resv(vm),
				     DMA_RESV_USAGE_KERNEL,
				     false, MAX_SCHEDULE_TIMEOUT);
	if (wait <= 0) {
		err = -ETIME;
		goto out_unlock;
	}

#define retry_required(__tries, __vm) \
	(IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ?		\
	 (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) :		\
	 __xe_vm_userptr_needs_repin(__vm))

	down_read(&vm->userptr.notifier_lock);
	if (retry_required(tries, vm)) {
		up_read(&vm->userptr.notifier_lock);
		err = -EAGAIN;
		goto out_unlock;
	}

#undef retry_required

	spin_lock(&vm->xe->ttm.lru_lock);
	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
	spin_unlock(&vm->xe->ttm.lru_lock);

	/* Point of no return. */
	arm_preempt_fences(vm, &preempt_fences);
	resume_and_reinstall_preempt_fences(vm, &exec);
	up_read(&vm->userptr.notifier_lock);

out_unlock:
	drm_exec_fini(&exec);
out_unlock_outer:
	if (err == -EAGAIN) {
		trace_xe_vm_rebind_worker_retry(vm);
		goto retry;
	}

	if (err) {
		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
		xe_vm_kill(vm, true);
	}
	up_write(&vm->lock);

	free_preempt_fences(&preempt_fences);

	trace_xe_vm_rebind_worker_exit(vm);
}

static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma)
{
	struct xe_userptr *userptr = &uvma->userptr;
	struct xe_vma *vma = &uvma->vma;
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	long err;

	/*
	 * Tell exec and rebind worker they need to repin and rebind this
	 * userptr.
	 */
	if (!xe_vm_in_fault_mode(vm) &&
	    !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
		spin_lock(&vm->userptr.invalidated_lock);
		list_move_tail(&userptr->invalidate_link,
			       &vm->userptr.invalidated);
		spin_unlock(&vm->userptr.invalidated_lock);
	}

	/*
	 * Preempt fences turn into schedule disables, pipeline these.
	 * Note that even in fault mode, we need to wait for binds and
	 * unbinds to complete, and those are attached as BOOKKEEP fences
	 * to the vm.
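	 *
	 * Hence the sequence below: enable signaling on every BOOKKEEP fence
	 * on the VM resv (which kicks preemption for the preempt fences) and
	 * then wait for all of them, so that any page-table invalidation that
	 * follows cannot race still-pending binds or unbinds.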
607 */ 608 dma_resv_iter_begin(&cursor, xe_vm_resv(vm), 609 DMA_RESV_USAGE_BOOKKEEP); 610 dma_resv_for_each_fence_unlocked(&cursor, fence) 611 dma_fence_enable_sw_signaling(fence); 612 dma_resv_iter_end(&cursor); 613 614 err = dma_resv_wait_timeout(xe_vm_resv(vm), 615 DMA_RESV_USAGE_BOOKKEEP, 616 false, MAX_SCHEDULE_TIMEOUT); 617 XE_WARN_ON(err <= 0); 618 619 if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) { 620 err = xe_vm_invalidate_vma(vma); 621 XE_WARN_ON(err); 622 } 623 624 xe_hmm_userptr_unmap(uvma); 625 } 626 627 static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, 628 const struct mmu_notifier_range *range, 629 unsigned long cur_seq) 630 { 631 struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier); 632 struct xe_vma *vma = &uvma->vma; 633 struct xe_vm *vm = xe_vma_vm(vma); 634 635 xe_assert(vm->xe, xe_vma_is_userptr(vma)); 636 trace_xe_vma_userptr_invalidate(vma); 637 638 if (!mmu_notifier_range_blockable(range)) 639 return false; 640 641 vm_dbg(&xe_vma_vm(vma)->xe->drm, 642 "NOTIFIER: addr=0x%016llx, range=0x%016llx", 643 xe_vma_start(vma), xe_vma_size(vma)); 644 645 down_write(&vm->userptr.notifier_lock); 646 mmu_interval_set_seq(mni, cur_seq); 647 648 __vma_userptr_invalidate(vm, uvma); 649 up_write(&vm->userptr.notifier_lock); 650 trace_xe_vma_userptr_invalidate_complete(vma); 651 652 return true; 653 } 654 655 static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { 656 .invalidate = vma_userptr_invalidate, 657 }; 658 659 #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) 660 /** 661 * xe_vma_userptr_force_invalidate() - force invalidate a userptr 662 * @uvma: The userptr vma to invalidate 663 * 664 * Perform a forced userptr invalidation for testing purposes. 665 */ 666 void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) 667 { 668 struct xe_vm *vm = xe_vma_vm(&uvma->vma); 669 670 /* Protect against concurrent userptr pinning */ 671 lockdep_assert_held(&vm->lock); 672 /* Protect against concurrent notifiers */ 673 lockdep_assert_held(&vm->userptr.notifier_lock); 674 /* 675 * Protect against concurrent instances of this function and 676 * the critical exec sections 677 */ 678 xe_vm_assert_held(vm); 679 680 if (!mmu_interval_read_retry(&uvma->userptr.notifier, 681 uvma->userptr.notifier_seq)) 682 uvma->userptr.notifier_seq -= 2; 683 __vma_userptr_invalidate(vm, uvma); 684 } 685 #endif 686 687 int xe_vm_userptr_pin(struct xe_vm *vm) 688 { 689 struct xe_userptr_vma *uvma, *next; 690 int err = 0; 691 LIST_HEAD(tmp_evict); 692 693 xe_assert(vm->xe, !xe_vm_in_fault_mode(vm)); 694 lockdep_assert_held_write(&vm->lock); 695 696 /* Collect invalidated userptrs */ 697 spin_lock(&vm->userptr.invalidated_lock); 698 xe_assert(vm->xe, list_empty(&vm->userptr.repin_list)); 699 list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated, 700 userptr.invalidate_link) { 701 list_del_init(&uvma->userptr.invalidate_link); 702 list_add_tail(&uvma->userptr.repin_link, 703 &vm->userptr.repin_list); 704 } 705 spin_unlock(&vm->userptr.invalidated_lock); 706 707 /* Pin and move to bind list */ 708 list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, 709 userptr.repin_link) { 710 err = xe_vma_userptr_pin_pages(uvma); 711 if (err == -EFAULT) { 712 list_del_init(&uvma->userptr.repin_link); 713 /* 714 * We might have already done the pin once already, but 715 * then had to retry before the re-bind happened, due 716 * some other condition in the caller, but in the 717 * meantime the userptr got dinged by the notifier 
such 718 * that we need to revalidate here, but this time we hit 719 * the EFAULT. In such a case make sure we remove 720 * ourselves from the rebind list to avoid going down in 721 * flames. 722 */ 723 if (!list_empty(&uvma->vma.combined_links.rebind)) 724 list_del_init(&uvma->vma.combined_links.rebind); 725 726 /* Wait for pending binds */ 727 xe_vm_lock(vm, false); 728 dma_resv_wait_timeout(xe_vm_resv(vm), 729 DMA_RESV_USAGE_BOOKKEEP, 730 false, MAX_SCHEDULE_TIMEOUT); 731 732 err = xe_vm_invalidate_vma(&uvma->vma); 733 xe_vm_unlock(vm); 734 if (err) 735 break; 736 } else { 737 if (err) 738 break; 739 740 list_del_init(&uvma->userptr.repin_link); 741 list_move_tail(&uvma->vma.combined_links.rebind, 742 &vm->rebind_list); 743 } 744 } 745 746 if (err) { 747 down_write(&vm->userptr.notifier_lock); 748 spin_lock(&vm->userptr.invalidated_lock); 749 list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, 750 userptr.repin_link) { 751 list_del_init(&uvma->userptr.repin_link); 752 list_move_tail(&uvma->userptr.invalidate_link, 753 &vm->userptr.invalidated); 754 } 755 spin_unlock(&vm->userptr.invalidated_lock); 756 up_write(&vm->userptr.notifier_lock); 757 } 758 return err; 759 } 760 761 /** 762 * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs 763 * that need repinning. 764 * @vm: The VM. 765 * 766 * This function does an advisory check for whether the VM has userptrs that 767 * need repinning. 768 * 769 * Return: 0 if there are no indications of userptrs needing repinning, 770 * -EAGAIN if there are. 771 */ 772 int xe_vm_userptr_check_repin(struct xe_vm *vm) 773 { 774 return (list_empty_careful(&vm->userptr.repin_list) && 775 list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN; 776 } 777 778 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) 779 { 780 int i; 781 782 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) { 783 if (!vops->pt_update_ops[i].num_ops) 784 continue; 785 786 vops->pt_update_ops[i].ops = 787 kmalloc_array(vops->pt_update_ops[i].num_ops, 788 sizeof(*vops->pt_update_ops[i].ops), 789 GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 790 if (!vops->pt_update_ops[i].ops) 791 return array_of_binds ? 
-ENOBUFS : -ENOMEM; 792 } 793 794 return 0; 795 } 796 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO); 797 798 static void xe_vma_ops_fini(struct xe_vma_ops *vops) 799 { 800 int i; 801 802 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 803 kfree(vops->pt_update_ops[i].ops); 804 } 805 806 static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask) 807 { 808 int i; 809 810 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 811 if (BIT(i) & tile_mask) 812 ++vops->pt_update_ops[i].num_ops; 813 } 814 815 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, 816 u8 tile_mask) 817 { 818 INIT_LIST_HEAD(&op->link); 819 op->tile_mask = tile_mask; 820 op->base.op = DRM_GPUVA_OP_MAP; 821 op->base.map.va.addr = vma->gpuva.va.addr; 822 op->base.map.va.range = vma->gpuva.va.range; 823 op->base.map.gem.obj = vma->gpuva.gem.obj; 824 op->base.map.gem.offset = vma->gpuva.gem.offset; 825 op->map.vma = vma; 826 op->map.immediate = true; 827 op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE; 828 op->map.is_null = xe_vma_is_null(vma); 829 } 830 831 static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, 832 u8 tile_mask) 833 { 834 struct xe_vma_op *op; 835 836 op = kzalloc(sizeof(*op), GFP_KERNEL); 837 if (!op) 838 return -ENOMEM; 839 840 xe_vm_populate_rebind(op, vma, tile_mask); 841 list_add_tail(&op->link, &vops->list); 842 xe_vma_ops_incr_pt_update_ops(vops, tile_mask); 843 844 return 0; 845 } 846 847 static struct dma_fence *ops_execute(struct xe_vm *vm, 848 struct xe_vma_ops *vops); 849 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 850 struct xe_exec_queue *q, 851 struct xe_sync_entry *syncs, u32 num_syncs); 852 853 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) 854 { 855 struct dma_fence *fence; 856 struct xe_vma *vma, *next; 857 struct xe_vma_ops vops; 858 struct xe_vma_op *op, *next_op; 859 int err, i; 860 861 lockdep_assert_held(&vm->lock); 862 if ((xe_vm_in_lr_mode(vm) && !rebind_worker) || 863 list_empty(&vm->rebind_list)) 864 return 0; 865 866 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 867 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 868 vops.pt_update_ops[i].wait_vm_bookkeep = true; 869 870 xe_vm_assert_held(vm); 871 list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { 872 xe_assert(vm->xe, vma->tile_present); 873 874 if (rebind_worker) 875 trace_xe_vma_rebind_worker(vma); 876 else 877 trace_xe_vma_rebind_exec(vma); 878 879 err = xe_vm_ops_add_rebind(&vops, vma, 880 vma->tile_present); 881 if (err) 882 goto free_ops; 883 } 884 885 err = xe_vma_ops_alloc(&vops, false); 886 if (err) 887 goto free_ops; 888 889 fence = ops_execute(vm, &vops); 890 if (IS_ERR(fence)) { 891 err = PTR_ERR(fence); 892 } else { 893 dma_fence_put(fence); 894 list_for_each_entry_safe(vma, next, &vm->rebind_list, 895 combined_links.rebind) 896 list_del_init(&vma->combined_links.rebind); 897 } 898 free_ops: 899 list_for_each_entry_safe(op, next_op, &vops.list, link) { 900 list_del(&op->link); 901 kfree(op); 902 } 903 xe_vma_ops_fini(&vops); 904 905 return err; 906 } 907 908 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask) 909 { 910 struct dma_fence *fence = NULL; 911 struct xe_vma_ops vops; 912 struct xe_vma_op *op, *next_op; 913 struct xe_tile *tile; 914 u8 id; 915 int err; 916 917 lockdep_assert_held(&vm->lock); 918 xe_vm_assert_held(vm); 919 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 920 921 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 922 for_each_tile(tile, vm->xe, id) { 923 
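		/*
		 * Route the rebind through each tile's migration exec queue
		 * and have the PT update wait on the VM's BOOKKEEP fences.
		 */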
vops.pt_update_ops[id].wait_vm_bookkeep = true; 924 vops.pt_update_ops[tile->id].q = 925 xe_tile_migrate_exec_queue(tile); 926 } 927 928 err = xe_vm_ops_add_rebind(&vops, vma, tile_mask); 929 if (err) 930 return ERR_PTR(err); 931 932 err = xe_vma_ops_alloc(&vops, false); 933 if (err) { 934 fence = ERR_PTR(err); 935 goto free_ops; 936 } 937 938 fence = ops_execute(vm, &vops); 939 940 free_ops: 941 list_for_each_entry_safe(op, next_op, &vops.list, link) { 942 list_del(&op->link); 943 kfree(op); 944 } 945 xe_vma_ops_fini(&vops); 946 947 return fence; 948 } 949 950 static void xe_vma_free(struct xe_vma *vma) 951 { 952 if (xe_vma_is_userptr(vma)) 953 kfree(to_userptr_vma(vma)); 954 else 955 kfree(vma); 956 } 957 958 #define VMA_CREATE_FLAG_READ_ONLY BIT(0) 959 #define VMA_CREATE_FLAG_IS_NULL BIT(1) 960 #define VMA_CREATE_FLAG_DUMPABLE BIT(2) 961 962 static struct xe_vma *xe_vma_create(struct xe_vm *vm, 963 struct xe_bo *bo, 964 u64 bo_offset_or_userptr, 965 u64 start, u64 end, 966 u16 pat_index, unsigned int flags) 967 { 968 struct xe_vma *vma; 969 struct xe_tile *tile; 970 u8 id; 971 bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY); 972 bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL); 973 bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE); 974 975 xe_assert(vm->xe, start < end); 976 xe_assert(vm->xe, end < vm->size); 977 978 /* 979 * Allocate and ensure that the xe_vma_is_userptr() return 980 * matches what was allocated. 981 */ 982 if (!bo && !is_null) { 983 struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL); 984 985 if (!uvma) 986 return ERR_PTR(-ENOMEM); 987 988 vma = &uvma->vma; 989 } else { 990 vma = kzalloc(sizeof(*vma), GFP_KERNEL); 991 if (!vma) 992 return ERR_PTR(-ENOMEM); 993 994 if (is_null) 995 vma->gpuva.flags |= DRM_GPUVA_SPARSE; 996 if (bo) 997 vma->gpuva.gem.obj = &bo->ttm.base; 998 } 999 1000 INIT_LIST_HEAD(&vma->combined_links.rebind); 1001 1002 INIT_LIST_HEAD(&vma->gpuva.gem.entry); 1003 vma->gpuva.vm = &vm->gpuvm; 1004 vma->gpuva.va.addr = start; 1005 vma->gpuva.va.range = end - start + 1; 1006 if (read_only) 1007 vma->gpuva.flags |= XE_VMA_READ_ONLY; 1008 if (dumpable) 1009 vma->gpuva.flags |= XE_VMA_DUMPABLE; 1010 1011 for_each_tile(tile, vm->xe, id) 1012 vma->tile_mask |= 0x1 << id; 1013 1014 if (vm->xe->info.has_atomic_enable_pte_bit) 1015 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; 1016 1017 vma->pat_index = pat_index; 1018 1019 if (bo) { 1020 struct drm_gpuvm_bo *vm_bo; 1021 1022 xe_bo_assert_held(bo); 1023 1024 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base); 1025 if (IS_ERR(vm_bo)) { 1026 xe_vma_free(vma); 1027 return ERR_CAST(vm_bo); 1028 } 1029 1030 drm_gpuvm_bo_extobj_add(vm_bo); 1031 drm_gem_object_get(&bo->ttm.base); 1032 vma->gpuva.gem.offset = bo_offset_or_userptr; 1033 drm_gpuva_link(&vma->gpuva, vm_bo); 1034 drm_gpuvm_bo_put(vm_bo); 1035 } else /* userptr or null */ { 1036 if (!is_null) { 1037 struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; 1038 u64 size = end - start + 1; 1039 int err; 1040 1041 INIT_LIST_HEAD(&userptr->invalidate_link); 1042 INIT_LIST_HEAD(&userptr->repin_link); 1043 vma->gpuva.gem.offset = bo_offset_or_userptr; 1044 mutex_init(&userptr->unmap_mutex); 1045 1046 err = mmu_interval_notifier_insert(&userptr->notifier, 1047 current->mm, 1048 xe_vma_userptr(vma), size, 1049 &vma_userptr_notifier_ops); 1050 if (err) { 1051 xe_vma_free(vma); 1052 return ERR_PTR(err); 1053 } 1054 1055 userptr->notifier_seq = LONG_MAX; 1056 } 1057 1058 xe_vm_get(vm); 1059 } 1060 1061 return vma; 1062 } 1063 1064 static void 
xe_vma_destroy_late(struct xe_vma *vma)
{
	struct xe_vm *vm = xe_vma_vm(vma);

	if (vma->ufence) {
		xe_sync_ufence_put(vma->ufence);
		vma->ufence = NULL;
	}

	if (xe_vma_is_userptr(vma)) {
		struct xe_userptr_vma *uvma = to_userptr_vma(vma);
		struct xe_userptr *userptr = &uvma->userptr;

		if (userptr->sg)
			xe_hmm_userptr_free_sg(uvma);

		/*
		 * Since userptr pages are not pinned, we can't remove
		 * the notifier until we're sure the GPU is not accessing
		 * them anymore
		 */
		mmu_interval_notifier_remove(&userptr->notifier);
		mutex_destroy(&userptr->unmap_mutex);
		xe_vm_put(vm);
	} else if (xe_vma_is_null(vma)) {
		xe_vm_put(vm);
	} else {
		xe_bo_put(xe_vma_bo(vma));
	}

	xe_vma_free(vma);
}

static void vma_destroy_work_func(struct work_struct *w)
{
	struct xe_vma *vma =
		container_of(w, struct xe_vma, destroy_work);

	xe_vma_destroy_late(vma);
}

static void vma_destroy_cb(struct dma_fence *fence,
			   struct dma_fence_cb *cb)
{
	struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);

	INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
	queue_work(system_unbound_wq, &vma->destroy_work);
}

static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
{
	struct xe_vm *vm = xe_vma_vm(vma);

	lockdep_assert_held_write(&vm->lock);
	xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));

	if (xe_vma_is_userptr(vma)) {
		xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);

		spin_lock(&vm->userptr.invalidated_lock);
		xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link));
		list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
		spin_unlock(&vm->userptr.invalidated_lock);
	} else if (!xe_vma_is_null(vma)) {
		xe_bo_assert_held(xe_vma_bo(vma));

		drm_gpuva_unlink(&vma->gpuva);
	}

	xe_vm_assert_held(vm);
	if (fence) {
		int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
						 vma_destroy_cb);

		if (ret) {
			XE_WARN_ON(ret != -ENOENT);
			xe_vma_destroy_late(vma);
		}
	} else {
		xe_vma_destroy_late(vma);
	}
}

/**
 * xe_vm_lock_vma() - drm_exec utility to lock a vma
 * @exec: The drm_exec object we're currently locking for.
 * @vma: The vma for which we want to lock the vm resv and any attached
 * object's resv.
 *
 * Return: 0 on success, negative error code on error. In particular
 * may return -EDEADLK on WW transaction contention and -EINTR if
 * an interruptible wait is terminated by a signal.
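 *
 * Typically used from inside a drm_exec loop; an illustrative sketch
 * (mirroring xe_vma_destroy_unlocked() below):
 *
 *	drm_exec_init(&exec, 0, 0);
 *	drm_exec_until_all_locked(&exec) {
 *		err = xe_vm_lock_vma(&exec, vma);
 *		drm_exec_retry_on_contention(&exec);
 *	}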
1157 */ 1158 int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma) 1159 { 1160 struct xe_vm *vm = xe_vma_vm(vma); 1161 struct xe_bo *bo = xe_vma_bo(vma); 1162 int err; 1163 1164 XE_WARN_ON(!vm); 1165 1166 err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 1167 if (!err && bo && !bo->vm) 1168 err = drm_exec_lock_obj(exec, &bo->ttm.base); 1169 1170 return err; 1171 } 1172 1173 static void xe_vma_destroy_unlocked(struct xe_vma *vma) 1174 { 1175 struct drm_exec exec; 1176 int err; 1177 1178 drm_exec_init(&exec, 0, 0); 1179 drm_exec_until_all_locked(&exec) { 1180 err = xe_vm_lock_vma(&exec, vma); 1181 drm_exec_retry_on_contention(&exec); 1182 if (XE_WARN_ON(err)) 1183 break; 1184 } 1185 1186 xe_vma_destroy(vma, NULL); 1187 1188 drm_exec_fini(&exec); 1189 } 1190 1191 struct xe_vma * 1192 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range) 1193 { 1194 struct drm_gpuva *gpuva; 1195 1196 lockdep_assert_held(&vm->lock); 1197 1198 if (xe_vm_is_closed_or_banned(vm)) 1199 return NULL; 1200 1201 xe_assert(vm->xe, start + range <= vm->size); 1202 1203 gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range); 1204 1205 return gpuva ? gpuva_to_vma(gpuva) : NULL; 1206 } 1207 1208 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma) 1209 { 1210 int err; 1211 1212 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1213 lockdep_assert_held(&vm->lock); 1214 1215 mutex_lock(&vm->snap_mutex); 1216 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva); 1217 mutex_unlock(&vm->snap_mutex); 1218 XE_WARN_ON(err); /* Shouldn't be possible */ 1219 1220 return err; 1221 } 1222 1223 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma) 1224 { 1225 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1226 lockdep_assert_held(&vm->lock); 1227 1228 mutex_lock(&vm->snap_mutex); 1229 drm_gpuva_remove(&vma->gpuva); 1230 mutex_unlock(&vm->snap_mutex); 1231 if (vm->usm.last_fault_vma == vma) 1232 vm->usm.last_fault_vma = NULL; 1233 } 1234 1235 static struct drm_gpuva_op *xe_vm_op_alloc(void) 1236 { 1237 struct xe_vma_op *op; 1238 1239 op = kzalloc(sizeof(*op), GFP_KERNEL); 1240 1241 if (unlikely(!op)) 1242 return NULL; 1243 1244 return &op->base; 1245 } 1246 1247 static void xe_vm_free(struct drm_gpuvm *gpuvm); 1248 1249 static const struct drm_gpuvm_ops gpuvm_ops = { 1250 .op_alloc = xe_vm_op_alloc, 1251 .vm_bo_validate = xe_gpuvm_validate, 1252 .vm_free = xe_vm_free, 1253 }; 1254 1255 static u64 pde_encode_pat_index(u16 pat_index) 1256 { 1257 u64 pte = 0; 1258 1259 if (pat_index & BIT(0)) 1260 pte |= XE_PPGTT_PTE_PAT0; 1261 1262 if (pat_index & BIT(1)) 1263 pte |= XE_PPGTT_PTE_PAT1; 1264 1265 return pte; 1266 } 1267 1268 static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level) 1269 { 1270 u64 pte = 0; 1271 1272 if (pat_index & BIT(0)) 1273 pte |= XE_PPGTT_PTE_PAT0; 1274 1275 if (pat_index & BIT(1)) 1276 pte |= XE_PPGTT_PTE_PAT1; 1277 1278 if (pat_index & BIT(2)) { 1279 if (pt_level) 1280 pte |= XE_PPGTT_PDE_PDPE_PAT2; 1281 else 1282 pte |= XE_PPGTT_PTE_PAT2; 1283 } 1284 1285 if (pat_index & BIT(3)) 1286 pte |= XELPG_PPGTT_PTE_PAT3; 1287 1288 if (pat_index & (BIT(4))) 1289 pte |= XE2_PPGTT_PTE_PAT4; 1290 1291 return pte; 1292 } 1293 1294 static u64 pte_encode_ps(u32 pt_level) 1295 { 1296 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL); 1297 1298 if (pt_level == 1) 1299 return XE_PDE_PS_2M; 1300 else if (pt_level == 2) 1301 return XE_PDPE_PS_1G; 1302 1303 return 0; 1304 } 1305 1306 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset, 1307 const u16 pat_index) 1308 { 1309 u64 pde; 1310 1311 pde = 
xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1312 pde |= XE_PAGE_PRESENT | XE_PAGE_RW; 1313 pde |= pde_encode_pat_index(pat_index); 1314 1315 return pde; 1316 } 1317 1318 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, 1319 u16 pat_index, u32 pt_level) 1320 { 1321 u64 pte; 1322 1323 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1324 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1325 pte |= pte_encode_pat_index(pat_index, pt_level); 1326 pte |= pte_encode_ps(pt_level); 1327 1328 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) 1329 pte |= XE_PPGTT_PTE_DM; 1330 1331 return pte; 1332 } 1333 1334 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, 1335 u16 pat_index, u32 pt_level) 1336 { 1337 pte |= XE_PAGE_PRESENT; 1338 1339 if (likely(!xe_vma_read_only(vma))) 1340 pte |= XE_PAGE_RW; 1341 1342 pte |= pte_encode_pat_index(pat_index, pt_level); 1343 pte |= pte_encode_ps(pt_level); 1344 1345 if (unlikely(xe_vma_is_null(vma))) 1346 pte |= XE_PTE_NULL; 1347 1348 return pte; 1349 } 1350 1351 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, 1352 u16 pat_index, 1353 u32 pt_level, bool devmem, u64 flags) 1354 { 1355 u64 pte; 1356 1357 /* Avoid passing random bits directly as flags */ 1358 xe_assert(xe, !(flags & ~XE_PTE_PS64)); 1359 1360 pte = addr; 1361 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1362 pte |= pte_encode_pat_index(pat_index, pt_level); 1363 pte |= pte_encode_ps(pt_level); 1364 1365 if (devmem) 1366 pte |= XE_PPGTT_PTE_DM; 1367 1368 pte |= flags; 1369 1370 return pte; 1371 } 1372 1373 static const struct xe_pt_ops xelp_pt_ops = { 1374 .pte_encode_bo = xelp_pte_encode_bo, 1375 .pte_encode_vma = xelp_pte_encode_vma, 1376 .pte_encode_addr = xelp_pte_encode_addr, 1377 .pde_encode_bo = xelp_pde_encode_bo, 1378 }; 1379 1380 static void vm_destroy_work_func(struct work_struct *w); 1381 1382 /** 1383 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the 1384 * given tile and vm. 1385 * @xe: xe device. 1386 * @tile: tile to set up for. 1387 * @vm: vm to set up for. 1388 * 1389 * Sets up a pagetable tree with one page-table per level and a single 1390 * leaf PTE. All pagetable entries point to the single page-table or, 1391 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and 1392 * writes become NOPs. 1393 * 1394 * Return: 0 on success, negative error code on error. 
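 *
 * The practical effect is that, with scratch enabled, GPU reads through
 * unmapped ranges of the VM return zeroes and writes to them are dropped,
 * instead of generating faults.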
1395 */ 1396 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, 1397 struct xe_vm *vm) 1398 { 1399 u8 id = tile->id; 1400 int i; 1401 1402 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) { 1403 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i); 1404 if (IS_ERR(vm->scratch_pt[id][i])) 1405 return PTR_ERR(vm->scratch_pt[id][i]); 1406 1407 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); 1408 } 1409 1410 return 0; 1411 } 1412 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO); 1413 1414 static void xe_vm_free_scratch(struct xe_vm *vm) 1415 { 1416 struct xe_tile *tile; 1417 u8 id; 1418 1419 if (!xe_vm_has_scratch(vm)) 1420 return; 1421 1422 for_each_tile(tile, vm->xe, id) { 1423 u32 i; 1424 1425 if (!vm->pt_root[id]) 1426 continue; 1427 1428 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i) 1429 if (vm->scratch_pt[id][i]) 1430 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL); 1431 } 1432 } 1433 1434 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) 1435 { 1436 struct drm_gem_object *vm_resv_obj; 1437 struct xe_vm *vm; 1438 int err, number_tiles = 0; 1439 struct xe_tile *tile; 1440 u8 id; 1441 1442 vm = kzalloc(sizeof(*vm), GFP_KERNEL); 1443 if (!vm) 1444 return ERR_PTR(-ENOMEM); 1445 1446 vm->xe = xe; 1447 1448 vm->size = 1ull << xe->info.va_bits; 1449 1450 vm->flags = flags; 1451 1452 init_rwsem(&vm->lock); 1453 mutex_init(&vm->snap_mutex); 1454 1455 INIT_LIST_HEAD(&vm->rebind_list); 1456 1457 INIT_LIST_HEAD(&vm->userptr.repin_list); 1458 INIT_LIST_HEAD(&vm->userptr.invalidated); 1459 init_rwsem(&vm->userptr.notifier_lock); 1460 spin_lock_init(&vm->userptr.invalidated_lock); 1461 1462 ttm_lru_bulk_move_init(&vm->lru_bulk_move); 1463 1464 INIT_WORK(&vm->destroy_work, vm_destroy_work_func); 1465 1466 INIT_LIST_HEAD(&vm->preempt.exec_queues); 1467 vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */ 1468 1469 for_each_tile(tile, xe, id) 1470 xe_range_fence_tree_init(&vm->rftree[id]); 1471 1472 vm->pt_ops = &xelp_pt_ops; 1473 1474 /* 1475 * Long-running workloads are not protected by the scheduler references. 1476 * By design, run_job for long-running workloads returns NULL and the 1477 * scheduler drops all the references of it, hence protecting the VM 1478 * for this case is necessary. 
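	 *
	 * The runtime-PM reference taken below is dropped again either on the
	 * error path at the end of this function or when the VM is finally
	 * freed in vm_destroy_work_func().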
1479 */ 1480 if (flags & XE_VM_FLAG_LR_MODE) 1481 xe_pm_runtime_get_noresume(xe); 1482 1483 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); 1484 if (!vm_resv_obj) { 1485 err = -ENOMEM; 1486 goto err_no_resv; 1487 } 1488 1489 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm, 1490 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops); 1491 1492 drm_gem_object_put(vm_resv_obj); 1493 1494 err = xe_vm_lock(vm, true); 1495 if (err) 1496 goto err_close; 1497 1498 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) 1499 vm->flags |= XE_VM_FLAG_64K; 1500 1501 for_each_tile(tile, xe, id) { 1502 if (flags & XE_VM_FLAG_MIGRATION && 1503 tile->id != XE_VM_FLAG_TILE_ID(flags)) 1504 continue; 1505 1506 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level); 1507 if (IS_ERR(vm->pt_root[id])) { 1508 err = PTR_ERR(vm->pt_root[id]); 1509 vm->pt_root[id] = NULL; 1510 goto err_unlock_close; 1511 } 1512 } 1513 1514 if (xe_vm_has_scratch(vm)) { 1515 for_each_tile(tile, xe, id) { 1516 if (!vm->pt_root[id]) 1517 continue; 1518 1519 err = xe_vm_create_scratch(xe, tile, vm); 1520 if (err) 1521 goto err_unlock_close; 1522 } 1523 vm->batch_invalidate_tlb = true; 1524 } 1525 1526 if (vm->flags & XE_VM_FLAG_LR_MODE) { 1527 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); 1528 vm->batch_invalidate_tlb = false; 1529 } 1530 1531 /* Fill pt_root after allocating scratch tables */ 1532 for_each_tile(tile, xe, id) { 1533 if (!vm->pt_root[id]) 1534 continue; 1535 1536 xe_pt_populate_empty(tile, vm, vm->pt_root[id]); 1537 } 1538 xe_vm_unlock(vm); 1539 1540 /* Kernel migration VM shouldn't have a circular loop.. */ 1541 if (!(flags & XE_VM_FLAG_MIGRATION)) { 1542 for_each_tile(tile, xe, id) { 1543 struct xe_exec_queue *q; 1544 u32 create_flags = EXEC_QUEUE_FLAG_VM; 1545 1546 if (!vm->pt_root[id]) 1547 continue; 1548 1549 q = xe_exec_queue_create_bind(xe, tile, create_flags, 0); 1550 if (IS_ERR(q)) { 1551 err = PTR_ERR(q); 1552 goto err_close; 1553 } 1554 vm->q[id] = q; 1555 number_tiles++; 1556 } 1557 } 1558 1559 if (number_tiles > 1) 1560 vm->composite_fence_ctx = dma_fence_context_alloc(1); 1561 1562 trace_xe_vm_create(vm); 1563 1564 return vm; 1565 1566 err_unlock_close: 1567 xe_vm_unlock(vm); 1568 err_close: 1569 xe_vm_close_and_put(vm); 1570 return ERR_PTR(err); 1571 1572 err_no_resv: 1573 mutex_destroy(&vm->snap_mutex); 1574 for_each_tile(tile, xe, id) 1575 xe_range_fence_tree_fini(&vm->rftree[id]); 1576 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 1577 kfree(vm); 1578 if (flags & XE_VM_FLAG_LR_MODE) 1579 xe_pm_runtime_put(xe); 1580 return ERR_PTR(err); 1581 } 1582 1583 static void xe_vm_close(struct xe_vm *vm) 1584 { 1585 down_write(&vm->lock); 1586 vm->size = 0; 1587 up_write(&vm->lock); 1588 } 1589 1590 void xe_vm_close_and_put(struct xe_vm *vm) 1591 { 1592 LIST_HEAD(contested); 1593 struct xe_device *xe = vm->xe; 1594 struct xe_tile *tile; 1595 struct xe_vma *vma, *next_vma; 1596 struct drm_gpuva *gpuva, *next; 1597 u8 id; 1598 1599 xe_assert(xe, !vm->preempt.num_exec_queues); 1600 1601 xe_vm_close(vm); 1602 if (xe_vm_in_preempt_fence_mode(vm)) 1603 flush_work(&vm->preempt.rebind_work); 1604 1605 down_write(&vm->lock); 1606 for_each_tile(tile, xe, id) { 1607 if (vm->q[id]) 1608 xe_exec_queue_last_fence_put(vm->q[id], vm); 1609 } 1610 up_write(&vm->lock); 1611 1612 for_each_tile(tile, xe, id) { 1613 if (vm->q[id]) { 1614 xe_exec_queue_kill(vm->q[id]); 1615 xe_exec_queue_put(vm->q[id]); 1616 vm->q[id] = NULL; 1617 } 1618 } 1619 1620 down_write(&vm->lock); 1621 
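	/* Also take the VM's dma-resv lock while tearing down the VMAs. */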
xe_vm_lock(vm, false); 1622 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) { 1623 vma = gpuva_to_vma(gpuva); 1624 1625 if (xe_vma_has_no_bo(vma)) { 1626 down_read(&vm->userptr.notifier_lock); 1627 vma->gpuva.flags |= XE_VMA_DESTROYED; 1628 up_read(&vm->userptr.notifier_lock); 1629 } 1630 1631 xe_vm_remove_vma(vm, vma); 1632 1633 /* easy case, remove from VMA? */ 1634 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) { 1635 list_del_init(&vma->combined_links.rebind); 1636 xe_vma_destroy(vma, NULL); 1637 continue; 1638 } 1639 1640 list_move_tail(&vma->combined_links.destroy, &contested); 1641 vma->gpuva.flags |= XE_VMA_DESTROYED; 1642 } 1643 1644 /* 1645 * All vm operations will add shared fences to resv. 1646 * The only exception is eviction for a shared object, 1647 * but even so, the unbind when evicted would still 1648 * install a fence to resv. Hence it's safe to 1649 * destroy the pagetables immediately. 1650 */ 1651 xe_vm_free_scratch(vm); 1652 1653 for_each_tile(tile, xe, id) { 1654 if (vm->pt_root[id]) { 1655 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); 1656 vm->pt_root[id] = NULL; 1657 } 1658 } 1659 xe_vm_unlock(vm); 1660 1661 /* 1662 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL 1663 * Since we hold a refcount to the bo, we can remove and free 1664 * the members safely without locking. 1665 */ 1666 list_for_each_entry_safe(vma, next_vma, &contested, 1667 combined_links.destroy) { 1668 list_del_init(&vma->combined_links.destroy); 1669 xe_vma_destroy_unlocked(vma); 1670 } 1671 1672 up_write(&vm->lock); 1673 1674 down_write(&xe->usm.lock); 1675 if (vm->usm.asid) { 1676 void *lookup; 1677 1678 xe_assert(xe, xe->info.has_asid); 1679 xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION)); 1680 1681 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); 1682 xe_assert(xe, lookup == vm); 1683 } 1684 up_write(&xe->usm.lock); 1685 1686 for_each_tile(tile, xe, id) 1687 xe_range_fence_tree_fini(&vm->rftree[id]); 1688 1689 xe_vm_put(vm); 1690 } 1691 1692 static void vm_destroy_work_func(struct work_struct *w) 1693 { 1694 struct xe_vm *vm = 1695 container_of(w, struct xe_vm, destroy_work); 1696 struct xe_device *xe = vm->xe; 1697 struct xe_tile *tile; 1698 u8 id; 1699 1700 /* xe_vm_close_and_put was not called? 
*/ 1701 xe_assert(xe, !vm->size); 1702 1703 if (xe_vm_in_preempt_fence_mode(vm)) 1704 flush_work(&vm->preempt.rebind_work); 1705 1706 mutex_destroy(&vm->snap_mutex); 1707 1708 if (vm->flags & XE_VM_FLAG_LR_MODE) 1709 xe_pm_runtime_put(xe); 1710 1711 for_each_tile(tile, xe, id) 1712 XE_WARN_ON(vm->pt_root[id]); 1713 1714 trace_xe_vm_free(vm); 1715 1716 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 1717 1718 if (vm->xef) 1719 xe_file_put(vm->xef); 1720 1721 kfree(vm); 1722 } 1723 1724 static void xe_vm_free(struct drm_gpuvm *gpuvm) 1725 { 1726 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); 1727 1728 /* To destroy the VM we need to be able to sleep */ 1729 queue_work(system_unbound_wq, &vm->destroy_work); 1730 } 1731 1732 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) 1733 { 1734 struct xe_vm *vm; 1735 1736 mutex_lock(&xef->vm.lock); 1737 vm = xa_load(&xef->vm.xa, id); 1738 if (vm) 1739 xe_vm_get(vm); 1740 mutex_unlock(&xef->vm.lock); 1741 1742 return vm; 1743 } 1744 1745 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile) 1746 { 1747 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0, 1748 tile_to_xe(tile)->pat.idx[XE_CACHE_WB]); 1749 } 1750 1751 static struct xe_exec_queue * 1752 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 1753 { 1754 return q ? q : vm->q[0]; 1755 } 1756 1757 static struct xe_user_fence * 1758 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) 1759 { 1760 unsigned int i; 1761 1762 for (i = 0; i < num_syncs; i++) { 1763 struct xe_sync_entry *e = &syncs[i]; 1764 1765 if (xe_sync_is_ufence(e)) 1766 return xe_sync_ufence_get(e); 1767 } 1768 1769 return NULL; 1770 } 1771 1772 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ 1773 DRM_XE_VM_CREATE_FLAG_LR_MODE | \ 1774 DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 1775 1776 int xe_vm_create_ioctl(struct drm_device *dev, void *data, 1777 struct drm_file *file) 1778 { 1779 struct xe_device *xe = to_xe_device(dev); 1780 struct xe_file *xef = to_xe_file(file); 1781 struct drm_xe_vm_create *args = data; 1782 struct xe_tile *tile; 1783 struct xe_vm *vm; 1784 u32 id, asid; 1785 int err; 1786 u32 flags = 0; 1787 1788 if (XE_IOCTL_DBG(xe, args->extensions)) 1789 return -EINVAL; 1790 1791 if (XE_WA(xe_root_mmio_gt(xe), 14016763929)) 1792 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE; 1793 1794 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && 1795 !xe->info.has_usm)) 1796 return -EINVAL; 1797 1798 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 1799 return -EINVAL; 1800 1801 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS)) 1802 return -EINVAL; 1803 1804 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE && 1805 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) 1806 return -EINVAL; 1807 1808 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) && 1809 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) 1810 return -EINVAL; 1811 1812 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE) 1813 flags |= XE_VM_FLAG_SCRATCH_PAGE; 1814 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) 1815 flags |= XE_VM_FLAG_LR_MODE; 1816 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 1817 flags |= XE_VM_FLAG_FAULT_MODE; 1818 1819 vm = xe_vm_create(xe, flags); 1820 if (IS_ERR(vm)) 1821 return PTR_ERR(vm); 1822 1823 if (xe->info.has_asid) { 1824 down_write(&xe->usm.lock); 1825 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, 1826 XA_LIMIT(1, XE_MAX_ASID - 1), 1827 
&xe->usm.next_asid, GFP_KERNEL); 1828 up_write(&xe->usm.lock); 1829 if (err < 0) 1830 goto err_close_and_put; 1831 1832 vm->usm.asid = asid; 1833 } 1834 1835 vm->xef = xe_file_get(xef); 1836 1837 /* Record BO memory for VM pagetable created against client */ 1838 for_each_tile(tile, xe, id) 1839 if (vm->pt_root[id]) 1840 xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo); 1841 1842 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) 1843 /* Warning: Security issue - never enable by default */ 1844 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); 1845 #endif 1846 1847 /* user id alloc must always be last in ioctl to prevent UAF */ 1848 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); 1849 if (err) 1850 goto err_close_and_put; 1851 1852 args->vm_id = id; 1853 1854 return 0; 1855 1856 err_close_and_put: 1857 xe_vm_close_and_put(vm); 1858 1859 return err; 1860 } 1861 1862 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, 1863 struct drm_file *file) 1864 { 1865 struct xe_device *xe = to_xe_device(dev); 1866 struct xe_file *xef = to_xe_file(file); 1867 struct drm_xe_vm_destroy *args = data; 1868 struct xe_vm *vm; 1869 int err = 0; 1870 1871 if (XE_IOCTL_DBG(xe, args->pad) || 1872 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 1873 return -EINVAL; 1874 1875 mutex_lock(&xef->vm.lock); 1876 vm = xa_load(&xef->vm.xa, args->vm_id); 1877 if (XE_IOCTL_DBG(xe, !vm)) 1878 err = -ENOENT; 1879 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues)) 1880 err = -EBUSY; 1881 else 1882 xa_erase(&xef->vm.xa, args->vm_id); 1883 mutex_unlock(&xef->vm.lock); 1884 1885 if (!err) 1886 xe_vm_close_and_put(vm); 1887 1888 return err; 1889 } 1890 1891 static const u32 region_to_mem_type[] = { 1892 XE_PL_TT, 1893 XE_PL_VRAM0, 1894 XE_PL_VRAM1, 1895 }; 1896 1897 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma, 1898 bool post_commit) 1899 { 1900 down_read(&vm->userptr.notifier_lock); 1901 vma->gpuva.flags |= XE_VMA_DESTROYED; 1902 up_read(&vm->userptr.notifier_lock); 1903 if (post_commit) 1904 xe_vm_remove_vma(vm, vma); 1905 } 1906 1907 #undef ULL 1908 #define ULL unsigned long long 1909 1910 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) 1911 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 1912 { 1913 struct xe_vma *vma; 1914 1915 switch (op->op) { 1916 case DRM_GPUVA_OP_MAP: 1917 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx", 1918 (ULL)op->map.va.addr, (ULL)op->map.va.range); 1919 break; 1920 case DRM_GPUVA_OP_REMAP: 1921 vma = gpuva_to_vma(op->remap.unmap->va); 1922 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 1923 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 1924 op->remap.unmap->keep ? 1 : 0); 1925 if (op->remap.prev) 1926 vm_dbg(&xe->drm, 1927 "REMAP:PREV: addr=0x%016llx, range=0x%016llx", 1928 (ULL)op->remap.prev->va.addr, 1929 (ULL)op->remap.prev->va.range); 1930 if (op->remap.next) 1931 vm_dbg(&xe->drm, 1932 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx", 1933 (ULL)op->remap.next->va.addr, 1934 (ULL)op->remap.next->va.range); 1935 break; 1936 case DRM_GPUVA_OP_UNMAP: 1937 vma = gpuva_to_vma(op->unmap.va); 1938 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 1939 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 1940 op->unmap.keep ? 
1 : 0); 1941 break; 1942 case DRM_GPUVA_OP_PREFETCH: 1943 vma = gpuva_to_vma(op->prefetch.va); 1944 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx", 1945 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma)); 1946 break; 1947 default: 1948 drm_warn(&xe->drm, "NOT POSSIBLE"); 1949 } 1950 } 1951 #else 1952 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 1953 { 1954 } 1955 #endif 1956 1957 /* 1958 * Create operations list from IOCTL arguments, setup operations fields so parse 1959 * and commit steps are decoupled from IOCTL arguments. This step can fail. 1960 */ 1961 static struct drm_gpuva_ops * 1962 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, 1963 u64 bo_offset_or_userptr, u64 addr, u64 range, 1964 u32 operation, u32 flags, 1965 u32 prefetch_region, u16 pat_index) 1966 { 1967 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL; 1968 struct drm_gpuva_ops *ops; 1969 struct drm_gpuva_op *__op; 1970 struct drm_gpuvm_bo *vm_bo; 1971 int err; 1972 1973 lockdep_assert_held_write(&vm->lock); 1974 1975 vm_dbg(&vm->xe->drm, 1976 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx", 1977 operation, (ULL)addr, (ULL)range, 1978 (ULL)bo_offset_or_userptr); 1979 1980 switch (operation) { 1981 case DRM_XE_VM_BIND_OP_MAP: 1982 case DRM_XE_VM_BIND_OP_MAP_USERPTR: 1983 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range, 1984 obj, bo_offset_or_userptr); 1985 break; 1986 case DRM_XE_VM_BIND_OP_UNMAP: 1987 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range); 1988 break; 1989 case DRM_XE_VM_BIND_OP_PREFETCH: 1990 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range); 1991 break; 1992 case DRM_XE_VM_BIND_OP_UNMAP_ALL: 1993 xe_assert(vm->xe, bo); 1994 1995 err = xe_bo_lock(bo, true); 1996 if (err) 1997 return ERR_PTR(err); 1998 1999 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj); 2000 if (IS_ERR(vm_bo)) { 2001 xe_bo_unlock(bo); 2002 return ERR_CAST(vm_bo); 2003 } 2004 2005 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo); 2006 drm_gpuvm_bo_put(vm_bo); 2007 xe_bo_unlock(bo); 2008 break; 2009 default: 2010 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2011 ops = ERR_PTR(-EINVAL); 2012 } 2013 if (IS_ERR(ops)) 2014 return ops; 2015 2016 drm_gpuva_for_each_op(__op, ops) { 2017 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2018 2019 if (__op->op == DRM_GPUVA_OP_MAP) { 2020 op->map.immediate = 2021 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; 2022 op->map.read_only = 2023 flags & DRM_XE_VM_BIND_FLAG_READONLY; 2024 op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 2025 op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; 2026 op->map.pat_index = pat_index; 2027 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { 2028 op->prefetch.region = prefetch_region; 2029 } 2030 2031 print_op(vm->xe, __op); 2032 } 2033 2034 return ops; 2035 } 2036 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO); 2037 2038 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, 2039 u16 pat_index, unsigned int flags) 2040 { 2041 struct xe_bo *bo = op->gem.obj ? 
gem_to_xe_bo(op->gem.obj) : NULL; 2042 struct drm_exec exec; 2043 struct xe_vma *vma; 2044 int err = 0; 2045 2046 lockdep_assert_held_write(&vm->lock); 2047 2048 if (bo) { 2049 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); 2050 drm_exec_until_all_locked(&exec) { 2051 err = 0; 2052 if (!bo->vm) { 2053 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm)); 2054 drm_exec_retry_on_contention(&exec); 2055 } 2056 if (!err) { 2057 err = drm_exec_lock_obj(&exec, &bo->ttm.base); 2058 drm_exec_retry_on_contention(&exec); 2059 } 2060 if (err) { 2061 drm_exec_fini(&exec); 2062 return ERR_PTR(err); 2063 } 2064 } 2065 } 2066 vma = xe_vma_create(vm, bo, op->gem.offset, 2067 op->va.addr, op->va.addr + 2068 op->va.range - 1, pat_index, flags); 2069 if (IS_ERR(vma)) 2070 goto err_unlock; 2071 2072 if (xe_vma_is_userptr(vma)) 2073 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2074 else if (!xe_vma_has_no_bo(vma) && !bo->vm) 2075 err = add_preempt_fences(vm, bo); 2076 2077 err_unlock: 2078 if (bo) 2079 drm_exec_fini(&exec); 2080 2081 if (err) { 2082 prep_vma_destroy(vm, vma, false); 2083 xe_vma_destroy_unlocked(vma); 2084 vma = ERR_PTR(err); 2085 } 2086 2087 return vma; 2088 } 2089 2090 static u64 xe_vma_max_pte_size(struct xe_vma *vma) 2091 { 2092 if (vma->gpuva.flags & XE_VMA_PTE_1G) 2093 return SZ_1G; 2094 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT)) 2095 return SZ_2M; 2096 else if (vma->gpuva.flags & XE_VMA_PTE_64K) 2097 return SZ_64K; 2098 else if (vma->gpuva.flags & XE_VMA_PTE_4K) 2099 return SZ_4K; 2100 2101 return SZ_1G; /* Uninitialized, used max size */ 2102 } 2103 2104 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size) 2105 { 2106 switch (size) { 2107 case SZ_1G: 2108 vma->gpuva.flags |= XE_VMA_PTE_1G; 2109 break; 2110 case SZ_2M: 2111 vma->gpuva.flags |= XE_VMA_PTE_2M; 2112 break; 2113 case SZ_64K: 2114 vma->gpuva.flags |= XE_VMA_PTE_64K; 2115 break; 2116 case SZ_4K: 2117 vma->gpuva.flags |= XE_VMA_PTE_4K; 2118 break; 2119 } 2120 } 2121 2122 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) 2123 { 2124 int err = 0; 2125 2126 lockdep_assert_held_write(&vm->lock); 2127 2128 switch (op->base.op) { 2129 case DRM_GPUVA_OP_MAP: 2130 err |= xe_vm_insert_vma(vm, op->map.vma); 2131 if (!err) 2132 op->flags |= XE_VMA_OP_COMMITTED; 2133 break; 2134 case DRM_GPUVA_OP_REMAP: 2135 { 2136 u8 tile_present = 2137 gpuva_to_vma(op->base.remap.unmap->va)->tile_present; 2138 2139 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va), 2140 true); 2141 op->flags |= XE_VMA_OP_COMMITTED; 2142 2143 if (op->remap.prev) { 2144 err |= xe_vm_insert_vma(vm, op->remap.prev); 2145 if (!err) 2146 op->flags |= XE_VMA_OP_PREV_COMMITTED; 2147 if (!err && op->remap.skip_prev) { 2148 op->remap.prev->tile_present = 2149 tile_present; 2150 op->remap.prev = NULL; 2151 } 2152 } 2153 if (op->remap.next) { 2154 err |= xe_vm_insert_vma(vm, op->remap.next); 2155 if (!err) 2156 op->flags |= XE_VMA_OP_NEXT_COMMITTED; 2157 if (!err && op->remap.skip_next) { 2158 op->remap.next->tile_present = 2159 tile_present; 2160 op->remap.next = NULL; 2161 } 2162 } 2163 2164 /* Adjust for partial unbind after removing VMA from VM */ 2165 if (!err) { 2166 op->base.remap.unmap->va->va.addr = op->remap.start; 2167 op->base.remap.unmap->va->va.range = op->remap.range; 2168 } 2169 break; 2170 } 2171 case DRM_GPUVA_OP_UNMAP: 2172 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true); 2173 op->flags |= XE_VMA_OP_COMMITTED; 2174 break; 2175 case DRM_GPUVA_OP_PREFETCH: 2176 op->flags |= 
XE_VMA_OP_COMMITTED; 2177 break; 2178 default: 2179 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2180 } 2181 2182 return err; 2183 } 2184 2185 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, 2186 struct xe_vma_ops *vops) 2187 { 2188 struct xe_device *xe = vm->xe; 2189 struct drm_gpuva_op *__op; 2190 struct xe_tile *tile; 2191 u8 id, tile_mask = 0; 2192 int err = 0; 2193 2194 lockdep_assert_held_write(&vm->lock); 2195 2196 for_each_tile(tile, vm->xe, id) 2197 tile_mask |= 0x1 << id; 2198 2199 drm_gpuva_for_each_op(__op, ops) { 2200 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2201 struct xe_vma *vma; 2202 unsigned int flags = 0; 2203 2204 INIT_LIST_HEAD(&op->link); 2205 list_add_tail(&op->link, &vops->list); 2206 op->tile_mask = tile_mask; 2207 2208 switch (op->base.op) { 2209 case DRM_GPUVA_OP_MAP: 2210 { 2211 flags |= op->map.read_only ? 2212 VMA_CREATE_FLAG_READ_ONLY : 0; 2213 flags |= op->map.is_null ? 2214 VMA_CREATE_FLAG_IS_NULL : 0; 2215 flags |= op->map.dumpable ? 2216 VMA_CREATE_FLAG_DUMPABLE : 0; 2217 2218 vma = new_vma(vm, &op->base.map, op->map.pat_index, 2219 flags); 2220 if (IS_ERR(vma)) 2221 return PTR_ERR(vma); 2222 2223 op->map.vma = vma; 2224 if (op->map.immediate || !xe_vm_in_fault_mode(vm)) 2225 xe_vma_ops_incr_pt_update_ops(vops, 2226 op->tile_mask); 2227 break; 2228 } 2229 case DRM_GPUVA_OP_REMAP: 2230 { 2231 struct xe_vma *old = 2232 gpuva_to_vma(op->base.remap.unmap->va); 2233 2234 op->remap.start = xe_vma_start(old); 2235 op->remap.range = xe_vma_size(old); 2236 2237 if (op->base.remap.prev) { 2238 flags |= op->base.remap.unmap->va->flags & 2239 XE_VMA_READ_ONLY ? 2240 VMA_CREATE_FLAG_READ_ONLY : 0; 2241 flags |= op->base.remap.unmap->va->flags & 2242 DRM_GPUVA_SPARSE ? 2243 VMA_CREATE_FLAG_IS_NULL : 0; 2244 flags |= op->base.remap.unmap->va->flags & 2245 XE_VMA_DUMPABLE ? 2246 VMA_CREATE_FLAG_DUMPABLE : 0; 2247 2248 vma = new_vma(vm, op->base.remap.prev, 2249 old->pat_index, flags); 2250 if (IS_ERR(vma)) 2251 return PTR_ERR(vma); 2252 2253 op->remap.prev = vma; 2254 2255 /* 2256 * Userptr creates a new SG mapping so 2257 * we must also rebind. 2258 */ 2259 op->remap.skip_prev = !xe_vma_is_userptr(old) && 2260 IS_ALIGNED(xe_vma_end(vma), 2261 xe_vma_max_pte_size(old)); 2262 if (op->remap.skip_prev) { 2263 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2264 op->remap.range -= 2265 xe_vma_end(vma) - 2266 xe_vma_start(old); 2267 op->remap.start = xe_vma_end(vma); 2268 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx", 2269 (ULL)op->remap.start, 2270 (ULL)op->remap.range); 2271 } else { 2272 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); 2273 } 2274 } 2275 2276 if (op->base.remap.next) { 2277 flags |= op->base.remap.unmap->va->flags & 2278 XE_VMA_READ_ONLY ? 2279 VMA_CREATE_FLAG_READ_ONLY : 0; 2280 flags |= op->base.remap.unmap->va->flags & 2281 DRM_GPUVA_SPARSE ? 2282 VMA_CREATE_FLAG_IS_NULL : 0; 2283 flags |= op->base.remap.unmap->va->flags & 2284 XE_VMA_DUMPABLE ? 2285 VMA_CREATE_FLAG_DUMPABLE : 0; 2286 2287 vma = new_vma(vm, op->base.remap.next, 2288 old->pat_index, flags); 2289 if (IS_ERR(vma)) 2290 return PTR_ERR(vma); 2291 2292 op->remap.next = vma; 2293 2294 /* 2295 * Userptr creates a new SG mapping so 2296 * we must also rebind. 
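* The rebind can additionally only be skipped when the remainder VMA
* starts on a boundary aligned to the old VMA's largest page-table
* entry size: the entries already covering it then stay valid and the
* range of the partial unbind below is trimmed so they are not zapped.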
2297 */ 2298 op->remap.skip_next = !xe_vma_is_userptr(old) && 2299 IS_ALIGNED(xe_vma_start(vma), 2300 xe_vma_max_pte_size(old)); 2301 if (op->remap.skip_next) { 2302 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2303 op->remap.range -= 2304 xe_vma_end(old) - 2305 xe_vma_start(vma); 2306 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx", 2307 (ULL)op->remap.start, 2308 (ULL)op->remap.range); 2309 } else { 2310 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); 2311 } 2312 } 2313 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); 2314 break; 2315 } 2316 case DRM_GPUVA_OP_UNMAP: 2317 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); 2318 break; 2319 case DRM_GPUVA_OP_PREFETCH: 2320 vma = gpuva_to_vma(op->base.prefetch.va); 2321 2322 if (xe_vma_is_userptr(vma)) { 2323 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2324 if (err) 2325 return err; 2326 } 2327 2328 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); 2329 break; 2330 default: 2331 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2332 } 2333 2334 err = xe_vma_op_commit(vm, op); 2335 if (err) 2336 return err; 2337 } 2338 2339 return 0; 2340 } 2341 2342 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, 2343 bool post_commit, bool prev_post_commit, 2344 bool next_post_commit) 2345 { 2346 lockdep_assert_held_write(&vm->lock); 2347 2348 switch (op->base.op) { 2349 case DRM_GPUVA_OP_MAP: 2350 if (op->map.vma) { 2351 prep_vma_destroy(vm, op->map.vma, post_commit); 2352 xe_vma_destroy_unlocked(op->map.vma); 2353 } 2354 break; 2355 case DRM_GPUVA_OP_UNMAP: 2356 { 2357 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va); 2358 2359 if (vma) { 2360 down_read(&vm->userptr.notifier_lock); 2361 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2362 up_read(&vm->userptr.notifier_lock); 2363 if (post_commit) 2364 xe_vm_insert_vma(vm, vma); 2365 } 2366 break; 2367 } 2368 case DRM_GPUVA_OP_REMAP: 2369 { 2370 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va); 2371 2372 if (op->remap.prev) { 2373 prep_vma_destroy(vm, op->remap.prev, prev_post_commit); 2374 xe_vma_destroy_unlocked(op->remap.prev); 2375 } 2376 if (op->remap.next) { 2377 prep_vma_destroy(vm, op->remap.next, next_post_commit); 2378 xe_vma_destroy_unlocked(op->remap.next); 2379 } 2380 if (vma) { 2381 down_read(&vm->userptr.notifier_lock); 2382 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2383 up_read(&vm->userptr.notifier_lock); 2384 if (post_commit) 2385 xe_vm_insert_vma(vm, vma); 2386 } 2387 break; 2388 } 2389 case DRM_GPUVA_OP_PREFETCH: 2390 /* Nothing to do */ 2391 break; 2392 default: 2393 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2394 } 2395 } 2396 2397 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, 2398 struct drm_gpuva_ops **ops, 2399 int num_ops_list) 2400 { 2401 int i; 2402 2403 for (i = num_ops_list - 1; i >= 0; --i) { 2404 struct drm_gpuva_ops *__ops = ops[i]; 2405 struct drm_gpuva_op *__op; 2406 2407 if (!__ops) 2408 continue; 2409 2410 drm_gpuva_for_each_op_reverse(__op, __ops) { 2411 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2412 2413 xe_vma_op_unwind(vm, op, 2414 op->flags & XE_VMA_OP_COMMITTED, 2415 op->flags & XE_VMA_OP_PREV_COMMITTED, 2416 op->flags & XE_VMA_OP_NEXT_COMMITTED); 2417 } 2418 } 2419 } 2420 2421 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, 2422 bool validate) 2423 { 2424 struct xe_bo *bo = xe_vma_bo(vma); 2425 struct xe_vm *vm = xe_vma_vm(vma); 2426 int err = 0; 2427 2428 if (bo) { 2429 if (!bo->vm) 2430 err = drm_exec_lock_obj(exec, &bo->ttm.base); 2431 if (!err && validate) 
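/*
 * Validation makes sure the BO has a valid backing-store placement
 * before it is bound; callers that only need the object locked
 * (unmaps, the unmap side of a remap, prefetch) pass validate == false.
 */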
2432 err = xe_bo_validate(bo, vm, 2433 !xe_vm_in_preempt_fence_mode(vm)); 2434 } 2435 2436 return err; 2437 } 2438 2439 static int check_ufence(struct xe_vma *vma) 2440 { 2441 if (vma->ufence) { 2442 struct xe_user_fence * const f = vma->ufence; 2443 2444 if (!xe_sync_ufence_get_status(f)) 2445 return -EBUSY; 2446 2447 vma->ufence = NULL; 2448 xe_sync_ufence_put(f); 2449 } 2450 2451 return 0; 2452 } 2453 2454 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, 2455 struct xe_vma_op *op) 2456 { 2457 int err = 0; 2458 2459 switch (op->base.op) { 2460 case DRM_GPUVA_OP_MAP: 2461 err = vma_lock_and_validate(exec, op->map.vma, 2462 !xe_vm_in_fault_mode(vm) || 2463 op->map.immediate); 2464 break; 2465 case DRM_GPUVA_OP_REMAP: 2466 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va)); 2467 if (err) 2468 break; 2469 2470 err = vma_lock_and_validate(exec, 2471 gpuva_to_vma(op->base.remap.unmap->va), 2472 false); 2473 if (!err && op->remap.prev) 2474 err = vma_lock_and_validate(exec, op->remap.prev, true); 2475 if (!err && op->remap.next) 2476 err = vma_lock_and_validate(exec, op->remap.next, true); 2477 break; 2478 case DRM_GPUVA_OP_UNMAP: 2479 err = check_ufence(gpuva_to_vma(op->base.unmap.va)); 2480 if (err) 2481 break; 2482 2483 err = vma_lock_and_validate(exec, 2484 gpuva_to_vma(op->base.unmap.va), 2485 false); 2486 break; 2487 case DRM_GPUVA_OP_PREFETCH: 2488 { 2489 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2490 u32 region = op->prefetch.region; 2491 2492 xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type)); 2493 2494 err = vma_lock_and_validate(exec, 2495 gpuva_to_vma(op->base.prefetch.va), 2496 false); 2497 if (!err && !xe_vma_has_no_bo(vma)) 2498 err = xe_bo_migrate(xe_vma_bo(vma), 2499 region_to_mem_type[region]); 2500 break; 2501 } 2502 default: 2503 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2504 } 2505 2506 return err; 2507 } 2508 2509 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, 2510 struct xe_vm *vm, 2511 struct xe_vma_ops *vops) 2512 { 2513 struct xe_vma_op *op; 2514 int err; 2515 2516 err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 2517 if (err) 2518 return err; 2519 2520 list_for_each_entry(op, &vops->list, link) { 2521 err = op_lock_and_prep(exec, vm, op); 2522 if (err) 2523 return err; 2524 } 2525 2526 #ifdef TEST_VM_OPS_ERROR 2527 if (vops->inject_error && 2528 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK) 2529 return -ENOSPC; 2530 #endif 2531 2532 return 0; 2533 } 2534 2535 static void op_trace(struct xe_vma_op *op) 2536 { 2537 switch (op->base.op) { 2538 case DRM_GPUVA_OP_MAP: 2539 trace_xe_vma_bind(op->map.vma); 2540 break; 2541 case DRM_GPUVA_OP_REMAP: 2542 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va)); 2543 if (op->remap.prev) 2544 trace_xe_vma_bind(op->remap.prev); 2545 if (op->remap.next) 2546 trace_xe_vma_bind(op->remap.next); 2547 break; 2548 case DRM_GPUVA_OP_UNMAP: 2549 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va)); 2550 break; 2551 case DRM_GPUVA_OP_PREFETCH: 2552 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va)); 2553 break; 2554 default: 2555 XE_WARN_ON("NOT POSSIBLE"); 2556 } 2557 } 2558 2559 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops) 2560 { 2561 struct xe_vma_op *op; 2562 2563 list_for_each_entry(op, &vops->list, link) 2564 op_trace(op); 2565 } 2566 2567 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops) 2568 { 2569 struct xe_exec_queue *q = vops->q; 2570 struct xe_tile *tile; 2571 int number_tiles = 0; 2572 u8 id; 2573 2574 
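/*
 * Count the tiles that have page-table update work queued and give
 * each of them an exec queue to run it on: prefer the queue supplied
 * with the bind, stepping through its multi-GT list from tile to
 * tile, otherwise fall back to the VM's default queue for that tile.
 */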
for_each_tile(tile, vm->xe, id) { 2575 if (vops->pt_update_ops[id].num_ops) 2576 ++number_tiles; 2577 2578 if (vops->pt_update_ops[id].q) 2579 continue; 2580 2581 if (q) { 2582 vops->pt_update_ops[id].q = q; 2583 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list)) 2584 q = list_next_entry(q, multi_gt_list); 2585 } else { 2586 vops->pt_update_ops[id].q = vm->q[id]; 2587 } 2588 } 2589 2590 return number_tiles; 2591 } 2592 2593 static struct dma_fence *ops_execute(struct xe_vm *vm, 2594 struct xe_vma_ops *vops) 2595 { 2596 struct xe_tile *tile; 2597 struct dma_fence *fence = NULL; 2598 struct dma_fence **fences = NULL; 2599 struct dma_fence_array *cf = NULL; 2600 int number_tiles = 0, current_fence = 0, err; 2601 u8 id; 2602 2603 number_tiles = vm_ops_setup_tile_args(vm, vops); 2604 if (number_tiles == 0) 2605 return ERR_PTR(-ENODATA); 2606 2607 if (number_tiles > 1) { 2608 fences = kmalloc_array(number_tiles, sizeof(*fences), 2609 GFP_KERNEL); 2610 if (!fences) { 2611 fence = ERR_PTR(-ENOMEM); 2612 goto err_trace; 2613 } 2614 } 2615 2616 for_each_tile(tile, vm->xe, id) { 2617 if (!vops->pt_update_ops[id].num_ops) 2618 continue; 2619 2620 err = xe_pt_update_ops_prepare(tile, vops); 2621 if (err) { 2622 fence = ERR_PTR(err); 2623 goto err_out; 2624 } 2625 } 2626 2627 trace_xe_vm_ops_execute(vops); 2628 2629 for_each_tile(tile, vm->xe, id) { 2630 if (!vops->pt_update_ops[id].num_ops) 2631 continue; 2632 2633 fence = xe_pt_update_ops_run(tile, vops); 2634 if (IS_ERR(fence)) 2635 goto err_out; 2636 2637 if (fences) 2638 fences[current_fence++] = fence; 2639 } 2640 2641 if (fences) { 2642 cf = dma_fence_array_create(number_tiles, fences, 2643 vm->composite_fence_ctx, 2644 vm->composite_fence_seqno++, 2645 false); 2646 if (!cf) { 2647 --vm->composite_fence_seqno; 2648 fence = ERR_PTR(-ENOMEM); 2649 goto err_out; 2650 } 2651 fence = &cf->base; 2652 } 2653 2654 for_each_tile(tile, vm->xe, id) { 2655 if (!vops->pt_update_ops[id].num_ops) 2656 continue; 2657 2658 xe_pt_update_ops_fini(tile, vops); 2659 } 2660 2661 return fence; 2662 2663 err_out: 2664 for_each_tile(tile, vm->xe, id) { 2665 if (!vops->pt_update_ops[id].num_ops) 2666 continue; 2667 2668 xe_pt_update_ops_abort(tile, vops); 2669 } 2670 while (current_fence) 2671 dma_fence_put(fences[--current_fence]); 2672 kfree(fences); 2673 kfree(cf); 2674 2675 err_trace: 2676 trace_xe_vm_ops_fail(vm); 2677 return fence; 2678 } 2679 2680 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence) 2681 { 2682 if (vma->ufence) 2683 xe_sync_ufence_put(vma->ufence); 2684 vma->ufence = __xe_sync_ufence_get(ufence); 2685 } 2686 2687 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op, 2688 struct xe_user_fence *ufence) 2689 { 2690 switch (op->base.op) { 2691 case DRM_GPUVA_OP_MAP: 2692 vma_add_ufence(op->map.vma, ufence); 2693 break; 2694 case DRM_GPUVA_OP_REMAP: 2695 if (op->remap.prev) 2696 vma_add_ufence(op->remap.prev, ufence); 2697 if (op->remap.next) 2698 vma_add_ufence(op->remap.next, ufence); 2699 break; 2700 case DRM_GPUVA_OP_UNMAP: 2701 break; 2702 case DRM_GPUVA_OP_PREFETCH: 2703 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence); 2704 break; 2705 default: 2706 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2707 } 2708 } 2709 2710 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, 2711 struct dma_fence *fence) 2712 { 2713 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q); 2714 struct xe_user_fence *ufence; 2715 struct xe_vma_op *op; 2716 int i; 2717 2718 ufence = 
find_ufence_get(vops->syncs, vops->num_syncs); 2719 list_for_each_entry(op, &vops->list, link) { 2720 if (ufence) 2721 op_add_ufence(vm, op, ufence); 2722 2723 if (op->base.op == DRM_GPUVA_OP_UNMAP) 2724 xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence); 2725 else if (op->base.op == DRM_GPUVA_OP_REMAP) 2726 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), 2727 fence); 2728 } 2729 if (ufence) 2730 xe_sync_ufence_put(ufence); 2731 for (i = 0; i < vops->num_syncs; i++) 2732 xe_sync_entry_signal(vops->syncs + i, fence); 2733 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); 2734 dma_fence_put(fence); 2735 } 2736 2737 static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, 2738 struct xe_vma_ops *vops) 2739 { 2740 struct drm_exec exec; 2741 struct dma_fence *fence; 2742 int err; 2743 2744 lockdep_assert_held_write(&vm->lock); 2745 2746 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | 2747 DRM_EXEC_IGNORE_DUPLICATES, 0); 2748 drm_exec_until_all_locked(&exec) { 2749 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops); 2750 drm_exec_retry_on_contention(&exec); 2751 if (err) 2752 goto unlock; 2753 2754 fence = ops_execute(vm, vops); 2755 if (IS_ERR(fence)) { 2756 err = PTR_ERR(fence); 2757 goto unlock; 2758 } 2759 2760 vm_bind_ioctl_ops_fini(vm, vops, fence); 2761 } 2762 2763 unlock: 2764 drm_exec_fini(&exec); 2765 return err; 2766 } 2767 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); 2768 2769 #define SUPPORTED_FLAGS_STUB \ 2770 (DRM_XE_VM_BIND_FLAG_READONLY | \ 2771 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \ 2772 DRM_XE_VM_BIND_FLAG_NULL | \ 2773 DRM_XE_VM_BIND_FLAG_DUMPABLE) 2774 2775 #ifdef TEST_VM_OPS_ERROR 2776 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR) 2777 #else 2778 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB 2779 #endif 2780 2781 #define XE_64K_PAGE_MASK 0xffffull 2782 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) 2783 2784 static int vm_bind_ioctl_check_args(struct xe_device *xe, 2785 struct drm_xe_vm_bind *args, 2786 struct drm_xe_vm_bind_op **bind_ops) 2787 { 2788 int err; 2789 int i; 2790 2791 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) || 2792 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 2793 return -EINVAL; 2794 2795 if (XE_IOCTL_DBG(xe, args->extensions)) 2796 return -EINVAL; 2797 2798 if (args->num_binds > 1) { 2799 u64 __user *bind_user = 2800 u64_to_user_ptr(args->vector_of_binds); 2801 2802 *bind_ops = kvmalloc_array(args->num_binds, 2803 sizeof(struct drm_xe_vm_bind_op), 2804 GFP_KERNEL | __GFP_ACCOUNT | 2805 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 2806 if (!*bind_ops) 2807 return args->num_binds > 1 ? 
-ENOBUFS : -ENOMEM; 2808 2809 err = __copy_from_user(*bind_ops, bind_user, 2810 sizeof(struct drm_xe_vm_bind_op) * 2811 args->num_binds); 2812 if (XE_IOCTL_DBG(xe, err)) { 2813 err = -EFAULT; 2814 goto free_bind_ops; 2815 } 2816 } else { 2817 *bind_ops = &args->bind; 2818 } 2819 2820 for (i = 0; i < args->num_binds; ++i) { 2821 u64 range = (*bind_ops)[i].range; 2822 u64 addr = (*bind_ops)[i].addr; 2823 u32 op = (*bind_ops)[i].op; 2824 u32 flags = (*bind_ops)[i].flags; 2825 u32 obj = (*bind_ops)[i].obj; 2826 u64 obj_offset = (*bind_ops)[i].obj_offset; 2827 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance; 2828 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 2829 u16 pat_index = (*bind_ops)[i].pat_index; 2830 u16 coh_mode; 2831 2832 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) { 2833 err = -EINVAL; 2834 goto free_bind_ops; 2835 } 2836 2837 pat_index = array_index_nospec(pat_index, xe->pat.n_entries); 2838 (*bind_ops)[i].pat_index = pat_index; 2839 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); 2840 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */ 2841 err = -EINVAL; 2842 goto free_bind_ops; 2843 } 2844 2845 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) { 2846 err = -EINVAL; 2847 goto free_bind_ops; 2848 } 2849 2850 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) || 2851 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) || 2852 XE_IOCTL_DBG(xe, obj && is_null) || 2853 XE_IOCTL_DBG(xe, obj_offset && is_null) || 2854 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP && 2855 is_null) || 2856 XE_IOCTL_DBG(xe, !obj && 2857 op == DRM_XE_VM_BIND_OP_MAP && 2858 !is_null) || 2859 XE_IOCTL_DBG(xe, !obj && 2860 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 2861 XE_IOCTL_DBG(xe, addr && 2862 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 2863 XE_IOCTL_DBG(xe, range && 2864 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 2865 XE_IOCTL_DBG(xe, obj && 2866 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 2867 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 2868 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 2869 XE_IOCTL_DBG(xe, obj && 2870 op == DRM_XE_VM_BIND_OP_PREFETCH) || 2871 XE_IOCTL_DBG(xe, prefetch_region && 2872 op != DRM_XE_VM_BIND_OP_PREFETCH) || 2873 XE_IOCTL_DBG(xe, !(BIT(prefetch_region) & 2874 xe->info.mem_region_mask)) || 2875 XE_IOCTL_DBG(xe, obj && 2876 op == DRM_XE_VM_BIND_OP_UNMAP)) { 2877 err = -EINVAL; 2878 goto free_bind_ops; 2879 } 2880 2881 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) || 2882 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) || 2883 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) || 2884 XE_IOCTL_DBG(xe, !range && 2885 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) { 2886 err = -EINVAL; 2887 goto free_bind_ops; 2888 } 2889 } 2890 2891 return 0; 2892 2893 free_bind_ops: 2894 if (args->num_binds > 1) 2895 kvfree(*bind_ops); 2896 return err; 2897 } 2898 2899 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, 2900 struct xe_exec_queue *q, 2901 struct xe_sync_entry *syncs, 2902 int num_syncs) 2903 { 2904 struct dma_fence *fence; 2905 int i, err = 0; 2906 2907 fence = xe_sync_in_fence_get(syncs, num_syncs, 2908 to_wait_exec_queue(vm, q), vm); 2909 if (IS_ERR(fence)) 2910 return PTR_ERR(fence); 2911 2912 for (i = 0; i < num_syncs; i++) 2913 xe_sync_entry_signal(&syncs[i], fence); 2914 2915 xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm, 2916 fence); 2917 dma_fence_put(fence); 2918 2919 return err; 2920 } 2921 2922 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 2923 struct xe_exec_queue *q, 2924 struct xe_sync_entry *syncs, u32 num_syncs) 2925 { 2926 memset(vops, 0, 
sizeof(*vops));
2927 INIT_LIST_HEAD(&vops->list);
2928 vops->vm = vm;
2929 vops->q = q;
2930 vops->syncs = syncs;
2931 vops->num_syncs = num_syncs;
2932 }
2933
2934 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
2935 u64 addr, u64 range, u64 obj_offset,
2936 u16 pat_index)
2937 {
2938 u16 coh_mode;
2939
2940 if (XE_IOCTL_DBG(xe, range > bo->size) ||
2941 XE_IOCTL_DBG(xe, obj_offset >
2942 bo->size - range)) {
2943 return -EINVAL;
2944 }
2945
2946 /*
2947 * Some platforms require 64k VM_BIND alignment,
2948 * specifically those with XE_VRAM_FLAGS_NEED64K.
2949 *
2950 * Other platforms may have BOs set to 64k physical placement,
2951 * but can be mapped at 4k offsets anyway. This check is only
2952 * there for the former case.
2953 */
2954 if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) &&
2955 (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) {
2956 if (XE_IOCTL_DBG(xe, obj_offset &
2957 XE_64K_PAGE_MASK) ||
2958 XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
2959 XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
2960 return -EINVAL;
2961 }
2962 }
2963
2964 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
2965 if (bo->cpu_caching) {
2966 if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
2967 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
2968 return -EINVAL;
2969 }
2970 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
2971 /*
2972 * Imported dma-buf from a different device should
2973 * require 1way or 2way coherency since we don't know
2974 * how it was mapped on the CPU. Just assume it is
2975 * potentially cached on the CPU side.
2976 */
2977 return -EINVAL;
2978 }
2979
2980 return 0;
2981 }
2982
2983 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2984 {
2985 struct xe_device *xe = to_xe_device(dev);
2986 struct xe_file *xef = to_xe_file(file);
2987 struct drm_xe_vm_bind *args = data;
2988 struct drm_xe_sync __user *syncs_user;
2989 struct xe_bo **bos = NULL;
2990 struct drm_gpuva_ops **ops = NULL;
2991 struct xe_vm *vm;
2992 struct xe_exec_queue *q = NULL;
2993 u32 num_syncs, num_ufence = 0;
2994 struct xe_sync_entry *syncs = NULL;
2995 struct drm_xe_vm_bind_op *bind_ops;
2996 struct xe_vma_ops vops;
2997 int err;
2998 int i;
2999
3000 err = vm_bind_ioctl_check_args(xe, args, &bind_ops);
3001 if (err)
3002 return err;
3003
3004 if (args->exec_queue_id) {
3005 q = xe_exec_queue_lookup(xef, args->exec_queue_id);
3006 if (XE_IOCTL_DBG(xe, !q)) {
3007 err = -ENOENT;
3008 goto free_objs;
3009 }
3010
3011 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
3012 err = -EINVAL;
3013 goto put_exec_queue;
3014 }
3015 }
3016
3017 vm = xe_vm_lookup(xef, args->vm_id);
3018 if (XE_IOCTL_DBG(xe, !vm)) {
3019 err = -EINVAL;
3020 goto put_exec_queue;
3021 }
3022
3023 err = down_write_killable(&vm->lock);
3024 if (err)
3025 goto put_vm;
3026
3027 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
3028 err = -ENOENT;
3029 goto release_vm_lock;
3030 }
3031
3032 for (i = 0; i < args->num_binds; ++i) {
3033 u64 range = bind_ops[i].range;
3034 u64 addr = bind_ops[i].addr;
3035
3036 if (XE_IOCTL_DBG(xe, range > vm->size) ||
3037 XE_IOCTL_DBG(xe, addr > vm->size - range)) {
3038 err = -EINVAL;
3039 goto release_vm_lock;
3040 }
3041 }
3042
3043 if (args->num_binds) {
3044 bos = kvcalloc(args->num_binds, sizeof(*bos),
3045 GFP_KERNEL | __GFP_ACCOUNT |
3046 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3047 if (!bos) {
3048 err = -ENOMEM;
3049 goto release_vm_lock;
3050 }
3051
3052 ops = kvcalloc(args->num_binds, sizeof(*ops),
3053 GFP_KERNEL |
__GFP_ACCOUNT | 3054 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3055 if (!ops) { 3056 err = -ENOMEM; 3057 goto release_vm_lock; 3058 } 3059 } 3060 3061 for (i = 0; i < args->num_binds; ++i) { 3062 struct drm_gem_object *gem_obj; 3063 u64 range = bind_ops[i].range; 3064 u64 addr = bind_ops[i].addr; 3065 u32 obj = bind_ops[i].obj; 3066 u64 obj_offset = bind_ops[i].obj_offset; 3067 u16 pat_index = bind_ops[i].pat_index; 3068 3069 if (!obj) 3070 continue; 3071 3072 gem_obj = drm_gem_object_lookup(file, obj); 3073 if (XE_IOCTL_DBG(xe, !gem_obj)) { 3074 err = -ENOENT; 3075 goto put_obj; 3076 } 3077 bos[i] = gem_to_xe_bo(gem_obj); 3078 3079 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range, 3080 obj_offset, pat_index); 3081 if (err) 3082 goto put_obj; 3083 } 3084 3085 if (args->num_syncs) { 3086 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); 3087 if (!syncs) { 3088 err = -ENOMEM; 3089 goto put_obj; 3090 } 3091 } 3092 3093 syncs_user = u64_to_user_ptr(args->syncs); 3094 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { 3095 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], 3096 &syncs_user[num_syncs], 3097 (xe_vm_in_lr_mode(vm) ? 3098 SYNC_PARSE_FLAG_LR_MODE : 0) | 3099 (!args->num_binds ? 3100 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0)); 3101 if (err) 3102 goto free_syncs; 3103 3104 if (xe_sync_is_ufence(&syncs[num_syncs])) 3105 num_ufence++; 3106 } 3107 3108 if (XE_IOCTL_DBG(xe, num_ufence > 1)) { 3109 err = -EINVAL; 3110 goto free_syncs; 3111 } 3112 3113 if (!args->num_binds) { 3114 err = -ENODATA; 3115 goto free_syncs; 3116 } 3117 3118 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); 3119 for (i = 0; i < args->num_binds; ++i) { 3120 u64 range = bind_ops[i].range; 3121 u64 addr = bind_ops[i].addr; 3122 u32 op = bind_ops[i].op; 3123 u32 flags = bind_ops[i].flags; 3124 u64 obj_offset = bind_ops[i].obj_offset; 3125 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance; 3126 u16 pat_index = bind_ops[i].pat_index; 3127 3128 ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset, 3129 addr, range, op, flags, 3130 prefetch_region, pat_index); 3131 if (IS_ERR(ops[i])) { 3132 err = PTR_ERR(ops[i]); 3133 ops[i] = NULL; 3134 goto unwind_ops; 3135 } 3136 3137 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops); 3138 if (err) 3139 goto unwind_ops; 3140 3141 #ifdef TEST_VM_OPS_ERROR 3142 if (flags & FORCE_OP_ERROR) { 3143 vops.inject_error = true; 3144 vm->xe->vm_inject_error_position = 3145 (vm->xe->vm_inject_error_position + 1) % 3146 FORCE_OP_ERROR_COUNT; 3147 } 3148 #endif 3149 } 3150 3151 /* Nothing to do */ 3152 if (list_empty(&vops.list)) { 3153 err = -ENODATA; 3154 goto unwind_ops; 3155 } 3156 3157 err = xe_vma_ops_alloc(&vops, args->num_binds > 1); 3158 if (err) 3159 goto unwind_ops; 3160 3161 err = vm_bind_ioctl_ops_execute(vm, &vops); 3162 3163 unwind_ops: 3164 if (err && err != -ENODATA) 3165 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); 3166 xe_vma_ops_fini(&vops); 3167 for (i = args->num_binds - 1; i >= 0; --i) 3168 if (ops[i]) 3169 drm_gpuva_ops_free(&vm->gpuvm, ops[i]); 3170 free_syncs: 3171 if (err == -ENODATA) 3172 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs); 3173 while (num_syncs--) 3174 xe_sync_entry_cleanup(&syncs[num_syncs]); 3175 3176 kfree(syncs); 3177 put_obj: 3178 for (i = 0; i < args->num_binds; ++i) 3179 xe_bo_put(bos[i]); 3180 release_vm_lock: 3181 up_write(&vm->lock); 3182 put_vm: 3183 xe_vm_put(vm); 3184 put_exec_queue: 3185 if (q) 3186 xe_exec_queue_put(q); 3187 free_objs: 3188 kvfree(bos); 3189 kvfree(ops); 3190 
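/*
 * bind_ops only points to a separately allocated array when more than
 * one bind op was supplied; otherwise it aliases args->bind and must
 * not be freed.
 */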
if (args->num_binds > 1)
3191 kvfree(bind_ops);
3192 return err;
3193 }
3194
3195 /**
3196 * xe_vm_lock() - Lock the vm's dma_resv object
3197 * @vm: The struct xe_vm whose lock is to be locked
3198 * @intr: Whether any wait for the lock should be interruptible
3199 *
3200 * Return: 0 on success, -EINTR if @intr is true and the wait for a
3201 * contended lock was interrupted. If @intr is false, the function
3202 * always returns 0.
3203 */
3204 int xe_vm_lock(struct xe_vm *vm, bool intr)
3205 {
3206 if (intr)
3207 return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
3208
3209 return dma_resv_lock(xe_vm_resv(vm), NULL);
3210 }
3211
3212 /**
3213 * xe_vm_unlock() - Unlock the vm's dma_resv object
3214 * @vm: The struct xe_vm whose lock is to be released.
3215 *
3216 * Unlock the vm's dma_resv object, previously locked with xe_vm_lock().
3217 */
3218 void xe_vm_unlock(struct xe_vm *vm)
3219 {
3220 dma_resv_unlock(xe_vm_resv(vm));
3221 }
3222
3223 /**
3224 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
3225 * @vma: VMA to invalidate
3226 *
3227 * Walks the list of page-table leaves, zeroing the entries owned by this
3228 * VMA, then invalidates the TLBs and blocks until the TLB invalidation is
3229 * complete.
3230 *
3231 * Return: 0 for success, negative error code otherwise.
3232 */
3233 int xe_vm_invalidate_vma(struct xe_vma *vma)
3234 {
3235 struct xe_device *xe = xe_vma_vm(vma)->xe;
3236 struct xe_tile *tile;
3237 struct xe_gt_tlb_invalidation_fence
3238 fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
3239 u8 id;
3240 u32 fence_id = 0;
3241 int ret = 0;
3242
3243 xe_assert(xe, !xe_vma_is_null(vma));
3244 trace_xe_vma_invalidate(vma);
3245
3246 vm_dbg(&xe_vma_vm(vma)->xe->drm,
3247 "INVALIDATE: addr=0x%016llx, range=0x%016llx",
3248 xe_vma_start(vma), xe_vma_size(vma));
3249
3250 /* Check that we don't race with page-table updates */
3251 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
3252 if (xe_vma_is_userptr(vma)) {
3253 WARN_ON_ONCE(!mmu_interval_check_retry
3254 (&to_userptr_vma(vma)->userptr.notifier,
3255 to_userptr_vma(vma)->userptr.notifier_seq));
3256 WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)),
3257 DMA_RESV_USAGE_BOOKKEEP));
3258
3259 } else {
3260 xe_bo_assert_held(xe_vma_bo(vma));
3261 }
3262 }
3263
3264 for_each_tile(tile, xe, id) {
3265 if (xe_pt_zap_ptes(tile, vma)) {
3266 xe_device_wmb(xe);
3267 xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
3268 &fence[fence_id],
3269 true);
3270
3271 ret = xe_gt_tlb_invalidation_vma(tile->primary_gt,
3272 &fence[fence_id], vma);
3273 if (ret)
3274 goto wait;
3275 ++fence_id;
3276
3277 if (!tile->media_gt)
3278 continue;
3279
3280 xe_gt_tlb_invalidation_fence_init(tile->media_gt,
3281 &fence[fence_id],
3282 true);
3283
3284 ret = xe_gt_tlb_invalidation_vma(tile->media_gt,
3285 &fence[fence_id], vma);
3286 if (ret)
3287 goto wait;
3288 ++fence_id;
3289 }
3290 }
3291
3292 wait:
3293 for (id = 0; id < fence_id; ++id)
3294 xe_gt_tlb_invalidation_fence_wait(&fence[id]);
3295
3296 vma->tile_invalidated = vma->tile_mask;
3297
3298 return ret;
3299 }
3300
3301 struct xe_vm_snapshot {
3302 unsigned long num_snaps;
3303 struct {
3304 u64 ofs, bo_ofs;
3305 unsigned long len;
3306 struct xe_bo *bo;
3307 void *data;
3308 struct mm_struct *mm;
3309 } snap[];
3310 };
3311
3312 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
3313 {
3314 unsigned long num_snaps = 0, i;
3315 struct xe_vm_snapshot *snap = NULL;
3316 struct drm_gpuva *gpuva;
3317
3318 if (!vm)
3319 return NULL;
3320
3321
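/*
 * Two passes over the GPUVA tree under snap_mutex: first count the
 * dumpable VMAs, then record each one's GPU VA range and a reference
 * to its backing store (BO or userptr mm). The actual contents are
 * copied later in xe_vm_snapshot_capture_delayed().
 */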
mutex_lock(&vm->snap_mutex); 3322 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 3323 if (gpuva->flags & XE_VMA_DUMPABLE) 3324 num_snaps++; 3325 } 3326 3327 if (num_snaps) 3328 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT); 3329 if (!snap) { 3330 snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV); 3331 goto out_unlock; 3332 } 3333 3334 snap->num_snaps = num_snaps; 3335 i = 0; 3336 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 3337 struct xe_vma *vma = gpuva_to_vma(gpuva); 3338 struct xe_bo *bo = vma->gpuva.gem.obj ? 3339 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 3340 3341 if (!(gpuva->flags & XE_VMA_DUMPABLE)) 3342 continue; 3343 3344 snap->snap[i].ofs = xe_vma_start(vma); 3345 snap->snap[i].len = xe_vma_size(vma); 3346 if (bo) { 3347 snap->snap[i].bo = xe_bo_get(bo); 3348 snap->snap[i].bo_ofs = xe_vma_bo_offset(vma); 3349 } else if (xe_vma_is_userptr(vma)) { 3350 struct mm_struct *mm = 3351 to_userptr_vma(vma)->userptr.notifier.mm; 3352 3353 if (mmget_not_zero(mm)) 3354 snap->snap[i].mm = mm; 3355 else 3356 snap->snap[i].data = ERR_PTR(-EFAULT); 3357 3358 snap->snap[i].bo_ofs = xe_vma_userptr(vma); 3359 } else { 3360 snap->snap[i].data = ERR_PTR(-ENOENT); 3361 } 3362 i++; 3363 } 3364 3365 out_unlock: 3366 mutex_unlock(&vm->snap_mutex); 3367 return snap; 3368 } 3369 3370 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) 3371 { 3372 if (IS_ERR_OR_NULL(snap)) 3373 return; 3374 3375 for (int i = 0; i < snap->num_snaps; i++) { 3376 struct xe_bo *bo = snap->snap[i].bo; 3377 int err; 3378 3379 if (IS_ERR(snap->snap[i].data)) 3380 continue; 3381 3382 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER); 3383 if (!snap->snap[i].data) { 3384 snap->snap[i].data = ERR_PTR(-ENOMEM); 3385 goto cleanup_bo; 3386 } 3387 3388 if (bo) { 3389 err = xe_bo_read(bo, snap->snap[i].bo_ofs, 3390 snap->snap[i].data, snap->snap[i].len); 3391 } else { 3392 void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs; 3393 3394 kthread_use_mm(snap->snap[i].mm); 3395 if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len)) 3396 err = 0; 3397 else 3398 err = -EFAULT; 3399 kthread_unuse_mm(snap->snap[i].mm); 3400 3401 mmput(snap->snap[i].mm); 3402 snap->snap[i].mm = NULL; 3403 } 3404 3405 if (err) { 3406 kvfree(snap->snap[i].data); 3407 snap->snap[i].data = ERR_PTR(err); 3408 } 3409 3410 cleanup_bo: 3411 xe_bo_put(bo); 3412 snap->snap[i].bo = NULL; 3413 } 3414 } 3415 3416 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) 3417 { 3418 unsigned long i, j; 3419 3420 if (IS_ERR_OR_NULL(snap)) { 3421 drm_printf(p, "[0].error: %li\n", PTR_ERR(snap)); 3422 return; 3423 } 3424 3425 for (i = 0; i < snap->num_snaps; i++) { 3426 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len); 3427 3428 if (IS_ERR(snap->snap[i].data)) { 3429 drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs, 3430 PTR_ERR(snap->snap[i].data)); 3431 continue; 3432 } 3433 3434 drm_printf(p, "[%llx].data: ", snap->snap[i].ofs); 3435 3436 for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) { 3437 u32 *val = snap->snap[i].data + j; 3438 char dumped[ASCII85_BUFSZ]; 3439 3440 drm_puts(p, ascii85_encode(*val, dumped)); 3441 } 3442 3443 drm_puts(p, "\n"); 3444 } 3445 } 3446 3447 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap) 3448 { 3449 unsigned long i; 3450 3451 if (IS_ERR_OR_NULL(snap)) 3452 return; 3453 3454 for (i = 0; i < snap->num_snaps; i++) { 3455 if (!IS_ERR(snap->snap[i].data)) 3456 kvfree(snap->snap[i].data); 3457 
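/*
 * Drop any BO/mm references still held in case
 * xe_vm_snapshot_capture_delayed() never ran for this entry.
 */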
xe_bo_put(snap->snap[i].bo); 3458 if (snap->snap[i].mm) 3459 mmput(snap->snap[i].mm); 3460 } 3461 kvfree(snap); 3462 } 3463
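/*
 * Illustrative sketch only, not part of the driver: roughly how user
 * space is expected to drive the bind path above for a single MAP,
 * assuming a VM and a GEM object already exist and that the uapi
 * declarations from uapi/drm/xe_drm.h are used as seen in this file.
 * vm_id, bo_handle, gpu_va, size, pat_index and fd are placeholders;
 * syncs, exec queue selection and error handling are omitted.
 *
 *	struct drm_xe_vm_bind_op op = {
 *		.obj = bo_handle,
 *		.obj_offset = 0,
 *		.range = size,		// page-aligned
 *		.addr = gpu_va,		// page-aligned
 *		.op = DRM_XE_VM_BIND_OP_MAP,
 *		.flags = DRM_XE_VM_BIND_FLAG_IMMEDIATE,
 *		.pat_index = pat_index,	// must be valid for the device
 *	};
 *	struct drm_xe_vm_bind bind = {
 *		.vm_id = vm_id,
 *		.num_binds = 1,
 *		.bind = op,
 *	};
 *
 *	ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);
 */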