// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_vm.h"

#include <linux/dma-fence-array.h>
#include <linux/nospec.h>

#include <drm/drm_exec.h>
#include <drm/drm_print.h>
#include <drm/ttm/ttm_tt.h>
#include <uapi/drm/xe_drm.h>
#include <linux/ascii85.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/swap.h>

#include <generated/xe_wa_oob.h>

#include "regs/xe_gtt_defs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue.h"
#include "xe_gt_pagefault.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_migrate.h"
#include "xe_pat.h"
#include "xe_pm.h"
#include "xe_preempt_fence.h"
#include "xe_pt.h"
#include "xe_res_cursor.h"
#include "xe_sync.h"
#include "xe_trace_bo.h"
#include "xe_wa.h"
#include "xe_hmm.h"

static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
{
	return vm->gpuvm.r_obj;
}

/**
 * xe_vma_userptr_check_repin() - Advisory check for repin needed
 * @uvma: The userptr vma
 *
 * Check if the userptr vma has been invalidated since the last successful
 * repin. The check is advisory only and the function can be called without
 * the vm->userptr.notifier_lock held. There is no guarantee that the
 * vma userptr will remain valid after a lockless check, so typically
 * the call needs to be followed by a proper check under the notifier_lock.
 *
 * Return: 0 if the userptr vma is valid, -EAGAIN otherwise; repin recommended.
 */
int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
{
	return mmu_interval_check_retry(&uvma->userptr.notifier,
					uvma->userptr.notifier_seq) ?
63 -EAGAIN : 0; 64 } 65 66 int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) 67 { 68 struct xe_vma *vma = &uvma->vma; 69 struct xe_vm *vm = xe_vma_vm(vma); 70 struct xe_device *xe = vm->xe; 71 72 lockdep_assert_held(&vm->lock); 73 xe_assert(xe, xe_vma_is_userptr(vma)); 74 75 return xe_hmm_userptr_populate_range(uvma, false); 76 } 77 78 static bool preempt_fences_waiting(struct xe_vm *vm) 79 { 80 struct xe_exec_queue *q; 81 82 lockdep_assert_held(&vm->lock); 83 xe_vm_assert_held(vm); 84 85 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 86 if (!q->lr.pfence || 87 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, 88 &q->lr.pfence->flags)) { 89 return true; 90 } 91 } 92 93 return false; 94 } 95 96 static void free_preempt_fences(struct list_head *list) 97 { 98 struct list_head *link, *next; 99 100 list_for_each_safe(link, next, list) 101 xe_preempt_fence_free(to_preempt_fence_from_link(link)); 102 } 103 104 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list, 105 unsigned int *count) 106 { 107 lockdep_assert_held(&vm->lock); 108 xe_vm_assert_held(vm); 109 110 if (*count >= vm->preempt.num_exec_queues) 111 return 0; 112 113 for (; *count < vm->preempt.num_exec_queues; ++(*count)) { 114 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc(); 115 116 if (IS_ERR(pfence)) 117 return PTR_ERR(pfence); 118 119 list_move_tail(xe_preempt_fence_link(pfence), list); 120 } 121 122 return 0; 123 } 124 125 static int wait_for_existing_preempt_fences(struct xe_vm *vm) 126 { 127 struct xe_exec_queue *q; 128 129 xe_vm_assert_held(vm); 130 131 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 132 if (q->lr.pfence) { 133 long timeout = dma_fence_wait(q->lr.pfence, false); 134 135 /* Only -ETIME on fence indicates VM needs to be killed */ 136 if (timeout < 0 || q->lr.pfence->error == -ETIME) 137 return -ETIME; 138 139 dma_fence_put(q->lr.pfence); 140 q->lr.pfence = NULL; 141 } 142 } 143 144 return 0; 145 } 146 147 static bool xe_vm_is_idle(struct xe_vm *vm) 148 { 149 struct xe_exec_queue *q; 150 151 xe_vm_assert_held(vm); 152 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 153 if (!xe_exec_queue_is_idle(q)) 154 return false; 155 } 156 157 return true; 158 } 159 160 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list) 161 { 162 struct list_head *link; 163 struct xe_exec_queue *q; 164 165 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 166 struct dma_fence *fence; 167 168 link = list->next; 169 xe_assert(vm->xe, link != list); 170 171 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link), 172 q, q->lr.context, 173 ++q->lr.seqno); 174 dma_fence_put(q->lr.pfence); 175 q->lr.pfence = fence; 176 } 177 } 178 179 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) 180 { 181 struct xe_exec_queue *q; 182 int err; 183 184 xe_bo_assert_held(bo); 185 186 if (!vm->preempt.num_exec_queues) 187 return 0; 188 189 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues); 190 if (err) 191 return err; 192 193 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) 194 if (q->lr.pfence) { 195 dma_resv_add_fence(bo->ttm.base.resv, 196 q->lr.pfence, 197 DMA_RESV_USAGE_BOOKKEEP); 198 } 199 200 return 0; 201 } 202 203 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm, 204 struct drm_exec *exec) 205 { 206 struct xe_exec_queue *q; 207 208 lockdep_assert_held(&vm->lock); 209 xe_vm_assert_held(vm); 210 211 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 212 q->ops->resume(q); 213 
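		/*
		 * Re-attach the queue's preempt fence to the VM's reservation
		 * object (BOOKKEEP usage) so that eviction or userptr
		 * invalidation waiting on the VM's fences will again trigger
		 * preemption of this queue.
		 */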
214 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence, 215 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); 216 } 217 } 218 219 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 220 { 221 struct drm_gpuvm_exec vm_exec = { 222 .vm = &vm->gpuvm, 223 .flags = DRM_EXEC_INTERRUPTIBLE_WAIT, 224 .num_fences = 1, 225 }; 226 struct drm_exec *exec = &vm_exec.exec; 227 struct dma_fence *pfence; 228 int err; 229 bool wait; 230 231 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); 232 233 down_write(&vm->lock); 234 err = drm_gpuvm_exec_lock(&vm_exec); 235 if (err) 236 goto out_up_write; 237 238 pfence = xe_preempt_fence_create(q, q->lr.context, 239 ++q->lr.seqno); 240 if (!pfence) { 241 err = -ENOMEM; 242 goto out_fini; 243 } 244 245 list_add(&q->lr.link, &vm->preempt.exec_queues); 246 ++vm->preempt.num_exec_queues; 247 q->lr.pfence = pfence; 248 249 down_read(&vm->userptr.notifier_lock); 250 251 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence, 252 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); 253 254 /* 255 * Check to see if a preemption on VM is in flight or userptr 256 * invalidation, if so trigger this preempt fence to sync state with 257 * other preempt fences on the VM. 258 */ 259 wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm); 260 if (wait) 261 dma_fence_enable_sw_signaling(pfence); 262 263 up_read(&vm->userptr.notifier_lock); 264 265 out_fini: 266 drm_exec_fini(exec); 267 out_up_write: 268 up_write(&vm->lock); 269 270 return err; 271 } 272 273 /** 274 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM 275 * @vm: The VM. 276 * @q: The exec_queue 277 * 278 * Note that this function might be called multiple times on the same queue. 279 */ 280 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 281 { 282 if (!xe_vm_in_preempt_fence_mode(vm)) 283 return; 284 285 down_write(&vm->lock); 286 if (!list_empty(&q->lr.link)) { 287 list_del_init(&q->lr.link); 288 --vm->preempt.num_exec_queues; 289 } 290 if (q->lr.pfence) { 291 dma_fence_enable_sw_signaling(q->lr.pfence); 292 dma_fence_put(q->lr.pfence); 293 q->lr.pfence = NULL; 294 } 295 up_write(&vm->lock); 296 } 297 298 /** 299 * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs 300 * that need repinning. 301 * @vm: The VM. 302 * 303 * This function checks for whether the VM has userptrs that need repinning, 304 * and provides a release-type barrier on the userptr.notifier_lock after 305 * checking. 306 * 307 * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are. 308 */ 309 int __xe_vm_userptr_needs_repin(struct xe_vm *vm) 310 { 311 lockdep_assert_held_read(&vm->userptr.notifier_lock); 312 313 return (list_empty(&vm->userptr.repin_list) && 314 list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN; 315 } 316 317 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000 318 319 /** 320 * xe_vm_kill() - VM Kill 321 * @vm: The VM. 322 * @unlocked: Flag indicates the VM's dma-resv is not held 323 * 324 * Kill the VM by setting banned flag indicated VM is no longer available for 325 * use. If in preempt fence mode, also kill all exec queue attached to the VM. 
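 *
 * The caller must hold vm->lock. If @unlocked is set, the VM's dma-resv is
 * taken and released internally around the kill.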
 */
void xe_vm_kill(struct xe_vm *vm, bool unlocked)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);

	if (unlocked)
		xe_vm_lock(vm, false);

	vm->flags |= XE_VM_FLAG_BANNED;
	trace_xe_vm_kill(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
		q->ops->kill(q);

	if (unlocked)
		xe_vm_unlock(vm);

	/* TODO: Inform user the VM is banned */
}

/**
 * xe_vm_validate_should_retry() - Whether to retry after a validate error.
 * @exec: The drm_exec object used for locking before validation.
 * @err: The error returned from ttm_bo_validate().
 * @end: A ktime_t cookie that should be set to 0 before first use and
 * that should be reused on subsequent calls.
 *
 * With multiple active VMs, under memory pressure, it is possible that
 * ttm_bo_validate() runs into -EDEADLK and in such a case returns -ENOMEM.
 * Until TTM properly handles locking in such scenarios, the best thing the
 * driver can do is retry with a timeout. Check whether that is necessary, and
 * if so, unlock the drm_exec's objects while keeping the ticket to prepare
 * for a rerun.
 *
 * Return: true if a retry after drm_exec_init() is recommended;
 * false otherwise.
 */
bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
{
	ktime_t cur;

	if (err != -ENOMEM)
		return false;

	cur = ktime_get();
	*end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
	if (!ktime_before(cur, *end))
		return false;

	msleep(20);
	return true;
}

static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
{
	struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
	struct drm_gpuva *gpuva;
	int ret;

	lockdep_assert_held(&vm->lock);
	drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
		list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
			       &vm->rebind_list);

	ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
	if (ret)
		return ret;

	vm_bo->evicted = false;
	return 0;
}

/**
 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
 * @vm: The vm for which we are rebinding.
 * @exec: The struct drm_exec with the locked GEM objects.
 * @num_fences: The number of fences to reserve for the operation, not
 * including rebinds and validations.
 *
 * Validates all evicted gem objects and rebinds their vmas. Note that
 * rebindings may cause evictions and hence the validation-rebind
 * sequence is rerun until there are no more objects to validate.
 *
 * Return: 0 on success, negative error code on error. In particular,
 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
 * the drm_exec transaction needs to be restarted.
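 *
 * A rough sketch of the intended calling pattern, modelled on the preempt
 * rebind worker in this file (the exec and num_fences locals are
 * illustrative only):
 *
 *	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 *	drm_exec_until_all_locked(&exec) {
 *		err = drm_gpuvm_prepare_vm(&vm->gpuvm, &exec, 0);
 *		if (!err)
 *			err = xe_vm_validate_rebind(vm, &exec, num_fences);
 *		drm_exec_retry_on_contention(&exec);
 *	}
 *	drm_exec_fini(&exec);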
414 */ 415 int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec, 416 unsigned int num_fences) 417 { 418 struct drm_gem_object *obj; 419 unsigned long index; 420 int ret; 421 422 do { 423 ret = drm_gpuvm_validate(&vm->gpuvm, exec); 424 if (ret) 425 return ret; 426 427 ret = xe_vm_rebind(vm, false); 428 if (ret) 429 return ret; 430 } while (!list_empty(&vm->gpuvm.evict.list)); 431 432 drm_exec_for_each_locked_object(exec, index, obj) { 433 ret = dma_resv_reserve_fences(obj->resv, num_fences); 434 if (ret) 435 return ret; 436 } 437 438 return 0; 439 } 440 441 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, 442 bool *done) 443 { 444 int err; 445 446 err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0); 447 if (err) 448 return err; 449 450 if (xe_vm_is_idle(vm)) { 451 vm->preempt.rebind_deactivated = true; 452 *done = true; 453 return 0; 454 } 455 456 if (!preempt_fences_waiting(vm)) { 457 *done = true; 458 return 0; 459 } 460 461 err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0); 462 if (err) 463 return err; 464 465 err = wait_for_existing_preempt_fences(vm); 466 if (err) 467 return err; 468 469 /* 470 * Add validation and rebinding to the locking loop since both can 471 * cause evictions which may require blocing dma_resv locks. 472 * The fence reservation here is intended for the new preempt fences 473 * we attach at the end of the rebind work. 474 */ 475 return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues); 476 } 477 478 static void preempt_rebind_work_func(struct work_struct *w) 479 { 480 struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work); 481 struct drm_exec exec; 482 unsigned int fence_count = 0; 483 LIST_HEAD(preempt_fences); 484 ktime_t end = 0; 485 int err = 0; 486 long wait; 487 int __maybe_unused tries = 0; 488 489 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); 490 trace_xe_vm_rebind_worker_enter(vm); 491 492 down_write(&vm->lock); 493 494 if (xe_vm_is_closed_or_banned(vm)) { 495 up_write(&vm->lock); 496 trace_xe_vm_rebind_worker_exit(vm); 497 return; 498 } 499 500 retry: 501 if (xe_vm_userptr_check_repin(vm)) { 502 err = xe_vm_userptr_pin(vm); 503 if (err) 504 goto out_unlock_outer; 505 } 506 507 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); 508 509 drm_exec_until_all_locked(&exec) { 510 bool done = false; 511 512 err = xe_preempt_work_begin(&exec, vm, &done); 513 drm_exec_retry_on_contention(&exec); 514 if (err || done) { 515 drm_exec_fini(&exec); 516 if (err && xe_vm_validate_should_retry(&exec, err, &end)) 517 err = -EAGAIN; 518 519 goto out_unlock_outer; 520 } 521 } 522 523 err = alloc_preempt_fences(vm, &preempt_fences, &fence_count); 524 if (err) 525 goto out_unlock; 526 527 err = xe_vm_rebind(vm, true); 528 if (err) 529 goto out_unlock; 530 531 /* Wait on rebinds and munmap style VM unbinds */ 532 wait = dma_resv_wait_timeout(xe_vm_resv(vm), 533 DMA_RESV_USAGE_KERNEL, 534 false, MAX_SCHEDULE_TIMEOUT); 535 if (wait <= 0) { 536 err = -ETIME; 537 goto out_unlock; 538 } 539 540 #define retry_required(__tries, __vm) \ 541 (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? 
	 (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
	 __xe_vm_userptr_needs_repin(__vm))

	down_read(&vm->userptr.notifier_lock);
	if (retry_required(tries, vm)) {
		up_read(&vm->userptr.notifier_lock);
		err = -EAGAIN;
		goto out_unlock;
	}

#undef retry_required

	spin_lock(&vm->xe->ttm.lru_lock);
	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
	spin_unlock(&vm->xe->ttm.lru_lock);

	/* Point of no return. */
	arm_preempt_fences(vm, &preempt_fences);
	resume_and_reinstall_preempt_fences(vm, &exec);
	up_read(&vm->userptr.notifier_lock);

out_unlock:
	drm_exec_fini(&exec);
out_unlock_outer:
	if (err == -EAGAIN) {
		trace_xe_vm_rebind_worker_retry(vm);
		goto retry;
	}

	if (err) {
		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
		xe_vm_kill(vm, true);
	}
	up_write(&vm->lock);

	free_preempt_fences(&preempt_fences);

	trace_xe_vm_rebind_worker_exit(vm);
}

static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma)
{
	struct xe_userptr *userptr = &uvma->userptr;
	struct xe_vma *vma = &uvma->vma;
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	long err;

	/*
	 * Tell exec and rebind worker they need to repin and rebind this
	 * userptr.
	 */
	if (!xe_vm_in_fault_mode(vm) &&
	    !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
		spin_lock(&vm->userptr.invalidated_lock);
		list_move_tail(&userptr->invalidate_link,
			       &vm->userptr.invalidated);
		spin_unlock(&vm->userptr.invalidated_lock);
	}

	/*
	 * Preempt fences turn into schedule disables, pipeline these.
	 * Note that even in fault mode, we need to wait for binds and
	 * unbinds to complete, and those are attached as BOOKKEEP fences
	 * to the vm.
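	 * Enabling signalling on all BOOKKEEP fences below kicks those
	 * preemptions off, and the dma_resv_wait_timeout() afterwards waits
	 * for them, and for any pending binds/unbinds, to complete.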
607 */ 608 dma_resv_iter_begin(&cursor, xe_vm_resv(vm), 609 DMA_RESV_USAGE_BOOKKEEP); 610 dma_resv_for_each_fence_unlocked(&cursor, fence) 611 dma_fence_enable_sw_signaling(fence); 612 dma_resv_iter_end(&cursor); 613 614 err = dma_resv_wait_timeout(xe_vm_resv(vm), 615 DMA_RESV_USAGE_BOOKKEEP, 616 false, MAX_SCHEDULE_TIMEOUT); 617 XE_WARN_ON(err <= 0); 618 619 if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) { 620 err = xe_vm_invalidate_vma(vma); 621 XE_WARN_ON(err); 622 } 623 624 xe_hmm_userptr_unmap(uvma); 625 } 626 627 static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, 628 const struct mmu_notifier_range *range, 629 unsigned long cur_seq) 630 { 631 struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier); 632 struct xe_vma *vma = &uvma->vma; 633 struct xe_vm *vm = xe_vma_vm(vma); 634 635 xe_assert(vm->xe, xe_vma_is_userptr(vma)); 636 trace_xe_vma_userptr_invalidate(vma); 637 638 if (!mmu_notifier_range_blockable(range)) 639 return false; 640 641 vm_dbg(&xe_vma_vm(vma)->xe->drm, 642 "NOTIFIER: addr=0x%016llx, range=0x%016llx", 643 xe_vma_start(vma), xe_vma_size(vma)); 644 645 down_write(&vm->userptr.notifier_lock); 646 mmu_interval_set_seq(mni, cur_seq); 647 648 __vma_userptr_invalidate(vm, uvma); 649 up_write(&vm->userptr.notifier_lock); 650 trace_xe_vma_userptr_invalidate_complete(vma); 651 652 return true; 653 } 654 655 static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { 656 .invalidate = vma_userptr_invalidate, 657 }; 658 659 #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) 660 /** 661 * xe_vma_userptr_force_invalidate() - force invalidate a userptr 662 * @uvma: The userptr vma to invalidate 663 * 664 * Perform a forced userptr invalidation for testing purposes. 665 */ 666 void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) 667 { 668 struct xe_vm *vm = xe_vma_vm(&uvma->vma); 669 670 /* Protect against concurrent userptr pinning */ 671 lockdep_assert_held(&vm->lock); 672 /* Protect against concurrent notifiers */ 673 lockdep_assert_held(&vm->userptr.notifier_lock); 674 /* 675 * Protect against concurrent instances of this function and 676 * the critical exec sections 677 */ 678 xe_vm_assert_held(vm); 679 680 if (!mmu_interval_read_retry(&uvma->userptr.notifier, 681 uvma->userptr.notifier_seq)) 682 uvma->userptr.notifier_seq -= 2; 683 __vma_userptr_invalidate(vm, uvma); 684 } 685 #endif 686 687 int xe_vm_userptr_pin(struct xe_vm *vm) 688 { 689 struct xe_userptr_vma *uvma, *next; 690 int err = 0; 691 LIST_HEAD(tmp_evict); 692 693 xe_assert(vm->xe, !xe_vm_in_fault_mode(vm)); 694 lockdep_assert_held_write(&vm->lock); 695 696 /* Collect invalidated userptrs */ 697 spin_lock(&vm->userptr.invalidated_lock); 698 xe_assert(vm->xe, list_empty(&vm->userptr.repin_list)); 699 list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated, 700 userptr.invalidate_link) { 701 list_del_init(&uvma->userptr.invalidate_link); 702 list_add_tail(&uvma->userptr.repin_link, 703 &vm->userptr.repin_list); 704 } 705 spin_unlock(&vm->userptr.invalidated_lock); 706 707 /* Pin and move to bind list */ 708 list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, 709 userptr.repin_link) { 710 err = xe_vma_userptr_pin_pages(uvma); 711 if (err == -EFAULT) { 712 list_del_init(&uvma->userptr.repin_link); 713 /* 714 * We might have already done the pin once already, but 715 * then had to retry before the re-bind happened, due 716 * some other condition in the caller, but in the 717 * meantime the userptr got dinged by the notifier 
such 718 * that we need to revalidate here, but this time we hit 719 * the EFAULT. In such a case make sure we remove 720 * ourselves from the rebind list to avoid going down in 721 * flames. 722 */ 723 if (!list_empty(&uvma->vma.combined_links.rebind)) 724 list_del_init(&uvma->vma.combined_links.rebind); 725 726 /* Wait for pending binds */ 727 xe_vm_lock(vm, false); 728 dma_resv_wait_timeout(xe_vm_resv(vm), 729 DMA_RESV_USAGE_BOOKKEEP, 730 false, MAX_SCHEDULE_TIMEOUT); 731 732 err = xe_vm_invalidate_vma(&uvma->vma); 733 xe_vm_unlock(vm); 734 if (err) 735 break; 736 } else { 737 if (err) 738 break; 739 740 list_del_init(&uvma->userptr.repin_link); 741 list_move_tail(&uvma->vma.combined_links.rebind, 742 &vm->rebind_list); 743 } 744 } 745 746 if (err) { 747 down_write(&vm->userptr.notifier_lock); 748 spin_lock(&vm->userptr.invalidated_lock); 749 list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, 750 userptr.repin_link) { 751 list_del_init(&uvma->userptr.repin_link); 752 list_move_tail(&uvma->userptr.invalidate_link, 753 &vm->userptr.invalidated); 754 } 755 spin_unlock(&vm->userptr.invalidated_lock); 756 up_write(&vm->userptr.notifier_lock); 757 } 758 return err; 759 } 760 761 /** 762 * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs 763 * that need repinning. 764 * @vm: The VM. 765 * 766 * This function does an advisory check for whether the VM has userptrs that 767 * need repinning. 768 * 769 * Return: 0 if there are no indications of userptrs needing repinning, 770 * -EAGAIN if there are. 771 */ 772 int xe_vm_userptr_check_repin(struct xe_vm *vm) 773 { 774 return (list_empty_careful(&vm->userptr.repin_list) && 775 list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN; 776 } 777 778 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) 779 { 780 int i; 781 782 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) { 783 if (!vops->pt_update_ops[i].num_ops) 784 continue; 785 786 vops->pt_update_ops[i].ops = 787 kmalloc_array(vops->pt_update_ops[i].num_ops, 788 sizeof(*vops->pt_update_ops[i].ops), 789 GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 790 if (!vops->pt_update_ops[i].ops) 791 return array_of_binds ? 
-ENOBUFS : -ENOMEM; 792 } 793 794 return 0; 795 } 796 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO); 797 798 static void xe_vma_ops_fini(struct xe_vma_ops *vops) 799 { 800 int i; 801 802 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 803 kfree(vops->pt_update_ops[i].ops); 804 } 805 806 static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask) 807 { 808 int i; 809 810 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 811 if (BIT(i) & tile_mask) 812 ++vops->pt_update_ops[i].num_ops; 813 } 814 815 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, 816 u8 tile_mask) 817 { 818 INIT_LIST_HEAD(&op->link); 819 op->tile_mask = tile_mask; 820 op->base.op = DRM_GPUVA_OP_MAP; 821 op->base.map.va.addr = vma->gpuva.va.addr; 822 op->base.map.va.range = vma->gpuva.va.range; 823 op->base.map.gem.obj = vma->gpuva.gem.obj; 824 op->base.map.gem.offset = vma->gpuva.gem.offset; 825 op->map.vma = vma; 826 op->map.immediate = true; 827 op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE; 828 op->map.is_null = xe_vma_is_null(vma); 829 } 830 831 static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, 832 u8 tile_mask) 833 { 834 struct xe_vma_op *op; 835 836 op = kzalloc(sizeof(*op), GFP_KERNEL); 837 if (!op) 838 return -ENOMEM; 839 840 xe_vm_populate_rebind(op, vma, tile_mask); 841 list_add_tail(&op->link, &vops->list); 842 xe_vma_ops_incr_pt_update_ops(vops, tile_mask); 843 844 return 0; 845 } 846 847 static struct dma_fence *ops_execute(struct xe_vm *vm, 848 struct xe_vma_ops *vops); 849 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 850 struct xe_exec_queue *q, 851 struct xe_sync_entry *syncs, u32 num_syncs); 852 853 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) 854 { 855 struct dma_fence *fence; 856 struct xe_vma *vma, *next; 857 struct xe_vma_ops vops; 858 struct xe_vma_op *op, *next_op; 859 int err, i; 860 861 lockdep_assert_held(&vm->lock); 862 if ((xe_vm_in_lr_mode(vm) && !rebind_worker) || 863 list_empty(&vm->rebind_list)) 864 return 0; 865 866 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 867 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 868 vops.pt_update_ops[i].wait_vm_bookkeep = true; 869 870 xe_vm_assert_held(vm); 871 list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { 872 xe_assert(vm->xe, vma->tile_present); 873 874 if (rebind_worker) 875 trace_xe_vma_rebind_worker(vma); 876 else 877 trace_xe_vma_rebind_exec(vma); 878 879 err = xe_vm_ops_add_rebind(&vops, vma, 880 vma->tile_present); 881 if (err) 882 goto free_ops; 883 } 884 885 err = xe_vma_ops_alloc(&vops, false); 886 if (err) 887 goto free_ops; 888 889 fence = ops_execute(vm, &vops); 890 if (IS_ERR(fence)) { 891 err = PTR_ERR(fence); 892 } else { 893 dma_fence_put(fence); 894 list_for_each_entry_safe(vma, next, &vm->rebind_list, 895 combined_links.rebind) 896 list_del_init(&vma->combined_links.rebind); 897 } 898 free_ops: 899 list_for_each_entry_safe(op, next_op, &vops.list, link) { 900 list_del(&op->link); 901 kfree(op); 902 } 903 xe_vma_ops_fini(&vops); 904 905 return err; 906 } 907 908 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask) 909 { 910 struct dma_fence *fence = NULL; 911 struct xe_vma_ops vops; 912 struct xe_vma_op *op, *next_op; 913 struct xe_tile *tile; 914 u8 id; 915 int err; 916 917 lockdep_assert_held(&vm->lock); 918 xe_vm_assert_held(vm); 919 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 920 921 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 922 for_each_tile(tile, vm->xe, id) { 923 
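		/*
		 * Fault-mode rebinds run on each tile's migrate exec queue
		 * and wait on the VM's BOOKKEEP fences before updating the
		 * page tables.
		 */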
vops.pt_update_ops[id].wait_vm_bookkeep = true; 924 vops.pt_update_ops[tile->id].q = 925 xe_tile_migrate_exec_queue(tile); 926 } 927 928 err = xe_vm_ops_add_rebind(&vops, vma, tile_mask); 929 if (err) 930 return ERR_PTR(err); 931 932 err = xe_vma_ops_alloc(&vops, false); 933 if (err) { 934 fence = ERR_PTR(err); 935 goto free_ops; 936 } 937 938 fence = ops_execute(vm, &vops); 939 940 free_ops: 941 list_for_each_entry_safe(op, next_op, &vops.list, link) { 942 list_del(&op->link); 943 kfree(op); 944 } 945 xe_vma_ops_fini(&vops); 946 947 return fence; 948 } 949 950 static void xe_vma_free(struct xe_vma *vma) 951 { 952 if (xe_vma_is_userptr(vma)) 953 kfree(to_userptr_vma(vma)); 954 else 955 kfree(vma); 956 } 957 958 #define VMA_CREATE_FLAG_READ_ONLY BIT(0) 959 #define VMA_CREATE_FLAG_IS_NULL BIT(1) 960 #define VMA_CREATE_FLAG_DUMPABLE BIT(2) 961 962 static struct xe_vma *xe_vma_create(struct xe_vm *vm, 963 struct xe_bo *bo, 964 u64 bo_offset_or_userptr, 965 u64 start, u64 end, 966 u16 pat_index, unsigned int flags) 967 { 968 struct xe_vma *vma; 969 struct xe_tile *tile; 970 u8 id; 971 bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY); 972 bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL); 973 bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE); 974 975 xe_assert(vm->xe, start < end); 976 xe_assert(vm->xe, end < vm->size); 977 978 /* 979 * Allocate and ensure that the xe_vma_is_userptr() return 980 * matches what was allocated. 981 */ 982 if (!bo && !is_null) { 983 struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL); 984 985 if (!uvma) 986 return ERR_PTR(-ENOMEM); 987 988 vma = &uvma->vma; 989 } else { 990 vma = kzalloc(sizeof(*vma), GFP_KERNEL); 991 if (!vma) 992 return ERR_PTR(-ENOMEM); 993 994 if (is_null) 995 vma->gpuva.flags |= DRM_GPUVA_SPARSE; 996 if (bo) 997 vma->gpuva.gem.obj = &bo->ttm.base; 998 } 999 1000 INIT_LIST_HEAD(&vma->combined_links.rebind); 1001 1002 INIT_LIST_HEAD(&vma->gpuva.gem.entry); 1003 vma->gpuva.vm = &vm->gpuvm; 1004 vma->gpuva.va.addr = start; 1005 vma->gpuva.va.range = end - start + 1; 1006 if (read_only) 1007 vma->gpuva.flags |= XE_VMA_READ_ONLY; 1008 if (dumpable) 1009 vma->gpuva.flags |= XE_VMA_DUMPABLE; 1010 1011 for_each_tile(tile, vm->xe, id) 1012 vma->tile_mask |= 0x1 << id; 1013 1014 if (vm->xe->info.has_atomic_enable_pte_bit) 1015 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; 1016 1017 vma->pat_index = pat_index; 1018 1019 if (bo) { 1020 struct drm_gpuvm_bo *vm_bo; 1021 1022 xe_bo_assert_held(bo); 1023 1024 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base); 1025 if (IS_ERR(vm_bo)) { 1026 xe_vma_free(vma); 1027 return ERR_CAST(vm_bo); 1028 } 1029 1030 drm_gpuvm_bo_extobj_add(vm_bo); 1031 drm_gem_object_get(&bo->ttm.base); 1032 vma->gpuva.gem.offset = bo_offset_or_userptr; 1033 drm_gpuva_link(&vma->gpuva, vm_bo); 1034 drm_gpuvm_bo_put(vm_bo); 1035 } else /* userptr or null */ { 1036 if (!is_null) { 1037 struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; 1038 u64 size = end - start + 1; 1039 int err; 1040 1041 INIT_LIST_HEAD(&userptr->invalidate_link); 1042 INIT_LIST_HEAD(&userptr->repin_link); 1043 vma->gpuva.gem.offset = bo_offset_or_userptr; 1044 mutex_init(&userptr->unmap_mutex); 1045 1046 err = mmu_interval_notifier_insert(&userptr->notifier, 1047 current->mm, 1048 xe_vma_userptr(vma), size, 1049 &vma_userptr_notifier_ops); 1050 if (err) { 1051 xe_vma_free(vma); 1052 return ERR_PTR(err); 1053 } 1054 1055 userptr->notifier_seq = LONG_MAX; 1056 } 1057 1058 xe_vm_get(vm); 1059 } 1060 1061 return vma; 1062 } 1063 1064 static void 
xe_vma_destroy_late(struct xe_vma *vma)
{
	struct xe_vm *vm = xe_vma_vm(vma);

	if (vma->ufence) {
		xe_sync_ufence_put(vma->ufence);
		vma->ufence = NULL;
	}

	if (xe_vma_is_userptr(vma)) {
		struct xe_userptr_vma *uvma = to_userptr_vma(vma);
		struct xe_userptr *userptr = &uvma->userptr;

		if (userptr->sg)
			xe_hmm_userptr_free_sg(uvma);

		/*
		 * Since userptr pages are not pinned, we can't remove
		 * the notifier until we're sure the GPU is not accessing
		 * them anymore.
		 */
		mmu_interval_notifier_remove(&userptr->notifier);
		mutex_destroy(&userptr->unmap_mutex);
		xe_vm_put(vm);
	} else if (xe_vma_is_null(vma)) {
		xe_vm_put(vm);
	} else {
		xe_bo_put(xe_vma_bo(vma));
	}

	xe_vma_free(vma);
}

static void vma_destroy_work_func(struct work_struct *w)
{
	struct xe_vma *vma =
		container_of(w, struct xe_vma, destroy_work);

	xe_vma_destroy_late(vma);
}

static void vma_destroy_cb(struct dma_fence *fence,
			   struct dma_fence_cb *cb)
{
	struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);

	INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
	queue_work(system_unbound_wq, &vma->destroy_work);
}

static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
{
	struct xe_vm *vm = xe_vma_vm(vma);

	lockdep_assert_held_write(&vm->lock);
	xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));

	if (xe_vma_is_userptr(vma)) {
		xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);

		spin_lock(&vm->userptr.invalidated_lock);
		xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link));
		list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
		spin_unlock(&vm->userptr.invalidated_lock);
	} else if (!xe_vma_is_null(vma)) {
		xe_bo_assert_held(xe_vma_bo(vma));

		drm_gpuva_unlink(&vma->gpuva);
	}

	xe_vm_assert_held(vm);
	if (fence) {
		int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
						 vma_destroy_cb);

		if (ret) {
			XE_WARN_ON(ret != -ENOENT);
			xe_vma_destroy_late(vma);
		}
	} else {
		xe_vma_destroy_late(vma);
	}
}

/**
 * xe_vm_lock_vma() - drm_exec utility to lock a vma
 * @exec: The drm_exec object we're currently locking for.
 * @vma: The vma for which we want to lock the vm resv and any attached
 * object's resv.
 *
 * Return: 0 on success, negative error code on error. In particular
 * may return -EDEADLK on WW transaction contention and -EINTR if
 * an interruptible wait is terminated by a signal.
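 *
 * The VM's reservation object is always locked. The BO's own reservation
 * object is locked only if the BO is not private to this VM, i.e. it does
 * not share the VM's resv.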
1157 */ 1158 int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma) 1159 { 1160 struct xe_vm *vm = xe_vma_vm(vma); 1161 struct xe_bo *bo = xe_vma_bo(vma); 1162 int err; 1163 1164 XE_WARN_ON(!vm); 1165 1166 err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 1167 if (!err && bo && !bo->vm) 1168 err = drm_exec_lock_obj(exec, &bo->ttm.base); 1169 1170 return err; 1171 } 1172 1173 static void xe_vma_destroy_unlocked(struct xe_vma *vma) 1174 { 1175 struct drm_exec exec; 1176 int err; 1177 1178 drm_exec_init(&exec, 0, 0); 1179 drm_exec_until_all_locked(&exec) { 1180 err = xe_vm_lock_vma(&exec, vma); 1181 drm_exec_retry_on_contention(&exec); 1182 if (XE_WARN_ON(err)) 1183 break; 1184 } 1185 1186 xe_vma_destroy(vma, NULL); 1187 1188 drm_exec_fini(&exec); 1189 } 1190 1191 struct xe_vma * 1192 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range) 1193 { 1194 struct drm_gpuva *gpuva; 1195 1196 lockdep_assert_held(&vm->lock); 1197 1198 if (xe_vm_is_closed_or_banned(vm)) 1199 return NULL; 1200 1201 xe_assert(vm->xe, start + range <= vm->size); 1202 1203 gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range); 1204 1205 return gpuva ? gpuva_to_vma(gpuva) : NULL; 1206 } 1207 1208 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma) 1209 { 1210 int err; 1211 1212 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1213 lockdep_assert_held(&vm->lock); 1214 1215 mutex_lock(&vm->snap_mutex); 1216 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva); 1217 mutex_unlock(&vm->snap_mutex); 1218 XE_WARN_ON(err); /* Shouldn't be possible */ 1219 1220 return err; 1221 } 1222 1223 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma) 1224 { 1225 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1226 lockdep_assert_held(&vm->lock); 1227 1228 mutex_lock(&vm->snap_mutex); 1229 drm_gpuva_remove(&vma->gpuva); 1230 mutex_unlock(&vm->snap_mutex); 1231 if (vm->usm.last_fault_vma == vma) 1232 vm->usm.last_fault_vma = NULL; 1233 } 1234 1235 static struct drm_gpuva_op *xe_vm_op_alloc(void) 1236 { 1237 struct xe_vma_op *op; 1238 1239 op = kzalloc(sizeof(*op), GFP_KERNEL); 1240 1241 if (unlikely(!op)) 1242 return NULL; 1243 1244 return &op->base; 1245 } 1246 1247 static void xe_vm_free(struct drm_gpuvm *gpuvm); 1248 1249 static const struct drm_gpuvm_ops gpuvm_ops = { 1250 .op_alloc = xe_vm_op_alloc, 1251 .vm_bo_validate = xe_gpuvm_validate, 1252 .vm_free = xe_vm_free, 1253 }; 1254 1255 static u64 pde_encode_pat_index(u16 pat_index) 1256 { 1257 u64 pte = 0; 1258 1259 if (pat_index & BIT(0)) 1260 pte |= XE_PPGTT_PTE_PAT0; 1261 1262 if (pat_index & BIT(1)) 1263 pte |= XE_PPGTT_PTE_PAT1; 1264 1265 return pte; 1266 } 1267 1268 static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level) 1269 { 1270 u64 pte = 0; 1271 1272 if (pat_index & BIT(0)) 1273 pte |= XE_PPGTT_PTE_PAT0; 1274 1275 if (pat_index & BIT(1)) 1276 pte |= XE_PPGTT_PTE_PAT1; 1277 1278 if (pat_index & BIT(2)) { 1279 if (pt_level) 1280 pte |= XE_PPGTT_PDE_PDPE_PAT2; 1281 else 1282 pte |= XE_PPGTT_PTE_PAT2; 1283 } 1284 1285 if (pat_index & BIT(3)) 1286 pte |= XELPG_PPGTT_PTE_PAT3; 1287 1288 if (pat_index & (BIT(4))) 1289 pte |= XE2_PPGTT_PTE_PAT4; 1290 1291 return pte; 1292 } 1293 1294 static u64 pte_encode_ps(u32 pt_level) 1295 { 1296 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL); 1297 1298 if (pt_level == 1) 1299 return XE_PDE_PS_2M; 1300 else if (pt_level == 2) 1301 return XE_PDPE_PS_1G; 1302 1303 return 0; 1304 } 1305 1306 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset, 1307 const u16 pat_index) 1308 { 1309 u64 pde; 1310 1311 pde = 
xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1312 pde |= XE_PAGE_PRESENT | XE_PAGE_RW; 1313 pde |= pde_encode_pat_index(pat_index); 1314 1315 return pde; 1316 } 1317 1318 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, 1319 u16 pat_index, u32 pt_level) 1320 { 1321 u64 pte; 1322 1323 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1324 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1325 pte |= pte_encode_pat_index(pat_index, pt_level); 1326 pte |= pte_encode_ps(pt_level); 1327 1328 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) 1329 pte |= XE_PPGTT_PTE_DM; 1330 1331 return pte; 1332 } 1333 1334 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, 1335 u16 pat_index, u32 pt_level) 1336 { 1337 pte |= XE_PAGE_PRESENT; 1338 1339 if (likely(!xe_vma_read_only(vma))) 1340 pte |= XE_PAGE_RW; 1341 1342 pte |= pte_encode_pat_index(pat_index, pt_level); 1343 pte |= pte_encode_ps(pt_level); 1344 1345 if (unlikely(xe_vma_is_null(vma))) 1346 pte |= XE_PTE_NULL; 1347 1348 return pte; 1349 } 1350 1351 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, 1352 u16 pat_index, 1353 u32 pt_level, bool devmem, u64 flags) 1354 { 1355 u64 pte; 1356 1357 /* Avoid passing random bits directly as flags */ 1358 xe_assert(xe, !(flags & ~XE_PTE_PS64)); 1359 1360 pte = addr; 1361 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1362 pte |= pte_encode_pat_index(pat_index, pt_level); 1363 pte |= pte_encode_ps(pt_level); 1364 1365 if (devmem) 1366 pte |= XE_PPGTT_PTE_DM; 1367 1368 pte |= flags; 1369 1370 return pte; 1371 } 1372 1373 static const struct xe_pt_ops xelp_pt_ops = { 1374 .pte_encode_bo = xelp_pte_encode_bo, 1375 .pte_encode_vma = xelp_pte_encode_vma, 1376 .pte_encode_addr = xelp_pte_encode_addr, 1377 .pde_encode_bo = xelp_pde_encode_bo, 1378 }; 1379 1380 static void vm_destroy_work_func(struct work_struct *w); 1381 1382 /** 1383 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the 1384 * given tile and vm. 1385 * @xe: xe device. 1386 * @tile: tile to set up for. 1387 * @vm: vm to set up for. 1388 * 1389 * Sets up a pagetable tree with one page-table per level and a single 1390 * leaf PTE. All pagetable entries point to the single page-table or, 1391 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and 1392 * writes become NOPs. 1393 * 1394 * Return: 0 on success, negative error code on error. 
1395 */ 1396 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, 1397 struct xe_vm *vm) 1398 { 1399 u8 id = tile->id; 1400 int i; 1401 1402 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) { 1403 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i); 1404 if (IS_ERR(vm->scratch_pt[id][i])) 1405 return PTR_ERR(vm->scratch_pt[id][i]); 1406 1407 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); 1408 } 1409 1410 return 0; 1411 } 1412 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO); 1413 1414 static void xe_vm_free_scratch(struct xe_vm *vm) 1415 { 1416 struct xe_tile *tile; 1417 u8 id; 1418 1419 if (!xe_vm_has_scratch(vm)) 1420 return; 1421 1422 for_each_tile(tile, vm->xe, id) { 1423 u32 i; 1424 1425 if (!vm->pt_root[id]) 1426 continue; 1427 1428 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i) 1429 if (vm->scratch_pt[id][i]) 1430 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL); 1431 } 1432 } 1433 1434 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) 1435 { 1436 struct drm_gem_object *vm_resv_obj; 1437 struct xe_vm *vm; 1438 int err, number_tiles = 0; 1439 struct xe_tile *tile; 1440 u8 id; 1441 1442 vm = kzalloc(sizeof(*vm), GFP_KERNEL); 1443 if (!vm) 1444 return ERR_PTR(-ENOMEM); 1445 1446 vm->xe = xe; 1447 1448 vm->size = 1ull << xe->info.va_bits; 1449 1450 vm->flags = flags; 1451 1452 init_rwsem(&vm->lock); 1453 mutex_init(&vm->snap_mutex); 1454 1455 INIT_LIST_HEAD(&vm->rebind_list); 1456 1457 INIT_LIST_HEAD(&vm->userptr.repin_list); 1458 INIT_LIST_HEAD(&vm->userptr.invalidated); 1459 init_rwsem(&vm->userptr.notifier_lock); 1460 spin_lock_init(&vm->userptr.invalidated_lock); 1461 1462 ttm_lru_bulk_move_init(&vm->lru_bulk_move); 1463 1464 INIT_WORK(&vm->destroy_work, vm_destroy_work_func); 1465 1466 INIT_LIST_HEAD(&vm->preempt.exec_queues); 1467 vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */ 1468 1469 for_each_tile(tile, xe, id) 1470 xe_range_fence_tree_init(&vm->rftree[id]); 1471 1472 vm->pt_ops = &xelp_pt_ops; 1473 1474 /* 1475 * Long-running workloads are not protected by the scheduler references. 1476 * By design, run_job for long-running workloads returns NULL and the 1477 * scheduler drops all the references of it, hence protecting the VM 1478 * for this case is necessary. 
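	 * The runtime PM reference taken below is dropped again in
	 * vm_destroy_work_func(), or on this function's error path.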
1479 */ 1480 if (flags & XE_VM_FLAG_LR_MODE) 1481 xe_pm_runtime_get_noresume(xe); 1482 1483 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); 1484 if (!vm_resv_obj) { 1485 err = -ENOMEM; 1486 goto err_no_resv; 1487 } 1488 1489 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm, 1490 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops); 1491 1492 drm_gem_object_put(vm_resv_obj); 1493 1494 err = xe_vm_lock(vm, true); 1495 if (err) 1496 goto err_close; 1497 1498 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) 1499 vm->flags |= XE_VM_FLAG_64K; 1500 1501 for_each_tile(tile, xe, id) { 1502 if (flags & XE_VM_FLAG_MIGRATION && 1503 tile->id != XE_VM_FLAG_TILE_ID(flags)) 1504 continue; 1505 1506 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level); 1507 if (IS_ERR(vm->pt_root[id])) { 1508 err = PTR_ERR(vm->pt_root[id]); 1509 vm->pt_root[id] = NULL; 1510 goto err_unlock_close; 1511 } 1512 } 1513 1514 if (xe_vm_has_scratch(vm)) { 1515 for_each_tile(tile, xe, id) { 1516 if (!vm->pt_root[id]) 1517 continue; 1518 1519 err = xe_vm_create_scratch(xe, tile, vm); 1520 if (err) 1521 goto err_unlock_close; 1522 } 1523 vm->batch_invalidate_tlb = true; 1524 } 1525 1526 if (vm->flags & XE_VM_FLAG_LR_MODE) { 1527 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); 1528 vm->batch_invalidate_tlb = false; 1529 } 1530 1531 /* Fill pt_root after allocating scratch tables */ 1532 for_each_tile(tile, xe, id) { 1533 if (!vm->pt_root[id]) 1534 continue; 1535 1536 xe_pt_populate_empty(tile, vm, vm->pt_root[id]); 1537 } 1538 xe_vm_unlock(vm); 1539 1540 /* Kernel migration VM shouldn't have a circular loop.. */ 1541 if (!(flags & XE_VM_FLAG_MIGRATION)) { 1542 for_each_tile(tile, xe, id) { 1543 struct xe_exec_queue *q; 1544 u32 create_flags = EXEC_QUEUE_FLAG_VM; 1545 1546 if (!vm->pt_root[id]) 1547 continue; 1548 1549 q = xe_exec_queue_create_bind(xe, tile, create_flags, 0); 1550 if (IS_ERR(q)) { 1551 err = PTR_ERR(q); 1552 goto err_close; 1553 } 1554 vm->q[id] = q; 1555 number_tiles++; 1556 } 1557 } 1558 1559 if (number_tiles > 1) 1560 vm->composite_fence_ctx = dma_fence_context_alloc(1); 1561 1562 trace_xe_vm_create(vm); 1563 1564 return vm; 1565 1566 err_unlock_close: 1567 xe_vm_unlock(vm); 1568 err_close: 1569 xe_vm_close_and_put(vm); 1570 return ERR_PTR(err); 1571 1572 err_no_resv: 1573 mutex_destroy(&vm->snap_mutex); 1574 for_each_tile(tile, xe, id) 1575 xe_range_fence_tree_fini(&vm->rftree[id]); 1576 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 1577 kfree(vm); 1578 if (flags & XE_VM_FLAG_LR_MODE) 1579 xe_pm_runtime_put(xe); 1580 return ERR_PTR(err); 1581 } 1582 1583 static void xe_vm_close(struct xe_vm *vm) 1584 { 1585 down_write(&vm->lock); 1586 vm->size = 0; 1587 up_write(&vm->lock); 1588 } 1589 1590 void xe_vm_close_and_put(struct xe_vm *vm) 1591 { 1592 LIST_HEAD(contested); 1593 struct xe_device *xe = vm->xe; 1594 struct xe_tile *tile; 1595 struct xe_vma *vma, *next_vma; 1596 struct drm_gpuva *gpuva, *next; 1597 u8 id; 1598 1599 xe_assert(xe, !vm->preempt.num_exec_queues); 1600 1601 xe_vm_close(vm); 1602 if (xe_vm_in_preempt_fence_mode(vm)) 1603 flush_work(&vm->preempt.rebind_work); 1604 1605 down_write(&vm->lock); 1606 for_each_tile(tile, xe, id) { 1607 if (vm->q[id]) 1608 xe_exec_queue_last_fence_put(vm->q[id], vm); 1609 } 1610 up_write(&vm->lock); 1611 1612 for_each_tile(tile, xe, id) { 1613 if (vm->q[id]) { 1614 xe_exec_queue_kill(vm->q[id]); 1615 xe_exec_queue_put(vm->q[id]); 1616 vm->q[id] = NULL; 1617 } 1618 } 1619 1620 down_write(&vm->lock); 1621 
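	/*
	 * Hold both the outer vm->lock and the VM's dma-resv while tearing
	 * down the remaining VMAs, scratch tables and page-table roots below.
	 */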
xe_vm_lock(vm, false); 1622 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) { 1623 vma = gpuva_to_vma(gpuva); 1624 1625 if (xe_vma_has_no_bo(vma)) { 1626 down_read(&vm->userptr.notifier_lock); 1627 vma->gpuva.flags |= XE_VMA_DESTROYED; 1628 up_read(&vm->userptr.notifier_lock); 1629 } 1630 1631 xe_vm_remove_vma(vm, vma); 1632 1633 /* easy case, remove from VMA? */ 1634 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) { 1635 list_del_init(&vma->combined_links.rebind); 1636 xe_vma_destroy(vma, NULL); 1637 continue; 1638 } 1639 1640 list_move_tail(&vma->combined_links.destroy, &contested); 1641 vma->gpuva.flags |= XE_VMA_DESTROYED; 1642 } 1643 1644 /* 1645 * All vm operations will add shared fences to resv. 1646 * The only exception is eviction for a shared object, 1647 * but even so, the unbind when evicted would still 1648 * install a fence to resv. Hence it's safe to 1649 * destroy the pagetables immediately. 1650 */ 1651 xe_vm_free_scratch(vm); 1652 1653 for_each_tile(tile, xe, id) { 1654 if (vm->pt_root[id]) { 1655 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); 1656 vm->pt_root[id] = NULL; 1657 } 1658 } 1659 xe_vm_unlock(vm); 1660 1661 /* 1662 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL 1663 * Since we hold a refcount to the bo, we can remove and free 1664 * the members safely without locking. 1665 */ 1666 list_for_each_entry_safe(vma, next_vma, &contested, 1667 combined_links.destroy) { 1668 list_del_init(&vma->combined_links.destroy); 1669 xe_vma_destroy_unlocked(vma); 1670 } 1671 1672 up_write(&vm->lock); 1673 1674 down_write(&xe->usm.lock); 1675 if (vm->usm.asid) { 1676 void *lookup; 1677 1678 xe_assert(xe, xe->info.has_asid); 1679 xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION)); 1680 1681 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); 1682 xe_assert(xe, lookup == vm); 1683 } 1684 up_write(&xe->usm.lock); 1685 1686 for_each_tile(tile, xe, id) 1687 xe_range_fence_tree_fini(&vm->rftree[id]); 1688 1689 xe_vm_put(vm); 1690 } 1691 1692 static void vm_destroy_work_func(struct work_struct *w) 1693 { 1694 struct xe_vm *vm = 1695 container_of(w, struct xe_vm, destroy_work); 1696 struct xe_device *xe = vm->xe; 1697 struct xe_tile *tile; 1698 u8 id; 1699 1700 /* xe_vm_close_and_put was not called? 
*/ 1701 xe_assert(xe, !vm->size); 1702 1703 if (xe_vm_in_preempt_fence_mode(vm)) 1704 flush_work(&vm->preempt.rebind_work); 1705 1706 mutex_destroy(&vm->snap_mutex); 1707 1708 if (vm->flags & XE_VM_FLAG_LR_MODE) 1709 xe_pm_runtime_put(xe); 1710 1711 for_each_tile(tile, xe, id) 1712 XE_WARN_ON(vm->pt_root[id]); 1713 1714 trace_xe_vm_free(vm); 1715 1716 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 1717 1718 if (vm->xef) 1719 xe_file_put(vm->xef); 1720 1721 kfree(vm); 1722 } 1723 1724 static void xe_vm_free(struct drm_gpuvm *gpuvm) 1725 { 1726 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); 1727 1728 /* To destroy the VM we need to be able to sleep */ 1729 queue_work(system_unbound_wq, &vm->destroy_work); 1730 } 1731 1732 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) 1733 { 1734 struct xe_vm *vm; 1735 1736 mutex_lock(&xef->vm.lock); 1737 vm = xa_load(&xef->vm.xa, id); 1738 if (vm) 1739 xe_vm_get(vm); 1740 mutex_unlock(&xef->vm.lock); 1741 1742 return vm; 1743 } 1744 1745 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile) 1746 { 1747 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0, 1748 tile_to_xe(tile)->pat.idx[XE_CACHE_WB]); 1749 } 1750 1751 static struct xe_exec_queue * 1752 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 1753 { 1754 return q ? q : vm->q[0]; 1755 } 1756 1757 static struct xe_user_fence * 1758 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) 1759 { 1760 unsigned int i; 1761 1762 for (i = 0; i < num_syncs; i++) { 1763 struct xe_sync_entry *e = &syncs[i]; 1764 1765 if (xe_sync_is_ufence(e)) 1766 return xe_sync_ufence_get(e); 1767 } 1768 1769 return NULL; 1770 } 1771 1772 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ 1773 DRM_XE_VM_CREATE_FLAG_LR_MODE | \ 1774 DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 1775 1776 int xe_vm_create_ioctl(struct drm_device *dev, void *data, 1777 struct drm_file *file) 1778 { 1779 struct xe_device *xe = to_xe_device(dev); 1780 struct xe_file *xef = to_xe_file(file); 1781 struct drm_xe_vm_create *args = data; 1782 struct xe_tile *tile; 1783 struct xe_vm *vm; 1784 u32 id, asid; 1785 int err; 1786 u32 flags = 0; 1787 1788 if (XE_IOCTL_DBG(xe, args->extensions)) 1789 return -EINVAL; 1790 1791 if (XE_WA(xe_root_mmio_gt(xe), 14016763929)) 1792 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE; 1793 1794 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && 1795 !xe->info.has_usm)) 1796 return -EINVAL; 1797 1798 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 1799 return -EINVAL; 1800 1801 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS)) 1802 return -EINVAL; 1803 1804 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE && 1805 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) 1806 return -EINVAL; 1807 1808 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) && 1809 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) 1810 return -EINVAL; 1811 1812 if (XE_IOCTL_DBG(xe, args->extensions)) 1813 return -EINVAL; 1814 1815 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE) 1816 flags |= XE_VM_FLAG_SCRATCH_PAGE; 1817 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) 1818 flags |= XE_VM_FLAG_LR_MODE; 1819 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 1820 flags |= XE_VM_FLAG_FAULT_MODE; 1821 1822 vm = xe_vm_create(xe, flags); 1823 if (IS_ERR(vm)) 1824 return PTR_ERR(vm); 1825 1826 if (xe->info.has_asid) { 1827 down_write(&xe->usm.lock); 1828 err = 
xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, 1829 XA_LIMIT(1, XE_MAX_ASID - 1), 1830 &xe->usm.next_asid, GFP_KERNEL); 1831 up_write(&xe->usm.lock); 1832 if (err < 0) 1833 goto err_close_and_put; 1834 1835 vm->usm.asid = asid; 1836 } 1837 1838 vm->xef = xe_file_get(xef); 1839 1840 /* Record BO memory for VM pagetable created against client */ 1841 for_each_tile(tile, xe, id) 1842 if (vm->pt_root[id]) 1843 xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo); 1844 1845 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) 1846 /* Warning: Security issue - never enable by default */ 1847 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); 1848 #endif 1849 1850 /* user id alloc must always be last in ioctl to prevent UAF */ 1851 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); 1852 if (err) 1853 goto err_close_and_put; 1854 1855 args->vm_id = id; 1856 1857 return 0; 1858 1859 err_close_and_put: 1860 xe_vm_close_and_put(vm); 1861 1862 return err; 1863 } 1864 1865 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, 1866 struct drm_file *file) 1867 { 1868 struct xe_device *xe = to_xe_device(dev); 1869 struct xe_file *xef = to_xe_file(file); 1870 struct drm_xe_vm_destroy *args = data; 1871 struct xe_vm *vm; 1872 int err = 0; 1873 1874 if (XE_IOCTL_DBG(xe, args->pad) || 1875 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 1876 return -EINVAL; 1877 1878 mutex_lock(&xef->vm.lock); 1879 vm = xa_load(&xef->vm.xa, args->vm_id); 1880 if (XE_IOCTL_DBG(xe, !vm)) 1881 err = -ENOENT; 1882 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues)) 1883 err = -EBUSY; 1884 else 1885 xa_erase(&xef->vm.xa, args->vm_id); 1886 mutex_unlock(&xef->vm.lock); 1887 1888 if (!err) 1889 xe_vm_close_and_put(vm); 1890 1891 return err; 1892 } 1893 1894 static const u32 region_to_mem_type[] = { 1895 XE_PL_TT, 1896 XE_PL_VRAM0, 1897 XE_PL_VRAM1, 1898 }; 1899 1900 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma, 1901 bool post_commit) 1902 { 1903 down_read(&vm->userptr.notifier_lock); 1904 vma->gpuva.flags |= XE_VMA_DESTROYED; 1905 up_read(&vm->userptr.notifier_lock); 1906 if (post_commit) 1907 xe_vm_remove_vma(vm, vma); 1908 } 1909 1910 #undef ULL 1911 #define ULL unsigned long long 1912 1913 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) 1914 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 1915 { 1916 struct xe_vma *vma; 1917 1918 switch (op->op) { 1919 case DRM_GPUVA_OP_MAP: 1920 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx", 1921 (ULL)op->map.va.addr, (ULL)op->map.va.range); 1922 break; 1923 case DRM_GPUVA_OP_REMAP: 1924 vma = gpuva_to_vma(op->remap.unmap->va); 1925 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 1926 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 1927 op->remap.unmap->keep ? 1 : 0); 1928 if (op->remap.prev) 1929 vm_dbg(&xe->drm, 1930 "REMAP:PREV: addr=0x%016llx, range=0x%016llx", 1931 (ULL)op->remap.prev->va.addr, 1932 (ULL)op->remap.prev->va.range); 1933 if (op->remap.next) 1934 vm_dbg(&xe->drm, 1935 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx", 1936 (ULL)op->remap.next->va.addr, 1937 (ULL)op->remap.next->va.range); 1938 break; 1939 case DRM_GPUVA_OP_UNMAP: 1940 vma = gpuva_to_vma(op->unmap.va); 1941 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 1942 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 1943 op->unmap.keep ? 
1 : 0); 1944 break; 1945 case DRM_GPUVA_OP_PREFETCH: 1946 vma = gpuva_to_vma(op->prefetch.va); 1947 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx", 1948 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma)); 1949 break; 1950 default: 1951 drm_warn(&xe->drm, "NOT POSSIBLE"); 1952 } 1953 } 1954 #else 1955 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 1956 { 1957 } 1958 #endif 1959 1960 /* 1961 * Create operations list from IOCTL arguments, setup operations fields so parse 1962 * and commit steps are decoupled from IOCTL arguments. This step can fail. 1963 */ 1964 static struct drm_gpuva_ops * 1965 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, 1966 u64 bo_offset_or_userptr, u64 addr, u64 range, 1967 u32 operation, u32 flags, 1968 u32 prefetch_region, u16 pat_index) 1969 { 1970 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL; 1971 struct drm_gpuva_ops *ops; 1972 struct drm_gpuva_op *__op; 1973 struct drm_gpuvm_bo *vm_bo; 1974 int err; 1975 1976 lockdep_assert_held_write(&vm->lock); 1977 1978 vm_dbg(&vm->xe->drm, 1979 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx", 1980 operation, (ULL)addr, (ULL)range, 1981 (ULL)bo_offset_or_userptr); 1982 1983 switch (operation) { 1984 case DRM_XE_VM_BIND_OP_MAP: 1985 case DRM_XE_VM_BIND_OP_MAP_USERPTR: 1986 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range, 1987 obj, bo_offset_or_userptr); 1988 break; 1989 case DRM_XE_VM_BIND_OP_UNMAP: 1990 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range); 1991 break; 1992 case DRM_XE_VM_BIND_OP_PREFETCH: 1993 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range); 1994 break; 1995 case DRM_XE_VM_BIND_OP_UNMAP_ALL: 1996 xe_assert(vm->xe, bo); 1997 1998 err = xe_bo_lock(bo, true); 1999 if (err) 2000 return ERR_PTR(err); 2001 2002 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj); 2003 if (IS_ERR(vm_bo)) { 2004 xe_bo_unlock(bo); 2005 return ERR_CAST(vm_bo); 2006 } 2007 2008 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo); 2009 drm_gpuvm_bo_put(vm_bo); 2010 xe_bo_unlock(bo); 2011 break; 2012 default: 2013 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2014 ops = ERR_PTR(-EINVAL); 2015 } 2016 if (IS_ERR(ops)) 2017 return ops; 2018 2019 drm_gpuva_for_each_op(__op, ops) { 2020 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2021 2022 if (__op->op == DRM_GPUVA_OP_MAP) { 2023 op->map.immediate = 2024 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; 2025 op->map.read_only = 2026 flags & DRM_XE_VM_BIND_FLAG_READONLY; 2027 op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 2028 op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; 2029 op->map.pat_index = pat_index; 2030 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { 2031 op->prefetch.region = prefetch_region; 2032 } 2033 2034 print_op(vm->xe, __op); 2035 } 2036 2037 return ops; 2038 } 2039 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO); 2040 2041 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, 2042 u16 pat_index, unsigned int flags) 2043 { 2044 struct xe_bo *bo = op->gem.obj ? 
gem_to_xe_bo(op->gem.obj) : NULL; 2045 struct drm_exec exec; 2046 struct xe_vma *vma; 2047 int err = 0; 2048 2049 lockdep_assert_held_write(&vm->lock); 2050 2051 if (bo) { 2052 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); 2053 drm_exec_until_all_locked(&exec) { 2054 err = 0; 2055 if (!bo->vm) { 2056 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm)); 2057 drm_exec_retry_on_contention(&exec); 2058 } 2059 if (!err) { 2060 err = drm_exec_lock_obj(&exec, &bo->ttm.base); 2061 drm_exec_retry_on_contention(&exec); 2062 } 2063 if (err) { 2064 drm_exec_fini(&exec); 2065 return ERR_PTR(err); 2066 } 2067 } 2068 } 2069 vma = xe_vma_create(vm, bo, op->gem.offset, 2070 op->va.addr, op->va.addr + 2071 op->va.range - 1, pat_index, flags); 2072 if (IS_ERR(vma)) 2073 goto err_unlock; 2074 2075 if (xe_vma_is_userptr(vma)) 2076 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2077 else if (!xe_vma_has_no_bo(vma) && !bo->vm) 2078 err = add_preempt_fences(vm, bo); 2079 2080 err_unlock: 2081 if (bo) 2082 drm_exec_fini(&exec); 2083 2084 if (err) { 2085 prep_vma_destroy(vm, vma, false); 2086 xe_vma_destroy_unlocked(vma); 2087 vma = ERR_PTR(err); 2088 } 2089 2090 return vma; 2091 } 2092 2093 static u64 xe_vma_max_pte_size(struct xe_vma *vma) 2094 { 2095 if (vma->gpuva.flags & XE_VMA_PTE_1G) 2096 return SZ_1G; 2097 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT)) 2098 return SZ_2M; 2099 else if (vma->gpuva.flags & XE_VMA_PTE_64K) 2100 return SZ_64K; 2101 else if (vma->gpuva.flags & XE_VMA_PTE_4K) 2102 return SZ_4K; 2103 2104 return SZ_1G; /* Uninitialized, used max size */ 2105 } 2106 2107 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size) 2108 { 2109 switch (size) { 2110 case SZ_1G: 2111 vma->gpuva.flags |= XE_VMA_PTE_1G; 2112 break; 2113 case SZ_2M: 2114 vma->gpuva.flags |= XE_VMA_PTE_2M; 2115 break; 2116 case SZ_64K: 2117 vma->gpuva.flags |= XE_VMA_PTE_64K; 2118 break; 2119 case SZ_4K: 2120 vma->gpuva.flags |= XE_VMA_PTE_4K; 2121 break; 2122 } 2123 } 2124 2125 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) 2126 { 2127 int err = 0; 2128 2129 lockdep_assert_held_write(&vm->lock); 2130 2131 switch (op->base.op) { 2132 case DRM_GPUVA_OP_MAP: 2133 err |= xe_vm_insert_vma(vm, op->map.vma); 2134 if (!err) 2135 op->flags |= XE_VMA_OP_COMMITTED; 2136 break; 2137 case DRM_GPUVA_OP_REMAP: 2138 { 2139 u8 tile_present = 2140 gpuva_to_vma(op->base.remap.unmap->va)->tile_present; 2141 2142 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va), 2143 true); 2144 op->flags |= XE_VMA_OP_COMMITTED; 2145 2146 if (op->remap.prev) { 2147 err |= xe_vm_insert_vma(vm, op->remap.prev); 2148 if (!err) 2149 op->flags |= XE_VMA_OP_PREV_COMMITTED; 2150 if (!err && op->remap.skip_prev) { 2151 op->remap.prev->tile_present = 2152 tile_present; 2153 op->remap.prev = NULL; 2154 } 2155 } 2156 if (op->remap.next) { 2157 err |= xe_vm_insert_vma(vm, op->remap.next); 2158 if (!err) 2159 op->flags |= XE_VMA_OP_NEXT_COMMITTED; 2160 if (!err && op->remap.skip_next) { 2161 op->remap.next->tile_present = 2162 tile_present; 2163 op->remap.next = NULL; 2164 } 2165 } 2166 2167 /* Adjust for partial unbind after removing VMA from VM */ 2168 if (!err) { 2169 op->base.remap.unmap->va->va.addr = op->remap.start; 2170 op->base.remap.unmap->va->va.range = op->remap.range; 2171 } 2172 break; 2173 } 2174 case DRM_GPUVA_OP_UNMAP: 2175 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true); 2176 op->flags |= XE_VMA_OP_COMMITTED; 2177 break; 2178 case DRM_GPUVA_OP_PREFETCH: 2179 op->flags |= 
XE_VMA_OP_COMMITTED; 2180 break; 2181 default: 2182 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2183 } 2184 2185 return err; 2186 } 2187 2188 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, 2189 struct xe_vma_ops *vops) 2190 { 2191 struct xe_device *xe = vm->xe; 2192 struct drm_gpuva_op *__op; 2193 struct xe_tile *tile; 2194 u8 id, tile_mask = 0; 2195 int err = 0; 2196 2197 lockdep_assert_held_write(&vm->lock); 2198 2199 for_each_tile(tile, vm->xe, id) 2200 tile_mask |= 0x1 << id; 2201 2202 drm_gpuva_for_each_op(__op, ops) { 2203 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2204 struct xe_vma *vma; 2205 unsigned int flags = 0; 2206 2207 INIT_LIST_HEAD(&op->link); 2208 list_add_tail(&op->link, &vops->list); 2209 op->tile_mask = tile_mask; 2210 2211 switch (op->base.op) { 2212 case DRM_GPUVA_OP_MAP: 2213 { 2214 flags |= op->map.read_only ? 2215 VMA_CREATE_FLAG_READ_ONLY : 0; 2216 flags |= op->map.is_null ? 2217 VMA_CREATE_FLAG_IS_NULL : 0; 2218 flags |= op->map.dumpable ? 2219 VMA_CREATE_FLAG_DUMPABLE : 0; 2220 2221 vma = new_vma(vm, &op->base.map, op->map.pat_index, 2222 flags); 2223 if (IS_ERR(vma)) 2224 return PTR_ERR(vma); 2225 2226 op->map.vma = vma; 2227 if (op->map.immediate || !xe_vm_in_fault_mode(vm)) 2228 xe_vma_ops_incr_pt_update_ops(vops, 2229 op->tile_mask); 2230 break; 2231 } 2232 case DRM_GPUVA_OP_REMAP: 2233 { 2234 struct xe_vma *old = 2235 gpuva_to_vma(op->base.remap.unmap->va); 2236 2237 op->remap.start = xe_vma_start(old); 2238 op->remap.range = xe_vma_size(old); 2239 2240 if (op->base.remap.prev) { 2241 flags |= op->base.remap.unmap->va->flags & 2242 XE_VMA_READ_ONLY ? 2243 VMA_CREATE_FLAG_READ_ONLY : 0; 2244 flags |= op->base.remap.unmap->va->flags & 2245 DRM_GPUVA_SPARSE ? 2246 VMA_CREATE_FLAG_IS_NULL : 0; 2247 flags |= op->base.remap.unmap->va->flags & 2248 XE_VMA_DUMPABLE ? 2249 VMA_CREATE_FLAG_DUMPABLE : 0; 2250 2251 vma = new_vma(vm, op->base.remap.prev, 2252 old->pat_index, flags); 2253 if (IS_ERR(vma)) 2254 return PTR_ERR(vma); 2255 2256 op->remap.prev = vma; 2257 2258 /* 2259 * Userptr creates a new SG mapping so 2260 * we must also rebind. 2261 */ 2262 op->remap.skip_prev = !xe_vma_is_userptr(old) && 2263 IS_ALIGNED(xe_vma_end(vma), 2264 xe_vma_max_pte_size(old)); 2265 if (op->remap.skip_prev) { 2266 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2267 op->remap.range -= 2268 xe_vma_end(vma) - 2269 xe_vma_start(old); 2270 op->remap.start = xe_vma_end(vma); 2271 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx", 2272 (ULL)op->remap.start, 2273 (ULL)op->remap.range); 2274 } else { 2275 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); 2276 } 2277 } 2278 2279 if (op->base.remap.next) { 2280 flags |= op->base.remap.unmap->va->flags & 2281 XE_VMA_READ_ONLY ? 2282 VMA_CREATE_FLAG_READ_ONLY : 0; 2283 flags |= op->base.remap.unmap->va->flags & 2284 DRM_GPUVA_SPARSE ? 2285 VMA_CREATE_FLAG_IS_NULL : 0; 2286 flags |= op->base.remap.unmap->va->flags & 2287 XE_VMA_DUMPABLE ? 2288 VMA_CREATE_FLAG_DUMPABLE : 0; 2289 2290 vma = new_vma(vm, op->base.remap.next, 2291 old->pat_index, flags); 2292 if (IS_ERR(vma)) 2293 return PTR_ERR(vma); 2294 2295 op->remap.next = vma; 2296 2297 /* 2298 * Userptr creates a new SG mapping so 2299 * we must also rebind. 
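			 * Skipping the rebind of the next VMA is only safe
			 * for non-userptr VMAs whose new start address is
			 * aligned to the old VMA's largest PTE size, so the
			 * existing tail page-table entries can be left in
			 * place.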
2300 */ 2301 op->remap.skip_next = !xe_vma_is_userptr(old) && 2302 IS_ALIGNED(xe_vma_start(vma), 2303 xe_vma_max_pte_size(old)); 2304 if (op->remap.skip_next) { 2305 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2306 op->remap.range -= 2307 xe_vma_end(old) - 2308 xe_vma_start(vma); 2309 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx", 2310 (ULL)op->remap.start, 2311 (ULL)op->remap.range); 2312 } else { 2313 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); 2314 } 2315 } 2316 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); 2317 break; 2318 } 2319 case DRM_GPUVA_OP_UNMAP: 2320 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); 2321 break; 2322 case DRM_GPUVA_OP_PREFETCH: 2323 vma = gpuva_to_vma(op->base.prefetch.va); 2324 2325 if (xe_vma_is_userptr(vma)) { 2326 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2327 if (err) 2328 return err; 2329 } 2330 2331 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); 2332 break; 2333 default: 2334 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2335 } 2336 2337 err = xe_vma_op_commit(vm, op); 2338 if (err) 2339 return err; 2340 } 2341 2342 return 0; 2343 } 2344 2345 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, 2346 bool post_commit, bool prev_post_commit, 2347 bool next_post_commit) 2348 { 2349 lockdep_assert_held_write(&vm->lock); 2350 2351 switch (op->base.op) { 2352 case DRM_GPUVA_OP_MAP: 2353 if (op->map.vma) { 2354 prep_vma_destroy(vm, op->map.vma, post_commit); 2355 xe_vma_destroy_unlocked(op->map.vma); 2356 } 2357 break; 2358 case DRM_GPUVA_OP_UNMAP: 2359 { 2360 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va); 2361 2362 if (vma) { 2363 down_read(&vm->userptr.notifier_lock); 2364 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2365 up_read(&vm->userptr.notifier_lock); 2366 if (post_commit) 2367 xe_vm_insert_vma(vm, vma); 2368 } 2369 break; 2370 } 2371 case DRM_GPUVA_OP_REMAP: 2372 { 2373 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va); 2374 2375 if (op->remap.prev) { 2376 prep_vma_destroy(vm, op->remap.prev, prev_post_commit); 2377 xe_vma_destroy_unlocked(op->remap.prev); 2378 } 2379 if (op->remap.next) { 2380 prep_vma_destroy(vm, op->remap.next, next_post_commit); 2381 xe_vma_destroy_unlocked(op->remap.next); 2382 } 2383 if (vma) { 2384 down_read(&vm->userptr.notifier_lock); 2385 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2386 up_read(&vm->userptr.notifier_lock); 2387 if (post_commit) 2388 xe_vm_insert_vma(vm, vma); 2389 } 2390 break; 2391 } 2392 case DRM_GPUVA_OP_PREFETCH: 2393 /* Nothing to do */ 2394 break; 2395 default: 2396 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2397 } 2398 } 2399 2400 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, 2401 struct drm_gpuva_ops **ops, 2402 int num_ops_list) 2403 { 2404 int i; 2405 2406 for (i = num_ops_list - 1; i >= 0; --i) { 2407 struct drm_gpuva_ops *__ops = ops[i]; 2408 struct drm_gpuva_op *__op; 2409 2410 if (!__ops) 2411 continue; 2412 2413 drm_gpuva_for_each_op_reverse(__op, __ops) { 2414 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2415 2416 xe_vma_op_unwind(vm, op, 2417 op->flags & XE_VMA_OP_COMMITTED, 2418 op->flags & XE_VMA_OP_PREV_COMMITTED, 2419 op->flags & XE_VMA_OP_NEXT_COMMITTED); 2420 } 2421 } 2422 } 2423 2424 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, 2425 bool validate) 2426 { 2427 struct xe_bo *bo = xe_vma_bo(vma); 2428 struct xe_vm *vm = xe_vma_vm(vma); 2429 int err = 0; 2430 2431 if (bo) { 2432 if (!bo->vm) 2433 err = drm_exec_lock_obj(exec, &bo->ttm.base); 2434 if (!err && validate) 
2435 err = xe_bo_validate(bo, vm, 2436 !xe_vm_in_preempt_fence_mode(vm)); 2437 } 2438 2439 return err; 2440 } 2441 2442 static int check_ufence(struct xe_vma *vma) 2443 { 2444 if (vma->ufence) { 2445 struct xe_user_fence * const f = vma->ufence; 2446 2447 if (!xe_sync_ufence_get_status(f)) 2448 return -EBUSY; 2449 2450 vma->ufence = NULL; 2451 xe_sync_ufence_put(f); 2452 } 2453 2454 return 0; 2455 } 2456 2457 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, 2458 struct xe_vma_op *op) 2459 { 2460 int err = 0; 2461 2462 switch (op->base.op) { 2463 case DRM_GPUVA_OP_MAP: 2464 err = vma_lock_and_validate(exec, op->map.vma, 2465 !xe_vm_in_fault_mode(vm) || 2466 op->map.immediate); 2467 break; 2468 case DRM_GPUVA_OP_REMAP: 2469 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va)); 2470 if (err) 2471 break; 2472 2473 err = vma_lock_and_validate(exec, 2474 gpuva_to_vma(op->base.remap.unmap->va), 2475 false); 2476 if (!err && op->remap.prev) 2477 err = vma_lock_and_validate(exec, op->remap.prev, true); 2478 if (!err && op->remap.next) 2479 err = vma_lock_and_validate(exec, op->remap.next, true); 2480 break; 2481 case DRM_GPUVA_OP_UNMAP: 2482 err = check_ufence(gpuva_to_vma(op->base.unmap.va)); 2483 if (err) 2484 break; 2485 2486 err = vma_lock_and_validate(exec, 2487 gpuva_to_vma(op->base.unmap.va), 2488 false); 2489 break; 2490 case DRM_GPUVA_OP_PREFETCH: 2491 { 2492 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2493 u32 region = op->prefetch.region; 2494 2495 xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type)); 2496 2497 err = vma_lock_and_validate(exec, 2498 gpuva_to_vma(op->base.prefetch.va), 2499 false); 2500 if (!err && !xe_vma_has_no_bo(vma)) 2501 err = xe_bo_migrate(xe_vma_bo(vma), 2502 region_to_mem_type[region]); 2503 break; 2504 } 2505 default: 2506 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2507 } 2508 2509 return err; 2510 } 2511 2512 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, 2513 struct xe_vm *vm, 2514 struct xe_vma_ops *vops) 2515 { 2516 struct xe_vma_op *op; 2517 int err; 2518 2519 err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 2520 if (err) 2521 return err; 2522 2523 list_for_each_entry(op, &vops->list, link) { 2524 err = op_lock_and_prep(exec, vm, op); 2525 if (err) 2526 return err; 2527 } 2528 2529 #ifdef TEST_VM_OPS_ERROR 2530 if (vops->inject_error && 2531 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK) 2532 return -ENOSPC; 2533 #endif 2534 2535 return 0; 2536 } 2537 2538 static void op_trace(struct xe_vma_op *op) 2539 { 2540 switch (op->base.op) { 2541 case DRM_GPUVA_OP_MAP: 2542 trace_xe_vma_bind(op->map.vma); 2543 break; 2544 case DRM_GPUVA_OP_REMAP: 2545 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va)); 2546 if (op->remap.prev) 2547 trace_xe_vma_bind(op->remap.prev); 2548 if (op->remap.next) 2549 trace_xe_vma_bind(op->remap.next); 2550 break; 2551 case DRM_GPUVA_OP_UNMAP: 2552 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va)); 2553 break; 2554 case DRM_GPUVA_OP_PREFETCH: 2555 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va)); 2556 break; 2557 default: 2558 XE_WARN_ON("NOT POSSIBLE"); 2559 } 2560 } 2561 2562 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops) 2563 { 2564 struct xe_vma_op *op; 2565 2566 list_for_each_entry(op, &vops->list, link) 2567 op_trace(op); 2568 } 2569 2570 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops) 2571 { 2572 struct xe_exec_queue *q = vops->q; 2573 struct xe_tile *tile; 2574 int number_tiles = 0; 2575 u8 id; 2576 2577 
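	/*
	 * Pick a page-table update queue per tile: when the caller supplied
	 * an exec queue, walk its multi-GT list so each tile gets its own
	 * entry; otherwise fall back to the VM's default queue for that
	 * tile. The return value is the number of tiles with pending PT
	 * update ops.
	 */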
for_each_tile(tile, vm->xe, id) { 2578 if (vops->pt_update_ops[id].num_ops) 2579 ++number_tiles; 2580 2581 if (vops->pt_update_ops[id].q) 2582 continue; 2583 2584 if (q) { 2585 vops->pt_update_ops[id].q = q; 2586 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list)) 2587 q = list_next_entry(q, multi_gt_list); 2588 } else { 2589 vops->pt_update_ops[id].q = vm->q[id]; 2590 } 2591 } 2592 2593 return number_tiles; 2594 } 2595 2596 static struct dma_fence *ops_execute(struct xe_vm *vm, 2597 struct xe_vma_ops *vops) 2598 { 2599 struct xe_tile *tile; 2600 struct dma_fence *fence = NULL; 2601 struct dma_fence **fences = NULL; 2602 struct dma_fence_array *cf = NULL; 2603 int number_tiles = 0, current_fence = 0, err; 2604 u8 id; 2605 2606 number_tiles = vm_ops_setup_tile_args(vm, vops); 2607 if (number_tiles == 0) 2608 return ERR_PTR(-ENODATA); 2609 2610 if (number_tiles > 1) { 2611 fences = kmalloc_array(number_tiles, sizeof(*fences), 2612 GFP_KERNEL); 2613 if (!fences) { 2614 fence = ERR_PTR(-ENOMEM); 2615 goto err_trace; 2616 } 2617 } 2618 2619 for_each_tile(tile, vm->xe, id) { 2620 if (!vops->pt_update_ops[id].num_ops) 2621 continue; 2622 2623 err = xe_pt_update_ops_prepare(tile, vops); 2624 if (err) { 2625 fence = ERR_PTR(err); 2626 goto err_out; 2627 } 2628 } 2629 2630 trace_xe_vm_ops_execute(vops); 2631 2632 for_each_tile(tile, vm->xe, id) { 2633 if (!vops->pt_update_ops[id].num_ops) 2634 continue; 2635 2636 fence = xe_pt_update_ops_run(tile, vops); 2637 if (IS_ERR(fence)) 2638 goto err_out; 2639 2640 if (fences) 2641 fences[current_fence++] = fence; 2642 } 2643 2644 if (fences) { 2645 cf = dma_fence_array_create(number_tiles, fences, 2646 vm->composite_fence_ctx, 2647 vm->composite_fence_seqno++, 2648 false); 2649 if (!cf) { 2650 --vm->composite_fence_seqno; 2651 fence = ERR_PTR(-ENOMEM); 2652 goto err_out; 2653 } 2654 fence = &cf->base; 2655 } 2656 2657 for_each_tile(tile, vm->xe, id) { 2658 if (!vops->pt_update_ops[id].num_ops) 2659 continue; 2660 2661 xe_pt_update_ops_fini(tile, vops); 2662 } 2663 2664 return fence; 2665 2666 err_out: 2667 for_each_tile(tile, vm->xe, id) { 2668 if (!vops->pt_update_ops[id].num_ops) 2669 continue; 2670 2671 xe_pt_update_ops_abort(tile, vops); 2672 } 2673 while (current_fence) 2674 dma_fence_put(fences[--current_fence]); 2675 kfree(fences); 2676 kfree(cf); 2677 2678 err_trace: 2679 trace_xe_vm_ops_fail(vm); 2680 return fence; 2681 } 2682 2683 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence) 2684 { 2685 if (vma->ufence) 2686 xe_sync_ufence_put(vma->ufence); 2687 vma->ufence = __xe_sync_ufence_get(ufence); 2688 } 2689 2690 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op, 2691 struct xe_user_fence *ufence) 2692 { 2693 switch (op->base.op) { 2694 case DRM_GPUVA_OP_MAP: 2695 vma_add_ufence(op->map.vma, ufence); 2696 break; 2697 case DRM_GPUVA_OP_REMAP: 2698 if (op->remap.prev) 2699 vma_add_ufence(op->remap.prev, ufence); 2700 if (op->remap.next) 2701 vma_add_ufence(op->remap.next, ufence); 2702 break; 2703 case DRM_GPUVA_OP_UNMAP: 2704 break; 2705 case DRM_GPUVA_OP_PREFETCH: 2706 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence); 2707 break; 2708 default: 2709 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2710 } 2711 } 2712 2713 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, 2714 struct dma_fence *fence) 2715 { 2716 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q); 2717 struct xe_user_fence *ufence; 2718 struct xe_vma_op *op; 2719 int i; 2720 2721 ufence = 
find_ufence_get(vops->syncs, vops->num_syncs); 2722 list_for_each_entry(op, &vops->list, link) { 2723 if (ufence) 2724 op_add_ufence(vm, op, ufence); 2725 2726 if (op->base.op == DRM_GPUVA_OP_UNMAP) 2727 xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence); 2728 else if (op->base.op == DRM_GPUVA_OP_REMAP) 2729 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), 2730 fence); 2731 } 2732 if (ufence) 2733 xe_sync_ufence_put(ufence); 2734 for (i = 0; i < vops->num_syncs; i++) 2735 xe_sync_entry_signal(vops->syncs + i, fence); 2736 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); 2737 dma_fence_put(fence); 2738 } 2739 2740 static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, 2741 struct xe_vma_ops *vops) 2742 { 2743 struct drm_exec exec; 2744 struct dma_fence *fence; 2745 int err; 2746 2747 lockdep_assert_held_write(&vm->lock); 2748 2749 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | 2750 DRM_EXEC_IGNORE_DUPLICATES, 0); 2751 drm_exec_until_all_locked(&exec) { 2752 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops); 2753 drm_exec_retry_on_contention(&exec); 2754 if (err) 2755 goto unlock; 2756 2757 fence = ops_execute(vm, vops); 2758 if (IS_ERR(fence)) { 2759 err = PTR_ERR(fence); 2760 goto unlock; 2761 } 2762 2763 vm_bind_ioctl_ops_fini(vm, vops, fence); 2764 } 2765 2766 unlock: 2767 drm_exec_fini(&exec); 2768 return err; 2769 } 2770 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); 2771 2772 #define SUPPORTED_FLAGS_STUB \ 2773 (DRM_XE_VM_BIND_FLAG_READONLY | \ 2774 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \ 2775 DRM_XE_VM_BIND_FLAG_NULL | \ 2776 DRM_XE_VM_BIND_FLAG_DUMPABLE) 2777 2778 #ifdef TEST_VM_OPS_ERROR 2779 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR) 2780 #else 2781 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB 2782 #endif 2783 2784 #define XE_64K_PAGE_MASK 0xffffull 2785 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) 2786 2787 static int vm_bind_ioctl_check_args(struct xe_device *xe, 2788 struct drm_xe_vm_bind *args, 2789 struct drm_xe_vm_bind_op **bind_ops) 2790 { 2791 int err; 2792 int i; 2793 2794 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) || 2795 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 2796 return -EINVAL; 2797 2798 if (XE_IOCTL_DBG(xe, args->extensions)) 2799 return -EINVAL; 2800 2801 if (args->num_binds > 1) { 2802 u64 __user *bind_user = 2803 u64_to_user_ptr(args->vector_of_binds); 2804 2805 *bind_ops = kvmalloc_array(args->num_binds, 2806 sizeof(struct drm_xe_vm_bind_op), 2807 GFP_KERNEL | __GFP_ACCOUNT | 2808 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 2809 if (!*bind_ops) 2810 return args->num_binds > 1 ? 
-ENOBUFS : -ENOMEM; 2811 2812 err = __copy_from_user(*bind_ops, bind_user, 2813 sizeof(struct drm_xe_vm_bind_op) * 2814 args->num_binds); 2815 if (XE_IOCTL_DBG(xe, err)) { 2816 err = -EFAULT; 2817 goto free_bind_ops; 2818 } 2819 } else { 2820 *bind_ops = &args->bind; 2821 } 2822 2823 for (i = 0; i < args->num_binds; ++i) { 2824 u64 range = (*bind_ops)[i].range; 2825 u64 addr = (*bind_ops)[i].addr; 2826 u32 op = (*bind_ops)[i].op; 2827 u32 flags = (*bind_ops)[i].flags; 2828 u32 obj = (*bind_ops)[i].obj; 2829 u64 obj_offset = (*bind_ops)[i].obj_offset; 2830 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance; 2831 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 2832 u16 pat_index = (*bind_ops)[i].pat_index; 2833 u16 coh_mode; 2834 2835 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) { 2836 err = -EINVAL; 2837 goto free_bind_ops; 2838 } 2839 2840 pat_index = array_index_nospec(pat_index, xe->pat.n_entries); 2841 (*bind_ops)[i].pat_index = pat_index; 2842 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); 2843 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */ 2844 err = -EINVAL; 2845 goto free_bind_ops; 2846 } 2847 2848 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) { 2849 err = -EINVAL; 2850 goto free_bind_ops; 2851 } 2852 2853 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) || 2854 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) || 2855 XE_IOCTL_DBG(xe, obj && is_null) || 2856 XE_IOCTL_DBG(xe, obj_offset && is_null) || 2857 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP && 2858 is_null) || 2859 XE_IOCTL_DBG(xe, !obj && 2860 op == DRM_XE_VM_BIND_OP_MAP && 2861 !is_null) || 2862 XE_IOCTL_DBG(xe, !obj && 2863 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 2864 XE_IOCTL_DBG(xe, addr && 2865 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 2866 XE_IOCTL_DBG(xe, range && 2867 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 2868 XE_IOCTL_DBG(xe, obj && 2869 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 2870 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 2871 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 2872 XE_IOCTL_DBG(xe, obj && 2873 op == DRM_XE_VM_BIND_OP_PREFETCH) || 2874 XE_IOCTL_DBG(xe, prefetch_region && 2875 op != DRM_XE_VM_BIND_OP_PREFETCH) || 2876 XE_IOCTL_DBG(xe, !(BIT(prefetch_region) & 2877 xe->info.mem_region_mask)) || 2878 XE_IOCTL_DBG(xe, obj && 2879 op == DRM_XE_VM_BIND_OP_UNMAP)) { 2880 err = -EINVAL; 2881 goto free_bind_ops; 2882 } 2883 2884 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) || 2885 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) || 2886 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) || 2887 XE_IOCTL_DBG(xe, !range && 2888 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) { 2889 err = -EINVAL; 2890 goto free_bind_ops; 2891 } 2892 } 2893 2894 return 0; 2895 2896 free_bind_ops: 2897 if (args->num_binds > 1) 2898 kvfree(*bind_ops); 2899 return err; 2900 } 2901 2902 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, 2903 struct xe_exec_queue *q, 2904 struct xe_sync_entry *syncs, 2905 int num_syncs) 2906 { 2907 struct dma_fence *fence; 2908 int i, err = 0; 2909 2910 fence = xe_sync_in_fence_get(syncs, num_syncs, 2911 to_wait_exec_queue(vm, q), vm); 2912 if (IS_ERR(fence)) 2913 return PTR_ERR(fence); 2914 2915 for (i = 0; i < num_syncs; i++) 2916 xe_sync_entry_signal(&syncs[i], fence); 2917 2918 xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm, 2919 fence); 2920 dma_fence_put(fence); 2921 2922 return err; 2923 } 2924 2925 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 2926 struct xe_exec_queue *q, 2927 struct xe_sync_entry *syncs, u32 num_syncs) 2928 { 2929 memset(vops, 0, 
sizeof(*vops)); 2930 INIT_LIST_HEAD(&vops->list); 2931 vops->vm = vm; 2932 vops->q = q; 2933 vops->syncs = syncs; 2934 vops->num_syncs = num_syncs; 2935 } 2936 2937 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, 2938 u64 addr, u64 range, u64 obj_offset, 2939 u16 pat_index) 2940 { 2941 u16 coh_mode; 2942 2943 if (XE_IOCTL_DBG(xe, range > bo->size) || 2944 XE_IOCTL_DBG(xe, obj_offset > 2945 bo->size - range)) { 2946 return -EINVAL; 2947 } 2948 2949 /* 2950 * Some platforms require 64k VM_BIND alignment, 2951 * specifically those with XE_VRAM_FLAGS_NEED64K. 2952 * 2953 * Other platforms may have BO's set to 64k physical placement, 2954 * but can be mapped at 4k offsets anyway. This check is only 2955 * there for the former case. 2956 */ 2957 if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) && 2958 (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) { 2959 if (XE_IOCTL_DBG(xe, obj_offset & 2960 XE_64K_PAGE_MASK) || 2961 XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || 2962 XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) { 2963 return -EINVAL; 2964 } 2965 } 2966 2967 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); 2968 if (bo->cpu_caching) { 2969 if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 2970 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) { 2971 return -EINVAL; 2972 } 2973 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) { 2974 /* 2975 * Imported dma-buf from a different device should 2976 * require 1way or 2way coherency since we don't know 2977 * how it was mapped on the CPU. Just assume is it 2978 * potentially cached on CPU side. 2979 */ 2980 return -EINVAL; 2981 } 2982 2983 return 0; 2984 } 2985 2986 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 2987 { 2988 struct xe_device *xe = to_xe_device(dev); 2989 struct xe_file *xef = to_xe_file(file); 2990 struct drm_xe_vm_bind *args = data; 2991 struct drm_xe_sync __user *syncs_user; 2992 struct xe_bo **bos = NULL; 2993 struct drm_gpuva_ops **ops = NULL; 2994 struct xe_vm *vm; 2995 struct xe_exec_queue *q = NULL; 2996 u32 num_syncs, num_ufence = 0; 2997 struct xe_sync_entry *syncs = NULL; 2998 struct drm_xe_vm_bind_op *bind_ops; 2999 struct xe_vma_ops vops; 3000 int err; 3001 int i; 3002 3003 err = vm_bind_ioctl_check_args(xe, args, &bind_ops); 3004 if (err) 3005 return err; 3006 3007 if (args->exec_queue_id) { 3008 q = xe_exec_queue_lookup(xef, args->exec_queue_id); 3009 if (XE_IOCTL_DBG(xe, !q)) { 3010 err = -ENOENT; 3011 goto free_objs; 3012 } 3013 3014 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) { 3015 err = -EINVAL; 3016 goto put_exec_queue; 3017 } 3018 } 3019 3020 vm = xe_vm_lookup(xef, args->vm_id); 3021 if (XE_IOCTL_DBG(xe, !vm)) { 3022 err = -EINVAL; 3023 goto put_exec_queue; 3024 } 3025 3026 err = down_write_killable(&vm->lock); 3027 if (err) 3028 goto put_vm; 3029 3030 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { 3031 err = -ENOENT; 3032 goto release_vm_lock; 3033 } 3034 3035 for (i = 0; i < args->num_binds; ++i) { 3036 u64 range = bind_ops[i].range; 3037 u64 addr = bind_ops[i].addr; 3038 3039 if (XE_IOCTL_DBG(xe, range > vm->size) || 3040 XE_IOCTL_DBG(xe, addr > vm->size - range)) { 3041 err = -EINVAL; 3042 goto release_vm_lock; 3043 } 3044 } 3045 3046 if (args->num_binds) { 3047 bos = kvcalloc(args->num_binds, sizeof(*bos), 3048 GFP_KERNEL | __GFP_ACCOUNT | 3049 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3050 if (!bos) { 3051 err = -ENOMEM; 3052 goto release_vm_lock; 3053 } 3054 3055 ops = kvcalloc(args->num_binds, sizeof(*ops), 3056 GFP_KERNEL | 
__GFP_ACCOUNT | 3057 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3058 if (!ops) { 3059 err = -ENOMEM; 3060 goto release_vm_lock; 3061 } 3062 } 3063 3064 for (i = 0; i < args->num_binds; ++i) { 3065 struct drm_gem_object *gem_obj; 3066 u64 range = bind_ops[i].range; 3067 u64 addr = bind_ops[i].addr; 3068 u32 obj = bind_ops[i].obj; 3069 u64 obj_offset = bind_ops[i].obj_offset; 3070 u16 pat_index = bind_ops[i].pat_index; 3071 3072 if (!obj) 3073 continue; 3074 3075 gem_obj = drm_gem_object_lookup(file, obj); 3076 if (XE_IOCTL_DBG(xe, !gem_obj)) { 3077 err = -ENOENT; 3078 goto put_obj; 3079 } 3080 bos[i] = gem_to_xe_bo(gem_obj); 3081 3082 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range, 3083 obj_offset, pat_index); 3084 if (err) 3085 goto put_obj; 3086 } 3087 3088 if (args->num_syncs) { 3089 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); 3090 if (!syncs) { 3091 err = -ENOMEM; 3092 goto put_obj; 3093 } 3094 } 3095 3096 syncs_user = u64_to_user_ptr(args->syncs); 3097 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { 3098 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], 3099 &syncs_user[num_syncs], 3100 (xe_vm_in_lr_mode(vm) ? 3101 SYNC_PARSE_FLAG_LR_MODE : 0) | 3102 (!args->num_binds ? 3103 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0)); 3104 if (err) 3105 goto free_syncs; 3106 3107 if (xe_sync_is_ufence(&syncs[num_syncs])) 3108 num_ufence++; 3109 } 3110 3111 if (XE_IOCTL_DBG(xe, num_ufence > 1)) { 3112 err = -EINVAL; 3113 goto free_syncs; 3114 } 3115 3116 if (!args->num_binds) { 3117 err = -ENODATA; 3118 goto free_syncs; 3119 } 3120 3121 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); 3122 for (i = 0; i < args->num_binds; ++i) { 3123 u64 range = bind_ops[i].range; 3124 u64 addr = bind_ops[i].addr; 3125 u32 op = bind_ops[i].op; 3126 u32 flags = bind_ops[i].flags; 3127 u64 obj_offset = bind_ops[i].obj_offset; 3128 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance; 3129 u16 pat_index = bind_ops[i].pat_index; 3130 3131 ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset, 3132 addr, range, op, flags, 3133 prefetch_region, pat_index); 3134 if (IS_ERR(ops[i])) { 3135 err = PTR_ERR(ops[i]); 3136 ops[i] = NULL; 3137 goto unwind_ops; 3138 } 3139 3140 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops); 3141 if (err) 3142 goto unwind_ops; 3143 3144 #ifdef TEST_VM_OPS_ERROR 3145 if (flags & FORCE_OP_ERROR) { 3146 vops.inject_error = true; 3147 vm->xe->vm_inject_error_position = 3148 (vm->xe->vm_inject_error_position + 1) % 3149 FORCE_OP_ERROR_COUNT; 3150 } 3151 #endif 3152 } 3153 3154 /* Nothing to do */ 3155 if (list_empty(&vops.list)) { 3156 err = -ENODATA; 3157 goto unwind_ops; 3158 } 3159 3160 err = xe_vma_ops_alloc(&vops, args->num_binds > 1); 3161 if (err) 3162 goto unwind_ops; 3163 3164 err = vm_bind_ioctl_ops_execute(vm, &vops); 3165 3166 unwind_ops: 3167 if (err && err != -ENODATA) 3168 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); 3169 xe_vma_ops_fini(&vops); 3170 for (i = args->num_binds - 1; i >= 0; --i) 3171 if (ops[i]) 3172 drm_gpuva_ops_free(&vm->gpuvm, ops[i]); 3173 free_syncs: 3174 if (err == -ENODATA) 3175 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs); 3176 while (num_syncs--) 3177 xe_sync_entry_cleanup(&syncs[num_syncs]); 3178 3179 kfree(syncs); 3180 put_obj: 3181 for (i = 0; i < args->num_binds; ++i) 3182 xe_bo_put(bos[i]); 3183 release_vm_lock: 3184 up_write(&vm->lock); 3185 put_vm: 3186 xe_vm_put(vm); 3187 put_exec_queue: 3188 if (q) 3189 xe_exec_queue_put(q); 3190 free_objs: 3191 kvfree(bos); 3192 kvfree(ops); 3193 
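	/*
	 * bind_ops only points to a separately allocated array when more
	 * than one bind op was supplied; for a single op it aliases
	 * args->bind and must not be freed.
	 */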
if (args->num_binds > 1) 3194 kvfree(bind_ops); 3195 return err; 3196 } 3197 3198 /** 3199 * xe_vm_lock() - Lock the vm's dma_resv object 3200 * @vm: The struct xe_vm whose lock is to be locked 3201 * @intr: Whether to perform any wait interruptible 3202 * 3203 * Return: 0 on success, -EINTR if @intr is true and the wait for a 3204 * contended lock was interrupted. If @intr is false, the function 3205 * always returns 0. 3206 */ 3207 int xe_vm_lock(struct xe_vm *vm, bool intr) 3208 { 3209 if (intr) 3210 return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); 3211 3212 return dma_resv_lock(xe_vm_resv(vm), NULL); 3213 } 3214 3215 /** 3216 * xe_vm_unlock() - Unlock the vm's dma_resv object 3217 * @vm: The struct xe_vm whose lock is to be released. 3218 * 3219 * Unlock a buffer object lock that was locked by xe_vm_lock(). 3220 */ 3221 void xe_vm_unlock(struct xe_vm *vm) 3222 { 3223 dma_resv_unlock(xe_vm_resv(vm)); 3224 } 3225 3226 /** 3227 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock 3228 * @vma: VMA to invalidate 3229 * 3230 * Walks a list of page tables leaves which it memset the entries owned by this 3231 * VMA to zero, invalidates the TLBs, and block until TLBs invalidation is 3232 * complete. 3233 * 3234 * Returns 0 for success, negative error code otherwise. 3235 */ 3236 int xe_vm_invalidate_vma(struct xe_vma *vma) 3237 { 3238 struct xe_device *xe = xe_vma_vm(vma)->xe; 3239 struct xe_tile *tile; 3240 struct xe_gt_tlb_invalidation_fence 3241 fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; 3242 u8 id; 3243 u32 fence_id = 0; 3244 int ret = 0; 3245 3246 xe_assert(xe, !xe_vma_is_null(vma)); 3247 trace_xe_vma_invalidate(vma); 3248 3249 vm_dbg(&xe_vma_vm(vma)->xe->drm, 3250 "INVALIDATE: addr=0x%016llx, range=0x%016llx", 3251 xe_vma_start(vma), xe_vma_size(vma)); 3252 3253 /* Check that we don't race with page-table updates */ 3254 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { 3255 if (xe_vma_is_userptr(vma)) { 3256 WARN_ON_ONCE(!mmu_interval_check_retry 3257 (&to_userptr_vma(vma)->userptr.notifier, 3258 to_userptr_vma(vma)->userptr.notifier_seq)); 3259 WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)), 3260 DMA_RESV_USAGE_BOOKKEEP)); 3261 3262 } else { 3263 xe_bo_assert_held(xe_vma_bo(vma)); 3264 } 3265 } 3266 3267 for_each_tile(tile, xe, id) { 3268 if (xe_pt_zap_ptes(tile, vma)) { 3269 xe_device_wmb(xe); 3270 xe_gt_tlb_invalidation_fence_init(tile->primary_gt, 3271 &fence[fence_id], 3272 true); 3273 3274 ret = xe_gt_tlb_invalidation_vma(tile->primary_gt, 3275 &fence[fence_id], vma); 3276 if (ret) 3277 goto wait; 3278 ++fence_id; 3279 3280 if (!tile->media_gt) 3281 continue; 3282 3283 xe_gt_tlb_invalidation_fence_init(tile->media_gt, 3284 &fence[fence_id], 3285 true); 3286 3287 ret = xe_gt_tlb_invalidation_vma(tile->media_gt, 3288 &fence[fence_id], vma); 3289 if (ret) 3290 goto wait; 3291 ++fence_id; 3292 } 3293 } 3294 3295 wait: 3296 for (id = 0; id < fence_id; ++id) 3297 xe_gt_tlb_invalidation_fence_wait(&fence[id]); 3298 3299 vma->tile_invalidated = vma->tile_mask; 3300 3301 return ret; 3302 } 3303 3304 struct xe_vm_snapshot { 3305 unsigned long num_snaps; 3306 struct { 3307 u64 ofs, bo_ofs; 3308 unsigned long len; 3309 struct xe_bo *bo; 3310 void *data; 3311 struct mm_struct *mm; 3312 } snap[]; 3313 }; 3314 3315 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm) 3316 { 3317 unsigned long num_snaps = 0, i; 3318 struct xe_vm_snapshot *snap = NULL; 3319 struct drm_gpuva *gpuva; 3320 3321 if (!vm) 3322 return NULL; 3323 3324 
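	/*
	 * Two passes under snap_mutex: first count the dumpable VMAs so the
	 * snapshot array can be sized, then record each VMA's GPU VA range
	 * along with a BO or mm reference. The mapped contents themselves
	 * are copied later by xe_vm_snapshot_capture_delayed().
	 */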
mutex_lock(&vm->snap_mutex); 3325 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 3326 if (gpuva->flags & XE_VMA_DUMPABLE) 3327 num_snaps++; 3328 } 3329 3330 if (num_snaps) 3331 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT); 3332 if (!snap) { 3333 snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV); 3334 goto out_unlock; 3335 } 3336 3337 snap->num_snaps = num_snaps; 3338 i = 0; 3339 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 3340 struct xe_vma *vma = gpuva_to_vma(gpuva); 3341 struct xe_bo *bo = vma->gpuva.gem.obj ? 3342 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 3343 3344 if (!(gpuva->flags & XE_VMA_DUMPABLE)) 3345 continue; 3346 3347 snap->snap[i].ofs = xe_vma_start(vma); 3348 snap->snap[i].len = xe_vma_size(vma); 3349 if (bo) { 3350 snap->snap[i].bo = xe_bo_get(bo); 3351 snap->snap[i].bo_ofs = xe_vma_bo_offset(vma); 3352 } else if (xe_vma_is_userptr(vma)) { 3353 struct mm_struct *mm = 3354 to_userptr_vma(vma)->userptr.notifier.mm; 3355 3356 if (mmget_not_zero(mm)) 3357 snap->snap[i].mm = mm; 3358 else 3359 snap->snap[i].data = ERR_PTR(-EFAULT); 3360 3361 snap->snap[i].bo_ofs = xe_vma_userptr(vma); 3362 } else { 3363 snap->snap[i].data = ERR_PTR(-ENOENT); 3364 } 3365 i++; 3366 } 3367 3368 out_unlock: 3369 mutex_unlock(&vm->snap_mutex); 3370 return snap; 3371 } 3372 3373 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) 3374 { 3375 if (IS_ERR_OR_NULL(snap)) 3376 return; 3377 3378 for (int i = 0; i < snap->num_snaps; i++) { 3379 struct xe_bo *bo = snap->snap[i].bo; 3380 int err; 3381 3382 if (IS_ERR(snap->snap[i].data)) 3383 continue; 3384 3385 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER); 3386 if (!snap->snap[i].data) { 3387 snap->snap[i].data = ERR_PTR(-ENOMEM); 3388 goto cleanup_bo; 3389 } 3390 3391 if (bo) { 3392 err = xe_bo_read(bo, snap->snap[i].bo_ofs, 3393 snap->snap[i].data, snap->snap[i].len); 3394 } else { 3395 void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs; 3396 3397 kthread_use_mm(snap->snap[i].mm); 3398 if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len)) 3399 err = 0; 3400 else 3401 err = -EFAULT; 3402 kthread_unuse_mm(snap->snap[i].mm); 3403 3404 mmput(snap->snap[i].mm); 3405 snap->snap[i].mm = NULL; 3406 } 3407 3408 if (err) { 3409 kvfree(snap->snap[i].data); 3410 snap->snap[i].data = ERR_PTR(err); 3411 } 3412 3413 cleanup_bo: 3414 xe_bo_put(bo); 3415 snap->snap[i].bo = NULL; 3416 } 3417 } 3418 3419 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) 3420 { 3421 unsigned long i, j; 3422 3423 if (IS_ERR_OR_NULL(snap)) { 3424 drm_printf(p, "[0].error: %li\n", PTR_ERR(snap)); 3425 return; 3426 } 3427 3428 for (i = 0; i < snap->num_snaps; i++) { 3429 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len); 3430 3431 if (IS_ERR(snap->snap[i].data)) { 3432 drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs, 3433 PTR_ERR(snap->snap[i].data)); 3434 continue; 3435 } 3436 3437 drm_printf(p, "[%llx].data: ", snap->snap[i].ofs); 3438 3439 for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) { 3440 u32 *val = snap->snap[i].data + j; 3441 char dumped[ASCII85_BUFSZ]; 3442 3443 drm_puts(p, ascii85_encode(*val, dumped)); 3444 } 3445 3446 drm_puts(p, "\n"); 3447 } 3448 } 3449 3450 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap) 3451 { 3452 unsigned long i; 3453 3454 if (IS_ERR_OR_NULL(snap)) 3455 return; 3456 3457 for (i = 0; i < snap->num_snaps; i++) { 3458 if (!IS_ERR(snap->snap[i].data)) 3459 kvfree(snap->snap[i].data); 3460 
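		/*
		 * The BO and mm references may already have been dropped by
		 * xe_vm_snapshot_capture_delayed(); xe_bo_put() accepts NULL
		 * and the mm pointer is cleared there, so these puts are
		 * safe either way.
		 */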
xe_bo_put(snap->snap[i].bo); 3461 if (snap->snap[i].mm) 3462 mmput(snap->snap[i].mm); 3463 } 3464 kvfree(snap); 3465 } 3466