// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_vm.h"

#include <linux/dma-fence-array.h>
#include <linux/nospec.h>

#include <drm/drm_exec.h>
#include <drm/drm_print.h>
#include <drm/ttm/ttm_tt.h>
#include <uapi/drm/xe_drm.h>
#include <linux/ascii85.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/swap.h>

#include <generated/xe_wa_oob.h>

#include "regs/xe_gtt_defs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue.h"
#include "xe_gt_pagefault.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_migrate.h"
#include "xe_pat.h"
#include "xe_pm.h"
#include "xe_preempt_fence.h"
#include "xe_pt.h"
#include "xe_res_cursor.h"
#include "xe_sync.h"
#include "xe_trace_bo.h"
#include "xe_wa.h"
#include "xe_hmm.h"

static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
{
	return vm->gpuvm.r_obj;
}

/**
 * xe_vma_userptr_check_repin() - Advisory check for repin needed
 * @uvma: The userptr vma
 *
 * Check if the userptr vma has been invalidated since last successful
 * repin. The check is advisory only and the function can be called
 * without the vm->userptr.notifier_lock held. There is no guarantee that the
 * vma userptr will remain valid after a lockless check, so typically
 * the call needs to be followed by a proper check under the notifier_lock.
 *
 * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
 */
int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
{
	return mmu_interval_check_retry(&uvma->userptr.notifier,
					uvma->userptr.notifier_seq) ?
63 -EAGAIN : 0; 64 } 65 66 int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) 67 { 68 struct xe_vma *vma = &uvma->vma; 69 struct xe_vm *vm = xe_vma_vm(vma); 70 struct xe_device *xe = vm->xe; 71 72 lockdep_assert_held(&vm->lock); 73 xe_assert(xe, xe_vma_is_userptr(vma)); 74 75 return xe_hmm_userptr_populate_range(uvma, false); 76 } 77 78 static bool preempt_fences_waiting(struct xe_vm *vm) 79 { 80 struct xe_exec_queue *q; 81 82 lockdep_assert_held(&vm->lock); 83 xe_vm_assert_held(vm); 84 85 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 86 if (!q->lr.pfence || 87 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, 88 &q->lr.pfence->flags)) { 89 return true; 90 } 91 } 92 93 return false; 94 } 95 96 static void free_preempt_fences(struct list_head *list) 97 { 98 struct list_head *link, *next; 99 100 list_for_each_safe(link, next, list) 101 xe_preempt_fence_free(to_preempt_fence_from_link(link)); 102 } 103 104 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list, 105 unsigned int *count) 106 { 107 lockdep_assert_held(&vm->lock); 108 xe_vm_assert_held(vm); 109 110 if (*count >= vm->preempt.num_exec_queues) 111 return 0; 112 113 for (; *count < vm->preempt.num_exec_queues; ++(*count)) { 114 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc(); 115 116 if (IS_ERR(pfence)) 117 return PTR_ERR(pfence); 118 119 list_move_tail(xe_preempt_fence_link(pfence), list); 120 } 121 122 return 0; 123 } 124 125 static int wait_for_existing_preempt_fences(struct xe_vm *vm) 126 { 127 struct xe_exec_queue *q; 128 129 xe_vm_assert_held(vm); 130 131 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 132 if (q->lr.pfence) { 133 long timeout = dma_fence_wait(q->lr.pfence, false); 134 135 /* Only -ETIME on fence indicates VM needs to be killed */ 136 if (timeout < 0 || q->lr.pfence->error == -ETIME) 137 return -ETIME; 138 139 dma_fence_put(q->lr.pfence); 140 q->lr.pfence = NULL; 141 } 142 } 143 144 return 0; 145 } 146 147 static bool xe_vm_is_idle(struct xe_vm *vm) 148 { 149 struct xe_exec_queue *q; 150 151 xe_vm_assert_held(vm); 152 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 153 if (!xe_exec_queue_is_idle(q)) 154 return false; 155 } 156 157 return true; 158 } 159 160 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list) 161 { 162 struct list_head *link; 163 struct xe_exec_queue *q; 164 165 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 166 struct dma_fence *fence; 167 168 link = list->next; 169 xe_assert(vm->xe, link != list); 170 171 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link), 172 q, q->lr.context, 173 ++q->lr.seqno); 174 dma_fence_put(q->lr.pfence); 175 q->lr.pfence = fence; 176 } 177 } 178 179 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) 180 { 181 struct xe_exec_queue *q; 182 int err; 183 184 xe_bo_assert_held(bo); 185 186 if (!vm->preempt.num_exec_queues) 187 return 0; 188 189 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues); 190 if (err) 191 return err; 192 193 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) 194 if (q->lr.pfence) { 195 dma_resv_add_fence(bo->ttm.base.resv, 196 q->lr.pfence, 197 DMA_RESV_USAGE_BOOKKEEP); 198 } 199 200 return 0; 201 } 202 203 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm, 204 struct drm_exec *exec) 205 { 206 struct xe_exec_queue *q; 207 208 lockdep_assert_held(&vm->lock); 209 xe_vm_assert_held(vm); 210 211 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 212 q->ops->resume(q); 213 
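		/*
		 * Re-add the queue's new preempt fence to the VM resv and all
		 * external objects locked in @exec, so that later submissions
		 * and evictions observe it.
		 */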
		drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
					 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
	}
}

int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	struct drm_gpuvm_exec vm_exec = {
		.vm = &vm->gpuvm,
		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
		.num_fences = 1,
	};
	struct drm_exec *exec = &vm_exec.exec;
	struct dma_fence *pfence;
	int err;
	bool wait;

	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));

	down_write(&vm->lock);
	err = drm_gpuvm_exec_lock(&vm_exec);
	if (err)
		goto out_up_write;

	pfence = xe_preempt_fence_create(q, q->lr.context,
					 ++q->lr.seqno);
	if (!pfence) {
		err = -ENOMEM;
		goto out_fini;
	}

	list_add(&q->lr.link, &vm->preempt.exec_queues);
	++vm->preempt.num_exec_queues;
	q->lr.pfence = pfence;

	down_read(&vm->userptr.notifier_lock);

	drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
				 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);

	/*
	 * Check to see if a preemption on the VM or a userptr invalidation
	 * is in flight; if so, trigger this preempt fence to sync state with
	 * other preempt fences on the VM.
	 */
	wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
	if (wait)
		dma_fence_enable_sw_signaling(pfence);

	up_read(&vm->userptr.notifier_lock);

out_fini:
	drm_exec_fini(exec);
out_up_write:
	up_write(&vm->lock);

	return err;
}

/**
 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
 * @vm: The VM.
 * @q: The exec_queue
 *
 * Note that this function might be called multiple times on the same queue.
 */
void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	if (!xe_vm_in_preempt_fence_mode(vm))
		return;

	down_write(&vm->lock);
	if (!list_empty(&q->lr.link)) {
		list_del_init(&q->lr.link);
		--vm->preempt.num_exec_queues;
	}
	if (q->lr.pfence) {
		dma_fence_enable_sw_signaling(q->lr.pfence);
		dma_fence_put(q->lr.pfence);
		q->lr.pfence = NULL;
	}
	up_write(&vm->lock);
}

/**
 * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
 * that need repinning.
 * @vm: The VM.
 *
 * This function checks for whether the VM has userptrs that need repinning,
 * and provides a release-type barrier on the userptr.notifier_lock after
 * checking.
 *
 * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
 */
int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
{
	lockdep_assert_held_read(&vm->userptr.notifier_lock);

	return (list_empty(&vm->userptr.repin_list) &&
		list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
}

#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000

/**
 * xe_vm_kill() - VM Kill
 * @vm: The VM.
 * @unlocked: Flag indicating the VM's dma-resv is not held
 *
 * Kill the VM by setting the banned flag, indicating the VM is no longer
 * available for use. If in preempt fence mode, also kill all exec queues
 * attached to the VM.
 */
void xe_vm_kill(struct xe_vm *vm, bool unlocked)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);

	if (unlocked)
		xe_vm_lock(vm, false);

	vm->flags |= XE_VM_FLAG_BANNED;
	trace_xe_vm_kill(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
		q->ops->kill(q);

	if (unlocked)
		xe_vm_unlock(vm);

	/* TODO: Inform user the VM is banned */
}

/**
 * xe_vm_validate_should_retry() - Whether to retry after a validate error.
 * @exec: The drm_exec object used for locking before validation.
 * @err: The error returned from ttm_bo_validate().
 * @end: A ktime_t cookie that should be set to 0 before first use and
 * that should be reused on subsequent calls.
 *
 * With multiple active VMs, under memory pressure, it is possible that
 * ttm_bo_validate() runs into -EDEADLK and in such a case returns -ENOMEM.
 * Until ttm properly handles locking in such scenarios, the best thing the
 * driver can do is retry with a timeout. Check if that is necessary, and
 * if so unlock the drm_exec's objects while keeping the ticket to prepare
 * for a rerun.
 *
 * Return: true if a retry after drm_exec_init() is recommended;
 * false otherwise.
 */
bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
{
	ktime_t cur;

	if (err != -ENOMEM)
		return false;

	cur = ktime_get();
	*end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
	if (!ktime_before(cur, *end))
		return false;

	msleep(20);
	return true;
}

static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
{
	struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
	struct drm_gpuva *gpuva;
	int ret;

	lockdep_assert_held(&vm->lock);
	drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
		list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
			       &vm->rebind_list);

	ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
	if (ret)
		return ret;

	vm_bo->evicted = false;
	return 0;
}

/**
 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
 * @vm: The vm for which we are rebinding.
 * @exec: The struct drm_exec with the locked GEM objects.
 * @num_fences: The number of fences to reserve for the operation, not
 * including rebinds and validations.
 *
 * Validates all evicted gem objects and rebinds their vmas. Note that
 * rebindings may cause evictions and hence the validation-rebind
 * sequence is rerun until there are no more objects to validate.
 *
 * Return: 0 on success, negative error code on error. In particular,
 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
 * the drm_exec transaction needs to be restarted.
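 *
 * Illustrative caller sketch (not a verbatim excerpt; assumes a drm_exec
 * loop like the one in xe_preempt_work_begin(), with local @exec and @err):
 *
 *	drm_exec_until_all_locked(&exec) {
 *		err = drm_gpuvm_prepare_vm(&vm->gpuvm, &exec, 0);
 *		if (!err)
 *			err = drm_gpuvm_prepare_objects(&vm->gpuvm, &exec, 0);
 *		if (!err)
 *			err = xe_vm_validate_rebind(vm, &exec, 1);
 *		drm_exec_retry_on_contention(&exec);
 *	}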
 */
int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
			  unsigned int num_fences)
{
	struct drm_gem_object *obj;
	unsigned long index;
	int ret;

	do {
		ret = drm_gpuvm_validate(&vm->gpuvm, exec);
		if (ret)
			return ret;

		ret = xe_vm_rebind(vm, false);
		if (ret)
			return ret;
	} while (!list_empty(&vm->gpuvm.evict.list));

	drm_exec_for_each_locked_object(exec, index, obj) {
		ret = dma_resv_reserve_fences(obj->resv, num_fences);
		if (ret)
			return ret;
	}

	return 0;
}

static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
				 bool *done)
{
	int err;

	err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	if (xe_vm_is_idle(vm)) {
		vm->preempt.rebind_deactivated = true;
		*done = true;
		return 0;
	}

	if (!preempt_fences_waiting(vm)) {
		*done = true;
		return 0;
	}

	err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	err = wait_for_existing_preempt_fences(vm);
	if (err)
		return err;

	/*
	 * Add validation and rebinding to the locking loop since both can
	 * cause evictions which may require blocking dma_resv locks.
	 * The fence reservation here is intended for the new preempt fences
	 * we attach at the end of the rebind work.
	 */
	return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
}

static void preempt_rebind_work_func(struct work_struct *w)
{
	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
	struct drm_exec exec;
	unsigned int fence_count = 0;
	LIST_HEAD(preempt_fences);
	ktime_t end = 0;
	int err = 0;
	long wait;
	int __maybe_unused tries = 0;

	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
	trace_xe_vm_rebind_worker_enter(vm);

	down_write(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm)) {
		up_write(&vm->lock);
		trace_xe_vm_rebind_worker_exit(vm);
		return;
	}

retry:
	if (xe_vm_userptr_check_repin(vm)) {
		err = xe_vm_userptr_pin(vm);
		if (err)
			goto out_unlock_outer;
	}

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);

	drm_exec_until_all_locked(&exec) {
		bool done = false;

		err = xe_preempt_work_begin(&exec, vm, &done);
		drm_exec_retry_on_contention(&exec);
		if (err || done) {
			drm_exec_fini(&exec);
			if (err && xe_vm_validate_should_retry(&exec, err, &end))
				err = -EAGAIN;

			goto out_unlock_outer;
		}
	}

	err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
	if (err)
		goto out_unlock;

	err = xe_vm_rebind(vm, true);
	if (err)
		goto out_unlock;

	/* Wait on rebinds and munmap style VM unbinds */
	wait = dma_resv_wait_timeout(xe_vm_resv(vm),
				     DMA_RESV_USAGE_KERNEL,
				     false, MAX_SCHEDULE_TIMEOUT);
	if (wait <= 0) {
		err = -ETIME;
		goto out_unlock;
	}

#define retry_required(__tries, __vm) \
	(IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
	(!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
	__xe_vm_userptr_needs_repin(__vm))

	down_read(&vm->userptr.notifier_lock);
	if (retry_required(tries, vm)) {
		up_read(&vm->userptr.notifier_lock);
		err = -EAGAIN;
		goto out_unlock;
	}

#undef retry_required

	spin_lock(&vm->xe->ttm.lru_lock);
	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
	spin_unlock(&vm->xe->ttm.lru_lock);

	/* Point of no return. */
	arm_preempt_fences(vm, &preempt_fences);
	resume_and_reinstall_preempt_fences(vm, &exec);
	up_read(&vm->userptr.notifier_lock);

out_unlock:
	drm_exec_fini(&exec);
out_unlock_outer:
	if (err == -EAGAIN) {
		trace_xe_vm_rebind_worker_retry(vm);
		goto retry;
	}

	if (err) {
		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
		xe_vm_kill(vm, true);
	}
	up_write(&vm->lock);

	free_preempt_fences(&preempt_fences);

	trace_xe_vm_rebind_worker_exit(vm);
}

static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
				   const struct mmu_notifier_range *range,
				   unsigned long cur_seq)
{
	struct xe_userptr *userptr = container_of(mni, typeof(*userptr), notifier);
	struct xe_userptr_vma *uvma = container_of(userptr, typeof(*uvma), userptr);
	struct xe_vma *vma = &uvma->vma;
	struct xe_vm *vm = xe_vma_vm(vma);
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	long err;

	xe_assert(vm->xe, xe_vma_is_userptr(vma));
	trace_xe_vma_userptr_invalidate(vma);

	if (!mmu_notifier_range_blockable(range))
		return false;

	vm_dbg(&xe_vma_vm(vma)->xe->drm,
	       "NOTIFIER: addr=0x%016llx, range=0x%016llx",
	       xe_vma_start(vma), xe_vma_size(vma));

	down_write(&vm->userptr.notifier_lock);
	mmu_interval_set_seq(mni, cur_seq);

	/* No need to stop gpu access if the userptr is not yet bound. */
	if (!userptr->initial_bind) {
		up_write(&vm->userptr.notifier_lock);
		return true;
	}

	/*
	 * Tell exec and rebind worker they need to repin and rebind this
	 * userptr.
	 */
	if (!xe_vm_in_fault_mode(vm) &&
	    !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) {
		spin_lock(&vm->userptr.invalidated_lock);
		list_move_tail(&userptr->invalidate_link,
			       &vm->userptr.invalidated);
		spin_unlock(&vm->userptr.invalidated_lock);
	}

	up_write(&vm->userptr.notifier_lock);

	/*
	 * Preempt fences turn into schedule disables, pipeline these.
	 * Note that even in fault mode, we need to wait for binds and
	 * unbinds to complete, and those are attached as BOOKKEEP fences
	 * to the vm.
632 */ 633 dma_resv_iter_begin(&cursor, xe_vm_resv(vm), 634 DMA_RESV_USAGE_BOOKKEEP); 635 dma_resv_for_each_fence_unlocked(&cursor, fence) 636 dma_fence_enable_sw_signaling(fence); 637 dma_resv_iter_end(&cursor); 638 639 err = dma_resv_wait_timeout(xe_vm_resv(vm), 640 DMA_RESV_USAGE_BOOKKEEP, 641 false, MAX_SCHEDULE_TIMEOUT); 642 XE_WARN_ON(err <= 0); 643 644 if (xe_vm_in_fault_mode(vm)) { 645 err = xe_vm_invalidate_vma(vma); 646 XE_WARN_ON(err); 647 } 648 649 trace_xe_vma_userptr_invalidate_complete(vma); 650 651 return true; 652 } 653 654 static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { 655 .invalidate = vma_userptr_invalidate, 656 }; 657 658 int xe_vm_userptr_pin(struct xe_vm *vm) 659 { 660 struct xe_userptr_vma *uvma, *next; 661 int err = 0; 662 LIST_HEAD(tmp_evict); 663 664 xe_assert(vm->xe, !xe_vm_in_fault_mode(vm)); 665 lockdep_assert_held_write(&vm->lock); 666 667 /* Collect invalidated userptrs */ 668 spin_lock(&vm->userptr.invalidated_lock); 669 list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated, 670 userptr.invalidate_link) { 671 list_del_init(&uvma->userptr.invalidate_link); 672 list_move_tail(&uvma->userptr.repin_link, 673 &vm->userptr.repin_list); 674 } 675 spin_unlock(&vm->userptr.invalidated_lock); 676 677 /* Pin and move to temporary list */ 678 list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, 679 userptr.repin_link) { 680 err = xe_vma_userptr_pin_pages(uvma); 681 if (err == -EFAULT) { 682 list_del_init(&uvma->userptr.repin_link); 683 684 /* Wait for pending binds */ 685 xe_vm_lock(vm, false); 686 dma_resv_wait_timeout(xe_vm_resv(vm), 687 DMA_RESV_USAGE_BOOKKEEP, 688 false, MAX_SCHEDULE_TIMEOUT); 689 690 err = xe_vm_invalidate_vma(&uvma->vma); 691 xe_vm_unlock(vm); 692 if (err) 693 return err; 694 } else { 695 if (err < 0) 696 return err; 697 698 list_del_init(&uvma->userptr.repin_link); 699 list_move_tail(&uvma->vma.combined_links.rebind, 700 &vm->rebind_list); 701 } 702 } 703 704 return 0; 705 } 706 707 /** 708 * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs 709 * that need repinning. 710 * @vm: The VM. 711 * 712 * This function does an advisory check for whether the VM has userptrs that 713 * need repinning. 714 * 715 * Return: 0 if there are no indications of userptrs needing repinning, 716 * -EAGAIN if there are. 717 */ 718 int xe_vm_userptr_check_repin(struct xe_vm *vm) 719 { 720 return (list_empty_careful(&vm->userptr.repin_list) && 721 list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN; 722 } 723 724 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) 725 { 726 int i; 727 728 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) { 729 if (!vops->pt_update_ops[i].num_ops) 730 continue; 731 732 vops->pt_update_ops[i].ops = 733 kmalloc_array(vops->pt_update_ops[i].num_ops, 734 sizeof(*vops->pt_update_ops[i].ops), 735 GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 736 if (!vops->pt_update_ops[i].ops) 737 return array_of_binds ? 
-ENOBUFS : -ENOMEM; 738 } 739 740 return 0; 741 } 742 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO); 743 744 static void xe_vma_ops_fini(struct xe_vma_ops *vops) 745 { 746 int i; 747 748 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 749 kfree(vops->pt_update_ops[i].ops); 750 } 751 752 static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask) 753 { 754 int i; 755 756 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 757 if (BIT(i) & tile_mask) 758 ++vops->pt_update_ops[i].num_ops; 759 } 760 761 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, 762 u8 tile_mask) 763 { 764 INIT_LIST_HEAD(&op->link); 765 op->tile_mask = tile_mask; 766 op->base.op = DRM_GPUVA_OP_MAP; 767 op->base.map.va.addr = vma->gpuva.va.addr; 768 op->base.map.va.range = vma->gpuva.va.range; 769 op->base.map.gem.obj = vma->gpuva.gem.obj; 770 op->base.map.gem.offset = vma->gpuva.gem.offset; 771 op->map.vma = vma; 772 op->map.immediate = true; 773 op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE; 774 op->map.is_null = xe_vma_is_null(vma); 775 } 776 777 static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, 778 u8 tile_mask) 779 { 780 struct xe_vma_op *op; 781 782 op = kzalloc(sizeof(*op), GFP_KERNEL); 783 if (!op) 784 return -ENOMEM; 785 786 xe_vm_populate_rebind(op, vma, tile_mask); 787 list_add_tail(&op->link, &vops->list); 788 xe_vma_ops_incr_pt_update_ops(vops, tile_mask); 789 790 return 0; 791 } 792 793 static struct dma_fence *ops_execute(struct xe_vm *vm, 794 struct xe_vma_ops *vops); 795 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 796 struct xe_exec_queue *q, 797 struct xe_sync_entry *syncs, u32 num_syncs); 798 799 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) 800 { 801 struct dma_fence *fence; 802 struct xe_vma *vma, *next; 803 struct xe_vma_ops vops; 804 struct xe_vma_op *op, *next_op; 805 int err, i; 806 807 lockdep_assert_held(&vm->lock); 808 if ((xe_vm_in_lr_mode(vm) && !rebind_worker) || 809 list_empty(&vm->rebind_list)) 810 return 0; 811 812 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 813 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 814 vops.pt_update_ops[i].wait_vm_bookkeep = true; 815 816 xe_vm_assert_held(vm); 817 list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { 818 xe_assert(vm->xe, vma->tile_present); 819 820 if (rebind_worker) 821 trace_xe_vma_rebind_worker(vma); 822 else 823 trace_xe_vma_rebind_exec(vma); 824 825 err = xe_vm_ops_add_rebind(&vops, vma, 826 vma->tile_present); 827 if (err) 828 goto free_ops; 829 } 830 831 err = xe_vma_ops_alloc(&vops, false); 832 if (err) 833 goto free_ops; 834 835 fence = ops_execute(vm, &vops); 836 if (IS_ERR(fence)) { 837 err = PTR_ERR(fence); 838 } else { 839 dma_fence_put(fence); 840 list_for_each_entry_safe(vma, next, &vm->rebind_list, 841 combined_links.rebind) 842 list_del_init(&vma->combined_links.rebind); 843 } 844 free_ops: 845 list_for_each_entry_safe(op, next_op, &vops.list, link) { 846 list_del(&op->link); 847 kfree(op); 848 } 849 xe_vma_ops_fini(&vops); 850 851 return err; 852 } 853 854 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask) 855 { 856 struct dma_fence *fence = NULL; 857 struct xe_vma_ops vops; 858 struct xe_vma_op *op, *next_op; 859 struct xe_tile *tile; 860 u8 id; 861 int err; 862 863 lockdep_assert_held(&vm->lock); 864 xe_vm_assert_held(vm); 865 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 866 867 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 868 for_each_tile(tile, vm->xe, id) { 869 
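		/*
		 * Fault-mode rebinds wait on the VM's BOOKKEEP fences and are
		 * issued on the tile's migration exec queue.
		 */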
vops.pt_update_ops[id].wait_vm_bookkeep = true; 870 vops.pt_update_ops[tile->id].q = 871 xe_tile_migrate_exec_queue(tile); 872 } 873 874 err = xe_vm_ops_add_rebind(&vops, vma, tile_mask); 875 if (err) 876 return ERR_PTR(err); 877 878 err = xe_vma_ops_alloc(&vops, false); 879 if (err) { 880 fence = ERR_PTR(err); 881 goto free_ops; 882 } 883 884 fence = ops_execute(vm, &vops); 885 886 free_ops: 887 list_for_each_entry_safe(op, next_op, &vops.list, link) { 888 list_del(&op->link); 889 kfree(op); 890 } 891 xe_vma_ops_fini(&vops); 892 893 return fence; 894 } 895 896 static void xe_vma_free(struct xe_vma *vma) 897 { 898 if (xe_vma_is_userptr(vma)) 899 kfree(to_userptr_vma(vma)); 900 else 901 kfree(vma); 902 } 903 904 #define VMA_CREATE_FLAG_READ_ONLY BIT(0) 905 #define VMA_CREATE_FLAG_IS_NULL BIT(1) 906 #define VMA_CREATE_FLAG_DUMPABLE BIT(2) 907 908 static struct xe_vma *xe_vma_create(struct xe_vm *vm, 909 struct xe_bo *bo, 910 u64 bo_offset_or_userptr, 911 u64 start, u64 end, 912 u16 pat_index, unsigned int flags) 913 { 914 struct xe_vma *vma; 915 struct xe_tile *tile; 916 u8 id; 917 bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY); 918 bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL); 919 bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE); 920 921 xe_assert(vm->xe, start < end); 922 xe_assert(vm->xe, end < vm->size); 923 924 /* 925 * Allocate and ensure that the xe_vma_is_userptr() return 926 * matches what was allocated. 927 */ 928 if (!bo && !is_null) { 929 struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL); 930 931 if (!uvma) 932 return ERR_PTR(-ENOMEM); 933 934 vma = &uvma->vma; 935 } else { 936 vma = kzalloc(sizeof(*vma), GFP_KERNEL); 937 if (!vma) 938 return ERR_PTR(-ENOMEM); 939 940 if (is_null) 941 vma->gpuva.flags |= DRM_GPUVA_SPARSE; 942 if (bo) 943 vma->gpuva.gem.obj = &bo->ttm.base; 944 } 945 946 INIT_LIST_HEAD(&vma->combined_links.rebind); 947 948 INIT_LIST_HEAD(&vma->gpuva.gem.entry); 949 vma->gpuva.vm = &vm->gpuvm; 950 vma->gpuva.va.addr = start; 951 vma->gpuva.va.range = end - start + 1; 952 if (read_only) 953 vma->gpuva.flags |= XE_VMA_READ_ONLY; 954 if (dumpable) 955 vma->gpuva.flags |= XE_VMA_DUMPABLE; 956 957 for_each_tile(tile, vm->xe, id) 958 vma->tile_mask |= 0x1 << id; 959 960 if (vm->xe->info.has_atomic_enable_pte_bit) 961 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; 962 963 vma->pat_index = pat_index; 964 965 if (bo) { 966 struct drm_gpuvm_bo *vm_bo; 967 968 xe_bo_assert_held(bo); 969 970 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base); 971 if (IS_ERR(vm_bo)) { 972 xe_vma_free(vma); 973 return ERR_CAST(vm_bo); 974 } 975 976 drm_gpuvm_bo_extobj_add(vm_bo); 977 drm_gem_object_get(&bo->ttm.base); 978 vma->gpuva.gem.offset = bo_offset_or_userptr; 979 drm_gpuva_link(&vma->gpuva, vm_bo); 980 drm_gpuvm_bo_put(vm_bo); 981 } else /* userptr or null */ { 982 if (!is_null) { 983 struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; 984 u64 size = end - start + 1; 985 int err; 986 987 INIT_LIST_HEAD(&userptr->invalidate_link); 988 INIT_LIST_HEAD(&userptr->repin_link); 989 vma->gpuva.gem.offset = bo_offset_or_userptr; 990 991 err = mmu_interval_notifier_insert(&userptr->notifier, 992 current->mm, 993 xe_vma_userptr(vma), size, 994 &vma_userptr_notifier_ops); 995 if (err) { 996 xe_vma_free(vma); 997 return ERR_PTR(err); 998 } 999 1000 userptr->notifier_seq = LONG_MAX; 1001 } 1002 1003 xe_vm_get(vm); 1004 } 1005 1006 return vma; 1007 } 1008 1009 static void xe_vma_destroy_late(struct xe_vma *vma) 1010 { 1011 struct xe_vm *vm = xe_vma_vm(vma); 1012 
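	/* Drop any user fence reference still attached to the VMA. */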
	if (vma->ufence) {
		xe_sync_ufence_put(vma->ufence);
		vma->ufence = NULL;
	}

	if (xe_vma_is_userptr(vma)) {
		struct xe_userptr_vma *uvma = to_userptr_vma(vma);
		struct xe_userptr *userptr = &uvma->userptr;

		if (userptr->sg)
			xe_hmm_userptr_free_sg(uvma);

		/*
		 * Since userptr pages are not pinned, we can't remove
		 * the notifier until we're sure the GPU is not accessing
		 * them anymore.
		 */
		mmu_interval_notifier_remove(&userptr->notifier);
		xe_vm_put(vm);
	} else if (xe_vma_is_null(vma)) {
		xe_vm_put(vm);
	} else {
		xe_bo_put(xe_vma_bo(vma));
	}

	xe_vma_free(vma);
}

static void vma_destroy_work_func(struct work_struct *w)
{
	struct xe_vma *vma =
		container_of(w, struct xe_vma, destroy_work);

	xe_vma_destroy_late(vma);
}

static void vma_destroy_cb(struct dma_fence *fence,
			   struct dma_fence_cb *cb)
{
	struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);

	INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
	queue_work(system_unbound_wq, &vma->destroy_work);
}

static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
{
	struct xe_vm *vm = xe_vma_vm(vma);

	lockdep_assert_held_write(&vm->lock);
	xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));

	if (xe_vma_is_userptr(vma)) {
		xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);

		spin_lock(&vm->userptr.invalidated_lock);
		list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
		spin_unlock(&vm->userptr.invalidated_lock);
	} else if (!xe_vma_is_null(vma)) {
		xe_bo_assert_held(xe_vma_bo(vma));

		drm_gpuva_unlink(&vma->gpuva);
	}

	xe_vm_assert_held(vm);
	if (fence) {
		int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
						 vma_destroy_cb);

		if (ret) {
			XE_WARN_ON(ret != -ENOENT);
			xe_vma_destroy_late(vma);
		}
	} else {
		xe_vma_destroy_late(vma);
	}
}

/**
 * xe_vm_lock_vma() - drm_exec utility to lock a vma
 * @exec: The drm_exec object we're currently locking for.
 * @vma: The vma for which we want to lock the vm resv and any attached
 * object's resv.
 *
 * Return: 0 on success, negative error code on error. In particular
 * may return -EDEADLK on WW transaction contention and -EINTR if
 * an interruptible wait is terminated by a signal.
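 *
 * Typical usage follows the drm_exec retry pattern, as in
 * xe_vma_destroy_unlocked() below:
 *
 *	drm_exec_until_all_locked(&exec) {
 *		err = xe_vm_lock_vma(&exec, vma);
 *		drm_exec_retry_on_contention(&exec);
 *		if (XE_WARN_ON(err))
 *			break;
 *	}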
1100 */ 1101 int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma) 1102 { 1103 struct xe_vm *vm = xe_vma_vm(vma); 1104 struct xe_bo *bo = xe_vma_bo(vma); 1105 int err; 1106 1107 XE_WARN_ON(!vm); 1108 1109 err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 1110 if (!err && bo && !bo->vm) 1111 err = drm_exec_lock_obj(exec, &bo->ttm.base); 1112 1113 return err; 1114 } 1115 1116 static void xe_vma_destroy_unlocked(struct xe_vma *vma) 1117 { 1118 struct drm_exec exec; 1119 int err; 1120 1121 drm_exec_init(&exec, 0, 0); 1122 drm_exec_until_all_locked(&exec) { 1123 err = xe_vm_lock_vma(&exec, vma); 1124 drm_exec_retry_on_contention(&exec); 1125 if (XE_WARN_ON(err)) 1126 break; 1127 } 1128 1129 xe_vma_destroy(vma, NULL); 1130 1131 drm_exec_fini(&exec); 1132 } 1133 1134 struct xe_vma * 1135 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range) 1136 { 1137 struct drm_gpuva *gpuva; 1138 1139 lockdep_assert_held(&vm->lock); 1140 1141 if (xe_vm_is_closed_or_banned(vm)) 1142 return NULL; 1143 1144 xe_assert(vm->xe, start + range <= vm->size); 1145 1146 gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range); 1147 1148 return gpuva ? gpuva_to_vma(gpuva) : NULL; 1149 } 1150 1151 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma) 1152 { 1153 int err; 1154 1155 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1156 lockdep_assert_held(&vm->lock); 1157 1158 mutex_lock(&vm->snap_mutex); 1159 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva); 1160 mutex_unlock(&vm->snap_mutex); 1161 XE_WARN_ON(err); /* Shouldn't be possible */ 1162 1163 return err; 1164 } 1165 1166 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma) 1167 { 1168 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1169 lockdep_assert_held(&vm->lock); 1170 1171 mutex_lock(&vm->snap_mutex); 1172 drm_gpuva_remove(&vma->gpuva); 1173 mutex_unlock(&vm->snap_mutex); 1174 if (vm->usm.last_fault_vma == vma) 1175 vm->usm.last_fault_vma = NULL; 1176 } 1177 1178 static struct drm_gpuva_op *xe_vm_op_alloc(void) 1179 { 1180 struct xe_vma_op *op; 1181 1182 op = kzalloc(sizeof(*op), GFP_KERNEL); 1183 1184 if (unlikely(!op)) 1185 return NULL; 1186 1187 return &op->base; 1188 } 1189 1190 static void xe_vm_free(struct drm_gpuvm *gpuvm); 1191 1192 static const struct drm_gpuvm_ops gpuvm_ops = { 1193 .op_alloc = xe_vm_op_alloc, 1194 .vm_bo_validate = xe_gpuvm_validate, 1195 .vm_free = xe_vm_free, 1196 }; 1197 1198 static u64 pde_encode_pat_index(u16 pat_index) 1199 { 1200 u64 pte = 0; 1201 1202 if (pat_index & BIT(0)) 1203 pte |= XE_PPGTT_PTE_PAT0; 1204 1205 if (pat_index & BIT(1)) 1206 pte |= XE_PPGTT_PTE_PAT1; 1207 1208 return pte; 1209 } 1210 1211 static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level) 1212 { 1213 u64 pte = 0; 1214 1215 if (pat_index & BIT(0)) 1216 pte |= XE_PPGTT_PTE_PAT0; 1217 1218 if (pat_index & BIT(1)) 1219 pte |= XE_PPGTT_PTE_PAT1; 1220 1221 if (pat_index & BIT(2)) { 1222 if (pt_level) 1223 pte |= XE_PPGTT_PDE_PDPE_PAT2; 1224 else 1225 pte |= XE_PPGTT_PTE_PAT2; 1226 } 1227 1228 if (pat_index & BIT(3)) 1229 pte |= XELPG_PPGTT_PTE_PAT3; 1230 1231 if (pat_index & (BIT(4))) 1232 pte |= XE2_PPGTT_PTE_PAT4; 1233 1234 return pte; 1235 } 1236 1237 static u64 pte_encode_ps(u32 pt_level) 1238 { 1239 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL); 1240 1241 if (pt_level == 1) 1242 return XE_PDE_PS_2M; 1243 else if (pt_level == 2) 1244 return XE_PDPE_PS_1G; 1245 1246 return 0; 1247 } 1248 1249 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset, 1250 const u16 pat_index) 1251 { 1252 u64 pde; 1253 1254 pde = 
xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1255 pde |= XE_PAGE_PRESENT | XE_PAGE_RW; 1256 pde |= pde_encode_pat_index(pat_index); 1257 1258 return pde; 1259 } 1260 1261 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, 1262 u16 pat_index, u32 pt_level) 1263 { 1264 u64 pte; 1265 1266 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1267 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1268 pte |= pte_encode_pat_index(pat_index, pt_level); 1269 pte |= pte_encode_ps(pt_level); 1270 1271 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) 1272 pte |= XE_PPGTT_PTE_DM; 1273 1274 return pte; 1275 } 1276 1277 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, 1278 u16 pat_index, u32 pt_level) 1279 { 1280 pte |= XE_PAGE_PRESENT; 1281 1282 if (likely(!xe_vma_read_only(vma))) 1283 pte |= XE_PAGE_RW; 1284 1285 pte |= pte_encode_pat_index(pat_index, pt_level); 1286 pte |= pte_encode_ps(pt_level); 1287 1288 if (unlikely(xe_vma_is_null(vma))) 1289 pte |= XE_PTE_NULL; 1290 1291 return pte; 1292 } 1293 1294 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, 1295 u16 pat_index, 1296 u32 pt_level, bool devmem, u64 flags) 1297 { 1298 u64 pte; 1299 1300 /* Avoid passing random bits directly as flags */ 1301 xe_assert(xe, !(flags & ~XE_PTE_PS64)); 1302 1303 pte = addr; 1304 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1305 pte |= pte_encode_pat_index(pat_index, pt_level); 1306 pte |= pte_encode_ps(pt_level); 1307 1308 if (devmem) 1309 pte |= XE_PPGTT_PTE_DM; 1310 1311 pte |= flags; 1312 1313 return pte; 1314 } 1315 1316 static const struct xe_pt_ops xelp_pt_ops = { 1317 .pte_encode_bo = xelp_pte_encode_bo, 1318 .pte_encode_vma = xelp_pte_encode_vma, 1319 .pte_encode_addr = xelp_pte_encode_addr, 1320 .pde_encode_bo = xelp_pde_encode_bo, 1321 }; 1322 1323 static void vm_destroy_work_func(struct work_struct *w); 1324 1325 /** 1326 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the 1327 * given tile and vm. 1328 * @xe: xe device. 1329 * @tile: tile to set up for. 1330 * @vm: vm to set up for. 1331 * 1332 * Sets up a pagetable tree with one page-table per level and a single 1333 * leaf PTE. All pagetable entries point to the single page-table or, 1334 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and 1335 * writes become NOPs. 1336 * 1337 * Return: 0 on success, negative error code on error. 
1338 */ 1339 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, 1340 struct xe_vm *vm) 1341 { 1342 u8 id = tile->id; 1343 int i; 1344 1345 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) { 1346 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i); 1347 if (IS_ERR(vm->scratch_pt[id][i])) 1348 return PTR_ERR(vm->scratch_pt[id][i]); 1349 1350 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); 1351 } 1352 1353 return 0; 1354 } 1355 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO); 1356 1357 static void xe_vm_free_scratch(struct xe_vm *vm) 1358 { 1359 struct xe_tile *tile; 1360 u8 id; 1361 1362 if (!xe_vm_has_scratch(vm)) 1363 return; 1364 1365 for_each_tile(tile, vm->xe, id) { 1366 u32 i; 1367 1368 if (!vm->pt_root[id]) 1369 continue; 1370 1371 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i) 1372 if (vm->scratch_pt[id][i]) 1373 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL); 1374 } 1375 } 1376 1377 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) 1378 { 1379 struct drm_gem_object *vm_resv_obj; 1380 struct xe_vm *vm; 1381 int err, number_tiles = 0; 1382 struct xe_tile *tile; 1383 u8 id; 1384 1385 /* 1386 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to 1387 * ever be in faulting mode. 1388 */ 1389 xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE))); 1390 1391 vm = kzalloc(sizeof(*vm), GFP_KERNEL); 1392 if (!vm) 1393 return ERR_PTR(-ENOMEM); 1394 1395 vm->xe = xe; 1396 1397 vm->size = 1ull << xe->info.va_bits; 1398 1399 vm->flags = flags; 1400 1401 /** 1402 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be 1403 * manipulated under the PXP mutex. However, the PXP mutex can be taken 1404 * under a user-VM lock when the PXP session is started at exec_queue 1405 * creation time. Those are different VMs and therefore there is no risk 1406 * of deadlock, but we need to tell lockdep that this is the case or it 1407 * will print a warning. 1408 */ 1409 if (flags & XE_VM_FLAG_GSC) { 1410 static struct lock_class_key gsc_vm_key; 1411 1412 __init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key); 1413 } else { 1414 init_rwsem(&vm->lock); 1415 } 1416 mutex_init(&vm->snap_mutex); 1417 1418 INIT_LIST_HEAD(&vm->rebind_list); 1419 1420 INIT_LIST_HEAD(&vm->userptr.repin_list); 1421 INIT_LIST_HEAD(&vm->userptr.invalidated); 1422 init_rwsem(&vm->userptr.notifier_lock); 1423 spin_lock_init(&vm->userptr.invalidated_lock); 1424 1425 ttm_lru_bulk_move_init(&vm->lru_bulk_move); 1426 1427 INIT_WORK(&vm->destroy_work, vm_destroy_work_func); 1428 1429 INIT_LIST_HEAD(&vm->preempt.exec_queues); 1430 vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */ 1431 1432 for_each_tile(tile, xe, id) 1433 xe_range_fence_tree_init(&vm->rftree[id]); 1434 1435 vm->pt_ops = &xelp_pt_ops; 1436 1437 /* 1438 * Long-running workloads are not protected by the scheduler references. 1439 * By design, run_job for long-running workloads returns NULL and the 1440 * scheduler drops all the references of it, hence protecting the VM 1441 * for this case is necessary. 
1442 */ 1443 if (flags & XE_VM_FLAG_LR_MODE) 1444 xe_pm_runtime_get_noresume(xe); 1445 1446 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); 1447 if (!vm_resv_obj) { 1448 err = -ENOMEM; 1449 goto err_no_resv; 1450 } 1451 1452 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm, 1453 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops); 1454 1455 drm_gem_object_put(vm_resv_obj); 1456 1457 err = xe_vm_lock(vm, true); 1458 if (err) 1459 goto err_close; 1460 1461 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) 1462 vm->flags |= XE_VM_FLAG_64K; 1463 1464 for_each_tile(tile, xe, id) { 1465 if (flags & XE_VM_FLAG_MIGRATION && 1466 tile->id != XE_VM_FLAG_TILE_ID(flags)) 1467 continue; 1468 1469 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level); 1470 if (IS_ERR(vm->pt_root[id])) { 1471 err = PTR_ERR(vm->pt_root[id]); 1472 vm->pt_root[id] = NULL; 1473 goto err_unlock_close; 1474 } 1475 } 1476 1477 if (xe_vm_has_scratch(vm)) { 1478 for_each_tile(tile, xe, id) { 1479 if (!vm->pt_root[id]) 1480 continue; 1481 1482 err = xe_vm_create_scratch(xe, tile, vm); 1483 if (err) 1484 goto err_unlock_close; 1485 } 1486 vm->batch_invalidate_tlb = true; 1487 } 1488 1489 if (vm->flags & XE_VM_FLAG_LR_MODE) { 1490 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); 1491 vm->batch_invalidate_tlb = false; 1492 } 1493 1494 /* Fill pt_root after allocating scratch tables */ 1495 for_each_tile(tile, xe, id) { 1496 if (!vm->pt_root[id]) 1497 continue; 1498 1499 xe_pt_populate_empty(tile, vm, vm->pt_root[id]); 1500 } 1501 xe_vm_unlock(vm); 1502 1503 /* Kernel migration VM shouldn't have a circular loop.. */ 1504 if (!(flags & XE_VM_FLAG_MIGRATION)) { 1505 for_each_tile(tile, xe, id) { 1506 struct xe_exec_queue *q; 1507 u32 create_flags = EXEC_QUEUE_FLAG_VM; 1508 1509 if (!vm->pt_root[id]) 1510 continue; 1511 1512 q = xe_exec_queue_create_bind(xe, tile, create_flags, 0); 1513 if (IS_ERR(q)) { 1514 err = PTR_ERR(q); 1515 goto err_close; 1516 } 1517 vm->q[id] = q; 1518 number_tiles++; 1519 } 1520 } 1521 1522 if (number_tiles > 1) 1523 vm->composite_fence_ctx = dma_fence_context_alloc(1); 1524 1525 trace_xe_vm_create(vm); 1526 1527 return vm; 1528 1529 err_unlock_close: 1530 xe_vm_unlock(vm); 1531 err_close: 1532 xe_vm_close_and_put(vm); 1533 return ERR_PTR(err); 1534 1535 err_no_resv: 1536 mutex_destroy(&vm->snap_mutex); 1537 for_each_tile(tile, xe, id) 1538 xe_range_fence_tree_fini(&vm->rftree[id]); 1539 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 1540 kfree(vm); 1541 if (flags & XE_VM_FLAG_LR_MODE) 1542 xe_pm_runtime_put(xe); 1543 return ERR_PTR(err); 1544 } 1545 1546 static void xe_vm_close(struct xe_vm *vm) 1547 { 1548 down_write(&vm->lock); 1549 vm->size = 0; 1550 up_write(&vm->lock); 1551 } 1552 1553 void xe_vm_close_and_put(struct xe_vm *vm) 1554 { 1555 LIST_HEAD(contested); 1556 struct xe_device *xe = vm->xe; 1557 struct xe_tile *tile; 1558 struct xe_vma *vma, *next_vma; 1559 struct drm_gpuva *gpuva, *next; 1560 u8 id; 1561 1562 xe_assert(xe, !vm->preempt.num_exec_queues); 1563 1564 xe_vm_close(vm); 1565 if (xe_vm_in_preempt_fence_mode(vm)) 1566 flush_work(&vm->preempt.rebind_work); 1567 1568 down_write(&vm->lock); 1569 for_each_tile(tile, xe, id) { 1570 if (vm->q[id]) 1571 xe_exec_queue_last_fence_put(vm->q[id], vm); 1572 } 1573 up_write(&vm->lock); 1574 1575 for_each_tile(tile, xe, id) { 1576 if (vm->q[id]) { 1577 xe_exec_queue_kill(vm->q[id]); 1578 xe_exec_queue_put(vm->q[id]); 1579 vm->q[id] = NULL; 1580 } 1581 } 1582 1583 down_write(&vm->lock); 1584 
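	/* Tear down VMAs with both the VM lock and the VM's dma-resv held. */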
xe_vm_lock(vm, false); 1585 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) { 1586 vma = gpuva_to_vma(gpuva); 1587 1588 if (xe_vma_has_no_bo(vma)) { 1589 down_read(&vm->userptr.notifier_lock); 1590 vma->gpuva.flags |= XE_VMA_DESTROYED; 1591 up_read(&vm->userptr.notifier_lock); 1592 } 1593 1594 xe_vm_remove_vma(vm, vma); 1595 1596 /* easy case, remove from VMA? */ 1597 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) { 1598 list_del_init(&vma->combined_links.rebind); 1599 xe_vma_destroy(vma, NULL); 1600 continue; 1601 } 1602 1603 list_move_tail(&vma->combined_links.destroy, &contested); 1604 vma->gpuva.flags |= XE_VMA_DESTROYED; 1605 } 1606 1607 /* 1608 * All vm operations will add shared fences to resv. 1609 * The only exception is eviction for a shared object, 1610 * but even so, the unbind when evicted would still 1611 * install a fence to resv. Hence it's safe to 1612 * destroy the pagetables immediately. 1613 */ 1614 xe_vm_free_scratch(vm); 1615 1616 for_each_tile(tile, xe, id) { 1617 if (vm->pt_root[id]) { 1618 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); 1619 vm->pt_root[id] = NULL; 1620 } 1621 } 1622 xe_vm_unlock(vm); 1623 1624 /* 1625 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL 1626 * Since we hold a refcount to the bo, we can remove and free 1627 * the members safely without locking. 1628 */ 1629 list_for_each_entry_safe(vma, next_vma, &contested, 1630 combined_links.destroy) { 1631 list_del_init(&vma->combined_links.destroy); 1632 xe_vma_destroy_unlocked(vma); 1633 } 1634 1635 up_write(&vm->lock); 1636 1637 down_write(&xe->usm.lock); 1638 if (vm->usm.asid) { 1639 void *lookup; 1640 1641 xe_assert(xe, xe->info.has_asid); 1642 xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION)); 1643 1644 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); 1645 xe_assert(xe, lookup == vm); 1646 } 1647 up_write(&xe->usm.lock); 1648 1649 for_each_tile(tile, xe, id) 1650 xe_range_fence_tree_fini(&vm->rftree[id]); 1651 1652 xe_vm_put(vm); 1653 } 1654 1655 static void vm_destroy_work_func(struct work_struct *w) 1656 { 1657 struct xe_vm *vm = 1658 container_of(w, struct xe_vm, destroy_work); 1659 struct xe_device *xe = vm->xe; 1660 struct xe_tile *tile; 1661 u8 id; 1662 1663 /* xe_vm_close_and_put was not called? 
*/ 1664 xe_assert(xe, !vm->size); 1665 1666 if (xe_vm_in_preempt_fence_mode(vm)) 1667 flush_work(&vm->preempt.rebind_work); 1668 1669 mutex_destroy(&vm->snap_mutex); 1670 1671 if (vm->flags & XE_VM_FLAG_LR_MODE) 1672 xe_pm_runtime_put(xe); 1673 1674 for_each_tile(tile, xe, id) 1675 XE_WARN_ON(vm->pt_root[id]); 1676 1677 trace_xe_vm_free(vm); 1678 1679 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 1680 1681 if (vm->xef) 1682 xe_file_put(vm->xef); 1683 1684 kfree(vm); 1685 } 1686 1687 static void xe_vm_free(struct drm_gpuvm *gpuvm) 1688 { 1689 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); 1690 1691 /* To destroy the VM we need to be able to sleep */ 1692 queue_work(system_unbound_wq, &vm->destroy_work); 1693 } 1694 1695 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) 1696 { 1697 struct xe_vm *vm; 1698 1699 mutex_lock(&xef->vm.lock); 1700 vm = xa_load(&xef->vm.xa, id); 1701 if (vm) 1702 xe_vm_get(vm); 1703 mutex_unlock(&xef->vm.lock); 1704 1705 return vm; 1706 } 1707 1708 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile) 1709 { 1710 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0, 1711 tile_to_xe(tile)->pat.idx[XE_CACHE_WB]); 1712 } 1713 1714 static struct xe_exec_queue * 1715 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 1716 { 1717 return q ? q : vm->q[0]; 1718 } 1719 1720 static struct xe_user_fence * 1721 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) 1722 { 1723 unsigned int i; 1724 1725 for (i = 0; i < num_syncs; i++) { 1726 struct xe_sync_entry *e = &syncs[i]; 1727 1728 if (xe_sync_is_ufence(e)) 1729 return xe_sync_ufence_get(e); 1730 } 1731 1732 return NULL; 1733 } 1734 1735 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ 1736 DRM_XE_VM_CREATE_FLAG_LR_MODE | \ 1737 DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 1738 1739 int xe_vm_create_ioctl(struct drm_device *dev, void *data, 1740 struct drm_file *file) 1741 { 1742 struct xe_device *xe = to_xe_device(dev); 1743 struct xe_file *xef = to_xe_file(file); 1744 struct drm_xe_vm_create *args = data; 1745 struct xe_tile *tile; 1746 struct xe_vm *vm; 1747 u32 id, asid; 1748 int err; 1749 u32 flags = 0; 1750 1751 if (XE_IOCTL_DBG(xe, args->extensions)) 1752 return -EINVAL; 1753 1754 if (XE_WA(xe_root_mmio_gt(xe), 14016763929)) 1755 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE; 1756 1757 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && 1758 !xe->info.has_usm)) 1759 return -EINVAL; 1760 1761 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 1762 return -EINVAL; 1763 1764 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS)) 1765 return -EINVAL; 1766 1767 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE && 1768 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) 1769 return -EINVAL; 1770 1771 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) && 1772 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) 1773 return -EINVAL; 1774 1775 if (XE_IOCTL_DBG(xe, args->extensions)) 1776 return -EINVAL; 1777 1778 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE) 1779 flags |= XE_VM_FLAG_SCRATCH_PAGE; 1780 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) 1781 flags |= XE_VM_FLAG_LR_MODE; 1782 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 1783 flags |= XE_VM_FLAG_FAULT_MODE; 1784 1785 vm = xe_vm_create(xe, flags); 1786 if (IS_ERR(vm)) 1787 return PTR_ERR(vm); 1788 1789 if (xe->info.has_asid) { 1790 down_write(&xe->usm.lock); 1791 err = 
xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, 1792 XA_LIMIT(1, XE_MAX_ASID - 1), 1793 &xe->usm.next_asid, GFP_KERNEL); 1794 up_write(&xe->usm.lock); 1795 if (err < 0) 1796 goto err_close_and_put; 1797 1798 vm->usm.asid = asid; 1799 } 1800 1801 vm->xef = xe_file_get(xef); 1802 1803 /* Record BO memory for VM pagetable created against client */ 1804 for_each_tile(tile, xe, id) 1805 if (vm->pt_root[id]) 1806 xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo); 1807 1808 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) 1809 /* Warning: Security issue - never enable by default */ 1810 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); 1811 #endif 1812 1813 /* user id alloc must always be last in ioctl to prevent UAF */ 1814 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); 1815 if (err) 1816 goto err_close_and_put; 1817 1818 args->vm_id = id; 1819 1820 return 0; 1821 1822 err_close_and_put: 1823 xe_vm_close_and_put(vm); 1824 1825 return err; 1826 } 1827 1828 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, 1829 struct drm_file *file) 1830 { 1831 struct xe_device *xe = to_xe_device(dev); 1832 struct xe_file *xef = to_xe_file(file); 1833 struct drm_xe_vm_destroy *args = data; 1834 struct xe_vm *vm; 1835 int err = 0; 1836 1837 if (XE_IOCTL_DBG(xe, args->pad) || 1838 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 1839 return -EINVAL; 1840 1841 mutex_lock(&xef->vm.lock); 1842 vm = xa_load(&xef->vm.xa, args->vm_id); 1843 if (XE_IOCTL_DBG(xe, !vm)) 1844 err = -ENOENT; 1845 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues)) 1846 err = -EBUSY; 1847 else 1848 xa_erase(&xef->vm.xa, args->vm_id); 1849 mutex_unlock(&xef->vm.lock); 1850 1851 if (!err) 1852 xe_vm_close_and_put(vm); 1853 1854 return err; 1855 } 1856 1857 static const u32 region_to_mem_type[] = { 1858 XE_PL_TT, 1859 XE_PL_VRAM0, 1860 XE_PL_VRAM1, 1861 }; 1862 1863 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma, 1864 bool post_commit) 1865 { 1866 down_read(&vm->userptr.notifier_lock); 1867 vma->gpuva.flags |= XE_VMA_DESTROYED; 1868 up_read(&vm->userptr.notifier_lock); 1869 if (post_commit) 1870 xe_vm_remove_vma(vm, vma); 1871 } 1872 1873 #undef ULL 1874 #define ULL unsigned long long 1875 1876 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) 1877 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 1878 { 1879 struct xe_vma *vma; 1880 1881 switch (op->op) { 1882 case DRM_GPUVA_OP_MAP: 1883 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx", 1884 (ULL)op->map.va.addr, (ULL)op->map.va.range); 1885 break; 1886 case DRM_GPUVA_OP_REMAP: 1887 vma = gpuva_to_vma(op->remap.unmap->va); 1888 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 1889 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 1890 op->remap.unmap->keep ? 1 : 0); 1891 if (op->remap.prev) 1892 vm_dbg(&xe->drm, 1893 "REMAP:PREV: addr=0x%016llx, range=0x%016llx", 1894 (ULL)op->remap.prev->va.addr, 1895 (ULL)op->remap.prev->va.range); 1896 if (op->remap.next) 1897 vm_dbg(&xe->drm, 1898 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx", 1899 (ULL)op->remap.next->va.addr, 1900 (ULL)op->remap.next->va.range); 1901 break; 1902 case DRM_GPUVA_OP_UNMAP: 1903 vma = gpuva_to_vma(op->unmap.va); 1904 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 1905 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 1906 op->unmap.keep ? 
1 : 0); 1907 break; 1908 case DRM_GPUVA_OP_PREFETCH: 1909 vma = gpuva_to_vma(op->prefetch.va); 1910 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx", 1911 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma)); 1912 break; 1913 default: 1914 drm_warn(&xe->drm, "NOT POSSIBLE"); 1915 } 1916 } 1917 #else 1918 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 1919 { 1920 } 1921 #endif 1922 1923 /* 1924 * Create operations list from IOCTL arguments, setup operations fields so parse 1925 * and commit steps are decoupled from IOCTL arguments. This step can fail. 1926 */ 1927 static struct drm_gpuva_ops * 1928 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, 1929 u64 bo_offset_or_userptr, u64 addr, u64 range, 1930 u32 operation, u32 flags, 1931 u32 prefetch_region, u16 pat_index) 1932 { 1933 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL; 1934 struct drm_gpuva_ops *ops; 1935 struct drm_gpuva_op *__op; 1936 struct drm_gpuvm_bo *vm_bo; 1937 int err; 1938 1939 lockdep_assert_held_write(&vm->lock); 1940 1941 vm_dbg(&vm->xe->drm, 1942 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx", 1943 operation, (ULL)addr, (ULL)range, 1944 (ULL)bo_offset_or_userptr); 1945 1946 switch (operation) { 1947 case DRM_XE_VM_BIND_OP_MAP: 1948 case DRM_XE_VM_BIND_OP_MAP_USERPTR: 1949 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range, 1950 obj, bo_offset_or_userptr); 1951 break; 1952 case DRM_XE_VM_BIND_OP_UNMAP: 1953 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range); 1954 break; 1955 case DRM_XE_VM_BIND_OP_PREFETCH: 1956 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range); 1957 break; 1958 case DRM_XE_VM_BIND_OP_UNMAP_ALL: 1959 xe_assert(vm->xe, bo); 1960 1961 err = xe_bo_lock(bo, true); 1962 if (err) 1963 return ERR_PTR(err); 1964 1965 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj); 1966 if (IS_ERR(vm_bo)) { 1967 xe_bo_unlock(bo); 1968 return ERR_CAST(vm_bo); 1969 } 1970 1971 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo); 1972 drm_gpuvm_bo_put(vm_bo); 1973 xe_bo_unlock(bo); 1974 break; 1975 default: 1976 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 1977 ops = ERR_PTR(-EINVAL); 1978 } 1979 if (IS_ERR(ops)) 1980 return ops; 1981 1982 drm_gpuva_for_each_op(__op, ops) { 1983 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 1984 1985 if (__op->op == DRM_GPUVA_OP_MAP) { 1986 op->map.immediate = 1987 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; 1988 op->map.read_only = 1989 flags & DRM_XE_VM_BIND_FLAG_READONLY; 1990 op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 1991 op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; 1992 op->map.pat_index = pat_index; 1993 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { 1994 op->prefetch.region = prefetch_region; 1995 } 1996 1997 print_op(vm->xe, __op); 1998 } 1999 2000 return ops; 2001 } 2002 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO); 2003 2004 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, 2005 u16 pat_index, unsigned int flags) 2006 { 2007 struct xe_bo *bo = op->gem.obj ? 
gem_to_xe_bo(op->gem.obj) : NULL; 2008 struct drm_exec exec; 2009 struct xe_vma *vma; 2010 int err = 0; 2011 2012 lockdep_assert_held_write(&vm->lock); 2013 2014 if (bo) { 2015 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); 2016 drm_exec_until_all_locked(&exec) { 2017 err = 0; 2018 if (!bo->vm) { 2019 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm)); 2020 drm_exec_retry_on_contention(&exec); 2021 } 2022 if (!err) { 2023 err = drm_exec_lock_obj(&exec, &bo->ttm.base); 2024 drm_exec_retry_on_contention(&exec); 2025 } 2026 if (err) { 2027 drm_exec_fini(&exec); 2028 return ERR_PTR(err); 2029 } 2030 } 2031 } 2032 vma = xe_vma_create(vm, bo, op->gem.offset, 2033 op->va.addr, op->va.addr + 2034 op->va.range - 1, pat_index, flags); 2035 if (IS_ERR(vma)) 2036 goto err_unlock; 2037 2038 if (xe_vma_is_userptr(vma)) 2039 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2040 else if (!xe_vma_has_no_bo(vma) && !bo->vm) 2041 err = add_preempt_fences(vm, bo); 2042 2043 err_unlock: 2044 if (bo) 2045 drm_exec_fini(&exec); 2046 2047 if (err) { 2048 prep_vma_destroy(vm, vma, false); 2049 xe_vma_destroy_unlocked(vma); 2050 vma = ERR_PTR(err); 2051 } 2052 2053 return vma; 2054 } 2055 2056 static u64 xe_vma_max_pte_size(struct xe_vma *vma) 2057 { 2058 if (vma->gpuva.flags & XE_VMA_PTE_1G) 2059 return SZ_1G; 2060 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT)) 2061 return SZ_2M; 2062 else if (vma->gpuva.flags & XE_VMA_PTE_64K) 2063 return SZ_64K; 2064 else if (vma->gpuva.flags & XE_VMA_PTE_4K) 2065 return SZ_4K; 2066 2067 return SZ_1G; /* Uninitialized, used max size */ 2068 } 2069 2070 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size) 2071 { 2072 switch (size) { 2073 case SZ_1G: 2074 vma->gpuva.flags |= XE_VMA_PTE_1G; 2075 break; 2076 case SZ_2M: 2077 vma->gpuva.flags |= XE_VMA_PTE_2M; 2078 break; 2079 case SZ_64K: 2080 vma->gpuva.flags |= XE_VMA_PTE_64K; 2081 break; 2082 case SZ_4K: 2083 vma->gpuva.flags |= XE_VMA_PTE_4K; 2084 break; 2085 } 2086 } 2087 2088 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) 2089 { 2090 int err = 0; 2091 2092 lockdep_assert_held_write(&vm->lock); 2093 2094 switch (op->base.op) { 2095 case DRM_GPUVA_OP_MAP: 2096 err |= xe_vm_insert_vma(vm, op->map.vma); 2097 if (!err) 2098 op->flags |= XE_VMA_OP_COMMITTED; 2099 break; 2100 case DRM_GPUVA_OP_REMAP: 2101 { 2102 u8 tile_present = 2103 gpuva_to_vma(op->base.remap.unmap->va)->tile_present; 2104 2105 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va), 2106 true); 2107 op->flags |= XE_VMA_OP_COMMITTED; 2108 2109 if (op->remap.prev) { 2110 err |= xe_vm_insert_vma(vm, op->remap.prev); 2111 if (!err) 2112 op->flags |= XE_VMA_OP_PREV_COMMITTED; 2113 if (!err && op->remap.skip_prev) { 2114 op->remap.prev->tile_present = 2115 tile_present; 2116 op->remap.prev = NULL; 2117 } 2118 } 2119 if (op->remap.next) { 2120 err |= xe_vm_insert_vma(vm, op->remap.next); 2121 if (!err) 2122 op->flags |= XE_VMA_OP_NEXT_COMMITTED; 2123 if (!err && op->remap.skip_next) { 2124 op->remap.next->tile_present = 2125 tile_present; 2126 op->remap.next = NULL; 2127 } 2128 } 2129 2130 /* Adjust for partial unbind after removing VMA from VM */ 2131 if (!err) { 2132 op->base.remap.unmap->va->va.addr = op->remap.start; 2133 op->base.remap.unmap->va->va.range = op->remap.range; 2134 } 2135 break; 2136 } 2137 case DRM_GPUVA_OP_UNMAP: 2138 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true); 2139 op->flags |= XE_VMA_OP_COMMITTED; 2140 break; 2141 case DRM_GPUVA_OP_PREFETCH: 2142 op->flags |= 
XE_VMA_OP_COMMITTED; 2143 break; 2144 default: 2145 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2146 } 2147 2148 return err; 2149 } 2150 2151 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, 2152 struct xe_vma_ops *vops) 2153 { 2154 struct xe_device *xe = vm->xe; 2155 struct drm_gpuva_op *__op; 2156 struct xe_tile *tile; 2157 u8 id, tile_mask = 0; 2158 int err = 0; 2159 2160 lockdep_assert_held_write(&vm->lock); 2161 2162 for_each_tile(tile, vm->xe, id) 2163 tile_mask |= 0x1 << id; 2164 2165 drm_gpuva_for_each_op(__op, ops) { 2166 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2167 struct xe_vma *vma; 2168 unsigned int flags = 0; 2169 2170 INIT_LIST_HEAD(&op->link); 2171 list_add_tail(&op->link, &vops->list); 2172 op->tile_mask = tile_mask; 2173 2174 switch (op->base.op) { 2175 case DRM_GPUVA_OP_MAP: 2176 { 2177 flags |= op->map.read_only ? 2178 VMA_CREATE_FLAG_READ_ONLY : 0; 2179 flags |= op->map.is_null ? 2180 VMA_CREATE_FLAG_IS_NULL : 0; 2181 flags |= op->map.dumpable ? 2182 VMA_CREATE_FLAG_DUMPABLE : 0; 2183 2184 vma = new_vma(vm, &op->base.map, op->map.pat_index, 2185 flags); 2186 if (IS_ERR(vma)) 2187 return PTR_ERR(vma); 2188 2189 op->map.vma = vma; 2190 if (op->map.immediate || !xe_vm_in_fault_mode(vm)) 2191 xe_vma_ops_incr_pt_update_ops(vops, 2192 op->tile_mask); 2193 break; 2194 } 2195 case DRM_GPUVA_OP_REMAP: 2196 { 2197 struct xe_vma *old = 2198 gpuva_to_vma(op->base.remap.unmap->va); 2199 2200 op->remap.start = xe_vma_start(old); 2201 op->remap.range = xe_vma_size(old); 2202 2203 if (op->base.remap.prev) { 2204 flags |= op->base.remap.unmap->va->flags & 2205 XE_VMA_READ_ONLY ? 2206 VMA_CREATE_FLAG_READ_ONLY : 0; 2207 flags |= op->base.remap.unmap->va->flags & 2208 DRM_GPUVA_SPARSE ? 2209 VMA_CREATE_FLAG_IS_NULL : 0; 2210 flags |= op->base.remap.unmap->va->flags & 2211 XE_VMA_DUMPABLE ? 2212 VMA_CREATE_FLAG_DUMPABLE : 0; 2213 2214 vma = new_vma(vm, op->base.remap.prev, 2215 old->pat_index, flags); 2216 if (IS_ERR(vma)) 2217 return PTR_ERR(vma); 2218 2219 op->remap.prev = vma; 2220 2221 /* 2222 * Userptr creates a new SG mapping so 2223 * we must also rebind. 2224 */ 2225 op->remap.skip_prev = !xe_vma_is_userptr(old) && 2226 IS_ALIGNED(xe_vma_end(vma), 2227 xe_vma_max_pte_size(old)); 2228 if (op->remap.skip_prev) { 2229 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2230 op->remap.range -= 2231 xe_vma_end(vma) - 2232 xe_vma_start(old); 2233 op->remap.start = xe_vma_end(vma); 2234 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx", 2235 (ULL)op->remap.start, 2236 (ULL)op->remap.range); 2237 } else { 2238 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); 2239 } 2240 } 2241 2242 if (op->base.remap.next) { 2243 flags |= op->base.remap.unmap->va->flags & 2244 XE_VMA_READ_ONLY ? 2245 VMA_CREATE_FLAG_READ_ONLY : 0; 2246 flags |= op->base.remap.unmap->va->flags & 2247 DRM_GPUVA_SPARSE ? 2248 VMA_CREATE_FLAG_IS_NULL : 0; 2249 flags |= op->base.remap.unmap->va->flags & 2250 XE_VMA_DUMPABLE ? 2251 VMA_CREATE_FLAG_DUMPABLE : 0; 2252 2253 vma = new_vma(vm, op->base.remap.next, 2254 old->pat_index, flags); 2255 if (IS_ERR(vma)) 2256 return PTR_ERR(vma); 2257 2258 op->remap.next = vma; 2259 2260 /* 2261 * Userptr creates a new SG mapping so 2262 * we must also rebind. 
2263 */ 2264 op->remap.skip_next = !xe_vma_is_userptr(old) && 2265 IS_ALIGNED(xe_vma_start(vma), 2266 xe_vma_max_pte_size(old)); 2267 if (op->remap.skip_next) { 2268 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2269 op->remap.range -= 2270 xe_vma_end(old) - 2271 xe_vma_start(vma); 2272 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx", 2273 (ULL)op->remap.start, 2274 (ULL)op->remap.range); 2275 } else { 2276 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); 2277 } 2278 } 2279 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); 2280 break; 2281 } 2282 case DRM_GPUVA_OP_UNMAP: 2283 case DRM_GPUVA_OP_PREFETCH: 2284 /* FIXME: Need to skip some prefetch ops */ 2285 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); 2286 break; 2287 default: 2288 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2289 } 2290 2291 err = xe_vma_op_commit(vm, op); 2292 if (err) 2293 return err; 2294 } 2295 2296 return 0; 2297 } 2298 2299 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, 2300 bool post_commit, bool prev_post_commit, 2301 bool next_post_commit) 2302 { 2303 lockdep_assert_held_write(&vm->lock); 2304 2305 switch (op->base.op) { 2306 case DRM_GPUVA_OP_MAP: 2307 if (op->map.vma) { 2308 prep_vma_destroy(vm, op->map.vma, post_commit); 2309 xe_vma_destroy_unlocked(op->map.vma); 2310 } 2311 break; 2312 case DRM_GPUVA_OP_UNMAP: 2313 { 2314 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va); 2315 2316 if (vma) { 2317 down_read(&vm->userptr.notifier_lock); 2318 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2319 up_read(&vm->userptr.notifier_lock); 2320 if (post_commit) 2321 xe_vm_insert_vma(vm, vma); 2322 } 2323 break; 2324 } 2325 case DRM_GPUVA_OP_REMAP: 2326 { 2327 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va); 2328 2329 if (op->remap.prev) { 2330 prep_vma_destroy(vm, op->remap.prev, prev_post_commit); 2331 xe_vma_destroy_unlocked(op->remap.prev); 2332 } 2333 if (op->remap.next) { 2334 prep_vma_destroy(vm, op->remap.next, next_post_commit); 2335 xe_vma_destroy_unlocked(op->remap.next); 2336 } 2337 if (vma) { 2338 down_read(&vm->userptr.notifier_lock); 2339 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2340 up_read(&vm->userptr.notifier_lock); 2341 if (post_commit) 2342 xe_vm_insert_vma(vm, vma); 2343 } 2344 break; 2345 } 2346 case DRM_GPUVA_OP_PREFETCH: 2347 /* Nothing to do */ 2348 break; 2349 default: 2350 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2351 } 2352 } 2353 2354 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, 2355 struct drm_gpuva_ops **ops, 2356 int num_ops_list) 2357 { 2358 int i; 2359 2360 for (i = num_ops_list - 1; i >= 0; --i) { 2361 struct drm_gpuva_ops *__ops = ops[i]; 2362 struct drm_gpuva_op *__op; 2363 2364 if (!__ops) 2365 continue; 2366 2367 drm_gpuva_for_each_op_reverse(__op, __ops) { 2368 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2369 2370 xe_vma_op_unwind(vm, op, 2371 op->flags & XE_VMA_OP_COMMITTED, 2372 op->flags & XE_VMA_OP_PREV_COMMITTED, 2373 op->flags & XE_VMA_OP_NEXT_COMMITTED); 2374 } 2375 } 2376 } 2377 2378 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, 2379 bool validate) 2380 { 2381 struct xe_bo *bo = xe_vma_bo(vma); 2382 struct xe_vm *vm = xe_vma_vm(vma); 2383 int err = 0; 2384 2385 if (bo) { 2386 if (!bo->vm) 2387 err = drm_exec_lock_obj(exec, &bo->ttm.base); 2388 if (!err && validate) 2389 err = xe_bo_validate(bo, vm, 2390 !xe_vm_in_preempt_fence_mode(vm)); 2391 } 2392 2393 return err; 2394 } 2395 2396 static int check_ufence(struct xe_vma *vma) 2397 { 2398 if (vma->ufence) { 2399 struct 
xe_user_fence * const f = vma->ufence; 2400 2401 if (!xe_sync_ufence_get_status(f)) 2402 return -EBUSY; 2403 2404 vma->ufence = NULL; 2405 xe_sync_ufence_put(f); 2406 } 2407 2408 return 0; 2409 } 2410 2411 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, 2412 struct xe_vma_op *op) 2413 { 2414 int err = 0; 2415 2416 switch (op->base.op) { 2417 case DRM_GPUVA_OP_MAP: 2418 err = vma_lock_and_validate(exec, op->map.vma, 2419 !xe_vm_in_fault_mode(vm) || 2420 op->map.immediate); 2421 break; 2422 case DRM_GPUVA_OP_REMAP: 2423 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va)); 2424 if (err) 2425 break; 2426 2427 err = vma_lock_and_validate(exec, 2428 gpuva_to_vma(op->base.remap.unmap->va), 2429 false); 2430 if (!err && op->remap.prev) 2431 err = vma_lock_and_validate(exec, op->remap.prev, true); 2432 if (!err && op->remap.next) 2433 err = vma_lock_and_validate(exec, op->remap.next, true); 2434 break; 2435 case DRM_GPUVA_OP_UNMAP: 2436 err = check_ufence(gpuva_to_vma(op->base.unmap.va)); 2437 if (err) 2438 break; 2439 2440 err = vma_lock_and_validate(exec, 2441 gpuva_to_vma(op->base.unmap.va), 2442 false); 2443 break; 2444 case DRM_GPUVA_OP_PREFETCH: 2445 { 2446 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2447 u32 region = op->prefetch.region; 2448 2449 xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type)); 2450 2451 err = vma_lock_and_validate(exec, 2452 gpuva_to_vma(op->base.prefetch.va), 2453 false); 2454 if (!err && !xe_vma_has_no_bo(vma)) 2455 err = xe_bo_migrate(xe_vma_bo(vma), 2456 region_to_mem_type[region]); 2457 break; 2458 } 2459 default: 2460 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2461 } 2462 2463 return err; 2464 } 2465 2466 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, 2467 struct xe_vm *vm, 2468 struct xe_vma_ops *vops) 2469 { 2470 struct xe_vma_op *op; 2471 int err; 2472 2473 err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 2474 if (err) 2475 return err; 2476 2477 list_for_each_entry(op, &vops->list, link) { 2478 err = op_lock_and_prep(exec, vm, op); 2479 if (err) 2480 return err; 2481 } 2482 2483 #ifdef TEST_VM_OPS_ERROR 2484 if (vops->inject_error && 2485 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK) 2486 return -ENOSPC; 2487 #endif 2488 2489 return 0; 2490 } 2491 2492 static void op_trace(struct xe_vma_op *op) 2493 { 2494 switch (op->base.op) { 2495 case DRM_GPUVA_OP_MAP: 2496 trace_xe_vma_bind(op->map.vma); 2497 break; 2498 case DRM_GPUVA_OP_REMAP: 2499 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va)); 2500 if (op->remap.prev) 2501 trace_xe_vma_bind(op->remap.prev); 2502 if (op->remap.next) 2503 trace_xe_vma_bind(op->remap.next); 2504 break; 2505 case DRM_GPUVA_OP_UNMAP: 2506 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va)); 2507 break; 2508 case DRM_GPUVA_OP_PREFETCH: 2509 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va)); 2510 break; 2511 default: 2512 XE_WARN_ON("NOT POSSIBLE"); 2513 } 2514 } 2515 2516 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops) 2517 { 2518 struct xe_vma_op *op; 2519 2520 list_for_each_entry(op, &vops->list, link) 2521 op_trace(op); 2522 } 2523 2524 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops) 2525 { 2526 struct xe_exec_queue *q = vops->q; 2527 struct xe_tile *tile; 2528 int number_tiles = 0; 2529 u8 id; 2530 2531 for_each_tile(tile, vm->xe, id) { 2532 if (vops->pt_update_ops[id].num_ops) 2533 ++number_tiles; 2534 2535 if (vops->pt_update_ops[id].q) 2536 continue; 2537 2538 if (q) { 2539 vops->pt_update_ops[id].q = 
q; 2540 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list)) 2541 q = list_next_entry(q, multi_gt_list); 2542 } else { 2543 vops->pt_update_ops[id].q = vm->q[id]; 2544 } 2545 } 2546 2547 return number_tiles; 2548 } 2549 2550 static struct dma_fence *ops_execute(struct xe_vm *vm, 2551 struct xe_vma_ops *vops) 2552 { 2553 struct xe_tile *tile; 2554 struct dma_fence *fence = NULL; 2555 struct dma_fence **fences = NULL; 2556 struct dma_fence_array *cf = NULL; 2557 int number_tiles = 0, current_fence = 0, err; 2558 u8 id; 2559 2560 number_tiles = vm_ops_setup_tile_args(vm, vops); 2561 if (number_tiles == 0) 2562 return ERR_PTR(-ENODATA); 2563 2564 if (number_tiles > 1) { 2565 fences = kmalloc_array(number_tiles, sizeof(*fences), 2566 GFP_KERNEL); 2567 if (!fences) { 2568 fence = ERR_PTR(-ENOMEM); 2569 goto err_trace; 2570 } 2571 } 2572 2573 for_each_tile(tile, vm->xe, id) { 2574 if (!vops->pt_update_ops[id].num_ops) 2575 continue; 2576 2577 err = xe_pt_update_ops_prepare(tile, vops); 2578 if (err) { 2579 fence = ERR_PTR(err); 2580 goto err_out; 2581 } 2582 } 2583 2584 trace_xe_vm_ops_execute(vops); 2585 2586 for_each_tile(tile, vm->xe, id) { 2587 if (!vops->pt_update_ops[id].num_ops) 2588 continue; 2589 2590 fence = xe_pt_update_ops_run(tile, vops); 2591 if (IS_ERR(fence)) 2592 goto err_out; 2593 2594 if (fences) 2595 fences[current_fence++] = fence; 2596 } 2597 2598 if (fences) { 2599 cf = dma_fence_array_create(number_tiles, fences, 2600 vm->composite_fence_ctx, 2601 vm->composite_fence_seqno++, 2602 false); 2603 if (!cf) { 2604 --vm->composite_fence_seqno; 2605 fence = ERR_PTR(-ENOMEM); 2606 goto err_out; 2607 } 2608 fence = &cf->base; 2609 } 2610 2611 for_each_tile(tile, vm->xe, id) { 2612 if (!vops->pt_update_ops[id].num_ops) 2613 continue; 2614 2615 xe_pt_update_ops_fini(tile, vops); 2616 } 2617 2618 return fence; 2619 2620 err_out: 2621 for_each_tile(tile, vm->xe, id) { 2622 if (!vops->pt_update_ops[id].num_ops) 2623 continue; 2624 2625 xe_pt_update_ops_abort(tile, vops); 2626 } 2627 while (current_fence) 2628 dma_fence_put(fences[--current_fence]); 2629 kfree(fences); 2630 kfree(cf); 2631 2632 err_trace: 2633 trace_xe_vm_ops_fail(vm); 2634 return fence; 2635 } 2636 2637 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence) 2638 { 2639 if (vma->ufence) 2640 xe_sync_ufence_put(vma->ufence); 2641 vma->ufence = __xe_sync_ufence_get(ufence); 2642 } 2643 2644 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op, 2645 struct xe_user_fence *ufence) 2646 { 2647 switch (op->base.op) { 2648 case DRM_GPUVA_OP_MAP: 2649 vma_add_ufence(op->map.vma, ufence); 2650 break; 2651 case DRM_GPUVA_OP_REMAP: 2652 if (op->remap.prev) 2653 vma_add_ufence(op->remap.prev, ufence); 2654 if (op->remap.next) 2655 vma_add_ufence(op->remap.next, ufence); 2656 break; 2657 case DRM_GPUVA_OP_UNMAP: 2658 break; 2659 case DRM_GPUVA_OP_PREFETCH: 2660 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence); 2661 break; 2662 default: 2663 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2664 } 2665 } 2666 2667 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, 2668 struct dma_fence *fence) 2669 { 2670 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q); 2671 struct xe_user_fence *ufence; 2672 struct xe_vma_op *op; 2673 int i; 2674 2675 ufence = find_ufence_get(vops->syncs, vops->num_syncs); 2676 list_for_each_entry(op, &vops->list, link) { 2677 if (ufence) 2678 op_add_ufence(vm, op, ufence); 2679 2680 if (op->base.op == DRM_GPUVA_OP_UNMAP) 2681 
xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence); 2682 else if (op->base.op == DRM_GPUVA_OP_REMAP) 2683 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), 2684 fence); 2685 } 2686 if (ufence) 2687 xe_sync_ufence_put(ufence); 2688 for (i = 0; i < vops->num_syncs; i++) 2689 xe_sync_entry_signal(vops->syncs + i, fence); 2690 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); 2691 } 2692 2693 static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm, 2694 struct xe_vma_ops *vops) 2695 { 2696 struct drm_exec exec; 2697 struct dma_fence *fence; 2698 int err; 2699 2700 lockdep_assert_held_write(&vm->lock); 2701 2702 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | 2703 DRM_EXEC_IGNORE_DUPLICATES, 0); 2704 drm_exec_until_all_locked(&exec) { 2705 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops); 2706 drm_exec_retry_on_contention(&exec); 2707 if (err) { 2708 fence = ERR_PTR(err); 2709 goto unlock; 2710 } 2711 2712 fence = ops_execute(vm, vops); 2713 if (IS_ERR(fence)) 2714 goto unlock; 2715 2716 vm_bind_ioctl_ops_fini(vm, vops, fence); 2717 } 2718 2719 unlock: 2720 drm_exec_fini(&exec); 2721 return fence; 2722 } 2723 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); 2724 2725 #define SUPPORTED_FLAGS_STUB \ 2726 (DRM_XE_VM_BIND_FLAG_READONLY | \ 2727 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \ 2728 DRM_XE_VM_BIND_FLAG_NULL | \ 2729 DRM_XE_VM_BIND_FLAG_DUMPABLE) 2730 2731 #ifdef TEST_VM_OPS_ERROR 2732 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR) 2733 #else 2734 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB 2735 #endif 2736 2737 #define XE_64K_PAGE_MASK 0xffffull 2738 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) 2739 2740 static int vm_bind_ioctl_check_args(struct xe_device *xe, 2741 struct drm_xe_vm_bind *args, 2742 struct drm_xe_vm_bind_op **bind_ops) 2743 { 2744 int err; 2745 int i; 2746 2747 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) || 2748 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 2749 return -EINVAL; 2750 2751 if (XE_IOCTL_DBG(xe, args->extensions)) 2752 return -EINVAL; 2753 2754 if (args->num_binds > 1) { 2755 u64 __user *bind_user = 2756 u64_to_user_ptr(args->vector_of_binds); 2757 2758 *bind_ops = kvmalloc_array(args->num_binds, 2759 sizeof(struct drm_xe_vm_bind_op), 2760 GFP_KERNEL | __GFP_ACCOUNT | 2761 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 2762 if (!*bind_ops) 2763 return args->num_binds > 1 ? 
-ENOBUFS : -ENOMEM; 2764 2765 err = __copy_from_user(*bind_ops, bind_user, 2766 sizeof(struct drm_xe_vm_bind_op) * 2767 args->num_binds); 2768 if (XE_IOCTL_DBG(xe, err)) { 2769 err = -EFAULT; 2770 goto free_bind_ops; 2771 } 2772 } else { 2773 *bind_ops = &args->bind; 2774 } 2775 2776 for (i = 0; i < args->num_binds; ++i) { 2777 u64 range = (*bind_ops)[i].range; 2778 u64 addr = (*bind_ops)[i].addr; 2779 u32 op = (*bind_ops)[i].op; 2780 u32 flags = (*bind_ops)[i].flags; 2781 u32 obj = (*bind_ops)[i].obj; 2782 u64 obj_offset = (*bind_ops)[i].obj_offset; 2783 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance; 2784 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 2785 u16 pat_index = (*bind_ops)[i].pat_index; 2786 u16 coh_mode; 2787 2788 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) { 2789 err = -EINVAL; 2790 goto free_bind_ops; 2791 } 2792 2793 pat_index = array_index_nospec(pat_index, xe->pat.n_entries); 2794 (*bind_ops)[i].pat_index = pat_index; 2795 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); 2796 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */ 2797 err = -EINVAL; 2798 goto free_bind_ops; 2799 } 2800 2801 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) { 2802 err = -EINVAL; 2803 goto free_bind_ops; 2804 } 2805 2806 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) || 2807 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) || 2808 XE_IOCTL_DBG(xe, obj && is_null) || 2809 XE_IOCTL_DBG(xe, obj_offset && is_null) || 2810 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP && 2811 is_null) || 2812 XE_IOCTL_DBG(xe, !obj && 2813 op == DRM_XE_VM_BIND_OP_MAP && 2814 !is_null) || 2815 XE_IOCTL_DBG(xe, !obj && 2816 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 2817 XE_IOCTL_DBG(xe, addr && 2818 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 2819 XE_IOCTL_DBG(xe, range && 2820 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 2821 XE_IOCTL_DBG(xe, obj && 2822 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 2823 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 2824 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 2825 XE_IOCTL_DBG(xe, obj && 2826 op == DRM_XE_VM_BIND_OP_PREFETCH) || 2827 XE_IOCTL_DBG(xe, prefetch_region && 2828 op != DRM_XE_VM_BIND_OP_PREFETCH) || 2829 XE_IOCTL_DBG(xe, !(BIT(prefetch_region) & 2830 xe->info.mem_region_mask)) || 2831 XE_IOCTL_DBG(xe, obj && 2832 op == DRM_XE_VM_BIND_OP_UNMAP)) { 2833 err = -EINVAL; 2834 goto free_bind_ops; 2835 } 2836 2837 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) || 2838 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) || 2839 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) || 2840 XE_IOCTL_DBG(xe, !range && 2841 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) { 2842 err = -EINVAL; 2843 goto free_bind_ops; 2844 } 2845 } 2846 2847 return 0; 2848 2849 free_bind_ops: 2850 if (args->num_binds > 1) 2851 kvfree(*bind_ops); 2852 return err; 2853 } 2854 2855 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, 2856 struct xe_exec_queue *q, 2857 struct xe_sync_entry *syncs, 2858 int num_syncs) 2859 { 2860 struct dma_fence *fence; 2861 int i, err = 0; 2862 2863 fence = xe_sync_in_fence_get(syncs, num_syncs, 2864 to_wait_exec_queue(vm, q), vm); 2865 if (IS_ERR(fence)) 2866 return PTR_ERR(fence); 2867 2868 for (i = 0; i < num_syncs; i++) 2869 xe_sync_entry_signal(&syncs[i], fence); 2870 2871 xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm, 2872 fence); 2873 dma_fence_put(fence); 2874 2875 return err; 2876 } 2877 2878 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 2879 struct xe_exec_queue *q, 2880 struct xe_sync_entry *syncs, u32 num_syncs) 2881 { 2882 memset(vops, 0, 
sizeof(*vops));
	INIT_LIST_HEAD(&vops->list);
	vops->vm = vm;
	vops->q = q;
	vops->syncs = syncs;
	vops->num_syncs = num_syncs;
}

static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
					u64 addr, u64 range, u64 obj_offset,
					u16 pat_index)
{
	u16 coh_mode;

	if (XE_IOCTL_DBG(xe, range > bo->size) ||
	    XE_IOCTL_DBG(xe, obj_offset >
			 bo->size - range)) {
		return -EINVAL;
	}

	/*
	 * Some platforms require 64k VM_BIND alignment,
	 * specifically those with XE_VRAM_FLAGS_NEED64K.
	 *
	 * Other platforms may have BOs set to 64k physical placement,
	 * but can be mapped at 4k offsets anyway. This check is only
	 * there for the former case.
	 */
	if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) &&
	    (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) {
		if (XE_IOCTL_DBG(xe, obj_offset &
				 XE_64K_PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
			return -EINVAL;
		}
	}

	coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
	if (bo->cpu_caching) {
		if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
				 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
			return -EINVAL;
		}
	} else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
		/*
		 * Imported dma-buf from a different device should
		 * require 1way or 2way coherency since we don't know
		 * how it was mapped on the CPU. Just assume it is
		 * potentially cached on CPU side.
		 */
		return -EINVAL;
	}

	return 0;
}

int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_vm_bind *args = data;
	struct drm_xe_sync __user *syncs_user;
	struct xe_bo **bos = NULL;
	struct drm_gpuva_ops **ops = NULL;
	struct xe_vm *vm;
	struct xe_exec_queue *q = NULL;
	u32 num_syncs, num_ufence = 0;
	struct xe_sync_entry *syncs = NULL;
	struct drm_xe_vm_bind_op *bind_ops;
	struct xe_vma_ops vops;
	struct dma_fence *fence;
	int err;
	int i;

	err = vm_bind_ioctl_check_args(xe, args, &bind_ops);
	if (err)
		return err;

	if (args->exec_queue_id) {
		q = xe_exec_queue_lookup(xef, args->exec_queue_id);
		if (XE_IOCTL_DBG(xe, !q)) {
			err = -ENOENT;
			goto free_objs;
		}

		if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
			err = -EINVAL;
			goto put_exec_queue;
		}
	}

	vm = xe_vm_lookup(xef, args->vm_id);
	if (XE_IOCTL_DBG(xe, !vm)) {
		err = -EINVAL;
		goto put_exec_queue;
	}

	err = down_write_killable(&vm->lock);
	if (err)
		goto put_vm;

	if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
		err = -ENOENT;
		goto release_vm_lock;
	}

	for (i = 0; i < args->num_binds; ++i) {
		u64 range = bind_ops[i].range;
		u64 addr = bind_ops[i].addr;

		if (XE_IOCTL_DBG(xe, range > vm->size) ||
		    XE_IOCTL_DBG(xe, addr > vm->size - range)) {
			err = -EINVAL;
			goto release_vm_lock;
		}
	}

	if (args->num_binds) {
		bos = kvcalloc(args->num_binds, sizeof(*bos),
			       GFP_KERNEL | __GFP_ACCOUNT |
			       __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
		if (!bos) {
			err = -ENOMEM;
			goto release_vm_lock;
		}

		ops = kvcalloc(args->num_binds,
sizeof(*ops), 3010 GFP_KERNEL | __GFP_ACCOUNT | 3011 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3012 if (!ops) { 3013 err = -ENOMEM; 3014 goto release_vm_lock; 3015 } 3016 } 3017 3018 for (i = 0; i < args->num_binds; ++i) { 3019 struct drm_gem_object *gem_obj; 3020 u64 range = bind_ops[i].range; 3021 u64 addr = bind_ops[i].addr; 3022 u32 obj = bind_ops[i].obj; 3023 u64 obj_offset = bind_ops[i].obj_offset; 3024 u16 pat_index = bind_ops[i].pat_index; 3025 3026 if (!obj) 3027 continue; 3028 3029 gem_obj = drm_gem_object_lookup(file, obj); 3030 if (XE_IOCTL_DBG(xe, !gem_obj)) { 3031 err = -ENOENT; 3032 goto put_obj; 3033 } 3034 bos[i] = gem_to_xe_bo(gem_obj); 3035 3036 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range, 3037 obj_offset, pat_index); 3038 if (err) 3039 goto put_obj; 3040 } 3041 3042 if (args->num_syncs) { 3043 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); 3044 if (!syncs) { 3045 err = -ENOMEM; 3046 goto put_obj; 3047 } 3048 } 3049 3050 syncs_user = u64_to_user_ptr(args->syncs); 3051 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { 3052 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], 3053 &syncs_user[num_syncs], 3054 (xe_vm_in_lr_mode(vm) ? 3055 SYNC_PARSE_FLAG_LR_MODE : 0) | 3056 (!args->num_binds ? 3057 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0)); 3058 if (err) 3059 goto free_syncs; 3060 3061 if (xe_sync_is_ufence(&syncs[num_syncs])) 3062 num_ufence++; 3063 } 3064 3065 if (XE_IOCTL_DBG(xe, num_ufence > 1)) { 3066 err = -EINVAL; 3067 goto free_syncs; 3068 } 3069 3070 if (!args->num_binds) { 3071 err = -ENODATA; 3072 goto free_syncs; 3073 } 3074 3075 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); 3076 for (i = 0; i < args->num_binds; ++i) { 3077 u64 range = bind_ops[i].range; 3078 u64 addr = bind_ops[i].addr; 3079 u32 op = bind_ops[i].op; 3080 u32 flags = bind_ops[i].flags; 3081 u64 obj_offset = bind_ops[i].obj_offset; 3082 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance; 3083 u16 pat_index = bind_ops[i].pat_index; 3084 3085 ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset, 3086 addr, range, op, flags, 3087 prefetch_region, pat_index); 3088 if (IS_ERR(ops[i])) { 3089 err = PTR_ERR(ops[i]); 3090 ops[i] = NULL; 3091 goto unwind_ops; 3092 } 3093 3094 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops); 3095 if (err) 3096 goto unwind_ops; 3097 3098 #ifdef TEST_VM_OPS_ERROR 3099 if (flags & FORCE_OP_ERROR) { 3100 vops.inject_error = true; 3101 vm->xe->vm_inject_error_position = 3102 (vm->xe->vm_inject_error_position + 1) % 3103 FORCE_OP_ERROR_COUNT; 3104 } 3105 #endif 3106 } 3107 3108 /* Nothing to do */ 3109 if (list_empty(&vops.list)) { 3110 err = -ENODATA; 3111 goto unwind_ops; 3112 } 3113 3114 err = xe_vma_ops_alloc(&vops, args->num_binds > 1); 3115 if (err) 3116 goto unwind_ops; 3117 3118 fence = vm_bind_ioctl_ops_execute(vm, &vops); 3119 if (IS_ERR(fence)) 3120 err = PTR_ERR(fence); 3121 else 3122 dma_fence_put(fence); 3123 3124 unwind_ops: 3125 if (err && err != -ENODATA) 3126 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); 3127 xe_vma_ops_fini(&vops); 3128 for (i = args->num_binds - 1; i >= 0; --i) 3129 if (ops[i]) 3130 drm_gpuva_ops_free(&vm->gpuvm, ops[i]); 3131 free_syncs: 3132 if (err == -ENODATA) 3133 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs); 3134 while (num_syncs--) 3135 xe_sync_entry_cleanup(&syncs[num_syncs]); 3136 3137 kfree(syncs); 3138 put_obj: 3139 for (i = 0; i < args->num_binds; ++i) 3140 xe_bo_put(bos[i]); 3141 release_vm_lock: 3142 up_write(&vm->lock); 3143 put_vm: 3144 
	xe_vm_put(vm);
put_exec_queue:
	if (q)
		xe_exec_queue_put(q);
free_objs:
	kvfree(bos);
	kvfree(ops);
	if (args->num_binds > 1)
		kvfree(bind_ops);
	return err;
}

/**
 * xe_vm_bind_kernel_bo - bind a kernel BO to a VM
 * @vm: VM to bind the BO to
 * @bo: BO to bind
 * @q: exec queue to use for the bind (optional)
 * @addr: address at which to bind the BO
 * @cache_lvl: PAT cache level to use
 *
 * Execute a VM bind map operation on a kernel-owned BO to bind it into a
 * kernel-owned VM.
 *
 * Returns a dma_fence that tracks completion of the bind if the job to do so
 * was successfully submitted, an error pointer otherwise.
 */
struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo,
				       struct xe_exec_queue *q, u64 addr,
				       enum xe_cache_level cache_lvl)
{
	struct xe_vma_ops vops;
	struct drm_gpuva_ops *ops = NULL;
	struct dma_fence *fence;
	int err;

	xe_bo_get(bo);
	xe_vm_get(vm);
	if (q)
		xe_exec_queue_get(q);

	down_write(&vm->lock);

	xe_vma_ops_init(&vops, vm, q, NULL, 0);

	ops = vm_bind_ioctl_ops_create(vm, bo, 0, addr, bo->size,
				       DRM_XE_VM_BIND_OP_MAP, 0, 0,
				       vm->xe->pat.idx[cache_lvl]);
	if (IS_ERR(ops)) {
		err = PTR_ERR(ops);
		goto release_vm_lock;
	}

	err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
	if (err)
		goto release_vm_lock;

	xe_assert(vm->xe, !list_empty(&vops.list));

	err = xe_vma_ops_alloc(&vops, false);
	if (err)
		goto unwind_ops;

	fence = vm_bind_ioctl_ops_execute(vm, &vops);
	if (IS_ERR(fence))
		err = PTR_ERR(fence);

unwind_ops:
	if (err && err != -ENODATA)
		vm_bind_ioctl_ops_unwind(vm, &ops, 1);

	xe_vma_ops_fini(&vops);
	drm_gpuva_ops_free(&vm->gpuvm, ops);

release_vm_lock:
	up_write(&vm->lock);

	if (q)
		xe_exec_queue_put(q);
	xe_vm_put(vm);
	xe_bo_put(bo);

	if (err)
		fence = ERR_PTR(err);

	return fence;
}

/**
 * xe_vm_lock() - Lock the vm's dma_resv object
 * @vm: The struct xe_vm whose lock is to be locked
 * @intr: Whether to perform any waits interruptibly
 *
 * Return: 0 on success, -EINTR if @intr is true and the wait for a
 * contended lock was interrupted. If @intr is false, the function
 * always returns 0.
 */
int xe_vm_lock(struct xe_vm *vm, bool intr)
{
	if (intr)
		return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);

	return dma_resv_lock(xe_vm_resv(vm), NULL);
}

/**
 * xe_vm_unlock() - Unlock the vm's dma_resv object
 * @vm: The struct xe_vm whose lock is to be released.
 *
 * Unlock the vm's dma_resv object that was locked by xe_vm_lock().
 */
void xe_vm_unlock(struct xe_vm *vm)
{
	dma_resv_unlock(xe_vm_resv(vm));
}

/**
 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
 * @vma: VMA to invalidate
 *
 * Walks the list of page table leaves, zeroes (memsets) the entries owned by
 * this VMA, invalidates the TLBs, and blocks until the TLB invalidation is
 * complete.
 *
 * Returns 0 for success, negative error code otherwise.
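 *
 * Note: The locking expectations mirror the checks in the body below: for a
 * BO-backed VMA the caller is expected to hold the BO's dma-resv lock, while
 * a userptr VMA relies on the MMU notifier / resv state verified under
 * CONFIG_PROVE_LOCKING. A minimal, illustrative call sketch follows; the
 * surrounding error handling is an assumption, not code from this file:
 *
 *	err = xe_vm_invalidate_vma(vma);
 *	if (err)
 *		drm_warn(&xe_vma_vm(vma)->xe->drm,
 *			 "VMA invalidation failed: %d\n", err);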
 */
int xe_vm_invalidate_vma(struct xe_vma *vma)
{
	struct xe_device *xe = xe_vma_vm(vma)->xe;
	struct xe_tile *tile;
	struct xe_gt_tlb_invalidation_fence
		fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
	u8 id;
	u32 fence_id = 0;
	int ret = 0;

	xe_assert(xe, !xe_vma_is_null(vma));
	trace_xe_vma_invalidate(vma);

	vm_dbg(&xe_vma_vm(vma)->xe->drm,
	       "INVALIDATE: addr=0x%016llx, range=0x%016llx",
	       xe_vma_start(vma), xe_vma_size(vma));

	/* Check that we don't race with page-table updates */
	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
		if (xe_vma_is_userptr(vma)) {
			WARN_ON_ONCE(!mmu_interval_check_retry
				     (&to_userptr_vma(vma)->userptr.notifier,
				      to_userptr_vma(vma)->userptr.notifier_seq));
			WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)),
							     DMA_RESV_USAGE_BOOKKEEP));

		} else {
			xe_bo_assert_held(xe_vma_bo(vma));
		}
	}

	for_each_tile(tile, xe, id) {
		if (xe_pt_zap_ptes(tile, vma)) {
			xe_device_wmb(xe);
			xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
							  &fence[fence_id],
							  true);

			ret = xe_gt_tlb_invalidation_vma(tile->primary_gt,
							 &fence[fence_id], vma);
			if (ret)
				goto wait;
			++fence_id;

			if (!tile->media_gt)
				continue;

			xe_gt_tlb_invalidation_fence_init(tile->media_gt,
							  &fence[fence_id],
							  true);

			ret = xe_gt_tlb_invalidation_vma(tile->media_gt,
							 &fence[fence_id], vma);
			if (ret)
				goto wait;
			++fence_id;
		}
	}

wait:
	for (id = 0; id < fence_id; ++id)
		xe_gt_tlb_invalidation_fence_wait(&fence[id]);

	vma->tile_invalidated = vma->tile_mask;

	return ret;
}

struct xe_vm_snapshot {
	unsigned long num_snaps;
	struct {
		u64 ofs, bo_ofs;
		unsigned long len;
		struct xe_bo *bo;
		void *data;
		struct mm_struct *mm;
	} snap[];
};

struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
{
	unsigned long num_snaps = 0, i;
	struct xe_vm_snapshot *snap = NULL;
	struct drm_gpuva *gpuva;

	if (!vm)
		return NULL;

	mutex_lock(&vm->snap_mutex);
	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		if (gpuva->flags & XE_VMA_DUMPABLE)
			num_snaps++;
	}

	if (num_snaps)
		snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
	if (!snap) {
		snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV);
		goto out_unlock;
	}

	snap->num_snaps = num_snaps;
	i = 0;
	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);
		struct xe_bo *bo = vma->gpuva.gem.obj ?
3375 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 3376 3377 if (!(gpuva->flags & XE_VMA_DUMPABLE)) 3378 continue; 3379 3380 snap->snap[i].ofs = xe_vma_start(vma); 3381 snap->snap[i].len = xe_vma_size(vma); 3382 if (bo) { 3383 snap->snap[i].bo = xe_bo_get(bo); 3384 snap->snap[i].bo_ofs = xe_vma_bo_offset(vma); 3385 } else if (xe_vma_is_userptr(vma)) { 3386 struct mm_struct *mm = 3387 to_userptr_vma(vma)->userptr.notifier.mm; 3388 3389 if (mmget_not_zero(mm)) 3390 snap->snap[i].mm = mm; 3391 else 3392 snap->snap[i].data = ERR_PTR(-EFAULT); 3393 3394 snap->snap[i].bo_ofs = xe_vma_userptr(vma); 3395 } else { 3396 snap->snap[i].data = ERR_PTR(-ENOENT); 3397 } 3398 i++; 3399 } 3400 3401 out_unlock: 3402 mutex_unlock(&vm->snap_mutex); 3403 return snap; 3404 } 3405 3406 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) 3407 { 3408 if (IS_ERR_OR_NULL(snap)) 3409 return; 3410 3411 for (int i = 0; i < snap->num_snaps; i++) { 3412 struct xe_bo *bo = snap->snap[i].bo; 3413 int err; 3414 3415 if (IS_ERR(snap->snap[i].data)) 3416 continue; 3417 3418 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER); 3419 if (!snap->snap[i].data) { 3420 snap->snap[i].data = ERR_PTR(-ENOMEM); 3421 goto cleanup_bo; 3422 } 3423 3424 if (bo) { 3425 err = xe_bo_read(bo, snap->snap[i].bo_ofs, 3426 snap->snap[i].data, snap->snap[i].len); 3427 } else { 3428 void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs; 3429 3430 kthread_use_mm(snap->snap[i].mm); 3431 if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len)) 3432 err = 0; 3433 else 3434 err = -EFAULT; 3435 kthread_unuse_mm(snap->snap[i].mm); 3436 3437 mmput(snap->snap[i].mm); 3438 snap->snap[i].mm = NULL; 3439 } 3440 3441 if (err) { 3442 kvfree(snap->snap[i].data); 3443 snap->snap[i].data = ERR_PTR(err); 3444 } 3445 3446 cleanup_bo: 3447 xe_bo_put(bo); 3448 snap->snap[i].bo = NULL; 3449 } 3450 } 3451 3452 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) 3453 { 3454 unsigned long i, j; 3455 3456 if (IS_ERR_OR_NULL(snap)) { 3457 drm_printf(p, "[0].error: %li\n", PTR_ERR(snap)); 3458 return; 3459 } 3460 3461 for (i = 0; i < snap->num_snaps; i++) { 3462 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len); 3463 3464 if (IS_ERR(snap->snap[i].data)) { 3465 drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs, 3466 PTR_ERR(snap->snap[i].data)); 3467 continue; 3468 } 3469 3470 drm_printf(p, "[%llx].data: ", snap->snap[i].ofs); 3471 3472 for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) { 3473 u32 *val = snap->snap[i].data + j; 3474 char dumped[ASCII85_BUFSZ]; 3475 3476 drm_puts(p, ascii85_encode(*val, dumped)); 3477 } 3478 3479 drm_puts(p, "\n"); 3480 } 3481 } 3482 3483 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap) 3484 { 3485 unsigned long i; 3486 3487 if (IS_ERR_OR_NULL(snap)) 3488 return; 3489 3490 for (i = 0; i < snap->num_snaps; i++) { 3491 if (!IS_ERR(snap->snap[i].data)) 3492 kvfree(snap->snap[i].data); 3493 xe_bo_put(snap->snap[i].bo); 3494 if (snap->snap[i].mm) 3495 mmput(snap->snap[i].mm); 3496 } 3497 kvfree(snap); 3498 } 3499