// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_vm.h"

#include <linux/dma-fence-array.h>
#include <linux/nospec.h>

#include <drm/drm_exec.h>
#include <drm/drm_print.h>
#include <drm/ttm/ttm_tt.h>
#include <uapi/drm/xe_drm.h>
#include <linux/ascii85.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/swap.h>

#include <generated/xe_wa_oob.h>

#include "regs/xe_gtt_defs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue.h"
#include "xe_gt_pagefault.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_migrate.h"
#include "xe_pat.h"
#include "xe_pm.h"
#include "xe_preempt_fence.h"
#include "xe_pt.h"
#include "xe_pxp.h"
#include "xe_res_cursor.h"
#include "xe_sync.h"
#include "xe_trace_bo.h"
#include "xe_wa.h"
#include "xe_hmm.h"

static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
{
	return vm->gpuvm.r_obj;
}

/**
 * xe_vma_userptr_check_repin() - Advisory check for repin needed
 * @uvma: The userptr vma
 *
 * Check if the userptr vma has been invalidated since last successful
 * repin. The check is advisory only and the function can be called
 * without the vm->userptr.notifier_lock held. There is no guarantee that the
 * vma userptr will remain valid after a lockless check, so typically
 * the call needs to be followed by a proper check under the notifier_lock.
 *
 * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
 */
int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
{
	return mmu_interval_check_retry(&uvma->userptr.notifier,
					uvma->userptr.notifier_seq) ?
		-EAGAIN : 0;
}
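
/*
 * Usage sketch (illustrative only, not part of the driver): the lockless
 * check above is typically paired with a repin and an authoritative
 * re-check under the notifier lock, roughly following the pattern the
 * kernel-doc describes. Error handling and fence installation are elided.
 *
 *	if (xe_vma_userptr_check_repin(uvma)) {
 *		err = xe_vma_userptr_pin_pages(uvma);	// requires vm->lock held
 *		if (err)
 *			return err;
 *	}
 *
 *	down_read(&vm->userptr.notifier_lock);
 *	err = __xe_vm_userptr_needs_repin(vm);	// authoritative check
 *	// ... publish state / install fences while still holding the lock ...
 *	up_read(&vm->userptr.notifier_lock);
 *	if (err == -EAGAIN)
 *		goto retry;
 */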

int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
{
	struct xe_vma *vma = &uvma->vma;
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_device *xe = vm->xe;

	lockdep_assert_held(&vm->lock);
	xe_assert(xe, xe_vma_is_userptr(vma));

	return xe_hmm_userptr_populate_range(uvma, false);
}

static bool preempt_fences_waiting(struct xe_vm *vm)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		if (!q->lr.pfence ||
		    test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
			     &q->lr.pfence->flags)) {
			return true;
		}
	}

	return false;
}

static void free_preempt_fences(struct list_head *list)
{
	struct list_head *link, *next;

	list_for_each_safe(link, next, list)
		xe_preempt_fence_free(to_preempt_fence_from_link(link));
}

static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
				unsigned int *count)
{
	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);

	if (*count >= vm->preempt.num_exec_queues)
		return 0;

	for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
		struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();

		if (IS_ERR(pfence))
			return PTR_ERR(pfence);

		list_move_tail(xe_preempt_fence_link(pfence), list);
	}

	return 0;
}

static int wait_for_existing_preempt_fences(struct xe_vm *vm)
{
	struct xe_exec_queue *q;

	xe_vm_assert_held(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		if (q->lr.pfence) {
			long timeout = dma_fence_wait(q->lr.pfence, false);

			/* Only -ETIME on fence indicates VM needs to be killed */
			if (timeout < 0 || q->lr.pfence->error == -ETIME)
				return -ETIME;

			dma_fence_put(q->lr.pfence);
			q->lr.pfence = NULL;
		}
	}

	return 0;
}

static bool xe_vm_is_idle(struct xe_vm *vm)
{
	struct xe_exec_queue *q;

	xe_vm_assert_held(vm);
	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		if (!xe_exec_queue_is_idle(q))
			return false;
	}

	return true;
}

static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
{
	struct list_head *link;
	struct xe_exec_queue *q;

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		struct dma_fence *fence;

		link = list->next;
		xe_assert(vm->xe, link != list);

		fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
					     q, q->lr.context,
					     ++q->lr.seqno);
		dma_fence_put(q->lr.pfence);
		q->lr.pfence = fence;
	}
}

static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
{
	struct xe_exec_queue *q;
	int err;

	xe_bo_assert_held(bo);

	if (!vm->preempt.num_exec_queues)
		return 0;

	err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
	if (err)
		return err;

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
		if (q->lr.pfence) {
			dma_resv_add_fence(bo->ttm.base.resv,
					   q->lr.pfence,
					   DMA_RESV_USAGE_BOOKKEEP);
		}

	return 0;
}

static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
						struct drm_exec *exec)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		q->ops->resume(q);

		drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
					 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
	}
}

int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	struct drm_gpuvm_exec vm_exec = {
		.vm = &vm->gpuvm,
		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
		.num_fences = 1,
	};
	struct drm_exec *exec = &vm_exec.exec;
	struct dma_fence *pfence;
	int err;
	bool wait;

	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));

	down_write(&vm->lock);
	err = drm_gpuvm_exec_lock(&vm_exec);
	if (err)
		goto out_up_write;

	pfence = xe_preempt_fence_create(q, q->lr.context,
					 ++q->lr.seqno);
	if (!pfence) {
		err = -ENOMEM;
		goto out_fini;
	}

	list_add(&q->lr.link, &vm->preempt.exec_queues);
	++vm->preempt.num_exec_queues;
	q->lr.pfence = pfence;

	down_read(&vm->userptr.notifier_lock);

	drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
				 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);

	/*
	 * Check to see if a preemption on the VM or a userptr invalidation is
	 * in flight; if so, trigger this preempt fence to sync state with the
	 * other preempt fences on the VM.
	 */
	wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
	if (wait)
		dma_fence_enable_sw_signaling(pfence);

	up_read(&vm->userptr.notifier_lock);

out_fini:
	drm_exec_fini(exec);
out_up_write:
	up_write(&vm->lock);

	return err;
}

/**
 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
 * @vm: The VM.
 * @q: The exec_queue
 *
 * Note that this function might be called multiple times on the same queue.
 */
void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	if (!xe_vm_in_preempt_fence_mode(vm))
		return;

	down_write(&vm->lock);
	if (!list_empty(&q->lr.link)) {
		list_del_init(&q->lr.link);
		--vm->preempt.num_exec_queues;
	}
	if (q->lr.pfence) {
		dma_fence_enable_sw_signaling(q->lr.pfence);
		dma_fence_put(q->lr.pfence);
		q->lr.pfence = NULL;
	}
	up_write(&vm->lock);
}
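
/*
 * Pairing sketch for preempt-fence mode queues (illustrative only, error
 * paths trimmed): a long-running queue is registered with the VM before it
 * submits work and unregistered when it goes away; the remove call is safe
 * to repeat, per the kernel-doc above.
 *
 *	if (xe_vm_in_preempt_fence_mode(vm)) {
 *		err = xe_vm_add_compute_exec_queue(vm, q);
 *		if (err)
 *			return err;
 *	}
 *
 *	// ... long-running submissions on q ...
 *
 *	xe_vm_remove_compute_exec_queue(vm, q);
 */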

/**
 * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
 * that need repinning.
 * @vm: The VM.
 *
 * This function checks for whether the VM has userptrs that need repinning,
 * and provides a release-type barrier on the userptr.notifier_lock after
 * checking.
 *
 * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
 */
int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
{
	lockdep_assert_held_read(&vm->userptr.notifier_lock);

	return (list_empty(&vm->userptr.repin_list) &&
		list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
}

#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000

/**
 * xe_vm_kill() - VM Kill
 * @vm: The VM.
 * @unlocked: Flag indicating the VM's dma-resv is not held
 *
 * Kill the VM by setting the banned flag, indicating the VM is no longer
 * available for use. If in preempt fence mode, also kill all exec queues
 * attached to the VM.
 */
void xe_vm_kill(struct xe_vm *vm, bool unlocked)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);

	if (unlocked)
		xe_vm_lock(vm, false);

	vm->flags |= XE_VM_FLAG_BANNED;
	trace_xe_vm_kill(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
		q->ops->kill(q);

	if (unlocked)
		xe_vm_unlock(vm);

	/* TODO: Inform user the VM is banned */
}

/**
 * xe_vm_validate_should_retry() - Whether to retry after a validate error.
 * @exec: The drm_exec object used for locking before validation.
 * @err: The error returned from ttm_bo_validate().
 * @end: A ktime_t cookie that should be set to 0 before first use and
 * that should be reused on subsequent calls.
 *
 * With multiple active VMs, under memory pressure, it is possible that
 * ttm_bo_validate() runs into -EDEADLK and in such a case returns -ENOMEM.
 * Until ttm properly handles locking in such scenarios, the best thing the
 * driver can do is retry with a timeout. Check if that is necessary, and
 * if so unlock the drm_exec's objects while keeping the ticket to prepare
 * for a rerun.
 *
 * Return: true if a retry after drm_exec_init() is recommended;
 * false otherwise.
 */
bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
{
	ktime_t cur;

	if (err != -ENOMEM)
		return false;

	cur = ktime_get();
	*end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
	if (!ktime_before(cur, *end))
		return false;

	msleep(20);
	return true;
}
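
/*
 * Retry-loop sketch for the helper above (illustrative only; the rebind
 * worker below follows the same pattern via its -EAGAIN path). The ktime_t
 * cookie must start at 0 and be reused across retries so the total retry
 * time stays bounded by XE_VM_REBIND_RETRY_TIMEOUT_MS.
 *
 *	struct drm_exec exec;
 *	ktime_t end = 0;
 *	int err;
 *
 *retry:
 *	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 *	// ... lock objects, err = validation / binding work ...
 *	drm_exec_fini(&exec);
 *	if (err && xe_vm_validate_should_retry(&exec, err, &end))
 *		goto retry;
 */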

static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
{
	struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
	struct drm_gpuva *gpuva;
	int ret;

	lockdep_assert_held(&vm->lock);
	drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
		list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
			       &vm->rebind_list);

	ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
	if (ret)
		return ret;

	vm_bo->evicted = false;
	return 0;
}

/**
 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
 * @vm: The vm for which we are rebinding.
 * @exec: The struct drm_exec with the locked GEM objects.
 * @num_fences: The number of fences to reserve for the operation, not
 * including rebinds and validations.
 *
 * Validates all evicted gem objects and rebinds their vmas. Note that
 * rebindings may cause evictions and hence the validation-rebind
 * sequence is rerun until there are no more objects to validate.
 *
 * Return: 0 on success, negative error code on error. In particular,
 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
 * the drm_exec transaction needs to be restarted.
 */
int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
			  unsigned int num_fences)
{
	struct drm_gem_object *obj;
	unsigned long index;
	int ret;

	do {
		ret = drm_gpuvm_validate(&vm->gpuvm, exec);
		if (ret)
			return ret;

		ret = xe_vm_rebind(vm, false);
		if (ret)
			return ret;
	} while (!list_empty(&vm->gpuvm.evict.list));

	drm_exec_for_each_locked_object(exec, index, obj) {
		ret = dma_resv_reserve_fences(obj->resv, num_fences);
		if (ret)
			return ret;
	}

	return 0;
}
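
/*
 * Caller-side sketch (illustrative only): xe_vm_validate_rebind() is meant
 * to run inside a drm_exec locking loop with the VM's objects prepared, and
 * -EDEADLK is handled by retrying the transaction. xe_preempt_work_begin()
 * below is the in-tree example of this pattern.
 *
 *	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 *	drm_exec_until_all_locked(&exec) {
 *		err = drm_gpuvm_prepare_vm(&vm->gpuvm, &exec, 0);
 *		if (!err)
 *			err = drm_gpuvm_prepare_objects(&vm->gpuvm, &exec, 0);
 *		if (!err)
 *			err = xe_vm_validate_rebind(vm, &exec, num_fences);
 *		drm_exec_retry_on_contention(&exec);
 *	}
 *	// ... use the locked, validated objects, then drm_exec_fini(&exec) ...
 */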

static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
				 bool *done)
{
	int err;

	err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	if (xe_vm_is_idle(vm)) {
		vm->preempt.rebind_deactivated = true;
		*done = true;
		return 0;
	}

	if (!preempt_fences_waiting(vm)) {
		*done = true;
		return 0;
	}

	err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	err = wait_for_existing_preempt_fences(vm);
	if (err)
		return err;

	/*
	 * Add validation and rebinding to the locking loop since both can
	 * cause evictions which may require blocking dma_resv locks.
	 * The fence reservation here is intended for the new preempt fences
	 * we attach at the end of the rebind work.
	 */
	return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
}

static void preempt_rebind_work_func(struct work_struct *w)
{
	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
	struct drm_exec exec;
	unsigned int fence_count = 0;
	LIST_HEAD(preempt_fences);
	ktime_t end = 0;
	int err = 0;
	long wait;
	int __maybe_unused tries = 0;

	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
	trace_xe_vm_rebind_worker_enter(vm);

	down_write(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm)) {
		up_write(&vm->lock);
		trace_xe_vm_rebind_worker_exit(vm);
		return;
	}

retry:
	if (xe_vm_userptr_check_repin(vm)) {
		err = xe_vm_userptr_pin(vm);
		if (err)
			goto out_unlock_outer;
	}

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);

	drm_exec_until_all_locked(&exec) {
		bool done = false;

		err = xe_preempt_work_begin(&exec, vm, &done);
		drm_exec_retry_on_contention(&exec);
		if (err || done) {
			drm_exec_fini(&exec);
			if (err && xe_vm_validate_should_retry(&exec, err, &end))
				err = -EAGAIN;

			goto out_unlock_outer;
		}
	}

	err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
	if (err)
		goto out_unlock;

	err = xe_vm_rebind(vm, true);
	if (err)
		goto out_unlock;

	/* Wait on rebinds and munmap style VM unbinds */
	wait = dma_resv_wait_timeout(xe_vm_resv(vm),
				     DMA_RESV_USAGE_KERNEL,
				     false, MAX_SCHEDULE_TIMEOUT);
	if (wait <= 0) {
		err = -ETIME;
		goto out_unlock;
	}

#define retry_required(__tries, __vm) \
	(IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
	(!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
	__xe_vm_userptr_needs_repin(__vm))

	down_read(&vm->userptr.notifier_lock);
	if (retry_required(tries, vm)) {
		up_read(&vm->userptr.notifier_lock);
		err = -EAGAIN;
		goto out_unlock;
	}

#undef retry_required

	spin_lock(&vm->xe->ttm.lru_lock);
	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
	spin_unlock(&vm->xe->ttm.lru_lock);

	/* Point of no return. */
	arm_preempt_fences(vm, &preempt_fences);
	resume_and_reinstall_preempt_fences(vm, &exec);
	up_read(&vm->userptr.notifier_lock);

out_unlock:
	drm_exec_fini(&exec);
out_unlock_outer:
	if (err == -EAGAIN) {
		trace_xe_vm_rebind_worker_retry(vm);
		goto retry;
	}

	if (err) {
		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
		xe_vm_kill(vm, true);
	}
	up_write(&vm->lock);

	free_preempt_fences(&preempt_fences);

	trace_xe_vm_rebind_worker_exit(vm);
}

static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
				   const struct mmu_notifier_range *range,
				   unsigned long cur_seq)
{
	struct xe_userptr *userptr = container_of(mni, typeof(*userptr), notifier);
	struct xe_userptr_vma *uvma = container_of(userptr, typeof(*uvma), userptr);
	struct xe_vma *vma = &uvma->vma;
	struct xe_vm *vm = xe_vma_vm(vma);
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	long err;

	xe_assert(vm->xe, xe_vma_is_userptr(vma));
	trace_xe_vma_userptr_invalidate(vma);

	if (!mmu_notifier_range_blockable(range))
		return false;

	vm_dbg(&xe_vma_vm(vma)->xe->drm,
	       "NOTIFIER: addr=0x%016llx, range=0x%016llx",
	       xe_vma_start(vma), xe_vma_size(vma));

	down_write(&vm->userptr.notifier_lock);
	mmu_interval_set_seq(mni, cur_seq);

	/* No need to stop gpu access if the userptr is not yet bound. */
	if (!userptr->initial_bind) {
		up_write(&vm->userptr.notifier_lock);
		return true;
	}

	/*
	 * Tell exec and rebind worker they need to repin and rebind this
	 * userptr.
	 */
	if (!xe_vm_in_fault_mode(vm) &&
	    !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) {
		spin_lock(&vm->userptr.invalidated_lock);
		list_move_tail(&userptr->invalidate_link,
			       &vm->userptr.invalidated);
		spin_unlock(&vm->userptr.invalidated_lock);
	}

	up_write(&vm->userptr.notifier_lock);

	/*
	 * Preempt fences turn into schedule disables, pipeline these.
	 * Note that even in fault mode, we need to wait for binds and
	 * unbinds to complete, and those are attached as BOOKKEEP fences
	 * to the vm.
	 */
	dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
			    DMA_RESV_USAGE_BOOKKEEP);
	dma_resv_for_each_fence_unlocked(&cursor, fence)
		dma_fence_enable_sw_signaling(fence);
	dma_resv_iter_end(&cursor);

	err = dma_resv_wait_timeout(xe_vm_resv(vm),
				    DMA_RESV_USAGE_BOOKKEEP,
				    false, MAX_SCHEDULE_TIMEOUT);
	XE_WARN_ON(err <= 0);

	if (xe_vm_in_fault_mode(vm)) {
		err = xe_vm_invalidate_vma(vma);
		XE_WARN_ON(err);
	}

	trace_xe_vma_userptr_invalidate_complete(vma);

	return true;
}

static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
	.invalidate = vma_userptr_invalidate,
};

int xe_vm_userptr_pin(struct xe_vm *vm)
{
	struct xe_userptr_vma *uvma, *next;
	int err = 0;

	xe_assert(vm->xe, !xe_vm_in_fault_mode(vm));
	lockdep_assert_held_write(&vm->lock);

	/* Collect invalidated userptrs */
	spin_lock(&vm->userptr.invalidated_lock);
	xe_assert(vm->xe, list_empty(&vm->userptr.repin_list));
	list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
				 userptr.invalidate_link) {
		list_del_init(&uvma->userptr.invalidate_link);
		list_add_tail(&uvma->userptr.repin_link,
			      &vm->userptr.repin_list);
	}
	spin_unlock(&vm->userptr.invalidated_lock);

	/* Pin and move to bind list */
	list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
				 userptr.repin_link) {
		err = xe_vma_userptr_pin_pages(uvma);
		if (err == -EFAULT) {
			list_del_init(&uvma->userptr.repin_link);
			/*
			 * We might have already done the pin once, but then
			 * had to retry before the re-bind happened, due to
			 * some other condition in the caller, but in the
			 * meantime the userptr got dinged by the notifier such
			 * that we need to revalidate here, but this time we hit
			 * the EFAULT. In such a case make sure we remove
			 * ourselves from the rebind list to avoid going down in
			 * flames.
			 */
			if (!list_empty(&uvma->vma.combined_links.rebind))
				list_del_init(&uvma->vma.combined_links.rebind);

			/* Wait for pending binds */
			xe_vm_lock(vm, false);
			dma_resv_wait_timeout(xe_vm_resv(vm),
					      DMA_RESV_USAGE_BOOKKEEP,
					      false, MAX_SCHEDULE_TIMEOUT);

			err = xe_vm_invalidate_vma(&uvma->vma);
			xe_vm_unlock(vm);
			if (err)
				break;
		} else {
			if (err)
				break;

			list_del_init(&uvma->userptr.repin_link);
			list_move_tail(&uvma->vma.combined_links.rebind,
				       &vm->rebind_list);
		}
	}

	if (err) {
		down_write(&vm->userptr.notifier_lock);
		spin_lock(&vm->userptr.invalidated_lock);
		list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
					 userptr.repin_link) {
			list_del_init(&uvma->userptr.repin_link);
			list_move_tail(&uvma->userptr.invalidate_link,
				       &vm->userptr.invalidated);
		}
		spin_unlock(&vm->userptr.invalidated_lock);
		up_write(&vm->userptr.notifier_lock);
	}
	return err;
}

/**
 * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
 * that need repinning.
 * @vm: The VM.
 *
 * This function does an advisory check for whether the VM has userptrs that
 * need repinning.
 *
 * Return: 0 if there are no indications of userptrs needing repinning,
 * -EAGAIN if there are.
 */
int xe_vm_userptr_check_repin(struct xe_vm *vm)
{
	return (list_empty_careful(&vm->userptr.repin_list) &&
		list_empty_careful(&vm->userptr.invalidated)) ?
0 : -EAGAIN; 747 } 748 749 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) 750 { 751 int i; 752 753 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) { 754 if (!vops->pt_update_ops[i].num_ops) 755 continue; 756 757 vops->pt_update_ops[i].ops = 758 kmalloc_array(vops->pt_update_ops[i].num_ops, 759 sizeof(*vops->pt_update_ops[i].ops), 760 GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 761 if (!vops->pt_update_ops[i].ops) 762 return array_of_binds ? -ENOBUFS : -ENOMEM; 763 } 764 765 return 0; 766 } 767 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO); 768 769 static void xe_vma_ops_fini(struct xe_vma_ops *vops) 770 { 771 int i; 772 773 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 774 kfree(vops->pt_update_ops[i].ops); 775 } 776 777 static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask) 778 { 779 int i; 780 781 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 782 if (BIT(i) & tile_mask) 783 ++vops->pt_update_ops[i].num_ops; 784 } 785 786 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, 787 u8 tile_mask) 788 { 789 INIT_LIST_HEAD(&op->link); 790 op->tile_mask = tile_mask; 791 op->base.op = DRM_GPUVA_OP_MAP; 792 op->base.map.va.addr = vma->gpuva.va.addr; 793 op->base.map.va.range = vma->gpuva.va.range; 794 op->base.map.gem.obj = vma->gpuva.gem.obj; 795 op->base.map.gem.offset = vma->gpuva.gem.offset; 796 op->map.vma = vma; 797 op->map.immediate = true; 798 op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE; 799 op->map.is_null = xe_vma_is_null(vma); 800 } 801 802 static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, 803 u8 tile_mask) 804 { 805 struct xe_vma_op *op; 806 807 op = kzalloc(sizeof(*op), GFP_KERNEL); 808 if (!op) 809 return -ENOMEM; 810 811 xe_vm_populate_rebind(op, vma, tile_mask); 812 list_add_tail(&op->link, &vops->list); 813 xe_vma_ops_incr_pt_update_ops(vops, tile_mask); 814 815 return 0; 816 } 817 818 static struct dma_fence *ops_execute(struct xe_vm *vm, 819 struct xe_vma_ops *vops); 820 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 821 struct xe_exec_queue *q, 822 struct xe_sync_entry *syncs, u32 num_syncs); 823 824 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) 825 { 826 struct dma_fence *fence; 827 struct xe_vma *vma, *next; 828 struct xe_vma_ops vops; 829 struct xe_vma_op *op, *next_op; 830 int err, i; 831 832 lockdep_assert_held(&vm->lock); 833 if ((xe_vm_in_lr_mode(vm) && !rebind_worker) || 834 list_empty(&vm->rebind_list)) 835 return 0; 836 837 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 838 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 839 vops.pt_update_ops[i].wait_vm_bookkeep = true; 840 841 xe_vm_assert_held(vm); 842 list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { 843 xe_assert(vm->xe, vma->tile_present); 844 845 if (rebind_worker) 846 trace_xe_vma_rebind_worker(vma); 847 else 848 trace_xe_vma_rebind_exec(vma); 849 850 err = xe_vm_ops_add_rebind(&vops, vma, 851 vma->tile_present); 852 if (err) 853 goto free_ops; 854 } 855 856 err = xe_vma_ops_alloc(&vops, false); 857 if (err) 858 goto free_ops; 859 860 fence = ops_execute(vm, &vops); 861 if (IS_ERR(fence)) { 862 err = PTR_ERR(fence); 863 } else { 864 dma_fence_put(fence); 865 list_for_each_entry_safe(vma, next, &vm->rebind_list, 866 combined_links.rebind) 867 list_del_init(&vma->combined_links.rebind); 868 } 869 free_ops: 870 list_for_each_entry_safe(op, next_op, &vops.list, link) { 871 list_del(&op->link); 872 kfree(op); 873 } 874 xe_vma_ops_fini(&vops); 875 876 
return err; 877 } 878 879 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask) 880 { 881 struct dma_fence *fence = NULL; 882 struct xe_vma_ops vops; 883 struct xe_vma_op *op, *next_op; 884 struct xe_tile *tile; 885 u8 id; 886 int err; 887 888 lockdep_assert_held(&vm->lock); 889 xe_vm_assert_held(vm); 890 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 891 892 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 893 for_each_tile(tile, vm->xe, id) { 894 vops.pt_update_ops[id].wait_vm_bookkeep = true; 895 vops.pt_update_ops[tile->id].q = 896 xe_tile_migrate_exec_queue(tile); 897 } 898 899 err = xe_vm_ops_add_rebind(&vops, vma, tile_mask); 900 if (err) 901 return ERR_PTR(err); 902 903 err = xe_vma_ops_alloc(&vops, false); 904 if (err) { 905 fence = ERR_PTR(err); 906 goto free_ops; 907 } 908 909 fence = ops_execute(vm, &vops); 910 911 free_ops: 912 list_for_each_entry_safe(op, next_op, &vops.list, link) { 913 list_del(&op->link); 914 kfree(op); 915 } 916 xe_vma_ops_fini(&vops); 917 918 return fence; 919 } 920 921 static void xe_vma_free(struct xe_vma *vma) 922 { 923 if (xe_vma_is_userptr(vma)) 924 kfree(to_userptr_vma(vma)); 925 else 926 kfree(vma); 927 } 928 929 #define VMA_CREATE_FLAG_READ_ONLY BIT(0) 930 #define VMA_CREATE_FLAG_IS_NULL BIT(1) 931 #define VMA_CREATE_FLAG_DUMPABLE BIT(2) 932 933 static struct xe_vma *xe_vma_create(struct xe_vm *vm, 934 struct xe_bo *bo, 935 u64 bo_offset_or_userptr, 936 u64 start, u64 end, 937 u16 pat_index, unsigned int flags) 938 { 939 struct xe_vma *vma; 940 struct xe_tile *tile; 941 u8 id; 942 bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY); 943 bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL); 944 bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE); 945 946 xe_assert(vm->xe, start < end); 947 xe_assert(vm->xe, end < vm->size); 948 949 /* 950 * Allocate and ensure that the xe_vma_is_userptr() return 951 * matches what was allocated. 
952 */ 953 if (!bo && !is_null) { 954 struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL); 955 956 if (!uvma) 957 return ERR_PTR(-ENOMEM); 958 959 vma = &uvma->vma; 960 } else { 961 vma = kzalloc(sizeof(*vma), GFP_KERNEL); 962 if (!vma) 963 return ERR_PTR(-ENOMEM); 964 965 if (is_null) 966 vma->gpuva.flags |= DRM_GPUVA_SPARSE; 967 if (bo) 968 vma->gpuva.gem.obj = &bo->ttm.base; 969 } 970 971 INIT_LIST_HEAD(&vma->combined_links.rebind); 972 973 INIT_LIST_HEAD(&vma->gpuva.gem.entry); 974 vma->gpuva.vm = &vm->gpuvm; 975 vma->gpuva.va.addr = start; 976 vma->gpuva.va.range = end - start + 1; 977 if (read_only) 978 vma->gpuva.flags |= XE_VMA_READ_ONLY; 979 if (dumpable) 980 vma->gpuva.flags |= XE_VMA_DUMPABLE; 981 982 for_each_tile(tile, vm->xe, id) 983 vma->tile_mask |= 0x1 << id; 984 985 if (vm->xe->info.has_atomic_enable_pte_bit) 986 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; 987 988 vma->pat_index = pat_index; 989 990 if (bo) { 991 struct drm_gpuvm_bo *vm_bo; 992 993 xe_bo_assert_held(bo); 994 995 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base); 996 if (IS_ERR(vm_bo)) { 997 xe_vma_free(vma); 998 return ERR_CAST(vm_bo); 999 } 1000 1001 drm_gpuvm_bo_extobj_add(vm_bo); 1002 drm_gem_object_get(&bo->ttm.base); 1003 vma->gpuva.gem.offset = bo_offset_or_userptr; 1004 drm_gpuva_link(&vma->gpuva, vm_bo); 1005 drm_gpuvm_bo_put(vm_bo); 1006 } else /* userptr or null */ { 1007 if (!is_null) { 1008 struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; 1009 u64 size = end - start + 1; 1010 int err; 1011 1012 INIT_LIST_HEAD(&userptr->invalidate_link); 1013 INIT_LIST_HEAD(&userptr->repin_link); 1014 vma->gpuva.gem.offset = bo_offset_or_userptr; 1015 1016 err = mmu_interval_notifier_insert(&userptr->notifier, 1017 current->mm, 1018 xe_vma_userptr(vma), size, 1019 &vma_userptr_notifier_ops); 1020 if (err) { 1021 xe_vma_free(vma); 1022 return ERR_PTR(err); 1023 } 1024 1025 userptr->notifier_seq = LONG_MAX; 1026 } 1027 1028 xe_vm_get(vm); 1029 } 1030 1031 return vma; 1032 } 1033 1034 static void xe_vma_destroy_late(struct xe_vma *vma) 1035 { 1036 struct xe_vm *vm = xe_vma_vm(vma); 1037 1038 if (vma->ufence) { 1039 xe_sync_ufence_put(vma->ufence); 1040 vma->ufence = NULL; 1041 } 1042 1043 if (xe_vma_is_userptr(vma)) { 1044 struct xe_userptr_vma *uvma = to_userptr_vma(vma); 1045 struct xe_userptr *userptr = &uvma->userptr; 1046 1047 if (userptr->sg) 1048 xe_hmm_userptr_free_sg(uvma); 1049 1050 /* 1051 * Since userptr pages are not pinned, we can't remove 1052 * the notifier until we're sure the GPU is not accessing 1053 * them anymore 1054 */ 1055 mmu_interval_notifier_remove(&userptr->notifier); 1056 xe_vm_put(vm); 1057 } else if (xe_vma_is_null(vma)) { 1058 xe_vm_put(vm); 1059 } else { 1060 xe_bo_put(xe_vma_bo(vma)); 1061 } 1062 1063 xe_vma_free(vma); 1064 } 1065 1066 static void vma_destroy_work_func(struct work_struct *w) 1067 { 1068 struct xe_vma *vma = 1069 container_of(w, struct xe_vma, destroy_work); 1070 1071 xe_vma_destroy_late(vma); 1072 } 1073 1074 static void vma_destroy_cb(struct dma_fence *fence, 1075 struct dma_fence_cb *cb) 1076 { 1077 struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb); 1078 1079 INIT_WORK(&vma->destroy_work, vma_destroy_work_func); 1080 queue_work(system_unbound_wq, &vma->destroy_work); 1081 } 1082 1083 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) 1084 { 1085 struct xe_vm *vm = xe_vma_vm(vma); 1086 1087 lockdep_assert_held_write(&vm->lock); 1088 xe_assert(vm->xe, list_empty(&vma->combined_links.destroy)); 1089 

	if (xe_vma_is_userptr(vma)) {
		xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);

		spin_lock(&vm->userptr.invalidated_lock);
		xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link));
		list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
		spin_unlock(&vm->userptr.invalidated_lock);
	} else if (!xe_vma_is_null(vma)) {
		xe_bo_assert_held(xe_vma_bo(vma));

		drm_gpuva_unlink(&vma->gpuva);
	}

	xe_vm_assert_held(vm);
	if (fence) {
		int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
						 vma_destroy_cb);

		if (ret) {
			XE_WARN_ON(ret != -ENOENT);
			xe_vma_destroy_late(vma);
		}
	} else {
		xe_vma_destroy_late(vma);
	}
}

/**
 * xe_vm_lock_vma() - drm_exec utility to lock a vma
 * @exec: The drm_exec object we're currently locking for.
 * @vma: The vma for which we want to lock the vm resv and any attached
 * object's resv.
 *
 * Return: 0 on success, negative error code on error. In particular
 * may return -EDEADLK on WW transaction contention and -EINTR if
 * an interruptible wait is terminated by a signal.
 */
int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
{
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_bo *bo = xe_vma_bo(vma);
	int err;

	XE_WARN_ON(!vm);

	err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
	if (!err && bo && !bo->vm)
		err = drm_exec_lock_obj(exec, &bo->ttm.base);

	return err;
}

static void xe_vma_destroy_unlocked(struct xe_vma *vma)
{
	struct drm_exec exec;
	int err;

	drm_exec_init(&exec, 0, 0);
	drm_exec_until_all_locked(&exec) {
		err = xe_vm_lock_vma(&exec, vma);
		drm_exec_retry_on_contention(&exec);
		if (XE_WARN_ON(err))
			break;
	}

	xe_vma_destroy(vma, NULL);

	drm_exec_fini(&exec);
}

struct xe_vma *
xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
{
	struct drm_gpuva *gpuva;

	lockdep_assert_held(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm))
		return NULL;

	xe_assert(vm->xe, start + range <= vm->size);

	gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);

	return gpuva ?
gpuva_to_vma(gpuva) : NULL; 1175 } 1176 1177 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma) 1178 { 1179 int err; 1180 1181 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1182 lockdep_assert_held(&vm->lock); 1183 1184 mutex_lock(&vm->snap_mutex); 1185 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva); 1186 mutex_unlock(&vm->snap_mutex); 1187 XE_WARN_ON(err); /* Shouldn't be possible */ 1188 1189 return err; 1190 } 1191 1192 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma) 1193 { 1194 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1195 lockdep_assert_held(&vm->lock); 1196 1197 mutex_lock(&vm->snap_mutex); 1198 drm_gpuva_remove(&vma->gpuva); 1199 mutex_unlock(&vm->snap_mutex); 1200 if (vm->usm.last_fault_vma == vma) 1201 vm->usm.last_fault_vma = NULL; 1202 } 1203 1204 static struct drm_gpuva_op *xe_vm_op_alloc(void) 1205 { 1206 struct xe_vma_op *op; 1207 1208 op = kzalloc(sizeof(*op), GFP_KERNEL); 1209 1210 if (unlikely(!op)) 1211 return NULL; 1212 1213 return &op->base; 1214 } 1215 1216 static void xe_vm_free(struct drm_gpuvm *gpuvm); 1217 1218 static const struct drm_gpuvm_ops gpuvm_ops = { 1219 .op_alloc = xe_vm_op_alloc, 1220 .vm_bo_validate = xe_gpuvm_validate, 1221 .vm_free = xe_vm_free, 1222 }; 1223 1224 static u64 pde_encode_pat_index(u16 pat_index) 1225 { 1226 u64 pte = 0; 1227 1228 if (pat_index & BIT(0)) 1229 pte |= XE_PPGTT_PTE_PAT0; 1230 1231 if (pat_index & BIT(1)) 1232 pte |= XE_PPGTT_PTE_PAT1; 1233 1234 return pte; 1235 } 1236 1237 static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level) 1238 { 1239 u64 pte = 0; 1240 1241 if (pat_index & BIT(0)) 1242 pte |= XE_PPGTT_PTE_PAT0; 1243 1244 if (pat_index & BIT(1)) 1245 pte |= XE_PPGTT_PTE_PAT1; 1246 1247 if (pat_index & BIT(2)) { 1248 if (pt_level) 1249 pte |= XE_PPGTT_PDE_PDPE_PAT2; 1250 else 1251 pte |= XE_PPGTT_PTE_PAT2; 1252 } 1253 1254 if (pat_index & BIT(3)) 1255 pte |= XELPG_PPGTT_PTE_PAT3; 1256 1257 if (pat_index & (BIT(4))) 1258 pte |= XE2_PPGTT_PTE_PAT4; 1259 1260 return pte; 1261 } 1262 1263 static u64 pte_encode_ps(u32 pt_level) 1264 { 1265 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL); 1266 1267 if (pt_level == 1) 1268 return XE_PDE_PS_2M; 1269 else if (pt_level == 2) 1270 return XE_PDPE_PS_1G; 1271 1272 return 0; 1273 } 1274 1275 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset, 1276 const u16 pat_index) 1277 { 1278 u64 pde; 1279 1280 pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1281 pde |= XE_PAGE_PRESENT | XE_PAGE_RW; 1282 pde |= pde_encode_pat_index(pat_index); 1283 1284 return pde; 1285 } 1286 1287 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, 1288 u16 pat_index, u32 pt_level) 1289 { 1290 u64 pte; 1291 1292 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1293 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1294 pte |= pte_encode_pat_index(pat_index, pt_level); 1295 pte |= pte_encode_ps(pt_level); 1296 1297 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) 1298 pte |= XE_PPGTT_PTE_DM; 1299 1300 return pte; 1301 } 1302 1303 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, 1304 u16 pat_index, u32 pt_level) 1305 { 1306 pte |= XE_PAGE_PRESENT; 1307 1308 if (likely(!xe_vma_read_only(vma))) 1309 pte |= XE_PAGE_RW; 1310 1311 pte |= pte_encode_pat_index(pat_index, pt_level); 1312 pte |= pte_encode_ps(pt_level); 1313 1314 if (unlikely(xe_vma_is_null(vma))) 1315 pte |= XE_PTE_NULL; 1316 1317 return pte; 1318 } 1319 1320 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, 1321 u16 pat_index, 1322 u32 pt_level, bool devmem, u64 flags) 1323 { 1324 u64 pte; 1325 
1326 /* Avoid passing random bits directly as flags */ 1327 xe_assert(xe, !(flags & ~XE_PTE_PS64)); 1328 1329 pte = addr; 1330 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1331 pte |= pte_encode_pat_index(pat_index, pt_level); 1332 pte |= pte_encode_ps(pt_level); 1333 1334 if (devmem) 1335 pte |= XE_PPGTT_PTE_DM; 1336 1337 pte |= flags; 1338 1339 return pte; 1340 } 1341 1342 static const struct xe_pt_ops xelp_pt_ops = { 1343 .pte_encode_bo = xelp_pte_encode_bo, 1344 .pte_encode_vma = xelp_pte_encode_vma, 1345 .pte_encode_addr = xelp_pte_encode_addr, 1346 .pde_encode_bo = xelp_pde_encode_bo, 1347 }; 1348 1349 static void vm_destroy_work_func(struct work_struct *w); 1350 1351 /** 1352 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the 1353 * given tile and vm. 1354 * @xe: xe device. 1355 * @tile: tile to set up for. 1356 * @vm: vm to set up for. 1357 * 1358 * Sets up a pagetable tree with one page-table per level and a single 1359 * leaf PTE. All pagetable entries point to the single page-table or, 1360 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and 1361 * writes become NOPs. 1362 * 1363 * Return: 0 on success, negative error code on error. 1364 */ 1365 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, 1366 struct xe_vm *vm) 1367 { 1368 u8 id = tile->id; 1369 int i; 1370 1371 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) { 1372 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i); 1373 if (IS_ERR(vm->scratch_pt[id][i])) 1374 return PTR_ERR(vm->scratch_pt[id][i]); 1375 1376 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); 1377 } 1378 1379 return 0; 1380 } 1381 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO); 1382 1383 static void xe_vm_free_scratch(struct xe_vm *vm) 1384 { 1385 struct xe_tile *tile; 1386 u8 id; 1387 1388 if (!xe_vm_has_scratch(vm)) 1389 return; 1390 1391 for_each_tile(tile, vm->xe, id) { 1392 u32 i; 1393 1394 if (!vm->pt_root[id]) 1395 continue; 1396 1397 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i) 1398 if (vm->scratch_pt[id][i]) 1399 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL); 1400 } 1401 } 1402 1403 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) 1404 { 1405 struct drm_gem_object *vm_resv_obj; 1406 struct xe_vm *vm; 1407 int err, number_tiles = 0; 1408 struct xe_tile *tile; 1409 u8 id; 1410 1411 /* 1412 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to 1413 * ever be in faulting mode. 1414 */ 1415 xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE))); 1416 1417 vm = kzalloc(sizeof(*vm), GFP_KERNEL); 1418 if (!vm) 1419 return ERR_PTR(-ENOMEM); 1420 1421 vm->xe = xe; 1422 1423 vm->size = 1ull << xe->info.va_bits; 1424 1425 vm->flags = flags; 1426 1427 /** 1428 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be 1429 * manipulated under the PXP mutex. However, the PXP mutex can be taken 1430 * under a user-VM lock when the PXP session is started at exec_queue 1431 * creation time. Those are different VMs and therefore there is no risk 1432 * of deadlock, but we need to tell lockdep that this is the case or it 1433 * will print a warning. 
1434 */ 1435 if (flags & XE_VM_FLAG_GSC) { 1436 static struct lock_class_key gsc_vm_key; 1437 1438 __init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key); 1439 } else { 1440 init_rwsem(&vm->lock); 1441 } 1442 mutex_init(&vm->snap_mutex); 1443 1444 INIT_LIST_HEAD(&vm->rebind_list); 1445 1446 INIT_LIST_HEAD(&vm->userptr.repin_list); 1447 INIT_LIST_HEAD(&vm->userptr.invalidated); 1448 init_rwsem(&vm->userptr.notifier_lock); 1449 spin_lock_init(&vm->userptr.invalidated_lock); 1450 1451 ttm_lru_bulk_move_init(&vm->lru_bulk_move); 1452 1453 INIT_WORK(&vm->destroy_work, vm_destroy_work_func); 1454 1455 INIT_LIST_HEAD(&vm->preempt.exec_queues); 1456 vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */ 1457 1458 for_each_tile(tile, xe, id) 1459 xe_range_fence_tree_init(&vm->rftree[id]); 1460 1461 vm->pt_ops = &xelp_pt_ops; 1462 1463 /* 1464 * Long-running workloads are not protected by the scheduler references. 1465 * By design, run_job for long-running workloads returns NULL and the 1466 * scheduler drops all the references of it, hence protecting the VM 1467 * for this case is necessary. 1468 */ 1469 if (flags & XE_VM_FLAG_LR_MODE) 1470 xe_pm_runtime_get_noresume(xe); 1471 1472 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); 1473 if (!vm_resv_obj) { 1474 err = -ENOMEM; 1475 goto err_no_resv; 1476 } 1477 1478 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm, 1479 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops); 1480 1481 drm_gem_object_put(vm_resv_obj); 1482 1483 err = xe_vm_lock(vm, true); 1484 if (err) 1485 goto err_close; 1486 1487 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) 1488 vm->flags |= XE_VM_FLAG_64K; 1489 1490 for_each_tile(tile, xe, id) { 1491 if (flags & XE_VM_FLAG_MIGRATION && 1492 tile->id != XE_VM_FLAG_TILE_ID(flags)) 1493 continue; 1494 1495 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level); 1496 if (IS_ERR(vm->pt_root[id])) { 1497 err = PTR_ERR(vm->pt_root[id]); 1498 vm->pt_root[id] = NULL; 1499 goto err_unlock_close; 1500 } 1501 } 1502 1503 if (xe_vm_has_scratch(vm)) { 1504 for_each_tile(tile, xe, id) { 1505 if (!vm->pt_root[id]) 1506 continue; 1507 1508 err = xe_vm_create_scratch(xe, tile, vm); 1509 if (err) 1510 goto err_unlock_close; 1511 } 1512 vm->batch_invalidate_tlb = true; 1513 } 1514 1515 if (vm->flags & XE_VM_FLAG_LR_MODE) { 1516 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); 1517 vm->batch_invalidate_tlb = false; 1518 } 1519 1520 /* Fill pt_root after allocating scratch tables */ 1521 for_each_tile(tile, xe, id) { 1522 if (!vm->pt_root[id]) 1523 continue; 1524 1525 xe_pt_populate_empty(tile, vm, vm->pt_root[id]); 1526 } 1527 xe_vm_unlock(vm); 1528 1529 /* Kernel migration VM shouldn't have a circular loop.. 
*/ 1530 if (!(flags & XE_VM_FLAG_MIGRATION)) { 1531 for_each_tile(tile, xe, id) { 1532 struct xe_exec_queue *q; 1533 u32 create_flags = EXEC_QUEUE_FLAG_VM; 1534 1535 if (!vm->pt_root[id]) 1536 continue; 1537 1538 q = xe_exec_queue_create_bind(xe, tile, create_flags, 0); 1539 if (IS_ERR(q)) { 1540 err = PTR_ERR(q); 1541 goto err_close; 1542 } 1543 vm->q[id] = q; 1544 number_tiles++; 1545 } 1546 } 1547 1548 if (number_tiles > 1) 1549 vm->composite_fence_ctx = dma_fence_context_alloc(1); 1550 1551 trace_xe_vm_create(vm); 1552 1553 return vm; 1554 1555 err_unlock_close: 1556 xe_vm_unlock(vm); 1557 err_close: 1558 xe_vm_close_and_put(vm); 1559 return ERR_PTR(err); 1560 1561 err_no_resv: 1562 mutex_destroy(&vm->snap_mutex); 1563 for_each_tile(tile, xe, id) 1564 xe_range_fence_tree_fini(&vm->rftree[id]); 1565 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 1566 kfree(vm); 1567 if (flags & XE_VM_FLAG_LR_MODE) 1568 xe_pm_runtime_put(xe); 1569 return ERR_PTR(err); 1570 } 1571 1572 static void xe_vm_close(struct xe_vm *vm) 1573 { 1574 down_write(&vm->lock); 1575 vm->size = 0; 1576 up_write(&vm->lock); 1577 } 1578 1579 void xe_vm_close_and_put(struct xe_vm *vm) 1580 { 1581 LIST_HEAD(contested); 1582 struct xe_device *xe = vm->xe; 1583 struct xe_tile *tile; 1584 struct xe_vma *vma, *next_vma; 1585 struct drm_gpuva *gpuva, *next; 1586 u8 id; 1587 1588 xe_assert(xe, !vm->preempt.num_exec_queues); 1589 1590 xe_vm_close(vm); 1591 if (xe_vm_in_preempt_fence_mode(vm)) 1592 flush_work(&vm->preempt.rebind_work); 1593 1594 down_write(&vm->lock); 1595 for_each_tile(tile, xe, id) { 1596 if (vm->q[id]) 1597 xe_exec_queue_last_fence_put(vm->q[id], vm); 1598 } 1599 up_write(&vm->lock); 1600 1601 for_each_tile(tile, xe, id) { 1602 if (vm->q[id]) { 1603 xe_exec_queue_kill(vm->q[id]); 1604 xe_exec_queue_put(vm->q[id]); 1605 vm->q[id] = NULL; 1606 } 1607 } 1608 1609 down_write(&vm->lock); 1610 xe_vm_lock(vm, false); 1611 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) { 1612 vma = gpuva_to_vma(gpuva); 1613 1614 if (xe_vma_has_no_bo(vma)) { 1615 down_read(&vm->userptr.notifier_lock); 1616 vma->gpuva.flags |= XE_VMA_DESTROYED; 1617 up_read(&vm->userptr.notifier_lock); 1618 } 1619 1620 xe_vm_remove_vma(vm, vma); 1621 1622 /* easy case, remove from VMA? */ 1623 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) { 1624 list_del_init(&vma->combined_links.rebind); 1625 xe_vma_destroy(vma, NULL); 1626 continue; 1627 } 1628 1629 list_move_tail(&vma->combined_links.destroy, &contested); 1630 vma->gpuva.flags |= XE_VMA_DESTROYED; 1631 } 1632 1633 /* 1634 * All vm operations will add shared fences to resv. 1635 * The only exception is eviction for a shared object, 1636 * but even so, the unbind when evicted would still 1637 * install a fence to resv. Hence it's safe to 1638 * destroy the pagetables immediately. 1639 */ 1640 xe_vm_free_scratch(vm); 1641 1642 for_each_tile(tile, xe, id) { 1643 if (vm->pt_root[id]) { 1644 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); 1645 vm->pt_root[id] = NULL; 1646 } 1647 } 1648 xe_vm_unlock(vm); 1649 1650 /* 1651 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL 1652 * Since we hold a refcount to the bo, we can remove and free 1653 * the members safely without locking. 
1654 */ 1655 list_for_each_entry_safe(vma, next_vma, &contested, 1656 combined_links.destroy) { 1657 list_del_init(&vma->combined_links.destroy); 1658 xe_vma_destroy_unlocked(vma); 1659 } 1660 1661 up_write(&vm->lock); 1662 1663 down_write(&xe->usm.lock); 1664 if (vm->usm.asid) { 1665 void *lookup; 1666 1667 xe_assert(xe, xe->info.has_asid); 1668 xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION)); 1669 1670 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); 1671 xe_assert(xe, lookup == vm); 1672 } 1673 up_write(&xe->usm.lock); 1674 1675 for_each_tile(tile, xe, id) 1676 xe_range_fence_tree_fini(&vm->rftree[id]); 1677 1678 xe_vm_put(vm); 1679 } 1680 1681 static void vm_destroy_work_func(struct work_struct *w) 1682 { 1683 struct xe_vm *vm = 1684 container_of(w, struct xe_vm, destroy_work); 1685 struct xe_device *xe = vm->xe; 1686 struct xe_tile *tile; 1687 u8 id; 1688 1689 /* xe_vm_close_and_put was not called? */ 1690 xe_assert(xe, !vm->size); 1691 1692 if (xe_vm_in_preempt_fence_mode(vm)) 1693 flush_work(&vm->preempt.rebind_work); 1694 1695 mutex_destroy(&vm->snap_mutex); 1696 1697 if (vm->flags & XE_VM_FLAG_LR_MODE) 1698 xe_pm_runtime_put(xe); 1699 1700 for_each_tile(tile, xe, id) 1701 XE_WARN_ON(vm->pt_root[id]); 1702 1703 trace_xe_vm_free(vm); 1704 1705 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 1706 1707 if (vm->xef) 1708 xe_file_put(vm->xef); 1709 1710 kfree(vm); 1711 } 1712 1713 static void xe_vm_free(struct drm_gpuvm *gpuvm) 1714 { 1715 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); 1716 1717 /* To destroy the VM we need to be able to sleep */ 1718 queue_work(system_unbound_wq, &vm->destroy_work); 1719 } 1720 1721 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) 1722 { 1723 struct xe_vm *vm; 1724 1725 mutex_lock(&xef->vm.lock); 1726 vm = xa_load(&xef->vm.xa, id); 1727 if (vm) 1728 xe_vm_get(vm); 1729 mutex_unlock(&xef->vm.lock); 1730 1731 return vm; 1732 } 1733 1734 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile) 1735 { 1736 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0, 1737 tile_to_xe(tile)->pat.idx[XE_CACHE_WB]); 1738 } 1739 1740 static struct xe_exec_queue * 1741 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 1742 { 1743 return q ? 
q : vm->q[0]; 1744 } 1745 1746 static struct xe_user_fence * 1747 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) 1748 { 1749 unsigned int i; 1750 1751 for (i = 0; i < num_syncs; i++) { 1752 struct xe_sync_entry *e = &syncs[i]; 1753 1754 if (xe_sync_is_ufence(e)) 1755 return xe_sync_ufence_get(e); 1756 } 1757 1758 return NULL; 1759 } 1760 1761 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ 1762 DRM_XE_VM_CREATE_FLAG_LR_MODE | \ 1763 DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 1764 1765 int xe_vm_create_ioctl(struct drm_device *dev, void *data, 1766 struct drm_file *file) 1767 { 1768 struct xe_device *xe = to_xe_device(dev); 1769 struct xe_file *xef = to_xe_file(file); 1770 struct drm_xe_vm_create *args = data; 1771 struct xe_tile *tile; 1772 struct xe_vm *vm; 1773 u32 id, asid; 1774 int err; 1775 u32 flags = 0; 1776 1777 if (XE_IOCTL_DBG(xe, args->extensions)) 1778 return -EINVAL; 1779 1780 if (XE_WA(xe_root_mmio_gt(xe), 14016763929)) 1781 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE; 1782 1783 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && 1784 !xe->info.has_usm)) 1785 return -EINVAL; 1786 1787 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 1788 return -EINVAL; 1789 1790 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS)) 1791 return -EINVAL; 1792 1793 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE && 1794 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) 1795 return -EINVAL; 1796 1797 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) && 1798 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) 1799 return -EINVAL; 1800 1801 if (XE_IOCTL_DBG(xe, args->extensions)) 1802 return -EINVAL; 1803 1804 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE) 1805 flags |= XE_VM_FLAG_SCRATCH_PAGE; 1806 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) 1807 flags |= XE_VM_FLAG_LR_MODE; 1808 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 1809 flags |= XE_VM_FLAG_FAULT_MODE; 1810 1811 vm = xe_vm_create(xe, flags); 1812 if (IS_ERR(vm)) 1813 return PTR_ERR(vm); 1814 1815 if (xe->info.has_asid) { 1816 down_write(&xe->usm.lock); 1817 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, 1818 XA_LIMIT(1, XE_MAX_ASID - 1), 1819 &xe->usm.next_asid, GFP_KERNEL); 1820 up_write(&xe->usm.lock); 1821 if (err < 0) 1822 goto err_close_and_put; 1823 1824 vm->usm.asid = asid; 1825 } 1826 1827 vm->xef = xe_file_get(xef); 1828 1829 /* Record BO memory for VM pagetable created against client */ 1830 for_each_tile(tile, xe, id) 1831 if (vm->pt_root[id]) 1832 xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo); 1833 1834 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) 1835 /* Warning: Security issue - never enable by default */ 1836 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); 1837 #endif 1838 1839 /* user id alloc must always be last in ioctl to prevent UAF */ 1840 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); 1841 if (err) 1842 goto err_close_and_put; 1843 1844 args->vm_id = id; 1845 1846 return 0; 1847 1848 err_close_and_put: 1849 xe_vm_close_and_put(vm); 1850 1851 return err; 1852 } 1853 1854 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, 1855 struct drm_file *file) 1856 { 1857 struct xe_device *xe = to_xe_device(dev); 1858 struct xe_file *xef = to_xe_file(file); 1859 struct drm_xe_vm_destroy *args = data; 1860 struct xe_vm *vm; 1861 int err = 0; 1862 1863 if (XE_IOCTL_DBG(xe, args->pad) || 1864 XE_IOCTL_DBG(xe, args->reserved[0] || 
args->reserved[1])) 1865 return -EINVAL; 1866 1867 mutex_lock(&xef->vm.lock); 1868 vm = xa_load(&xef->vm.xa, args->vm_id); 1869 if (XE_IOCTL_DBG(xe, !vm)) 1870 err = -ENOENT; 1871 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues)) 1872 err = -EBUSY; 1873 else 1874 xa_erase(&xef->vm.xa, args->vm_id); 1875 mutex_unlock(&xef->vm.lock); 1876 1877 if (!err) 1878 xe_vm_close_and_put(vm); 1879 1880 return err; 1881 } 1882 1883 static const u32 region_to_mem_type[] = { 1884 XE_PL_TT, 1885 XE_PL_VRAM0, 1886 XE_PL_VRAM1, 1887 }; 1888 1889 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma, 1890 bool post_commit) 1891 { 1892 down_read(&vm->userptr.notifier_lock); 1893 vma->gpuva.flags |= XE_VMA_DESTROYED; 1894 up_read(&vm->userptr.notifier_lock); 1895 if (post_commit) 1896 xe_vm_remove_vma(vm, vma); 1897 } 1898 1899 #undef ULL 1900 #define ULL unsigned long long 1901 1902 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) 1903 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 1904 { 1905 struct xe_vma *vma; 1906 1907 switch (op->op) { 1908 case DRM_GPUVA_OP_MAP: 1909 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx", 1910 (ULL)op->map.va.addr, (ULL)op->map.va.range); 1911 break; 1912 case DRM_GPUVA_OP_REMAP: 1913 vma = gpuva_to_vma(op->remap.unmap->va); 1914 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 1915 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 1916 op->remap.unmap->keep ? 1 : 0); 1917 if (op->remap.prev) 1918 vm_dbg(&xe->drm, 1919 "REMAP:PREV: addr=0x%016llx, range=0x%016llx", 1920 (ULL)op->remap.prev->va.addr, 1921 (ULL)op->remap.prev->va.range); 1922 if (op->remap.next) 1923 vm_dbg(&xe->drm, 1924 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx", 1925 (ULL)op->remap.next->va.addr, 1926 (ULL)op->remap.next->va.range); 1927 break; 1928 case DRM_GPUVA_OP_UNMAP: 1929 vma = gpuva_to_vma(op->unmap.va); 1930 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 1931 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 1932 op->unmap.keep ? 1 : 0); 1933 break; 1934 case DRM_GPUVA_OP_PREFETCH: 1935 vma = gpuva_to_vma(op->prefetch.va); 1936 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx", 1937 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma)); 1938 break; 1939 default: 1940 drm_warn(&xe->drm, "NOT POSSIBLE"); 1941 } 1942 } 1943 #else 1944 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 1945 { 1946 } 1947 #endif 1948 1949 /* 1950 * Create operations list from IOCTL arguments, setup operations fields so parse 1951 * and commit steps are decoupled from IOCTL arguments. This step can fail. 1952 */ 1953 static struct drm_gpuva_ops * 1954 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, 1955 u64 bo_offset_or_userptr, u64 addr, u64 range, 1956 u32 operation, u32 flags, 1957 u32 prefetch_region, u16 pat_index) 1958 { 1959 struct drm_gem_object *obj = bo ? 
&bo->ttm.base : NULL; 1960 struct drm_gpuva_ops *ops; 1961 struct drm_gpuva_op *__op; 1962 struct drm_gpuvm_bo *vm_bo; 1963 int err; 1964 1965 lockdep_assert_held_write(&vm->lock); 1966 1967 vm_dbg(&vm->xe->drm, 1968 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx", 1969 operation, (ULL)addr, (ULL)range, 1970 (ULL)bo_offset_or_userptr); 1971 1972 switch (operation) { 1973 case DRM_XE_VM_BIND_OP_MAP: 1974 case DRM_XE_VM_BIND_OP_MAP_USERPTR: 1975 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range, 1976 obj, bo_offset_or_userptr); 1977 break; 1978 case DRM_XE_VM_BIND_OP_UNMAP: 1979 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range); 1980 break; 1981 case DRM_XE_VM_BIND_OP_PREFETCH: 1982 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range); 1983 break; 1984 case DRM_XE_VM_BIND_OP_UNMAP_ALL: 1985 xe_assert(vm->xe, bo); 1986 1987 err = xe_bo_lock(bo, true); 1988 if (err) 1989 return ERR_PTR(err); 1990 1991 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj); 1992 if (IS_ERR(vm_bo)) { 1993 xe_bo_unlock(bo); 1994 return ERR_CAST(vm_bo); 1995 } 1996 1997 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo); 1998 drm_gpuvm_bo_put(vm_bo); 1999 xe_bo_unlock(bo); 2000 break; 2001 default: 2002 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2003 ops = ERR_PTR(-EINVAL); 2004 } 2005 if (IS_ERR(ops)) 2006 return ops; 2007 2008 drm_gpuva_for_each_op(__op, ops) { 2009 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2010 2011 if (__op->op == DRM_GPUVA_OP_MAP) { 2012 op->map.immediate = 2013 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; 2014 op->map.read_only = 2015 flags & DRM_XE_VM_BIND_FLAG_READONLY; 2016 op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 2017 op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; 2018 op->map.pat_index = pat_index; 2019 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { 2020 op->prefetch.region = prefetch_region; 2021 } 2022 2023 print_op(vm->xe, __op); 2024 } 2025 2026 return ops; 2027 } 2028 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO); 2029 2030 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, 2031 u16 pat_index, unsigned int flags) 2032 { 2033 struct xe_bo *bo = op->gem.obj ? 
gem_to_xe_bo(op->gem.obj) : NULL; 2034 struct drm_exec exec; 2035 struct xe_vma *vma; 2036 int err = 0; 2037 2038 lockdep_assert_held_write(&vm->lock); 2039 2040 if (bo) { 2041 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); 2042 drm_exec_until_all_locked(&exec) { 2043 err = 0; 2044 if (!bo->vm) { 2045 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm)); 2046 drm_exec_retry_on_contention(&exec); 2047 } 2048 if (!err) { 2049 err = drm_exec_lock_obj(&exec, &bo->ttm.base); 2050 drm_exec_retry_on_contention(&exec); 2051 } 2052 if (err) { 2053 drm_exec_fini(&exec); 2054 return ERR_PTR(err); 2055 } 2056 } 2057 } 2058 vma = xe_vma_create(vm, bo, op->gem.offset, 2059 op->va.addr, op->va.addr + 2060 op->va.range - 1, pat_index, flags); 2061 if (IS_ERR(vma)) 2062 goto err_unlock; 2063 2064 if (xe_vma_is_userptr(vma)) 2065 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2066 else if (!xe_vma_has_no_bo(vma) && !bo->vm) 2067 err = add_preempt_fences(vm, bo); 2068 2069 err_unlock: 2070 if (bo) 2071 drm_exec_fini(&exec); 2072 2073 if (err) { 2074 prep_vma_destroy(vm, vma, false); 2075 xe_vma_destroy_unlocked(vma); 2076 vma = ERR_PTR(err); 2077 } 2078 2079 return vma; 2080 } 2081 2082 static u64 xe_vma_max_pte_size(struct xe_vma *vma) 2083 { 2084 if (vma->gpuva.flags & XE_VMA_PTE_1G) 2085 return SZ_1G; 2086 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT)) 2087 return SZ_2M; 2088 else if (vma->gpuva.flags & XE_VMA_PTE_64K) 2089 return SZ_64K; 2090 else if (vma->gpuva.flags & XE_VMA_PTE_4K) 2091 return SZ_4K; 2092 2093 return SZ_1G; /* Uninitialized, used max size */ 2094 } 2095 2096 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size) 2097 { 2098 switch (size) { 2099 case SZ_1G: 2100 vma->gpuva.flags |= XE_VMA_PTE_1G; 2101 break; 2102 case SZ_2M: 2103 vma->gpuva.flags |= XE_VMA_PTE_2M; 2104 break; 2105 case SZ_64K: 2106 vma->gpuva.flags |= XE_VMA_PTE_64K; 2107 break; 2108 case SZ_4K: 2109 vma->gpuva.flags |= XE_VMA_PTE_4K; 2110 break; 2111 } 2112 } 2113 2114 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) 2115 { 2116 int err = 0; 2117 2118 lockdep_assert_held_write(&vm->lock); 2119 2120 switch (op->base.op) { 2121 case DRM_GPUVA_OP_MAP: 2122 err |= xe_vm_insert_vma(vm, op->map.vma); 2123 if (!err) 2124 op->flags |= XE_VMA_OP_COMMITTED; 2125 break; 2126 case DRM_GPUVA_OP_REMAP: 2127 { 2128 u8 tile_present = 2129 gpuva_to_vma(op->base.remap.unmap->va)->tile_present; 2130 2131 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va), 2132 true); 2133 op->flags |= XE_VMA_OP_COMMITTED; 2134 2135 if (op->remap.prev) { 2136 err |= xe_vm_insert_vma(vm, op->remap.prev); 2137 if (!err) 2138 op->flags |= XE_VMA_OP_PREV_COMMITTED; 2139 if (!err && op->remap.skip_prev) { 2140 op->remap.prev->tile_present = 2141 tile_present; 2142 op->remap.prev = NULL; 2143 } 2144 } 2145 if (op->remap.next) { 2146 err |= xe_vm_insert_vma(vm, op->remap.next); 2147 if (!err) 2148 op->flags |= XE_VMA_OP_NEXT_COMMITTED; 2149 if (!err && op->remap.skip_next) { 2150 op->remap.next->tile_present = 2151 tile_present; 2152 op->remap.next = NULL; 2153 } 2154 } 2155 2156 /* Adjust for partial unbind after removing VMA from VM */ 2157 if (!err) { 2158 op->base.remap.unmap->va->va.addr = op->remap.start; 2159 op->base.remap.unmap->va->va.range = op->remap.range; 2160 } 2161 break; 2162 } 2163 case DRM_GPUVA_OP_UNMAP: 2164 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true); 2165 op->flags |= XE_VMA_OP_COMMITTED; 2166 break; 2167 case DRM_GPUVA_OP_PREFETCH: 2168 op->flags |= 
XE_VMA_OP_COMMITTED; 2169 break; 2170 default: 2171 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2172 } 2173 2174 return err; 2175 } 2176 2177 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, 2178 struct xe_vma_ops *vops) 2179 { 2180 struct xe_device *xe = vm->xe; 2181 struct drm_gpuva_op *__op; 2182 struct xe_tile *tile; 2183 u8 id, tile_mask = 0; 2184 int err = 0; 2185 2186 lockdep_assert_held_write(&vm->lock); 2187 2188 for_each_tile(tile, vm->xe, id) 2189 tile_mask |= 0x1 << id; 2190 2191 drm_gpuva_for_each_op(__op, ops) { 2192 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2193 struct xe_vma *vma; 2194 unsigned int flags = 0; 2195 2196 INIT_LIST_HEAD(&op->link); 2197 list_add_tail(&op->link, &vops->list); 2198 op->tile_mask = tile_mask; 2199 2200 switch (op->base.op) { 2201 case DRM_GPUVA_OP_MAP: 2202 { 2203 flags |= op->map.read_only ? 2204 VMA_CREATE_FLAG_READ_ONLY : 0; 2205 flags |= op->map.is_null ? 2206 VMA_CREATE_FLAG_IS_NULL : 0; 2207 flags |= op->map.dumpable ? 2208 VMA_CREATE_FLAG_DUMPABLE : 0; 2209 2210 vma = new_vma(vm, &op->base.map, op->map.pat_index, 2211 flags); 2212 if (IS_ERR(vma)) 2213 return PTR_ERR(vma); 2214 2215 op->map.vma = vma; 2216 if (op->map.immediate || !xe_vm_in_fault_mode(vm)) 2217 xe_vma_ops_incr_pt_update_ops(vops, 2218 op->tile_mask); 2219 break; 2220 } 2221 case DRM_GPUVA_OP_REMAP: 2222 { 2223 struct xe_vma *old = 2224 gpuva_to_vma(op->base.remap.unmap->va); 2225 2226 op->remap.start = xe_vma_start(old); 2227 op->remap.range = xe_vma_size(old); 2228 2229 if (op->base.remap.prev) { 2230 flags |= op->base.remap.unmap->va->flags & 2231 XE_VMA_READ_ONLY ? 2232 VMA_CREATE_FLAG_READ_ONLY : 0; 2233 flags |= op->base.remap.unmap->va->flags & 2234 DRM_GPUVA_SPARSE ? 2235 VMA_CREATE_FLAG_IS_NULL : 0; 2236 flags |= op->base.remap.unmap->va->flags & 2237 XE_VMA_DUMPABLE ? 2238 VMA_CREATE_FLAG_DUMPABLE : 0; 2239 2240 vma = new_vma(vm, op->base.remap.prev, 2241 old->pat_index, flags); 2242 if (IS_ERR(vma)) 2243 return PTR_ERR(vma); 2244 2245 op->remap.prev = vma; 2246 2247 /* 2248 * Userptr creates a new SG mapping so 2249 * we must also rebind. 2250 */ 2251 op->remap.skip_prev = !xe_vma_is_userptr(old) && 2252 IS_ALIGNED(xe_vma_end(vma), 2253 xe_vma_max_pte_size(old)); 2254 if (op->remap.skip_prev) { 2255 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2256 op->remap.range -= 2257 xe_vma_end(vma) - 2258 xe_vma_start(old); 2259 op->remap.start = xe_vma_end(vma); 2260 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx", 2261 (ULL)op->remap.start, 2262 (ULL)op->remap.range); 2263 } else { 2264 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); 2265 } 2266 } 2267 2268 if (op->base.remap.next) { 2269 flags |= op->base.remap.unmap->va->flags & 2270 XE_VMA_READ_ONLY ? 2271 VMA_CREATE_FLAG_READ_ONLY : 0; 2272 flags |= op->base.remap.unmap->va->flags & 2273 DRM_GPUVA_SPARSE ? 2274 VMA_CREATE_FLAG_IS_NULL : 0; 2275 flags |= op->base.remap.unmap->va->flags & 2276 XE_VMA_DUMPABLE ? 2277 VMA_CREATE_FLAG_DUMPABLE : 0; 2278 2279 vma = new_vma(vm, op->base.remap.next, 2280 old->pat_index, flags); 2281 if (IS_ERR(vma)) 2282 return PTR_ERR(vma); 2283 2284 op->remap.next = vma; 2285 2286 /* 2287 * Userptr creates a new SG mapping so 2288 * we must also rebind. 
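 * The rebind of the "next" VMA is skipped below only when the old
 * mapping was not a userptr and the new VMA's start is aligned to the
 * old VMA's largest PTE size, so the page-table entries already
 * covering that range can presumably be left in place.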
2289 */ 2290 op->remap.skip_next = !xe_vma_is_userptr(old) && 2291 IS_ALIGNED(xe_vma_start(vma), 2292 xe_vma_max_pte_size(old)); 2293 if (op->remap.skip_next) { 2294 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2295 op->remap.range -= 2296 xe_vma_end(old) - 2297 xe_vma_start(vma); 2298 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx", 2299 (ULL)op->remap.start, 2300 (ULL)op->remap.range); 2301 } else { 2302 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); 2303 } 2304 } 2305 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); 2306 break; 2307 } 2308 case DRM_GPUVA_OP_UNMAP: 2309 case DRM_GPUVA_OP_PREFETCH: 2310 /* FIXME: Need to skip some prefetch ops */ 2311 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); 2312 break; 2313 default: 2314 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2315 } 2316 2317 err = xe_vma_op_commit(vm, op); 2318 if (err) 2319 return err; 2320 } 2321 2322 return 0; 2323 } 2324 2325 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, 2326 bool post_commit, bool prev_post_commit, 2327 bool next_post_commit) 2328 { 2329 lockdep_assert_held_write(&vm->lock); 2330 2331 switch (op->base.op) { 2332 case DRM_GPUVA_OP_MAP: 2333 if (op->map.vma) { 2334 prep_vma_destroy(vm, op->map.vma, post_commit); 2335 xe_vma_destroy_unlocked(op->map.vma); 2336 } 2337 break; 2338 case DRM_GPUVA_OP_UNMAP: 2339 { 2340 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va); 2341 2342 if (vma) { 2343 down_read(&vm->userptr.notifier_lock); 2344 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2345 up_read(&vm->userptr.notifier_lock); 2346 if (post_commit) 2347 xe_vm_insert_vma(vm, vma); 2348 } 2349 break; 2350 } 2351 case DRM_GPUVA_OP_REMAP: 2352 { 2353 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va); 2354 2355 if (op->remap.prev) { 2356 prep_vma_destroy(vm, op->remap.prev, prev_post_commit); 2357 xe_vma_destroy_unlocked(op->remap.prev); 2358 } 2359 if (op->remap.next) { 2360 prep_vma_destroy(vm, op->remap.next, next_post_commit); 2361 xe_vma_destroy_unlocked(op->remap.next); 2362 } 2363 if (vma) { 2364 down_read(&vm->userptr.notifier_lock); 2365 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2366 up_read(&vm->userptr.notifier_lock); 2367 if (post_commit) 2368 xe_vm_insert_vma(vm, vma); 2369 } 2370 break; 2371 } 2372 case DRM_GPUVA_OP_PREFETCH: 2373 /* Nothing to do */ 2374 break; 2375 default: 2376 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2377 } 2378 } 2379 2380 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, 2381 struct drm_gpuva_ops **ops, 2382 int num_ops_list) 2383 { 2384 int i; 2385 2386 for (i = num_ops_list - 1; i >= 0; --i) { 2387 struct drm_gpuva_ops *__ops = ops[i]; 2388 struct drm_gpuva_op *__op; 2389 2390 if (!__ops) 2391 continue; 2392 2393 drm_gpuva_for_each_op_reverse(__op, __ops) { 2394 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2395 2396 xe_vma_op_unwind(vm, op, 2397 op->flags & XE_VMA_OP_COMMITTED, 2398 op->flags & XE_VMA_OP_PREV_COMMITTED, 2399 op->flags & XE_VMA_OP_NEXT_COMMITTED); 2400 } 2401 } 2402 } 2403 2404 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, 2405 bool validate) 2406 { 2407 struct xe_bo *bo = xe_vma_bo(vma); 2408 struct xe_vm *vm = xe_vma_vm(vma); 2409 int err = 0; 2410 2411 if (bo) { 2412 if (!bo->vm) 2413 err = drm_exec_lock_obj(exec, &bo->ttm.base); 2414 if (!err && validate) 2415 err = xe_bo_validate(bo, vm, 2416 !xe_vm_in_preempt_fence_mode(vm)); 2417 } 2418 2419 return err; 2420 } 2421 2422 static int check_ufence(struct xe_vma *vma) 2423 { 2424 if (vma->ufence) { 2425 struct 
xe_user_fence * const f = vma->ufence; 2426 2427 if (!xe_sync_ufence_get_status(f)) 2428 return -EBUSY; 2429 2430 vma->ufence = NULL; 2431 xe_sync_ufence_put(f); 2432 } 2433 2434 return 0; 2435 } 2436 2437 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, 2438 struct xe_vma_op *op) 2439 { 2440 int err = 0; 2441 2442 switch (op->base.op) { 2443 case DRM_GPUVA_OP_MAP: 2444 err = vma_lock_and_validate(exec, op->map.vma, 2445 !xe_vm_in_fault_mode(vm) || 2446 op->map.immediate); 2447 break; 2448 case DRM_GPUVA_OP_REMAP: 2449 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va)); 2450 if (err) 2451 break; 2452 2453 err = vma_lock_and_validate(exec, 2454 gpuva_to_vma(op->base.remap.unmap->va), 2455 false); 2456 if (!err && op->remap.prev) 2457 err = vma_lock_and_validate(exec, op->remap.prev, true); 2458 if (!err && op->remap.next) 2459 err = vma_lock_and_validate(exec, op->remap.next, true); 2460 break; 2461 case DRM_GPUVA_OP_UNMAP: 2462 err = check_ufence(gpuva_to_vma(op->base.unmap.va)); 2463 if (err) 2464 break; 2465 2466 err = vma_lock_and_validate(exec, 2467 gpuva_to_vma(op->base.unmap.va), 2468 false); 2469 break; 2470 case DRM_GPUVA_OP_PREFETCH: 2471 { 2472 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2473 u32 region = op->prefetch.region; 2474 2475 xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type)); 2476 2477 err = vma_lock_and_validate(exec, 2478 gpuva_to_vma(op->base.prefetch.va), 2479 false); 2480 if (!err && !xe_vma_has_no_bo(vma)) 2481 err = xe_bo_migrate(xe_vma_bo(vma), 2482 region_to_mem_type[region]); 2483 break; 2484 } 2485 default: 2486 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2487 } 2488 2489 return err; 2490 } 2491 2492 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, 2493 struct xe_vm *vm, 2494 struct xe_vma_ops *vops) 2495 { 2496 struct xe_vma_op *op; 2497 int err; 2498 2499 err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 2500 if (err) 2501 return err; 2502 2503 list_for_each_entry(op, &vops->list, link) { 2504 err = op_lock_and_prep(exec, vm, op); 2505 if (err) 2506 return err; 2507 } 2508 2509 #ifdef TEST_VM_OPS_ERROR 2510 if (vops->inject_error && 2511 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK) 2512 return -ENOSPC; 2513 #endif 2514 2515 return 0; 2516 } 2517 2518 static void op_trace(struct xe_vma_op *op) 2519 { 2520 switch (op->base.op) { 2521 case DRM_GPUVA_OP_MAP: 2522 trace_xe_vma_bind(op->map.vma); 2523 break; 2524 case DRM_GPUVA_OP_REMAP: 2525 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va)); 2526 if (op->remap.prev) 2527 trace_xe_vma_bind(op->remap.prev); 2528 if (op->remap.next) 2529 trace_xe_vma_bind(op->remap.next); 2530 break; 2531 case DRM_GPUVA_OP_UNMAP: 2532 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va)); 2533 break; 2534 case DRM_GPUVA_OP_PREFETCH: 2535 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va)); 2536 break; 2537 default: 2538 XE_WARN_ON("NOT POSSIBLE"); 2539 } 2540 } 2541 2542 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops) 2543 { 2544 struct xe_vma_op *op; 2545 2546 list_for_each_entry(op, &vops->list, link) 2547 op_trace(op); 2548 } 2549 2550 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops) 2551 { 2552 struct xe_exec_queue *q = vops->q; 2553 struct xe_tile *tile; 2554 int number_tiles = 0; 2555 u8 id; 2556 2557 for_each_tile(tile, vm->xe, id) { 2558 if (vops->pt_update_ops[id].num_ops) 2559 ++number_tiles; 2560 2561 if (vops->pt_update_ops[id].q) 2562 continue; 2563 2564 if (q) { 2565 vops->pt_update_ops[id].q = 
q; 2566 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list)) 2567 q = list_next_entry(q, multi_gt_list); 2568 } else { 2569 vops->pt_update_ops[id].q = vm->q[id]; 2570 } 2571 } 2572 2573 return number_tiles; 2574 } 2575 2576 static struct dma_fence *ops_execute(struct xe_vm *vm, 2577 struct xe_vma_ops *vops) 2578 { 2579 struct xe_tile *tile; 2580 struct dma_fence *fence = NULL; 2581 struct dma_fence **fences = NULL; 2582 struct dma_fence_array *cf = NULL; 2583 int number_tiles = 0, current_fence = 0, err; 2584 u8 id; 2585 2586 number_tiles = vm_ops_setup_tile_args(vm, vops); 2587 if (number_tiles == 0) 2588 return ERR_PTR(-ENODATA); 2589 2590 if (number_tiles > 1) { 2591 fences = kmalloc_array(number_tiles, sizeof(*fences), 2592 GFP_KERNEL); 2593 if (!fences) { 2594 fence = ERR_PTR(-ENOMEM); 2595 goto err_trace; 2596 } 2597 } 2598 2599 for_each_tile(tile, vm->xe, id) { 2600 if (!vops->pt_update_ops[id].num_ops) 2601 continue; 2602 2603 err = xe_pt_update_ops_prepare(tile, vops); 2604 if (err) { 2605 fence = ERR_PTR(err); 2606 goto err_out; 2607 } 2608 } 2609 2610 trace_xe_vm_ops_execute(vops); 2611 2612 for_each_tile(tile, vm->xe, id) { 2613 if (!vops->pt_update_ops[id].num_ops) 2614 continue; 2615 2616 fence = xe_pt_update_ops_run(tile, vops); 2617 if (IS_ERR(fence)) 2618 goto err_out; 2619 2620 if (fences) 2621 fences[current_fence++] = fence; 2622 } 2623 2624 if (fences) { 2625 cf = dma_fence_array_create(number_tiles, fences, 2626 vm->composite_fence_ctx, 2627 vm->composite_fence_seqno++, 2628 false); 2629 if (!cf) { 2630 --vm->composite_fence_seqno; 2631 fence = ERR_PTR(-ENOMEM); 2632 goto err_out; 2633 } 2634 fence = &cf->base; 2635 } 2636 2637 for_each_tile(tile, vm->xe, id) { 2638 if (!vops->pt_update_ops[id].num_ops) 2639 continue; 2640 2641 xe_pt_update_ops_fini(tile, vops); 2642 } 2643 2644 return fence; 2645 2646 err_out: 2647 for_each_tile(tile, vm->xe, id) { 2648 if (!vops->pt_update_ops[id].num_ops) 2649 continue; 2650 2651 xe_pt_update_ops_abort(tile, vops); 2652 } 2653 while (current_fence) 2654 dma_fence_put(fences[--current_fence]); 2655 kfree(fences); 2656 kfree(cf); 2657 2658 err_trace: 2659 trace_xe_vm_ops_fail(vm); 2660 return fence; 2661 } 2662 2663 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence) 2664 { 2665 if (vma->ufence) 2666 xe_sync_ufence_put(vma->ufence); 2667 vma->ufence = __xe_sync_ufence_get(ufence); 2668 } 2669 2670 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op, 2671 struct xe_user_fence *ufence) 2672 { 2673 switch (op->base.op) { 2674 case DRM_GPUVA_OP_MAP: 2675 vma_add_ufence(op->map.vma, ufence); 2676 break; 2677 case DRM_GPUVA_OP_REMAP: 2678 if (op->remap.prev) 2679 vma_add_ufence(op->remap.prev, ufence); 2680 if (op->remap.next) 2681 vma_add_ufence(op->remap.next, ufence); 2682 break; 2683 case DRM_GPUVA_OP_UNMAP: 2684 break; 2685 case DRM_GPUVA_OP_PREFETCH: 2686 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence); 2687 break; 2688 default: 2689 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2690 } 2691 } 2692 2693 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, 2694 struct dma_fence *fence) 2695 { 2696 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q); 2697 struct xe_user_fence *ufence; 2698 struct xe_vma_op *op; 2699 int i; 2700 2701 ufence = find_ufence_get(vops->syncs, vops->num_syncs); 2702 list_for_each_entry(op, &vops->list, link) { 2703 if (ufence) 2704 op_add_ufence(vm, op, ufence); 2705 2706 if (op->base.op == DRM_GPUVA_OP_UNMAP) 2707 
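/*
 * VMAs removed by an UNMAP, or replaced by a REMAP, are destroyed
 * against the bind fence here, so their final teardown is deferred
 * until the page-table update has signaled.
 */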
xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence); 2708 else if (op->base.op == DRM_GPUVA_OP_REMAP) 2709 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), 2710 fence); 2711 } 2712 if (ufence) 2713 xe_sync_ufence_put(ufence); 2714 for (i = 0; i < vops->num_syncs; i++) 2715 xe_sync_entry_signal(vops->syncs + i, fence); 2716 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); 2717 } 2718 2719 static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm, 2720 struct xe_vma_ops *vops) 2721 { 2722 struct drm_exec exec; 2723 struct dma_fence *fence; 2724 int err; 2725 2726 lockdep_assert_held_write(&vm->lock); 2727 2728 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | 2729 DRM_EXEC_IGNORE_DUPLICATES, 0); 2730 drm_exec_until_all_locked(&exec) { 2731 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops); 2732 drm_exec_retry_on_contention(&exec); 2733 if (err) { 2734 fence = ERR_PTR(err); 2735 goto unlock; 2736 } 2737 2738 fence = ops_execute(vm, vops); 2739 if (IS_ERR(fence)) 2740 goto unlock; 2741 2742 vm_bind_ioctl_ops_fini(vm, vops, fence); 2743 } 2744 2745 unlock: 2746 drm_exec_fini(&exec); 2747 return fence; 2748 } 2749 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); 2750 2751 #define SUPPORTED_FLAGS_STUB \ 2752 (DRM_XE_VM_BIND_FLAG_READONLY | \ 2753 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \ 2754 DRM_XE_VM_BIND_FLAG_NULL | \ 2755 DRM_XE_VM_BIND_FLAG_DUMPABLE | \ 2756 DRM_XE_VM_BIND_FLAG_CHECK_PXP) 2757 2758 #ifdef TEST_VM_OPS_ERROR 2759 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR) 2760 #else 2761 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB 2762 #endif 2763 2764 #define XE_64K_PAGE_MASK 0xffffull 2765 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) 2766 2767 static int vm_bind_ioctl_check_args(struct xe_device *xe, 2768 struct drm_xe_vm_bind *args, 2769 struct drm_xe_vm_bind_op **bind_ops) 2770 { 2771 int err; 2772 int i; 2773 2774 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) || 2775 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 2776 return -EINVAL; 2777 2778 if (XE_IOCTL_DBG(xe, args->extensions)) 2779 return -EINVAL; 2780 2781 if (args->num_binds > 1) { 2782 u64 __user *bind_user = 2783 u64_to_user_ptr(args->vector_of_binds); 2784 2785 *bind_ops = kvmalloc_array(args->num_binds, 2786 sizeof(struct drm_xe_vm_bind_op), 2787 GFP_KERNEL | __GFP_ACCOUNT | 2788 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 2789 if (!*bind_ops) 2790 return args->num_binds > 1 ? 
-ENOBUFS : -ENOMEM; 2791 2792 err = __copy_from_user(*bind_ops, bind_user, 2793 sizeof(struct drm_xe_vm_bind_op) * 2794 args->num_binds); 2795 if (XE_IOCTL_DBG(xe, err)) { 2796 err = -EFAULT; 2797 goto free_bind_ops; 2798 } 2799 } else { 2800 *bind_ops = &args->bind; 2801 } 2802 2803 for (i = 0; i < args->num_binds; ++i) { 2804 u64 range = (*bind_ops)[i].range; 2805 u64 addr = (*bind_ops)[i].addr; 2806 u32 op = (*bind_ops)[i].op; 2807 u32 flags = (*bind_ops)[i].flags; 2808 u32 obj = (*bind_ops)[i].obj; 2809 u64 obj_offset = (*bind_ops)[i].obj_offset; 2810 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance; 2811 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 2812 u16 pat_index = (*bind_ops)[i].pat_index; 2813 u16 coh_mode; 2814 2815 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) { 2816 err = -EINVAL; 2817 goto free_bind_ops; 2818 } 2819 2820 pat_index = array_index_nospec(pat_index, xe->pat.n_entries); 2821 (*bind_ops)[i].pat_index = pat_index; 2822 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); 2823 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */ 2824 err = -EINVAL; 2825 goto free_bind_ops; 2826 } 2827 2828 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) { 2829 err = -EINVAL; 2830 goto free_bind_ops; 2831 } 2832 2833 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) || 2834 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) || 2835 XE_IOCTL_DBG(xe, obj && is_null) || 2836 XE_IOCTL_DBG(xe, obj_offset && is_null) || 2837 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP && 2838 is_null) || 2839 XE_IOCTL_DBG(xe, !obj && 2840 op == DRM_XE_VM_BIND_OP_MAP && 2841 !is_null) || 2842 XE_IOCTL_DBG(xe, !obj && 2843 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 2844 XE_IOCTL_DBG(xe, addr && 2845 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 2846 XE_IOCTL_DBG(xe, range && 2847 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 2848 XE_IOCTL_DBG(xe, obj && 2849 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 2850 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 2851 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 2852 XE_IOCTL_DBG(xe, obj && 2853 op == DRM_XE_VM_BIND_OP_PREFETCH) || 2854 XE_IOCTL_DBG(xe, prefetch_region && 2855 op != DRM_XE_VM_BIND_OP_PREFETCH) || 2856 XE_IOCTL_DBG(xe, !(BIT(prefetch_region) & 2857 xe->info.mem_region_mask)) || 2858 XE_IOCTL_DBG(xe, obj && 2859 op == DRM_XE_VM_BIND_OP_UNMAP)) { 2860 err = -EINVAL; 2861 goto free_bind_ops; 2862 } 2863 2864 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) || 2865 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) || 2866 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) || 2867 XE_IOCTL_DBG(xe, !range && 2868 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) { 2869 err = -EINVAL; 2870 goto free_bind_ops; 2871 } 2872 } 2873 2874 return 0; 2875 2876 free_bind_ops: 2877 if (args->num_binds > 1) 2878 kvfree(*bind_ops); 2879 return err; 2880 } 2881 2882 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, 2883 struct xe_exec_queue *q, 2884 struct xe_sync_entry *syncs, 2885 int num_syncs) 2886 { 2887 struct dma_fence *fence; 2888 int i, err = 0; 2889 2890 fence = xe_sync_in_fence_get(syncs, num_syncs, 2891 to_wait_exec_queue(vm, q), vm); 2892 if (IS_ERR(fence)) 2893 return PTR_ERR(fence); 2894 2895 for (i = 0; i < num_syncs; i++) 2896 xe_sync_entry_signal(&syncs[i], fence); 2897 2898 xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm, 2899 fence); 2900 dma_fence_put(fence); 2901 2902 return err; 2903 } 2904 2905 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 2906 struct xe_exec_queue *q, 2907 struct xe_sync_entry *syncs, u32 num_syncs) 2908 { 2909 memset(vops, 0, 
sizeof(*vops));
2910 INIT_LIST_HEAD(&vops->list);
2911 vops->vm = vm;
2912 vops->q = q;
2913 vops->syncs = syncs;
2914 vops->num_syncs = num_syncs;
2915 }
2916
2917 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
2918 u64 addr, u64 range, u64 obj_offset,
2919 u16 pat_index, u32 op, u32 bind_flags)
2920 {
2921 u16 coh_mode;
2922
2923 if (XE_IOCTL_DBG(xe, range > bo->size) ||
2924 XE_IOCTL_DBG(xe, obj_offset >
2925 bo->size - range)) {
2926 return -EINVAL;
2927 }
2928
2929 /*
2930 * Some platforms require 64k VM_BIND alignment,
2931 * specifically those with XE_VRAM_FLAGS_NEED64K.
2932 *
2933 * Other platforms may have BOs set to 64k physical placement,
2934 * but can be mapped at 4k offsets anyway. This check is only
2935 * there for the former case.
2936 */
2937 if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) &&
2938 (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) {
2939 if (XE_IOCTL_DBG(xe, obj_offset &
2940 XE_64K_PAGE_MASK) ||
2941 XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
2942 XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
2943 return -EINVAL;
2944 }
2945 }
2946
2947 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
2948 if (bo->cpu_caching) {
2949 if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
2950 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
2951 return -EINVAL;
2952 }
2953 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
2954 /*
2955 * Imported dma-buf from a different device should
2956 * require 1way or 2way coherency since we don't know
2957 * how it was mapped on the CPU. Just assume it is
2958 * potentially cached on the CPU side.
2959 */
2960 return -EINVAL;
2961 }
2962
2963 /* If a BO is protected it can only be mapped if the key is still valid */
2964 if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) &&
2965 op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL)
2966 if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0))
2967 return -ENOEXEC;
2968
2969 return 0;
2970 }
2971
2972 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2973 {
2974 struct xe_device *xe = to_xe_device(dev);
2975 struct xe_file *xef = to_xe_file(file);
2976 struct drm_xe_vm_bind *args = data;
2977 struct drm_xe_sync __user *syncs_user;
2978 struct xe_bo **bos = NULL;
2979 struct drm_gpuva_ops **ops = NULL;
2980 struct xe_vm *vm;
2981 struct xe_exec_queue *q = NULL;
2982 u32 num_syncs, num_ufence = 0;
2983 struct xe_sync_entry *syncs = NULL;
2984 struct drm_xe_vm_bind_op *bind_ops;
2985 struct xe_vma_ops vops;
2986 struct dma_fence *fence;
2987 int err;
2988 int i;
2989
2990 err = vm_bind_ioctl_check_args(xe, args, &bind_ops);
2991 if (err)
2992 return err;
2993
2994 if (args->exec_queue_id) {
2995 q = xe_exec_queue_lookup(xef, args->exec_queue_id);
2996 if (XE_IOCTL_DBG(xe, !q)) {
2997 err = -ENOENT;
2998 goto free_objs;
2999 }
3000
3001 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
3002 err = -EINVAL;
3003 goto put_exec_queue;
3004 }
3005 }
3006
3007 vm = xe_vm_lookup(xef, args->vm_id);
3008 if (XE_IOCTL_DBG(xe, !vm)) {
3009 err = -EINVAL;
3010 goto put_exec_queue;
3011 }
3012
3013 err = down_write_killable(&vm->lock);
3014 if (err)
3015 goto put_vm;
3016
3017 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
3018 err = -ENOENT;
3019 goto release_vm_lock;
3020 }
3021
3022 for (i = 0; i < args->num_binds; ++i) {
3023 u64 range = bind_ops[i].range;
3024 u64 addr = bind_ops[i].addr;
3025
3026 if (XE_IOCTL_DBG(xe, range > vm->size) ||
3027 XE_IOCTL_DBG(xe, addr > vm->size -
range)) { 3028 err = -EINVAL; 3029 goto release_vm_lock; 3030 } 3031 } 3032 3033 if (args->num_binds) { 3034 bos = kvcalloc(args->num_binds, sizeof(*bos), 3035 GFP_KERNEL | __GFP_ACCOUNT | 3036 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3037 if (!bos) { 3038 err = -ENOMEM; 3039 goto release_vm_lock; 3040 } 3041 3042 ops = kvcalloc(args->num_binds, sizeof(*ops), 3043 GFP_KERNEL | __GFP_ACCOUNT | 3044 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3045 if (!ops) { 3046 err = -ENOMEM; 3047 goto release_vm_lock; 3048 } 3049 } 3050 3051 for (i = 0; i < args->num_binds; ++i) { 3052 struct drm_gem_object *gem_obj; 3053 u64 range = bind_ops[i].range; 3054 u64 addr = bind_ops[i].addr; 3055 u32 obj = bind_ops[i].obj; 3056 u64 obj_offset = bind_ops[i].obj_offset; 3057 u16 pat_index = bind_ops[i].pat_index; 3058 u32 op = bind_ops[i].op; 3059 u32 bind_flags = bind_ops[i].flags; 3060 3061 if (!obj) 3062 continue; 3063 3064 gem_obj = drm_gem_object_lookup(file, obj); 3065 if (XE_IOCTL_DBG(xe, !gem_obj)) { 3066 err = -ENOENT; 3067 goto put_obj; 3068 } 3069 bos[i] = gem_to_xe_bo(gem_obj); 3070 3071 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range, 3072 obj_offset, pat_index, op, 3073 bind_flags); 3074 if (err) 3075 goto put_obj; 3076 } 3077 3078 if (args->num_syncs) { 3079 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); 3080 if (!syncs) { 3081 err = -ENOMEM; 3082 goto put_obj; 3083 } 3084 } 3085 3086 syncs_user = u64_to_user_ptr(args->syncs); 3087 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { 3088 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], 3089 &syncs_user[num_syncs], 3090 (xe_vm_in_lr_mode(vm) ? 3091 SYNC_PARSE_FLAG_LR_MODE : 0) | 3092 (!args->num_binds ? 3093 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0)); 3094 if (err) 3095 goto free_syncs; 3096 3097 if (xe_sync_is_ufence(&syncs[num_syncs])) 3098 num_ufence++; 3099 } 3100 3101 if (XE_IOCTL_DBG(xe, num_ufence > 1)) { 3102 err = -EINVAL; 3103 goto free_syncs; 3104 } 3105 3106 if (!args->num_binds) { 3107 err = -ENODATA; 3108 goto free_syncs; 3109 } 3110 3111 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); 3112 for (i = 0; i < args->num_binds; ++i) { 3113 u64 range = bind_ops[i].range; 3114 u64 addr = bind_ops[i].addr; 3115 u32 op = bind_ops[i].op; 3116 u32 flags = bind_ops[i].flags; 3117 u64 obj_offset = bind_ops[i].obj_offset; 3118 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance; 3119 u16 pat_index = bind_ops[i].pat_index; 3120 3121 ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset, 3122 addr, range, op, flags, 3123 prefetch_region, pat_index); 3124 if (IS_ERR(ops[i])) { 3125 err = PTR_ERR(ops[i]); 3126 ops[i] = NULL; 3127 goto unwind_ops; 3128 } 3129 3130 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops); 3131 if (err) 3132 goto unwind_ops; 3133 3134 #ifdef TEST_VM_OPS_ERROR 3135 if (flags & FORCE_OP_ERROR) { 3136 vops.inject_error = true; 3137 vm->xe->vm_inject_error_position = 3138 (vm->xe->vm_inject_error_position + 1) % 3139 FORCE_OP_ERROR_COUNT; 3140 } 3141 #endif 3142 } 3143 3144 /* Nothing to do */ 3145 if (list_empty(&vops.list)) { 3146 err = -ENODATA; 3147 goto unwind_ops; 3148 } 3149 3150 err = xe_vma_ops_alloc(&vops, args->num_binds > 1); 3151 if (err) 3152 goto unwind_ops; 3153 3154 fence = vm_bind_ioctl_ops_execute(vm, &vops); 3155 if (IS_ERR(fence)) 3156 err = PTR_ERR(fence); 3157 else 3158 dma_fence_put(fence); 3159 3160 unwind_ops: 3161 if (err && err != -ENODATA) 3162 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); 3163 xe_vma_ops_fini(&vops); 3164 for (i = args->num_binds - 1; i 
>= 0; --i) 3165 if (ops[i]) 3166 drm_gpuva_ops_free(&vm->gpuvm, ops[i]); 3167 free_syncs: 3168 if (err == -ENODATA) 3169 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs); 3170 while (num_syncs--) 3171 xe_sync_entry_cleanup(&syncs[num_syncs]); 3172 3173 kfree(syncs); 3174 put_obj: 3175 for (i = 0; i < args->num_binds; ++i) 3176 xe_bo_put(bos[i]); 3177 release_vm_lock: 3178 up_write(&vm->lock); 3179 put_vm: 3180 xe_vm_put(vm); 3181 put_exec_queue: 3182 if (q) 3183 xe_exec_queue_put(q); 3184 free_objs: 3185 kvfree(bos); 3186 kvfree(ops); 3187 if (args->num_binds > 1) 3188 kvfree(bind_ops); 3189 return err; 3190 } 3191 3192 /** 3193 * xe_vm_bind_kernel_bo - bind a kernel BO to a VM 3194 * @vm: VM to bind the BO to 3195 * @bo: BO to bind 3196 * @q: exec queue to use for the bind (optional) 3197 * @addr: address at which to bind the BO 3198 * @cache_lvl: PAT cache level to use 3199 * 3200 * Execute a VM bind map operation on a kernel-owned BO to bind it into a 3201 * kernel-owned VM. 3202 * 3203 * Returns a dma_fence to track the binding completion if the job to do so was 3204 * successfully submitted, an error pointer otherwise. 3205 */ 3206 struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo, 3207 struct xe_exec_queue *q, u64 addr, 3208 enum xe_cache_level cache_lvl) 3209 { 3210 struct xe_vma_ops vops; 3211 struct drm_gpuva_ops *ops = NULL; 3212 struct dma_fence *fence; 3213 int err; 3214 3215 xe_bo_get(bo); 3216 xe_vm_get(vm); 3217 if (q) 3218 xe_exec_queue_get(q); 3219 3220 down_write(&vm->lock); 3221 3222 xe_vma_ops_init(&vops, vm, q, NULL, 0); 3223 3224 ops = vm_bind_ioctl_ops_create(vm, bo, 0, addr, bo->size, 3225 DRM_XE_VM_BIND_OP_MAP, 0, 0, 3226 vm->xe->pat.idx[cache_lvl]); 3227 if (IS_ERR(ops)) { 3228 err = PTR_ERR(ops); 3229 goto release_vm_lock; 3230 } 3231 3232 err = vm_bind_ioctl_ops_parse(vm, ops, &vops); 3233 if (err) 3234 goto release_vm_lock; 3235 3236 xe_assert(vm->xe, !list_empty(&vops.list)); 3237 3238 err = xe_vma_ops_alloc(&vops, false); 3239 if (err) 3240 goto unwind_ops; 3241 3242 fence = vm_bind_ioctl_ops_execute(vm, &vops); 3243 if (IS_ERR(fence)) 3244 err = PTR_ERR(fence); 3245 3246 unwind_ops: 3247 if (err && err != -ENODATA) 3248 vm_bind_ioctl_ops_unwind(vm, &ops, 1); 3249 3250 xe_vma_ops_fini(&vops); 3251 drm_gpuva_ops_free(&vm->gpuvm, ops); 3252 3253 release_vm_lock: 3254 up_write(&vm->lock); 3255 3256 if (q) 3257 xe_exec_queue_put(q); 3258 xe_vm_put(vm); 3259 xe_bo_put(bo); 3260 3261 if (err) 3262 fence = ERR_PTR(err); 3263 3264 return fence; 3265 } 3266 3267 /** 3268 * xe_vm_lock() - Lock the vm's dma_resv object 3269 * @vm: The struct xe_vm whose lock is to be locked 3270 * @intr: Whether to perform any wait interruptible 3271 * 3272 * Return: 0 on success, -EINTR if @intr is true and the wait for a 3273 * contended lock was interrupted. If @intr is false, the function 3274 * always returns 0. 3275 */ 3276 int xe_vm_lock(struct xe_vm *vm, bool intr) 3277 { 3278 if (intr) 3279 return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); 3280 3281 return dma_resv_lock(xe_vm_resv(vm), NULL); 3282 } 3283 3284 /** 3285 * xe_vm_unlock() - Unlock the vm's dma_resv object 3286 * @vm: The struct xe_vm whose lock is to be released. 3287 * 3288 * Unlock a buffer object lock that was locked by xe_vm_lock(). 
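 *
 * For illustration, a minimal lock/unlock pairing might look like:
 *
 *	err = xe_vm_lock(vm, true);
 *	if (err)
 *		return err;
 *	... touch state protected by the vm's dma_resv ...
 *	xe_vm_unlock(vm);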
3289 */
3290 void xe_vm_unlock(struct xe_vm *vm)
3291 {
3292 dma_resv_unlock(xe_vm_resv(vm));
3293 }
3294
3295 /**
3296 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
3297 * @vma: VMA to invalidate
3298 *
3299 * Walks the page-table leaves, zeroing the entries owned by this VMA,
3300 * invalidates the TLBs, and blocks until the TLB invalidation is
3301 * complete.
3302 *
3303 * Return: 0 for success, negative error code otherwise.
3304 */
3305 int xe_vm_invalidate_vma(struct xe_vma *vma)
3306 {
3307 struct xe_device *xe = xe_vma_vm(vma)->xe;
3308 struct xe_tile *tile;
3309 struct xe_gt_tlb_invalidation_fence
3310 fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
3311 u8 id;
3312 u32 fence_id = 0;
3313 int ret = 0;
3314
3315 xe_assert(xe, !xe_vma_is_null(vma));
3316 trace_xe_vma_invalidate(vma);
3317
3318 vm_dbg(&xe_vma_vm(vma)->xe->drm,
3319 "INVALIDATE: addr=0x%016llx, range=0x%016llx",
3320 xe_vma_start(vma), xe_vma_size(vma));
3321
3322 /* Check that we don't race with page-table updates */
3323 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
3324 if (xe_vma_is_userptr(vma)) {
3325 WARN_ON_ONCE(!mmu_interval_check_retry
3326 (&to_userptr_vma(vma)->userptr.notifier,
3327 to_userptr_vma(vma)->userptr.notifier_seq));
3328 WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)),
3329 DMA_RESV_USAGE_BOOKKEEP));
3330
3331 } else {
3332 xe_bo_assert_held(xe_vma_bo(vma));
3333 }
3334 }
3335
3336 for_each_tile(tile, xe, id) {
3337 if (xe_pt_zap_ptes(tile, vma)) {
3338 xe_device_wmb(xe);
3339 xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
3340 &fence[fence_id],
3341 true);
3342
3343 ret = xe_gt_tlb_invalidation_vma(tile->primary_gt,
3344 &fence[fence_id], vma);
3345 if (ret)
3346 goto wait;
3347 ++fence_id;
3348
3349 if (!tile->media_gt)
3350 continue;
3351
3352 xe_gt_tlb_invalidation_fence_init(tile->media_gt,
3353 &fence[fence_id],
3354 true);
3355
3356 ret = xe_gt_tlb_invalidation_vma(tile->media_gt,
3357 &fence[fence_id], vma);
3358 if (ret)
3359 goto wait;
3360 ++fence_id;
3361 }
3362 }
3363
3364 wait:
3365 for (id = 0; id < fence_id; ++id)
3366 xe_gt_tlb_invalidation_fence_wait(&fence[id]);
3367
3368 vma->tile_invalidated = vma->tile_mask;
3369
3370 return ret;
3371 }
3372
3373 int xe_vm_validate_protected(struct xe_vm *vm)
3374 {
3375 struct drm_gpuva *gpuva;
3376 int err = 0;
3377
3378 if (!vm)
3379 return -ENODEV;
3380
3381 mutex_lock(&vm->snap_mutex);
3382
3383 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3384 struct xe_vma *vma = gpuva_to_vma(gpuva);
3385 struct xe_bo *bo = vma->gpuva.gem.obj ?
3386 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 3387 3388 if (!bo) 3389 continue; 3390 3391 if (xe_bo_is_protected(bo)) { 3392 err = xe_pxp_bo_key_check(vm->xe->pxp, bo); 3393 if (err) 3394 break; 3395 } 3396 } 3397 3398 mutex_unlock(&vm->snap_mutex); 3399 return err; 3400 } 3401 3402 struct xe_vm_snapshot { 3403 unsigned long num_snaps; 3404 struct { 3405 u64 ofs, bo_ofs; 3406 unsigned long len; 3407 struct xe_bo *bo; 3408 void *data; 3409 struct mm_struct *mm; 3410 } snap[]; 3411 }; 3412 3413 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm) 3414 { 3415 unsigned long num_snaps = 0, i; 3416 struct xe_vm_snapshot *snap = NULL; 3417 struct drm_gpuva *gpuva; 3418 3419 if (!vm) 3420 return NULL; 3421 3422 mutex_lock(&vm->snap_mutex); 3423 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 3424 if (gpuva->flags & XE_VMA_DUMPABLE) 3425 num_snaps++; 3426 } 3427 3428 if (num_snaps) 3429 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT); 3430 if (!snap) { 3431 snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV); 3432 goto out_unlock; 3433 } 3434 3435 snap->num_snaps = num_snaps; 3436 i = 0; 3437 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 3438 struct xe_vma *vma = gpuva_to_vma(gpuva); 3439 struct xe_bo *bo = vma->gpuva.gem.obj ? 3440 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 3441 3442 if (!(gpuva->flags & XE_VMA_DUMPABLE)) 3443 continue; 3444 3445 snap->snap[i].ofs = xe_vma_start(vma); 3446 snap->snap[i].len = xe_vma_size(vma); 3447 if (bo) { 3448 snap->snap[i].bo = xe_bo_get(bo); 3449 snap->snap[i].bo_ofs = xe_vma_bo_offset(vma); 3450 } else if (xe_vma_is_userptr(vma)) { 3451 struct mm_struct *mm = 3452 to_userptr_vma(vma)->userptr.notifier.mm; 3453 3454 if (mmget_not_zero(mm)) 3455 snap->snap[i].mm = mm; 3456 else 3457 snap->snap[i].data = ERR_PTR(-EFAULT); 3458 3459 snap->snap[i].bo_ofs = xe_vma_userptr(vma); 3460 } else { 3461 snap->snap[i].data = ERR_PTR(-ENOENT); 3462 } 3463 i++; 3464 } 3465 3466 out_unlock: 3467 mutex_unlock(&vm->snap_mutex); 3468 return snap; 3469 } 3470 3471 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) 3472 { 3473 if (IS_ERR_OR_NULL(snap)) 3474 return; 3475 3476 for (int i = 0; i < snap->num_snaps; i++) { 3477 struct xe_bo *bo = snap->snap[i].bo; 3478 int err; 3479 3480 if (IS_ERR(snap->snap[i].data)) 3481 continue; 3482 3483 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER); 3484 if (!snap->snap[i].data) { 3485 snap->snap[i].data = ERR_PTR(-ENOMEM); 3486 goto cleanup_bo; 3487 } 3488 3489 if (bo) { 3490 err = xe_bo_read(bo, snap->snap[i].bo_ofs, 3491 snap->snap[i].data, snap->snap[i].len); 3492 } else { 3493 void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs; 3494 3495 kthread_use_mm(snap->snap[i].mm); 3496 if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len)) 3497 err = 0; 3498 else 3499 err = -EFAULT; 3500 kthread_unuse_mm(snap->snap[i].mm); 3501 3502 mmput(snap->snap[i].mm); 3503 snap->snap[i].mm = NULL; 3504 } 3505 3506 if (err) { 3507 kvfree(snap->snap[i].data); 3508 snap->snap[i].data = ERR_PTR(err); 3509 } 3510 3511 cleanup_bo: 3512 xe_bo_put(bo); 3513 snap->snap[i].bo = NULL; 3514 } 3515 } 3516 3517 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) 3518 { 3519 unsigned long i, j; 3520 3521 if (IS_ERR_OR_NULL(snap)) { 3522 drm_printf(p, "[0].error: %li\n", PTR_ERR(snap)); 3523 return; 3524 } 3525 3526 for (i = 0; i < snap->num_snaps; i++) { 3527 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len); 3528 3529 if 
(IS_ERR(snap->snap[i].data)) { 3530 drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs, 3531 PTR_ERR(snap->snap[i].data)); 3532 continue; 3533 } 3534 3535 drm_printf(p, "[%llx].data: ", snap->snap[i].ofs); 3536 3537 for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) { 3538 u32 *val = snap->snap[i].data + j; 3539 char dumped[ASCII85_BUFSZ]; 3540 3541 drm_puts(p, ascii85_encode(*val, dumped)); 3542 } 3543 3544 drm_puts(p, "\n"); 3545 } 3546 } 3547 3548 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap) 3549 { 3550 unsigned long i; 3551 3552 if (IS_ERR_OR_NULL(snap)) 3553 return; 3554 3555 for (i = 0; i < snap->num_snaps; i++) { 3556 if (!IS_ERR(snap->snap[i].data)) 3557 kvfree(snap->snap[i].data); 3558 xe_bo_put(snap->snap[i].bo); 3559 if (snap->snap[i].mm) 3560 mmput(snap->snap[i].mm); 3561 } 3562 kvfree(snap); 3563 } 3564