// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_vm.h"

#include <linux/dma-fence-array.h>
#include <linux/nospec.h>

#include <drm/drm_exec.h>
#include <drm/drm_print.h>
#include <drm/ttm/ttm_tt.h>
#include <uapi/drm/xe_drm.h>
#include <linux/ascii85.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/swap.h>

#include <generated/xe_wa_oob.h>

#include "regs/xe_gtt_defs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue.h"
#include "xe_gt_pagefault.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_migrate.h"
#include "xe_pat.h"
#include "xe_pm.h"
#include "xe_preempt_fence.h"
#include "xe_pt.h"
#include "xe_pxp.h"
#include "xe_res_cursor.h"
#include "xe_sync.h"
#include "xe_trace_bo.h"
#include "xe_wa.h"
#include "xe_hmm.h"

static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
{
	return vm->gpuvm.r_obj;
}

/**
 * xe_vma_userptr_check_repin() - Advisory check for repin needed
 * @uvma: The userptr vma
 *
 * Check if the userptr vma has been invalidated since last successful
 * repin. The check is advisory only and the function can be called
 * without the vm->userptr.notifier_lock held. There is no guarantee that the
 * vma userptr will remain valid after a lockless check, so typically
 * the call needs to be followed by a proper check under the notifier_lock.
 *
 * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
 */
int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
{
	return mmu_interval_check_retry(&uvma->userptr.notifier,
					uvma->userptr.notifier_seq) ?
		-EAGAIN : 0;
}
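
/*
 * Illustrative sketch, condensed from preempt_rebind_work_func() further
 * down in this file: the advisory check is paired with a repin and an
 * authoritative re-check under the notifier lock before the result is
 * committed:
 *
 *	if (xe_vm_userptr_check_repin(vm))	// lockless, advisory
 *		err = xe_vm_userptr_pin(vm);	// repin outside the lock
 *	...
 *	down_read(&vm->userptr.notifier_lock);
 *	if (__xe_vm_userptr_needs_repin(vm))	// authoritative re-check
 *		err = -EAGAIN;			// redo the sequence
 *	up_read(&vm->userptr.notifier_lock);
 */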

int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
{
	struct xe_vma *vma = &uvma->vma;
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_device *xe = vm->xe;

	lockdep_assert_held(&vm->lock);
	xe_assert(xe, xe_vma_is_userptr(vma));

	return xe_hmm_userptr_populate_range(uvma, false);
}

static bool preempt_fences_waiting(struct xe_vm *vm)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		if (!q->lr.pfence ||
		    test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
			     &q->lr.pfence->flags)) {
			return true;
		}
	}

	return false;
}

static void free_preempt_fences(struct list_head *list)
{
	struct list_head *link, *next;

	list_for_each_safe(link, next, list)
		xe_preempt_fence_free(to_preempt_fence_from_link(link));
}

static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
				unsigned int *count)
{
	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);

	if (*count >= vm->preempt.num_exec_queues)
		return 0;

	for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
		struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();

		if (IS_ERR(pfence))
			return PTR_ERR(pfence);

		list_move_tail(xe_preempt_fence_link(pfence), list);
	}

	return 0;
}

static int wait_for_existing_preempt_fences(struct xe_vm *vm)
{
	struct xe_exec_queue *q;

	xe_vm_assert_held(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		if (q->lr.pfence) {
			long timeout = dma_fence_wait(q->lr.pfence, false);

			/* Only -ETIME on fence indicates VM needs to be killed */
			if (timeout < 0 || q->lr.pfence->error == -ETIME)
				return -ETIME;

			dma_fence_put(q->lr.pfence);
			q->lr.pfence = NULL;
		}
	}

	return 0;
}

static bool xe_vm_is_idle(struct xe_vm *vm)
{
	struct xe_exec_queue *q;

	xe_vm_assert_held(vm);
	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		if (!xe_exec_queue_is_idle(q))
			return false;
	}

	return true;
}

static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
{
	struct list_head *link;
	struct xe_exec_queue *q;

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		struct dma_fence *fence;

		link = list->next;
		xe_assert(vm->xe, link != list);

		fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
					     q, q->lr.context,
					     ++q->lr.seqno);
		dma_fence_put(q->lr.pfence);
		q->lr.pfence = fence;
	}
}

static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
{
	struct xe_exec_queue *q;
	int err;

	xe_bo_assert_held(bo);

	if (!vm->preempt.num_exec_queues)
		return 0;

	err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
	if (err)
		return err;

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
		if (q->lr.pfence) {
			dma_resv_add_fence(bo->ttm.base.resv,
					   q->lr.pfence,
					   DMA_RESV_USAGE_BOOKKEEP);
		}

	return 0;
}

static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
						struct drm_exec *exec)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		q->ops->resume(q);

		drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
					 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
	}
}

int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	struct drm_gpuvm_exec vm_exec = {
		.vm = &vm->gpuvm,
		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
		.num_fences = 1,
	};
	struct drm_exec *exec = &vm_exec.exec;
	struct dma_fence *pfence;
	int err;
	bool wait;

	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));

	down_write(&vm->lock);
	err = drm_gpuvm_exec_lock(&vm_exec);
	if (err)
		goto out_up_write;

	pfence = xe_preempt_fence_create(q, q->lr.context,
					 ++q->lr.seqno);
	if (!pfence) {
		err = -ENOMEM;
		goto out_fini;
	}

	list_add(&q->lr.link, &vm->preempt.exec_queues);
	++vm->preempt.num_exec_queues;
	q->lr.pfence = pfence;

	down_read(&vm->userptr.notifier_lock);

	drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
				 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);

	/*
	 * Check to see if a preemption on the VM or a userptr invalidation
	 * is in flight; if so, trigger this preempt fence to sync state with
	 * the other preempt fences on the VM.
	 */
	wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
	if (wait)
		dma_fence_enable_sw_signaling(pfence);

	up_read(&vm->userptr.notifier_lock);

out_fini:
	drm_exec_fini(exec);
out_up_write:
	up_write(&vm->lock);

	return err;
}

/**
 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
 * @vm: The VM.
 * @q: The exec_queue
 *
 * Note that this function might be called multiple times on the same queue.
 */
void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	if (!xe_vm_in_preempt_fence_mode(vm))
		return;

	down_write(&vm->lock);
	if (!list_empty(&q->lr.link)) {
		list_del_init(&q->lr.link);
		--vm->preempt.num_exec_queues;
	}
	if (q->lr.pfence) {
		dma_fence_enable_sw_signaling(q->lr.pfence);
		dma_fence_put(q->lr.pfence);
		q->lr.pfence = NULL;
	}
	up_write(&vm->lock);
}

/**
 * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
 * that need repinning.
 * @vm: The VM.
 *
 * This function checks for whether the VM has userptrs that need repinning,
 * and provides a release-type barrier on the userptr.notifier_lock after
 * checking.
 *
 * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
 */
int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
{
	lockdep_assert_held_read(&vm->userptr.notifier_lock);

	return (list_empty(&vm->userptr.repin_list) &&
		list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
}

#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000

/**
 * xe_vm_kill() - VM Kill
 * @vm: The VM.
 * @unlocked: Flag indicating that the VM's dma-resv is not held
 *
 * Kill the VM by setting the banned flag, indicating the VM is no longer
 * available for use. If in preempt fence mode, also kill all exec queues
 * attached to the VM.
 */
void xe_vm_kill(struct xe_vm *vm, bool unlocked)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);

	if (unlocked)
		xe_vm_lock(vm, false);

	vm->flags |= XE_VM_FLAG_BANNED;
	trace_xe_vm_kill(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
		q->ops->kill(q);

	if (unlocked)
		xe_vm_unlock(vm);

	/* TODO: Inform user the VM is banned */
}

/**
 * xe_vm_validate_should_retry() - Whether to retry after a validate error.
 * @exec: The drm_exec object used for locking before validation.
 * @err: The error returned from ttm_bo_validate().
 * @end: A ktime_t cookie that should be set to 0 before first use and
 * that should be reused on subsequent calls.
 *
 * With multiple active VMs, under memory pressure, it is possible that
 * ttm_bo_validate() runs into -EDEADLK and in such a case returns -ENOMEM.
 * Until ttm properly handles locking in such scenarios, the best thing the
 * driver can do is retry with a timeout. Check if that is necessary, and
 * if so unlock the drm_exec's objects while keeping the ticket to prepare
 * for a rerun.
 *
 * Return: true if a retry after drm_exec_init() is recommended;
 * false otherwise.
 */
bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
{
	ktime_t cur;

	if (err != -ENOMEM)
		return false;

	cur = ktime_get();
	*end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
	if (!ktime_before(cur, *end))
		return false;

	msleep(20);
	return true;
}
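
/*
 * Illustrative sketch of the intended calling pattern (see
 * preempt_rebind_work_func() below for the real sequence): keep a ktime_t
 * cookie initialized to 0 and rerun the whole drm_exec transaction for as
 * long as xe_vm_validate_should_retry() recommends it:
 *
 *	ktime_t end = 0;
 *	int err;
 *
 * retry:
 *	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 *	...					// lock, validate, rebind
 *	drm_exec_fini(&exec);
 *	if (err && xe_vm_validate_should_retry(&exec, err, &end))
 *		goto retry;
 */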

static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
{
	struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
	struct drm_gpuva *gpuva;
	int ret;

	lockdep_assert_held(&vm->lock);
	drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
		list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
			       &vm->rebind_list);

	ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
	if (ret)
		return ret;

	vm_bo->evicted = false;
	return 0;
}

/**
 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
 * @vm: The vm for which we are rebinding.
 * @exec: The struct drm_exec with the locked GEM objects.
 * @num_fences: The number of fences to reserve for the operation, not
 * including rebinds and validations.
 *
 * Validates all evicted gem objects and rebinds their vmas. Note that
 * rebindings may cause evictions and hence the validation-rebind
 * sequence is rerun until there are no more objects to validate.
 *
 * Return: 0 on success, negative error code on error. In particular,
 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
 * the drm_exec transaction needs to be restarted.
 */
int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
			  unsigned int num_fences)
{
	struct drm_gem_object *obj;
	unsigned long index;
	int ret;

	do {
		ret = drm_gpuvm_validate(&vm->gpuvm, exec);
		if (ret)
			return ret;

		ret = xe_vm_rebind(vm, false);
		if (ret)
			return ret;
	} while (!list_empty(&vm->gpuvm.evict.list));

	drm_exec_for_each_locked_object(exec, index, obj) {
		ret = dma_resv_reserve_fences(obj->resv, num_fences);
		if (ret)
			return ret;
	}

	return 0;
}

static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
				 bool *done)
{
	int err;

	err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	if (xe_vm_is_idle(vm)) {
		vm->preempt.rebind_deactivated = true;
		*done = true;
		return 0;
	}

	if (!preempt_fences_waiting(vm)) {
		*done = true;
		return 0;
	}

	err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	err = wait_for_existing_preempt_fences(vm);
	if (err)
		return err;

	/*
	 * Add validation and rebinding to the locking loop since both can
	 * cause evictions which may require blocking dma_resv locks.
	 * The fence reservation here is intended for the new preempt fences
	 * we attach at the end of the rebind work.
	 */
	return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
}

static void preempt_rebind_work_func(struct work_struct *w)
{
	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
	struct drm_exec exec;
	unsigned int fence_count = 0;
	LIST_HEAD(preempt_fences);
	ktime_t end = 0;
	int err = 0;
	long wait;
	int __maybe_unused tries = 0;

	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
	trace_xe_vm_rebind_worker_enter(vm);

	down_write(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm)) {
		up_write(&vm->lock);
		trace_xe_vm_rebind_worker_exit(vm);
		return;
	}

retry:
	if (xe_vm_userptr_check_repin(vm)) {
		err = xe_vm_userptr_pin(vm);
		if (err)
			goto out_unlock_outer;
	}

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);

	drm_exec_until_all_locked(&exec) {
		bool done = false;

		err = xe_preempt_work_begin(&exec, vm, &done);
		drm_exec_retry_on_contention(&exec);
		if (err || done) {
			drm_exec_fini(&exec);
			if (err && xe_vm_validate_should_retry(&exec, err, &end))
				err = -EAGAIN;

			goto out_unlock_outer;
		}
	}

	err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
	if (err)
		goto out_unlock;

	err = xe_vm_rebind(vm, true);
	if (err)
		goto out_unlock;

	/* Wait on rebinds and munmap style VM unbinds */
	wait = dma_resv_wait_timeout(xe_vm_resv(vm),
				     DMA_RESV_USAGE_KERNEL,
				     false, MAX_SCHEDULE_TIMEOUT);
	if (wait <= 0) {
		err = -ETIME;
		goto out_unlock;
	}

#define retry_required(__tries, __vm) \
	(IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
	(!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
	__xe_vm_userptr_needs_repin(__vm))

	down_read(&vm->userptr.notifier_lock);
	if (retry_required(tries, vm)) {
		up_read(&vm->userptr.notifier_lock);
		err = -EAGAIN;
		goto out_unlock;
	}

#undef retry_required

	spin_lock(&vm->xe->ttm.lru_lock);
	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
	spin_unlock(&vm->xe->ttm.lru_lock);

	/* Point of no return. */
	arm_preempt_fences(vm, &preempt_fences);
	resume_and_reinstall_preempt_fences(vm, &exec);
	up_read(&vm->userptr.notifier_lock);

out_unlock:
	drm_exec_fini(&exec);
out_unlock_outer:
	if (err == -EAGAIN) {
		trace_xe_vm_rebind_worker_retry(vm);
		goto retry;
	}

	if (err) {
		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
		xe_vm_kill(vm, true);
	}
	up_write(&vm->lock);

	free_preempt_fences(&preempt_fences);

	trace_xe_vm_rebind_worker_exit(vm);
}

static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
				   const struct mmu_notifier_range *range,
				   unsigned long cur_seq)
{
	struct xe_userptr *userptr = container_of(mni, typeof(*userptr), notifier);
	struct xe_userptr_vma *uvma = container_of(userptr, typeof(*uvma), userptr);
	struct xe_vma *vma = &uvma->vma;
	struct xe_vm *vm = xe_vma_vm(vma);
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	long err;

	xe_assert(vm->xe, xe_vma_is_userptr(vma));
	trace_xe_vma_userptr_invalidate(vma);

	if (!mmu_notifier_range_blockable(range))
		return false;

	vm_dbg(&xe_vma_vm(vma)->xe->drm,
	       "NOTIFIER: addr=0x%016llx, range=0x%016llx",
	       xe_vma_start(vma), xe_vma_size(vma));

	down_write(&vm->userptr.notifier_lock);
	mmu_interval_set_seq(mni, cur_seq);

	/* No need to stop gpu access if the userptr is not yet bound. */
	if (!userptr->initial_bind) {
		up_write(&vm->userptr.notifier_lock);
		return true;
	}

	/*
	 * Tell exec and rebind worker they need to repin and rebind this
	 * userptr.
	 */
	if (!xe_vm_in_fault_mode(vm) &&
	    !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) {
		spin_lock(&vm->userptr.invalidated_lock);
		list_move_tail(&userptr->invalidate_link,
			       &vm->userptr.invalidated);
		spin_unlock(&vm->userptr.invalidated_lock);
	}

	up_write(&vm->userptr.notifier_lock);

	/*
	 * Preempt fences turn into schedule disables, pipeline these.
	 * Note that even in fault mode, we need to wait for binds and
	 * unbinds to complete, and those are attached as BOOKKEEP fences
	 * to the vm.
633 */ 634 dma_resv_iter_begin(&cursor, xe_vm_resv(vm), 635 DMA_RESV_USAGE_BOOKKEEP); 636 dma_resv_for_each_fence_unlocked(&cursor, fence) 637 dma_fence_enable_sw_signaling(fence); 638 dma_resv_iter_end(&cursor); 639 640 err = dma_resv_wait_timeout(xe_vm_resv(vm), 641 DMA_RESV_USAGE_BOOKKEEP, 642 false, MAX_SCHEDULE_TIMEOUT); 643 XE_WARN_ON(err <= 0); 644 645 if (xe_vm_in_fault_mode(vm)) { 646 err = xe_vm_invalidate_vma(vma); 647 XE_WARN_ON(err); 648 } 649 650 trace_xe_vma_userptr_invalidate_complete(vma); 651 652 return true; 653 } 654 655 static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { 656 .invalidate = vma_userptr_invalidate, 657 }; 658 659 int xe_vm_userptr_pin(struct xe_vm *vm) 660 { 661 struct xe_userptr_vma *uvma, *next; 662 int err = 0; 663 LIST_HEAD(tmp_evict); 664 665 xe_assert(vm->xe, !xe_vm_in_fault_mode(vm)); 666 lockdep_assert_held_write(&vm->lock); 667 668 /* Collect invalidated userptrs */ 669 spin_lock(&vm->userptr.invalidated_lock); 670 list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated, 671 userptr.invalidate_link) { 672 list_del_init(&uvma->userptr.invalidate_link); 673 list_move_tail(&uvma->userptr.repin_link, 674 &vm->userptr.repin_list); 675 } 676 spin_unlock(&vm->userptr.invalidated_lock); 677 678 /* Pin and move to temporary list */ 679 list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, 680 userptr.repin_link) { 681 err = xe_vma_userptr_pin_pages(uvma); 682 if (err == -EFAULT) { 683 list_del_init(&uvma->userptr.repin_link); 684 685 /* Wait for pending binds */ 686 xe_vm_lock(vm, false); 687 dma_resv_wait_timeout(xe_vm_resv(vm), 688 DMA_RESV_USAGE_BOOKKEEP, 689 false, MAX_SCHEDULE_TIMEOUT); 690 691 err = xe_vm_invalidate_vma(&uvma->vma); 692 xe_vm_unlock(vm); 693 if (err) 694 return err; 695 } else { 696 if (err < 0) 697 return err; 698 699 list_del_init(&uvma->userptr.repin_link); 700 list_move_tail(&uvma->vma.combined_links.rebind, 701 &vm->rebind_list); 702 } 703 } 704 705 return 0; 706 } 707 708 /** 709 * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs 710 * that need repinning. 711 * @vm: The VM. 712 * 713 * This function does an advisory check for whether the VM has userptrs that 714 * need repinning. 715 * 716 * Return: 0 if there are no indications of userptrs needing repinning, 717 * -EAGAIN if there are. 718 */ 719 int xe_vm_userptr_check_repin(struct xe_vm *vm) 720 { 721 return (list_empty_careful(&vm->userptr.repin_list) && 722 list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN; 723 } 724 725 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) 726 { 727 int i; 728 729 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) { 730 if (!vops->pt_update_ops[i].num_ops) 731 continue; 732 733 vops->pt_update_ops[i].ops = 734 kmalloc_array(vops->pt_update_ops[i].num_ops, 735 sizeof(*vops->pt_update_ops[i].ops), 736 GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 737 if (!vops->pt_update_ops[i].ops) 738 return array_of_binds ? 
-ENOBUFS : -ENOMEM; 739 } 740 741 return 0; 742 } 743 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO); 744 745 static void xe_vma_ops_fini(struct xe_vma_ops *vops) 746 { 747 int i; 748 749 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 750 kfree(vops->pt_update_ops[i].ops); 751 } 752 753 static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask) 754 { 755 int i; 756 757 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 758 if (BIT(i) & tile_mask) 759 ++vops->pt_update_ops[i].num_ops; 760 } 761 762 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, 763 u8 tile_mask) 764 { 765 INIT_LIST_HEAD(&op->link); 766 op->tile_mask = tile_mask; 767 op->base.op = DRM_GPUVA_OP_MAP; 768 op->base.map.va.addr = vma->gpuva.va.addr; 769 op->base.map.va.range = vma->gpuva.va.range; 770 op->base.map.gem.obj = vma->gpuva.gem.obj; 771 op->base.map.gem.offset = vma->gpuva.gem.offset; 772 op->map.vma = vma; 773 op->map.immediate = true; 774 op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE; 775 op->map.is_null = xe_vma_is_null(vma); 776 } 777 778 static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, 779 u8 tile_mask) 780 { 781 struct xe_vma_op *op; 782 783 op = kzalloc(sizeof(*op), GFP_KERNEL); 784 if (!op) 785 return -ENOMEM; 786 787 xe_vm_populate_rebind(op, vma, tile_mask); 788 list_add_tail(&op->link, &vops->list); 789 xe_vma_ops_incr_pt_update_ops(vops, tile_mask); 790 791 return 0; 792 } 793 794 static struct dma_fence *ops_execute(struct xe_vm *vm, 795 struct xe_vma_ops *vops); 796 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 797 struct xe_exec_queue *q, 798 struct xe_sync_entry *syncs, u32 num_syncs); 799 800 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) 801 { 802 struct dma_fence *fence; 803 struct xe_vma *vma, *next; 804 struct xe_vma_ops vops; 805 struct xe_vma_op *op, *next_op; 806 int err, i; 807 808 lockdep_assert_held(&vm->lock); 809 if ((xe_vm_in_lr_mode(vm) && !rebind_worker) || 810 list_empty(&vm->rebind_list)) 811 return 0; 812 813 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 814 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 815 vops.pt_update_ops[i].wait_vm_bookkeep = true; 816 817 xe_vm_assert_held(vm); 818 list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { 819 xe_assert(vm->xe, vma->tile_present); 820 821 if (rebind_worker) 822 trace_xe_vma_rebind_worker(vma); 823 else 824 trace_xe_vma_rebind_exec(vma); 825 826 err = xe_vm_ops_add_rebind(&vops, vma, 827 vma->tile_present); 828 if (err) 829 goto free_ops; 830 } 831 832 err = xe_vma_ops_alloc(&vops, false); 833 if (err) 834 goto free_ops; 835 836 fence = ops_execute(vm, &vops); 837 if (IS_ERR(fence)) { 838 err = PTR_ERR(fence); 839 } else { 840 dma_fence_put(fence); 841 list_for_each_entry_safe(vma, next, &vm->rebind_list, 842 combined_links.rebind) 843 list_del_init(&vma->combined_links.rebind); 844 } 845 free_ops: 846 list_for_each_entry_safe(op, next_op, &vops.list, link) { 847 list_del(&op->link); 848 kfree(op); 849 } 850 xe_vma_ops_fini(&vops); 851 852 return err; 853 } 854 855 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask) 856 { 857 struct dma_fence *fence = NULL; 858 struct xe_vma_ops vops; 859 struct xe_vma_op *op, *next_op; 860 struct xe_tile *tile; 861 u8 id; 862 int err; 863 864 lockdep_assert_held(&vm->lock); 865 xe_vm_assert_held(vm); 866 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 867 868 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 869 for_each_tile(tile, vm->xe, id) { 870 
vops.pt_update_ops[id].wait_vm_bookkeep = true; 871 vops.pt_update_ops[tile->id].q = 872 xe_tile_migrate_exec_queue(tile); 873 } 874 875 err = xe_vm_ops_add_rebind(&vops, vma, tile_mask); 876 if (err) 877 return ERR_PTR(err); 878 879 err = xe_vma_ops_alloc(&vops, false); 880 if (err) { 881 fence = ERR_PTR(err); 882 goto free_ops; 883 } 884 885 fence = ops_execute(vm, &vops); 886 887 free_ops: 888 list_for_each_entry_safe(op, next_op, &vops.list, link) { 889 list_del(&op->link); 890 kfree(op); 891 } 892 xe_vma_ops_fini(&vops); 893 894 return fence; 895 } 896 897 static void xe_vma_free(struct xe_vma *vma) 898 { 899 if (xe_vma_is_userptr(vma)) 900 kfree(to_userptr_vma(vma)); 901 else 902 kfree(vma); 903 } 904 905 #define VMA_CREATE_FLAG_READ_ONLY BIT(0) 906 #define VMA_CREATE_FLAG_IS_NULL BIT(1) 907 #define VMA_CREATE_FLAG_DUMPABLE BIT(2) 908 909 static struct xe_vma *xe_vma_create(struct xe_vm *vm, 910 struct xe_bo *bo, 911 u64 bo_offset_or_userptr, 912 u64 start, u64 end, 913 u16 pat_index, unsigned int flags) 914 { 915 struct xe_vma *vma; 916 struct xe_tile *tile; 917 u8 id; 918 bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY); 919 bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL); 920 bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE); 921 922 xe_assert(vm->xe, start < end); 923 xe_assert(vm->xe, end < vm->size); 924 925 /* 926 * Allocate and ensure that the xe_vma_is_userptr() return 927 * matches what was allocated. 928 */ 929 if (!bo && !is_null) { 930 struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL); 931 932 if (!uvma) 933 return ERR_PTR(-ENOMEM); 934 935 vma = &uvma->vma; 936 } else { 937 vma = kzalloc(sizeof(*vma), GFP_KERNEL); 938 if (!vma) 939 return ERR_PTR(-ENOMEM); 940 941 if (is_null) 942 vma->gpuva.flags |= DRM_GPUVA_SPARSE; 943 if (bo) 944 vma->gpuva.gem.obj = &bo->ttm.base; 945 } 946 947 INIT_LIST_HEAD(&vma->combined_links.rebind); 948 949 INIT_LIST_HEAD(&vma->gpuva.gem.entry); 950 vma->gpuva.vm = &vm->gpuvm; 951 vma->gpuva.va.addr = start; 952 vma->gpuva.va.range = end - start + 1; 953 if (read_only) 954 vma->gpuva.flags |= XE_VMA_READ_ONLY; 955 if (dumpable) 956 vma->gpuva.flags |= XE_VMA_DUMPABLE; 957 958 for_each_tile(tile, vm->xe, id) 959 vma->tile_mask |= 0x1 << id; 960 961 if (vm->xe->info.has_atomic_enable_pte_bit) 962 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; 963 964 vma->pat_index = pat_index; 965 966 if (bo) { 967 struct drm_gpuvm_bo *vm_bo; 968 969 xe_bo_assert_held(bo); 970 971 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base); 972 if (IS_ERR(vm_bo)) { 973 xe_vma_free(vma); 974 return ERR_CAST(vm_bo); 975 } 976 977 drm_gpuvm_bo_extobj_add(vm_bo); 978 drm_gem_object_get(&bo->ttm.base); 979 vma->gpuva.gem.offset = bo_offset_or_userptr; 980 drm_gpuva_link(&vma->gpuva, vm_bo); 981 drm_gpuvm_bo_put(vm_bo); 982 } else /* userptr or null */ { 983 if (!is_null) { 984 struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; 985 u64 size = end - start + 1; 986 int err; 987 988 INIT_LIST_HEAD(&userptr->invalidate_link); 989 INIT_LIST_HEAD(&userptr->repin_link); 990 vma->gpuva.gem.offset = bo_offset_or_userptr; 991 992 err = mmu_interval_notifier_insert(&userptr->notifier, 993 current->mm, 994 xe_vma_userptr(vma), size, 995 &vma_userptr_notifier_ops); 996 if (err) { 997 xe_vma_free(vma); 998 return ERR_PTR(err); 999 } 1000 1001 userptr->notifier_seq = LONG_MAX; 1002 } 1003 1004 xe_vm_get(vm); 1005 } 1006 1007 return vma; 1008 } 1009 1010 static void xe_vma_destroy_late(struct xe_vma *vma) 1011 { 1012 struct xe_vm *vm = xe_vma_vm(vma); 1013 
	if (vma->ufence) {
		xe_sync_ufence_put(vma->ufence);
		vma->ufence = NULL;
	}

	if (xe_vma_is_userptr(vma)) {
		struct xe_userptr_vma *uvma = to_userptr_vma(vma);
		struct xe_userptr *userptr = &uvma->userptr;

		if (userptr->sg)
			xe_hmm_userptr_free_sg(uvma);

		/*
		 * Since userptr pages are not pinned, we can't remove
		 * the notifier until we're sure the GPU is not accessing
		 * them anymore
		 */
		mmu_interval_notifier_remove(&userptr->notifier);
		xe_vm_put(vm);
	} else if (xe_vma_is_null(vma)) {
		xe_vm_put(vm);
	} else {
		xe_bo_put(xe_vma_bo(vma));
	}

	xe_vma_free(vma);
}

static void vma_destroy_work_func(struct work_struct *w)
{
	struct xe_vma *vma =
		container_of(w, struct xe_vma, destroy_work);

	xe_vma_destroy_late(vma);
}

static void vma_destroy_cb(struct dma_fence *fence,
			   struct dma_fence_cb *cb)
{
	struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);

	INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
	queue_work(system_unbound_wq, &vma->destroy_work);
}

static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
{
	struct xe_vm *vm = xe_vma_vm(vma);

	lockdep_assert_held_write(&vm->lock);
	xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));

	if (xe_vma_is_userptr(vma)) {
		xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);

		spin_lock(&vm->userptr.invalidated_lock);
		list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
		spin_unlock(&vm->userptr.invalidated_lock);
	} else if (!xe_vma_is_null(vma)) {
		xe_bo_assert_held(xe_vma_bo(vma));

		drm_gpuva_unlink(&vma->gpuva);
	}

	xe_vm_assert_held(vm);
	if (fence) {
		int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
						 vma_destroy_cb);

		if (ret) {
			XE_WARN_ON(ret != -ENOENT);
			xe_vma_destroy_late(vma);
		}
	} else {
		xe_vma_destroy_late(vma);
	}
}

/**
 * xe_vm_lock_vma() - drm_exec utility to lock a vma
 * @exec: The drm_exec object we're currently locking for.
 * @vma: The vma for which we want to lock the vm resv and any attached
 * object's resv.
 *
 * Return: 0 on success, negative error code on error. In particular
 * may return -EDEADLK on WW transaction contention and -EINTR if
 * an interruptible wait is terminated by a signal.
1101 */ 1102 int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma) 1103 { 1104 struct xe_vm *vm = xe_vma_vm(vma); 1105 struct xe_bo *bo = xe_vma_bo(vma); 1106 int err; 1107 1108 XE_WARN_ON(!vm); 1109 1110 err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 1111 if (!err && bo && !bo->vm) 1112 err = drm_exec_lock_obj(exec, &bo->ttm.base); 1113 1114 return err; 1115 } 1116 1117 static void xe_vma_destroy_unlocked(struct xe_vma *vma) 1118 { 1119 struct drm_exec exec; 1120 int err; 1121 1122 drm_exec_init(&exec, 0, 0); 1123 drm_exec_until_all_locked(&exec) { 1124 err = xe_vm_lock_vma(&exec, vma); 1125 drm_exec_retry_on_contention(&exec); 1126 if (XE_WARN_ON(err)) 1127 break; 1128 } 1129 1130 xe_vma_destroy(vma, NULL); 1131 1132 drm_exec_fini(&exec); 1133 } 1134 1135 struct xe_vma * 1136 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range) 1137 { 1138 struct drm_gpuva *gpuva; 1139 1140 lockdep_assert_held(&vm->lock); 1141 1142 if (xe_vm_is_closed_or_banned(vm)) 1143 return NULL; 1144 1145 xe_assert(vm->xe, start + range <= vm->size); 1146 1147 gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range); 1148 1149 return gpuva ? gpuva_to_vma(gpuva) : NULL; 1150 } 1151 1152 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma) 1153 { 1154 int err; 1155 1156 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1157 lockdep_assert_held(&vm->lock); 1158 1159 mutex_lock(&vm->snap_mutex); 1160 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva); 1161 mutex_unlock(&vm->snap_mutex); 1162 XE_WARN_ON(err); /* Shouldn't be possible */ 1163 1164 return err; 1165 } 1166 1167 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma) 1168 { 1169 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1170 lockdep_assert_held(&vm->lock); 1171 1172 mutex_lock(&vm->snap_mutex); 1173 drm_gpuva_remove(&vma->gpuva); 1174 mutex_unlock(&vm->snap_mutex); 1175 if (vm->usm.last_fault_vma == vma) 1176 vm->usm.last_fault_vma = NULL; 1177 } 1178 1179 static struct drm_gpuva_op *xe_vm_op_alloc(void) 1180 { 1181 struct xe_vma_op *op; 1182 1183 op = kzalloc(sizeof(*op), GFP_KERNEL); 1184 1185 if (unlikely(!op)) 1186 return NULL; 1187 1188 return &op->base; 1189 } 1190 1191 static void xe_vm_free(struct drm_gpuvm *gpuvm); 1192 1193 static const struct drm_gpuvm_ops gpuvm_ops = { 1194 .op_alloc = xe_vm_op_alloc, 1195 .vm_bo_validate = xe_gpuvm_validate, 1196 .vm_free = xe_vm_free, 1197 }; 1198 1199 static u64 pde_encode_pat_index(u16 pat_index) 1200 { 1201 u64 pte = 0; 1202 1203 if (pat_index & BIT(0)) 1204 pte |= XE_PPGTT_PTE_PAT0; 1205 1206 if (pat_index & BIT(1)) 1207 pte |= XE_PPGTT_PTE_PAT1; 1208 1209 return pte; 1210 } 1211 1212 static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level) 1213 { 1214 u64 pte = 0; 1215 1216 if (pat_index & BIT(0)) 1217 pte |= XE_PPGTT_PTE_PAT0; 1218 1219 if (pat_index & BIT(1)) 1220 pte |= XE_PPGTT_PTE_PAT1; 1221 1222 if (pat_index & BIT(2)) { 1223 if (pt_level) 1224 pte |= XE_PPGTT_PDE_PDPE_PAT2; 1225 else 1226 pte |= XE_PPGTT_PTE_PAT2; 1227 } 1228 1229 if (pat_index & BIT(3)) 1230 pte |= XELPG_PPGTT_PTE_PAT3; 1231 1232 if (pat_index & (BIT(4))) 1233 pte |= XE2_PPGTT_PTE_PAT4; 1234 1235 return pte; 1236 } 1237 1238 static u64 pte_encode_ps(u32 pt_level) 1239 { 1240 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL); 1241 1242 if (pt_level == 1) 1243 return XE_PDE_PS_2M; 1244 else if (pt_level == 2) 1245 return XE_PDPE_PS_1G; 1246 1247 return 0; 1248 } 1249 1250 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset, 1251 const u16 pat_index) 1252 { 1253 u64 pde; 1254 1255 pde = 
xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1256 pde |= XE_PAGE_PRESENT | XE_PAGE_RW; 1257 pde |= pde_encode_pat_index(pat_index); 1258 1259 return pde; 1260 } 1261 1262 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, 1263 u16 pat_index, u32 pt_level) 1264 { 1265 u64 pte; 1266 1267 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1268 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1269 pte |= pte_encode_pat_index(pat_index, pt_level); 1270 pte |= pte_encode_ps(pt_level); 1271 1272 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) 1273 pte |= XE_PPGTT_PTE_DM; 1274 1275 return pte; 1276 } 1277 1278 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, 1279 u16 pat_index, u32 pt_level) 1280 { 1281 pte |= XE_PAGE_PRESENT; 1282 1283 if (likely(!xe_vma_read_only(vma))) 1284 pte |= XE_PAGE_RW; 1285 1286 pte |= pte_encode_pat_index(pat_index, pt_level); 1287 pte |= pte_encode_ps(pt_level); 1288 1289 if (unlikely(xe_vma_is_null(vma))) 1290 pte |= XE_PTE_NULL; 1291 1292 return pte; 1293 } 1294 1295 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, 1296 u16 pat_index, 1297 u32 pt_level, bool devmem, u64 flags) 1298 { 1299 u64 pte; 1300 1301 /* Avoid passing random bits directly as flags */ 1302 xe_assert(xe, !(flags & ~XE_PTE_PS64)); 1303 1304 pte = addr; 1305 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1306 pte |= pte_encode_pat_index(pat_index, pt_level); 1307 pte |= pte_encode_ps(pt_level); 1308 1309 if (devmem) 1310 pte |= XE_PPGTT_PTE_DM; 1311 1312 pte |= flags; 1313 1314 return pte; 1315 } 1316 1317 static const struct xe_pt_ops xelp_pt_ops = { 1318 .pte_encode_bo = xelp_pte_encode_bo, 1319 .pte_encode_vma = xelp_pte_encode_vma, 1320 .pte_encode_addr = xelp_pte_encode_addr, 1321 .pde_encode_bo = xelp_pde_encode_bo, 1322 }; 1323 1324 static void vm_destroy_work_func(struct work_struct *w); 1325 1326 /** 1327 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the 1328 * given tile and vm. 1329 * @xe: xe device. 1330 * @tile: tile to set up for. 1331 * @vm: vm to set up for. 1332 * 1333 * Sets up a pagetable tree with one page-table per level and a single 1334 * leaf PTE. All pagetable entries point to the single page-table or, 1335 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and 1336 * writes become NOPs. 1337 * 1338 * Return: 0 on success, negative error code on error. 
1339 */ 1340 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, 1341 struct xe_vm *vm) 1342 { 1343 u8 id = tile->id; 1344 int i; 1345 1346 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) { 1347 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i); 1348 if (IS_ERR(vm->scratch_pt[id][i])) 1349 return PTR_ERR(vm->scratch_pt[id][i]); 1350 1351 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); 1352 } 1353 1354 return 0; 1355 } 1356 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO); 1357 1358 static void xe_vm_free_scratch(struct xe_vm *vm) 1359 { 1360 struct xe_tile *tile; 1361 u8 id; 1362 1363 if (!xe_vm_has_scratch(vm)) 1364 return; 1365 1366 for_each_tile(tile, vm->xe, id) { 1367 u32 i; 1368 1369 if (!vm->pt_root[id]) 1370 continue; 1371 1372 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i) 1373 if (vm->scratch_pt[id][i]) 1374 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL); 1375 } 1376 } 1377 1378 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) 1379 { 1380 struct drm_gem_object *vm_resv_obj; 1381 struct xe_vm *vm; 1382 int err, number_tiles = 0; 1383 struct xe_tile *tile; 1384 u8 id; 1385 1386 /* 1387 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to 1388 * ever be in faulting mode. 1389 */ 1390 xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE))); 1391 1392 vm = kzalloc(sizeof(*vm), GFP_KERNEL); 1393 if (!vm) 1394 return ERR_PTR(-ENOMEM); 1395 1396 vm->xe = xe; 1397 1398 vm->size = 1ull << xe->info.va_bits; 1399 1400 vm->flags = flags; 1401 1402 /** 1403 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be 1404 * manipulated under the PXP mutex. However, the PXP mutex can be taken 1405 * under a user-VM lock when the PXP session is started at exec_queue 1406 * creation time. Those are different VMs and therefore there is no risk 1407 * of deadlock, but we need to tell lockdep that this is the case or it 1408 * will print a warning. 1409 */ 1410 if (flags & XE_VM_FLAG_GSC) { 1411 static struct lock_class_key gsc_vm_key; 1412 1413 __init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key); 1414 } else { 1415 init_rwsem(&vm->lock); 1416 } 1417 mutex_init(&vm->snap_mutex); 1418 1419 INIT_LIST_HEAD(&vm->rebind_list); 1420 1421 INIT_LIST_HEAD(&vm->userptr.repin_list); 1422 INIT_LIST_HEAD(&vm->userptr.invalidated); 1423 init_rwsem(&vm->userptr.notifier_lock); 1424 spin_lock_init(&vm->userptr.invalidated_lock); 1425 1426 ttm_lru_bulk_move_init(&vm->lru_bulk_move); 1427 1428 INIT_WORK(&vm->destroy_work, vm_destroy_work_func); 1429 1430 INIT_LIST_HEAD(&vm->preempt.exec_queues); 1431 vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */ 1432 1433 for_each_tile(tile, xe, id) 1434 xe_range_fence_tree_init(&vm->rftree[id]); 1435 1436 vm->pt_ops = &xelp_pt_ops; 1437 1438 /* 1439 * Long-running workloads are not protected by the scheduler references. 1440 * By design, run_job for long-running workloads returns NULL and the 1441 * scheduler drops all the references of it, hence protecting the VM 1442 * for this case is necessary. 
1443 */ 1444 if (flags & XE_VM_FLAG_LR_MODE) 1445 xe_pm_runtime_get_noresume(xe); 1446 1447 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); 1448 if (!vm_resv_obj) { 1449 err = -ENOMEM; 1450 goto err_no_resv; 1451 } 1452 1453 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm, 1454 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops); 1455 1456 drm_gem_object_put(vm_resv_obj); 1457 1458 err = xe_vm_lock(vm, true); 1459 if (err) 1460 goto err_close; 1461 1462 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) 1463 vm->flags |= XE_VM_FLAG_64K; 1464 1465 for_each_tile(tile, xe, id) { 1466 if (flags & XE_VM_FLAG_MIGRATION && 1467 tile->id != XE_VM_FLAG_TILE_ID(flags)) 1468 continue; 1469 1470 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level); 1471 if (IS_ERR(vm->pt_root[id])) { 1472 err = PTR_ERR(vm->pt_root[id]); 1473 vm->pt_root[id] = NULL; 1474 goto err_unlock_close; 1475 } 1476 } 1477 1478 if (xe_vm_has_scratch(vm)) { 1479 for_each_tile(tile, xe, id) { 1480 if (!vm->pt_root[id]) 1481 continue; 1482 1483 err = xe_vm_create_scratch(xe, tile, vm); 1484 if (err) 1485 goto err_unlock_close; 1486 } 1487 vm->batch_invalidate_tlb = true; 1488 } 1489 1490 if (vm->flags & XE_VM_FLAG_LR_MODE) { 1491 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); 1492 vm->batch_invalidate_tlb = false; 1493 } 1494 1495 /* Fill pt_root after allocating scratch tables */ 1496 for_each_tile(tile, xe, id) { 1497 if (!vm->pt_root[id]) 1498 continue; 1499 1500 xe_pt_populate_empty(tile, vm, vm->pt_root[id]); 1501 } 1502 xe_vm_unlock(vm); 1503 1504 /* Kernel migration VM shouldn't have a circular loop.. */ 1505 if (!(flags & XE_VM_FLAG_MIGRATION)) { 1506 for_each_tile(tile, xe, id) { 1507 struct xe_exec_queue *q; 1508 u32 create_flags = EXEC_QUEUE_FLAG_VM; 1509 1510 if (!vm->pt_root[id]) 1511 continue; 1512 1513 q = xe_exec_queue_create_bind(xe, tile, create_flags, 0); 1514 if (IS_ERR(q)) { 1515 err = PTR_ERR(q); 1516 goto err_close; 1517 } 1518 vm->q[id] = q; 1519 number_tiles++; 1520 } 1521 } 1522 1523 if (number_tiles > 1) 1524 vm->composite_fence_ctx = dma_fence_context_alloc(1); 1525 1526 trace_xe_vm_create(vm); 1527 1528 return vm; 1529 1530 err_unlock_close: 1531 xe_vm_unlock(vm); 1532 err_close: 1533 xe_vm_close_and_put(vm); 1534 return ERR_PTR(err); 1535 1536 err_no_resv: 1537 mutex_destroy(&vm->snap_mutex); 1538 for_each_tile(tile, xe, id) 1539 xe_range_fence_tree_fini(&vm->rftree[id]); 1540 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 1541 kfree(vm); 1542 if (flags & XE_VM_FLAG_LR_MODE) 1543 xe_pm_runtime_put(xe); 1544 return ERR_PTR(err); 1545 } 1546 1547 static void xe_vm_close(struct xe_vm *vm) 1548 { 1549 down_write(&vm->lock); 1550 vm->size = 0; 1551 up_write(&vm->lock); 1552 } 1553 1554 void xe_vm_close_and_put(struct xe_vm *vm) 1555 { 1556 LIST_HEAD(contested); 1557 struct xe_device *xe = vm->xe; 1558 struct xe_tile *tile; 1559 struct xe_vma *vma, *next_vma; 1560 struct drm_gpuva *gpuva, *next; 1561 u8 id; 1562 1563 xe_assert(xe, !vm->preempt.num_exec_queues); 1564 1565 xe_vm_close(vm); 1566 if (xe_vm_in_preempt_fence_mode(vm)) 1567 flush_work(&vm->preempt.rebind_work); 1568 1569 down_write(&vm->lock); 1570 for_each_tile(tile, xe, id) { 1571 if (vm->q[id]) 1572 xe_exec_queue_last_fence_put(vm->q[id], vm); 1573 } 1574 up_write(&vm->lock); 1575 1576 for_each_tile(tile, xe, id) { 1577 if (vm->q[id]) { 1578 xe_exec_queue_kill(vm->q[id]); 1579 xe_exec_queue_put(vm->q[id]); 1580 vm->q[id] = NULL; 1581 } 1582 } 1583 1584 down_write(&vm->lock); 1585 
xe_vm_lock(vm, false); 1586 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) { 1587 vma = gpuva_to_vma(gpuva); 1588 1589 if (xe_vma_has_no_bo(vma)) { 1590 down_read(&vm->userptr.notifier_lock); 1591 vma->gpuva.flags |= XE_VMA_DESTROYED; 1592 up_read(&vm->userptr.notifier_lock); 1593 } 1594 1595 xe_vm_remove_vma(vm, vma); 1596 1597 /* easy case, remove from VMA? */ 1598 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) { 1599 list_del_init(&vma->combined_links.rebind); 1600 xe_vma_destroy(vma, NULL); 1601 continue; 1602 } 1603 1604 list_move_tail(&vma->combined_links.destroy, &contested); 1605 vma->gpuva.flags |= XE_VMA_DESTROYED; 1606 } 1607 1608 /* 1609 * All vm operations will add shared fences to resv. 1610 * The only exception is eviction for a shared object, 1611 * but even so, the unbind when evicted would still 1612 * install a fence to resv. Hence it's safe to 1613 * destroy the pagetables immediately. 1614 */ 1615 xe_vm_free_scratch(vm); 1616 1617 for_each_tile(tile, xe, id) { 1618 if (vm->pt_root[id]) { 1619 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); 1620 vm->pt_root[id] = NULL; 1621 } 1622 } 1623 xe_vm_unlock(vm); 1624 1625 /* 1626 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL 1627 * Since we hold a refcount to the bo, we can remove and free 1628 * the members safely without locking. 1629 */ 1630 list_for_each_entry_safe(vma, next_vma, &contested, 1631 combined_links.destroy) { 1632 list_del_init(&vma->combined_links.destroy); 1633 xe_vma_destroy_unlocked(vma); 1634 } 1635 1636 up_write(&vm->lock); 1637 1638 down_write(&xe->usm.lock); 1639 if (vm->usm.asid) { 1640 void *lookup; 1641 1642 xe_assert(xe, xe->info.has_asid); 1643 xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION)); 1644 1645 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); 1646 xe_assert(xe, lookup == vm); 1647 } 1648 up_write(&xe->usm.lock); 1649 1650 for_each_tile(tile, xe, id) 1651 xe_range_fence_tree_fini(&vm->rftree[id]); 1652 1653 xe_vm_put(vm); 1654 } 1655 1656 static void vm_destroy_work_func(struct work_struct *w) 1657 { 1658 struct xe_vm *vm = 1659 container_of(w, struct xe_vm, destroy_work); 1660 struct xe_device *xe = vm->xe; 1661 struct xe_tile *tile; 1662 u8 id; 1663 1664 /* xe_vm_close_and_put was not called? 
*/ 1665 xe_assert(xe, !vm->size); 1666 1667 if (xe_vm_in_preempt_fence_mode(vm)) 1668 flush_work(&vm->preempt.rebind_work); 1669 1670 mutex_destroy(&vm->snap_mutex); 1671 1672 if (vm->flags & XE_VM_FLAG_LR_MODE) 1673 xe_pm_runtime_put(xe); 1674 1675 for_each_tile(tile, xe, id) 1676 XE_WARN_ON(vm->pt_root[id]); 1677 1678 trace_xe_vm_free(vm); 1679 1680 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 1681 1682 if (vm->xef) 1683 xe_file_put(vm->xef); 1684 1685 kfree(vm); 1686 } 1687 1688 static void xe_vm_free(struct drm_gpuvm *gpuvm) 1689 { 1690 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); 1691 1692 /* To destroy the VM we need to be able to sleep */ 1693 queue_work(system_unbound_wq, &vm->destroy_work); 1694 } 1695 1696 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) 1697 { 1698 struct xe_vm *vm; 1699 1700 mutex_lock(&xef->vm.lock); 1701 vm = xa_load(&xef->vm.xa, id); 1702 if (vm) 1703 xe_vm_get(vm); 1704 mutex_unlock(&xef->vm.lock); 1705 1706 return vm; 1707 } 1708 1709 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile) 1710 { 1711 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0, 1712 tile_to_xe(tile)->pat.idx[XE_CACHE_WB]); 1713 } 1714 1715 static struct xe_exec_queue * 1716 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 1717 { 1718 return q ? q : vm->q[0]; 1719 } 1720 1721 static struct xe_user_fence * 1722 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) 1723 { 1724 unsigned int i; 1725 1726 for (i = 0; i < num_syncs; i++) { 1727 struct xe_sync_entry *e = &syncs[i]; 1728 1729 if (xe_sync_is_ufence(e)) 1730 return xe_sync_ufence_get(e); 1731 } 1732 1733 return NULL; 1734 } 1735 1736 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ 1737 DRM_XE_VM_CREATE_FLAG_LR_MODE | \ 1738 DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 1739 1740 int xe_vm_create_ioctl(struct drm_device *dev, void *data, 1741 struct drm_file *file) 1742 { 1743 struct xe_device *xe = to_xe_device(dev); 1744 struct xe_file *xef = to_xe_file(file); 1745 struct drm_xe_vm_create *args = data; 1746 struct xe_tile *tile; 1747 struct xe_vm *vm; 1748 u32 id, asid; 1749 int err; 1750 u32 flags = 0; 1751 1752 if (XE_IOCTL_DBG(xe, args->extensions)) 1753 return -EINVAL; 1754 1755 if (XE_WA(xe_root_mmio_gt(xe), 14016763929)) 1756 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE; 1757 1758 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && 1759 !xe->info.has_usm)) 1760 return -EINVAL; 1761 1762 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 1763 return -EINVAL; 1764 1765 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS)) 1766 return -EINVAL; 1767 1768 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE && 1769 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) 1770 return -EINVAL; 1771 1772 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) && 1773 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) 1774 return -EINVAL; 1775 1776 if (XE_IOCTL_DBG(xe, args->extensions)) 1777 return -EINVAL; 1778 1779 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE) 1780 flags |= XE_VM_FLAG_SCRATCH_PAGE; 1781 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) 1782 flags |= XE_VM_FLAG_LR_MODE; 1783 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 1784 flags |= XE_VM_FLAG_FAULT_MODE; 1785 1786 vm = xe_vm_create(xe, flags); 1787 if (IS_ERR(vm)) 1788 return PTR_ERR(vm); 1789 1790 if (xe->info.has_asid) { 1791 down_write(&xe->usm.lock); 1792 err = 
xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, 1793 XA_LIMIT(1, XE_MAX_ASID - 1), 1794 &xe->usm.next_asid, GFP_KERNEL); 1795 up_write(&xe->usm.lock); 1796 if (err < 0) 1797 goto err_close_and_put; 1798 1799 vm->usm.asid = asid; 1800 } 1801 1802 vm->xef = xe_file_get(xef); 1803 1804 /* Record BO memory for VM pagetable created against client */ 1805 for_each_tile(tile, xe, id) 1806 if (vm->pt_root[id]) 1807 xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo); 1808 1809 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) 1810 /* Warning: Security issue - never enable by default */ 1811 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); 1812 #endif 1813 1814 /* user id alloc must always be last in ioctl to prevent UAF */ 1815 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); 1816 if (err) 1817 goto err_close_and_put; 1818 1819 args->vm_id = id; 1820 1821 return 0; 1822 1823 err_close_and_put: 1824 xe_vm_close_and_put(vm); 1825 1826 return err; 1827 } 1828 1829 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, 1830 struct drm_file *file) 1831 { 1832 struct xe_device *xe = to_xe_device(dev); 1833 struct xe_file *xef = to_xe_file(file); 1834 struct drm_xe_vm_destroy *args = data; 1835 struct xe_vm *vm; 1836 int err = 0; 1837 1838 if (XE_IOCTL_DBG(xe, args->pad) || 1839 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 1840 return -EINVAL; 1841 1842 mutex_lock(&xef->vm.lock); 1843 vm = xa_load(&xef->vm.xa, args->vm_id); 1844 if (XE_IOCTL_DBG(xe, !vm)) 1845 err = -ENOENT; 1846 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues)) 1847 err = -EBUSY; 1848 else 1849 xa_erase(&xef->vm.xa, args->vm_id); 1850 mutex_unlock(&xef->vm.lock); 1851 1852 if (!err) 1853 xe_vm_close_and_put(vm); 1854 1855 return err; 1856 } 1857 1858 static const u32 region_to_mem_type[] = { 1859 XE_PL_TT, 1860 XE_PL_VRAM0, 1861 XE_PL_VRAM1, 1862 }; 1863 1864 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma, 1865 bool post_commit) 1866 { 1867 down_read(&vm->userptr.notifier_lock); 1868 vma->gpuva.flags |= XE_VMA_DESTROYED; 1869 up_read(&vm->userptr.notifier_lock); 1870 if (post_commit) 1871 xe_vm_remove_vma(vm, vma); 1872 } 1873 1874 #undef ULL 1875 #define ULL unsigned long long 1876 1877 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) 1878 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 1879 { 1880 struct xe_vma *vma; 1881 1882 switch (op->op) { 1883 case DRM_GPUVA_OP_MAP: 1884 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx", 1885 (ULL)op->map.va.addr, (ULL)op->map.va.range); 1886 break; 1887 case DRM_GPUVA_OP_REMAP: 1888 vma = gpuva_to_vma(op->remap.unmap->va); 1889 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 1890 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 1891 op->remap.unmap->keep ? 1 : 0); 1892 if (op->remap.prev) 1893 vm_dbg(&xe->drm, 1894 "REMAP:PREV: addr=0x%016llx, range=0x%016llx", 1895 (ULL)op->remap.prev->va.addr, 1896 (ULL)op->remap.prev->va.range); 1897 if (op->remap.next) 1898 vm_dbg(&xe->drm, 1899 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx", 1900 (ULL)op->remap.next->va.addr, 1901 (ULL)op->remap.next->va.range); 1902 break; 1903 case DRM_GPUVA_OP_UNMAP: 1904 vma = gpuva_to_vma(op->unmap.va); 1905 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 1906 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 1907 op->unmap.keep ? 
1 : 0); 1908 break; 1909 case DRM_GPUVA_OP_PREFETCH: 1910 vma = gpuva_to_vma(op->prefetch.va); 1911 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx", 1912 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma)); 1913 break; 1914 default: 1915 drm_warn(&xe->drm, "NOT POSSIBLE"); 1916 } 1917 } 1918 #else 1919 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 1920 { 1921 } 1922 #endif 1923 1924 /* 1925 * Create operations list from IOCTL arguments, setup operations fields so parse 1926 * and commit steps are decoupled from IOCTL arguments. This step can fail. 1927 */ 1928 static struct drm_gpuva_ops * 1929 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, 1930 u64 bo_offset_or_userptr, u64 addr, u64 range, 1931 u32 operation, u32 flags, 1932 u32 prefetch_region, u16 pat_index) 1933 { 1934 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL; 1935 struct drm_gpuva_ops *ops; 1936 struct drm_gpuva_op *__op; 1937 struct drm_gpuvm_bo *vm_bo; 1938 int err; 1939 1940 lockdep_assert_held_write(&vm->lock); 1941 1942 vm_dbg(&vm->xe->drm, 1943 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx", 1944 operation, (ULL)addr, (ULL)range, 1945 (ULL)bo_offset_or_userptr); 1946 1947 switch (operation) { 1948 case DRM_XE_VM_BIND_OP_MAP: 1949 case DRM_XE_VM_BIND_OP_MAP_USERPTR: 1950 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range, 1951 obj, bo_offset_or_userptr); 1952 break; 1953 case DRM_XE_VM_BIND_OP_UNMAP: 1954 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range); 1955 break; 1956 case DRM_XE_VM_BIND_OP_PREFETCH: 1957 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range); 1958 break; 1959 case DRM_XE_VM_BIND_OP_UNMAP_ALL: 1960 xe_assert(vm->xe, bo); 1961 1962 err = xe_bo_lock(bo, true); 1963 if (err) 1964 return ERR_PTR(err); 1965 1966 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj); 1967 if (IS_ERR(vm_bo)) { 1968 xe_bo_unlock(bo); 1969 return ERR_CAST(vm_bo); 1970 } 1971 1972 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo); 1973 drm_gpuvm_bo_put(vm_bo); 1974 xe_bo_unlock(bo); 1975 break; 1976 default: 1977 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 1978 ops = ERR_PTR(-EINVAL); 1979 } 1980 if (IS_ERR(ops)) 1981 return ops; 1982 1983 drm_gpuva_for_each_op(__op, ops) { 1984 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 1985 1986 if (__op->op == DRM_GPUVA_OP_MAP) { 1987 op->map.immediate = 1988 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; 1989 op->map.read_only = 1990 flags & DRM_XE_VM_BIND_FLAG_READONLY; 1991 op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 1992 op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; 1993 op->map.pat_index = pat_index; 1994 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { 1995 op->prefetch.region = prefetch_region; 1996 } 1997 1998 print_op(vm->xe, __op); 1999 } 2000 2001 return ops; 2002 } 2003 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO); 2004 2005 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, 2006 u16 pat_index, unsigned int flags) 2007 { 2008 struct xe_bo *bo = op->gem.obj ? 
gem_to_xe_bo(op->gem.obj) : NULL; 2009 struct drm_exec exec; 2010 struct xe_vma *vma; 2011 int err = 0; 2012 2013 lockdep_assert_held_write(&vm->lock); 2014 2015 if (bo) { 2016 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); 2017 drm_exec_until_all_locked(&exec) { 2018 err = 0; 2019 if (!bo->vm) { 2020 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm)); 2021 drm_exec_retry_on_contention(&exec); 2022 } 2023 if (!err) { 2024 err = drm_exec_lock_obj(&exec, &bo->ttm.base); 2025 drm_exec_retry_on_contention(&exec); 2026 } 2027 if (err) { 2028 drm_exec_fini(&exec); 2029 return ERR_PTR(err); 2030 } 2031 } 2032 } 2033 vma = xe_vma_create(vm, bo, op->gem.offset, 2034 op->va.addr, op->va.addr + 2035 op->va.range - 1, pat_index, flags); 2036 if (IS_ERR(vma)) 2037 goto err_unlock; 2038 2039 if (xe_vma_is_userptr(vma)) 2040 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2041 else if (!xe_vma_has_no_bo(vma) && !bo->vm) 2042 err = add_preempt_fences(vm, bo); 2043 2044 err_unlock: 2045 if (bo) 2046 drm_exec_fini(&exec); 2047 2048 if (err) { 2049 prep_vma_destroy(vm, vma, false); 2050 xe_vma_destroy_unlocked(vma); 2051 vma = ERR_PTR(err); 2052 } 2053 2054 return vma; 2055 } 2056 2057 static u64 xe_vma_max_pte_size(struct xe_vma *vma) 2058 { 2059 if (vma->gpuva.flags & XE_VMA_PTE_1G) 2060 return SZ_1G; 2061 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT)) 2062 return SZ_2M; 2063 else if (vma->gpuva.flags & XE_VMA_PTE_64K) 2064 return SZ_64K; 2065 else if (vma->gpuva.flags & XE_VMA_PTE_4K) 2066 return SZ_4K; 2067 2068 return SZ_1G; /* Uninitialized, used max size */ 2069 } 2070 2071 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size) 2072 { 2073 switch (size) { 2074 case SZ_1G: 2075 vma->gpuva.flags |= XE_VMA_PTE_1G; 2076 break; 2077 case SZ_2M: 2078 vma->gpuva.flags |= XE_VMA_PTE_2M; 2079 break; 2080 case SZ_64K: 2081 vma->gpuva.flags |= XE_VMA_PTE_64K; 2082 break; 2083 case SZ_4K: 2084 vma->gpuva.flags |= XE_VMA_PTE_4K; 2085 break; 2086 } 2087 } 2088 2089 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) 2090 { 2091 int err = 0; 2092 2093 lockdep_assert_held_write(&vm->lock); 2094 2095 switch (op->base.op) { 2096 case DRM_GPUVA_OP_MAP: 2097 err |= xe_vm_insert_vma(vm, op->map.vma); 2098 if (!err) 2099 op->flags |= XE_VMA_OP_COMMITTED; 2100 break; 2101 case DRM_GPUVA_OP_REMAP: 2102 { 2103 u8 tile_present = 2104 gpuva_to_vma(op->base.remap.unmap->va)->tile_present; 2105 2106 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va), 2107 true); 2108 op->flags |= XE_VMA_OP_COMMITTED; 2109 2110 if (op->remap.prev) { 2111 err |= xe_vm_insert_vma(vm, op->remap.prev); 2112 if (!err) 2113 op->flags |= XE_VMA_OP_PREV_COMMITTED; 2114 if (!err && op->remap.skip_prev) { 2115 op->remap.prev->tile_present = 2116 tile_present; 2117 op->remap.prev = NULL; 2118 } 2119 } 2120 if (op->remap.next) { 2121 err |= xe_vm_insert_vma(vm, op->remap.next); 2122 if (!err) 2123 op->flags |= XE_VMA_OP_NEXT_COMMITTED; 2124 if (!err && op->remap.skip_next) { 2125 op->remap.next->tile_present = 2126 tile_present; 2127 op->remap.next = NULL; 2128 } 2129 } 2130 2131 /* Adjust for partial unbind after removing VMA from VM */ 2132 if (!err) { 2133 op->base.remap.unmap->va->va.addr = op->remap.start; 2134 op->base.remap.unmap->va->va.range = op->remap.range; 2135 } 2136 break; 2137 } 2138 case DRM_GPUVA_OP_UNMAP: 2139 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true); 2140 op->flags |= XE_VMA_OP_COMMITTED; 2141 break; 2142 case DRM_GPUVA_OP_PREFETCH: 2143 op->flags |= 
XE_VMA_OP_COMMITTED; 2144 break; 2145 default: 2146 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2147 } 2148 2149 return err; 2150 } 2151 2152 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, 2153 struct xe_vma_ops *vops) 2154 { 2155 struct xe_device *xe = vm->xe; 2156 struct drm_gpuva_op *__op; 2157 struct xe_tile *tile; 2158 u8 id, tile_mask = 0; 2159 int err = 0; 2160 2161 lockdep_assert_held_write(&vm->lock); 2162 2163 for_each_tile(tile, vm->xe, id) 2164 tile_mask |= 0x1 << id; 2165 2166 drm_gpuva_for_each_op(__op, ops) { 2167 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2168 struct xe_vma *vma; 2169 unsigned int flags = 0; 2170 2171 INIT_LIST_HEAD(&op->link); 2172 list_add_tail(&op->link, &vops->list); 2173 op->tile_mask = tile_mask; 2174 2175 switch (op->base.op) { 2176 case DRM_GPUVA_OP_MAP: 2177 { 2178 flags |= op->map.read_only ? 2179 VMA_CREATE_FLAG_READ_ONLY : 0; 2180 flags |= op->map.is_null ? 2181 VMA_CREATE_FLAG_IS_NULL : 0; 2182 flags |= op->map.dumpable ? 2183 VMA_CREATE_FLAG_DUMPABLE : 0; 2184 2185 vma = new_vma(vm, &op->base.map, op->map.pat_index, 2186 flags); 2187 if (IS_ERR(vma)) 2188 return PTR_ERR(vma); 2189 2190 op->map.vma = vma; 2191 if (op->map.immediate || !xe_vm_in_fault_mode(vm)) 2192 xe_vma_ops_incr_pt_update_ops(vops, 2193 op->tile_mask); 2194 break; 2195 } 2196 case DRM_GPUVA_OP_REMAP: 2197 { 2198 struct xe_vma *old = 2199 gpuva_to_vma(op->base.remap.unmap->va); 2200 2201 op->remap.start = xe_vma_start(old); 2202 op->remap.range = xe_vma_size(old); 2203 2204 if (op->base.remap.prev) { 2205 flags |= op->base.remap.unmap->va->flags & 2206 XE_VMA_READ_ONLY ? 2207 VMA_CREATE_FLAG_READ_ONLY : 0; 2208 flags |= op->base.remap.unmap->va->flags & 2209 DRM_GPUVA_SPARSE ? 2210 VMA_CREATE_FLAG_IS_NULL : 0; 2211 flags |= op->base.remap.unmap->va->flags & 2212 XE_VMA_DUMPABLE ? 2213 VMA_CREATE_FLAG_DUMPABLE : 0; 2214 2215 vma = new_vma(vm, op->base.remap.prev, 2216 old->pat_index, flags); 2217 if (IS_ERR(vma)) 2218 return PTR_ERR(vma); 2219 2220 op->remap.prev = vma; 2221 2222 /* 2223 * Userptr creates a new SG mapping so 2224 * we must also rebind. 2225 */ 2226 op->remap.skip_prev = !xe_vma_is_userptr(old) && 2227 IS_ALIGNED(xe_vma_end(vma), 2228 xe_vma_max_pte_size(old)); 2229 if (op->remap.skip_prev) { 2230 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2231 op->remap.range -= 2232 xe_vma_end(vma) - 2233 xe_vma_start(old); 2234 op->remap.start = xe_vma_end(vma); 2235 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx", 2236 (ULL)op->remap.start, 2237 (ULL)op->remap.range); 2238 } else { 2239 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); 2240 } 2241 } 2242 2243 if (op->base.remap.next) { 2244 flags |= op->base.remap.unmap->va->flags & 2245 XE_VMA_READ_ONLY ? 2246 VMA_CREATE_FLAG_READ_ONLY : 0; 2247 flags |= op->base.remap.unmap->va->flags & 2248 DRM_GPUVA_SPARSE ? 2249 VMA_CREATE_FLAG_IS_NULL : 0; 2250 flags |= op->base.remap.unmap->va->flags & 2251 XE_VMA_DUMPABLE ? 2252 VMA_CREATE_FLAG_DUMPABLE : 0; 2253 2254 vma = new_vma(vm, op->base.remap.next, 2255 old->pat_index, flags); 2256 if (IS_ERR(vma)) 2257 return PTR_ERR(vma); 2258 2259 op->remap.next = vma; 2260 2261 /* 2262 * Userptr creates a new SG mapping so 2263 * we must also rebind. 
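 *
 * Worked example of the skip_prev/skip_next bookkeeping (values are
 * hypothetical): unmapping [0x110000, 0x120000) out of a non-userptr VMA
 * covering [0x100000, 0x200000) whose max PTE size is 64K leaves
 * prev = [0x100000, 0x110000) and next = [0x120000, 0x200000). Both split
 * points are 64K-aligned, so skip_prev and skip_next are set, remap.start
 * moves to 0x110000 and remap.range shrinks to 0x10000, i.e. only the hole
 * itself is unbound while prev and next keep their existing page-table
 * entries.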
2264 */ 2265 op->remap.skip_next = !xe_vma_is_userptr(old) && 2266 IS_ALIGNED(xe_vma_start(vma), 2267 xe_vma_max_pte_size(old)); 2268 if (op->remap.skip_next) { 2269 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2270 op->remap.range -= 2271 xe_vma_end(old) - 2272 xe_vma_start(vma); 2273 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx", 2274 (ULL)op->remap.start, 2275 (ULL)op->remap.range); 2276 } else { 2277 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); 2278 } 2279 } 2280 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); 2281 break; 2282 } 2283 case DRM_GPUVA_OP_UNMAP: 2284 case DRM_GPUVA_OP_PREFETCH: 2285 /* FIXME: Need to skip some prefetch ops */ 2286 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); 2287 break; 2288 default: 2289 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2290 } 2291 2292 err = xe_vma_op_commit(vm, op); 2293 if (err) 2294 return err; 2295 } 2296 2297 return 0; 2298 } 2299 2300 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, 2301 bool post_commit, bool prev_post_commit, 2302 bool next_post_commit) 2303 { 2304 lockdep_assert_held_write(&vm->lock); 2305 2306 switch (op->base.op) { 2307 case DRM_GPUVA_OP_MAP: 2308 if (op->map.vma) { 2309 prep_vma_destroy(vm, op->map.vma, post_commit); 2310 xe_vma_destroy_unlocked(op->map.vma); 2311 } 2312 break; 2313 case DRM_GPUVA_OP_UNMAP: 2314 { 2315 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va); 2316 2317 if (vma) { 2318 down_read(&vm->userptr.notifier_lock); 2319 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2320 up_read(&vm->userptr.notifier_lock); 2321 if (post_commit) 2322 xe_vm_insert_vma(vm, vma); 2323 } 2324 break; 2325 } 2326 case DRM_GPUVA_OP_REMAP: 2327 { 2328 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va); 2329 2330 if (op->remap.prev) { 2331 prep_vma_destroy(vm, op->remap.prev, prev_post_commit); 2332 xe_vma_destroy_unlocked(op->remap.prev); 2333 } 2334 if (op->remap.next) { 2335 prep_vma_destroy(vm, op->remap.next, next_post_commit); 2336 xe_vma_destroy_unlocked(op->remap.next); 2337 } 2338 if (vma) { 2339 down_read(&vm->userptr.notifier_lock); 2340 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2341 up_read(&vm->userptr.notifier_lock); 2342 if (post_commit) 2343 xe_vm_insert_vma(vm, vma); 2344 } 2345 break; 2346 } 2347 case DRM_GPUVA_OP_PREFETCH: 2348 /* Nothing to do */ 2349 break; 2350 default: 2351 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2352 } 2353 } 2354 2355 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, 2356 struct drm_gpuva_ops **ops, 2357 int num_ops_list) 2358 { 2359 int i; 2360 2361 for (i = num_ops_list - 1; i >= 0; --i) { 2362 struct drm_gpuva_ops *__ops = ops[i]; 2363 struct drm_gpuva_op *__op; 2364 2365 if (!__ops) 2366 continue; 2367 2368 drm_gpuva_for_each_op_reverse(__op, __ops) { 2369 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2370 2371 xe_vma_op_unwind(vm, op, 2372 op->flags & XE_VMA_OP_COMMITTED, 2373 op->flags & XE_VMA_OP_PREV_COMMITTED, 2374 op->flags & XE_VMA_OP_NEXT_COMMITTED); 2375 } 2376 } 2377 } 2378 2379 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, 2380 bool validate) 2381 { 2382 struct xe_bo *bo = xe_vma_bo(vma); 2383 struct xe_vm *vm = xe_vma_vm(vma); 2384 int err = 0; 2385 2386 if (bo) { 2387 if (!bo->vm) 2388 err = drm_exec_lock_obj(exec, &bo->ttm.base); 2389 if (!err && validate) 2390 err = xe_bo_validate(bo, vm, 2391 !xe_vm_in_preempt_fence_mode(vm)); 2392 } 2393 2394 return err; 2395 } 2396 2397 static int check_ufence(struct xe_vma *vma) 2398 { 2399 if (vma->ufence) { 2400 struct 
xe_user_fence * const f = vma->ufence; 2401 2402 if (!xe_sync_ufence_get_status(f)) 2403 return -EBUSY; 2404 2405 vma->ufence = NULL; 2406 xe_sync_ufence_put(f); 2407 } 2408 2409 return 0; 2410 } 2411 2412 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, 2413 struct xe_vma_op *op) 2414 { 2415 int err = 0; 2416 2417 switch (op->base.op) { 2418 case DRM_GPUVA_OP_MAP: 2419 err = vma_lock_and_validate(exec, op->map.vma, 2420 !xe_vm_in_fault_mode(vm) || 2421 op->map.immediate); 2422 break; 2423 case DRM_GPUVA_OP_REMAP: 2424 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va)); 2425 if (err) 2426 break; 2427 2428 err = vma_lock_and_validate(exec, 2429 gpuva_to_vma(op->base.remap.unmap->va), 2430 false); 2431 if (!err && op->remap.prev) 2432 err = vma_lock_and_validate(exec, op->remap.prev, true); 2433 if (!err && op->remap.next) 2434 err = vma_lock_and_validate(exec, op->remap.next, true); 2435 break; 2436 case DRM_GPUVA_OP_UNMAP: 2437 err = check_ufence(gpuva_to_vma(op->base.unmap.va)); 2438 if (err) 2439 break; 2440 2441 err = vma_lock_and_validate(exec, 2442 gpuva_to_vma(op->base.unmap.va), 2443 false); 2444 break; 2445 case DRM_GPUVA_OP_PREFETCH: 2446 { 2447 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2448 u32 region = op->prefetch.region; 2449 2450 xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type)); 2451 2452 err = vma_lock_and_validate(exec, 2453 gpuva_to_vma(op->base.prefetch.va), 2454 false); 2455 if (!err && !xe_vma_has_no_bo(vma)) 2456 err = xe_bo_migrate(xe_vma_bo(vma), 2457 region_to_mem_type[region]); 2458 break; 2459 } 2460 default: 2461 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2462 } 2463 2464 return err; 2465 } 2466 2467 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, 2468 struct xe_vm *vm, 2469 struct xe_vma_ops *vops) 2470 { 2471 struct xe_vma_op *op; 2472 int err; 2473 2474 err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 2475 if (err) 2476 return err; 2477 2478 list_for_each_entry(op, &vops->list, link) { 2479 err = op_lock_and_prep(exec, vm, op); 2480 if (err) 2481 return err; 2482 } 2483 2484 #ifdef TEST_VM_OPS_ERROR 2485 if (vops->inject_error && 2486 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK) 2487 return -ENOSPC; 2488 #endif 2489 2490 return 0; 2491 } 2492 2493 static void op_trace(struct xe_vma_op *op) 2494 { 2495 switch (op->base.op) { 2496 case DRM_GPUVA_OP_MAP: 2497 trace_xe_vma_bind(op->map.vma); 2498 break; 2499 case DRM_GPUVA_OP_REMAP: 2500 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va)); 2501 if (op->remap.prev) 2502 trace_xe_vma_bind(op->remap.prev); 2503 if (op->remap.next) 2504 trace_xe_vma_bind(op->remap.next); 2505 break; 2506 case DRM_GPUVA_OP_UNMAP: 2507 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va)); 2508 break; 2509 case DRM_GPUVA_OP_PREFETCH: 2510 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va)); 2511 break; 2512 default: 2513 XE_WARN_ON("NOT POSSIBLE"); 2514 } 2515 } 2516 2517 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops) 2518 { 2519 struct xe_vma_op *op; 2520 2521 list_for_each_entry(op, &vops->list, link) 2522 op_trace(op); 2523 } 2524 2525 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops) 2526 { 2527 struct xe_exec_queue *q = vops->q; 2528 struct xe_tile *tile; 2529 int number_tiles = 0; 2530 u8 id; 2531 2532 for_each_tile(tile, vm->xe, id) { 2533 if (vops->pt_update_ops[id].num_ops) 2534 ++number_tiles; 2535 2536 if (vops->pt_update_ops[id].q) 2537 continue; 2538 2539 if (q) { 2540 vops->pt_update_ops[id].q = 
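/*
 * With a user-supplied bind queue, each tile is assigned the current
 * entry and, when the queue is part of a multi-GT list, the next tile
 * gets the next entry of that list; tiles without a user queue fall back
 * to the VM's default bind queue vm->q[id] below.
 */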
q; 2541 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list)) 2542 q = list_next_entry(q, multi_gt_list); 2543 } else { 2544 vops->pt_update_ops[id].q = vm->q[id]; 2545 } 2546 } 2547 2548 return number_tiles; 2549 } 2550 2551 static struct dma_fence *ops_execute(struct xe_vm *vm, 2552 struct xe_vma_ops *vops) 2553 { 2554 struct xe_tile *tile; 2555 struct dma_fence *fence = NULL; 2556 struct dma_fence **fences = NULL; 2557 struct dma_fence_array *cf = NULL; 2558 int number_tiles = 0, current_fence = 0, err; 2559 u8 id; 2560 2561 number_tiles = vm_ops_setup_tile_args(vm, vops); 2562 if (number_tiles == 0) 2563 return ERR_PTR(-ENODATA); 2564 2565 if (number_tiles > 1) { 2566 fences = kmalloc_array(number_tiles, sizeof(*fences), 2567 GFP_KERNEL); 2568 if (!fences) { 2569 fence = ERR_PTR(-ENOMEM); 2570 goto err_trace; 2571 } 2572 } 2573 2574 for_each_tile(tile, vm->xe, id) { 2575 if (!vops->pt_update_ops[id].num_ops) 2576 continue; 2577 2578 err = xe_pt_update_ops_prepare(tile, vops); 2579 if (err) { 2580 fence = ERR_PTR(err); 2581 goto err_out; 2582 } 2583 } 2584 2585 trace_xe_vm_ops_execute(vops); 2586 2587 for_each_tile(tile, vm->xe, id) { 2588 if (!vops->pt_update_ops[id].num_ops) 2589 continue; 2590 2591 fence = xe_pt_update_ops_run(tile, vops); 2592 if (IS_ERR(fence)) 2593 goto err_out; 2594 2595 if (fences) 2596 fences[current_fence++] = fence; 2597 } 2598 2599 if (fences) { 2600 cf = dma_fence_array_create(number_tiles, fences, 2601 vm->composite_fence_ctx, 2602 vm->composite_fence_seqno++, 2603 false); 2604 if (!cf) { 2605 --vm->composite_fence_seqno; 2606 fence = ERR_PTR(-ENOMEM); 2607 goto err_out; 2608 } 2609 fence = &cf->base; 2610 } 2611 2612 for_each_tile(tile, vm->xe, id) { 2613 if (!vops->pt_update_ops[id].num_ops) 2614 continue; 2615 2616 xe_pt_update_ops_fini(tile, vops); 2617 } 2618 2619 return fence; 2620 2621 err_out: 2622 for_each_tile(tile, vm->xe, id) { 2623 if (!vops->pt_update_ops[id].num_ops) 2624 continue; 2625 2626 xe_pt_update_ops_abort(tile, vops); 2627 } 2628 while (current_fence) 2629 dma_fence_put(fences[--current_fence]); 2630 kfree(fences); 2631 kfree(cf); 2632 2633 err_trace: 2634 trace_xe_vm_ops_fail(vm); 2635 return fence; 2636 } 2637 2638 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence) 2639 { 2640 if (vma->ufence) 2641 xe_sync_ufence_put(vma->ufence); 2642 vma->ufence = __xe_sync_ufence_get(ufence); 2643 } 2644 2645 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op, 2646 struct xe_user_fence *ufence) 2647 { 2648 switch (op->base.op) { 2649 case DRM_GPUVA_OP_MAP: 2650 vma_add_ufence(op->map.vma, ufence); 2651 break; 2652 case DRM_GPUVA_OP_REMAP: 2653 if (op->remap.prev) 2654 vma_add_ufence(op->remap.prev, ufence); 2655 if (op->remap.next) 2656 vma_add_ufence(op->remap.next, ufence); 2657 break; 2658 case DRM_GPUVA_OP_UNMAP: 2659 break; 2660 case DRM_GPUVA_OP_PREFETCH: 2661 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence); 2662 break; 2663 default: 2664 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2665 } 2666 } 2667 2668 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, 2669 struct dma_fence *fence) 2670 { 2671 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q); 2672 struct xe_user_fence *ufence; 2673 struct xe_vma_op *op; 2674 int i; 2675 2676 ufence = find_ufence_get(vops->syncs, vops->num_syncs); 2677 list_for_each_entry(op, &vops->list, link) { 2678 if (ufence) 2679 op_add_ufence(vm, op, ufence); 2680 2681 if (op->base.op == DRM_GPUVA_OP_UNMAP) 2682 
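/*
 * A VMA removed by an UNMAP, or replaced by a REMAP, is destroyed with
 * the bind fence so that its teardown is ordered after the page-table
 * update the fence tracks.
 */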
xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence); 2683 else if (op->base.op == DRM_GPUVA_OP_REMAP) 2684 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), 2685 fence); 2686 } 2687 if (ufence) 2688 xe_sync_ufence_put(ufence); 2689 for (i = 0; i < vops->num_syncs; i++) 2690 xe_sync_entry_signal(vops->syncs + i, fence); 2691 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); 2692 } 2693 2694 static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm, 2695 struct xe_vma_ops *vops) 2696 { 2697 struct drm_exec exec; 2698 struct dma_fence *fence; 2699 int err; 2700 2701 lockdep_assert_held_write(&vm->lock); 2702 2703 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | 2704 DRM_EXEC_IGNORE_DUPLICATES, 0); 2705 drm_exec_until_all_locked(&exec) { 2706 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops); 2707 drm_exec_retry_on_contention(&exec); 2708 if (err) { 2709 fence = ERR_PTR(err); 2710 goto unlock; 2711 } 2712 2713 fence = ops_execute(vm, vops); 2714 if (IS_ERR(fence)) 2715 goto unlock; 2716 2717 vm_bind_ioctl_ops_fini(vm, vops, fence); 2718 } 2719 2720 unlock: 2721 drm_exec_fini(&exec); 2722 return fence; 2723 } 2724 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); 2725 2726 #define SUPPORTED_FLAGS_STUB \ 2727 (DRM_XE_VM_BIND_FLAG_READONLY | \ 2728 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \ 2729 DRM_XE_VM_BIND_FLAG_NULL | \ 2730 DRM_XE_VM_BIND_FLAG_DUMPABLE | \ 2731 DRM_XE_VM_BIND_FLAG_CHECK_PXP) 2732 2733 #ifdef TEST_VM_OPS_ERROR 2734 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR) 2735 #else 2736 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB 2737 #endif 2738 2739 #define XE_64K_PAGE_MASK 0xffffull 2740 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) 2741 2742 static int vm_bind_ioctl_check_args(struct xe_device *xe, 2743 struct drm_xe_vm_bind *args, 2744 struct drm_xe_vm_bind_op **bind_ops) 2745 { 2746 int err; 2747 int i; 2748 2749 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) || 2750 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 2751 return -EINVAL; 2752 2753 if (XE_IOCTL_DBG(xe, args->extensions)) 2754 return -EINVAL; 2755 2756 if (args->num_binds > 1) { 2757 u64 __user *bind_user = 2758 u64_to_user_ptr(args->vector_of_binds); 2759 2760 *bind_ops = kvmalloc_array(args->num_binds, 2761 sizeof(struct drm_xe_vm_bind_op), 2762 GFP_KERNEL | __GFP_ACCOUNT | 2763 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 2764 if (!*bind_ops) 2765 return args->num_binds > 1 ? 
-ENOBUFS : -ENOMEM; 2766 2767 err = __copy_from_user(*bind_ops, bind_user, 2768 sizeof(struct drm_xe_vm_bind_op) * 2769 args->num_binds); 2770 if (XE_IOCTL_DBG(xe, err)) { 2771 err = -EFAULT; 2772 goto free_bind_ops; 2773 } 2774 } else { 2775 *bind_ops = &args->bind; 2776 } 2777 2778 for (i = 0; i < args->num_binds; ++i) { 2779 u64 range = (*bind_ops)[i].range; 2780 u64 addr = (*bind_ops)[i].addr; 2781 u32 op = (*bind_ops)[i].op; 2782 u32 flags = (*bind_ops)[i].flags; 2783 u32 obj = (*bind_ops)[i].obj; 2784 u64 obj_offset = (*bind_ops)[i].obj_offset; 2785 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance; 2786 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 2787 u16 pat_index = (*bind_ops)[i].pat_index; 2788 u16 coh_mode; 2789 2790 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) { 2791 err = -EINVAL; 2792 goto free_bind_ops; 2793 } 2794 2795 pat_index = array_index_nospec(pat_index, xe->pat.n_entries); 2796 (*bind_ops)[i].pat_index = pat_index; 2797 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); 2798 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */ 2799 err = -EINVAL; 2800 goto free_bind_ops; 2801 } 2802 2803 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) { 2804 err = -EINVAL; 2805 goto free_bind_ops; 2806 } 2807 2808 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) || 2809 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) || 2810 XE_IOCTL_DBG(xe, obj && is_null) || 2811 XE_IOCTL_DBG(xe, obj_offset && is_null) || 2812 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP && 2813 is_null) || 2814 XE_IOCTL_DBG(xe, !obj && 2815 op == DRM_XE_VM_BIND_OP_MAP && 2816 !is_null) || 2817 XE_IOCTL_DBG(xe, !obj && 2818 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 2819 XE_IOCTL_DBG(xe, addr && 2820 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 2821 XE_IOCTL_DBG(xe, range && 2822 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 2823 XE_IOCTL_DBG(xe, obj && 2824 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 2825 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 2826 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 2827 XE_IOCTL_DBG(xe, obj && 2828 op == DRM_XE_VM_BIND_OP_PREFETCH) || 2829 XE_IOCTL_DBG(xe, prefetch_region && 2830 op != DRM_XE_VM_BIND_OP_PREFETCH) || 2831 XE_IOCTL_DBG(xe, !(BIT(prefetch_region) & 2832 xe->info.mem_region_mask)) || 2833 XE_IOCTL_DBG(xe, obj && 2834 op == DRM_XE_VM_BIND_OP_UNMAP)) { 2835 err = -EINVAL; 2836 goto free_bind_ops; 2837 } 2838 2839 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) || 2840 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) || 2841 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) || 2842 XE_IOCTL_DBG(xe, !range && 2843 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) { 2844 err = -EINVAL; 2845 goto free_bind_ops; 2846 } 2847 } 2848 2849 return 0; 2850 2851 free_bind_ops: 2852 if (args->num_binds > 1) 2853 kvfree(*bind_ops); 2854 return err; 2855 } 2856 2857 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, 2858 struct xe_exec_queue *q, 2859 struct xe_sync_entry *syncs, 2860 int num_syncs) 2861 { 2862 struct dma_fence *fence; 2863 int i, err = 0; 2864 2865 fence = xe_sync_in_fence_get(syncs, num_syncs, 2866 to_wait_exec_queue(vm, q), vm); 2867 if (IS_ERR(fence)) 2868 return PTR_ERR(fence); 2869 2870 for (i = 0; i < num_syncs; i++) 2871 xe_sync_entry_signal(&syncs[i], fence); 2872 2873 xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm, 2874 fence); 2875 dma_fence_put(fence); 2876 2877 return err; 2878 } 2879 2880 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 2881 struct xe_exec_queue *q, 2882 struct xe_sync_entry *syncs, u32 num_syncs) 2883 { 2884 memset(vops, 0, 
sizeof(*vops)); 2885 INIT_LIST_HEAD(&vops->list); 2886 vops->vm = vm; 2887 vops->q = q; 2888 vops->syncs = syncs; 2889 vops->num_syncs = num_syncs; 2890 } 2891 2892 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, 2893 u64 addr, u64 range, u64 obj_offset, 2894 u16 pat_index, u32 op, u32 bind_flags) 2895 { 2896 u16 coh_mode; 2897 2898 if (XE_IOCTL_DBG(xe, range > bo->size) || 2899 XE_IOCTL_DBG(xe, obj_offset > 2900 bo->size - range)) { 2901 return -EINVAL; 2902 } 2903 2904 /* 2905 * Some platforms require 64k VM_BIND alignment, 2906 * specifically those with XE_VRAM_FLAGS_NEED64K. 2907 * 2908 * Other platforms may have BOs set to 64k physical placement, 2909 * but can be mapped at 4k offsets anyway. This check is only 2910 * there for the former case. 2911 */ 2912 if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) && 2913 (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) { 2914 if (XE_IOCTL_DBG(xe, obj_offset & 2915 XE_64K_PAGE_MASK) || 2916 XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || 2917 XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) { 2918 return -EINVAL; 2919 } 2920 } 2921 2922 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); 2923 if (bo->cpu_caching) { 2924 if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 2925 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) { 2926 return -EINVAL; 2927 } 2928 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) { 2929 /* 2930 * An imported dma-buf from a different device should 2931 * require 1-way or 2-way coherency since we don't know 2932 * how it was mapped on the CPU. Just assume it is 2933 * potentially cached on the CPU side. 2934 */ 2935 return -EINVAL; 2936 } 2937 2938 /* If a BO is protected it can only be mapped if the key is still valid */ 2939 if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) && 2940 op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL) 2941 if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0)) 2942 return -ENOEXEC; 2943 2944 return 0; 2945 } 2946 2947 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 2948 { 2949 struct xe_device *xe = to_xe_device(dev); 2950 struct xe_file *xef = to_xe_file(file); 2951 struct drm_xe_vm_bind *args = data; 2952 struct drm_xe_sync __user *syncs_user; 2953 struct xe_bo **bos = NULL; 2954 struct drm_gpuva_ops **ops = NULL; 2955 struct xe_vm *vm; 2956 struct xe_exec_queue *q = NULL; 2957 u32 num_syncs, num_ufence = 0; 2958 struct xe_sync_entry *syncs = NULL; 2959 struct drm_xe_vm_bind_op *bind_ops; 2960 struct xe_vma_ops vops; 2961 struct dma_fence *fence; 2962 int err; 2963 int i; 2964 2965 err = vm_bind_ioctl_check_args(xe, args, &bind_ops); 2966 if (err) 2967 return err; 2968 2969 if (args->exec_queue_id) { 2970 q = xe_exec_queue_lookup(xef, args->exec_queue_id); 2971 if (XE_IOCTL_DBG(xe, !q)) { 2972 err = -ENOENT; 2973 goto free_objs; 2974 } 2975 2976 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) { 2977 err = -EINVAL; 2978 goto put_exec_queue; 2979 } 2980 } 2981 2982 vm = xe_vm_lookup(xef, args->vm_id); 2983 if (XE_IOCTL_DBG(xe, !vm)) { 2984 err = -EINVAL; 2985 goto put_exec_queue; 2986 } 2987 2988 err = down_write_killable(&vm->lock); 2989 if (err) 2990 goto put_vm; 2991 2992 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { 2993 err = -ENOENT; 2994 goto release_vm_lock; 2995 } 2996 2997 for (i = 0; i < args->num_binds; ++i) { 2998 u64 range = bind_ops[i].range; 2999 u64 addr = bind_ops[i].addr; 3000 3001 if (XE_IOCTL_DBG(xe, range > vm->size) || 3002 XE_IOCTL_DBG(xe, addr > vm->size -
range)) { 3003 err = -EINVAL; 3004 goto release_vm_lock; 3005 } 3006 } 3007 3008 if (args->num_binds) { 3009 bos = kvcalloc(args->num_binds, sizeof(*bos), 3010 GFP_KERNEL | __GFP_ACCOUNT | 3011 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3012 if (!bos) { 3013 err = -ENOMEM; 3014 goto release_vm_lock; 3015 } 3016 3017 ops = kvcalloc(args->num_binds, sizeof(*ops), 3018 GFP_KERNEL | __GFP_ACCOUNT | 3019 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3020 if (!ops) { 3021 err = -ENOMEM; 3022 goto release_vm_lock; 3023 } 3024 } 3025 3026 for (i = 0; i < args->num_binds; ++i) { 3027 struct drm_gem_object *gem_obj; 3028 u64 range = bind_ops[i].range; 3029 u64 addr = bind_ops[i].addr; 3030 u32 obj = bind_ops[i].obj; 3031 u64 obj_offset = bind_ops[i].obj_offset; 3032 u16 pat_index = bind_ops[i].pat_index; 3033 u32 op = bind_ops[i].op; 3034 u32 bind_flags = bind_ops[i].flags; 3035 3036 if (!obj) 3037 continue; 3038 3039 gem_obj = drm_gem_object_lookup(file, obj); 3040 if (XE_IOCTL_DBG(xe, !gem_obj)) { 3041 err = -ENOENT; 3042 goto put_obj; 3043 } 3044 bos[i] = gem_to_xe_bo(gem_obj); 3045 3046 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range, 3047 obj_offset, pat_index, op, 3048 bind_flags); 3049 if (err) 3050 goto put_obj; 3051 } 3052 3053 if (args->num_syncs) { 3054 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); 3055 if (!syncs) { 3056 err = -ENOMEM; 3057 goto put_obj; 3058 } 3059 } 3060 3061 syncs_user = u64_to_user_ptr(args->syncs); 3062 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { 3063 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], 3064 &syncs_user[num_syncs], 3065 (xe_vm_in_lr_mode(vm) ? 3066 SYNC_PARSE_FLAG_LR_MODE : 0) | 3067 (!args->num_binds ? 3068 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0)); 3069 if (err) 3070 goto free_syncs; 3071 3072 if (xe_sync_is_ufence(&syncs[num_syncs])) 3073 num_ufence++; 3074 } 3075 3076 if (XE_IOCTL_DBG(xe, num_ufence > 1)) { 3077 err = -EINVAL; 3078 goto free_syncs; 3079 } 3080 3081 if (!args->num_binds) { 3082 err = -ENODATA; 3083 goto free_syncs; 3084 } 3085 3086 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); 3087 for (i = 0; i < args->num_binds; ++i) { 3088 u64 range = bind_ops[i].range; 3089 u64 addr = bind_ops[i].addr; 3090 u32 op = bind_ops[i].op; 3091 u32 flags = bind_ops[i].flags; 3092 u64 obj_offset = bind_ops[i].obj_offset; 3093 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance; 3094 u16 pat_index = bind_ops[i].pat_index; 3095 3096 ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset, 3097 addr, range, op, flags, 3098 prefetch_region, pat_index); 3099 if (IS_ERR(ops[i])) { 3100 err = PTR_ERR(ops[i]); 3101 ops[i] = NULL; 3102 goto unwind_ops; 3103 } 3104 3105 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops); 3106 if (err) 3107 goto unwind_ops; 3108 3109 #ifdef TEST_VM_OPS_ERROR 3110 if (flags & FORCE_OP_ERROR) { 3111 vops.inject_error = true; 3112 vm->xe->vm_inject_error_position = 3113 (vm->xe->vm_inject_error_position + 1) % 3114 FORCE_OP_ERROR_COUNT; 3115 } 3116 #endif 3117 } 3118 3119 /* Nothing to do */ 3120 if (list_empty(&vops.list)) { 3121 err = -ENODATA; 3122 goto unwind_ops; 3123 } 3124 3125 err = xe_vma_ops_alloc(&vops, args->num_binds > 1); 3126 if (err) 3127 goto unwind_ops; 3128 3129 fence = vm_bind_ioctl_ops_execute(vm, &vops); 3130 if (IS_ERR(fence)) 3131 err = PTR_ERR(fence); 3132 else 3133 dma_fence_put(fence); 3134 3135 unwind_ops: 3136 if (err && err != -ENODATA) 3137 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); 3138 xe_vma_ops_fini(&vops); 3139 for (i = args->num_binds - 1; i 
>= 0; --i) 3140 if (ops[i]) 3141 drm_gpuva_ops_free(&vm->gpuvm, ops[i]); 3142 free_syncs: 3143 if (err == -ENODATA) 3144 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs); 3145 while (num_syncs--) 3146 xe_sync_entry_cleanup(&syncs[num_syncs]); 3147 3148 kfree(syncs); 3149 put_obj: 3150 for (i = 0; i < args->num_binds; ++i) 3151 xe_bo_put(bos[i]); 3152 release_vm_lock: 3153 up_write(&vm->lock); 3154 put_vm: 3155 xe_vm_put(vm); 3156 put_exec_queue: 3157 if (q) 3158 xe_exec_queue_put(q); 3159 free_objs: 3160 kvfree(bos); 3161 kvfree(ops); 3162 if (args->num_binds > 1) 3163 kvfree(bind_ops); 3164 return err; 3165 } 3166 3167 /** 3168 * xe_vm_bind_kernel_bo - bind a kernel BO to a VM 3169 * @vm: VM to bind the BO to 3170 * @bo: BO to bind 3171 * @q: exec queue to use for the bind (optional) 3172 * @addr: address at which to bind the BO 3173 * @cache_lvl: PAT cache level to use 3174 * 3175 * Execute a VM bind map operation on a kernel-owned BO to bind it into a 3176 * kernel-owned VM. 3177 * 3178 * Returns a dma_fence to track the binding completion if the job to do so was 3179 * successfully submitted, an error pointer otherwise. 3180 */ 3181 struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo, 3182 struct xe_exec_queue *q, u64 addr, 3183 enum xe_cache_level cache_lvl) 3184 { 3185 struct xe_vma_ops vops; 3186 struct drm_gpuva_ops *ops = NULL; 3187 struct dma_fence *fence; 3188 int err; 3189 3190 xe_bo_get(bo); 3191 xe_vm_get(vm); 3192 if (q) 3193 xe_exec_queue_get(q); 3194 3195 down_write(&vm->lock); 3196 3197 xe_vma_ops_init(&vops, vm, q, NULL, 0); 3198 3199 ops = vm_bind_ioctl_ops_create(vm, bo, 0, addr, bo->size, 3200 DRM_XE_VM_BIND_OP_MAP, 0, 0, 3201 vm->xe->pat.idx[cache_lvl]); 3202 if (IS_ERR(ops)) { 3203 err = PTR_ERR(ops); 3204 goto release_vm_lock; 3205 } 3206 3207 err = vm_bind_ioctl_ops_parse(vm, ops, &vops); 3208 if (err) 3209 goto release_vm_lock; 3210 3211 xe_assert(vm->xe, !list_empty(&vops.list)); 3212 3213 err = xe_vma_ops_alloc(&vops, false); 3214 if (err) 3215 goto unwind_ops; 3216 3217 fence = vm_bind_ioctl_ops_execute(vm, &vops); 3218 if (IS_ERR(fence)) 3219 err = PTR_ERR(fence); 3220 3221 unwind_ops: 3222 if (err && err != -ENODATA) 3223 vm_bind_ioctl_ops_unwind(vm, &ops, 1); 3224 3225 xe_vma_ops_fini(&vops); 3226 drm_gpuva_ops_free(&vm->gpuvm, ops); 3227 3228 release_vm_lock: 3229 up_write(&vm->lock); 3230 3231 if (q) 3232 xe_exec_queue_put(q); 3233 xe_vm_put(vm); 3234 xe_bo_put(bo); 3235 3236 if (err) 3237 fence = ERR_PTR(err); 3238 3239 return fence; 3240 } 3241 3242 /** 3243 * xe_vm_lock() - Lock the vm's dma_resv object 3244 * @vm: The struct xe_vm whose lock is to be locked 3245 * @intr: Whether to perform any wait interruptible 3246 * 3247 * Return: 0 on success, -EINTR if @intr is true and the wait for a 3248 * contended lock was interrupted. If @intr is false, the function 3249 * always returns 0. 3250 */ 3251 int xe_vm_lock(struct xe_vm *vm, bool intr) 3252 { 3253 if (intr) 3254 return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); 3255 3256 return dma_resv_lock(xe_vm_resv(vm), NULL); 3257 } 3258 3259 /** 3260 * xe_vm_unlock() - Unlock the vm's dma_resv object 3261 * @vm: The struct xe_vm whose lock is to be released. 3262 * 3263 * Unlock a buffer object lock that was locked by xe_vm_lock(). 
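 *
 * Illustrative pairing with xe_vm_lock() (sketch only, error handling
 * trimmed):
 *
 *	err = xe_vm_lock(vm, true);
 *	if (err)
 *		return err;
 *	... access state protected by the VM's dma_resv ...
 *	xe_vm_unlock(vm);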
3264 */ 3265 void xe_vm_unlock(struct xe_vm *vm) 3266 { 3267 dma_resv_unlock(xe_vm_resv(vm)); 3268 } 3269 3270 /** 3271 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock 3272 * @vma: VMA to invalidate 3273 * 3274 * Walks the list of page-table leaves, zeroing the entries owned by this 3275 * VMA, then invalidates the TLBs and blocks until the TLB invalidation is 3276 * complete. 3277 * 3278 * Returns 0 for success, negative error code otherwise. 3279 */ 3280 int xe_vm_invalidate_vma(struct xe_vma *vma) 3281 { 3282 struct xe_device *xe = xe_vma_vm(vma)->xe; 3283 struct xe_tile *tile; 3284 struct xe_gt_tlb_invalidation_fence 3285 fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; 3286 u8 id; 3287 u32 fence_id = 0; 3288 int ret = 0; 3289 3290 xe_assert(xe, !xe_vma_is_null(vma)); 3291 trace_xe_vma_invalidate(vma); 3292 3293 vm_dbg(&xe_vma_vm(vma)->xe->drm, 3294 "INVALIDATE: addr=0x%016llx, range=0x%016llx", 3295 xe_vma_start(vma), xe_vma_size(vma)); 3296 3297 /* Check that we don't race with page-table updates */ 3298 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { 3299 if (xe_vma_is_userptr(vma)) { 3300 WARN_ON_ONCE(!mmu_interval_check_retry 3301 (&to_userptr_vma(vma)->userptr.notifier, 3302 to_userptr_vma(vma)->userptr.notifier_seq)); 3303 WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)), 3304 DMA_RESV_USAGE_BOOKKEEP)); 3305 3306 } else { 3307 xe_bo_assert_held(xe_vma_bo(vma)); 3308 } 3309 } 3310 3311 for_each_tile(tile, xe, id) { 3312 if (xe_pt_zap_ptes(tile, vma)) { 3313 xe_device_wmb(xe); 3314 xe_gt_tlb_invalidation_fence_init(tile->primary_gt, 3315 &fence[fence_id], 3316 true); 3317 3318 ret = xe_gt_tlb_invalidation_vma(tile->primary_gt, 3319 &fence[fence_id], vma); 3320 if (ret) 3321 goto wait; 3322 ++fence_id; 3323 3324 if (!tile->media_gt) 3325 continue; 3326 3327 xe_gt_tlb_invalidation_fence_init(tile->media_gt, 3328 &fence[fence_id], 3329 true); 3330 3331 ret = xe_gt_tlb_invalidation_vma(tile->media_gt, 3332 &fence[fence_id], vma); 3333 if (ret) 3334 goto wait; 3335 ++fence_id; 3336 } 3337 } 3338 3339 wait: 3340 for (id = 0; id < fence_id; ++id) 3341 xe_gt_tlb_invalidation_fence_wait(&fence[id]); 3342 3343 vma->tile_invalidated = vma->tile_mask; 3344 3345 return ret; 3346 } 3347 3348 int xe_vm_validate_protected(struct xe_vm *vm) 3349 { 3350 struct drm_gpuva *gpuva; 3351 int err = 0; 3352 3353 if (!vm) 3354 return -ENODEV; 3355 3356 mutex_lock(&vm->snap_mutex); 3357 3358 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 3359 struct xe_vma *vma = gpuva_to_vma(gpuva); 3360 struct xe_bo *bo = vma->gpuva.gem.obj ?
3361 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 3362 3363 if (!bo) 3364 continue; 3365 3366 if (xe_bo_is_protected(bo)) { 3367 err = xe_pxp_bo_key_check(vm->xe->pxp, bo); 3368 if (err) 3369 break; 3370 } 3371 } 3372 3373 mutex_unlock(&vm->snap_mutex); 3374 return err; 3375 } 3376 3377 struct xe_vm_snapshot { 3378 unsigned long num_snaps; 3379 struct { 3380 u64 ofs, bo_ofs; 3381 unsigned long len; 3382 struct xe_bo *bo; 3383 void *data; 3384 struct mm_struct *mm; 3385 } snap[]; 3386 }; 3387 3388 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm) 3389 { 3390 unsigned long num_snaps = 0, i; 3391 struct xe_vm_snapshot *snap = NULL; 3392 struct drm_gpuva *gpuva; 3393 3394 if (!vm) 3395 return NULL; 3396 3397 mutex_lock(&vm->snap_mutex); 3398 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 3399 if (gpuva->flags & XE_VMA_DUMPABLE) 3400 num_snaps++; 3401 } 3402 3403 if (num_snaps) 3404 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT); 3405 if (!snap) { 3406 snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV); 3407 goto out_unlock; 3408 } 3409 3410 snap->num_snaps = num_snaps; 3411 i = 0; 3412 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 3413 struct xe_vma *vma = gpuva_to_vma(gpuva); 3414 struct xe_bo *bo = vma->gpuva.gem.obj ? 3415 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 3416 3417 if (!(gpuva->flags & XE_VMA_DUMPABLE)) 3418 continue; 3419 3420 snap->snap[i].ofs = xe_vma_start(vma); 3421 snap->snap[i].len = xe_vma_size(vma); 3422 if (bo) { 3423 snap->snap[i].bo = xe_bo_get(bo); 3424 snap->snap[i].bo_ofs = xe_vma_bo_offset(vma); 3425 } else if (xe_vma_is_userptr(vma)) { 3426 struct mm_struct *mm = 3427 to_userptr_vma(vma)->userptr.notifier.mm; 3428 3429 if (mmget_not_zero(mm)) 3430 snap->snap[i].mm = mm; 3431 else 3432 snap->snap[i].data = ERR_PTR(-EFAULT); 3433 3434 snap->snap[i].bo_ofs = xe_vma_userptr(vma); 3435 } else { 3436 snap->snap[i].data = ERR_PTR(-ENOENT); 3437 } 3438 i++; 3439 } 3440 3441 out_unlock: 3442 mutex_unlock(&vm->snap_mutex); 3443 return snap; 3444 } 3445 3446 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) 3447 { 3448 if (IS_ERR_OR_NULL(snap)) 3449 return; 3450 3451 for (int i = 0; i < snap->num_snaps; i++) { 3452 struct xe_bo *bo = snap->snap[i].bo; 3453 int err; 3454 3455 if (IS_ERR(snap->snap[i].data)) 3456 continue; 3457 3458 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER); 3459 if (!snap->snap[i].data) { 3460 snap->snap[i].data = ERR_PTR(-ENOMEM); 3461 goto cleanup_bo; 3462 } 3463 3464 if (bo) { 3465 err = xe_bo_read(bo, snap->snap[i].bo_ofs, 3466 snap->snap[i].data, snap->snap[i].len); 3467 } else { 3468 void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs; 3469 3470 kthread_use_mm(snap->snap[i].mm); 3471 if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len)) 3472 err = 0; 3473 else 3474 err = -EFAULT; 3475 kthread_unuse_mm(snap->snap[i].mm); 3476 3477 mmput(snap->snap[i].mm); 3478 snap->snap[i].mm = NULL; 3479 } 3480 3481 if (err) { 3482 kvfree(snap->snap[i].data); 3483 snap->snap[i].data = ERR_PTR(err); 3484 } 3485 3486 cleanup_bo: 3487 xe_bo_put(bo); 3488 snap->snap[i].bo = NULL; 3489 } 3490 } 3491 3492 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) 3493 { 3494 unsigned long i, j; 3495 3496 if (IS_ERR_OR_NULL(snap)) { 3497 drm_printf(p, "[0].error: %li\n", PTR_ERR(snap)); 3498 return; 3499 } 3500 3501 for (i = 0; i < snap->num_snaps; i++) { 3502 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len); 3503 3504 if 
(IS_ERR(snap->snap[i].data)) { 3505 drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs, 3506 PTR_ERR(snap->snap[i].data)); 3507 continue; 3508 } 3509 3510 drm_printf(p, "[%llx].data: ", snap->snap[i].ofs); 3511 3512 for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) { 3513 u32 *val = snap->snap[i].data + j; 3514 char dumped[ASCII85_BUFSZ]; 3515 3516 drm_puts(p, ascii85_encode(*val, dumped)); 3517 } 3518 3519 drm_puts(p, "\n"); 3520 } 3521 } 3522 3523 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap) 3524 { 3525 unsigned long i; 3526 3527 if (IS_ERR_OR_NULL(snap)) 3528 return; 3529 3530 for (i = 0; i < snap->num_snaps; i++) { 3531 if (!IS_ERR(snap->snap[i].data)) 3532 kvfree(snap->snap[i].data); 3533 xe_bo_put(snap->snap[i].bo); 3534 if (snap->snap[i].mm) 3535 mmput(snap->snap[i].mm); 3536 } 3537 kvfree(snap); 3538 } 3539
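/*
 * VM snapshot life cycle, as implemented above (illustrative summary):
 *
 *	snap = xe_vm_snapshot_capture(vm);	- records VMA ranges under
 *						  vm->snap_mutex using only
 *						  GFP_NOWAIT allocations
 *	xe_vm_snapshot_capture_delayed(snap);	- copies BO/userptr contents,
 *						  may sleep
 *	xe_vm_snapshot_print(snap, p);		- ascii85-encodes the data to
 *						  the drm_printer
 *	xe_vm_snapshot_free(snap);		- drops BO and mm references
 */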