// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_vm.h"

#include <linux/dma-fence-array.h>
#include <linux/nospec.h>

#include <drm/drm_exec.h>
#include <drm/drm_print.h>
#include <drm/ttm/ttm_tt.h>
#include <uapi/drm/xe_drm.h>
#include <linux/ascii85.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/swap.h>

#include <generated/xe_wa_oob.h>

#include "regs/xe_gtt_defs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue.h"
#include "xe_gt_pagefault.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_migrate.h"
#include "xe_pat.h"
#include "xe_pm.h"
#include "xe_preempt_fence.h"
#include "xe_pt.h"
#include "xe_res_cursor.h"
#include "xe_sync.h"
#include "xe_trace_bo.h"
#include "xe_wa.h"
#include "xe_hmm.h"

static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
{
	return vm->gpuvm.r_obj;
}

/**
 * xe_vma_userptr_check_repin() - Advisory check for repin needed
 * @uvma: The userptr vma
 *
 * Check if the userptr vma has been invalidated since last successful
 * repin. The check is advisory only and the function can be called
 * without the vm->userptr.notifier_lock held. There is no guarantee that the
 * vma userptr will remain valid after a lockless check, so typically
 * the call needs to be followed by a proper check under the notifier_lock.
 *
 * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
 */
int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
{
	return mmu_interval_check_retry(&uvma->userptr.notifier,
					uvma->userptr.notifier_seq) ?
63 -EAGAIN : 0; 64 } 65 66 int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) 67 { 68 struct xe_vma *vma = &uvma->vma; 69 struct xe_vm *vm = xe_vma_vm(vma); 70 struct xe_device *xe = vm->xe; 71 72 lockdep_assert_held(&vm->lock); 73 xe_assert(xe, xe_vma_is_userptr(vma)); 74 75 return xe_hmm_userptr_populate_range(uvma, false); 76 } 77 78 static bool preempt_fences_waiting(struct xe_vm *vm) 79 { 80 struct xe_exec_queue *q; 81 82 lockdep_assert_held(&vm->lock); 83 xe_vm_assert_held(vm); 84 85 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 86 if (!q->lr.pfence || 87 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, 88 &q->lr.pfence->flags)) { 89 return true; 90 } 91 } 92 93 return false; 94 } 95 96 static void free_preempt_fences(struct list_head *list) 97 { 98 struct list_head *link, *next; 99 100 list_for_each_safe(link, next, list) 101 xe_preempt_fence_free(to_preempt_fence_from_link(link)); 102 } 103 104 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list, 105 unsigned int *count) 106 { 107 lockdep_assert_held(&vm->lock); 108 xe_vm_assert_held(vm); 109 110 if (*count >= vm->preempt.num_exec_queues) 111 return 0; 112 113 for (; *count < vm->preempt.num_exec_queues; ++(*count)) { 114 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc(); 115 116 if (IS_ERR(pfence)) 117 return PTR_ERR(pfence); 118 119 list_move_tail(xe_preempt_fence_link(pfence), list); 120 } 121 122 return 0; 123 } 124 125 static int wait_for_existing_preempt_fences(struct xe_vm *vm) 126 { 127 struct xe_exec_queue *q; 128 129 xe_vm_assert_held(vm); 130 131 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 132 if (q->lr.pfence) { 133 long timeout = dma_fence_wait(q->lr.pfence, false); 134 135 /* Only -ETIME on fence indicates VM needs to be killed */ 136 if (timeout < 0 || q->lr.pfence->error == -ETIME) 137 return -ETIME; 138 139 dma_fence_put(q->lr.pfence); 140 q->lr.pfence = NULL; 141 } 142 } 143 144 return 0; 145 } 146 147 static bool xe_vm_is_idle(struct xe_vm *vm) 148 { 149 struct xe_exec_queue *q; 150 151 xe_vm_assert_held(vm); 152 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 153 if (!xe_exec_queue_is_idle(q)) 154 return false; 155 } 156 157 return true; 158 } 159 160 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list) 161 { 162 struct list_head *link; 163 struct xe_exec_queue *q; 164 165 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 166 struct dma_fence *fence; 167 168 link = list->next; 169 xe_assert(vm->xe, link != list); 170 171 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link), 172 q, q->lr.context, 173 ++q->lr.seqno); 174 dma_fence_put(q->lr.pfence); 175 q->lr.pfence = fence; 176 } 177 } 178 179 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) 180 { 181 struct xe_exec_queue *q; 182 int err; 183 184 xe_bo_assert_held(bo); 185 186 if (!vm->preempt.num_exec_queues) 187 return 0; 188 189 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues); 190 if (err) 191 return err; 192 193 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) 194 if (q->lr.pfence) { 195 dma_resv_add_fence(bo->ttm.base.resv, 196 q->lr.pfence, 197 DMA_RESV_USAGE_BOOKKEEP); 198 } 199 200 return 0; 201 } 202 203 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm, 204 struct drm_exec *exec) 205 { 206 struct xe_exec_queue *q; 207 208 lockdep_assert_held(&vm->lock); 209 xe_vm_assert_held(vm); 210 211 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 212 q->ops->resume(q); 213 
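		/*
		 * After resuming the queue, re-install its new preempt fence
		 * into the VM's reservation object (and its external objects)
		 * with BOOKKEEP usage, so the next invalidation or eviction
		 * can once again wait on it.
		 */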
		drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
					 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
	}
}

int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	struct drm_gpuvm_exec vm_exec = {
		.vm = &vm->gpuvm,
		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
		.num_fences = 1,
	};
	struct drm_exec *exec = &vm_exec.exec;
	struct dma_fence *pfence;
	int err;
	bool wait;

	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));

	down_write(&vm->lock);
	err = drm_gpuvm_exec_lock(&vm_exec);
	if (err)
		goto out_up_write;

	pfence = xe_preempt_fence_create(q, q->lr.context,
					 ++q->lr.seqno);
	if (!pfence) {
		err = -ENOMEM;
		goto out_fini;
	}

	list_add(&q->lr.link, &vm->preempt.exec_queues);
	++vm->preempt.num_exec_queues;
	q->lr.pfence = pfence;

	down_read(&vm->userptr.notifier_lock);

	drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
				 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);

	/*
	 * Check to see if a preemption on the VM or a userptr invalidation is
	 * in flight; if so, trigger this preempt fence to sync state with the
	 * other preempt fences on the VM.
	 */
	wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
	if (wait)
		dma_fence_enable_sw_signaling(pfence);

	up_read(&vm->userptr.notifier_lock);

out_fini:
	drm_exec_fini(exec);
out_up_write:
	up_write(&vm->lock);

	return err;
}

/**
 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
 * @vm: The VM.
 * @q: The exec_queue
 *
 * Note that this function might be called multiple times on the same queue.
 */
void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	if (!xe_vm_in_preempt_fence_mode(vm))
		return;

	down_write(&vm->lock);
	if (!list_empty(&q->lr.link)) {
		list_del_init(&q->lr.link);
		--vm->preempt.num_exec_queues;
	}
	if (q->lr.pfence) {
		dma_fence_enable_sw_signaling(q->lr.pfence);
		dma_fence_put(q->lr.pfence);
		q->lr.pfence = NULL;
	}
	up_write(&vm->lock);
}

/**
 * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
 * that need repinning.
 * @vm: The VM.
 *
 * This function checks for whether the VM has userptrs that need repinning,
 * and provides a release-type barrier on the userptr.notifier_lock after
 * checking.
 *
 * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
 */
int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
{
	lockdep_assert_held_read(&vm->userptr.notifier_lock);

	return (list_empty(&vm->userptr.repin_list) &&
		list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
}

#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000

/**
 * xe_vm_kill() - VM Kill
 * @vm: The VM.
 * @unlocked: Flag indicating the VM's dma-resv is not held
 *
 * Kill the VM by setting the banned flag, indicating the VM is no longer
 * available for use. If in preempt fence mode, also kill all exec queues
 * attached to the VM.
 */
void xe_vm_kill(struct xe_vm *vm, bool unlocked)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);

	if (unlocked)
		xe_vm_lock(vm, false);

	vm->flags |= XE_VM_FLAG_BANNED;
	trace_xe_vm_kill(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
		q->ops->kill(q);

	if (unlocked)
		xe_vm_unlock(vm);

	/* TODO: Inform user the VM is banned */
}

/**
 * xe_vm_validate_should_retry() - Whether to retry after a validate error.
 * @exec: The drm_exec object used for locking before validation.
 * @err: The error returned from ttm_bo_validate().
 * @end: A ktime_t cookie that should be set to 0 before first use and
 * that should be reused on subsequent calls.
 *
 * With multiple active VMs, under memory pressure, it is possible that
 * ttm_bo_validate() runs into -EDEADLK and in such a case returns -ENOMEM.
 * Until ttm properly handles locking in such scenarios, the best thing the
 * driver can do is retry with a timeout. Check if that is necessary, and
 * if so unlock the drm_exec's objects while keeping the ticket to prepare
 * for a rerun.
 *
 * Return: true if a retry after drm_exec_init() is recommended;
 * false otherwise.
 */
bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
{
	ktime_t cur;

	if (err != -ENOMEM)
		return false;

	cur = ktime_get();
	*end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
	if (!ktime_before(cur, *end))
		return false;

	msleep(20);
	return true;
}

static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
{
	struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
	struct drm_gpuva *gpuva;
	int ret;

	lockdep_assert_held(&vm->lock);
	drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
		list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
			       &vm->rebind_list);

	ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
	if (ret)
		return ret;

	vm_bo->evicted = false;
	return 0;
}

/**
 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
 * @vm: The vm for which we are rebinding.
 * @exec: The struct drm_exec with the locked GEM objects.
 * @num_fences: The number of fences to reserve for the operation, not
 * including rebinds and validations.
 *
 * Validates all evicted gem objects and rebinds their vmas. Note that
 * rebindings may cause evictions and hence the validation-rebind
 * sequence is rerun until there are no more objects to validate.
 *
 * Return: 0 on success, negative error code on error. In particular,
 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
 * the drm_exec transaction needs to be restarted.
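 *
 * An illustrative sketch of the intended calling pattern (mirroring the
 * preempt rebind worker below; not a drop-in recipe):
 *
 *	drm_exec_until_all_locked(&exec) {
 *		err = drm_gpuvm_prepare_vm(&vm->gpuvm, &exec, 0);
 *		if (!err)
 *			err = xe_vm_validate_rebind(vm, &exec, 1);
 *		drm_exec_retry_on_contention(&exec);
 *		if (err)
 *			break;
 *	}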
 */
int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
			  unsigned int num_fences)
{
	struct drm_gem_object *obj;
	unsigned long index;
	int ret;

	do {
		ret = drm_gpuvm_validate(&vm->gpuvm, exec);
		if (ret)
			return ret;

		ret = xe_vm_rebind(vm, false);
		if (ret)
			return ret;
	} while (!list_empty(&vm->gpuvm.evict.list));

	drm_exec_for_each_locked_object(exec, index, obj) {
		ret = dma_resv_reserve_fences(obj->resv, num_fences);
		if (ret)
			return ret;
	}

	return 0;
}

static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
				 bool *done)
{
	int err;

	err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	if (xe_vm_is_idle(vm)) {
		vm->preempt.rebind_deactivated = true;
		*done = true;
		return 0;
	}

	if (!preempt_fences_waiting(vm)) {
		*done = true;
		return 0;
	}

	err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	err = wait_for_existing_preempt_fences(vm);
	if (err)
		return err;

	/*
	 * Add validation and rebinding to the locking loop since both can
	 * cause evictions which may require blocking dma_resv locks.
	 * The fence reservation here is intended for the new preempt fences
	 * we attach at the end of the rebind work.
	 */
	return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
}

static void preempt_rebind_work_func(struct work_struct *w)
{
	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
	struct drm_exec exec;
	unsigned int fence_count = 0;
	LIST_HEAD(preempt_fences);
	ktime_t end = 0;
	int err = 0;
	long wait;
	int __maybe_unused tries = 0;

	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
	trace_xe_vm_rebind_worker_enter(vm);

	down_write(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm)) {
		up_write(&vm->lock);
		trace_xe_vm_rebind_worker_exit(vm);
		return;
	}

retry:
	if (xe_vm_userptr_check_repin(vm)) {
		err = xe_vm_userptr_pin(vm);
		if (err)
			goto out_unlock_outer;
	}

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);

	drm_exec_until_all_locked(&exec) {
		bool done = false;

		err = xe_preempt_work_begin(&exec, vm, &done);
		drm_exec_retry_on_contention(&exec);
		if (err || done) {
			drm_exec_fini(&exec);
			if (err && xe_vm_validate_should_retry(&exec, err, &end))
				err = -EAGAIN;

			goto out_unlock_outer;
		}
	}

	err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
	if (err)
		goto out_unlock;

	err = xe_vm_rebind(vm, true);
	if (err)
		goto out_unlock;

	/* Wait on rebinds and munmap style VM unbinds */
	wait = dma_resv_wait_timeout(xe_vm_resv(vm),
				     DMA_RESV_USAGE_KERNEL,
				     false, MAX_SCHEDULE_TIMEOUT);
	if (wait <= 0) {
		err = -ETIME;
		goto out_unlock;
	}

#define retry_required(__tries, __vm) \
	(IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
	 (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
	 __xe_vm_userptr_needs_repin(__vm))

	down_read(&vm->userptr.notifier_lock);
	if (retry_required(tries, vm)) {
		up_read(&vm->userptr.notifier_lock);
		err = -EAGAIN;
		goto out_unlock;
	}

#undef retry_required

	spin_lock(&vm->xe->ttm.lru_lock);
	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
	spin_unlock(&vm->xe->ttm.lru_lock);

	/* Point of no return. */
	arm_preempt_fences(vm, &preempt_fences);
	resume_and_reinstall_preempt_fences(vm, &exec);
	up_read(&vm->userptr.notifier_lock);

out_unlock:
	drm_exec_fini(&exec);
out_unlock_outer:
	if (err == -EAGAIN) {
		trace_xe_vm_rebind_worker_retry(vm);
		goto retry;
	}

	if (err) {
		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
		xe_vm_kill(vm, true);
	}
	up_write(&vm->lock);

	free_preempt_fences(&preempt_fences);

	trace_xe_vm_rebind_worker_exit(vm);
}

static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
				   const struct mmu_notifier_range *range,
				   unsigned long cur_seq)
{
	struct xe_userptr *userptr = container_of(mni, typeof(*userptr), notifier);
	struct xe_userptr_vma *uvma = container_of(userptr, typeof(*uvma), userptr);
	struct xe_vma *vma = &uvma->vma;
	struct xe_vm *vm = xe_vma_vm(vma);
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	long err;

	xe_assert(vm->xe, xe_vma_is_userptr(vma));
	trace_xe_vma_userptr_invalidate(vma);

	if (!mmu_notifier_range_blockable(range))
		return false;

	vm_dbg(&xe_vma_vm(vma)->xe->drm,
	       "NOTIFIER: addr=0x%016llx, range=0x%016llx",
	       xe_vma_start(vma), xe_vma_size(vma));

	down_write(&vm->userptr.notifier_lock);
	mmu_interval_set_seq(mni, cur_seq);

	/* No need to stop gpu access if the userptr is not yet bound. */
	if (!userptr->initial_bind) {
		up_write(&vm->userptr.notifier_lock);
		return true;
	}

	/*
	 * Tell exec and rebind worker they need to repin and rebind this
	 * userptr.
	 */
	if (!xe_vm_in_fault_mode(vm) &&
	    !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) {
		spin_lock(&vm->userptr.invalidated_lock);
		list_move_tail(&userptr->invalidate_link,
			       &vm->userptr.invalidated);
		spin_unlock(&vm->userptr.invalidated_lock);
	}

	up_write(&vm->userptr.notifier_lock);

	/*
	 * Preempt fences turn into schedule disables, pipeline these.
	 * Note that even in fault mode, we need to wait for binds and
	 * unbinds to complete, and those are attached as BOOKKEEP fences
	 * to the vm.
632 */ 633 dma_resv_iter_begin(&cursor, xe_vm_resv(vm), 634 DMA_RESV_USAGE_BOOKKEEP); 635 dma_resv_for_each_fence_unlocked(&cursor, fence) 636 dma_fence_enable_sw_signaling(fence); 637 dma_resv_iter_end(&cursor); 638 639 err = dma_resv_wait_timeout(xe_vm_resv(vm), 640 DMA_RESV_USAGE_BOOKKEEP, 641 false, MAX_SCHEDULE_TIMEOUT); 642 XE_WARN_ON(err <= 0); 643 644 if (xe_vm_in_fault_mode(vm)) { 645 err = xe_vm_invalidate_vma(vma); 646 XE_WARN_ON(err); 647 } 648 649 trace_xe_vma_userptr_invalidate_complete(vma); 650 651 return true; 652 } 653 654 static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { 655 .invalidate = vma_userptr_invalidate, 656 }; 657 658 int xe_vm_userptr_pin(struct xe_vm *vm) 659 { 660 struct xe_userptr_vma *uvma, *next; 661 int err = 0; 662 LIST_HEAD(tmp_evict); 663 664 xe_assert(vm->xe, !xe_vm_in_fault_mode(vm)); 665 lockdep_assert_held_write(&vm->lock); 666 667 /* Collect invalidated userptrs */ 668 spin_lock(&vm->userptr.invalidated_lock); 669 list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated, 670 userptr.invalidate_link) { 671 list_del_init(&uvma->userptr.invalidate_link); 672 list_move_tail(&uvma->userptr.repin_link, 673 &vm->userptr.repin_list); 674 } 675 spin_unlock(&vm->userptr.invalidated_lock); 676 677 /* Pin and move to temporary list */ 678 list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, 679 userptr.repin_link) { 680 err = xe_vma_userptr_pin_pages(uvma); 681 if (err == -EFAULT) { 682 list_del_init(&uvma->userptr.repin_link); 683 684 /* Wait for pending binds */ 685 xe_vm_lock(vm, false); 686 dma_resv_wait_timeout(xe_vm_resv(vm), 687 DMA_RESV_USAGE_BOOKKEEP, 688 false, MAX_SCHEDULE_TIMEOUT); 689 690 err = xe_vm_invalidate_vma(&uvma->vma); 691 xe_vm_unlock(vm); 692 if (err) 693 return err; 694 } else { 695 if (err < 0) 696 return err; 697 698 list_del_init(&uvma->userptr.repin_link); 699 list_move_tail(&uvma->vma.combined_links.rebind, 700 &vm->rebind_list); 701 } 702 } 703 704 return 0; 705 } 706 707 /** 708 * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs 709 * that need repinning. 710 * @vm: The VM. 711 * 712 * This function does an advisory check for whether the VM has userptrs that 713 * need repinning. 714 * 715 * Return: 0 if there are no indications of userptrs needing repinning, 716 * -EAGAIN if there are. 717 */ 718 int xe_vm_userptr_check_repin(struct xe_vm *vm) 719 { 720 return (list_empty_careful(&vm->userptr.repin_list) && 721 list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN; 722 } 723 724 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) 725 { 726 int i; 727 728 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) { 729 if (!vops->pt_update_ops[i].num_ops) 730 continue; 731 732 vops->pt_update_ops[i].ops = 733 kmalloc_array(vops->pt_update_ops[i].num_ops, 734 sizeof(*vops->pt_update_ops[i].ops), 735 GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 736 if (!vops->pt_update_ops[i].ops) 737 return array_of_binds ? 
-ENOBUFS : -ENOMEM; 738 } 739 740 return 0; 741 } 742 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO); 743 744 static void xe_vma_ops_fini(struct xe_vma_ops *vops) 745 { 746 int i; 747 748 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 749 kfree(vops->pt_update_ops[i].ops); 750 } 751 752 static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask) 753 { 754 int i; 755 756 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 757 if (BIT(i) & tile_mask) 758 ++vops->pt_update_ops[i].num_ops; 759 } 760 761 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, 762 u8 tile_mask) 763 { 764 INIT_LIST_HEAD(&op->link); 765 op->tile_mask = tile_mask; 766 op->base.op = DRM_GPUVA_OP_MAP; 767 op->base.map.va.addr = vma->gpuva.va.addr; 768 op->base.map.va.range = vma->gpuva.va.range; 769 op->base.map.gem.obj = vma->gpuva.gem.obj; 770 op->base.map.gem.offset = vma->gpuva.gem.offset; 771 op->map.vma = vma; 772 op->map.immediate = true; 773 op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE; 774 op->map.is_null = xe_vma_is_null(vma); 775 } 776 777 static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, 778 u8 tile_mask) 779 { 780 struct xe_vma_op *op; 781 782 op = kzalloc(sizeof(*op), GFP_KERNEL); 783 if (!op) 784 return -ENOMEM; 785 786 xe_vm_populate_rebind(op, vma, tile_mask); 787 list_add_tail(&op->link, &vops->list); 788 xe_vma_ops_incr_pt_update_ops(vops, tile_mask); 789 790 return 0; 791 } 792 793 static struct dma_fence *ops_execute(struct xe_vm *vm, 794 struct xe_vma_ops *vops); 795 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 796 struct xe_exec_queue *q, 797 struct xe_sync_entry *syncs, u32 num_syncs); 798 799 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) 800 { 801 struct dma_fence *fence; 802 struct xe_vma *vma, *next; 803 struct xe_vma_ops vops; 804 struct xe_vma_op *op, *next_op; 805 int err, i; 806 807 lockdep_assert_held(&vm->lock); 808 if ((xe_vm_in_lr_mode(vm) && !rebind_worker) || 809 list_empty(&vm->rebind_list)) 810 return 0; 811 812 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 813 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 814 vops.pt_update_ops[i].wait_vm_bookkeep = true; 815 816 xe_vm_assert_held(vm); 817 list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { 818 xe_assert(vm->xe, vma->tile_present); 819 820 if (rebind_worker) 821 trace_xe_vma_rebind_worker(vma); 822 else 823 trace_xe_vma_rebind_exec(vma); 824 825 err = xe_vm_ops_add_rebind(&vops, vma, 826 vma->tile_present); 827 if (err) 828 goto free_ops; 829 } 830 831 err = xe_vma_ops_alloc(&vops, false); 832 if (err) 833 goto free_ops; 834 835 fence = ops_execute(vm, &vops); 836 if (IS_ERR(fence)) { 837 err = PTR_ERR(fence); 838 } else { 839 dma_fence_put(fence); 840 list_for_each_entry_safe(vma, next, &vm->rebind_list, 841 combined_links.rebind) 842 list_del_init(&vma->combined_links.rebind); 843 } 844 free_ops: 845 list_for_each_entry_safe(op, next_op, &vops.list, link) { 846 list_del(&op->link); 847 kfree(op); 848 } 849 xe_vma_ops_fini(&vops); 850 851 return err; 852 } 853 854 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask) 855 { 856 struct dma_fence *fence = NULL; 857 struct xe_vma_ops vops; 858 struct xe_vma_op *op, *next_op; 859 struct xe_tile *tile; 860 u8 id; 861 int err; 862 863 lockdep_assert_held(&vm->lock); 864 xe_vm_assert_held(vm); 865 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 866 867 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 868 for_each_tile(tile, vm->xe, id) { 869 
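		/*
		 * Fault-mode rebinds run on each tile's migrate exec queue,
		 * and the page-table update is set up to wait on the VM's
		 * BOOKKEEP fences.
		 */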
vops.pt_update_ops[id].wait_vm_bookkeep = true; 870 vops.pt_update_ops[tile->id].q = 871 xe_tile_migrate_exec_queue(tile); 872 } 873 874 err = xe_vm_ops_add_rebind(&vops, vma, tile_mask); 875 if (err) 876 return ERR_PTR(err); 877 878 err = xe_vma_ops_alloc(&vops, false); 879 if (err) { 880 fence = ERR_PTR(err); 881 goto free_ops; 882 } 883 884 fence = ops_execute(vm, &vops); 885 886 free_ops: 887 list_for_each_entry_safe(op, next_op, &vops.list, link) { 888 list_del(&op->link); 889 kfree(op); 890 } 891 xe_vma_ops_fini(&vops); 892 893 return fence; 894 } 895 896 static void xe_vma_free(struct xe_vma *vma) 897 { 898 if (xe_vma_is_userptr(vma)) 899 kfree(to_userptr_vma(vma)); 900 else 901 kfree(vma); 902 } 903 904 #define VMA_CREATE_FLAG_READ_ONLY BIT(0) 905 #define VMA_CREATE_FLAG_IS_NULL BIT(1) 906 #define VMA_CREATE_FLAG_DUMPABLE BIT(2) 907 908 static struct xe_vma *xe_vma_create(struct xe_vm *vm, 909 struct xe_bo *bo, 910 u64 bo_offset_or_userptr, 911 u64 start, u64 end, 912 u16 pat_index, unsigned int flags) 913 { 914 struct xe_vma *vma; 915 struct xe_tile *tile; 916 u8 id; 917 bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY); 918 bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL); 919 bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE); 920 921 xe_assert(vm->xe, start < end); 922 xe_assert(vm->xe, end < vm->size); 923 924 /* 925 * Allocate and ensure that the xe_vma_is_userptr() return 926 * matches what was allocated. 927 */ 928 if (!bo && !is_null) { 929 struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL); 930 931 if (!uvma) 932 return ERR_PTR(-ENOMEM); 933 934 vma = &uvma->vma; 935 } else { 936 vma = kzalloc(sizeof(*vma), GFP_KERNEL); 937 if (!vma) 938 return ERR_PTR(-ENOMEM); 939 940 if (is_null) 941 vma->gpuva.flags |= DRM_GPUVA_SPARSE; 942 if (bo) 943 vma->gpuva.gem.obj = &bo->ttm.base; 944 } 945 946 INIT_LIST_HEAD(&vma->combined_links.rebind); 947 948 INIT_LIST_HEAD(&vma->gpuva.gem.entry); 949 vma->gpuva.vm = &vm->gpuvm; 950 vma->gpuva.va.addr = start; 951 vma->gpuva.va.range = end - start + 1; 952 if (read_only) 953 vma->gpuva.flags |= XE_VMA_READ_ONLY; 954 if (dumpable) 955 vma->gpuva.flags |= XE_VMA_DUMPABLE; 956 957 for_each_tile(tile, vm->xe, id) 958 vma->tile_mask |= 0x1 << id; 959 960 if (vm->xe->info.has_atomic_enable_pte_bit) 961 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; 962 963 vma->pat_index = pat_index; 964 965 if (bo) { 966 struct drm_gpuvm_bo *vm_bo; 967 968 xe_bo_assert_held(bo); 969 970 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base); 971 if (IS_ERR(vm_bo)) { 972 xe_vma_free(vma); 973 return ERR_CAST(vm_bo); 974 } 975 976 drm_gpuvm_bo_extobj_add(vm_bo); 977 drm_gem_object_get(&bo->ttm.base); 978 vma->gpuva.gem.offset = bo_offset_or_userptr; 979 drm_gpuva_link(&vma->gpuva, vm_bo); 980 drm_gpuvm_bo_put(vm_bo); 981 } else /* userptr or null */ { 982 if (!is_null) { 983 struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; 984 u64 size = end - start + 1; 985 int err; 986 987 INIT_LIST_HEAD(&userptr->invalidate_link); 988 INIT_LIST_HEAD(&userptr->repin_link); 989 vma->gpuva.gem.offset = bo_offset_or_userptr; 990 991 err = mmu_interval_notifier_insert(&userptr->notifier, 992 current->mm, 993 xe_vma_userptr(vma), size, 994 &vma_userptr_notifier_ops); 995 if (err) { 996 xe_vma_free(vma); 997 return ERR_PTR(err); 998 } 999 1000 userptr->notifier_seq = LONG_MAX; 1001 } 1002 1003 xe_vm_get(vm); 1004 } 1005 1006 return vma; 1007 } 1008 1009 static void xe_vma_destroy_late(struct xe_vma *vma) 1010 { 1011 struct xe_vm *vm = xe_vma_vm(vma); 1012 
	if (vma->ufence) {
		xe_sync_ufence_put(vma->ufence);
		vma->ufence = NULL;
	}

	if (xe_vma_is_userptr(vma)) {
		struct xe_userptr_vma *uvma = to_userptr_vma(vma);
		struct xe_userptr *userptr = &uvma->userptr;

		if (userptr->sg)
			xe_hmm_userptr_free_sg(uvma);

		/*
		 * Since userptr pages are not pinned, we can't remove
		 * the notifier until we're sure the GPU is not accessing
		 * them anymore
		 */
		mmu_interval_notifier_remove(&userptr->notifier);
		xe_vm_put(vm);
	} else if (xe_vma_is_null(vma)) {
		xe_vm_put(vm);
	} else {
		xe_bo_put(xe_vma_bo(vma));
	}

	xe_vma_free(vma);
}

static void vma_destroy_work_func(struct work_struct *w)
{
	struct xe_vma *vma =
		container_of(w, struct xe_vma, destroy_work);

	xe_vma_destroy_late(vma);
}

static void vma_destroy_cb(struct dma_fence *fence,
			   struct dma_fence_cb *cb)
{
	struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);

	INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
	queue_work(system_unbound_wq, &vma->destroy_work);
}

static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
{
	struct xe_vm *vm = xe_vma_vm(vma);

	lockdep_assert_held_write(&vm->lock);
	xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));

	if (xe_vma_is_userptr(vma)) {
		xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);

		spin_lock(&vm->userptr.invalidated_lock);
		list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
		spin_unlock(&vm->userptr.invalidated_lock);
	} else if (!xe_vma_is_null(vma)) {
		xe_bo_assert_held(xe_vma_bo(vma));

		drm_gpuva_unlink(&vma->gpuva);
	}

	xe_vm_assert_held(vm);
	if (fence) {
		int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
						 vma_destroy_cb);

		if (ret) {
			XE_WARN_ON(ret != -ENOENT);
			xe_vma_destroy_late(vma);
		}
	} else {
		xe_vma_destroy_late(vma);
	}
}

/**
 * xe_vm_lock_vma() - drm_exec utility to lock a vma
 * @exec: The drm_exec object we're currently locking for.
 * @vma: The vma for which we want to lock the vm resv and any attached
 * object's resv.
 *
 * Return: 0 on success, negative error code on error. In particular
 * may return -EDEADLK on WW transaction contention and -EINTR if
 * an interruptible wait is terminated by a signal.
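 *
 * A minimal sketch of the expected calling pattern, matching
 * xe_vma_destroy_unlocked() below:
 *
 *	drm_exec_init(&exec, 0, 0);
 *	drm_exec_until_all_locked(&exec) {
 *		err = xe_vm_lock_vma(&exec, vma);
 *		drm_exec_retry_on_contention(&exec);
 *		if (err)
 *			break;
 *	}
 *	... operate on the locked vma, then drm_exec_fini(&exec) ...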
1100 */ 1101 int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma) 1102 { 1103 struct xe_vm *vm = xe_vma_vm(vma); 1104 struct xe_bo *bo = xe_vma_bo(vma); 1105 int err; 1106 1107 XE_WARN_ON(!vm); 1108 1109 err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 1110 if (!err && bo && !bo->vm) 1111 err = drm_exec_lock_obj(exec, &bo->ttm.base); 1112 1113 return err; 1114 } 1115 1116 static void xe_vma_destroy_unlocked(struct xe_vma *vma) 1117 { 1118 struct drm_exec exec; 1119 int err; 1120 1121 drm_exec_init(&exec, 0, 0); 1122 drm_exec_until_all_locked(&exec) { 1123 err = xe_vm_lock_vma(&exec, vma); 1124 drm_exec_retry_on_contention(&exec); 1125 if (XE_WARN_ON(err)) 1126 break; 1127 } 1128 1129 xe_vma_destroy(vma, NULL); 1130 1131 drm_exec_fini(&exec); 1132 } 1133 1134 struct xe_vma * 1135 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range) 1136 { 1137 struct drm_gpuva *gpuva; 1138 1139 lockdep_assert_held(&vm->lock); 1140 1141 if (xe_vm_is_closed_or_banned(vm)) 1142 return NULL; 1143 1144 xe_assert(vm->xe, start + range <= vm->size); 1145 1146 gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range); 1147 1148 return gpuva ? gpuva_to_vma(gpuva) : NULL; 1149 } 1150 1151 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma) 1152 { 1153 int err; 1154 1155 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1156 lockdep_assert_held(&vm->lock); 1157 1158 mutex_lock(&vm->snap_mutex); 1159 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva); 1160 mutex_unlock(&vm->snap_mutex); 1161 XE_WARN_ON(err); /* Shouldn't be possible */ 1162 1163 return err; 1164 } 1165 1166 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma) 1167 { 1168 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1169 lockdep_assert_held(&vm->lock); 1170 1171 mutex_lock(&vm->snap_mutex); 1172 drm_gpuva_remove(&vma->gpuva); 1173 mutex_unlock(&vm->snap_mutex); 1174 if (vm->usm.last_fault_vma == vma) 1175 vm->usm.last_fault_vma = NULL; 1176 } 1177 1178 static struct drm_gpuva_op *xe_vm_op_alloc(void) 1179 { 1180 struct xe_vma_op *op; 1181 1182 op = kzalloc(sizeof(*op), GFP_KERNEL); 1183 1184 if (unlikely(!op)) 1185 return NULL; 1186 1187 return &op->base; 1188 } 1189 1190 static void xe_vm_free(struct drm_gpuvm *gpuvm); 1191 1192 static const struct drm_gpuvm_ops gpuvm_ops = { 1193 .op_alloc = xe_vm_op_alloc, 1194 .vm_bo_validate = xe_gpuvm_validate, 1195 .vm_free = xe_vm_free, 1196 }; 1197 1198 static u64 pde_encode_pat_index(u16 pat_index) 1199 { 1200 u64 pte = 0; 1201 1202 if (pat_index & BIT(0)) 1203 pte |= XE_PPGTT_PTE_PAT0; 1204 1205 if (pat_index & BIT(1)) 1206 pte |= XE_PPGTT_PTE_PAT1; 1207 1208 return pte; 1209 } 1210 1211 static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level) 1212 { 1213 u64 pte = 0; 1214 1215 if (pat_index & BIT(0)) 1216 pte |= XE_PPGTT_PTE_PAT0; 1217 1218 if (pat_index & BIT(1)) 1219 pte |= XE_PPGTT_PTE_PAT1; 1220 1221 if (pat_index & BIT(2)) { 1222 if (pt_level) 1223 pte |= XE_PPGTT_PDE_PDPE_PAT2; 1224 else 1225 pte |= XE_PPGTT_PTE_PAT2; 1226 } 1227 1228 if (pat_index & BIT(3)) 1229 pte |= XELPG_PPGTT_PTE_PAT3; 1230 1231 if (pat_index & (BIT(4))) 1232 pte |= XE2_PPGTT_PTE_PAT4; 1233 1234 return pte; 1235 } 1236 1237 static u64 pte_encode_ps(u32 pt_level) 1238 { 1239 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL); 1240 1241 if (pt_level == 1) 1242 return XE_PDE_PS_2M; 1243 else if (pt_level == 2) 1244 return XE_PDPE_PS_1G; 1245 1246 return 0; 1247 } 1248 1249 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset, 1250 const u16 pat_index) 1251 { 1252 u64 pde; 1253 1254 pde = 
xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1255 pde |= XE_PAGE_PRESENT | XE_PAGE_RW; 1256 pde |= pde_encode_pat_index(pat_index); 1257 1258 return pde; 1259 } 1260 1261 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, 1262 u16 pat_index, u32 pt_level) 1263 { 1264 u64 pte; 1265 1266 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1267 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1268 pte |= pte_encode_pat_index(pat_index, pt_level); 1269 pte |= pte_encode_ps(pt_level); 1270 1271 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) 1272 pte |= XE_PPGTT_PTE_DM; 1273 1274 return pte; 1275 } 1276 1277 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, 1278 u16 pat_index, u32 pt_level) 1279 { 1280 pte |= XE_PAGE_PRESENT; 1281 1282 if (likely(!xe_vma_read_only(vma))) 1283 pte |= XE_PAGE_RW; 1284 1285 pte |= pte_encode_pat_index(pat_index, pt_level); 1286 pte |= pte_encode_ps(pt_level); 1287 1288 if (unlikely(xe_vma_is_null(vma))) 1289 pte |= XE_PTE_NULL; 1290 1291 return pte; 1292 } 1293 1294 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, 1295 u16 pat_index, 1296 u32 pt_level, bool devmem, u64 flags) 1297 { 1298 u64 pte; 1299 1300 /* Avoid passing random bits directly as flags */ 1301 xe_assert(xe, !(flags & ~XE_PTE_PS64)); 1302 1303 pte = addr; 1304 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1305 pte |= pte_encode_pat_index(pat_index, pt_level); 1306 pte |= pte_encode_ps(pt_level); 1307 1308 if (devmem) 1309 pte |= XE_PPGTT_PTE_DM; 1310 1311 pte |= flags; 1312 1313 return pte; 1314 } 1315 1316 static const struct xe_pt_ops xelp_pt_ops = { 1317 .pte_encode_bo = xelp_pte_encode_bo, 1318 .pte_encode_vma = xelp_pte_encode_vma, 1319 .pte_encode_addr = xelp_pte_encode_addr, 1320 .pde_encode_bo = xelp_pde_encode_bo, 1321 }; 1322 1323 static void vm_destroy_work_func(struct work_struct *w); 1324 1325 /** 1326 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the 1327 * given tile and vm. 1328 * @xe: xe device. 1329 * @tile: tile to set up for. 1330 * @vm: vm to set up for. 1331 * 1332 * Sets up a pagetable tree with one page-table per level and a single 1333 * leaf PTE. All pagetable entries point to the single page-table or, 1334 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and 1335 * writes become NOPs. 1336 * 1337 * Return: 0 on success, negative error code on error. 
1338 */ 1339 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, 1340 struct xe_vm *vm) 1341 { 1342 u8 id = tile->id; 1343 int i; 1344 1345 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) { 1346 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i); 1347 if (IS_ERR(vm->scratch_pt[id][i])) 1348 return PTR_ERR(vm->scratch_pt[id][i]); 1349 1350 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); 1351 } 1352 1353 return 0; 1354 } 1355 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO); 1356 1357 static void xe_vm_free_scratch(struct xe_vm *vm) 1358 { 1359 struct xe_tile *tile; 1360 u8 id; 1361 1362 if (!xe_vm_has_scratch(vm)) 1363 return; 1364 1365 for_each_tile(tile, vm->xe, id) { 1366 u32 i; 1367 1368 if (!vm->pt_root[id]) 1369 continue; 1370 1371 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i) 1372 if (vm->scratch_pt[id][i]) 1373 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL); 1374 } 1375 } 1376 1377 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) 1378 { 1379 struct drm_gem_object *vm_resv_obj; 1380 struct xe_vm *vm; 1381 int err, number_tiles = 0; 1382 struct xe_tile *tile; 1383 u8 id; 1384 1385 vm = kzalloc(sizeof(*vm), GFP_KERNEL); 1386 if (!vm) 1387 return ERR_PTR(-ENOMEM); 1388 1389 vm->xe = xe; 1390 1391 vm->size = 1ull << xe->info.va_bits; 1392 1393 vm->flags = flags; 1394 1395 init_rwsem(&vm->lock); 1396 mutex_init(&vm->snap_mutex); 1397 1398 INIT_LIST_HEAD(&vm->rebind_list); 1399 1400 INIT_LIST_HEAD(&vm->userptr.repin_list); 1401 INIT_LIST_HEAD(&vm->userptr.invalidated); 1402 init_rwsem(&vm->userptr.notifier_lock); 1403 spin_lock_init(&vm->userptr.invalidated_lock); 1404 1405 ttm_lru_bulk_move_init(&vm->lru_bulk_move); 1406 1407 INIT_WORK(&vm->destroy_work, vm_destroy_work_func); 1408 1409 INIT_LIST_HEAD(&vm->preempt.exec_queues); 1410 vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */ 1411 1412 for_each_tile(tile, xe, id) 1413 xe_range_fence_tree_init(&vm->rftree[id]); 1414 1415 vm->pt_ops = &xelp_pt_ops; 1416 1417 /* 1418 * Long-running workloads are not protected by the scheduler references. 1419 * By design, run_job for long-running workloads returns NULL and the 1420 * scheduler drops all the references of it, hence protecting the VM 1421 * for this case is necessary. 
1422 */ 1423 if (flags & XE_VM_FLAG_LR_MODE) 1424 xe_pm_runtime_get_noresume(xe); 1425 1426 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); 1427 if (!vm_resv_obj) { 1428 err = -ENOMEM; 1429 goto err_no_resv; 1430 } 1431 1432 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm, 1433 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops); 1434 1435 drm_gem_object_put(vm_resv_obj); 1436 1437 err = xe_vm_lock(vm, true); 1438 if (err) 1439 goto err_close; 1440 1441 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) 1442 vm->flags |= XE_VM_FLAG_64K; 1443 1444 for_each_tile(tile, xe, id) { 1445 if (flags & XE_VM_FLAG_MIGRATION && 1446 tile->id != XE_VM_FLAG_TILE_ID(flags)) 1447 continue; 1448 1449 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level); 1450 if (IS_ERR(vm->pt_root[id])) { 1451 err = PTR_ERR(vm->pt_root[id]); 1452 vm->pt_root[id] = NULL; 1453 goto err_unlock_close; 1454 } 1455 } 1456 1457 if (xe_vm_has_scratch(vm)) { 1458 for_each_tile(tile, xe, id) { 1459 if (!vm->pt_root[id]) 1460 continue; 1461 1462 err = xe_vm_create_scratch(xe, tile, vm); 1463 if (err) 1464 goto err_unlock_close; 1465 } 1466 vm->batch_invalidate_tlb = true; 1467 } 1468 1469 if (vm->flags & XE_VM_FLAG_LR_MODE) { 1470 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); 1471 vm->batch_invalidate_tlb = false; 1472 } 1473 1474 /* Fill pt_root after allocating scratch tables */ 1475 for_each_tile(tile, xe, id) { 1476 if (!vm->pt_root[id]) 1477 continue; 1478 1479 xe_pt_populate_empty(tile, vm, vm->pt_root[id]); 1480 } 1481 xe_vm_unlock(vm); 1482 1483 /* Kernel migration VM shouldn't have a circular loop.. */ 1484 if (!(flags & XE_VM_FLAG_MIGRATION)) { 1485 for_each_tile(tile, xe, id) { 1486 struct xe_exec_queue *q; 1487 u32 create_flags = EXEC_QUEUE_FLAG_VM; 1488 1489 if (!vm->pt_root[id]) 1490 continue; 1491 1492 q = xe_exec_queue_create_bind(xe, tile, create_flags, 0); 1493 if (IS_ERR(q)) { 1494 err = PTR_ERR(q); 1495 goto err_close; 1496 } 1497 vm->q[id] = q; 1498 number_tiles++; 1499 } 1500 } 1501 1502 if (number_tiles > 1) 1503 vm->composite_fence_ctx = dma_fence_context_alloc(1); 1504 1505 trace_xe_vm_create(vm); 1506 1507 return vm; 1508 1509 err_unlock_close: 1510 xe_vm_unlock(vm); 1511 err_close: 1512 xe_vm_close_and_put(vm); 1513 return ERR_PTR(err); 1514 1515 err_no_resv: 1516 mutex_destroy(&vm->snap_mutex); 1517 for_each_tile(tile, xe, id) 1518 xe_range_fence_tree_fini(&vm->rftree[id]); 1519 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 1520 kfree(vm); 1521 if (flags & XE_VM_FLAG_LR_MODE) 1522 xe_pm_runtime_put(xe); 1523 return ERR_PTR(err); 1524 } 1525 1526 static void xe_vm_close(struct xe_vm *vm) 1527 { 1528 down_write(&vm->lock); 1529 vm->size = 0; 1530 up_write(&vm->lock); 1531 } 1532 1533 void xe_vm_close_and_put(struct xe_vm *vm) 1534 { 1535 LIST_HEAD(contested); 1536 struct xe_device *xe = vm->xe; 1537 struct xe_tile *tile; 1538 struct xe_vma *vma, *next_vma; 1539 struct drm_gpuva *gpuva, *next; 1540 u8 id; 1541 1542 xe_assert(xe, !vm->preempt.num_exec_queues); 1543 1544 xe_vm_close(vm); 1545 if (xe_vm_in_preempt_fence_mode(vm)) 1546 flush_work(&vm->preempt.rebind_work); 1547 1548 down_write(&vm->lock); 1549 for_each_tile(tile, xe, id) { 1550 if (vm->q[id]) 1551 xe_exec_queue_last_fence_put(vm->q[id], vm); 1552 } 1553 up_write(&vm->lock); 1554 1555 for_each_tile(tile, xe, id) { 1556 if (vm->q[id]) { 1557 xe_exec_queue_kill(vm->q[id]); 1558 xe_exec_queue_put(vm->q[id]); 1559 vm->q[id] = NULL; 1560 } 1561 } 1562 1563 down_write(&vm->lock); 1564 
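	/*
	 * The VMA and page-table teardown below requires both the VM lock
	 * (taken for write above) and the VM's dma-resv lock.
	 */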
xe_vm_lock(vm, false); 1565 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) { 1566 vma = gpuva_to_vma(gpuva); 1567 1568 if (xe_vma_has_no_bo(vma)) { 1569 down_read(&vm->userptr.notifier_lock); 1570 vma->gpuva.flags |= XE_VMA_DESTROYED; 1571 up_read(&vm->userptr.notifier_lock); 1572 } 1573 1574 xe_vm_remove_vma(vm, vma); 1575 1576 /* easy case, remove from VMA? */ 1577 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) { 1578 list_del_init(&vma->combined_links.rebind); 1579 xe_vma_destroy(vma, NULL); 1580 continue; 1581 } 1582 1583 list_move_tail(&vma->combined_links.destroy, &contested); 1584 vma->gpuva.flags |= XE_VMA_DESTROYED; 1585 } 1586 1587 /* 1588 * All vm operations will add shared fences to resv. 1589 * The only exception is eviction for a shared object, 1590 * but even so, the unbind when evicted would still 1591 * install a fence to resv. Hence it's safe to 1592 * destroy the pagetables immediately. 1593 */ 1594 xe_vm_free_scratch(vm); 1595 1596 for_each_tile(tile, xe, id) { 1597 if (vm->pt_root[id]) { 1598 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); 1599 vm->pt_root[id] = NULL; 1600 } 1601 } 1602 xe_vm_unlock(vm); 1603 1604 /* 1605 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL 1606 * Since we hold a refcount to the bo, we can remove and free 1607 * the members safely without locking. 1608 */ 1609 list_for_each_entry_safe(vma, next_vma, &contested, 1610 combined_links.destroy) { 1611 list_del_init(&vma->combined_links.destroy); 1612 xe_vma_destroy_unlocked(vma); 1613 } 1614 1615 up_write(&vm->lock); 1616 1617 down_write(&xe->usm.lock); 1618 if (vm->usm.asid) { 1619 void *lookup; 1620 1621 xe_assert(xe, xe->info.has_asid); 1622 xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION)); 1623 1624 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); 1625 xe_assert(xe, lookup == vm); 1626 } 1627 up_write(&xe->usm.lock); 1628 1629 for_each_tile(tile, xe, id) 1630 xe_range_fence_tree_fini(&vm->rftree[id]); 1631 1632 xe_vm_put(vm); 1633 } 1634 1635 static void vm_destroy_work_func(struct work_struct *w) 1636 { 1637 struct xe_vm *vm = 1638 container_of(w, struct xe_vm, destroy_work); 1639 struct xe_device *xe = vm->xe; 1640 struct xe_tile *tile; 1641 u8 id; 1642 1643 /* xe_vm_close_and_put was not called? 
*/ 1644 xe_assert(xe, !vm->size); 1645 1646 if (xe_vm_in_preempt_fence_mode(vm)) 1647 flush_work(&vm->preempt.rebind_work); 1648 1649 mutex_destroy(&vm->snap_mutex); 1650 1651 if (vm->flags & XE_VM_FLAG_LR_MODE) 1652 xe_pm_runtime_put(xe); 1653 1654 for_each_tile(tile, xe, id) 1655 XE_WARN_ON(vm->pt_root[id]); 1656 1657 trace_xe_vm_free(vm); 1658 1659 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 1660 1661 if (vm->xef) 1662 xe_file_put(vm->xef); 1663 1664 kfree(vm); 1665 } 1666 1667 static void xe_vm_free(struct drm_gpuvm *gpuvm) 1668 { 1669 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); 1670 1671 /* To destroy the VM we need to be able to sleep */ 1672 queue_work(system_unbound_wq, &vm->destroy_work); 1673 } 1674 1675 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) 1676 { 1677 struct xe_vm *vm; 1678 1679 mutex_lock(&xef->vm.lock); 1680 vm = xa_load(&xef->vm.xa, id); 1681 if (vm) 1682 xe_vm_get(vm); 1683 mutex_unlock(&xef->vm.lock); 1684 1685 return vm; 1686 } 1687 1688 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile) 1689 { 1690 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0, 1691 tile_to_xe(tile)->pat.idx[XE_CACHE_WB]); 1692 } 1693 1694 static struct xe_exec_queue * 1695 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 1696 { 1697 return q ? q : vm->q[0]; 1698 } 1699 1700 static struct xe_user_fence * 1701 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) 1702 { 1703 unsigned int i; 1704 1705 for (i = 0; i < num_syncs; i++) { 1706 struct xe_sync_entry *e = &syncs[i]; 1707 1708 if (xe_sync_is_ufence(e)) 1709 return xe_sync_ufence_get(e); 1710 } 1711 1712 return NULL; 1713 } 1714 1715 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ 1716 DRM_XE_VM_CREATE_FLAG_LR_MODE | \ 1717 DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 1718 1719 int xe_vm_create_ioctl(struct drm_device *dev, void *data, 1720 struct drm_file *file) 1721 { 1722 struct xe_device *xe = to_xe_device(dev); 1723 struct xe_file *xef = to_xe_file(file); 1724 struct drm_xe_vm_create *args = data; 1725 struct xe_tile *tile; 1726 struct xe_vm *vm; 1727 u32 id, asid; 1728 int err; 1729 u32 flags = 0; 1730 1731 if (XE_IOCTL_DBG(xe, args->extensions)) 1732 return -EINVAL; 1733 1734 if (XE_WA(xe_root_mmio_gt(xe), 14016763929)) 1735 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE; 1736 1737 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && 1738 !xe->info.has_usm)) 1739 return -EINVAL; 1740 1741 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 1742 return -EINVAL; 1743 1744 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS)) 1745 return -EINVAL; 1746 1747 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE && 1748 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) 1749 return -EINVAL; 1750 1751 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) && 1752 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) 1753 return -EINVAL; 1754 1755 if (XE_IOCTL_DBG(xe, args->extensions)) 1756 return -EINVAL; 1757 1758 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE) 1759 flags |= XE_VM_FLAG_SCRATCH_PAGE; 1760 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) 1761 flags |= XE_VM_FLAG_LR_MODE; 1762 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 1763 flags |= XE_VM_FLAG_FAULT_MODE; 1764 1765 vm = xe_vm_create(xe, flags); 1766 if (IS_ERR(vm)) 1767 return PTR_ERR(vm); 1768 1769 if (xe->info.has_asid) { 1770 down_write(&xe->usm.lock); 1771 err = 
xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, 1772 XA_LIMIT(1, XE_MAX_ASID - 1), 1773 &xe->usm.next_asid, GFP_KERNEL); 1774 up_write(&xe->usm.lock); 1775 if (err < 0) 1776 goto err_close_and_put; 1777 1778 vm->usm.asid = asid; 1779 } 1780 1781 vm->xef = xe_file_get(xef); 1782 1783 /* Record BO memory for VM pagetable created against client */ 1784 for_each_tile(tile, xe, id) 1785 if (vm->pt_root[id]) 1786 xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo); 1787 1788 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) 1789 /* Warning: Security issue - never enable by default */ 1790 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); 1791 #endif 1792 1793 /* user id alloc must always be last in ioctl to prevent UAF */ 1794 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); 1795 if (err) 1796 goto err_close_and_put; 1797 1798 args->vm_id = id; 1799 1800 return 0; 1801 1802 err_close_and_put: 1803 xe_vm_close_and_put(vm); 1804 1805 return err; 1806 } 1807 1808 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, 1809 struct drm_file *file) 1810 { 1811 struct xe_device *xe = to_xe_device(dev); 1812 struct xe_file *xef = to_xe_file(file); 1813 struct drm_xe_vm_destroy *args = data; 1814 struct xe_vm *vm; 1815 int err = 0; 1816 1817 if (XE_IOCTL_DBG(xe, args->pad) || 1818 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 1819 return -EINVAL; 1820 1821 mutex_lock(&xef->vm.lock); 1822 vm = xa_load(&xef->vm.xa, args->vm_id); 1823 if (XE_IOCTL_DBG(xe, !vm)) 1824 err = -ENOENT; 1825 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues)) 1826 err = -EBUSY; 1827 else 1828 xa_erase(&xef->vm.xa, args->vm_id); 1829 mutex_unlock(&xef->vm.lock); 1830 1831 if (!err) 1832 xe_vm_close_and_put(vm); 1833 1834 return err; 1835 } 1836 1837 static const u32 region_to_mem_type[] = { 1838 XE_PL_TT, 1839 XE_PL_VRAM0, 1840 XE_PL_VRAM1, 1841 }; 1842 1843 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma, 1844 bool post_commit) 1845 { 1846 down_read(&vm->userptr.notifier_lock); 1847 vma->gpuva.flags |= XE_VMA_DESTROYED; 1848 up_read(&vm->userptr.notifier_lock); 1849 if (post_commit) 1850 xe_vm_remove_vma(vm, vma); 1851 } 1852 1853 #undef ULL 1854 #define ULL unsigned long long 1855 1856 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) 1857 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 1858 { 1859 struct xe_vma *vma; 1860 1861 switch (op->op) { 1862 case DRM_GPUVA_OP_MAP: 1863 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx", 1864 (ULL)op->map.va.addr, (ULL)op->map.va.range); 1865 break; 1866 case DRM_GPUVA_OP_REMAP: 1867 vma = gpuva_to_vma(op->remap.unmap->va); 1868 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 1869 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 1870 op->remap.unmap->keep ? 1 : 0); 1871 if (op->remap.prev) 1872 vm_dbg(&xe->drm, 1873 "REMAP:PREV: addr=0x%016llx, range=0x%016llx", 1874 (ULL)op->remap.prev->va.addr, 1875 (ULL)op->remap.prev->va.range); 1876 if (op->remap.next) 1877 vm_dbg(&xe->drm, 1878 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx", 1879 (ULL)op->remap.next->va.addr, 1880 (ULL)op->remap.next->va.range); 1881 break; 1882 case DRM_GPUVA_OP_UNMAP: 1883 vma = gpuva_to_vma(op->unmap.va); 1884 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 1885 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 1886 op->unmap.keep ? 
1 : 0); 1887 break; 1888 case DRM_GPUVA_OP_PREFETCH: 1889 vma = gpuva_to_vma(op->prefetch.va); 1890 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx", 1891 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma)); 1892 break; 1893 default: 1894 drm_warn(&xe->drm, "NOT POSSIBLE"); 1895 } 1896 } 1897 #else 1898 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 1899 { 1900 } 1901 #endif 1902 1903 /* 1904 * Create operations list from IOCTL arguments, setup operations fields so parse 1905 * and commit steps are decoupled from IOCTL arguments. This step can fail. 1906 */ 1907 static struct drm_gpuva_ops * 1908 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, 1909 u64 bo_offset_or_userptr, u64 addr, u64 range, 1910 u32 operation, u32 flags, 1911 u32 prefetch_region, u16 pat_index) 1912 { 1913 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL; 1914 struct drm_gpuva_ops *ops; 1915 struct drm_gpuva_op *__op; 1916 struct drm_gpuvm_bo *vm_bo; 1917 int err; 1918 1919 lockdep_assert_held_write(&vm->lock); 1920 1921 vm_dbg(&vm->xe->drm, 1922 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx", 1923 operation, (ULL)addr, (ULL)range, 1924 (ULL)bo_offset_or_userptr); 1925 1926 switch (operation) { 1927 case DRM_XE_VM_BIND_OP_MAP: 1928 case DRM_XE_VM_BIND_OP_MAP_USERPTR: 1929 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range, 1930 obj, bo_offset_or_userptr); 1931 break; 1932 case DRM_XE_VM_BIND_OP_UNMAP: 1933 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range); 1934 break; 1935 case DRM_XE_VM_BIND_OP_PREFETCH: 1936 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range); 1937 break; 1938 case DRM_XE_VM_BIND_OP_UNMAP_ALL: 1939 xe_assert(vm->xe, bo); 1940 1941 err = xe_bo_lock(bo, true); 1942 if (err) 1943 return ERR_PTR(err); 1944 1945 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj); 1946 if (IS_ERR(vm_bo)) { 1947 xe_bo_unlock(bo); 1948 return ERR_CAST(vm_bo); 1949 } 1950 1951 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo); 1952 drm_gpuvm_bo_put(vm_bo); 1953 xe_bo_unlock(bo); 1954 break; 1955 default: 1956 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 1957 ops = ERR_PTR(-EINVAL); 1958 } 1959 if (IS_ERR(ops)) 1960 return ops; 1961 1962 drm_gpuva_for_each_op(__op, ops) { 1963 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 1964 1965 if (__op->op == DRM_GPUVA_OP_MAP) { 1966 op->map.immediate = 1967 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; 1968 op->map.read_only = 1969 flags & DRM_XE_VM_BIND_FLAG_READONLY; 1970 op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 1971 op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; 1972 op->map.pat_index = pat_index; 1973 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { 1974 op->prefetch.region = prefetch_region; 1975 } 1976 1977 print_op(vm->xe, __op); 1978 } 1979 1980 return ops; 1981 } 1982 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO); 1983 1984 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, 1985 u16 pat_index, unsigned int flags) 1986 { 1987 struct xe_bo *bo = op->gem.obj ? 
gem_to_xe_bo(op->gem.obj) : NULL; 1988 struct drm_exec exec; 1989 struct xe_vma *vma; 1990 int err = 0; 1991 1992 lockdep_assert_held_write(&vm->lock); 1993 1994 if (bo) { 1995 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); 1996 drm_exec_until_all_locked(&exec) { 1997 err = 0; 1998 if (!bo->vm) { 1999 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm)); 2000 drm_exec_retry_on_contention(&exec); 2001 } 2002 if (!err) { 2003 err = drm_exec_lock_obj(&exec, &bo->ttm.base); 2004 drm_exec_retry_on_contention(&exec); 2005 } 2006 if (err) { 2007 drm_exec_fini(&exec); 2008 return ERR_PTR(err); 2009 } 2010 } 2011 } 2012 vma = xe_vma_create(vm, bo, op->gem.offset, 2013 op->va.addr, op->va.addr + 2014 op->va.range - 1, pat_index, flags); 2015 if (IS_ERR(vma)) 2016 goto err_unlock; 2017 2018 if (xe_vma_is_userptr(vma)) 2019 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2020 else if (!xe_vma_has_no_bo(vma) && !bo->vm) 2021 err = add_preempt_fences(vm, bo); 2022 2023 err_unlock: 2024 if (bo) 2025 drm_exec_fini(&exec); 2026 2027 if (err) { 2028 prep_vma_destroy(vm, vma, false); 2029 xe_vma_destroy_unlocked(vma); 2030 vma = ERR_PTR(err); 2031 } 2032 2033 return vma; 2034 } 2035 2036 static u64 xe_vma_max_pte_size(struct xe_vma *vma) 2037 { 2038 if (vma->gpuva.flags & XE_VMA_PTE_1G) 2039 return SZ_1G; 2040 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT)) 2041 return SZ_2M; 2042 else if (vma->gpuva.flags & XE_VMA_PTE_64K) 2043 return SZ_64K; 2044 else if (vma->gpuva.flags & XE_VMA_PTE_4K) 2045 return SZ_4K; 2046 2047 return SZ_1G; /* Uninitialized, used max size */ 2048 } 2049 2050 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size) 2051 { 2052 switch (size) { 2053 case SZ_1G: 2054 vma->gpuva.flags |= XE_VMA_PTE_1G; 2055 break; 2056 case SZ_2M: 2057 vma->gpuva.flags |= XE_VMA_PTE_2M; 2058 break; 2059 case SZ_64K: 2060 vma->gpuva.flags |= XE_VMA_PTE_64K; 2061 break; 2062 case SZ_4K: 2063 vma->gpuva.flags |= XE_VMA_PTE_4K; 2064 break; 2065 } 2066 } 2067 2068 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) 2069 { 2070 int err = 0; 2071 2072 lockdep_assert_held_write(&vm->lock); 2073 2074 switch (op->base.op) { 2075 case DRM_GPUVA_OP_MAP: 2076 err |= xe_vm_insert_vma(vm, op->map.vma); 2077 if (!err) 2078 op->flags |= XE_VMA_OP_COMMITTED; 2079 break; 2080 case DRM_GPUVA_OP_REMAP: 2081 { 2082 u8 tile_present = 2083 gpuva_to_vma(op->base.remap.unmap->va)->tile_present; 2084 2085 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va), 2086 true); 2087 op->flags |= XE_VMA_OP_COMMITTED; 2088 2089 if (op->remap.prev) { 2090 err |= xe_vm_insert_vma(vm, op->remap.prev); 2091 if (!err) 2092 op->flags |= XE_VMA_OP_PREV_COMMITTED; 2093 if (!err && op->remap.skip_prev) { 2094 op->remap.prev->tile_present = 2095 tile_present; 2096 op->remap.prev = NULL; 2097 } 2098 } 2099 if (op->remap.next) { 2100 err |= xe_vm_insert_vma(vm, op->remap.next); 2101 if (!err) 2102 op->flags |= XE_VMA_OP_NEXT_COMMITTED; 2103 if (!err && op->remap.skip_next) { 2104 op->remap.next->tile_present = 2105 tile_present; 2106 op->remap.next = NULL; 2107 } 2108 } 2109 2110 /* Adjust for partial unbind after removing VMA from VM */ 2111 if (!err) { 2112 op->base.remap.unmap->va->va.addr = op->remap.start; 2113 op->base.remap.unmap->va->va.range = op->remap.range; 2114 } 2115 break; 2116 } 2117 case DRM_GPUVA_OP_UNMAP: 2118 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true); 2119 op->flags |= XE_VMA_OP_COMMITTED; 2120 break; 2121 case DRM_GPUVA_OP_PREFETCH: 2122 op->flags |= 
XE_VMA_OP_COMMITTED; 2123 break; 2124 default: 2125 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2126 } 2127 2128 return err; 2129 } 2130 2131 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, 2132 struct xe_vma_ops *vops) 2133 { 2134 struct xe_device *xe = vm->xe; 2135 struct drm_gpuva_op *__op; 2136 struct xe_tile *tile; 2137 u8 id, tile_mask = 0; 2138 int err = 0; 2139 2140 lockdep_assert_held_write(&vm->lock); 2141 2142 for_each_tile(tile, vm->xe, id) 2143 tile_mask |= 0x1 << id; 2144 2145 drm_gpuva_for_each_op(__op, ops) { 2146 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2147 struct xe_vma *vma; 2148 unsigned int flags = 0; 2149 2150 INIT_LIST_HEAD(&op->link); 2151 list_add_tail(&op->link, &vops->list); 2152 op->tile_mask = tile_mask; 2153 2154 switch (op->base.op) { 2155 case DRM_GPUVA_OP_MAP: 2156 { 2157 flags |= op->map.read_only ? 2158 VMA_CREATE_FLAG_READ_ONLY : 0; 2159 flags |= op->map.is_null ? 2160 VMA_CREATE_FLAG_IS_NULL : 0; 2161 flags |= op->map.dumpable ? 2162 VMA_CREATE_FLAG_DUMPABLE : 0; 2163 2164 vma = new_vma(vm, &op->base.map, op->map.pat_index, 2165 flags); 2166 if (IS_ERR(vma)) 2167 return PTR_ERR(vma); 2168 2169 op->map.vma = vma; 2170 if (op->map.immediate || !xe_vm_in_fault_mode(vm)) 2171 xe_vma_ops_incr_pt_update_ops(vops, 2172 op->tile_mask); 2173 break; 2174 } 2175 case DRM_GPUVA_OP_REMAP: 2176 { 2177 struct xe_vma *old = 2178 gpuva_to_vma(op->base.remap.unmap->va); 2179 2180 op->remap.start = xe_vma_start(old); 2181 op->remap.range = xe_vma_size(old); 2182 2183 if (op->base.remap.prev) { 2184 flags |= op->base.remap.unmap->va->flags & 2185 XE_VMA_READ_ONLY ? 2186 VMA_CREATE_FLAG_READ_ONLY : 0; 2187 flags |= op->base.remap.unmap->va->flags & 2188 DRM_GPUVA_SPARSE ? 2189 VMA_CREATE_FLAG_IS_NULL : 0; 2190 flags |= op->base.remap.unmap->va->flags & 2191 XE_VMA_DUMPABLE ? 2192 VMA_CREATE_FLAG_DUMPABLE : 0; 2193 2194 vma = new_vma(vm, op->base.remap.prev, 2195 old->pat_index, flags); 2196 if (IS_ERR(vma)) 2197 return PTR_ERR(vma); 2198 2199 op->remap.prev = vma; 2200 2201 /* 2202 * Userptr creates a new SG mapping so 2203 * we must also rebind. 2204 */ 2205 op->remap.skip_prev = !xe_vma_is_userptr(old) && 2206 IS_ALIGNED(xe_vma_end(vma), 2207 xe_vma_max_pte_size(old)); 2208 if (op->remap.skip_prev) { 2209 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2210 op->remap.range -= 2211 xe_vma_end(vma) - 2212 xe_vma_start(old); 2213 op->remap.start = xe_vma_end(vma); 2214 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx", 2215 (ULL)op->remap.start, 2216 (ULL)op->remap.range); 2217 } else { 2218 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); 2219 } 2220 } 2221 2222 if (op->base.remap.next) { 2223 flags |= op->base.remap.unmap->va->flags & 2224 XE_VMA_READ_ONLY ? 2225 VMA_CREATE_FLAG_READ_ONLY : 0; 2226 flags |= op->base.remap.unmap->va->flags & 2227 DRM_GPUVA_SPARSE ? 2228 VMA_CREATE_FLAG_IS_NULL : 0; 2229 flags |= op->base.remap.unmap->va->flags & 2230 XE_VMA_DUMPABLE ? 2231 VMA_CREATE_FLAG_DUMPABLE : 0; 2232 2233 vma = new_vma(vm, op->base.remap.next, 2234 old->pat_index, flags); 2235 if (IS_ERR(vma)) 2236 return PTR_ERR(vma); 2237 2238 op->remap.next = vma; 2239 2240 /* 2241 * Userptr creates a new SG mapping so 2242 * we must also rebind. 
2243 */ 2244 op->remap.skip_next = !xe_vma_is_userptr(old) && 2245 IS_ALIGNED(xe_vma_start(vma), 2246 xe_vma_max_pte_size(old)); 2247 if (op->remap.skip_next) { 2248 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2249 op->remap.range -= 2250 xe_vma_end(old) - 2251 xe_vma_start(vma); 2252 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx", 2253 (ULL)op->remap.start, 2254 (ULL)op->remap.range); 2255 } else { 2256 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); 2257 } 2258 } 2259 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); 2260 break; 2261 } 2262 case DRM_GPUVA_OP_UNMAP: 2263 case DRM_GPUVA_OP_PREFETCH: 2264 /* FIXME: Need to skip some prefetch ops */ 2265 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); 2266 break; 2267 default: 2268 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2269 } 2270 2271 err = xe_vma_op_commit(vm, op); 2272 if (err) 2273 return err; 2274 } 2275 2276 return 0; 2277 } 2278 2279 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, 2280 bool post_commit, bool prev_post_commit, 2281 bool next_post_commit) 2282 { 2283 lockdep_assert_held_write(&vm->lock); 2284 2285 switch (op->base.op) { 2286 case DRM_GPUVA_OP_MAP: 2287 if (op->map.vma) { 2288 prep_vma_destroy(vm, op->map.vma, post_commit); 2289 xe_vma_destroy_unlocked(op->map.vma); 2290 } 2291 break; 2292 case DRM_GPUVA_OP_UNMAP: 2293 { 2294 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va); 2295 2296 if (vma) { 2297 down_read(&vm->userptr.notifier_lock); 2298 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2299 up_read(&vm->userptr.notifier_lock); 2300 if (post_commit) 2301 xe_vm_insert_vma(vm, vma); 2302 } 2303 break; 2304 } 2305 case DRM_GPUVA_OP_REMAP: 2306 { 2307 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va); 2308 2309 if (op->remap.prev) { 2310 prep_vma_destroy(vm, op->remap.prev, prev_post_commit); 2311 xe_vma_destroy_unlocked(op->remap.prev); 2312 } 2313 if (op->remap.next) { 2314 prep_vma_destroy(vm, op->remap.next, next_post_commit); 2315 xe_vma_destroy_unlocked(op->remap.next); 2316 } 2317 if (vma) { 2318 down_read(&vm->userptr.notifier_lock); 2319 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2320 up_read(&vm->userptr.notifier_lock); 2321 if (post_commit) 2322 xe_vm_insert_vma(vm, vma); 2323 } 2324 break; 2325 } 2326 case DRM_GPUVA_OP_PREFETCH: 2327 /* Nothing to do */ 2328 break; 2329 default: 2330 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2331 } 2332 } 2333 2334 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, 2335 struct drm_gpuva_ops **ops, 2336 int num_ops_list) 2337 { 2338 int i; 2339 2340 for (i = num_ops_list - 1; i >= 0; --i) { 2341 struct drm_gpuva_ops *__ops = ops[i]; 2342 struct drm_gpuva_op *__op; 2343 2344 if (!__ops) 2345 continue; 2346 2347 drm_gpuva_for_each_op_reverse(__op, __ops) { 2348 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2349 2350 xe_vma_op_unwind(vm, op, 2351 op->flags & XE_VMA_OP_COMMITTED, 2352 op->flags & XE_VMA_OP_PREV_COMMITTED, 2353 op->flags & XE_VMA_OP_NEXT_COMMITTED); 2354 } 2355 } 2356 } 2357 2358 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, 2359 bool validate) 2360 { 2361 struct xe_bo *bo = xe_vma_bo(vma); 2362 struct xe_vm *vm = xe_vma_vm(vma); 2363 int err = 0; 2364 2365 if (bo) { 2366 if (!bo->vm) 2367 err = drm_exec_lock_obj(exec, &bo->ttm.base); 2368 if (!err && validate) 2369 err = xe_bo_validate(bo, vm, 2370 !xe_vm_in_preempt_fence_mode(vm)); 2371 } 2372 2373 return err; 2374 } 2375 2376 static int check_ufence(struct xe_vma *vma) 2377 { 2378 if (vma->ufence) { 2379 struct 
xe_user_fence * const f = vma->ufence; 2380 2381 if (!xe_sync_ufence_get_status(f)) 2382 return -EBUSY; 2383 2384 vma->ufence = NULL; 2385 xe_sync_ufence_put(f); 2386 } 2387 2388 return 0; 2389 } 2390 2391 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, 2392 struct xe_vma_op *op) 2393 { 2394 int err = 0; 2395 2396 switch (op->base.op) { 2397 case DRM_GPUVA_OP_MAP: 2398 err = vma_lock_and_validate(exec, op->map.vma, 2399 !xe_vm_in_fault_mode(vm) || 2400 op->map.immediate); 2401 break; 2402 case DRM_GPUVA_OP_REMAP: 2403 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va)); 2404 if (err) 2405 break; 2406 2407 err = vma_lock_and_validate(exec, 2408 gpuva_to_vma(op->base.remap.unmap->va), 2409 false); 2410 if (!err && op->remap.prev) 2411 err = vma_lock_and_validate(exec, op->remap.prev, true); 2412 if (!err && op->remap.next) 2413 err = vma_lock_and_validate(exec, op->remap.next, true); 2414 break; 2415 case DRM_GPUVA_OP_UNMAP: 2416 err = check_ufence(gpuva_to_vma(op->base.unmap.va)); 2417 if (err) 2418 break; 2419 2420 err = vma_lock_and_validate(exec, 2421 gpuva_to_vma(op->base.unmap.va), 2422 false); 2423 break; 2424 case DRM_GPUVA_OP_PREFETCH: 2425 { 2426 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2427 u32 region = op->prefetch.region; 2428 2429 xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type)); 2430 2431 err = vma_lock_and_validate(exec, 2432 gpuva_to_vma(op->base.prefetch.va), 2433 false); 2434 if (!err && !xe_vma_has_no_bo(vma)) 2435 err = xe_bo_migrate(xe_vma_bo(vma), 2436 region_to_mem_type[region]); 2437 break; 2438 } 2439 default: 2440 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2441 } 2442 2443 return err; 2444 } 2445 2446 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, 2447 struct xe_vm *vm, 2448 struct xe_vma_ops *vops) 2449 { 2450 struct xe_vma_op *op; 2451 int err; 2452 2453 err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 2454 if (err) 2455 return err; 2456 2457 list_for_each_entry(op, &vops->list, link) { 2458 err = op_lock_and_prep(exec, vm, op); 2459 if (err) 2460 return err; 2461 } 2462 2463 #ifdef TEST_VM_OPS_ERROR 2464 if (vops->inject_error && 2465 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK) 2466 return -ENOSPC; 2467 #endif 2468 2469 return 0; 2470 } 2471 2472 static void op_trace(struct xe_vma_op *op) 2473 { 2474 switch (op->base.op) { 2475 case DRM_GPUVA_OP_MAP: 2476 trace_xe_vma_bind(op->map.vma); 2477 break; 2478 case DRM_GPUVA_OP_REMAP: 2479 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va)); 2480 if (op->remap.prev) 2481 trace_xe_vma_bind(op->remap.prev); 2482 if (op->remap.next) 2483 trace_xe_vma_bind(op->remap.next); 2484 break; 2485 case DRM_GPUVA_OP_UNMAP: 2486 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va)); 2487 break; 2488 case DRM_GPUVA_OP_PREFETCH: 2489 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va)); 2490 break; 2491 default: 2492 XE_WARN_ON("NOT POSSIBLE"); 2493 } 2494 } 2495 2496 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops) 2497 { 2498 struct xe_vma_op *op; 2499 2500 list_for_each_entry(op, &vops->list, link) 2501 op_trace(op); 2502 } 2503 2504 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops) 2505 { 2506 struct xe_exec_queue *q = vops->q; 2507 struct xe_tile *tile; 2508 int number_tiles = 0; 2509 u8 id; 2510 2511 for_each_tile(tile, vm->xe, id) { 2512 if (vops->pt_update_ops[id].num_ops) 2513 ++number_tiles; 2514 2515 if (vops->pt_update_ops[id].q) 2516 continue; 2517 2518 if (q) { 2519 vops->pt_update_ops[id].q = 
q; 2520 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list)) 2521 q = list_next_entry(q, multi_gt_list); 2522 } else { 2523 vops->pt_update_ops[id].q = vm->q[id]; 2524 } 2525 } 2526 2527 return number_tiles; 2528 } 2529 2530 static struct dma_fence *ops_execute(struct xe_vm *vm, 2531 struct xe_vma_ops *vops) 2532 { 2533 struct xe_tile *tile; 2534 struct dma_fence *fence = NULL; 2535 struct dma_fence **fences = NULL; 2536 struct dma_fence_array *cf = NULL; 2537 int number_tiles = 0, current_fence = 0, err; 2538 u8 id; 2539 2540 number_tiles = vm_ops_setup_tile_args(vm, vops); 2541 if (number_tiles == 0) 2542 return ERR_PTR(-ENODATA); 2543 2544 if (number_tiles > 1) { 2545 fences = kmalloc_array(number_tiles, sizeof(*fences), 2546 GFP_KERNEL); 2547 if (!fences) { 2548 fence = ERR_PTR(-ENOMEM); 2549 goto err_trace; 2550 } 2551 } 2552 2553 for_each_tile(tile, vm->xe, id) { 2554 if (!vops->pt_update_ops[id].num_ops) 2555 continue; 2556 2557 err = xe_pt_update_ops_prepare(tile, vops); 2558 if (err) { 2559 fence = ERR_PTR(err); 2560 goto err_out; 2561 } 2562 } 2563 2564 trace_xe_vm_ops_execute(vops); 2565 2566 for_each_tile(tile, vm->xe, id) { 2567 if (!vops->pt_update_ops[id].num_ops) 2568 continue; 2569 2570 fence = xe_pt_update_ops_run(tile, vops); 2571 if (IS_ERR(fence)) 2572 goto err_out; 2573 2574 if (fences) 2575 fences[current_fence++] = fence; 2576 } 2577 2578 if (fences) { 2579 cf = dma_fence_array_create(number_tiles, fences, 2580 vm->composite_fence_ctx, 2581 vm->composite_fence_seqno++, 2582 false); 2583 if (!cf) { 2584 --vm->composite_fence_seqno; 2585 fence = ERR_PTR(-ENOMEM); 2586 goto err_out; 2587 } 2588 fence = &cf->base; 2589 } 2590 2591 for_each_tile(tile, vm->xe, id) { 2592 if (!vops->pt_update_ops[id].num_ops) 2593 continue; 2594 2595 xe_pt_update_ops_fini(tile, vops); 2596 } 2597 2598 return fence; 2599 2600 err_out: 2601 for_each_tile(tile, vm->xe, id) { 2602 if (!vops->pt_update_ops[id].num_ops) 2603 continue; 2604 2605 xe_pt_update_ops_abort(tile, vops); 2606 } 2607 while (current_fence) 2608 dma_fence_put(fences[--current_fence]); 2609 kfree(fences); 2610 kfree(cf); 2611 2612 err_trace: 2613 trace_xe_vm_ops_fail(vm); 2614 return fence; 2615 } 2616 2617 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence) 2618 { 2619 if (vma->ufence) 2620 xe_sync_ufence_put(vma->ufence); 2621 vma->ufence = __xe_sync_ufence_get(ufence); 2622 } 2623 2624 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op, 2625 struct xe_user_fence *ufence) 2626 { 2627 switch (op->base.op) { 2628 case DRM_GPUVA_OP_MAP: 2629 vma_add_ufence(op->map.vma, ufence); 2630 break; 2631 case DRM_GPUVA_OP_REMAP: 2632 if (op->remap.prev) 2633 vma_add_ufence(op->remap.prev, ufence); 2634 if (op->remap.next) 2635 vma_add_ufence(op->remap.next, ufence); 2636 break; 2637 case DRM_GPUVA_OP_UNMAP: 2638 break; 2639 case DRM_GPUVA_OP_PREFETCH: 2640 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence); 2641 break; 2642 default: 2643 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2644 } 2645 } 2646 2647 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, 2648 struct dma_fence *fence) 2649 { 2650 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q); 2651 struct xe_user_fence *ufence; 2652 struct xe_vma_op *op; 2653 int i; 2654 2655 ufence = find_ufence_get(vops->syncs, vops->num_syncs); 2656 list_for_each_entry(op, &vops->list, link) { 2657 if (ufence) 2658 op_add_ufence(vm, op, ufence); 2659 2660 if (op->base.op == DRM_GPUVA_OP_UNMAP) 2661 
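			/*
			 * The old VMA of an UNMAP, and the unmapped VMA of a
			 * REMAP, are destroyed against @fence, i.e. the fence
			 * returned by ops_execute() above.
			 */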
xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence); 2662 else if (op->base.op == DRM_GPUVA_OP_REMAP) 2663 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), 2664 fence); 2665 } 2666 if (ufence) 2667 xe_sync_ufence_put(ufence); 2668 for (i = 0; i < vops->num_syncs; i++) 2669 xe_sync_entry_signal(vops->syncs + i, fence); 2670 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); 2671 dma_fence_put(fence); 2672 } 2673 2674 static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, 2675 struct xe_vma_ops *vops) 2676 { 2677 struct drm_exec exec; 2678 struct dma_fence *fence; 2679 int err; 2680 2681 lockdep_assert_held_write(&vm->lock); 2682 2683 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | 2684 DRM_EXEC_IGNORE_DUPLICATES, 0); 2685 drm_exec_until_all_locked(&exec) { 2686 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops); 2687 drm_exec_retry_on_contention(&exec); 2688 if (err) 2689 goto unlock; 2690 2691 fence = ops_execute(vm, vops); 2692 if (IS_ERR(fence)) { 2693 err = PTR_ERR(fence); 2694 goto unlock; 2695 } 2696 2697 vm_bind_ioctl_ops_fini(vm, vops, fence); 2698 } 2699 2700 unlock: 2701 drm_exec_fini(&exec); 2702 return err; 2703 } 2704 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); 2705 2706 #define SUPPORTED_FLAGS_STUB \ 2707 (DRM_XE_VM_BIND_FLAG_READONLY | \ 2708 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \ 2709 DRM_XE_VM_BIND_FLAG_NULL | \ 2710 DRM_XE_VM_BIND_FLAG_DUMPABLE) 2711 2712 #ifdef TEST_VM_OPS_ERROR 2713 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR) 2714 #else 2715 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB 2716 #endif 2717 2718 #define XE_64K_PAGE_MASK 0xffffull 2719 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) 2720 2721 static int vm_bind_ioctl_check_args(struct xe_device *xe, 2722 struct drm_xe_vm_bind *args, 2723 struct drm_xe_vm_bind_op **bind_ops) 2724 { 2725 int err; 2726 int i; 2727 2728 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) || 2729 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 2730 return -EINVAL; 2731 2732 if (XE_IOCTL_DBG(xe, args->extensions)) 2733 return -EINVAL; 2734 2735 if (args->num_binds > 1) { 2736 u64 __user *bind_user = 2737 u64_to_user_ptr(args->vector_of_binds); 2738 2739 *bind_ops = kvmalloc_array(args->num_binds, 2740 sizeof(struct drm_xe_vm_bind_op), 2741 GFP_KERNEL | __GFP_ACCOUNT | 2742 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 2743 if (!*bind_ops) 2744 return args->num_binds > 1 ? 
-ENOBUFS : -ENOMEM; 2745 2746 err = __copy_from_user(*bind_ops, bind_user, 2747 sizeof(struct drm_xe_vm_bind_op) * 2748 args->num_binds); 2749 if (XE_IOCTL_DBG(xe, err)) { 2750 err = -EFAULT; 2751 goto free_bind_ops; 2752 } 2753 } else { 2754 *bind_ops = &args->bind; 2755 } 2756 2757 for (i = 0; i < args->num_binds; ++i) { 2758 u64 range = (*bind_ops)[i].range; 2759 u64 addr = (*bind_ops)[i].addr; 2760 u32 op = (*bind_ops)[i].op; 2761 u32 flags = (*bind_ops)[i].flags; 2762 u32 obj = (*bind_ops)[i].obj; 2763 u64 obj_offset = (*bind_ops)[i].obj_offset; 2764 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance; 2765 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 2766 u16 pat_index = (*bind_ops)[i].pat_index; 2767 u16 coh_mode; 2768 2769 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) { 2770 err = -EINVAL; 2771 goto free_bind_ops; 2772 } 2773 2774 pat_index = array_index_nospec(pat_index, xe->pat.n_entries); 2775 (*bind_ops)[i].pat_index = pat_index; 2776 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); 2777 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */ 2778 err = -EINVAL; 2779 goto free_bind_ops; 2780 } 2781 2782 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) { 2783 err = -EINVAL; 2784 goto free_bind_ops; 2785 } 2786 2787 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) || 2788 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) || 2789 XE_IOCTL_DBG(xe, obj && is_null) || 2790 XE_IOCTL_DBG(xe, obj_offset && is_null) || 2791 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP && 2792 is_null) || 2793 XE_IOCTL_DBG(xe, !obj && 2794 op == DRM_XE_VM_BIND_OP_MAP && 2795 !is_null) || 2796 XE_IOCTL_DBG(xe, !obj && 2797 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 2798 XE_IOCTL_DBG(xe, addr && 2799 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 2800 XE_IOCTL_DBG(xe, range && 2801 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 2802 XE_IOCTL_DBG(xe, obj && 2803 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 2804 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 2805 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 2806 XE_IOCTL_DBG(xe, obj && 2807 op == DRM_XE_VM_BIND_OP_PREFETCH) || 2808 XE_IOCTL_DBG(xe, prefetch_region && 2809 op != DRM_XE_VM_BIND_OP_PREFETCH) || 2810 XE_IOCTL_DBG(xe, !(BIT(prefetch_region) & 2811 xe->info.mem_region_mask)) || 2812 XE_IOCTL_DBG(xe, obj && 2813 op == DRM_XE_VM_BIND_OP_UNMAP)) { 2814 err = -EINVAL; 2815 goto free_bind_ops; 2816 } 2817 2818 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) || 2819 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) || 2820 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) || 2821 XE_IOCTL_DBG(xe, !range && 2822 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) { 2823 err = -EINVAL; 2824 goto free_bind_ops; 2825 } 2826 } 2827 2828 return 0; 2829 2830 free_bind_ops: 2831 if (args->num_binds > 1) 2832 kvfree(*bind_ops); 2833 return err; 2834 } 2835 2836 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, 2837 struct xe_exec_queue *q, 2838 struct xe_sync_entry *syncs, 2839 int num_syncs) 2840 { 2841 struct dma_fence *fence; 2842 int i, err = 0; 2843 2844 fence = xe_sync_in_fence_get(syncs, num_syncs, 2845 to_wait_exec_queue(vm, q), vm); 2846 if (IS_ERR(fence)) 2847 return PTR_ERR(fence); 2848 2849 for (i = 0; i < num_syncs; i++) 2850 xe_sync_entry_signal(&syncs[i], fence); 2851 2852 xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm, 2853 fence); 2854 dma_fence_put(fence); 2855 2856 return err; 2857 } 2858 2859 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 2860 struct xe_exec_queue *q, 2861 struct xe_sync_entry *syncs, u32 num_syncs) 2862 { 2863 memset(vops, 0, 
	       sizeof(*vops));
	INIT_LIST_HEAD(&vops->list);
	vops->vm = vm;
	vops->q = q;
	vops->syncs = syncs;
	vops->num_syncs = num_syncs;
}

static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
					u64 addr, u64 range, u64 obj_offset,
					u16 pat_index)
{
	u16 coh_mode;

	if (XE_IOCTL_DBG(xe, range > bo->size) ||
	    XE_IOCTL_DBG(xe, obj_offset >
			 bo->size - range)) {
		return -EINVAL;
	}

	/*
	 * Some platforms require 64k VM_BIND alignment,
	 * specifically those with XE_VRAM_FLAGS_NEED64K.
	 *
	 * Other platforms may have BOs set to 64k physical placement,
	 * but can be mapped at 4k offsets anyway. This check is only
	 * there for the former case.
	 */
	if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) &&
	    (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) {
		if (XE_IOCTL_DBG(xe, obj_offset &
				 XE_64K_PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
			return -EINVAL;
		}
	}

	coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
	if (bo->cpu_caching) {
		if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
				 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
			return -EINVAL;
		}
	} else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
		/*
		 * Imported dma-buf from a different device should
		 * require 1way or 2way coherency since we don't know
		 * how it was mapped on the CPU. Just assume it is
		 * potentially cached on the CPU side.
		 */
		return -EINVAL;
	}

	return 0;
}

int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_vm_bind *args = data;
	struct drm_xe_sync __user *syncs_user;
	struct xe_bo **bos = NULL;
	struct drm_gpuva_ops **ops = NULL;
	struct xe_vm *vm;
	struct xe_exec_queue *q = NULL;
	u32 num_syncs, num_ufence = 0;
	struct xe_sync_entry *syncs = NULL;
	struct drm_xe_vm_bind_op *bind_ops;
	struct xe_vma_ops vops;
	int err;
	int i;

	err = vm_bind_ioctl_check_args(xe, args, &bind_ops);
	if (err)
		return err;

	if (args->exec_queue_id) {
		q = xe_exec_queue_lookup(xef, args->exec_queue_id);
		if (XE_IOCTL_DBG(xe, !q)) {
			err = -ENOENT;
			goto free_objs;
		}

		if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
			err = -EINVAL;
			goto put_exec_queue;
		}
	}

	vm = xe_vm_lookup(xef, args->vm_id);
	if (XE_IOCTL_DBG(xe, !vm)) {
		err = -EINVAL;
		goto put_exec_queue;
	}

	err = down_write_killable(&vm->lock);
	if (err)
		goto put_vm;

	if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
		err = -ENOENT;
		goto release_vm_lock;
	}

	for (i = 0; i < args->num_binds; ++i) {
		u64 range = bind_ops[i].range;
		u64 addr = bind_ops[i].addr;

		if (XE_IOCTL_DBG(xe, range > vm->size) ||
		    XE_IOCTL_DBG(xe, addr > vm->size - range)) {
			err = -EINVAL;
			goto release_vm_lock;
		}
	}

	if (args->num_binds) {
		bos = kvcalloc(args->num_binds, sizeof(*bos),
			       GFP_KERNEL | __GFP_ACCOUNT |
			       __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
		if (!bos) {
			err = -ENOMEM;
			goto release_vm_lock;
		}

		ops = kvcalloc(args->num_binds, sizeof(*ops),
			       GFP_KERNEL |
__GFP_ACCOUNT | 2991 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 2992 if (!ops) { 2993 err = -ENOMEM; 2994 goto release_vm_lock; 2995 } 2996 } 2997 2998 for (i = 0; i < args->num_binds; ++i) { 2999 struct drm_gem_object *gem_obj; 3000 u64 range = bind_ops[i].range; 3001 u64 addr = bind_ops[i].addr; 3002 u32 obj = bind_ops[i].obj; 3003 u64 obj_offset = bind_ops[i].obj_offset; 3004 u16 pat_index = bind_ops[i].pat_index; 3005 3006 if (!obj) 3007 continue; 3008 3009 gem_obj = drm_gem_object_lookup(file, obj); 3010 if (XE_IOCTL_DBG(xe, !gem_obj)) { 3011 err = -ENOENT; 3012 goto put_obj; 3013 } 3014 bos[i] = gem_to_xe_bo(gem_obj); 3015 3016 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range, 3017 obj_offset, pat_index); 3018 if (err) 3019 goto put_obj; 3020 } 3021 3022 if (args->num_syncs) { 3023 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); 3024 if (!syncs) { 3025 err = -ENOMEM; 3026 goto put_obj; 3027 } 3028 } 3029 3030 syncs_user = u64_to_user_ptr(args->syncs); 3031 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { 3032 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], 3033 &syncs_user[num_syncs], 3034 (xe_vm_in_lr_mode(vm) ? 3035 SYNC_PARSE_FLAG_LR_MODE : 0) | 3036 (!args->num_binds ? 3037 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0)); 3038 if (err) 3039 goto free_syncs; 3040 3041 if (xe_sync_is_ufence(&syncs[num_syncs])) 3042 num_ufence++; 3043 } 3044 3045 if (XE_IOCTL_DBG(xe, num_ufence > 1)) { 3046 err = -EINVAL; 3047 goto free_syncs; 3048 } 3049 3050 if (!args->num_binds) { 3051 err = -ENODATA; 3052 goto free_syncs; 3053 } 3054 3055 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); 3056 for (i = 0; i < args->num_binds; ++i) { 3057 u64 range = bind_ops[i].range; 3058 u64 addr = bind_ops[i].addr; 3059 u32 op = bind_ops[i].op; 3060 u32 flags = bind_ops[i].flags; 3061 u64 obj_offset = bind_ops[i].obj_offset; 3062 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance; 3063 u16 pat_index = bind_ops[i].pat_index; 3064 3065 ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset, 3066 addr, range, op, flags, 3067 prefetch_region, pat_index); 3068 if (IS_ERR(ops[i])) { 3069 err = PTR_ERR(ops[i]); 3070 ops[i] = NULL; 3071 goto unwind_ops; 3072 } 3073 3074 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops); 3075 if (err) 3076 goto unwind_ops; 3077 3078 #ifdef TEST_VM_OPS_ERROR 3079 if (flags & FORCE_OP_ERROR) { 3080 vops.inject_error = true; 3081 vm->xe->vm_inject_error_position = 3082 (vm->xe->vm_inject_error_position + 1) % 3083 FORCE_OP_ERROR_COUNT; 3084 } 3085 #endif 3086 } 3087 3088 /* Nothing to do */ 3089 if (list_empty(&vops.list)) { 3090 err = -ENODATA; 3091 goto unwind_ops; 3092 } 3093 3094 err = xe_vma_ops_alloc(&vops, args->num_binds > 1); 3095 if (err) 3096 goto unwind_ops; 3097 3098 err = vm_bind_ioctl_ops_execute(vm, &vops); 3099 3100 unwind_ops: 3101 if (err && err != -ENODATA) 3102 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); 3103 xe_vma_ops_fini(&vops); 3104 for (i = args->num_binds - 1; i >= 0; --i) 3105 if (ops[i]) 3106 drm_gpuva_ops_free(&vm->gpuvm, ops[i]); 3107 free_syncs: 3108 if (err == -ENODATA) 3109 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs); 3110 while (num_syncs--) 3111 xe_sync_entry_cleanup(&syncs[num_syncs]); 3112 3113 kfree(syncs); 3114 put_obj: 3115 for (i = 0; i < args->num_binds; ++i) 3116 xe_bo_put(bos[i]); 3117 release_vm_lock: 3118 up_write(&vm->lock); 3119 put_vm: 3120 xe_vm_put(vm); 3121 put_exec_queue: 3122 if (q) 3123 xe_exec_queue_put(q); 3124 free_objs: 3125 kvfree(bos); 3126 kvfree(ops); 3127 
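	/*
	 * bind_ops points at &args->bind when num_binds <= 1 (see
	 * vm_bind_ioctl_check_args()), so only the kvmalloc'ed vector is
	 * freed here.
	 */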
	if (args->num_binds > 1)
		kvfree(bind_ops);
	return err;
}

/**
 * xe_vm_lock() - Lock the vm's dma_resv object
 * @vm: The struct xe_vm whose lock is to be locked
 * @intr: Whether to wait interruptibly if the lock is contended
 *
 * Return: 0 on success, -EINTR if @intr is true and the wait for a
 * contended lock was interrupted. If @intr is false, the function
 * always returns 0.
 */
int xe_vm_lock(struct xe_vm *vm, bool intr)
{
	if (intr)
		return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);

	return dma_resv_lock(xe_vm_resv(vm), NULL);
}

/**
 * xe_vm_unlock() - Unlock the vm's dma_resv object
 * @vm: The struct xe_vm whose lock is to be released.
 *
 * Unlock the vm's dma_resv object previously locked with xe_vm_lock().
 */
void xe_vm_unlock(struct xe_vm *vm)
{
	dma_resv_unlock(xe_vm_resv(vm));
}

/**
 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
 * @vma: VMA to invalidate
 *
 * Walks the list of page-table leaves, zeroes the entries owned by this VMA,
 * invalidates the TLBs and blocks until the TLB invalidation is complete.
 *
 * Return: 0 for success, negative error code otherwise.
 */
int xe_vm_invalidate_vma(struct xe_vma *vma)
{
	struct xe_device *xe = xe_vma_vm(vma)->xe;
	struct xe_tile *tile;
	struct xe_gt_tlb_invalidation_fence
		fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
	u8 id;
	u32 fence_id = 0;
	int ret = 0;

	xe_assert(xe, !xe_vma_is_null(vma));
	trace_xe_vma_invalidate(vma);

	vm_dbg(&xe_vma_vm(vma)->xe->drm,
	       "INVALIDATE: addr=0x%016llx, range=0x%016llx",
	       xe_vma_start(vma), xe_vma_size(vma));

	/* Check that we don't race with page-table updates */
	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
		if (xe_vma_is_userptr(vma)) {
			WARN_ON_ONCE(!mmu_interval_check_retry
				     (&to_userptr_vma(vma)->userptr.notifier,
				      to_userptr_vma(vma)->userptr.notifier_seq));
			WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)),
							     DMA_RESV_USAGE_BOOKKEEP));

		} else {
			xe_bo_assert_held(xe_vma_bo(vma));
		}
	}

	for_each_tile(tile, xe, id) {
		if (xe_pt_zap_ptes(tile, vma)) {
			xe_device_wmb(xe);
			xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
							  &fence[fence_id],
							  true);

			ret = xe_gt_tlb_invalidation_vma(tile->primary_gt,
							 &fence[fence_id], vma);
			if (ret)
				goto wait;
			++fence_id;

			if (!tile->media_gt)
				continue;

			xe_gt_tlb_invalidation_fence_init(tile->media_gt,
							  &fence[fence_id],
							  true);

			ret = xe_gt_tlb_invalidation_vma(tile->media_gt,
							 &fence[fence_id], vma);
			if (ret)
				goto wait;
			++fence_id;
		}
	}

wait:
	for (id = 0; id < fence_id; ++id)
		xe_gt_tlb_invalidation_fence_wait(&fence[id]);

	vma->tile_invalidated = vma->tile_mask;

	return ret;
}

struct xe_vm_snapshot {
	unsigned long num_snaps;
	struct {
		u64 ofs, bo_ofs;
		unsigned long len;
		struct xe_bo *bo;
		void *data;
		struct mm_struct *mm;
	} snap[];
};

struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
{
	unsigned long num_snaps = 0, i;
	struct xe_vm_snapshot *snap = NULL;
	struct drm_gpuva *gpuva;

	if (!vm)
		return NULL;

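	/*
	 * Capture is split into two phases: this function only counts the
	 * dumpable VMAs and takes BO/mm references under snap_mutex, using a
	 * GFP_NOWAIT allocation, while xe_vm_snapshot_capture_delayed() later
	 * copies the actual BO/userptr contents and may sleep (GFP_USER,
	 * xe_bo_read()/copy_from_user()).
	 *
	 * Minimal sketch of the intended call sequence (illustrative only;
	 * the consumer, e.g. a coredump printer @p, is assumed). All of the
	 * helpers accept an IS_ERR_OR_NULL() snapshot, so no error checks
	 * are needed in between:
	 *
	 *	struct xe_vm_snapshot *snap = xe_vm_snapshot_capture(vm);
	 *
	 *	xe_vm_snapshot_capture_delayed(snap);	// sleepable context
	 *	xe_vm_snapshot_print(snap, p);		// ascii85-encoded dump
	 *	xe_vm_snapshot_free(snap);
	 */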
mutex_lock(&vm->snap_mutex); 3259 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 3260 if (gpuva->flags & XE_VMA_DUMPABLE) 3261 num_snaps++; 3262 } 3263 3264 if (num_snaps) 3265 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT); 3266 if (!snap) { 3267 snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV); 3268 goto out_unlock; 3269 } 3270 3271 snap->num_snaps = num_snaps; 3272 i = 0; 3273 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 3274 struct xe_vma *vma = gpuva_to_vma(gpuva); 3275 struct xe_bo *bo = vma->gpuva.gem.obj ? 3276 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 3277 3278 if (!(gpuva->flags & XE_VMA_DUMPABLE)) 3279 continue; 3280 3281 snap->snap[i].ofs = xe_vma_start(vma); 3282 snap->snap[i].len = xe_vma_size(vma); 3283 if (bo) { 3284 snap->snap[i].bo = xe_bo_get(bo); 3285 snap->snap[i].bo_ofs = xe_vma_bo_offset(vma); 3286 } else if (xe_vma_is_userptr(vma)) { 3287 struct mm_struct *mm = 3288 to_userptr_vma(vma)->userptr.notifier.mm; 3289 3290 if (mmget_not_zero(mm)) 3291 snap->snap[i].mm = mm; 3292 else 3293 snap->snap[i].data = ERR_PTR(-EFAULT); 3294 3295 snap->snap[i].bo_ofs = xe_vma_userptr(vma); 3296 } else { 3297 snap->snap[i].data = ERR_PTR(-ENOENT); 3298 } 3299 i++; 3300 } 3301 3302 out_unlock: 3303 mutex_unlock(&vm->snap_mutex); 3304 return snap; 3305 } 3306 3307 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) 3308 { 3309 if (IS_ERR_OR_NULL(snap)) 3310 return; 3311 3312 for (int i = 0; i < snap->num_snaps; i++) { 3313 struct xe_bo *bo = snap->snap[i].bo; 3314 int err; 3315 3316 if (IS_ERR(snap->snap[i].data)) 3317 continue; 3318 3319 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER); 3320 if (!snap->snap[i].data) { 3321 snap->snap[i].data = ERR_PTR(-ENOMEM); 3322 goto cleanup_bo; 3323 } 3324 3325 if (bo) { 3326 err = xe_bo_read(bo, snap->snap[i].bo_ofs, 3327 snap->snap[i].data, snap->snap[i].len); 3328 } else { 3329 void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs; 3330 3331 kthread_use_mm(snap->snap[i].mm); 3332 if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len)) 3333 err = 0; 3334 else 3335 err = -EFAULT; 3336 kthread_unuse_mm(snap->snap[i].mm); 3337 3338 mmput(snap->snap[i].mm); 3339 snap->snap[i].mm = NULL; 3340 } 3341 3342 if (err) { 3343 kvfree(snap->snap[i].data); 3344 snap->snap[i].data = ERR_PTR(err); 3345 } 3346 3347 cleanup_bo: 3348 xe_bo_put(bo); 3349 snap->snap[i].bo = NULL; 3350 } 3351 } 3352 3353 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) 3354 { 3355 unsigned long i, j; 3356 3357 if (IS_ERR_OR_NULL(snap)) { 3358 drm_printf(p, "[0].error: %li\n", PTR_ERR(snap)); 3359 return; 3360 } 3361 3362 for (i = 0; i < snap->num_snaps; i++) { 3363 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len); 3364 3365 if (IS_ERR(snap->snap[i].data)) { 3366 drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs, 3367 PTR_ERR(snap->snap[i].data)); 3368 continue; 3369 } 3370 3371 drm_printf(p, "[%llx].data: ", snap->snap[i].ofs); 3372 3373 for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) { 3374 u32 *val = snap->snap[i].data + j; 3375 char dumped[ASCII85_BUFSZ]; 3376 3377 drm_puts(p, ascii85_encode(*val, dumped)); 3378 } 3379 3380 drm_puts(p, "\n"); 3381 } 3382 } 3383 3384 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap) 3385 { 3386 unsigned long i; 3387 3388 if (IS_ERR_OR_NULL(snap)) 3389 return; 3390 3391 for (i = 0; i < snap->num_snaps; i++) { 3392 if (!IS_ERR(snap->snap[i].data)) 3393 kvfree(snap->snap[i].data); 3394 
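		/*
		 * Drop any bo/mm references still held from capture time,
		 * e.g. if xe_vm_snapshot_capture_delayed() never ran or
		 * bailed out early.
		 */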
		xe_bo_put(snap->snap[i].bo);
		if (snap->snap[i].mm)
			mmput(snap->snap[i].mm);
	}
	kvfree(snap);
}
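/*
 * Illustrative sketch (not part of the driver): a minimal userspace
 * invocation of the bind ioctl handled by xe_vm_bind_ioctl() above, mapping
 * one BO at a fixed GPU virtual address. Struct, op, flag and ioctl names
 * are assumed to match uapi/drm/xe_drm.h; vm_id, bo_handle, bo_size and
 * pat_index are placeholders, and sync/error handling is omitted.
 *
 *	struct drm_xe_vm_bind bind = {
 *		.vm_id = vm_id,
 *		.num_binds = 1,
 *		.bind = {
 *			.obj = bo_handle,
 *			.obj_offset = 0,
 *			.addr = 0x1a0000,	// page-aligned GPU VA
 *			.range = bo_size,	// page-aligned size
 *			.op = DRM_XE_VM_BIND_OP_MAP,
 *			.flags = DRM_XE_VM_BIND_FLAG_IMMEDIATE,
 *			.pat_index = pat_index,
 *		},
 *	};
 *
 *	if (ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind))
 *		err(1, "vm bind failed");
 */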