// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_vm.h"

#include <linux/dma-fence-array.h>
#include <linux/nospec.h>

#include <drm/drm_drv.h>
#include <drm/drm_exec.h>
#include <drm/drm_print.h>
#include <drm/ttm/ttm_tt.h>
#include <uapi/drm/xe_drm.h>
#include <linux/ascii85.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/swap.h>

#include <generated/xe_wa_oob.h>

#include "regs/xe_gtt_defs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue.h"
#include "xe_gt_pagefault.h"
#include "xe_migrate.h"
#include "xe_pat.h"
#include "xe_pm.h"
#include "xe_preempt_fence.h"
#include "xe_pt.h"
#include "xe_pxp.h"
#include "xe_res_cursor.h"
#include "xe_svm.h"
#include "xe_sync.h"
#include "xe_tile.h"
#include "xe_tlb_inval.h"
#include "xe_trace_bo.h"
#include "xe_wa.h"
#include "xe_hmm.h"

static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
{
	return vm->gpuvm.r_obj;
}

/**
 * xe_vma_userptr_check_repin() - Advisory check for repin needed
 * @uvma: The userptr vma
 *
 * Check if the userptr vma has been invalidated since last successful
 * repin. The check is advisory only and the function can be called
 * without the vm->userptr.notifier_lock held. There is no guarantee that the
 * vma userptr will remain valid after a lockless check, so typically
 * the call needs to be followed by a proper check under the notifier_lock.
 *
 * Return: 0 if the userptr vma is valid, -EAGAIN otherwise; repin recommended.
 */
int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
{
	return mmu_interval_check_retry(&uvma->userptr.notifier,
					uvma->userptr.notifier_seq) ?
67 -EAGAIN : 0; 68 } 69 70 int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) 71 { 72 struct xe_vma *vma = &uvma->vma; 73 struct xe_vm *vm = xe_vma_vm(vma); 74 struct xe_device *xe = vm->xe; 75 76 lockdep_assert_held(&vm->lock); 77 xe_assert(xe, xe_vma_is_userptr(vma)); 78 79 return xe_hmm_userptr_populate_range(uvma, false); 80 } 81 82 static bool preempt_fences_waiting(struct xe_vm *vm) 83 { 84 struct xe_exec_queue *q; 85 86 lockdep_assert_held(&vm->lock); 87 xe_vm_assert_held(vm); 88 89 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 90 if (!q->lr.pfence || 91 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, 92 &q->lr.pfence->flags)) { 93 return true; 94 } 95 } 96 97 return false; 98 } 99 100 static void free_preempt_fences(struct list_head *list) 101 { 102 struct list_head *link, *next; 103 104 list_for_each_safe(link, next, list) 105 xe_preempt_fence_free(to_preempt_fence_from_link(link)); 106 } 107 108 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list, 109 unsigned int *count) 110 { 111 lockdep_assert_held(&vm->lock); 112 xe_vm_assert_held(vm); 113 114 if (*count >= vm->preempt.num_exec_queues) 115 return 0; 116 117 for (; *count < vm->preempt.num_exec_queues; ++(*count)) { 118 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc(); 119 120 if (IS_ERR(pfence)) 121 return PTR_ERR(pfence); 122 123 list_move_tail(xe_preempt_fence_link(pfence), list); 124 } 125 126 return 0; 127 } 128 129 static int wait_for_existing_preempt_fences(struct xe_vm *vm) 130 { 131 struct xe_exec_queue *q; 132 133 xe_vm_assert_held(vm); 134 135 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 136 if (q->lr.pfence) { 137 long timeout = dma_fence_wait(q->lr.pfence, false); 138 139 /* Only -ETIME on fence indicates VM needs to be killed */ 140 if (timeout < 0 || q->lr.pfence->error == -ETIME) 141 return -ETIME; 142 143 dma_fence_put(q->lr.pfence); 144 q->lr.pfence = NULL; 145 } 146 } 147 148 return 0; 149 } 150 151 static bool xe_vm_is_idle(struct xe_vm *vm) 152 { 153 struct xe_exec_queue *q; 154 155 xe_vm_assert_held(vm); 156 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 157 if (!xe_exec_queue_is_idle(q)) 158 return false; 159 } 160 161 return true; 162 } 163 164 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list) 165 { 166 struct list_head *link; 167 struct xe_exec_queue *q; 168 169 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 170 struct dma_fence *fence; 171 172 link = list->next; 173 xe_assert(vm->xe, link != list); 174 175 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link), 176 q, q->lr.context, 177 ++q->lr.seqno); 178 dma_fence_put(q->lr.pfence); 179 q->lr.pfence = fence; 180 } 181 } 182 183 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) 184 { 185 struct xe_exec_queue *q; 186 int err; 187 188 xe_bo_assert_held(bo); 189 190 if (!vm->preempt.num_exec_queues) 191 return 0; 192 193 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues); 194 if (err) 195 return err; 196 197 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) 198 if (q->lr.pfence) { 199 dma_resv_add_fence(bo->ttm.base.resv, 200 q->lr.pfence, 201 DMA_RESV_USAGE_BOOKKEEP); 202 } 203 204 return 0; 205 } 206 207 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm, 208 struct drm_exec *exec) 209 { 210 struct xe_exec_queue *q; 211 212 lockdep_assert_held(&vm->lock); 213 xe_vm_assert_held(vm); 214 215 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 216 q->ops->resume(q); 
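		/* Re-install the queue's new preempt fence in the VM resv with bookkeep usage. */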
		drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
					 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
	}
}

int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	struct drm_gpuvm_exec vm_exec = {
		.vm = &vm->gpuvm,
		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
		.num_fences = 1,
	};
	struct drm_exec *exec = &vm_exec.exec;
	struct dma_fence *pfence;
	int err;
	bool wait;

	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));

	down_write(&vm->lock);
	err = drm_gpuvm_exec_lock(&vm_exec);
	if (err)
		goto out_up_write;

	pfence = xe_preempt_fence_create(q, q->lr.context,
					 ++q->lr.seqno);
	if (!pfence) {
		err = -ENOMEM;
		goto out_fini;
	}

	list_add(&q->lr.link, &vm->preempt.exec_queues);
	++vm->preempt.num_exec_queues;
	q->lr.pfence = pfence;

	down_read(&vm->userptr.notifier_lock);

	drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
				 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);

	/*
	 * Check to see if a preemption on the VM or a userptr invalidation is
	 * in flight; if so, trigger this preempt fence to sync state with the
	 * other preempt fences on the VM.
	 */
	wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
	if (wait)
		dma_fence_enable_sw_signaling(pfence);

	up_read(&vm->userptr.notifier_lock);

out_fini:
	drm_exec_fini(exec);
out_up_write:
	up_write(&vm->lock);

	return err;
}
ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO);

/**
 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
 * @vm: The VM.
 * @q: The exec_queue
 *
 * Note that this function might be called multiple times on the same queue.
 */
void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	if (!xe_vm_in_preempt_fence_mode(vm))
		return;

	down_write(&vm->lock);
	if (!list_empty(&q->lr.link)) {
		list_del_init(&q->lr.link);
		--vm->preempt.num_exec_queues;
	}
	if (q->lr.pfence) {
		dma_fence_enable_sw_signaling(q->lr.pfence);
		dma_fence_put(q->lr.pfence);
		q->lr.pfence = NULL;
	}
	up_write(&vm->lock);
}

/**
 * __xe_vm_userptr_needs_repin() - Check whether the VM has userptrs
 * that need repinning.
 * @vm: The VM.
 *
 * This function checks whether the VM has userptrs that need repinning,
 * and provides a release-type barrier on the userptr.notifier_lock after
 * checking.
 *
 * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
 */
int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
{
	lockdep_assert_held_read(&vm->userptr.notifier_lock);

	return (list_empty(&vm->userptr.repin_list) &&
		list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
}

#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000

/**
 * xe_vm_kill() - VM Kill
 * @vm: The VM.
 * @unlocked: Flag indicating that the VM's dma-resv is not held
 *
 * Kill the VM by setting the banned flag, indicating that the VM is no longer
 * available for use. If in preempt fence mode, also kill all exec queues
 * attached to the VM.
 */
void xe_vm_kill(struct xe_vm *vm, bool unlocked)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);

	if (unlocked)
		xe_vm_lock(vm, false);

	vm->flags |= XE_VM_FLAG_BANNED;
	trace_xe_vm_kill(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
		q->ops->kill(q);

	if (unlocked)
		xe_vm_unlock(vm);

	/* TODO: Inform user the VM is banned */
}

/**
 * xe_vm_validate_should_retry() - Whether to retry after a validate error.
 * @exec: The drm_exec object used for locking before validation.
 * @err: The error returned from ttm_bo_validate().
 * @end: A ktime_t cookie that should be set to 0 before first use and
 * that should be reused on subsequent calls.
 *
 * With multiple active VMs, under memory pressure, it is possible that
 * ttm_bo_validate() runs into -EDEADLK and in such a case returns -ENOMEM.
 * Until ttm properly handles locking in such scenarios, the best thing the
 * driver can do is retry with a timeout. Check if that is necessary, and
 * if so unlock the drm_exec's objects while keeping the ticket to prepare
 * for a rerun.
 *
 * Return: true if a retry after drm_exec_init() is recommended;
 * false otherwise.
 */
bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
{
	ktime_t cur;

	if (err != -ENOMEM)
		return false;

	cur = ktime_get();
	*end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
	if (!ktime_before(cur, *end))
		return false;

	msleep(20);
	return true;
}

static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
{
	struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
	struct drm_gpuva *gpuva;
	int ret;

	lockdep_assert_held(&vm->lock);
	drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
		list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
			       &vm->rebind_list);

	if (!try_wait_for_completion(&vm->xe->pm_block))
		return -EAGAIN;

	ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
	if (ret)
		return ret;

	vm_bo->evicted = false;
	return 0;
}

/**
 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
 * @vm: The vm for which we are rebinding.
 * @exec: The struct drm_exec with the locked GEM objects.
 * @num_fences: The number of fences to reserve for the operation, not
 * including rebinds and validations.
 *
 * Validates all evicted gem objects and rebinds their vmas. Note that
 * rebindings may cause evictions and hence the validation-rebind
 * sequence is rerun until there are no more objects to validate.
 *
 * Return: 0 on success, negative error code on error. In particular,
 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
 * the drm_exec transaction needs to be restarted.
 */
int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
			  unsigned int num_fences)
{
	struct drm_gem_object *obj;
	unsigned long index;
	int ret;

	do {
		ret = drm_gpuvm_validate(&vm->gpuvm, exec);
		if (ret)
			return ret;

		ret = xe_vm_rebind(vm, false);
		if (ret)
			return ret;
	} while (!list_empty(&vm->gpuvm.evict.list));

	drm_exec_for_each_locked_object(exec, index, obj) {
		ret = dma_resv_reserve_fences(obj->resv, num_fences);
		if (ret)
			return ret;
	}

	return 0;
}

static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
				 bool *done)
{
	int err;

	err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	if (xe_vm_is_idle(vm)) {
		vm->preempt.rebind_deactivated = true;
		*done = true;
		return 0;
	}

	if (!preempt_fences_waiting(vm)) {
		*done = true;
		return 0;
	}

	err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	err = wait_for_existing_preempt_fences(vm);
	if (err)
		return err;

	/*
	 * Add validation and rebinding to the locking loop since both can
	 * cause evictions which may require blocking dma_resv locks.
	 * The fence reservation here is intended for the new preempt fences
	 * we attach at the end of the rebind work.
	 */
	return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
}

static bool vm_suspend_rebind_worker(struct xe_vm *vm)
{
	struct xe_device *xe = vm->xe;
	bool ret = false;

	mutex_lock(&xe->rebind_resume_lock);
	if (!try_wait_for_completion(&vm->xe->pm_block)) {
		ret = true;
		list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list);
	}
	mutex_unlock(&xe->rebind_resume_lock);

	return ret;
}

/**
 * xe_vm_resume_rebind_worker() - Resume the rebind worker.
 * @vm: The vm whose preempt worker to resume.
 *
 * Resume a preempt worker that was previously suspended by
 * vm_suspend_rebind_worker().
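 * The rebind work is re-queued on the device's ordered workqueue.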
507 */ 508 void xe_vm_resume_rebind_worker(struct xe_vm *vm) 509 { 510 queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work); 511 } 512 513 static void preempt_rebind_work_func(struct work_struct *w) 514 { 515 struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work); 516 struct drm_exec exec; 517 unsigned int fence_count = 0; 518 LIST_HEAD(preempt_fences); 519 ktime_t end = 0; 520 int err = 0; 521 long wait; 522 int __maybe_unused tries = 0; 523 524 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); 525 trace_xe_vm_rebind_worker_enter(vm); 526 527 down_write(&vm->lock); 528 529 if (xe_vm_is_closed_or_banned(vm)) { 530 up_write(&vm->lock); 531 trace_xe_vm_rebind_worker_exit(vm); 532 return; 533 } 534 535 retry: 536 if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) { 537 up_write(&vm->lock); 538 return; 539 } 540 541 if (xe_vm_userptr_check_repin(vm)) { 542 err = xe_vm_userptr_pin(vm); 543 if (err) 544 goto out_unlock_outer; 545 } 546 547 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); 548 549 drm_exec_until_all_locked(&exec) { 550 bool done = false; 551 552 err = xe_preempt_work_begin(&exec, vm, &done); 553 drm_exec_retry_on_contention(&exec); 554 if (err || done) { 555 drm_exec_fini(&exec); 556 if (err && xe_vm_validate_should_retry(&exec, err, &end)) 557 err = -EAGAIN; 558 559 goto out_unlock_outer; 560 } 561 } 562 563 err = alloc_preempt_fences(vm, &preempt_fences, &fence_count); 564 if (err) 565 goto out_unlock; 566 567 err = xe_vm_rebind(vm, true); 568 if (err) 569 goto out_unlock; 570 571 /* Wait on rebinds and munmap style VM unbinds */ 572 wait = dma_resv_wait_timeout(xe_vm_resv(vm), 573 DMA_RESV_USAGE_KERNEL, 574 false, MAX_SCHEDULE_TIMEOUT); 575 if (wait <= 0) { 576 err = -ETIME; 577 goto out_unlock; 578 } 579 580 #define retry_required(__tries, __vm) \ 581 (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \ 582 (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \ 583 __xe_vm_userptr_needs_repin(__vm)) 584 585 down_read(&vm->userptr.notifier_lock); 586 if (retry_required(tries, vm)) { 587 up_read(&vm->userptr.notifier_lock); 588 err = -EAGAIN; 589 goto out_unlock; 590 } 591 592 #undef retry_required 593 594 spin_lock(&vm->xe->ttm.lru_lock); 595 ttm_lru_bulk_move_tail(&vm->lru_bulk_move); 596 spin_unlock(&vm->xe->ttm.lru_lock); 597 598 /* Point of no return. */ 599 arm_preempt_fences(vm, &preempt_fences); 600 resume_and_reinstall_preempt_fences(vm, &exec); 601 up_read(&vm->userptr.notifier_lock); 602 603 out_unlock: 604 drm_exec_fini(&exec); 605 out_unlock_outer: 606 if (err == -EAGAIN) { 607 trace_xe_vm_rebind_worker_retry(vm); 608 goto retry; 609 } 610 611 if (err) { 612 drm_warn(&vm->xe->drm, "VM worker error: %d\n", err); 613 xe_vm_kill(vm, true); 614 } 615 up_write(&vm->lock); 616 617 free_preempt_fences(&preempt_fences); 618 619 trace_xe_vm_rebind_worker_exit(vm); 620 } 621 622 static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma) 623 { 624 struct xe_userptr *userptr = &uvma->userptr; 625 struct xe_vma *vma = &uvma->vma; 626 struct dma_resv_iter cursor; 627 struct dma_fence *fence; 628 long err; 629 630 /* 631 * Tell exec and rebind worker they need to repin and rebind this 632 * userptr. 
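	 * The invalidated list is consumed by xe_vm_userptr_pin() on the next
	 * repin.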
	 */
	if (!xe_vm_in_fault_mode(vm) &&
	    !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
		spin_lock(&vm->userptr.invalidated_lock);
		list_move_tail(&userptr->invalidate_link,
			       &vm->userptr.invalidated);
		spin_unlock(&vm->userptr.invalidated_lock);
	}

	/*
	 * Preempt fences turn into schedule disables, pipeline these.
	 * Note that even in fault mode, we need to wait for binds and
	 * unbinds to complete, and those are attached as BOOKKEEP fences
	 * to the vm.
	 */
	dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
			    DMA_RESV_USAGE_BOOKKEEP);
	dma_resv_for_each_fence_unlocked(&cursor, fence)
		dma_fence_enable_sw_signaling(fence);
	dma_resv_iter_end(&cursor);

	err = dma_resv_wait_timeout(xe_vm_resv(vm),
				    DMA_RESV_USAGE_BOOKKEEP,
				    false, MAX_SCHEDULE_TIMEOUT);
	XE_WARN_ON(err <= 0);

	if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) {
		err = xe_vm_invalidate_vma(vma);
		XE_WARN_ON(err);
	}

	xe_hmm_userptr_unmap(uvma);
}

static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
				   const struct mmu_notifier_range *range,
				   unsigned long cur_seq)
{
	struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier);
	struct xe_vma *vma = &uvma->vma;
	struct xe_vm *vm = xe_vma_vm(vma);

	xe_assert(vm->xe, xe_vma_is_userptr(vma));
	trace_xe_vma_userptr_invalidate(vma);

	if (!mmu_notifier_range_blockable(range))
		return false;

	vm_dbg(&xe_vma_vm(vma)->xe->drm,
	       "NOTIFIER: addr=0x%016llx, range=0x%016llx",
	       xe_vma_start(vma), xe_vma_size(vma));

	down_write(&vm->userptr.notifier_lock);
	mmu_interval_set_seq(mni, cur_seq);

	__vma_userptr_invalidate(vm, uvma);
	up_write(&vm->userptr.notifier_lock);
	trace_xe_vma_userptr_invalidate_complete(vma);

	return true;
}

static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
	.invalidate = vma_userptr_invalidate,
};

#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
/**
 * xe_vma_userptr_force_invalidate() - force invalidate a userptr
 * @uvma: The userptr vma to invalidate
 *
 * Perform a forced userptr invalidation for testing purposes.
 */
void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
{
	struct xe_vm *vm = xe_vma_vm(&uvma->vma);

	/* Protect against concurrent userptr pinning */
	lockdep_assert_held(&vm->lock);
	/* Protect against concurrent notifiers */
	lockdep_assert_held(&vm->userptr.notifier_lock);
	/*
	 * Protect against concurrent instances of this function and
	 * the critical exec sections
	 */
	xe_vm_assert_held(vm);

	if (!mmu_interval_read_retry(&uvma->userptr.notifier,
				     uvma->userptr.notifier_seq))
		uvma->userptr.notifier_seq -= 2;
	__vma_userptr_invalidate(vm, uvma);
}
#endif

int xe_vm_userptr_pin(struct xe_vm *vm)
{
	struct xe_userptr_vma *uvma, *next;
	int err = 0;

	xe_assert(vm->xe, !xe_vm_in_fault_mode(vm));
	lockdep_assert_held_write(&vm->lock);

	/* Collect invalidated userptrs */
	spin_lock(&vm->userptr.invalidated_lock);
	xe_assert(vm->xe, list_empty(&vm->userptr.repin_list));
	list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
				 userptr.invalidate_link) {
		list_del_init(&uvma->userptr.invalidate_link);
		list_add_tail(&uvma->userptr.repin_link,
			      &vm->userptr.repin_list);
	}
	spin_unlock(&vm->userptr.invalidated_lock);

	/* Pin and move to bind list */
	list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
				 userptr.repin_link) {
		err = xe_vma_userptr_pin_pages(uvma);
		if (err == -EFAULT) {
			list_del_init(&uvma->userptr.repin_link);
			/*
			 * We might have already done the pin once, but then
			 * had to retry before the re-bind happened, due to
			 * some other condition in the caller. In the meantime
			 * the userptr got dinged by the notifier such that we
			 * need to revalidate here, but this time we hit the
			 * -EFAULT. In such a case make sure we remove
			 * ourselves from the rebind list to avoid going down
			 * in flames.
			 */
			if (!list_empty(&uvma->vma.combined_links.rebind))
				list_del_init(&uvma->vma.combined_links.rebind);

			/* Wait for pending binds */
			xe_vm_lock(vm, false);
			dma_resv_wait_timeout(xe_vm_resv(vm),
					      DMA_RESV_USAGE_BOOKKEEP,
					      false, MAX_SCHEDULE_TIMEOUT);

			down_read(&vm->userptr.notifier_lock);
			err = xe_vm_invalidate_vma(&uvma->vma);
			up_read(&vm->userptr.notifier_lock);
			xe_vm_unlock(vm);
			if (err)
				break;
		} else {
			if (err)
				break;

			list_del_init(&uvma->userptr.repin_link);
			list_move_tail(&uvma->vma.combined_links.rebind,
				       &vm->rebind_list);
		}
	}

	if (err) {
		down_write(&vm->userptr.notifier_lock);
		spin_lock(&vm->userptr.invalidated_lock);
		list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
					 userptr.repin_link) {
			list_del_init(&uvma->userptr.repin_link);
			list_move_tail(&uvma->userptr.invalidate_link,
				       &vm->userptr.invalidated);
		}
		spin_unlock(&vm->userptr.invalidated_lock);
		up_write(&vm->userptr.notifier_lock);
	}
	return err;
}

/**
 * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
 * that need repinning.
 * @vm: The VM.
 *
 * This function does an advisory check for whether the VM has userptrs that
 * need repinning.
 *
 * Return: 0 if there are no indications of userptrs needing repinning,
 * -EAGAIN if there are.
812 */ 813 int xe_vm_userptr_check_repin(struct xe_vm *vm) 814 { 815 return (list_empty_careful(&vm->userptr.repin_list) && 816 list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN; 817 } 818 819 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) 820 { 821 int i; 822 823 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) { 824 if (!vops->pt_update_ops[i].num_ops) 825 continue; 826 827 vops->pt_update_ops[i].ops = 828 kmalloc_array(vops->pt_update_ops[i].num_ops, 829 sizeof(*vops->pt_update_ops[i].ops), 830 GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 831 if (!vops->pt_update_ops[i].ops) 832 return array_of_binds ? -ENOBUFS : -ENOMEM; 833 } 834 835 return 0; 836 } 837 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO); 838 839 static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op) 840 { 841 struct xe_vma *vma; 842 843 vma = gpuva_to_vma(op->base.prefetch.va); 844 845 if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma)) 846 xa_destroy(&op->prefetch_range.range); 847 } 848 849 static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops) 850 { 851 struct xe_vma_op *op; 852 853 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) 854 return; 855 856 list_for_each_entry(op, &vops->list, link) 857 xe_vma_svm_prefetch_op_fini(op); 858 } 859 860 static void xe_vma_ops_fini(struct xe_vma_ops *vops) 861 { 862 int i; 863 864 xe_vma_svm_prefetch_ops_fini(vops); 865 866 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 867 kfree(vops->pt_update_ops[i].ops); 868 } 869 870 static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val) 871 { 872 int i; 873 874 if (!inc_val) 875 return; 876 877 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 878 if (BIT(i) & tile_mask) 879 vops->pt_update_ops[i].num_ops += inc_val; 880 } 881 882 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, 883 u8 tile_mask) 884 { 885 INIT_LIST_HEAD(&op->link); 886 op->tile_mask = tile_mask; 887 op->base.op = DRM_GPUVA_OP_MAP; 888 op->base.map.va.addr = vma->gpuva.va.addr; 889 op->base.map.va.range = vma->gpuva.va.range; 890 op->base.map.gem.obj = vma->gpuva.gem.obj; 891 op->base.map.gem.offset = vma->gpuva.gem.offset; 892 op->map.vma = vma; 893 op->map.immediate = true; 894 op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE; 895 op->map.is_null = xe_vma_is_null(vma); 896 } 897 898 static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, 899 u8 tile_mask) 900 { 901 struct xe_vma_op *op; 902 903 op = kzalloc(sizeof(*op), GFP_KERNEL); 904 if (!op) 905 return -ENOMEM; 906 907 xe_vm_populate_rebind(op, vma, tile_mask); 908 list_add_tail(&op->link, &vops->list); 909 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); 910 911 return 0; 912 } 913 914 static struct dma_fence *ops_execute(struct xe_vm *vm, 915 struct xe_vma_ops *vops); 916 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 917 struct xe_exec_queue *q, 918 struct xe_sync_entry *syncs, u32 num_syncs); 919 920 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) 921 { 922 struct dma_fence *fence; 923 struct xe_vma *vma, *next; 924 struct xe_vma_ops vops; 925 struct xe_vma_op *op, *next_op; 926 int err, i; 927 928 lockdep_assert_held(&vm->lock); 929 if ((xe_vm_in_lr_mode(vm) && !rebind_worker) || 930 list_empty(&vm->rebind_list)) 931 return 0; 932 933 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 934 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 935 vops.pt_update_ops[i].wait_vm_bookkeep = true; 936 937 xe_vm_assert_held(vm); 938 
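	/*
	 * Turn each VMA on the rebind list into a MAP op covering the tiles
	 * it is currently bound to.
	 */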
list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { 939 xe_assert(vm->xe, vma->tile_present); 940 941 if (rebind_worker) 942 trace_xe_vma_rebind_worker(vma); 943 else 944 trace_xe_vma_rebind_exec(vma); 945 946 err = xe_vm_ops_add_rebind(&vops, vma, 947 vma->tile_present); 948 if (err) 949 goto free_ops; 950 } 951 952 err = xe_vma_ops_alloc(&vops, false); 953 if (err) 954 goto free_ops; 955 956 fence = ops_execute(vm, &vops); 957 if (IS_ERR(fence)) { 958 err = PTR_ERR(fence); 959 } else { 960 dma_fence_put(fence); 961 list_for_each_entry_safe(vma, next, &vm->rebind_list, 962 combined_links.rebind) 963 list_del_init(&vma->combined_links.rebind); 964 } 965 free_ops: 966 list_for_each_entry_safe(op, next_op, &vops.list, link) { 967 list_del(&op->link); 968 kfree(op); 969 } 970 xe_vma_ops_fini(&vops); 971 972 return err; 973 } 974 975 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask) 976 { 977 struct dma_fence *fence = NULL; 978 struct xe_vma_ops vops; 979 struct xe_vma_op *op, *next_op; 980 struct xe_tile *tile; 981 u8 id; 982 int err; 983 984 lockdep_assert_held(&vm->lock); 985 xe_vm_assert_held(vm); 986 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 987 988 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 989 for_each_tile(tile, vm->xe, id) { 990 vops.pt_update_ops[id].wait_vm_bookkeep = true; 991 vops.pt_update_ops[tile->id].q = 992 xe_migrate_exec_queue(tile->migrate); 993 } 994 995 err = xe_vm_ops_add_rebind(&vops, vma, tile_mask); 996 if (err) 997 return ERR_PTR(err); 998 999 err = xe_vma_ops_alloc(&vops, false); 1000 if (err) { 1001 fence = ERR_PTR(err); 1002 goto free_ops; 1003 } 1004 1005 fence = ops_execute(vm, &vops); 1006 1007 free_ops: 1008 list_for_each_entry_safe(op, next_op, &vops.list, link) { 1009 list_del(&op->link); 1010 kfree(op); 1011 } 1012 xe_vma_ops_fini(&vops); 1013 1014 return fence; 1015 } 1016 1017 static void xe_vm_populate_range_rebind(struct xe_vma_op *op, 1018 struct xe_vma *vma, 1019 struct xe_svm_range *range, 1020 u8 tile_mask) 1021 { 1022 INIT_LIST_HEAD(&op->link); 1023 op->tile_mask = tile_mask; 1024 op->base.op = DRM_GPUVA_OP_DRIVER; 1025 op->subop = XE_VMA_SUBOP_MAP_RANGE; 1026 op->map_range.vma = vma; 1027 op->map_range.range = range; 1028 } 1029 1030 static int 1031 xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops, 1032 struct xe_vma *vma, 1033 struct xe_svm_range *range, 1034 u8 tile_mask) 1035 { 1036 struct xe_vma_op *op; 1037 1038 op = kzalloc(sizeof(*op), GFP_KERNEL); 1039 if (!op) 1040 return -ENOMEM; 1041 1042 xe_vm_populate_range_rebind(op, vma, range, tile_mask); 1043 list_add_tail(&op->link, &vops->list); 1044 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); 1045 1046 return 0; 1047 } 1048 1049 /** 1050 * xe_vm_range_rebind() - VM range (re)bind 1051 * @vm: The VM which the range belongs to. 1052 * @vma: The VMA which the range belongs to. 1053 * @range: SVM range to rebind. 1054 * @tile_mask: Tile mask to bind the range to. 1055 * 1056 * (re)bind SVM range setting up GPU page tables for the range. 
 *
 * Return: dma fence for rebind to signal completion on success, ERR_PTR on
 * failure
 */
struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
				     struct xe_vma *vma,
				     struct xe_svm_range *range,
				     u8 tile_mask)
{
	struct dma_fence *fence = NULL;
	struct xe_vma_ops vops;
	struct xe_vma_op *op, *next_op;
	struct xe_tile *tile;
	u8 id;
	int err;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);
	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
	xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	for_each_tile(tile, vm->xe, id) {
		vops.pt_update_ops[id].wait_vm_bookkeep = true;
		vops.pt_update_ops[tile->id].q =
			xe_migrate_exec_queue(tile->migrate);
	}

	err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask);
	if (err)
		return ERR_PTR(err);

	err = xe_vma_ops_alloc(&vops, false);
	if (err) {
		fence = ERR_PTR(err);
		goto free_ops;
	}

	fence = ops_execute(vm, &vops);

free_ops:
	list_for_each_entry_safe(op, next_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return fence;
}

static void xe_vm_populate_range_unbind(struct xe_vma_op *op,
					struct xe_svm_range *range)
{
	INIT_LIST_HEAD(&op->link);
	op->tile_mask = range->tile_present;
	op->base.op = DRM_GPUVA_OP_DRIVER;
	op->subop = XE_VMA_SUBOP_UNMAP_RANGE;
	op->unmap_range.range = range;
}

static int
xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops,
			   struct xe_svm_range *range)
{
	struct xe_vma_op *op;

	op = kzalloc(sizeof(*op), GFP_KERNEL);
	if (!op)
		return -ENOMEM;

	xe_vm_populate_range_unbind(op, range);
	list_add_tail(&op->link, &vops->list);
	xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1);

	return 0;
}

/**
 * xe_vm_range_unbind() - VM range unbind
 * @vm: The VM which the range belongs to.
 * @range: SVM range to unbind.
 *
 * Unbind SVM range, removing the GPU page tables for the range.
 *
 * Return: dma fence for unbind to signal completion on success, ERR_PTR on
 * failure
 */
struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
				     struct xe_svm_range *range)
{
	struct dma_fence *fence = NULL;
	struct xe_vma_ops vops;
	struct xe_vma_op *op, *next_op;
	struct xe_tile *tile;
	u8 id;
	int err;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);
	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));

	if (!range->tile_present)
		return dma_fence_get_stub();

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	for_each_tile(tile, vm->xe, id) {
		vops.pt_update_ops[id].wait_vm_bookkeep = true;
		vops.pt_update_ops[tile->id].q =
			xe_migrate_exec_queue(tile->migrate);
	}

	err = xe_vm_ops_add_range_unbind(&vops, range);
	if (err)
		return ERR_PTR(err);

	err = xe_vma_ops_alloc(&vops, false);
	if (err) {
		fence = ERR_PTR(err);
		goto free_ops;
	}

	fence = ops_execute(vm, &vops);

free_ops:
	list_for_each_entry_safe(op, next_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return fence;
}

static void xe_vma_free(struct xe_vma *vma)
{
	if (xe_vma_is_userptr(vma))
		kfree(to_userptr_vma(vma));
	else
		kfree(vma);
}

#define VMA_CREATE_FLAG_READ_ONLY		BIT(0)
#define VMA_CREATE_FLAG_IS_NULL			BIT(1)
#define VMA_CREATE_FLAG_DUMPABLE		BIT(2)
#define VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR	BIT(3)

static struct xe_vma *xe_vma_create(struct xe_vm *vm,
				    struct xe_bo *bo,
				    u64 bo_offset_or_userptr,
				    u64 start, u64 end,
				    struct xe_vma_mem_attr *attr,
				    unsigned int flags)
{
	struct xe_vma *vma;
	struct xe_tile *tile;
	u8 id;
	bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
	bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
	bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
	bool is_cpu_addr_mirror =
		(flags & VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR);

	xe_assert(vm->xe, start < end);
	xe_assert(vm->xe, end < vm->size);

	/*
	 * Allocate and ensure that the xe_vma_is_userptr() return value
	 * matches what was allocated.
1225 */ 1226 if (!bo && !is_null && !is_cpu_addr_mirror) { 1227 struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL); 1228 1229 if (!uvma) 1230 return ERR_PTR(-ENOMEM); 1231 1232 vma = &uvma->vma; 1233 } else { 1234 vma = kzalloc(sizeof(*vma), GFP_KERNEL); 1235 if (!vma) 1236 return ERR_PTR(-ENOMEM); 1237 1238 if (is_cpu_addr_mirror) 1239 vma->gpuva.flags |= XE_VMA_SYSTEM_ALLOCATOR; 1240 if (is_null) 1241 vma->gpuva.flags |= DRM_GPUVA_SPARSE; 1242 if (bo) 1243 vma->gpuva.gem.obj = &bo->ttm.base; 1244 } 1245 1246 INIT_LIST_HEAD(&vma->combined_links.rebind); 1247 1248 INIT_LIST_HEAD(&vma->gpuva.gem.entry); 1249 vma->gpuva.vm = &vm->gpuvm; 1250 vma->gpuva.va.addr = start; 1251 vma->gpuva.va.range = end - start + 1; 1252 if (read_only) 1253 vma->gpuva.flags |= XE_VMA_READ_ONLY; 1254 if (dumpable) 1255 vma->gpuva.flags |= XE_VMA_DUMPABLE; 1256 1257 for_each_tile(tile, vm->xe, id) 1258 vma->tile_mask |= 0x1 << id; 1259 1260 if (vm->xe->info.has_atomic_enable_pte_bit) 1261 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; 1262 1263 vma->attr = *attr; 1264 1265 if (bo) { 1266 struct drm_gpuvm_bo *vm_bo; 1267 1268 xe_bo_assert_held(bo); 1269 1270 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base); 1271 if (IS_ERR(vm_bo)) { 1272 xe_vma_free(vma); 1273 return ERR_CAST(vm_bo); 1274 } 1275 1276 drm_gpuvm_bo_extobj_add(vm_bo); 1277 drm_gem_object_get(&bo->ttm.base); 1278 vma->gpuva.gem.offset = bo_offset_or_userptr; 1279 drm_gpuva_link(&vma->gpuva, vm_bo); 1280 drm_gpuvm_bo_put(vm_bo); 1281 } else /* userptr or null */ { 1282 if (!is_null && !is_cpu_addr_mirror) { 1283 struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; 1284 u64 size = end - start + 1; 1285 int err; 1286 1287 INIT_LIST_HEAD(&userptr->invalidate_link); 1288 INIT_LIST_HEAD(&userptr->repin_link); 1289 vma->gpuva.gem.offset = bo_offset_or_userptr; 1290 mutex_init(&userptr->unmap_mutex); 1291 1292 err = mmu_interval_notifier_insert(&userptr->notifier, 1293 current->mm, 1294 xe_vma_userptr(vma), size, 1295 &vma_userptr_notifier_ops); 1296 if (err) { 1297 xe_vma_free(vma); 1298 return ERR_PTR(err); 1299 } 1300 1301 userptr->notifier_seq = LONG_MAX; 1302 } 1303 1304 xe_vm_get(vm); 1305 } 1306 1307 return vma; 1308 } 1309 1310 static void xe_vma_destroy_late(struct xe_vma *vma) 1311 { 1312 struct xe_vm *vm = xe_vma_vm(vma); 1313 1314 if (vma->ufence) { 1315 xe_sync_ufence_put(vma->ufence); 1316 vma->ufence = NULL; 1317 } 1318 1319 if (xe_vma_is_userptr(vma)) { 1320 struct xe_userptr_vma *uvma = to_userptr_vma(vma); 1321 struct xe_userptr *userptr = &uvma->userptr; 1322 1323 if (userptr->sg) 1324 xe_hmm_userptr_free_sg(uvma); 1325 1326 /* 1327 * Since userptr pages are not pinned, we can't remove 1328 * the notifier until we're sure the GPU is not accessing 1329 * them anymore 1330 */ 1331 mmu_interval_notifier_remove(&userptr->notifier); 1332 mutex_destroy(&userptr->unmap_mutex); 1333 xe_vm_put(vm); 1334 } else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) { 1335 xe_vm_put(vm); 1336 } else { 1337 xe_bo_put(xe_vma_bo(vma)); 1338 } 1339 1340 xe_vma_free(vma); 1341 } 1342 1343 static void vma_destroy_work_func(struct work_struct *w) 1344 { 1345 struct xe_vma *vma = 1346 container_of(w, struct xe_vma, destroy_work); 1347 1348 xe_vma_destroy_late(vma); 1349 } 1350 1351 static void vma_destroy_cb(struct dma_fence *fence, 1352 struct dma_fence_cb *cb) 1353 { 1354 struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb); 1355 1356 INIT_WORK(&vma->destroy_work, vma_destroy_work_func); 1357 
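	/*
	 * Fence callbacks can run in signalling (atomic) context, while the
	 * final teardown sleeps (e.g. mmu_interval_notifier_remove()), so
	 * defer it to a worker.
	 */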
	queue_work(system_unbound_wq, &vma->destroy_work);
}

static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
{
	struct xe_vm *vm = xe_vma_vm(vma);

	lockdep_assert_held_write(&vm->lock);
	xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));

	if (xe_vma_is_userptr(vma)) {
		xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);

		spin_lock(&vm->userptr.invalidated_lock);
		xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link));
		list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
		spin_unlock(&vm->userptr.invalidated_lock);
	} else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) {
		xe_bo_assert_held(xe_vma_bo(vma));

		drm_gpuva_unlink(&vma->gpuva);
	}

	xe_vm_assert_held(vm);
	if (fence) {
		int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
						 vma_destroy_cb);

		if (ret) {
			XE_WARN_ON(ret != -ENOENT);
			xe_vma_destroy_late(vma);
		}
	} else {
		xe_vma_destroy_late(vma);
	}
}

/**
 * xe_vm_lock_vma() - drm_exec utility to lock a vma
 * @exec: The drm_exec object we're currently locking for.
 * @vma: The vma for which we want to lock the vm resv and any attached
 * object's resv.
 *
 * Return: 0 on success, negative error code on error. In particular
 * may return -EDEADLK on WW transaction contention and -EINTR if
 * an interruptible wait is terminated by a signal.
 */
int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
{
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_bo *bo = xe_vma_bo(vma);
	int err;

	XE_WARN_ON(!vm);

	err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
	if (!err && bo && !bo->vm)
		err = drm_exec_lock_obj(exec, &bo->ttm.base);

	return err;
}

static void xe_vma_destroy_unlocked(struct xe_vma *vma)
{
	struct drm_exec exec;
	int err;

	drm_exec_init(&exec, 0, 0);
	drm_exec_until_all_locked(&exec) {
		err = xe_vm_lock_vma(&exec, vma);
		drm_exec_retry_on_contention(&exec);
		if (XE_WARN_ON(err))
			break;
	}

	xe_vma_destroy(vma, NULL);

	drm_exec_fini(&exec);
}

struct xe_vma *
xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
{
	struct drm_gpuva *gpuva;

	lockdep_assert_held(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm))
		return NULL;

	xe_assert(vm->xe, start + range <= vm->size);

	gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);

	return gpuva ? gpuva_to_vma(gpuva) : NULL;
}

static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
{
	int err;

	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
	lockdep_assert_held(&vm->lock);

	mutex_lock(&vm->snap_mutex);
	err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
	mutex_unlock(&vm->snap_mutex);
	XE_WARN_ON(err);	/* Shouldn't be possible */

	return err;
}

static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
{
	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
	lockdep_assert_held(&vm->lock);

	mutex_lock(&vm->snap_mutex);
	drm_gpuva_remove(&vma->gpuva);
	mutex_unlock(&vm->snap_mutex);
	if (vm->usm.last_fault_vma == vma)
		vm->usm.last_fault_vma = NULL;
}

static struct drm_gpuva_op *xe_vm_op_alloc(void)
{
	struct xe_vma_op *op;

	op = kzalloc(sizeof(*op), GFP_KERNEL);

	if (unlikely(!op))
		return NULL;

	return &op->base;
}

static void xe_vm_free(struct drm_gpuvm *gpuvm);

static const struct drm_gpuvm_ops gpuvm_ops = {
	.op_alloc = xe_vm_op_alloc,
	.vm_bo_validate = xe_gpuvm_validate,
	.vm_free = xe_vm_free,
};

static u64 pde_encode_pat_index(u16 pat_index)
{
	u64 pte = 0;

	if (pat_index & BIT(0))
		pte |= XE_PPGTT_PTE_PAT0;

	if (pat_index & BIT(1))
		pte |= XE_PPGTT_PTE_PAT1;

	return pte;
}

static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level)
{
	u64 pte = 0;

	if (pat_index & BIT(0))
		pte |= XE_PPGTT_PTE_PAT0;

	if (pat_index & BIT(1))
		pte |= XE_PPGTT_PTE_PAT1;

	if (pat_index & BIT(2)) {
		if (pt_level)
			pte |= XE_PPGTT_PDE_PDPE_PAT2;
		else
			pte |= XE_PPGTT_PTE_PAT2;
	}

	if (pat_index & BIT(3))
		pte |= XELPG_PPGTT_PTE_PAT3;

	if (pat_index & BIT(4))
		pte |= XE2_PPGTT_PTE_PAT4;

	return pte;
}

static u64 pte_encode_ps(u32 pt_level)
{
	XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);

	if (pt_level == 1)
		return XE_PDE_PS_2M;
	else if (pt_level == 2)
		return XE_PDPE_PS_1G;

	return 0;
}

static u16 pde_pat_index(struct xe_bo *bo)
{
	struct xe_device *xe = xe_bo_device(bo);
	u16 pat_index;

	/*
	 * We only have two bits to encode the PAT index in non-leaf nodes, but
	 * these only point to other paging structures so we only need a minimal
	 * selection of options. The user PAT index is only for encoding leaf
	 * nodes, where we can use more bits to do the encoding. The
	 * non-leaf nodes are instead under driver control so the chosen index
	 * here should be distinct from the user PAT index. Also the
	 * corresponding coherency of the PAT index should be tied to the
	 * allocation type of the page table (or at least we should pick
	 * something which is always safe).
1567 */ 1568 if (!xe_bo_is_vram(bo) && bo->ttm.ttm->caching == ttm_cached) 1569 pat_index = xe->pat.idx[XE_CACHE_WB]; 1570 else 1571 pat_index = xe->pat.idx[XE_CACHE_NONE]; 1572 1573 xe_assert(xe, pat_index <= 3); 1574 1575 return pat_index; 1576 } 1577 1578 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset) 1579 { 1580 u64 pde; 1581 1582 pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1583 pde |= XE_PAGE_PRESENT | XE_PAGE_RW; 1584 pde |= pde_encode_pat_index(pde_pat_index(bo)); 1585 1586 return pde; 1587 } 1588 1589 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, 1590 u16 pat_index, u32 pt_level) 1591 { 1592 u64 pte; 1593 1594 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1595 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1596 pte |= pte_encode_pat_index(pat_index, pt_level); 1597 pte |= pte_encode_ps(pt_level); 1598 1599 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) 1600 pte |= XE_PPGTT_PTE_DM; 1601 1602 return pte; 1603 } 1604 1605 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, 1606 u16 pat_index, u32 pt_level) 1607 { 1608 pte |= XE_PAGE_PRESENT; 1609 1610 if (likely(!xe_vma_read_only(vma))) 1611 pte |= XE_PAGE_RW; 1612 1613 pte |= pte_encode_pat_index(pat_index, pt_level); 1614 pte |= pte_encode_ps(pt_level); 1615 1616 if (unlikely(xe_vma_is_null(vma))) 1617 pte |= XE_PTE_NULL; 1618 1619 return pte; 1620 } 1621 1622 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, 1623 u16 pat_index, 1624 u32 pt_level, bool devmem, u64 flags) 1625 { 1626 u64 pte; 1627 1628 /* Avoid passing random bits directly as flags */ 1629 xe_assert(xe, !(flags & ~XE_PTE_PS64)); 1630 1631 pte = addr; 1632 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1633 pte |= pte_encode_pat_index(pat_index, pt_level); 1634 pte |= pte_encode_ps(pt_level); 1635 1636 if (devmem) 1637 pte |= XE_PPGTT_PTE_DM; 1638 1639 pte |= flags; 1640 1641 return pte; 1642 } 1643 1644 static const struct xe_pt_ops xelp_pt_ops = { 1645 .pte_encode_bo = xelp_pte_encode_bo, 1646 .pte_encode_vma = xelp_pte_encode_vma, 1647 .pte_encode_addr = xelp_pte_encode_addr, 1648 .pde_encode_bo = xelp_pde_encode_bo, 1649 }; 1650 1651 static void vm_destroy_work_func(struct work_struct *w); 1652 1653 /** 1654 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the 1655 * given tile and vm. 1656 * @xe: xe device. 1657 * @tile: tile to set up for. 1658 * @vm: vm to set up for. 1659 * 1660 * Sets up a pagetable tree with one page-table per level and a single 1661 * leaf PTE. All pagetable entries point to the single page-table or, 1662 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and 1663 * writes become NOPs. 1664 * 1665 * Return: 0 on success, negative error code on error. 
1666 */ 1667 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, 1668 struct xe_vm *vm) 1669 { 1670 u8 id = tile->id; 1671 int i; 1672 1673 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) { 1674 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i); 1675 if (IS_ERR(vm->scratch_pt[id][i])) { 1676 int err = PTR_ERR(vm->scratch_pt[id][i]); 1677 1678 vm->scratch_pt[id][i] = NULL; 1679 return err; 1680 } 1681 1682 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); 1683 } 1684 1685 return 0; 1686 } 1687 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO); 1688 1689 static void xe_vm_free_scratch(struct xe_vm *vm) 1690 { 1691 struct xe_tile *tile; 1692 u8 id; 1693 1694 if (!xe_vm_has_scratch(vm)) 1695 return; 1696 1697 for_each_tile(tile, vm->xe, id) { 1698 u32 i; 1699 1700 if (!vm->pt_root[id]) 1701 continue; 1702 1703 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i) 1704 if (vm->scratch_pt[id][i]) 1705 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL); 1706 } 1707 } 1708 1709 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) 1710 { 1711 struct drm_gem_object *vm_resv_obj; 1712 struct xe_vm *vm; 1713 int err, number_tiles = 0; 1714 struct xe_tile *tile; 1715 u8 id; 1716 1717 /* 1718 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to 1719 * ever be in faulting mode. 1720 */ 1721 xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE))); 1722 1723 vm = kzalloc(sizeof(*vm), GFP_KERNEL); 1724 if (!vm) 1725 return ERR_PTR(-ENOMEM); 1726 1727 vm->xe = xe; 1728 1729 vm->size = 1ull << xe->info.va_bits; 1730 vm->flags = flags; 1731 1732 if (xef) 1733 vm->xef = xe_file_get(xef); 1734 /** 1735 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be 1736 * manipulated under the PXP mutex. However, the PXP mutex can be taken 1737 * under a user-VM lock when the PXP session is started at exec_queue 1738 * creation time. Those are different VMs and therefore there is no risk 1739 * of deadlock, but we need to tell lockdep that this is the case or it 1740 * will print a warning. 1741 */ 1742 if (flags & XE_VM_FLAG_GSC) { 1743 static struct lock_class_key gsc_vm_key; 1744 1745 __init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key); 1746 } else { 1747 init_rwsem(&vm->lock); 1748 } 1749 mutex_init(&vm->snap_mutex); 1750 1751 INIT_LIST_HEAD(&vm->rebind_list); 1752 1753 INIT_LIST_HEAD(&vm->userptr.repin_list); 1754 INIT_LIST_HEAD(&vm->userptr.invalidated); 1755 init_rwsem(&vm->userptr.notifier_lock); 1756 spin_lock_init(&vm->userptr.invalidated_lock); 1757 1758 ttm_lru_bulk_move_init(&vm->lru_bulk_move); 1759 1760 INIT_WORK(&vm->destroy_work, vm_destroy_work_func); 1761 1762 INIT_LIST_HEAD(&vm->preempt.exec_queues); 1763 vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */ 1764 1765 for_each_tile(tile, xe, id) 1766 xe_range_fence_tree_init(&vm->rftree[id]); 1767 1768 vm->pt_ops = &xelp_pt_ops; 1769 1770 /* 1771 * Long-running workloads are not protected by the scheduler references. 1772 * By design, run_job for long-running workloads returns NULL and the 1773 * scheduler drops all the references of it, hence protecting the VM 1774 * for this case is necessary. 
1775 */ 1776 if (flags & XE_VM_FLAG_LR_MODE) { 1777 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); 1778 xe_pm_runtime_get_noresume(xe); 1779 INIT_LIST_HEAD(&vm->preempt.pm_activate_link); 1780 } 1781 1782 if (flags & XE_VM_FLAG_FAULT_MODE) { 1783 err = xe_svm_init(vm); 1784 if (err) 1785 goto err_no_resv; 1786 } 1787 1788 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); 1789 if (!vm_resv_obj) { 1790 err = -ENOMEM; 1791 goto err_svm_fini; 1792 } 1793 1794 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm, 1795 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops); 1796 1797 drm_gem_object_put(vm_resv_obj); 1798 1799 err = xe_vm_lock(vm, true); 1800 if (err) 1801 goto err_close; 1802 1803 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) 1804 vm->flags |= XE_VM_FLAG_64K; 1805 1806 for_each_tile(tile, xe, id) { 1807 if (flags & XE_VM_FLAG_MIGRATION && 1808 tile->id != XE_VM_FLAG_TILE_ID(flags)) 1809 continue; 1810 1811 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level); 1812 if (IS_ERR(vm->pt_root[id])) { 1813 err = PTR_ERR(vm->pt_root[id]); 1814 vm->pt_root[id] = NULL; 1815 goto err_unlock_close; 1816 } 1817 } 1818 1819 if (xe_vm_has_scratch(vm)) { 1820 for_each_tile(tile, xe, id) { 1821 if (!vm->pt_root[id]) 1822 continue; 1823 1824 err = xe_vm_create_scratch(xe, tile, vm); 1825 if (err) 1826 goto err_unlock_close; 1827 } 1828 vm->batch_invalidate_tlb = true; 1829 } 1830 1831 if (vm->flags & XE_VM_FLAG_LR_MODE) 1832 vm->batch_invalidate_tlb = false; 1833 1834 /* Fill pt_root after allocating scratch tables */ 1835 for_each_tile(tile, xe, id) { 1836 if (!vm->pt_root[id]) 1837 continue; 1838 1839 xe_pt_populate_empty(tile, vm, vm->pt_root[id]); 1840 } 1841 xe_vm_unlock(vm); 1842 1843 /* Kernel migration VM shouldn't have a circular loop.. 
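	 * (a bind queue would itself have to run on the migration VM), so
	 * default bind queues are only created for non-migration VMs below.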
*/ 1844 if (!(flags & XE_VM_FLAG_MIGRATION)) { 1845 for_each_tile(tile, xe, id) { 1846 struct xe_exec_queue *q; 1847 u32 create_flags = EXEC_QUEUE_FLAG_VM; 1848 1849 if (!vm->pt_root[id]) 1850 continue; 1851 1852 q = xe_exec_queue_create_bind(xe, tile, create_flags, 0); 1853 if (IS_ERR(q)) { 1854 err = PTR_ERR(q); 1855 goto err_close; 1856 } 1857 vm->q[id] = q; 1858 number_tiles++; 1859 } 1860 } 1861 1862 if (number_tiles > 1) 1863 vm->composite_fence_ctx = dma_fence_context_alloc(1); 1864 1865 if (xef && xe->info.has_asid) { 1866 u32 asid; 1867 1868 down_write(&xe->usm.lock); 1869 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, 1870 XA_LIMIT(1, XE_MAX_ASID - 1), 1871 &xe->usm.next_asid, GFP_KERNEL); 1872 up_write(&xe->usm.lock); 1873 if (err < 0) 1874 goto err_unlock_close; 1875 1876 vm->usm.asid = asid; 1877 } 1878 1879 trace_xe_vm_create(vm); 1880 1881 return vm; 1882 1883 err_unlock_close: 1884 xe_vm_unlock(vm); 1885 err_close: 1886 xe_vm_close_and_put(vm); 1887 return ERR_PTR(err); 1888 1889 err_svm_fini: 1890 if (flags & XE_VM_FLAG_FAULT_MODE) { 1891 vm->size = 0; /* close the vm */ 1892 xe_svm_fini(vm); 1893 } 1894 err_no_resv: 1895 mutex_destroy(&vm->snap_mutex); 1896 for_each_tile(tile, xe, id) 1897 xe_range_fence_tree_fini(&vm->rftree[id]); 1898 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 1899 if (vm->xef) 1900 xe_file_put(vm->xef); 1901 kfree(vm); 1902 if (flags & XE_VM_FLAG_LR_MODE) 1903 xe_pm_runtime_put(xe); 1904 return ERR_PTR(err); 1905 } 1906 1907 static void xe_vm_close(struct xe_vm *vm) 1908 { 1909 struct xe_device *xe = vm->xe; 1910 bool bound; 1911 int idx; 1912 1913 bound = drm_dev_enter(&xe->drm, &idx); 1914 1915 down_write(&vm->lock); 1916 if (xe_vm_in_fault_mode(vm)) 1917 xe_svm_notifier_lock(vm); 1918 1919 vm->size = 0; 1920 1921 if (!((vm->flags & XE_VM_FLAG_MIGRATION))) { 1922 struct xe_tile *tile; 1923 struct xe_gt *gt; 1924 u8 id; 1925 1926 /* Wait for pending binds */ 1927 dma_resv_wait_timeout(xe_vm_resv(vm), 1928 DMA_RESV_USAGE_BOOKKEEP, 1929 false, MAX_SCHEDULE_TIMEOUT); 1930 1931 if (bound) { 1932 for_each_tile(tile, xe, id) 1933 if (vm->pt_root[id]) 1934 xe_pt_clear(xe, vm->pt_root[id]); 1935 1936 for_each_gt(gt, xe, id) 1937 xe_tlb_inval_vm(>->tlb_inval, vm); 1938 } 1939 } 1940 1941 if (xe_vm_in_fault_mode(vm)) 1942 xe_svm_notifier_unlock(vm); 1943 up_write(&vm->lock); 1944 1945 if (bound) 1946 drm_dev_exit(idx); 1947 } 1948 1949 void xe_vm_close_and_put(struct xe_vm *vm) 1950 { 1951 LIST_HEAD(contested); 1952 struct xe_device *xe = vm->xe; 1953 struct xe_tile *tile; 1954 struct xe_vma *vma, *next_vma; 1955 struct drm_gpuva *gpuva, *next; 1956 u8 id; 1957 1958 xe_assert(xe, !vm->preempt.num_exec_queues); 1959 1960 xe_vm_close(vm); 1961 if (xe_vm_in_preempt_fence_mode(vm)) { 1962 mutex_lock(&xe->rebind_resume_lock); 1963 list_del_init(&vm->preempt.pm_activate_link); 1964 mutex_unlock(&xe->rebind_resume_lock); 1965 flush_work(&vm->preempt.rebind_work); 1966 } 1967 if (xe_vm_in_fault_mode(vm)) 1968 xe_svm_close(vm); 1969 1970 down_write(&vm->lock); 1971 for_each_tile(tile, xe, id) { 1972 if (vm->q[id]) 1973 xe_exec_queue_last_fence_put(vm->q[id], vm); 1974 } 1975 up_write(&vm->lock); 1976 1977 for_each_tile(tile, xe, id) { 1978 if (vm->q[id]) { 1979 xe_exec_queue_kill(vm->q[id]); 1980 xe_exec_queue_put(vm->q[id]); 1981 vm->q[id] = NULL; 1982 } 1983 } 1984 1985 down_write(&vm->lock); 1986 xe_vm_lock(vm, false); 1987 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) { 1988 vma = gpuva_to_vma(gpuva); 1989 1990 if (xe_vma_has_no_bo(vma)) { 
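			/*
			 * Mark BO-less (userptr/NULL) VMAs destroyed under the
			 * notifier lock so that a concurrent invalidation sees
			 * the flag and doesn't re-queue the VMA.
			 */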
1991 down_read(&vm->userptr.notifier_lock); 1992 vma->gpuva.flags |= XE_VMA_DESTROYED; 1993 up_read(&vm->userptr.notifier_lock); 1994 } 1995 1996 xe_vm_remove_vma(vm, vma); 1997 1998 /* easy case, remove from VMA? */ 1999 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) { 2000 list_del_init(&vma->combined_links.rebind); 2001 xe_vma_destroy(vma, NULL); 2002 continue; 2003 } 2004 2005 list_move_tail(&vma->combined_links.destroy, &contested); 2006 vma->gpuva.flags |= XE_VMA_DESTROYED; 2007 } 2008 2009 /* 2010 * All vm operations will add shared fences to resv. 2011 * The only exception is eviction for a shared object, 2012 * but even so, the unbind when evicted would still 2013 * install a fence to resv. Hence it's safe to 2014 * destroy the pagetables immediately. 2015 */ 2016 xe_vm_free_scratch(vm); 2017 2018 for_each_tile(tile, xe, id) { 2019 if (vm->pt_root[id]) { 2020 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); 2021 vm->pt_root[id] = NULL; 2022 } 2023 } 2024 xe_vm_unlock(vm); 2025 2026 /* 2027 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL 2028 * Since we hold a refcount to the bo, we can remove and free 2029 * the members safely without locking. 2030 */ 2031 list_for_each_entry_safe(vma, next_vma, &contested, 2032 combined_links.destroy) { 2033 list_del_init(&vma->combined_links.destroy); 2034 xe_vma_destroy_unlocked(vma); 2035 } 2036 2037 if (xe_vm_in_fault_mode(vm)) 2038 xe_svm_fini(vm); 2039 2040 up_write(&vm->lock); 2041 2042 down_write(&xe->usm.lock); 2043 if (vm->usm.asid) { 2044 void *lookup; 2045 2046 xe_assert(xe, xe->info.has_asid); 2047 xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION)); 2048 2049 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); 2050 xe_assert(xe, lookup == vm); 2051 } 2052 up_write(&xe->usm.lock); 2053 2054 for_each_tile(tile, xe, id) 2055 xe_range_fence_tree_fini(&vm->rftree[id]); 2056 2057 xe_vm_put(vm); 2058 } 2059 2060 static void vm_destroy_work_func(struct work_struct *w) 2061 { 2062 struct xe_vm *vm = 2063 container_of(w, struct xe_vm, destroy_work); 2064 struct xe_device *xe = vm->xe; 2065 struct xe_tile *tile; 2066 u8 id; 2067 2068 /* xe_vm_close_and_put was not called? */ 2069 xe_assert(xe, !vm->size); 2070 2071 if (xe_vm_in_preempt_fence_mode(vm)) 2072 flush_work(&vm->preempt.rebind_work); 2073 2074 mutex_destroy(&vm->snap_mutex); 2075 2076 if (vm->flags & XE_VM_FLAG_LR_MODE) 2077 xe_pm_runtime_put(xe); 2078 2079 for_each_tile(tile, xe, id) 2080 XE_WARN_ON(vm->pt_root[id]); 2081 2082 trace_xe_vm_free(vm); 2083 2084 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 2085 2086 if (vm->xef) 2087 xe_file_put(vm->xef); 2088 2089 kfree(vm); 2090 } 2091 2092 static void xe_vm_free(struct drm_gpuvm *gpuvm) 2093 { 2094 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); 2095 2096 /* To destroy the VM we need to be able to sleep */ 2097 queue_work(system_unbound_wq, &vm->destroy_work); 2098 } 2099 2100 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) 2101 { 2102 struct xe_vm *vm; 2103 2104 mutex_lock(&xef->vm.lock); 2105 vm = xa_load(&xef->vm.xa, id); 2106 if (vm) 2107 xe_vm_get(vm); 2108 mutex_unlock(&xef->vm.lock); 2109 2110 return vm; 2111 } 2112 2113 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile) 2114 { 2115 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0); 2116 } 2117 2118 static struct xe_exec_queue * 2119 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 2120 { 2121 return q ? 
q : vm->q[0]; 2122 } 2123 2124 static struct xe_user_fence * 2125 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) 2126 { 2127 unsigned int i; 2128 2129 for (i = 0; i < num_syncs; i++) { 2130 struct xe_sync_entry *e = &syncs[i]; 2131 2132 if (xe_sync_is_ufence(e)) 2133 return xe_sync_ufence_get(e); 2134 } 2135 2136 return NULL; 2137 } 2138 2139 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ 2140 DRM_XE_VM_CREATE_FLAG_LR_MODE | \ 2141 DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 2142 2143 int xe_vm_create_ioctl(struct drm_device *dev, void *data, 2144 struct drm_file *file) 2145 { 2146 struct xe_device *xe = to_xe_device(dev); 2147 struct xe_file *xef = to_xe_file(file); 2148 struct drm_xe_vm_create *args = data; 2149 struct xe_vm *vm; 2150 u32 id; 2151 int err; 2152 u32 flags = 0; 2153 2154 if (XE_IOCTL_DBG(xe, args->extensions)) 2155 return -EINVAL; 2156 2157 if (XE_GT_WA(xe_root_mmio_gt(xe), 14016763929)) 2158 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE; 2159 2160 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && 2161 !xe->info.has_usm)) 2162 return -EINVAL; 2163 2164 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 2165 return -EINVAL; 2166 2167 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS)) 2168 return -EINVAL; 2169 2170 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE && 2171 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && 2172 !xe->info.needs_scratch)) 2173 return -EINVAL; 2174 2175 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) && 2176 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) 2177 return -EINVAL; 2178 2179 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE) 2180 flags |= XE_VM_FLAG_SCRATCH_PAGE; 2181 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) 2182 flags |= XE_VM_FLAG_LR_MODE; 2183 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 2184 flags |= XE_VM_FLAG_FAULT_MODE; 2185 2186 vm = xe_vm_create(xe, flags, xef); 2187 if (IS_ERR(vm)) 2188 return PTR_ERR(vm); 2189 2190 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) 2191 /* Warning: Security issue - never enable by default */ 2192 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); 2193 #endif 2194 2195 /* user id alloc must always be last in ioctl to prevent UAF */ 2196 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); 2197 if (err) 2198 goto err_close_and_put; 2199 2200 args->vm_id = id; 2201 2202 return 0; 2203 2204 err_close_and_put: 2205 xe_vm_close_and_put(vm); 2206 2207 return err; 2208 } 2209 2210 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, 2211 struct drm_file *file) 2212 { 2213 struct xe_device *xe = to_xe_device(dev); 2214 struct xe_file *xef = to_xe_file(file); 2215 struct drm_xe_vm_destroy *args = data; 2216 struct xe_vm *vm; 2217 int err = 0; 2218 2219 if (XE_IOCTL_DBG(xe, args->pad) || 2220 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 2221 return -EINVAL; 2222 2223 mutex_lock(&xef->vm.lock); 2224 vm = xa_load(&xef->vm.xa, args->vm_id); 2225 if (XE_IOCTL_DBG(xe, !vm)) 2226 err = -ENOENT; 2227 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues)) 2228 err = -EBUSY; 2229 else 2230 xa_erase(&xef->vm.xa, args->vm_id); 2231 mutex_unlock(&xef->vm.lock); 2232 2233 if (!err) 2234 xe_vm_close_and_put(vm); 2235 2236 return err; 2237 } 2238 2239 static int xe_vm_query_vmas(struct xe_vm *vm, u64 start, u64 end) 2240 { 2241 struct drm_gpuva *gpuva; 2242 u32 num_vmas = 0; 2243 2244 lockdep_assert_held(&vm->lock); 2245 
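	/* Count the VMAs overlapping [start, end) for the query ioctl */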
drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) 2246 num_vmas++; 2247 2248 return num_vmas; 2249 } 2250 2251 static int get_mem_attrs(struct xe_vm *vm, u32 *num_vmas, u64 start, 2252 u64 end, struct drm_xe_mem_range_attr *attrs) 2253 { 2254 struct drm_gpuva *gpuva; 2255 int i = 0; 2256 2257 lockdep_assert_held(&vm->lock); 2258 2259 drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) { 2260 struct xe_vma *vma = gpuva_to_vma(gpuva); 2261 2262 if (i == *num_vmas) 2263 return -ENOSPC; 2264 2265 attrs[i].start = xe_vma_start(vma); 2266 attrs[i].end = xe_vma_end(vma); 2267 attrs[i].atomic.val = vma->attr.atomic_access; 2268 attrs[i].pat_index.val = vma->attr.pat_index; 2269 attrs[i].preferred_mem_loc.devmem_fd = vma->attr.preferred_loc.devmem_fd; 2270 attrs[i].preferred_mem_loc.migration_policy = 2271 vma->attr.preferred_loc.migration_policy; 2272 2273 i++; 2274 } 2275 2276 *num_vmas = i; 2277 return 0; 2278 } 2279 2280 int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 2281 { 2282 struct xe_device *xe = to_xe_device(dev); 2283 struct xe_file *xef = to_xe_file(file); 2284 struct drm_xe_mem_range_attr *mem_attrs; 2285 struct drm_xe_vm_query_mem_range_attr *args = data; 2286 u64 __user *attrs_user = u64_to_user_ptr(args->vector_of_mem_attr); 2287 struct xe_vm *vm; 2288 int err = 0; 2289 2290 if (XE_IOCTL_DBG(xe, 2291 ((args->num_mem_ranges == 0 && 2292 (attrs_user || args->sizeof_mem_range_attr != 0)) || 2293 (args->num_mem_ranges > 0 && 2294 (!attrs_user || 2295 args->sizeof_mem_range_attr != 2296 sizeof(struct drm_xe_mem_range_attr)))))) 2297 return -EINVAL; 2298 2299 vm = xe_vm_lookup(xef, args->vm_id); 2300 if (XE_IOCTL_DBG(xe, !vm)) 2301 return -EINVAL; 2302 2303 err = down_read_interruptible(&vm->lock); 2304 if (err) 2305 goto put_vm; 2306 2307 attrs_user = u64_to_user_ptr(args->vector_of_mem_attr); 2308 2309 if (args->num_mem_ranges == 0 && !attrs_user) { 2310 args->num_mem_ranges = xe_vm_query_vmas(vm, args->start, args->start + args->range); 2311 args->sizeof_mem_range_attr = sizeof(struct drm_xe_mem_range_attr); 2312 goto unlock_vm; 2313 } 2314 2315 mem_attrs = kvmalloc_array(args->num_mem_ranges, args->sizeof_mem_range_attr, 2316 GFP_KERNEL | __GFP_ACCOUNT | 2317 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 2318 if (!mem_attrs) { 2319 err = args->num_mem_ranges > 1 ? 
-ENOBUFS : -ENOMEM; 2320 goto unlock_vm; 2321 } 2322 2323 memset(mem_attrs, 0, args->num_mem_ranges * args->sizeof_mem_range_attr); 2324 err = get_mem_attrs(vm, &args->num_mem_ranges, args->start, 2325 args->start + args->range, mem_attrs); 2326 if (err) 2327 goto free_mem_attrs; 2328 2329 err = copy_to_user(attrs_user, mem_attrs, 2330 args->sizeof_mem_range_attr * args->num_mem_ranges); 2331 2332 free_mem_attrs: 2333 kvfree(mem_attrs); 2334 unlock_vm: 2335 up_read(&vm->lock); 2336 put_vm: 2337 xe_vm_put(vm); 2338 return err; 2339 } 2340 2341 static bool vma_matches(struct xe_vma *vma, u64 page_addr) 2342 { 2343 if (page_addr > xe_vma_end(vma) - 1 || 2344 page_addr + SZ_4K - 1 < xe_vma_start(vma)) 2345 return false; 2346 2347 return true; 2348 } 2349 2350 /** 2351 * xe_vm_find_vma_by_addr() - Find a VMA by its address 2352 * 2353 * @vm: the xe_vm the vma belongs to 2354 * @page_addr: address to look up 2355 */ 2356 struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr) 2357 { 2358 struct xe_vma *vma = NULL; 2359 2360 if (vm->usm.last_fault_vma) { /* Fast lookup */ 2361 if (vma_matches(vm->usm.last_fault_vma, page_addr)) 2362 vma = vm->usm.last_fault_vma; 2363 } 2364 if (!vma) 2365 vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K); 2366 2367 return vma; 2368 } 2369 2370 static const u32 region_to_mem_type[] = { 2371 XE_PL_TT, 2372 XE_PL_VRAM0, 2373 XE_PL_VRAM1, 2374 }; 2375 2376 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma, 2377 bool post_commit) 2378 { 2379 down_read(&vm->userptr.notifier_lock); 2380 vma->gpuva.flags |= XE_VMA_DESTROYED; 2381 up_read(&vm->userptr.notifier_lock); 2382 if (post_commit) 2383 xe_vm_remove_vma(vm, vma); 2384 } 2385 2386 #undef ULL 2387 #define ULL unsigned long long 2388 2389 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) 2390 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 2391 { 2392 struct xe_vma *vma; 2393 2394 switch (op->op) { 2395 case DRM_GPUVA_OP_MAP: 2396 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx", 2397 (ULL)op->map.va.addr, (ULL)op->map.va.range); 2398 break; 2399 case DRM_GPUVA_OP_REMAP: 2400 vma = gpuva_to_vma(op->remap.unmap->va); 2401 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 2402 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 2403 op->remap.unmap->keep ? 1 : 0); 2404 if (op->remap.prev) 2405 vm_dbg(&xe->drm, 2406 "REMAP:PREV: addr=0x%016llx, range=0x%016llx", 2407 (ULL)op->remap.prev->va.addr, 2408 (ULL)op->remap.prev->va.range); 2409 if (op->remap.next) 2410 vm_dbg(&xe->drm, 2411 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx", 2412 (ULL)op->remap.next->va.addr, 2413 (ULL)op->remap.next->va.range); 2414 break; 2415 case DRM_GPUVA_OP_UNMAP: 2416 vma = gpuva_to_vma(op->unmap.va); 2417 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 2418 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 2419 op->unmap.keep ? 
1 : 0); 2420 break; 2421 case DRM_GPUVA_OP_PREFETCH: 2422 vma = gpuva_to_vma(op->prefetch.va); 2423 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx", 2424 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma)); 2425 break; 2426 default: 2427 drm_warn(&xe->drm, "NOT POSSIBLE"); 2428 } 2429 } 2430 #else 2431 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 2432 { 2433 } 2434 #endif 2435 2436 static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags) 2437 { 2438 if (!xe_vm_in_fault_mode(vm)) 2439 return false; 2440 2441 if (!xe_vm_has_scratch(vm)) 2442 return false; 2443 2444 if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE) 2445 return false; 2446 2447 return true; 2448 } 2449 2450 static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops) 2451 { 2452 struct drm_gpuva_op *__op; 2453 2454 drm_gpuva_for_each_op(__op, ops) { 2455 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2456 2457 xe_vma_svm_prefetch_op_fini(op); 2458 } 2459 } 2460 2461 /* 2462 * Create operations list from IOCTL arguments, setup operations fields so parse 2463 * and commit steps are decoupled from IOCTL arguments. This step can fail. 2464 */ 2465 static struct drm_gpuva_ops * 2466 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, 2467 struct xe_bo *bo, u64 bo_offset_or_userptr, 2468 u64 addr, u64 range, 2469 u32 operation, u32 flags, 2470 u32 prefetch_region, u16 pat_index) 2471 { 2472 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL; 2473 struct drm_gpuva_ops *ops; 2474 struct drm_gpuva_op *__op; 2475 struct drm_gpuvm_bo *vm_bo; 2476 u64 range_end = addr + range; 2477 int err; 2478 2479 lockdep_assert_held_write(&vm->lock); 2480 2481 vm_dbg(&vm->xe->drm, 2482 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx", 2483 operation, (ULL)addr, (ULL)range, 2484 (ULL)bo_offset_or_userptr); 2485 2486 switch (operation) { 2487 case DRM_XE_VM_BIND_OP_MAP: 2488 case DRM_XE_VM_BIND_OP_MAP_USERPTR: { 2489 struct drm_gpuvm_map_req map_req = { 2490 .map.va.addr = addr, 2491 .map.va.range = range, 2492 .map.gem.obj = obj, 2493 .map.gem.offset = bo_offset_or_userptr, 2494 }; 2495 2496 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, &map_req); 2497 break; 2498 } 2499 case DRM_XE_VM_BIND_OP_UNMAP: 2500 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range); 2501 break; 2502 case DRM_XE_VM_BIND_OP_PREFETCH: 2503 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range); 2504 break; 2505 case DRM_XE_VM_BIND_OP_UNMAP_ALL: 2506 xe_assert(vm->xe, bo); 2507 2508 err = xe_bo_lock(bo, true); 2509 if (err) 2510 return ERR_PTR(err); 2511 2512 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj); 2513 if (IS_ERR(vm_bo)) { 2514 xe_bo_unlock(bo); 2515 return ERR_CAST(vm_bo); 2516 } 2517 2518 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo); 2519 drm_gpuvm_bo_put(vm_bo); 2520 xe_bo_unlock(bo); 2521 break; 2522 default: 2523 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2524 ops = ERR_PTR(-EINVAL); 2525 } 2526 if (IS_ERR(ops)) 2527 return ops; 2528 2529 drm_gpuva_for_each_op(__op, ops) { 2530 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2531 2532 if (__op->op == DRM_GPUVA_OP_MAP) { 2533 op->map.immediate = 2534 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; 2535 op->map.read_only = 2536 flags & DRM_XE_VM_BIND_FLAG_READONLY; 2537 op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 2538 op->map.is_cpu_addr_mirror = flags & 2539 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR; 2540 op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; 2541 op->map.pat_index = pat_index; 2542 
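			/*
			 * Descriptive comment (added): on a fault-mode VM with
			 * scratch pages, a non-immediate bind must clear the
			 * scratch PTEs covering the range so accesses fault
			 * instead of hitting stale scratch mappings.
			 */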
op->map.invalidate_on_bind = 2543 __xe_vm_needs_clear_scratch_pages(vm, flags); 2544 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { 2545 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2546 struct xe_tile *tile; 2547 struct xe_svm_range *svm_range; 2548 struct drm_gpusvm_ctx ctx = {}; 2549 struct drm_pagemap *dpagemap; 2550 u8 id, tile_mask = 0; 2551 u32 i; 2552 2553 if (!xe_vma_is_cpu_addr_mirror(vma)) { 2554 op->prefetch.region = prefetch_region; 2555 break; 2556 } 2557 2558 ctx.read_only = xe_vma_read_only(vma); 2559 ctx.devmem_possible = IS_DGFX(vm->xe) && 2560 IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); 2561 2562 for_each_tile(tile, vm->xe, id) 2563 tile_mask |= 0x1 << id; 2564 2565 xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC); 2566 op->prefetch_range.ranges_count = 0; 2567 tile = NULL; 2568 2569 if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) { 2570 dpagemap = xe_vma_resolve_pagemap(vma, 2571 xe_device_get_root_tile(vm->xe)); 2572 /* 2573 * TODO: Once multigpu support is enabled will need 2574 * something to dereference tile from dpagemap. 2575 */ 2576 if (dpagemap) 2577 tile = xe_device_get_root_tile(vm->xe); 2578 } else if (prefetch_region) { 2579 tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] - 2580 XE_PL_VRAM0]; 2581 } 2582 2583 op->prefetch_range.tile = tile; 2584 alloc_next_range: 2585 svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx); 2586 2587 if (PTR_ERR(svm_range) == -ENOENT) { 2588 u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma); 2589 2590 addr = ret == ULONG_MAX ? 0 : ret; 2591 if (addr) 2592 goto alloc_next_range; 2593 else 2594 goto print_op_label; 2595 } 2596 2597 if (IS_ERR(svm_range)) { 2598 err = PTR_ERR(svm_range); 2599 goto unwind_prefetch_ops; 2600 } 2601 2602 if (xe_svm_range_validate(vm, svm_range, tile_mask, !!tile)) { 2603 xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID"); 2604 goto check_next_range; 2605 } 2606 2607 err = xa_alloc(&op->prefetch_range.range, 2608 &i, svm_range, xa_limit_32b, 2609 GFP_KERNEL); 2610 2611 if (err) 2612 goto unwind_prefetch_ops; 2613 2614 op->prefetch_range.ranges_count++; 2615 vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH; 2616 xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED"); 2617 check_next_range: 2618 if (range_end > xe_svm_range_end(svm_range) && 2619 xe_svm_range_end(svm_range) < xe_vma_end(vma)) { 2620 addr = xe_svm_range_end(svm_range); 2621 goto alloc_next_range; 2622 } 2623 } 2624 print_op_label: 2625 print_op(vm->xe, __op); 2626 } 2627 2628 return ops; 2629 2630 unwind_prefetch_ops: 2631 xe_svm_prefetch_gpuva_ops_fini(ops); 2632 drm_gpuva_ops_free(&vm->gpuvm, ops); 2633 return ERR_PTR(err); 2634 } 2635 2636 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO); 2637 2638 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, 2639 struct xe_vma_mem_attr *attr, unsigned int flags) 2640 { 2641 struct xe_bo *bo = op->gem.obj ? 
gem_to_xe_bo(op->gem.obj) : NULL; 2642 struct drm_exec exec; 2643 struct xe_vma *vma; 2644 int err = 0; 2645 2646 lockdep_assert_held_write(&vm->lock); 2647 2648 if (bo) { 2649 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); 2650 drm_exec_until_all_locked(&exec) { 2651 err = 0; 2652 if (!bo->vm) { 2653 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm)); 2654 drm_exec_retry_on_contention(&exec); 2655 } 2656 if (!err) { 2657 err = drm_exec_lock_obj(&exec, &bo->ttm.base); 2658 drm_exec_retry_on_contention(&exec); 2659 } 2660 if (err) { 2661 drm_exec_fini(&exec); 2662 return ERR_PTR(err); 2663 } 2664 } 2665 } 2666 vma = xe_vma_create(vm, bo, op->gem.offset, 2667 op->va.addr, op->va.addr + 2668 op->va.range - 1, attr, flags); 2669 if (IS_ERR(vma)) 2670 goto err_unlock; 2671 2672 if (xe_vma_is_userptr(vma)) 2673 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2674 else if (!xe_vma_has_no_bo(vma) && !bo->vm) 2675 err = add_preempt_fences(vm, bo); 2676 2677 err_unlock: 2678 if (bo) 2679 drm_exec_fini(&exec); 2680 2681 if (err) { 2682 prep_vma_destroy(vm, vma, false); 2683 xe_vma_destroy_unlocked(vma); 2684 vma = ERR_PTR(err); 2685 } 2686 2687 return vma; 2688 } 2689 2690 static u64 xe_vma_max_pte_size(struct xe_vma *vma) 2691 { 2692 if (vma->gpuva.flags & XE_VMA_PTE_1G) 2693 return SZ_1G; 2694 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT)) 2695 return SZ_2M; 2696 else if (vma->gpuva.flags & XE_VMA_PTE_64K) 2697 return SZ_64K; 2698 else if (vma->gpuva.flags & XE_VMA_PTE_4K) 2699 return SZ_4K; 2700 2701 return SZ_1G; /* Uninitialized, used max size */ 2702 } 2703 2704 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size) 2705 { 2706 switch (size) { 2707 case SZ_1G: 2708 vma->gpuva.flags |= XE_VMA_PTE_1G; 2709 break; 2710 case SZ_2M: 2711 vma->gpuva.flags |= XE_VMA_PTE_2M; 2712 break; 2713 case SZ_64K: 2714 vma->gpuva.flags |= XE_VMA_PTE_64K; 2715 break; 2716 case SZ_4K: 2717 vma->gpuva.flags |= XE_VMA_PTE_4K; 2718 break; 2719 } 2720 } 2721 2722 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) 2723 { 2724 int err = 0; 2725 2726 lockdep_assert_held_write(&vm->lock); 2727 2728 switch (op->base.op) { 2729 case DRM_GPUVA_OP_MAP: 2730 err |= xe_vm_insert_vma(vm, op->map.vma); 2731 if (!err) 2732 op->flags |= XE_VMA_OP_COMMITTED; 2733 break; 2734 case DRM_GPUVA_OP_REMAP: 2735 { 2736 u8 tile_present = 2737 gpuva_to_vma(op->base.remap.unmap->va)->tile_present; 2738 2739 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va), 2740 true); 2741 op->flags |= XE_VMA_OP_COMMITTED; 2742 2743 if (op->remap.prev) { 2744 err |= xe_vm_insert_vma(vm, op->remap.prev); 2745 if (!err) 2746 op->flags |= XE_VMA_OP_PREV_COMMITTED; 2747 if (!err && op->remap.skip_prev) { 2748 op->remap.prev->tile_present = 2749 tile_present; 2750 op->remap.prev = NULL; 2751 } 2752 } 2753 if (op->remap.next) { 2754 err |= xe_vm_insert_vma(vm, op->remap.next); 2755 if (!err) 2756 op->flags |= XE_VMA_OP_NEXT_COMMITTED; 2757 if (!err && op->remap.skip_next) { 2758 op->remap.next->tile_present = 2759 tile_present; 2760 op->remap.next = NULL; 2761 } 2762 } 2763 2764 /* Adjust for partial unbind after removing VMA from VM */ 2765 if (!err) { 2766 op->base.remap.unmap->va->va.addr = op->remap.start; 2767 op->base.remap.unmap->va->va.range = op->remap.range; 2768 } 2769 break; 2770 } 2771 case DRM_GPUVA_OP_UNMAP: 2772 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true); 2773 op->flags |= XE_VMA_OP_COMMITTED; 2774 break; 2775 case DRM_GPUVA_OP_PREFETCH: 2776 op->flags |= 
XE_VMA_OP_COMMITTED; 2777 break; 2778 default: 2779 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2780 } 2781 2782 return err; 2783 } 2784 2785 /** 2786 * xe_vma_has_default_mem_attrs - Check if a VMA has default memory attributes 2787 * @vma: Pointer to the xe_vma structure to check 2788 * 2789 * This function determines whether the given VMA (Virtual Memory Area) 2790 * has its memory attributes set to their default values. Specifically, 2791 * it checks the following conditions: 2792 * 2793 * - `atomic_access` is `DRM_XE_VMA_ATOMIC_UNDEFINED` 2794 * - `pat_index` is equal to `default_pat_index` 2795 * - `preferred_loc.devmem_fd` is `DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE` 2796 * - `preferred_loc.migration_policy` is `DRM_XE_MIGRATE_ALL_PAGES` 2797 * 2798 * Return: true if all attributes are at their default values, false otherwise. 2799 */ 2800 bool xe_vma_has_default_mem_attrs(struct xe_vma *vma) 2801 { 2802 return (vma->attr.atomic_access == DRM_XE_ATOMIC_UNDEFINED && 2803 vma->attr.pat_index == vma->attr.default_pat_index && 2804 vma->attr.preferred_loc.devmem_fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE && 2805 vma->attr.preferred_loc.migration_policy == DRM_XE_MIGRATE_ALL_PAGES); 2806 } 2807 2808 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, 2809 struct xe_vma_ops *vops) 2810 { 2811 struct xe_device *xe = vm->xe; 2812 struct drm_gpuva_op *__op; 2813 struct xe_tile *tile; 2814 u8 id, tile_mask = 0; 2815 int err = 0; 2816 2817 lockdep_assert_held_write(&vm->lock); 2818 2819 for_each_tile(tile, vm->xe, id) 2820 tile_mask |= 0x1 << id; 2821 2822 drm_gpuva_for_each_op(__op, ops) { 2823 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2824 struct xe_vma *vma; 2825 unsigned int flags = 0; 2826 2827 INIT_LIST_HEAD(&op->link); 2828 list_add_tail(&op->link, &vops->list); 2829 op->tile_mask = tile_mask; 2830 2831 switch (op->base.op) { 2832 case DRM_GPUVA_OP_MAP: 2833 { 2834 struct xe_vma_mem_attr default_attr = { 2835 .preferred_loc = { 2836 .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE, 2837 .migration_policy = DRM_XE_MIGRATE_ALL_PAGES, 2838 }, 2839 .atomic_access = DRM_XE_ATOMIC_UNDEFINED, 2840 .default_pat_index = op->map.pat_index, 2841 .pat_index = op->map.pat_index, 2842 }; 2843 2844 flags |= op->map.read_only ? 2845 VMA_CREATE_FLAG_READ_ONLY : 0; 2846 flags |= op->map.is_null ? 2847 VMA_CREATE_FLAG_IS_NULL : 0; 2848 flags |= op->map.dumpable ? 2849 VMA_CREATE_FLAG_DUMPABLE : 0; 2850 flags |= op->map.is_cpu_addr_mirror ? 
2851 VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0; 2852 2853 vma = new_vma(vm, &op->base.map, &default_attr, 2854 flags); 2855 if (IS_ERR(vma)) 2856 return PTR_ERR(vma); 2857 2858 op->map.vma = vma; 2859 if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) && 2860 !op->map.is_cpu_addr_mirror) || 2861 op->map.invalidate_on_bind) 2862 xe_vma_ops_incr_pt_update_ops(vops, 2863 op->tile_mask, 1); 2864 break; 2865 } 2866 case DRM_GPUVA_OP_REMAP: 2867 { 2868 struct xe_vma *old = 2869 gpuva_to_vma(op->base.remap.unmap->va); 2870 bool skip = xe_vma_is_cpu_addr_mirror(old); 2871 u64 start = xe_vma_start(old), end = xe_vma_end(old); 2872 int num_remap_ops = 0; 2873 2874 if (op->base.remap.prev) 2875 start = op->base.remap.prev->va.addr + 2876 op->base.remap.prev->va.range; 2877 if (op->base.remap.next) 2878 end = op->base.remap.next->va.addr; 2879 2880 if (xe_vma_is_cpu_addr_mirror(old) && 2881 xe_svm_has_mapping(vm, start, end)) { 2882 if (vops->flags & XE_VMA_OPS_FLAG_MADVISE) 2883 xe_svm_unmap_address_range(vm, start, end); 2884 else 2885 return -EBUSY; 2886 } 2887 2888 op->remap.start = xe_vma_start(old); 2889 op->remap.range = xe_vma_size(old); 2890 2891 flags |= op->base.remap.unmap->va->flags & 2892 XE_VMA_READ_ONLY ? 2893 VMA_CREATE_FLAG_READ_ONLY : 0; 2894 flags |= op->base.remap.unmap->va->flags & 2895 DRM_GPUVA_SPARSE ? 2896 VMA_CREATE_FLAG_IS_NULL : 0; 2897 flags |= op->base.remap.unmap->va->flags & 2898 XE_VMA_DUMPABLE ? 2899 VMA_CREATE_FLAG_DUMPABLE : 0; 2900 flags |= xe_vma_is_cpu_addr_mirror(old) ? 2901 VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0; 2902 2903 if (op->base.remap.prev) { 2904 vma = new_vma(vm, op->base.remap.prev, 2905 &old->attr, flags); 2906 if (IS_ERR(vma)) 2907 return PTR_ERR(vma); 2908 2909 op->remap.prev = vma; 2910 2911 /* 2912 * Userptr creates a new SG mapping so 2913 * we must also rebind. 2914 */ 2915 op->remap.skip_prev = skip || 2916 (!xe_vma_is_userptr(old) && 2917 IS_ALIGNED(xe_vma_end(vma), 2918 xe_vma_max_pte_size(old))); 2919 if (op->remap.skip_prev) { 2920 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2921 op->remap.range -= 2922 xe_vma_end(vma) - 2923 xe_vma_start(old); 2924 op->remap.start = xe_vma_end(vma); 2925 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx", 2926 (ULL)op->remap.start, 2927 (ULL)op->remap.range); 2928 } else { 2929 num_remap_ops++; 2930 } 2931 } 2932 2933 if (op->base.remap.next) { 2934 vma = new_vma(vm, op->base.remap.next, 2935 &old->attr, flags); 2936 if (IS_ERR(vma)) 2937 return PTR_ERR(vma); 2938 2939 op->remap.next = vma; 2940 2941 /* 2942 * Userptr creates a new SG mapping so 2943 * we must also rebind. 
2944 */ 2945 op->remap.skip_next = skip || 2946 (!xe_vma_is_userptr(old) && 2947 IS_ALIGNED(xe_vma_start(vma), 2948 xe_vma_max_pte_size(old))); 2949 if (op->remap.skip_next) { 2950 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2951 op->remap.range -= 2952 xe_vma_end(old) - 2953 xe_vma_start(vma); 2954 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx", 2955 (ULL)op->remap.start, 2956 (ULL)op->remap.range); 2957 } else { 2958 num_remap_ops++; 2959 } 2960 } 2961 if (!skip) 2962 num_remap_ops++; 2963 2964 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops); 2965 break; 2966 } 2967 case DRM_GPUVA_OP_UNMAP: 2968 vma = gpuva_to_vma(op->base.unmap.va); 2969 2970 if (xe_vma_is_cpu_addr_mirror(vma) && 2971 xe_svm_has_mapping(vm, xe_vma_start(vma), 2972 xe_vma_end(vma))) 2973 return -EBUSY; 2974 2975 if (!xe_vma_is_cpu_addr_mirror(vma)) 2976 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2977 break; 2978 case DRM_GPUVA_OP_PREFETCH: 2979 vma = gpuva_to_vma(op->base.prefetch.va); 2980 2981 if (xe_vma_is_userptr(vma)) { 2982 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2983 if (err) 2984 return err; 2985 } 2986 2987 if (xe_vma_is_cpu_addr_mirror(vma)) 2988 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 2989 op->prefetch_range.ranges_count); 2990 else 2991 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2992 2993 break; 2994 default: 2995 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2996 } 2997 2998 err = xe_vma_op_commit(vm, op); 2999 if (err) 3000 return err; 3001 } 3002 3003 return 0; 3004 } 3005 3006 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, 3007 bool post_commit, bool prev_post_commit, 3008 bool next_post_commit) 3009 { 3010 lockdep_assert_held_write(&vm->lock); 3011 3012 switch (op->base.op) { 3013 case DRM_GPUVA_OP_MAP: 3014 if (op->map.vma) { 3015 prep_vma_destroy(vm, op->map.vma, post_commit); 3016 xe_vma_destroy_unlocked(op->map.vma); 3017 } 3018 break; 3019 case DRM_GPUVA_OP_UNMAP: 3020 { 3021 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va); 3022 3023 if (vma) { 3024 down_read(&vm->userptr.notifier_lock); 3025 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 3026 up_read(&vm->userptr.notifier_lock); 3027 if (post_commit) 3028 xe_vm_insert_vma(vm, vma); 3029 } 3030 break; 3031 } 3032 case DRM_GPUVA_OP_REMAP: 3033 { 3034 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va); 3035 3036 if (op->remap.prev) { 3037 prep_vma_destroy(vm, op->remap.prev, prev_post_commit); 3038 xe_vma_destroy_unlocked(op->remap.prev); 3039 } 3040 if (op->remap.next) { 3041 prep_vma_destroy(vm, op->remap.next, next_post_commit); 3042 xe_vma_destroy_unlocked(op->remap.next); 3043 } 3044 if (vma) { 3045 down_read(&vm->userptr.notifier_lock); 3046 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 3047 up_read(&vm->userptr.notifier_lock); 3048 if (post_commit) 3049 xe_vm_insert_vma(vm, vma); 3050 } 3051 break; 3052 } 3053 case DRM_GPUVA_OP_PREFETCH: 3054 /* Nothing to do */ 3055 break; 3056 default: 3057 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 3058 } 3059 } 3060 3061 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, 3062 struct drm_gpuva_ops **ops, 3063 int num_ops_list) 3064 { 3065 int i; 3066 3067 for (i = num_ops_list - 1; i >= 0; --i) { 3068 struct drm_gpuva_ops *__ops = ops[i]; 3069 struct drm_gpuva_op *__op; 3070 3071 if (!__ops) 3072 continue; 3073 3074 drm_gpuva_for_each_op_reverse(__op, __ops) { 3075 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 3076 3077 xe_vma_op_unwind(vm, op, 3078 op->flags & XE_VMA_OP_COMMITTED, 3079 op->flags 
& XE_VMA_OP_PREV_COMMITTED, 3080 op->flags & XE_VMA_OP_NEXT_COMMITTED); 3081 } 3082 } 3083 } 3084 3085 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, 3086 bool validate) 3087 { 3088 struct xe_bo *bo = xe_vma_bo(vma); 3089 struct xe_vm *vm = xe_vma_vm(vma); 3090 int err = 0; 3091 3092 if (bo) { 3093 if (!bo->vm) 3094 err = drm_exec_lock_obj(exec, &bo->ttm.base); 3095 if (!err && validate) 3096 err = xe_bo_validate(bo, vm, 3097 !xe_vm_in_preempt_fence_mode(vm)); 3098 } 3099 3100 return err; 3101 } 3102 3103 static int check_ufence(struct xe_vma *vma) 3104 { 3105 if (vma->ufence) { 3106 struct xe_user_fence * const f = vma->ufence; 3107 3108 if (!xe_sync_ufence_get_status(f)) 3109 return -EBUSY; 3110 3111 vma->ufence = NULL; 3112 xe_sync_ufence_put(f); 3113 } 3114 3115 return 0; 3116 } 3117 3118 static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) 3119 { 3120 bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); 3121 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 3122 struct xe_tile *tile = op->prefetch_range.tile; 3123 int err = 0; 3124 3125 struct xe_svm_range *svm_range; 3126 struct drm_gpusvm_ctx ctx = {}; 3127 unsigned long i; 3128 3129 if (!xe_vma_is_cpu_addr_mirror(vma)) 3130 return 0; 3131 3132 ctx.read_only = xe_vma_read_only(vma); 3133 ctx.devmem_possible = devmem_possible; 3134 ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0; 3135 3136 /* TODO: Threading the migration */ 3137 xa_for_each(&op->prefetch_range.range, i, svm_range) { 3138 if (!tile) 3139 xe_svm_range_migrate_to_smem(vm, svm_range); 3140 3141 if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, !!tile)) { 3142 err = xe_svm_alloc_vram(tile, svm_range, &ctx); 3143 if (err) { 3144 drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n", 3145 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 3146 return -ENODATA; 3147 } 3148 xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM"); 3149 } 3150 3151 err = xe_svm_range_get_pages(vm, svm_range, &ctx); 3152 if (err) { 3153 drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n", 3154 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 3155 if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) 3156 err = -ENODATA; 3157 return err; 3158 } 3159 xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE"); 3160 } 3161 3162 return err; 3163 } 3164 3165 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, 3166 struct xe_vma_op *op) 3167 { 3168 int err = 0; 3169 3170 switch (op->base.op) { 3171 case DRM_GPUVA_OP_MAP: 3172 if (!op->map.invalidate_on_bind) 3173 err = vma_lock_and_validate(exec, op->map.vma, 3174 !xe_vm_in_fault_mode(vm) || 3175 op->map.immediate); 3176 break; 3177 case DRM_GPUVA_OP_REMAP: 3178 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va)); 3179 if (err) 3180 break; 3181 3182 err = vma_lock_and_validate(exec, 3183 gpuva_to_vma(op->base.remap.unmap->va), 3184 false); 3185 if (!err && op->remap.prev) 3186 err = vma_lock_and_validate(exec, op->remap.prev, true); 3187 if (!err && op->remap.next) 3188 err = vma_lock_and_validate(exec, op->remap.next, true); 3189 break; 3190 case DRM_GPUVA_OP_UNMAP: 3191 err = check_ufence(gpuva_to_vma(op->base.unmap.va)); 3192 if (err) 3193 break; 3194 3195 err = vma_lock_and_validate(exec, 3196 gpuva_to_vma(op->base.unmap.va), 3197 false); 3198 break; 3199 case DRM_GPUVA_OP_PREFETCH: 3200 { 3201 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 3202 
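		/*
		 * Descriptive comment (added): SVM (CPU-address-mirror)
		 * prefetches are handled via the per-range SVM path; only
		 * BO-backed VMAs are migrated to the requested memory
		 * region here.
		 */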
u32 region; 3203 3204 if (!xe_vma_is_cpu_addr_mirror(vma)) { 3205 region = op->prefetch.region; 3206 xe_assert(vm->xe, region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC || 3207 region <= ARRAY_SIZE(region_to_mem_type)); 3208 } 3209 3210 err = vma_lock_and_validate(exec, 3211 gpuva_to_vma(op->base.prefetch.va), 3212 false); 3213 if (!err && !xe_vma_has_no_bo(vma)) 3214 err = xe_bo_migrate(xe_vma_bo(vma), 3215 region_to_mem_type[region]); 3216 break; 3217 } 3218 default: 3219 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 3220 } 3221 3222 return err; 3223 } 3224 3225 static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops) 3226 { 3227 struct xe_vma_op *op; 3228 int err; 3229 3230 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) 3231 return 0; 3232 3233 list_for_each_entry(op, &vops->list, link) { 3234 if (op->base.op == DRM_GPUVA_OP_PREFETCH) { 3235 err = prefetch_ranges(vm, op); 3236 if (err) 3237 return err; 3238 } 3239 } 3240 3241 return 0; 3242 } 3243 3244 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, 3245 struct xe_vm *vm, 3246 struct xe_vma_ops *vops) 3247 { 3248 struct xe_vma_op *op; 3249 int err; 3250 3251 err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 3252 if (err) 3253 return err; 3254 3255 list_for_each_entry(op, &vops->list, link) { 3256 err = op_lock_and_prep(exec, vm, op); 3257 if (err) 3258 return err; 3259 } 3260 3261 #ifdef TEST_VM_OPS_ERROR 3262 if (vops->inject_error && 3263 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK) 3264 return -ENOSPC; 3265 #endif 3266 3267 return 0; 3268 } 3269 3270 static void op_trace(struct xe_vma_op *op) 3271 { 3272 switch (op->base.op) { 3273 case DRM_GPUVA_OP_MAP: 3274 trace_xe_vma_bind(op->map.vma); 3275 break; 3276 case DRM_GPUVA_OP_REMAP: 3277 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va)); 3278 if (op->remap.prev) 3279 trace_xe_vma_bind(op->remap.prev); 3280 if (op->remap.next) 3281 trace_xe_vma_bind(op->remap.next); 3282 break; 3283 case DRM_GPUVA_OP_UNMAP: 3284 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va)); 3285 break; 3286 case DRM_GPUVA_OP_PREFETCH: 3287 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va)); 3288 break; 3289 case DRM_GPUVA_OP_DRIVER: 3290 break; 3291 default: 3292 XE_WARN_ON("NOT POSSIBLE"); 3293 } 3294 } 3295 3296 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops) 3297 { 3298 struct xe_vma_op *op; 3299 3300 list_for_each_entry(op, &vops->list, link) 3301 op_trace(op); 3302 } 3303 3304 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops) 3305 { 3306 struct xe_exec_queue *q = vops->q; 3307 struct xe_tile *tile; 3308 int number_tiles = 0; 3309 u8 id; 3310 3311 for_each_tile(tile, vm->xe, id) { 3312 if (vops->pt_update_ops[id].num_ops) 3313 ++number_tiles; 3314 3315 if (vops->pt_update_ops[id].q) 3316 continue; 3317 3318 if (q) { 3319 vops->pt_update_ops[id].q = q; 3320 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list)) 3321 q = list_next_entry(q, multi_gt_list); 3322 } else { 3323 vops->pt_update_ops[id].q = vm->q[id]; 3324 } 3325 } 3326 3327 return number_tiles; 3328 } 3329 3330 static struct dma_fence *ops_execute(struct xe_vm *vm, 3331 struct xe_vma_ops *vops) 3332 { 3333 struct xe_tile *tile; 3334 struct dma_fence *fence = NULL; 3335 struct dma_fence **fences = NULL; 3336 struct dma_fence_array *cf = NULL; 3337 int number_tiles = 0, current_fence = 0, err; 3338 u8 id; 3339 3340 number_tiles = vm_ops_setup_tile_args(vm, vops); 3341 if (number_tiles == 0) 3342 return ERR_PTR(-ENODATA); 3343 3344 if 
(number_tiles > 1) { 3345 fences = kmalloc_array(number_tiles, sizeof(*fences), 3346 GFP_KERNEL); 3347 if (!fences) { 3348 fence = ERR_PTR(-ENOMEM); 3349 goto err_trace; 3350 } 3351 } 3352 3353 for_each_tile(tile, vm->xe, id) { 3354 if (!vops->pt_update_ops[id].num_ops) 3355 continue; 3356 3357 err = xe_pt_update_ops_prepare(tile, vops); 3358 if (err) { 3359 fence = ERR_PTR(err); 3360 goto err_out; 3361 } 3362 } 3363 3364 trace_xe_vm_ops_execute(vops); 3365 3366 for_each_tile(tile, vm->xe, id) { 3367 if (!vops->pt_update_ops[id].num_ops) 3368 continue; 3369 3370 fence = xe_pt_update_ops_run(tile, vops); 3371 if (IS_ERR(fence)) 3372 goto err_out; 3373 3374 if (fences) 3375 fences[current_fence++] = fence; 3376 } 3377 3378 if (fences) { 3379 cf = dma_fence_array_create(number_tiles, fences, 3380 vm->composite_fence_ctx, 3381 vm->composite_fence_seqno++, 3382 false); 3383 if (!cf) { 3384 --vm->composite_fence_seqno; 3385 fence = ERR_PTR(-ENOMEM); 3386 goto err_out; 3387 } 3388 fence = &cf->base; 3389 } 3390 3391 for_each_tile(tile, vm->xe, id) { 3392 if (!vops->pt_update_ops[id].num_ops) 3393 continue; 3394 3395 xe_pt_update_ops_fini(tile, vops); 3396 } 3397 3398 return fence; 3399 3400 err_out: 3401 for_each_tile(tile, vm->xe, id) { 3402 if (!vops->pt_update_ops[id].num_ops) 3403 continue; 3404 3405 xe_pt_update_ops_abort(tile, vops); 3406 } 3407 while (current_fence) 3408 dma_fence_put(fences[--current_fence]); 3409 kfree(fences); 3410 kfree(cf); 3411 3412 err_trace: 3413 trace_xe_vm_ops_fail(vm); 3414 return fence; 3415 } 3416 3417 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence) 3418 { 3419 if (vma->ufence) 3420 xe_sync_ufence_put(vma->ufence); 3421 vma->ufence = __xe_sync_ufence_get(ufence); 3422 } 3423 3424 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op, 3425 struct xe_user_fence *ufence) 3426 { 3427 switch (op->base.op) { 3428 case DRM_GPUVA_OP_MAP: 3429 vma_add_ufence(op->map.vma, ufence); 3430 break; 3431 case DRM_GPUVA_OP_REMAP: 3432 if (op->remap.prev) 3433 vma_add_ufence(op->remap.prev, ufence); 3434 if (op->remap.next) 3435 vma_add_ufence(op->remap.next, ufence); 3436 break; 3437 case DRM_GPUVA_OP_UNMAP: 3438 break; 3439 case DRM_GPUVA_OP_PREFETCH: 3440 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence); 3441 break; 3442 default: 3443 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 3444 } 3445 } 3446 3447 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, 3448 struct dma_fence *fence) 3449 { 3450 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q); 3451 struct xe_user_fence *ufence; 3452 struct xe_vma_op *op; 3453 int i; 3454 3455 ufence = find_ufence_get(vops->syncs, vops->num_syncs); 3456 list_for_each_entry(op, &vops->list, link) { 3457 if (ufence) 3458 op_add_ufence(vm, op, ufence); 3459 3460 if (op->base.op == DRM_GPUVA_OP_UNMAP) 3461 xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence); 3462 else if (op->base.op == DRM_GPUVA_OP_REMAP) 3463 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), 3464 fence); 3465 } 3466 if (ufence) 3467 xe_sync_ufence_put(ufence); 3468 if (fence) { 3469 for (i = 0; i < vops->num_syncs; i++) 3470 xe_sync_entry_signal(vops->syncs + i, fence); 3471 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); 3472 } 3473 } 3474 3475 static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm, 3476 struct xe_vma_ops *vops) 3477 { 3478 struct drm_exec exec; 3479 struct dma_fence *fence; 3480 int err; 3481 3482 lockdep_assert_held_write(&vm->lock); 
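	/*
	 * Descriptive comment (added): lock the VM dma-resv and all BOs
	 * touched by the ops via drm_exec, prepare the PT updates, then
	 * run them, retrying on contention.
	 */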
3483 3484 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | 3485 DRM_EXEC_IGNORE_DUPLICATES, 0); 3486 drm_exec_until_all_locked(&exec) { 3487 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops); 3488 drm_exec_retry_on_contention(&exec); 3489 if (err) { 3490 fence = ERR_PTR(err); 3491 goto unlock; 3492 } 3493 3494 fence = ops_execute(vm, vops); 3495 if (IS_ERR(fence)) { 3496 if (PTR_ERR(fence) == -ENODATA) 3497 vm_bind_ioctl_ops_fini(vm, vops, NULL); 3498 goto unlock; 3499 } 3500 3501 vm_bind_ioctl_ops_fini(vm, vops, fence); 3502 } 3503 3504 unlock: 3505 drm_exec_fini(&exec); 3506 return fence; 3507 } 3508 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); 3509 3510 #define SUPPORTED_FLAGS_STUB \ 3511 (DRM_XE_VM_BIND_FLAG_READONLY | \ 3512 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \ 3513 DRM_XE_VM_BIND_FLAG_NULL | \ 3514 DRM_XE_VM_BIND_FLAG_DUMPABLE | \ 3515 DRM_XE_VM_BIND_FLAG_CHECK_PXP | \ 3516 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) 3517 3518 #ifdef TEST_VM_OPS_ERROR 3519 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR) 3520 #else 3521 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB 3522 #endif 3523 3524 #define XE_64K_PAGE_MASK 0xffffull 3525 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) 3526 3527 static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, 3528 struct drm_xe_vm_bind *args, 3529 struct drm_xe_vm_bind_op **bind_ops) 3530 { 3531 int err; 3532 int i; 3533 3534 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) || 3535 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 3536 return -EINVAL; 3537 3538 if (XE_IOCTL_DBG(xe, args->extensions)) 3539 return -EINVAL; 3540 3541 if (args->num_binds > 1) { 3542 u64 __user *bind_user = 3543 u64_to_user_ptr(args->vector_of_binds); 3544 3545 *bind_ops = kvmalloc_array(args->num_binds, 3546 sizeof(struct drm_xe_vm_bind_op), 3547 GFP_KERNEL | __GFP_ACCOUNT | 3548 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3549 if (!*bind_ops) 3550 return args->num_binds > 1 ? 
-ENOBUFS : -ENOMEM; 3551 3552 err = copy_from_user(*bind_ops, bind_user, 3553 sizeof(struct drm_xe_vm_bind_op) * 3554 args->num_binds); 3555 if (XE_IOCTL_DBG(xe, err)) { 3556 err = -EFAULT; 3557 goto free_bind_ops; 3558 } 3559 } else { 3560 *bind_ops = &args->bind; 3561 } 3562 3563 for (i = 0; i < args->num_binds; ++i) { 3564 u64 range = (*bind_ops)[i].range; 3565 u64 addr = (*bind_ops)[i].addr; 3566 u32 op = (*bind_ops)[i].op; 3567 u32 flags = (*bind_ops)[i].flags; 3568 u32 obj = (*bind_ops)[i].obj; 3569 u64 obj_offset = (*bind_ops)[i].obj_offset; 3570 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance; 3571 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 3572 bool is_cpu_addr_mirror = flags & 3573 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR; 3574 u16 pat_index = (*bind_ops)[i].pat_index; 3575 u16 coh_mode; 3576 3577 if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror && 3578 (!xe_vm_in_fault_mode(vm) || 3579 !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) { 3580 err = -EINVAL; 3581 goto free_bind_ops; 3582 } 3583 3584 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) { 3585 err = -EINVAL; 3586 goto free_bind_ops; 3587 } 3588 3589 pat_index = array_index_nospec(pat_index, xe->pat.n_entries); 3590 (*bind_ops)[i].pat_index = pat_index; 3591 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); 3592 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */ 3593 err = -EINVAL; 3594 goto free_bind_ops; 3595 } 3596 3597 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) { 3598 err = -EINVAL; 3599 goto free_bind_ops; 3600 } 3601 3602 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) || 3603 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) || 3604 XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) || 3605 XE_IOCTL_DBG(xe, obj_offset && (is_null || 3606 is_cpu_addr_mirror)) || 3607 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP && 3608 (is_null || is_cpu_addr_mirror)) || 3609 XE_IOCTL_DBG(xe, !obj && 3610 op == DRM_XE_VM_BIND_OP_MAP && 3611 !is_null && !is_cpu_addr_mirror) || 3612 XE_IOCTL_DBG(xe, !obj && 3613 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3614 XE_IOCTL_DBG(xe, addr && 3615 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3616 XE_IOCTL_DBG(xe, range && 3617 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3618 XE_IOCTL_DBG(xe, obj && 3619 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 3620 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 3621 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 3622 XE_IOCTL_DBG(xe, obj && 3623 op == DRM_XE_VM_BIND_OP_PREFETCH) || 3624 XE_IOCTL_DBG(xe, prefetch_region && 3625 op != DRM_XE_VM_BIND_OP_PREFETCH) || 3626 XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC && 3627 !(BIT(prefetch_region) & xe->info.mem_region_mask))) || 3628 XE_IOCTL_DBG(xe, obj && 3629 op == DRM_XE_VM_BIND_OP_UNMAP)) { 3630 err = -EINVAL; 3631 goto free_bind_ops; 3632 } 3633 3634 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) || 3635 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) || 3636 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) || 3637 XE_IOCTL_DBG(xe, !range && 3638 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) { 3639 err = -EINVAL; 3640 goto free_bind_ops; 3641 } 3642 } 3643 3644 return 0; 3645 3646 free_bind_ops: 3647 if (args->num_binds > 1) 3648 kvfree(*bind_ops); 3649 *bind_ops = NULL; 3650 return err; 3651 } 3652 3653 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, 3654 struct xe_exec_queue *q, 3655 struct xe_sync_entry *syncs, 3656 int num_syncs) 3657 { 3658 struct dma_fence *fence; 3659 int i, err = 0; 3660 3661 fence = xe_sync_in_fence_get(syncs, num_syncs, 3662 to_wait_exec_queue(vm, q), vm); 3663 if (IS_ERR(fence)) 3664 
		return PTR_ERR(fence);

	for (i = 0; i < num_syncs; i++)
		xe_sync_entry_signal(&syncs[i], fence);

	xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
				     fence);
	dma_fence_put(fence);

	return err;
}

static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
			    struct xe_exec_queue *q,
			    struct xe_sync_entry *syncs, u32 num_syncs)
{
	memset(vops, 0, sizeof(*vops));
	INIT_LIST_HEAD(&vops->list);
	vops->vm = vm;
	vops->q = q;
	vops->syncs = syncs;
	vops->num_syncs = num_syncs;
	vops->flags = 0;
}

static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
					u64 addr, u64 range, u64 obj_offset,
					u16 pat_index, u32 op, u32 bind_flags)
{
	u16 coh_mode;

	if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) ||
	    XE_IOCTL_DBG(xe, obj_offset >
			 xe_bo_size(bo) - range)) {
		return -EINVAL;
	}

	/*
	 * Some platforms require 64k VM_BIND alignment,
	 * specifically those with XE_VRAM_FLAGS_NEED64K.
	 *
	 * Other platforms may have BOs set to 64k physical placement,
	 * but can be mapped at 4k offsets anyway. This check is only
	 * there for the former case.
	 */
	if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) &&
	    (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) {
		if (XE_IOCTL_DBG(xe, obj_offset &
				 XE_64K_PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
			return -EINVAL;
		}
	}

	coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
	if (bo->cpu_caching) {
		if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
				 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
			return -EINVAL;
		}
	} else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
		/*
		 * An imported dma-buf from a different device should
		 * require 1-way or 2-way coherency since we don't know
		 * how it was mapped on the CPU. Just assume it is
		 * potentially cached on the CPU side.
3731 */ 3732 return -EINVAL; 3733 } 3734 3735 /* If a BO is protected it can only be mapped if the key is still valid */ 3736 if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) && 3737 op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL) 3738 if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0)) 3739 return -ENOEXEC; 3740 3741 return 0; 3742 } 3743 3744 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3745 { 3746 struct xe_device *xe = to_xe_device(dev); 3747 struct xe_file *xef = to_xe_file(file); 3748 struct drm_xe_vm_bind *args = data; 3749 struct drm_xe_sync __user *syncs_user; 3750 struct xe_bo **bos = NULL; 3751 struct drm_gpuva_ops **ops = NULL; 3752 struct xe_vm *vm; 3753 struct xe_exec_queue *q = NULL; 3754 u32 num_syncs, num_ufence = 0; 3755 struct xe_sync_entry *syncs = NULL; 3756 struct drm_xe_vm_bind_op *bind_ops = NULL; 3757 struct xe_vma_ops vops; 3758 struct dma_fence *fence; 3759 int err; 3760 int i; 3761 3762 vm = xe_vm_lookup(xef, args->vm_id); 3763 if (XE_IOCTL_DBG(xe, !vm)) 3764 return -EINVAL; 3765 3766 err = vm_bind_ioctl_check_args(xe, vm, args, &bind_ops); 3767 if (err) 3768 goto put_vm; 3769 3770 if (args->exec_queue_id) { 3771 q = xe_exec_queue_lookup(xef, args->exec_queue_id); 3772 if (XE_IOCTL_DBG(xe, !q)) { 3773 err = -ENOENT; 3774 goto free_bind_ops; 3775 } 3776 3777 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) { 3778 err = -EINVAL; 3779 goto put_exec_queue; 3780 } 3781 } 3782 3783 /* Ensure all UNMAPs visible */ 3784 xe_svm_flush(vm); 3785 3786 err = down_write_killable(&vm->lock); 3787 if (err) 3788 goto put_exec_queue; 3789 3790 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { 3791 err = -ENOENT; 3792 goto release_vm_lock; 3793 } 3794 3795 for (i = 0; i < args->num_binds; ++i) { 3796 u64 range = bind_ops[i].range; 3797 u64 addr = bind_ops[i].addr; 3798 3799 if (XE_IOCTL_DBG(xe, range > vm->size) || 3800 XE_IOCTL_DBG(xe, addr > vm->size - range)) { 3801 err = -EINVAL; 3802 goto release_vm_lock; 3803 } 3804 } 3805 3806 if (args->num_binds) { 3807 bos = kvcalloc(args->num_binds, sizeof(*bos), 3808 GFP_KERNEL | __GFP_ACCOUNT | 3809 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3810 if (!bos) { 3811 err = -ENOMEM; 3812 goto release_vm_lock; 3813 } 3814 3815 ops = kvcalloc(args->num_binds, sizeof(*ops), 3816 GFP_KERNEL | __GFP_ACCOUNT | 3817 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3818 if (!ops) { 3819 err = -ENOMEM; 3820 goto free_bos; 3821 } 3822 } 3823 3824 for (i = 0; i < args->num_binds; ++i) { 3825 struct drm_gem_object *gem_obj; 3826 u64 range = bind_ops[i].range; 3827 u64 addr = bind_ops[i].addr; 3828 u32 obj = bind_ops[i].obj; 3829 u64 obj_offset = bind_ops[i].obj_offset; 3830 u16 pat_index = bind_ops[i].pat_index; 3831 u32 op = bind_ops[i].op; 3832 u32 bind_flags = bind_ops[i].flags; 3833 3834 if (!obj) 3835 continue; 3836 3837 gem_obj = drm_gem_object_lookup(file, obj); 3838 if (XE_IOCTL_DBG(xe, !gem_obj)) { 3839 err = -ENOENT; 3840 goto put_obj; 3841 } 3842 bos[i] = gem_to_xe_bo(gem_obj); 3843 3844 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range, 3845 obj_offset, pat_index, op, 3846 bind_flags); 3847 if (err) 3848 goto put_obj; 3849 } 3850 3851 if (args->num_syncs) { 3852 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); 3853 if (!syncs) { 3854 err = -ENOMEM; 3855 goto put_obj; 3856 } 3857 } 3858 3859 syncs_user = u64_to_user_ptr(args->syncs); 3860 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { 3861 err = xe_sync_entry_parse(xe, 
xef, &syncs[num_syncs], 3862 &syncs_user[num_syncs], 3863 (xe_vm_in_lr_mode(vm) ? 3864 SYNC_PARSE_FLAG_LR_MODE : 0) | 3865 (!args->num_binds ? 3866 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0)); 3867 if (err) 3868 goto free_syncs; 3869 3870 if (xe_sync_is_ufence(&syncs[num_syncs])) 3871 num_ufence++; 3872 } 3873 3874 if (XE_IOCTL_DBG(xe, num_ufence > 1)) { 3875 err = -EINVAL; 3876 goto free_syncs; 3877 } 3878 3879 if (!args->num_binds) { 3880 err = -ENODATA; 3881 goto free_syncs; 3882 } 3883 3884 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); 3885 for (i = 0; i < args->num_binds; ++i) { 3886 u64 range = bind_ops[i].range; 3887 u64 addr = bind_ops[i].addr; 3888 u32 op = bind_ops[i].op; 3889 u32 flags = bind_ops[i].flags; 3890 u64 obj_offset = bind_ops[i].obj_offset; 3891 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance; 3892 u16 pat_index = bind_ops[i].pat_index; 3893 3894 ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset, 3895 addr, range, op, flags, 3896 prefetch_region, pat_index); 3897 if (IS_ERR(ops[i])) { 3898 err = PTR_ERR(ops[i]); 3899 ops[i] = NULL; 3900 goto unwind_ops; 3901 } 3902 3903 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops); 3904 if (err) 3905 goto unwind_ops; 3906 3907 #ifdef TEST_VM_OPS_ERROR 3908 if (flags & FORCE_OP_ERROR) { 3909 vops.inject_error = true; 3910 vm->xe->vm_inject_error_position = 3911 (vm->xe->vm_inject_error_position + 1) % 3912 FORCE_OP_ERROR_COUNT; 3913 } 3914 #endif 3915 } 3916 3917 /* Nothing to do */ 3918 if (list_empty(&vops.list)) { 3919 err = -ENODATA; 3920 goto unwind_ops; 3921 } 3922 3923 err = xe_vma_ops_alloc(&vops, args->num_binds > 1); 3924 if (err) 3925 goto unwind_ops; 3926 3927 err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops); 3928 if (err) 3929 goto unwind_ops; 3930 3931 fence = vm_bind_ioctl_ops_execute(vm, &vops); 3932 if (IS_ERR(fence)) 3933 err = PTR_ERR(fence); 3934 else 3935 dma_fence_put(fence); 3936 3937 unwind_ops: 3938 if (err && err != -ENODATA) 3939 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); 3940 xe_vma_ops_fini(&vops); 3941 for (i = args->num_binds - 1; i >= 0; --i) 3942 if (ops[i]) 3943 drm_gpuva_ops_free(&vm->gpuvm, ops[i]); 3944 free_syncs: 3945 if (err == -ENODATA) 3946 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs); 3947 while (num_syncs--) 3948 xe_sync_entry_cleanup(&syncs[num_syncs]); 3949 3950 kfree(syncs); 3951 put_obj: 3952 for (i = 0; i < args->num_binds; ++i) 3953 xe_bo_put(bos[i]); 3954 3955 kvfree(ops); 3956 free_bos: 3957 kvfree(bos); 3958 release_vm_lock: 3959 up_write(&vm->lock); 3960 put_exec_queue: 3961 if (q) 3962 xe_exec_queue_put(q); 3963 free_bind_ops: 3964 if (args->num_binds > 1) 3965 kvfree(bind_ops); 3966 put_vm: 3967 xe_vm_put(vm); 3968 return err; 3969 } 3970 3971 /** 3972 * xe_vm_bind_kernel_bo - bind a kernel BO to a VM 3973 * @vm: VM to bind the BO to 3974 * @bo: BO to bind 3975 * @q: exec queue to use for the bind (optional) 3976 * @addr: address at which to bind the BO 3977 * @cache_lvl: PAT cache level to use 3978 * 3979 * Execute a VM bind map operation on a kernel-owned BO to bind it into a 3980 * kernel-owned VM. 3981 * 3982 * Returns a dma_fence to track the binding completion if the job to do so was 3983 * successfully submitted, an error pointer otherwise. 
 */
struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo,
				       struct xe_exec_queue *q, u64 addr,
				       enum xe_cache_level cache_lvl)
{
	struct xe_vma_ops vops;
	struct drm_gpuva_ops *ops = NULL;
	struct dma_fence *fence;
	int err;

	xe_bo_get(bo);
	xe_vm_get(vm);
	if (q)
		xe_exec_queue_get(q);

	down_write(&vm->lock);

	xe_vma_ops_init(&vops, vm, q, NULL, 0);

	ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo),
				       DRM_XE_VM_BIND_OP_MAP, 0, 0,
				       vm->xe->pat.idx[cache_lvl]);
	if (IS_ERR(ops)) {
		err = PTR_ERR(ops);
		goto release_vm_lock;
	}

	err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
	if (err)
		goto release_vm_lock;

	xe_assert(vm->xe, !list_empty(&vops.list));

	err = xe_vma_ops_alloc(&vops, false);
	if (err)
		goto unwind_ops;

	fence = vm_bind_ioctl_ops_execute(vm, &vops);
	if (IS_ERR(fence))
		err = PTR_ERR(fence);

unwind_ops:
	if (err && err != -ENODATA)
		vm_bind_ioctl_ops_unwind(vm, &ops, 1);

	xe_vma_ops_fini(&vops);
	drm_gpuva_ops_free(&vm->gpuvm, ops);

release_vm_lock:
	up_write(&vm->lock);

	if (q)
		xe_exec_queue_put(q);
	xe_vm_put(vm);
	xe_bo_put(bo);

	if (err)
		fence = ERR_PTR(err);

	return fence;
}

/**
 * xe_vm_lock() - Lock the vm's dma_resv object
 * @vm: The struct xe_vm whose lock is to be locked
 * @intr: Whether to perform the wait interruptibly
 *
 * Return: 0 on success, -EINTR if @intr is true and the wait for a
 * contended lock was interrupted. If @intr is false, the function
 * always returns 0.
 */
int xe_vm_lock(struct xe_vm *vm, bool intr)
{
	if (intr)
		return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);

	return dma_resv_lock(xe_vm_resv(vm), NULL);
}

/**
 * xe_vm_unlock() - Unlock the vm's dma_resv object
 * @vm: The struct xe_vm whose lock is to be released.
 *
 * Unlock the vm's dma_resv object that was locked by xe_vm_lock().
 */
void xe_vm_unlock(struct xe_vm *vm)
{
	dma_resv_unlock(xe_vm_resv(vm));
}

/**
 * xe_vm_range_tilemask_tlb_inval - Issue a TLB invalidation on this tilemask for an
 * address range
 * @vm: The VM
 * @start: start address
 * @end: end address
 * @tile_mask: mask of tiles whose GTs should receive the TLB invalidation
 *
 * Issue a range-based TLB invalidation on the GTs of the tiles in @tile_mask.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start,
				   u64 end, u8 tile_mask)
{
	struct xe_tlb_inval_fence
		fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
	struct xe_tile *tile;
	u32 fence_id = 0;
	u8 id;
	int err;

	if (!tile_mask)
		return 0;

	for_each_tile(tile, vm->xe, id) {
		if (!(tile_mask & BIT(id)))
			continue;

		xe_tlb_inval_fence_init(&tile->primary_gt->tlb_inval,
					&fence[fence_id], true);

		err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval,
					 &fence[fence_id], start, end,
					 vm->usm.asid);
		if (err)
			goto wait;
		++fence_id;

		if (!tile->media_gt)
			continue;

		xe_tlb_inval_fence_init(&tile->media_gt->tlb_inval,
					&fence[fence_id], true);

		err = xe_tlb_inval_range(&tile->media_gt->tlb_inval,
					 &fence[fence_id], start, end,
					 vm->usm.asid);
		if (err)
			goto wait;
		++fence_id;
	}

wait:
	for (id = 0; id < fence_id; ++id)
		xe_tlb_inval_fence_wait(&fence[id]);

	return err;
}

/**
 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
 * @vma: VMA to invalidate
 *
 * Walks the page-table leaves, zeroes the entries owned by this VMA,
 * invalidates the TLBs, and blocks until the TLB invalidation is complete.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_vm_invalidate_vma(struct xe_vma *vma)
{
	struct xe_device *xe = xe_vma_vm(vma)->xe;
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_tile *tile;
	u8 tile_mask = 0;
	int ret = 0;
	u8 id;

	xe_assert(xe, !xe_vma_is_null(vma));
	xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma));
	trace_xe_vma_invalidate(vma);

	vm_dbg(&vm->xe->drm,
	       "INVALIDATE: addr=0x%016llx, range=0x%016llx",
	       xe_vma_start(vma), xe_vma_size(vma));

	/*
	 * Check that we don't race with page-table updates; the
	 * tile_invalidated update is safe.
	 */
	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
		if (xe_vma_is_userptr(vma)) {
			lockdep_assert(lockdep_is_held_type(&vm->userptr.notifier_lock, 0) ||
				       (lockdep_is_held_type(&vm->userptr.notifier_lock, 1) &&
					lockdep_is_held(&xe_vm_resv(vm)->lock.base)));

			WARN_ON_ONCE(!mmu_interval_check_retry
				     (&to_userptr_vma(vma)->userptr.notifier,
				      to_userptr_vma(vma)->userptr.notifier_seq));
			WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm),
							     DMA_RESV_USAGE_BOOKKEEP));

		} else {
			xe_bo_assert_held(xe_vma_bo(vma));
		}
	}

	for_each_tile(tile, xe, id)
		if (xe_pt_zap_ptes(tile, vma))
			tile_mask |= BIT(id);

	xe_device_wmb(xe);

	ret = xe_vm_range_tilemask_tlb_inval(xe_vma_vm(vma), xe_vma_start(vma),
					     xe_vma_end(vma), tile_mask);

	/* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
	WRITE_ONCE(vma->tile_invalidated, vma->tile_mask);

	return ret;
}

int xe_vm_validate_protected(struct xe_vm *vm)
{
	struct drm_gpuva *gpuva;
	int err = 0;

	if (!vm)
		return -ENODEV;

	mutex_lock(&vm->snap_mutex);

	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);
		struct xe_bo *bo = vma->gpuva.gem.obj ?
int xe_vm_validate_protected(struct xe_vm *vm)
{
	struct drm_gpuva *gpuva;
	int err = 0;

	if (!vm)
		return -ENODEV;

	mutex_lock(&vm->snap_mutex);

	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);
		struct xe_bo *bo = vma->gpuva.gem.obj ?
			gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;

		if (!bo)
			continue;

		if (xe_bo_is_protected(bo)) {
			err = xe_pxp_bo_key_check(vm->xe->pxp, bo);
			if (err)
				break;
		}
	}

	mutex_unlock(&vm->snap_mutex);
	return err;
}
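
/*
 * Example (illustrative sketch, not part of the driver): a submission path
 * could use xe_vm_validate_protected() to refuse work against a VM whose
 * protected (PXP) BOs no longer have a valid key. The surrounding code is
 * assumed; only the call itself exists in this file.
 *
 *	err = xe_vm_validate_protected(vm);
 *	if (err)
 *		return err;
 */
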
struct xe_vm_snapshot {
	unsigned long num_snaps;
	struct {
		u64 ofs, bo_ofs;
		unsigned long len;
		struct xe_bo *bo;
		void *data;
		struct mm_struct *mm;
	} snap[];
};

struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
{
	unsigned long num_snaps = 0, i;
	struct xe_vm_snapshot *snap = NULL;
	struct drm_gpuva *gpuva;

	if (!vm)
		return NULL;

	mutex_lock(&vm->snap_mutex);
	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		if (gpuva->flags & XE_VMA_DUMPABLE)
			num_snaps++;
	}

	if (num_snaps)
		snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
	if (!snap) {
		snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV);
		goto out_unlock;
	}

	snap->num_snaps = num_snaps;
	i = 0;
	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);
		struct xe_bo *bo = vma->gpuva.gem.obj ?
			gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;

		if (!(gpuva->flags & XE_VMA_DUMPABLE))
			continue;

		snap->snap[i].ofs = xe_vma_start(vma);
		snap->snap[i].len = xe_vma_size(vma);
		if (bo) {
			snap->snap[i].bo = xe_bo_get(bo);
			snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
		} else if (xe_vma_is_userptr(vma)) {
			struct mm_struct *mm =
				to_userptr_vma(vma)->userptr.notifier.mm;

			if (mmget_not_zero(mm))
				snap->snap[i].mm = mm;
			else
				snap->snap[i].data = ERR_PTR(-EFAULT);

			snap->snap[i].bo_ofs = xe_vma_userptr(vma);
		} else {
			snap->snap[i].data = ERR_PTR(-ENOENT);
		}
		i++;
	}

out_unlock:
	mutex_unlock(&vm->snap_mutex);
	return snap;
}

void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
{
	if (IS_ERR_OR_NULL(snap))
		return;

	for (int i = 0; i < snap->num_snaps; i++) {
		struct xe_bo *bo = snap->snap[i].bo;
		int err;

		if (IS_ERR(snap->snap[i].data))
			continue;

		snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
		if (!snap->snap[i].data) {
			snap->snap[i].data = ERR_PTR(-ENOMEM);
			goto cleanup_bo;
		}

		if (bo) {
			err = xe_bo_read(bo, snap->snap[i].bo_ofs,
					 snap->snap[i].data, snap->snap[i].len);
		} else {
			void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;

			kthread_use_mm(snap->snap[i].mm);
			if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
				err = 0;
			else
				err = -EFAULT;
			kthread_unuse_mm(snap->snap[i].mm);

			mmput(snap->snap[i].mm);
			snap->snap[i].mm = NULL;
		}

		if (err) {
			kvfree(snap->snap[i].data);
			snap->snap[i].data = ERR_PTR(err);
		}

cleanup_bo:
		xe_bo_put(bo);
		snap->snap[i].bo = NULL;
	}
}

void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
{
	unsigned long i, j;

	if (IS_ERR_OR_NULL(snap)) {
		drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
		return;
	}

	for (i = 0; i < snap->num_snaps; i++) {
		drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);

		if (IS_ERR(snap->snap[i].data)) {
			drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs,
				   PTR_ERR(snap->snap[i].data));
			continue;
		}

		drm_printf(p, "[%llx].data: ", snap->snap[i].ofs);

		for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
			u32 *val = snap->snap[i].data + j;
			char dumped[ASCII85_BUFSZ];

			drm_puts(p, ascii85_encode(*val, dumped));
		}

		drm_puts(p, "\n");

		if (drm_coredump_printer_is_full(p))
			return;
	}
}

void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
{
	unsigned long i;

	if (IS_ERR_OR_NULL(snap))
		return;

	for (i = 0; i < snap->num_snaps; i++) {
		if (!IS_ERR(snap->snap[i].data))
			kvfree(snap->snap[i].data);
		xe_bo_put(snap->snap[i].bo);
		if (snap->snap[i].mm)
			mmput(snap->snap[i].mm);
	}
	kvfree(snap);
}
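
/*
 * Example (illustrative sketch, not part of the driver): the two-stage
 * snapshot flow as a coredump-style consumer might use it. The capture step
 * only records the dumpable ranges and can run with GFP_NOWAIT constraints,
 * while the delayed step later copies the actual contents from process
 * context. The drm_printer "p" is assumed to be set up by the caller; the
 * helpers accept ERR_PTR/NULL snapshots, so no extra error check is needed.
 *
 *	struct xe_vm_snapshot *snap;
 *
 *	snap = xe_vm_snapshot_capture(vm);
 *	xe_vm_snapshot_capture_delayed(snap);
 *	xe_vm_snapshot_print(snap, &p);
 *	xe_vm_snapshot_free(snap);
 */
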
/**
 * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations
 * @xe: Pointer to the XE device structure
 * @vma: Pointer to the virtual memory area (VMA) structure
 * @is_atomic: True if this is an atomic access from the page-fault path
 *
 * This function determines whether the given VMA needs to be migrated to
 * VRAM in order to perform an atomic GPU operation.
 *
 * Return:
 * 1 - Migration to VRAM is required
 * 0 - Migration is not required
 * -EACCES - Invalid access for atomic memory attr
 */
int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic)
{
	u32 atomic_access = xe_vma_bo(vma) ? xe_vma_bo(vma)->attr.atomic_access :
					     vma->attr.atomic_access;

	if (!IS_DGFX(xe) || !is_atomic)
		return false;

	/*
	 * NOTE: The checks implemented here are platform-specific. For
	 * instance, on a device supporting CXL atomics, these would ideally
	 * work universally without additional handling.
	 */
	switch (atomic_access) {
	case DRM_XE_ATOMIC_DEVICE:
		return !xe->info.has_device_atomics_on_smem;

	case DRM_XE_ATOMIC_CPU:
		return -EACCES;

	case DRM_XE_ATOMIC_UNDEFINED:
	case DRM_XE_ATOMIC_GLOBAL:
	default:
		return 1;
	}
}
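
/*
 * Example (illustrative sketch, not part of the driver): how a page-fault
 * handler might act on the tri-state return value above. The migration step
 * is only described in words; no real helper is named.
 *
 *	ret = xe_vma_need_vram_for_atomic(xe, vma, is_atomic);
 *	if (ret < 0)
 *		return ret;
 *	if (ret)
 *		... migrate the backing store to VRAM before retrying ...
 *	else
 *		... service the fault from its current placement ...
 */
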
static int xe_vm_alloc_vma(struct xe_vm *vm,
			   struct drm_gpuvm_map_req *map_req,
			   bool is_madvise)
{
	struct xe_vma_ops vops;
	struct drm_gpuva_ops *ops = NULL;
	struct drm_gpuva_op *__op;
	bool is_cpu_addr_mirror = false;
	bool remap_op = false;
	struct xe_vma_mem_attr tmp_attr;
	u16 default_pat;
	int err;

	lockdep_assert_held_write(&vm->lock);

	if (is_madvise)
		ops = drm_gpuvm_madvise_ops_create(&vm->gpuvm, map_req);
	else
		ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, map_req);

	if (IS_ERR(ops))
		return PTR_ERR(ops);

	if (list_empty(&ops->list)) {
		err = 0;
		goto free_ops;
	}

	drm_gpuva_for_each_op(__op, ops) {
		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
		struct xe_vma *vma = NULL;

		if (!is_madvise) {
			if (__op->op == DRM_GPUVA_OP_UNMAP) {
				vma = gpuva_to_vma(op->base.unmap.va);
				XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma));
				default_pat = vma->attr.default_pat_index;
			}

			if (__op->op == DRM_GPUVA_OP_REMAP) {
				vma = gpuva_to_vma(op->base.remap.unmap->va);
				default_pat = vma->attr.default_pat_index;
			}

			if (__op->op == DRM_GPUVA_OP_MAP) {
				op->map.is_cpu_addr_mirror = true;
				op->map.pat_index = default_pat;
			}
		} else {
			if (__op->op == DRM_GPUVA_OP_REMAP) {
				vma = gpuva_to_vma(op->base.remap.unmap->va);
				xe_assert(vm->xe, !remap_op);
				xe_assert(vm->xe, xe_vma_has_no_bo(vma));
				remap_op = true;

				if (xe_vma_is_cpu_addr_mirror(vma))
					is_cpu_addr_mirror = true;
				else
					is_cpu_addr_mirror = false;
			}

			if (__op->op == DRM_GPUVA_OP_MAP) {
				xe_assert(vm->xe, remap_op);
				remap_op = false;
				/*
				 * In case of madvise ops DRM_GPUVA_OP_MAP is
				 * always after DRM_GPUVA_OP_REMAP, so ensure
				 * we assign op->map.is_cpu_addr_mirror true
				 * if REMAP is for xe_vma_is_cpu_addr_mirror vma
				 */
				op->map.is_cpu_addr_mirror = is_cpu_addr_mirror;
			}
		}
		print_op(vm->xe, __op);
	}

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);

	if (is_madvise)
		vops.flags |= XE_VMA_OPS_FLAG_MADVISE;

	err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
	if (err)
		goto unwind_ops;

	xe_vm_lock(vm, false);

	drm_gpuva_for_each_op(__op, ops) {
		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
		struct xe_vma *vma;

		if (__op->op == DRM_GPUVA_OP_UNMAP) {
			vma = gpuva_to_vma(op->base.unmap.va);
			/* There should be no unmap for madvise */
			if (is_madvise)
				XE_WARN_ON("UNEXPECTED UNMAP");

			xe_vma_destroy(vma, NULL);
		} else if (__op->op == DRM_GPUVA_OP_REMAP) {
			vma = gpuva_to_vma(op->base.remap.unmap->va);
			/*
			 * For madvise ops, store the attributes of the VMA
			 * being remap-unmapped so they can be assigned to the
			 * newly created MAP VMA.
			 */
			if (is_madvise)
				tmp_attr = vma->attr;

			xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL);
		} else if (__op->op == DRM_GPUVA_OP_MAP) {
			vma = op->map.vma;
			/*
			 * For a madvise call, a MAP op is always preceded by a
			 * REMAP op, so tmp_attr holds valid values and can
			 * safely be copied to the new VMA.
			 */
			if (is_madvise)
				vma->attr = tmp_attr;
		}
	}

	xe_vm_unlock(vm);
	drm_gpuva_ops_free(&vm->gpuvm, ops);
	return 0;

unwind_ops:
	vm_bind_ioctl_ops_unwind(vm, &ops, 1);
free_ops:
	drm_gpuva_ops_free(&vm->gpuvm, ops);
	return err;
}

/**
 * xe_vm_alloc_madvise_vma - Allocate VMAs with madvise ops
 * @vm: Pointer to the xe_vm structure
 * @start: Starting input address
 * @range: Size of the input range
 *
 * This function splits existing VMAs to create new VMAs covering the
 * user-provided input range.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
{
	struct drm_gpuvm_map_req map_req = {
		.map.va.addr = start,
		.map.va.range = range,
	};

	lockdep_assert_held_write(&vm->lock);

	vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range);

	return xe_vm_alloc_vma(vm, &map_req, true);
}

/**
 * xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU addr mirror vma
 * @vm: Pointer to the xe_vm structure
 * @start: Starting input address
 * @range: Size of the input range
 *
 * This function splits/merges existing VMAs to create new VMAs covering the
 * user-provided input range.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
{
	struct drm_gpuvm_map_req map_req = {
		.map.va.addr = start,
		.map.va.range = range,
	};

	lockdep_assert_held_write(&vm->lock);

	vm_dbg(&vm->xe->drm, "CPU_ADDR_MIRROR_VMA_OPS_CREATE: addr=0x%016llx, size=0x%016llx",
	       start, range);

	return xe_vm_alloc_vma(vm, &map_req, false);
}
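
/*
 * Example (illustrative sketch, not part of the driver): both wrappers above
 * require the VM lock held in write mode; start and range are assumed to have
 * been validated and aligned by the caller.
 *
 *	down_write(&vm->lock);
 *	err = xe_vm_alloc_madvise_vma(vm, start, range);
 *	up_write(&vm->lock);
 */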