// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_vm.h"

#include <linux/dma-fence-array.h>
#include <linux/nospec.h>

#include <drm/drm_drv.h>
#include <drm/drm_exec.h>
#include <drm/drm_print.h>
#include <drm/ttm/ttm_tt.h>
#include <uapi/drm/xe_drm.h>
#include <linux/ascii85.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/swap.h>

#include <generated/xe_wa_oob.h>

#include "regs/xe_gtt_defs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue.h"
#include "xe_gt_pagefault.h"
#include "xe_migrate.h"
#include "xe_pat.h"
#include "xe_pm.h"
#include "xe_preempt_fence.h"
#include "xe_pt.h"
#include "xe_pxp.h"
#include "xe_res_cursor.h"
#include "xe_svm.h"
#include "xe_sync.h"
#include "xe_tile.h"
#include "xe_tlb_inval.h"
#include "xe_trace_bo.h"
#include "xe_wa.h"
#include "xe_hmm.h"

static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
{
	return vm->gpuvm.r_obj;
}

/**
 * xe_vma_userptr_check_repin() - Advisory check for repin needed
 * @uvma: The userptr vma
 *
 * Check if the userptr vma has been invalidated since last successful
 * repin. The check is advisory only and the function can be called
 * without the vm->userptr.notifier_lock held. There is no guarantee that the
 * vma userptr will remain valid after a lockless check, so typically
 * the call needs to be followed by a proper check under the notifier_lock.
 *
 * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
 */
int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
{
	return mmu_interval_check_retry(&uvma->userptr.notifier,
					uvma->userptr.notifier_seq) ?
67 -EAGAIN : 0; 68 } 69 70 int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) 71 { 72 struct xe_vma *vma = &uvma->vma; 73 struct xe_vm *vm = xe_vma_vm(vma); 74 struct xe_device *xe = vm->xe; 75 76 lockdep_assert_held(&vm->lock); 77 xe_assert(xe, xe_vma_is_userptr(vma)); 78 79 return xe_hmm_userptr_populate_range(uvma, false); 80 } 81 82 static bool preempt_fences_waiting(struct xe_vm *vm) 83 { 84 struct xe_exec_queue *q; 85 86 lockdep_assert_held(&vm->lock); 87 xe_vm_assert_held(vm); 88 89 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 90 if (!q->lr.pfence || 91 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, 92 &q->lr.pfence->flags)) { 93 return true; 94 } 95 } 96 97 return false; 98 } 99 100 static void free_preempt_fences(struct list_head *list) 101 { 102 struct list_head *link, *next; 103 104 list_for_each_safe(link, next, list) 105 xe_preempt_fence_free(to_preempt_fence_from_link(link)); 106 } 107 108 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list, 109 unsigned int *count) 110 { 111 lockdep_assert_held(&vm->lock); 112 xe_vm_assert_held(vm); 113 114 if (*count >= vm->preempt.num_exec_queues) 115 return 0; 116 117 for (; *count < vm->preempt.num_exec_queues; ++(*count)) { 118 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc(); 119 120 if (IS_ERR(pfence)) 121 return PTR_ERR(pfence); 122 123 list_move_tail(xe_preempt_fence_link(pfence), list); 124 } 125 126 return 0; 127 } 128 129 static int wait_for_existing_preempt_fences(struct xe_vm *vm) 130 { 131 struct xe_exec_queue *q; 132 133 xe_vm_assert_held(vm); 134 135 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 136 if (q->lr.pfence) { 137 long timeout = dma_fence_wait(q->lr.pfence, false); 138 139 /* Only -ETIME on fence indicates VM needs to be killed */ 140 if (timeout < 0 || q->lr.pfence->error == -ETIME) 141 return -ETIME; 142 143 dma_fence_put(q->lr.pfence); 144 q->lr.pfence = NULL; 145 } 146 } 147 148 return 0; 149 } 150 151 static bool xe_vm_is_idle(struct xe_vm *vm) 152 { 153 struct xe_exec_queue *q; 154 155 xe_vm_assert_held(vm); 156 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 157 if (!xe_exec_queue_is_idle(q)) 158 return false; 159 } 160 161 return true; 162 } 163 164 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list) 165 { 166 struct list_head *link; 167 struct xe_exec_queue *q; 168 169 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 170 struct dma_fence *fence; 171 172 link = list->next; 173 xe_assert(vm->xe, link != list); 174 175 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link), 176 q, q->lr.context, 177 ++q->lr.seqno); 178 dma_fence_put(q->lr.pfence); 179 q->lr.pfence = fence; 180 } 181 } 182 183 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) 184 { 185 struct xe_exec_queue *q; 186 int err; 187 188 xe_bo_assert_held(bo); 189 190 if (!vm->preempt.num_exec_queues) 191 return 0; 192 193 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues); 194 if (err) 195 return err; 196 197 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) 198 if (q->lr.pfence) { 199 dma_resv_add_fence(bo->ttm.base.resv, 200 q->lr.pfence, 201 DMA_RESV_USAGE_BOOKKEEP); 202 } 203 204 return 0; 205 } 206 207 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm, 208 struct drm_exec *exec) 209 { 210 struct xe_exec_queue *q; 211 212 lockdep_assert_held(&vm->lock); 213 xe_vm_assert_held(vm); 214 215 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 216 q->ops->resume(q); 
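		/*
		 * Re-install the armed preempt fence in the VM's reservation
		 * object and the reservation objects of its external BOs.
		 */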
		drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
					 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
	}
}

int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	struct drm_gpuvm_exec vm_exec = {
		.vm = &vm->gpuvm,
		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
		.num_fences = 1,
	};
	struct drm_exec *exec = &vm_exec.exec;
	struct dma_fence *pfence;
	int err;
	bool wait;

	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));

	down_write(&vm->lock);
	err = drm_gpuvm_exec_lock(&vm_exec);
	if (err)
		goto out_up_write;

	pfence = xe_preempt_fence_create(q, q->lr.context,
					 ++q->lr.seqno);
	if (!pfence) {
		err = -ENOMEM;
		goto out_fini;
	}

	list_add(&q->lr.link, &vm->preempt.exec_queues);
	++vm->preempt.num_exec_queues;
	q->lr.pfence = pfence;

	down_read(&vm->userptr.notifier_lock);

	drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
				 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);

	/*
	 * Check whether a preemption on the VM or a userptr invalidation is
	 * in flight; if so, trigger this preempt fence to sync state with
	 * other preempt fences on the VM.
	 */
	wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
	if (wait)
		dma_fence_enable_sw_signaling(pfence);

	up_read(&vm->userptr.notifier_lock);

out_fini:
	drm_exec_fini(exec);
out_up_write:
	up_write(&vm->lock);

	return err;
}
ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO);

/**
 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
 * @vm: The VM.
 * @q: The exec_queue
 *
 * Note that this function might be called multiple times on the same queue.
 */
void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	if (!xe_vm_in_preempt_fence_mode(vm))
		return;

	down_write(&vm->lock);
	if (!list_empty(&q->lr.link)) {
		list_del_init(&q->lr.link);
		--vm->preempt.num_exec_queues;
	}
	if (q->lr.pfence) {
		dma_fence_enable_sw_signaling(q->lr.pfence);
		dma_fence_put(q->lr.pfence);
		q->lr.pfence = NULL;
	}
	up_write(&vm->lock);
}

/**
 * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
 * that need repinning.
 * @vm: The VM.
 *
 * This function checks for whether the VM has userptrs that need repinning,
 * and provides a release-type barrier on the userptr.notifier_lock after
 * checking.
 *
 * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
 */
int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
{
	lockdep_assert_held_read(&vm->userptr.notifier_lock);

	return (list_empty(&vm->userptr.repin_list) &&
		list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
}

#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000

/**
 * xe_vm_kill() - VM Kill
 * @vm: The VM.
 * @unlocked: Flag indicating the VM's dma-resv is not held
 *
 * Kill the VM by setting a banned flag indicating the VM is no longer
 * available for use. If in preempt fence mode, also kill all exec queues
 * attached to the VM.
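 *
 * The caller must hold the vm->lock.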
 */
void xe_vm_kill(struct xe_vm *vm, bool unlocked)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);

	if (unlocked)
		xe_vm_lock(vm, false);

	vm->flags |= XE_VM_FLAG_BANNED;
	trace_xe_vm_kill(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
		q->ops->kill(q);

	if (unlocked)
		xe_vm_unlock(vm);

	/* TODO: Inform user the VM is banned */
}

/**
 * xe_vm_validate_should_retry() - Whether to retry after a validate error.
 * @exec: The drm_exec object used for locking before validation.
 * @err: The error returned from ttm_bo_validate().
 * @end: A ktime_t cookie that should be set to 0 before first use and
 * that should be reused on subsequent calls.
 *
 * With multiple active VMs, under memory pressure, it is possible that
 * ttm_bo_validate() runs into -EDEADLK and in such a case returns -ENOMEM.
 * Until ttm properly handles locking in such scenarios, the best thing the
 * driver can do is retry with a timeout. Check if that is necessary, and
 * if so unlock the drm_exec's objects while keeping the ticket to prepare
 * for a rerun.
 *
 * Return: true if a retry after drm_exec_init() is recommended;
 * false otherwise.
 */
bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
{
	ktime_t cur;

	if (err != -ENOMEM)
		return false;

	cur = ktime_get();
	*end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
	if (!ktime_before(cur, *end))
		return false;

	msleep(20);
	return true;
}

static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
{
	struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
	struct drm_gpuva *gpuva;
	int ret;

	lockdep_assert_held(&vm->lock);
	drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
		list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
			       &vm->rebind_list);

	ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
	if (ret)
		return ret;

	vm_bo->evicted = false;
	return 0;
}

/**
 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
 * @vm: The vm for which we are rebinding.
 * @exec: The struct drm_exec with the locked GEM objects.
 * @num_fences: The number of fences to reserve for the operation, not
 * including rebinds and validations.
 *
 * Validates all evicted gem objects and rebinds their vmas. Note that
 * rebindings may cause evictions and hence the validation-rebind
 * sequence is rerun until there are no more objects to validate.
 *
 * Return: 0 on success, negative error code on error. In particular,
 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
 * the drm_exec transaction needs to be restarted.
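 *
 * Illustrative caller sketch (modelled on xe_preempt_work_begin() below,
 * simplified; not a drop-in snippet):
 *
 *	drm_exec_until_all_locked(&exec) {
 *		err = drm_gpuvm_prepare_vm(&vm->gpuvm, &exec, 0);
 *		if (!err)
 *			err = drm_gpuvm_prepare_objects(&vm->gpuvm, &exec, 0);
 *		if (!err)
 *			err = xe_vm_validate_rebind(vm, &exec, num_fences);
 *		drm_exec_retry_on_contention(&exec);
 *	}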
419 */ 420 int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec, 421 unsigned int num_fences) 422 { 423 struct drm_gem_object *obj; 424 unsigned long index; 425 int ret; 426 427 do { 428 ret = drm_gpuvm_validate(&vm->gpuvm, exec); 429 if (ret) 430 return ret; 431 432 ret = xe_vm_rebind(vm, false); 433 if (ret) 434 return ret; 435 } while (!list_empty(&vm->gpuvm.evict.list)); 436 437 drm_exec_for_each_locked_object(exec, index, obj) { 438 ret = dma_resv_reserve_fences(obj->resv, num_fences); 439 if (ret) 440 return ret; 441 } 442 443 return 0; 444 } 445 446 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, 447 bool *done) 448 { 449 int err; 450 451 err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0); 452 if (err) 453 return err; 454 455 if (xe_vm_is_idle(vm)) { 456 vm->preempt.rebind_deactivated = true; 457 *done = true; 458 return 0; 459 } 460 461 if (!preempt_fences_waiting(vm)) { 462 *done = true; 463 return 0; 464 } 465 466 err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0); 467 if (err) 468 return err; 469 470 err = wait_for_existing_preempt_fences(vm); 471 if (err) 472 return err; 473 474 /* 475 * Add validation and rebinding to the locking loop since both can 476 * cause evictions which may require blocing dma_resv locks. 477 * The fence reservation here is intended for the new preempt fences 478 * we attach at the end of the rebind work. 479 */ 480 return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues); 481 } 482 483 static void preempt_rebind_work_func(struct work_struct *w) 484 { 485 struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work); 486 struct drm_exec exec; 487 unsigned int fence_count = 0; 488 LIST_HEAD(preempt_fences); 489 ktime_t end = 0; 490 int err = 0; 491 long wait; 492 int __maybe_unused tries = 0; 493 494 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); 495 trace_xe_vm_rebind_worker_enter(vm); 496 497 down_write(&vm->lock); 498 499 if (xe_vm_is_closed_or_banned(vm)) { 500 up_write(&vm->lock); 501 trace_xe_vm_rebind_worker_exit(vm); 502 return; 503 } 504 505 retry: 506 if (xe_vm_userptr_check_repin(vm)) { 507 err = xe_vm_userptr_pin(vm); 508 if (err) 509 goto out_unlock_outer; 510 } 511 512 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); 513 514 drm_exec_until_all_locked(&exec) { 515 bool done = false; 516 517 err = xe_preempt_work_begin(&exec, vm, &done); 518 drm_exec_retry_on_contention(&exec); 519 if (err || done) { 520 drm_exec_fini(&exec); 521 if (err && xe_vm_validate_should_retry(&exec, err, &end)) 522 err = -EAGAIN; 523 524 goto out_unlock_outer; 525 } 526 } 527 528 err = alloc_preempt_fences(vm, &preempt_fences, &fence_count); 529 if (err) 530 goto out_unlock; 531 532 err = xe_vm_rebind(vm, true); 533 if (err) 534 goto out_unlock; 535 536 /* Wait on rebinds and munmap style VM unbinds */ 537 wait = dma_resv_wait_timeout(xe_vm_resv(vm), 538 DMA_RESV_USAGE_KERNEL, 539 false, MAX_SCHEDULE_TIMEOUT); 540 if (wait <= 0) { 541 err = -ETIME; 542 goto out_unlock; 543 } 544 545 #define retry_required(__tries, __vm) \ 546 (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? 
\
	 (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
	 __xe_vm_userptr_needs_repin(__vm))

	down_read(&vm->userptr.notifier_lock);
	if (retry_required(tries, vm)) {
		up_read(&vm->userptr.notifier_lock);
		err = -EAGAIN;
		goto out_unlock;
	}

#undef retry_required

	spin_lock(&vm->xe->ttm.lru_lock);
	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
	spin_unlock(&vm->xe->ttm.lru_lock);

	/* Point of no return. */
	arm_preempt_fences(vm, &preempt_fences);
	resume_and_reinstall_preempt_fences(vm, &exec);
	up_read(&vm->userptr.notifier_lock);

out_unlock:
	drm_exec_fini(&exec);
out_unlock_outer:
	if (err == -EAGAIN) {
		trace_xe_vm_rebind_worker_retry(vm);
		goto retry;
	}

	if (err) {
		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
		xe_vm_kill(vm, true);
	}
	up_write(&vm->lock);

	free_preempt_fences(&preempt_fences);

	trace_xe_vm_rebind_worker_exit(vm);
}

static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma)
{
	struct xe_userptr *userptr = &uvma->userptr;
	struct xe_vma *vma = &uvma->vma;
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	long err;

	/*
	 * Tell exec and rebind worker they need to repin and rebind this
	 * userptr.
	 */
	if (!xe_vm_in_fault_mode(vm) &&
	    !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
		spin_lock(&vm->userptr.invalidated_lock);
		list_move_tail(&userptr->invalidate_link,
			       &vm->userptr.invalidated);
		spin_unlock(&vm->userptr.invalidated_lock);
	}

	/*
	 * Preempt fences turn into schedule disables, pipeline these.
	 * Note that even in fault mode, we need to wait for binds and
	 * unbinds to complete, and those are attached as BOOKKEEP fences
	 * to the vm.
612 */ 613 dma_resv_iter_begin(&cursor, xe_vm_resv(vm), 614 DMA_RESV_USAGE_BOOKKEEP); 615 dma_resv_for_each_fence_unlocked(&cursor, fence) 616 dma_fence_enable_sw_signaling(fence); 617 dma_resv_iter_end(&cursor); 618 619 err = dma_resv_wait_timeout(xe_vm_resv(vm), 620 DMA_RESV_USAGE_BOOKKEEP, 621 false, MAX_SCHEDULE_TIMEOUT); 622 XE_WARN_ON(err <= 0); 623 624 if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) { 625 err = xe_vm_invalidate_vma(vma); 626 XE_WARN_ON(err); 627 } 628 629 xe_hmm_userptr_unmap(uvma); 630 } 631 632 static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, 633 const struct mmu_notifier_range *range, 634 unsigned long cur_seq) 635 { 636 struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier); 637 struct xe_vma *vma = &uvma->vma; 638 struct xe_vm *vm = xe_vma_vm(vma); 639 640 xe_assert(vm->xe, xe_vma_is_userptr(vma)); 641 trace_xe_vma_userptr_invalidate(vma); 642 643 if (!mmu_notifier_range_blockable(range)) 644 return false; 645 646 vm_dbg(&xe_vma_vm(vma)->xe->drm, 647 "NOTIFIER: addr=0x%016llx, range=0x%016llx", 648 xe_vma_start(vma), xe_vma_size(vma)); 649 650 down_write(&vm->userptr.notifier_lock); 651 mmu_interval_set_seq(mni, cur_seq); 652 653 __vma_userptr_invalidate(vm, uvma); 654 up_write(&vm->userptr.notifier_lock); 655 trace_xe_vma_userptr_invalidate_complete(vma); 656 657 return true; 658 } 659 660 static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { 661 .invalidate = vma_userptr_invalidate, 662 }; 663 664 #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) 665 /** 666 * xe_vma_userptr_force_invalidate() - force invalidate a userptr 667 * @uvma: The userptr vma to invalidate 668 * 669 * Perform a forced userptr invalidation for testing purposes. 670 */ 671 void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) 672 { 673 struct xe_vm *vm = xe_vma_vm(&uvma->vma); 674 675 /* Protect against concurrent userptr pinning */ 676 lockdep_assert_held(&vm->lock); 677 /* Protect against concurrent notifiers */ 678 lockdep_assert_held(&vm->userptr.notifier_lock); 679 /* 680 * Protect against concurrent instances of this function and 681 * the critical exec sections 682 */ 683 xe_vm_assert_held(vm); 684 685 if (!mmu_interval_read_retry(&uvma->userptr.notifier, 686 uvma->userptr.notifier_seq)) 687 uvma->userptr.notifier_seq -= 2; 688 __vma_userptr_invalidate(vm, uvma); 689 } 690 #endif 691 692 int xe_vm_userptr_pin(struct xe_vm *vm) 693 { 694 struct xe_userptr_vma *uvma, *next; 695 int err = 0; 696 697 xe_assert(vm->xe, !xe_vm_in_fault_mode(vm)); 698 lockdep_assert_held_write(&vm->lock); 699 700 /* Collect invalidated userptrs */ 701 spin_lock(&vm->userptr.invalidated_lock); 702 xe_assert(vm->xe, list_empty(&vm->userptr.repin_list)); 703 list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated, 704 userptr.invalidate_link) { 705 list_del_init(&uvma->userptr.invalidate_link); 706 list_add_tail(&uvma->userptr.repin_link, 707 &vm->userptr.repin_list); 708 } 709 spin_unlock(&vm->userptr.invalidated_lock); 710 711 /* Pin and move to bind list */ 712 list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, 713 userptr.repin_link) { 714 err = xe_vma_userptr_pin_pages(uvma); 715 if (err == -EFAULT) { 716 list_del_init(&uvma->userptr.repin_link); 717 /* 718 * We might have already done the pin once already, but 719 * then had to retry before the re-bind happened, due 720 * some other condition in the caller, but in the 721 * meantime the userptr got dinged by the notifier such 722 * that we need to 
revalidate here, but this time we hit 723 * the EFAULT. In such a case make sure we remove 724 * ourselves from the rebind list to avoid going down in 725 * flames. 726 */ 727 if (!list_empty(&uvma->vma.combined_links.rebind)) 728 list_del_init(&uvma->vma.combined_links.rebind); 729 730 /* Wait for pending binds */ 731 xe_vm_lock(vm, false); 732 dma_resv_wait_timeout(xe_vm_resv(vm), 733 DMA_RESV_USAGE_BOOKKEEP, 734 false, MAX_SCHEDULE_TIMEOUT); 735 736 down_read(&vm->userptr.notifier_lock); 737 err = xe_vm_invalidate_vma(&uvma->vma); 738 up_read(&vm->userptr.notifier_lock); 739 xe_vm_unlock(vm); 740 if (err) 741 break; 742 } else { 743 if (err) 744 break; 745 746 list_del_init(&uvma->userptr.repin_link); 747 list_move_tail(&uvma->vma.combined_links.rebind, 748 &vm->rebind_list); 749 } 750 } 751 752 if (err) { 753 down_write(&vm->userptr.notifier_lock); 754 spin_lock(&vm->userptr.invalidated_lock); 755 list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, 756 userptr.repin_link) { 757 list_del_init(&uvma->userptr.repin_link); 758 list_move_tail(&uvma->userptr.invalidate_link, 759 &vm->userptr.invalidated); 760 } 761 spin_unlock(&vm->userptr.invalidated_lock); 762 up_write(&vm->userptr.notifier_lock); 763 } 764 return err; 765 } 766 767 /** 768 * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs 769 * that need repinning. 770 * @vm: The VM. 771 * 772 * This function does an advisory check for whether the VM has userptrs that 773 * need repinning. 774 * 775 * Return: 0 if there are no indications of userptrs needing repinning, 776 * -EAGAIN if there are. 777 */ 778 int xe_vm_userptr_check_repin(struct xe_vm *vm) 779 { 780 return (list_empty_careful(&vm->userptr.repin_list) && 781 list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN; 782 } 783 784 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) 785 { 786 int i; 787 788 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) { 789 if (!vops->pt_update_ops[i].num_ops) 790 continue; 791 792 vops->pt_update_ops[i].ops = 793 kmalloc_array(vops->pt_update_ops[i].num_ops, 794 sizeof(*vops->pt_update_ops[i].ops), 795 GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 796 if (!vops->pt_update_ops[i].ops) 797 return array_of_binds ? 
-ENOBUFS : -ENOMEM; 798 } 799 800 return 0; 801 } 802 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO); 803 804 static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op) 805 { 806 struct xe_vma *vma; 807 808 vma = gpuva_to_vma(op->base.prefetch.va); 809 810 if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma)) 811 xa_destroy(&op->prefetch_range.range); 812 } 813 814 static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops) 815 { 816 struct xe_vma_op *op; 817 818 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) 819 return; 820 821 list_for_each_entry(op, &vops->list, link) 822 xe_vma_svm_prefetch_op_fini(op); 823 } 824 825 static void xe_vma_ops_fini(struct xe_vma_ops *vops) 826 { 827 int i; 828 829 xe_vma_svm_prefetch_ops_fini(vops); 830 831 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 832 kfree(vops->pt_update_ops[i].ops); 833 } 834 835 static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val) 836 { 837 int i; 838 839 if (!inc_val) 840 return; 841 842 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 843 if (BIT(i) & tile_mask) 844 vops->pt_update_ops[i].num_ops += inc_val; 845 } 846 847 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, 848 u8 tile_mask) 849 { 850 INIT_LIST_HEAD(&op->link); 851 op->tile_mask = tile_mask; 852 op->base.op = DRM_GPUVA_OP_MAP; 853 op->base.map.va.addr = vma->gpuva.va.addr; 854 op->base.map.va.range = vma->gpuva.va.range; 855 op->base.map.gem.obj = vma->gpuva.gem.obj; 856 op->base.map.gem.offset = vma->gpuva.gem.offset; 857 op->map.vma = vma; 858 op->map.immediate = true; 859 op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE; 860 op->map.is_null = xe_vma_is_null(vma); 861 } 862 863 static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, 864 u8 tile_mask) 865 { 866 struct xe_vma_op *op; 867 868 op = kzalloc(sizeof(*op), GFP_KERNEL); 869 if (!op) 870 return -ENOMEM; 871 872 xe_vm_populate_rebind(op, vma, tile_mask); 873 list_add_tail(&op->link, &vops->list); 874 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); 875 876 return 0; 877 } 878 879 static struct dma_fence *ops_execute(struct xe_vm *vm, 880 struct xe_vma_ops *vops); 881 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 882 struct xe_exec_queue *q, 883 struct xe_sync_entry *syncs, u32 num_syncs); 884 885 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) 886 { 887 struct dma_fence *fence; 888 struct xe_vma *vma, *next; 889 struct xe_vma_ops vops; 890 struct xe_vma_op *op, *next_op; 891 int err, i; 892 893 lockdep_assert_held(&vm->lock); 894 if ((xe_vm_in_lr_mode(vm) && !rebind_worker) || 895 list_empty(&vm->rebind_list)) 896 return 0; 897 898 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 899 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 900 vops.pt_update_ops[i].wait_vm_bookkeep = true; 901 902 xe_vm_assert_held(vm); 903 list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { 904 xe_assert(vm->xe, vma->tile_present); 905 906 if (rebind_worker) 907 trace_xe_vma_rebind_worker(vma); 908 else 909 trace_xe_vma_rebind_exec(vma); 910 911 err = xe_vm_ops_add_rebind(&vops, vma, 912 vma->tile_present); 913 if (err) 914 goto free_ops; 915 } 916 917 err = xe_vma_ops_alloc(&vops, false); 918 if (err) 919 goto free_ops; 920 921 fence = ops_execute(vm, &vops); 922 if (IS_ERR(fence)) { 923 err = PTR_ERR(fence); 924 } else { 925 dma_fence_put(fence); 926 list_for_each_entry_safe(vma, next, &vm->rebind_list, 927 combined_links.rebind) 928 
list_del_init(&vma->combined_links.rebind); 929 } 930 free_ops: 931 list_for_each_entry_safe(op, next_op, &vops.list, link) { 932 list_del(&op->link); 933 kfree(op); 934 } 935 xe_vma_ops_fini(&vops); 936 937 return err; 938 } 939 940 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask) 941 { 942 struct dma_fence *fence = NULL; 943 struct xe_vma_ops vops; 944 struct xe_vma_op *op, *next_op; 945 struct xe_tile *tile; 946 u8 id; 947 int err; 948 949 lockdep_assert_held(&vm->lock); 950 xe_vm_assert_held(vm); 951 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 952 953 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 954 for_each_tile(tile, vm->xe, id) { 955 vops.pt_update_ops[id].wait_vm_bookkeep = true; 956 vops.pt_update_ops[tile->id].q = 957 xe_migrate_exec_queue(tile->migrate); 958 } 959 960 err = xe_vm_ops_add_rebind(&vops, vma, tile_mask); 961 if (err) 962 return ERR_PTR(err); 963 964 err = xe_vma_ops_alloc(&vops, false); 965 if (err) { 966 fence = ERR_PTR(err); 967 goto free_ops; 968 } 969 970 fence = ops_execute(vm, &vops); 971 972 free_ops: 973 list_for_each_entry_safe(op, next_op, &vops.list, link) { 974 list_del(&op->link); 975 kfree(op); 976 } 977 xe_vma_ops_fini(&vops); 978 979 return fence; 980 } 981 982 static void xe_vm_populate_range_rebind(struct xe_vma_op *op, 983 struct xe_vma *vma, 984 struct xe_svm_range *range, 985 u8 tile_mask) 986 { 987 INIT_LIST_HEAD(&op->link); 988 op->tile_mask = tile_mask; 989 op->base.op = DRM_GPUVA_OP_DRIVER; 990 op->subop = XE_VMA_SUBOP_MAP_RANGE; 991 op->map_range.vma = vma; 992 op->map_range.range = range; 993 } 994 995 static int 996 xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops, 997 struct xe_vma *vma, 998 struct xe_svm_range *range, 999 u8 tile_mask) 1000 { 1001 struct xe_vma_op *op; 1002 1003 op = kzalloc(sizeof(*op), GFP_KERNEL); 1004 if (!op) 1005 return -ENOMEM; 1006 1007 xe_vm_populate_range_rebind(op, vma, range, tile_mask); 1008 list_add_tail(&op->link, &vops->list); 1009 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); 1010 1011 return 0; 1012 } 1013 1014 /** 1015 * xe_vm_range_rebind() - VM range (re)bind 1016 * @vm: The VM which the range belongs to. 1017 * @vma: The VMA which the range belongs to. 1018 * @range: SVM range to rebind. 1019 * @tile_mask: Tile mask to bind the range to. 1020 * 1021 * (re)bind SVM range setting up GPU page tables for the range. 
 *
 * Return: dma fence for rebind to signal completion on success, ERR_PTR on
 * failure
 */
struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
				     struct xe_vma *vma,
				     struct xe_svm_range *range,
				     u8 tile_mask)
{
	struct dma_fence *fence = NULL;
	struct xe_vma_ops vops;
	struct xe_vma_op *op, *next_op;
	struct xe_tile *tile;
	u8 id;
	int err;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);
	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
	xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	for_each_tile(tile, vm->xe, id) {
		vops.pt_update_ops[id].wait_vm_bookkeep = true;
		vops.pt_update_ops[tile->id].q =
			xe_migrate_exec_queue(tile->migrate);
	}

	err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask);
	if (err)
		return ERR_PTR(err);

	err = xe_vma_ops_alloc(&vops, false);
	if (err) {
		fence = ERR_PTR(err);
		goto free_ops;
	}

	fence = ops_execute(vm, &vops);

free_ops:
	list_for_each_entry_safe(op, next_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return fence;
}

static void xe_vm_populate_range_unbind(struct xe_vma_op *op,
					struct xe_svm_range *range)
{
	INIT_LIST_HEAD(&op->link);
	op->tile_mask = range->tile_present;
	op->base.op = DRM_GPUVA_OP_DRIVER;
	op->subop = XE_VMA_SUBOP_UNMAP_RANGE;
	op->unmap_range.range = range;
}

static int
xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops,
			   struct xe_svm_range *range)
{
	struct xe_vma_op *op;

	op = kzalloc(sizeof(*op), GFP_KERNEL);
	if (!op)
		return -ENOMEM;

	xe_vm_populate_range_unbind(op, range);
	list_add_tail(&op->link, &vops->list);
	xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1);

	return 0;
}

/**
 * xe_vm_range_unbind() - VM range unbind
 * @vm: The VM which the range belongs to.
 * @range: SVM range to unbind.
 *
 * Unbind SVM range removing the GPU page tables for the range.
 *
 * Return: dma fence for unbind to signal completion on success, ERR_PTR on
 * failure
 */
struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
				     struct xe_svm_range *range)
{
	struct dma_fence *fence = NULL;
	struct xe_vma_ops vops;
	struct xe_vma_op *op, *next_op;
	struct xe_tile *tile;
	u8 id;
	int err;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);
	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));

	if (!range->tile_present)
		return dma_fence_get_stub();

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	for_each_tile(tile, vm->xe, id) {
		vops.pt_update_ops[id].wait_vm_bookkeep = true;
		vops.pt_update_ops[tile->id].q =
			xe_migrate_exec_queue(tile->migrate);
	}

	err = xe_vm_ops_add_range_unbind(&vops, range);
	if (err)
		return ERR_PTR(err);

	err = xe_vma_ops_alloc(&vops, false);
	if (err) {
		fence = ERR_PTR(err);
		goto free_ops;
	}

	fence = ops_execute(vm, &vops);

free_ops:
	list_for_each_entry_safe(op, next_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return fence;
}

static void xe_vma_free(struct xe_vma *vma)
{
	if (xe_vma_is_userptr(vma))
		kfree(to_userptr_vma(vma));
	else
		kfree(vma);
}

#define VMA_CREATE_FLAG_READ_ONLY		BIT(0)
#define VMA_CREATE_FLAG_IS_NULL			BIT(1)
#define VMA_CREATE_FLAG_DUMPABLE		BIT(2)
#define VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR	BIT(3)

static struct xe_vma *xe_vma_create(struct xe_vm *vm,
				    struct xe_bo *bo,
				    u64 bo_offset_or_userptr,
				    u64 start, u64 end,
				    struct xe_vma_mem_attr *attr,
				    unsigned int flags)
{
	struct xe_vma *vma;
	struct xe_tile *tile;
	u8 id;
	bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
	bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
	bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
	bool is_cpu_addr_mirror =
		(flags & VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR);

	xe_assert(vm->xe, start < end);
	xe_assert(vm->xe, end < vm->size);

	/*
	 * Allocate and ensure that the xe_vma_is_userptr() return
	 * matches what was allocated.
1190 */ 1191 if (!bo && !is_null && !is_cpu_addr_mirror) { 1192 struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL); 1193 1194 if (!uvma) 1195 return ERR_PTR(-ENOMEM); 1196 1197 vma = &uvma->vma; 1198 } else { 1199 vma = kzalloc(sizeof(*vma), GFP_KERNEL); 1200 if (!vma) 1201 return ERR_PTR(-ENOMEM); 1202 1203 if (is_cpu_addr_mirror) 1204 vma->gpuva.flags |= XE_VMA_SYSTEM_ALLOCATOR; 1205 if (is_null) 1206 vma->gpuva.flags |= DRM_GPUVA_SPARSE; 1207 if (bo) 1208 vma->gpuva.gem.obj = &bo->ttm.base; 1209 } 1210 1211 INIT_LIST_HEAD(&vma->combined_links.rebind); 1212 1213 INIT_LIST_HEAD(&vma->gpuva.gem.entry); 1214 vma->gpuva.vm = &vm->gpuvm; 1215 vma->gpuva.va.addr = start; 1216 vma->gpuva.va.range = end - start + 1; 1217 if (read_only) 1218 vma->gpuva.flags |= XE_VMA_READ_ONLY; 1219 if (dumpable) 1220 vma->gpuva.flags |= XE_VMA_DUMPABLE; 1221 1222 for_each_tile(tile, vm->xe, id) 1223 vma->tile_mask |= 0x1 << id; 1224 1225 if (vm->xe->info.has_atomic_enable_pte_bit) 1226 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; 1227 1228 vma->attr = *attr; 1229 1230 if (bo) { 1231 struct drm_gpuvm_bo *vm_bo; 1232 1233 xe_bo_assert_held(bo); 1234 1235 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base); 1236 if (IS_ERR(vm_bo)) { 1237 xe_vma_free(vma); 1238 return ERR_CAST(vm_bo); 1239 } 1240 1241 drm_gpuvm_bo_extobj_add(vm_bo); 1242 drm_gem_object_get(&bo->ttm.base); 1243 vma->gpuva.gem.offset = bo_offset_or_userptr; 1244 drm_gpuva_link(&vma->gpuva, vm_bo); 1245 drm_gpuvm_bo_put(vm_bo); 1246 } else /* userptr or null */ { 1247 if (!is_null && !is_cpu_addr_mirror) { 1248 struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; 1249 u64 size = end - start + 1; 1250 int err; 1251 1252 INIT_LIST_HEAD(&userptr->invalidate_link); 1253 INIT_LIST_HEAD(&userptr->repin_link); 1254 vma->gpuva.gem.offset = bo_offset_or_userptr; 1255 mutex_init(&userptr->unmap_mutex); 1256 1257 err = mmu_interval_notifier_insert(&userptr->notifier, 1258 current->mm, 1259 xe_vma_userptr(vma), size, 1260 &vma_userptr_notifier_ops); 1261 if (err) { 1262 xe_vma_free(vma); 1263 return ERR_PTR(err); 1264 } 1265 1266 userptr->notifier_seq = LONG_MAX; 1267 } 1268 1269 xe_vm_get(vm); 1270 } 1271 1272 return vma; 1273 } 1274 1275 static void xe_vma_destroy_late(struct xe_vma *vma) 1276 { 1277 struct xe_vm *vm = xe_vma_vm(vma); 1278 1279 if (vma->ufence) { 1280 xe_sync_ufence_put(vma->ufence); 1281 vma->ufence = NULL; 1282 } 1283 1284 if (xe_vma_is_userptr(vma)) { 1285 struct xe_userptr_vma *uvma = to_userptr_vma(vma); 1286 struct xe_userptr *userptr = &uvma->userptr; 1287 1288 if (userptr->sg) 1289 xe_hmm_userptr_free_sg(uvma); 1290 1291 /* 1292 * Since userptr pages are not pinned, we can't remove 1293 * the notifier until we're sure the GPU is not accessing 1294 * them anymore 1295 */ 1296 mmu_interval_notifier_remove(&userptr->notifier); 1297 mutex_destroy(&userptr->unmap_mutex); 1298 xe_vm_put(vm); 1299 } else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) { 1300 xe_vm_put(vm); 1301 } else { 1302 xe_bo_put(xe_vma_bo(vma)); 1303 } 1304 1305 xe_vma_free(vma); 1306 } 1307 1308 static void vma_destroy_work_func(struct work_struct *w) 1309 { 1310 struct xe_vma *vma = 1311 container_of(w, struct xe_vma, destroy_work); 1312 1313 xe_vma_destroy_late(vma); 1314 } 1315 1316 static void vma_destroy_cb(struct dma_fence *fence, 1317 struct dma_fence_cb *cb) 1318 { 1319 struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb); 1320 1321 INIT_WORK(&vma->destroy_work, vma_destroy_work_func); 1322 
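	/*
	 * Fence callbacks may run in atomic context, while the final
	 * destruction can sleep (e.g. removing the userptr notifier),
	 * so defer it to a worker.
	 */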
	queue_work(system_unbound_wq, &vma->destroy_work);
}

static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
{
	struct xe_vm *vm = xe_vma_vm(vma);

	lockdep_assert_held_write(&vm->lock);
	xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));

	if (xe_vma_is_userptr(vma)) {
		xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);

		spin_lock(&vm->userptr.invalidated_lock);
		xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link));
		list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
		spin_unlock(&vm->userptr.invalidated_lock);
	} else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) {
		xe_bo_assert_held(xe_vma_bo(vma));

		drm_gpuva_unlink(&vma->gpuva);
	}

	xe_vm_assert_held(vm);
	if (fence) {
		int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
						 vma_destroy_cb);

		if (ret) {
			XE_WARN_ON(ret != -ENOENT);
			xe_vma_destroy_late(vma);
		}
	} else {
		xe_vma_destroy_late(vma);
	}
}

/**
 * xe_vm_lock_vma() - drm_exec utility to lock a vma
 * @exec: The drm_exec object we're currently locking for.
 * @vma: The vma for which we want to lock the vm resv and any attached
 * object's resv.
 *
 * Return: 0 on success, negative error code on error. In particular
 * may return -EDEADLK on WW transaction contention and -EINTR if
 * an interruptible wait is terminated by a signal.
 */
int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
{
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_bo *bo = xe_vma_bo(vma);
	int err;

	XE_WARN_ON(!vm);

	err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
	if (!err && bo && !bo->vm)
		err = drm_exec_lock_obj(exec, &bo->ttm.base);

	return err;
}

static void xe_vma_destroy_unlocked(struct xe_vma *vma)
{
	struct drm_exec exec;
	int err;

	drm_exec_init(&exec, 0, 0);
	drm_exec_until_all_locked(&exec) {
		err = xe_vm_lock_vma(&exec, vma);
		drm_exec_retry_on_contention(&exec);
		if (XE_WARN_ON(err))
			break;
	}

	xe_vma_destroy(vma, NULL);

	drm_exec_fini(&exec);
}

struct xe_vma *
xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
{
	struct drm_gpuva *gpuva;

	lockdep_assert_held(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm))
		return NULL;

	xe_assert(vm->xe, start + range <= vm->size);

	gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);

	return gpuva ?
gpuva_to_vma(gpuva) : NULL; 1417 } 1418 1419 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma) 1420 { 1421 int err; 1422 1423 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1424 lockdep_assert_held(&vm->lock); 1425 1426 mutex_lock(&vm->snap_mutex); 1427 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva); 1428 mutex_unlock(&vm->snap_mutex); 1429 XE_WARN_ON(err); /* Shouldn't be possible */ 1430 1431 return err; 1432 } 1433 1434 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma) 1435 { 1436 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1437 lockdep_assert_held(&vm->lock); 1438 1439 mutex_lock(&vm->snap_mutex); 1440 drm_gpuva_remove(&vma->gpuva); 1441 mutex_unlock(&vm->snap_mutex); 1442 if (vm->usm.last_fault_vma == vma) 1443 vm->usm.last_fault_vma = NULL; 1444 } 1445 1446 static struct drm_gpuva_op *xe_vm_op_alloc(void) 1447 { 1448 struct xe_vma_op *op; 1449 1450 op = kzalloc(sizeof(*op), GFP_KERNEL); 1451 1452 if (unlikely(!op)) 1453 return NULL; 1454 1455 return &op->base; 1456 } 1457 1458 static void xe_vm_free(struct drm_gpuvm *gpuvm); 1459 1460 static const struct drm_gpuvm_ops gpuvm_ops = { 1461 .op_alloc = xe_vm_op_alloc, 1462 .vm_bo_validate = xe_gpuvm_validate, 1463 .vm_free = xe_vm_free, 1464 }; 1465 1466 static u64 pde_encode_pat_index(u16 pat_index) 1467 { 1468 u64 pte = 0; 1469 1470 if (pat_index & BIT(0)) 1471 pte |= XE_PPGTT_PTE_PAT0; 1472 1473 if (pat_index & BIT(1)) 1474 pte |= XE_PPGTT_PTE_PAT1; 1475 1476 return pte; 1477 } 1478 1479 static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level) 1480 { 1481 u64 pte = 0; 1482 1483 if (pat_index & BIT(0)) 1484 pte |= XE_PPGTT_PTE_PAT0; 1485 1486 if (pat_index & BIT(1)) 1487 pte |= XE_PPGTT_PTE_PAT1; 1488 1489 if (pat_index & BIT(2)) { 1490 if (pt_level) 1491 pte |= XE_PPGTT_PDE_PDPE_PAT2; 1492 else 1493 pte |= XE_PPGTT_PTE_PAT2; 1494 } 1495 1496 if (pat_index & BIT(3)) 1497 pte |= XELPG_PPGTT_PTE_PAT3; 1498 1499 if (pat_index & (BIT(4))) 1500 pte |= XE2_PPGTT_PTE_PAT4; 1501 1502 return pte; 1503 } 1504 1505 static u64 pte_encode_ps(u32 pt_level) 1506 { 1507 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL); 1508 1509 if (pt_level == 1) 1510 return XE_PDE_PS_2M; 1511 else if (pt_level == 2) 1512 return XE_PDPE_PS_1G; 1513 1514 return 0; 1515 } 1516 1517 static u16 pde_pat_index(struct xe_bo *bo) 1518 { 1519 struct xe_device *xe = xe_bo_device(bo); 1520 u16 pat_index; 1521 1522 /* 1523 * We only have two bits to encode the PAT index in non-leaf nodes, but 1524 * these only point to other paging structures so we only need a minimal 1525 * selection of options. The user PAT index is only for encoding leaf 1526 * nodes, where we have use of more bits to do the encoding. The 1527 * non-leaf nodes are instead under driver control so the chosen index 1528 * here should be distict from the user PAT index. Also the 1529 * corresponding coherency of the PAT index should be tied to the 1530 * allocation type of the page table (or at least we should pick 1531 * something which is always safe). 
1532 */ 1533 if (!xe_bo_is_vram(bo) && bo->ttm.ttm->caching == ttm_cached) 1534 pat_index = xe->pat.idx[XE_CACHE_WB]; 1535 else 1536 pat_index = xe->pat.idx[XE_CACHE_NONE]; 1537 1538 xe_assert(xe, pat_index <= 3); 1539 1540 return pat_index; 1541 } 1542 1543 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset) 1544 { 1545 u64 pde; 1546 1547 pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1548 pde |= XE_PAGE_PRESENT | XE_PAGE_RW; 1549 pde |= pde_encode_pat_index(pde_pat_index(bo)); 1550 1551 return pde; 1552 } 1553 1554 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, 1555 u16 pat_index, u32 pt_level) 1556 { 1557 u64 pte; 1558 1559 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1560 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1561 pte |= pte_encode_pat_index(pat_index, pt_level); 1562 pte |= pte_encode_ps(pt_level); 1563 1564 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) 1565 pte |= XE_PPGTT_PTE_DM; 1566 1567 return pte; 1568 } 1569 1570 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, 1571 u16 pat_index, u32 pt_level) 1572 { 1573 pte |= XE_PAGE_PRESENT; 1574 1575 if (likely(!xe_vma_read_only(vma))) 1576 pte |= XE_PAGE_RW; 1577 1578 pte |= pte_encode_pat_index(pat_index, pt_level); 1579 pte |= pte_encode_ps(pt_level); 1580 1581 if (unlikely(xe_vma_is_null(vma))) 1582 pte |= XE_PTE_NULL; 1583 1584 return pte; 1585 } 1586 1587 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, 1588 u16 pat_index, 1589 u32 pt_level, bool devmem, u64 flags) 1590 { 1591 u64 pte; 1592 1593 /* Avoid passing random bits directly as flags */ 1594 xe_assert(xe, !(flags & ~XE_PTE_PS64)); 1595 1596 pte = addr; 1597 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1598 pte |= pte_encode_pat_index(pat_index, pt_level); 1599 pte |= pte_encode_ps(pt_level); 1600 1601 if (devmem) 1602 pte |= XE_PPGTT_PTE_DM; 1603 1604 pte |= flags; 1605 1606 return pte; 1607 } 1608 1609 static const struct xe_pt_ops xelp_pt_ops = { 1610 .pte_encode_bo = xelp_pte_encode_bo, 1611 .pte_encode_vma = xelp_pte_encode_vma, 1612 .pte_encode_addr = xelp_pte_encode_addr, 1613 .pde_encode_bo = xelp_pde_encode_bo, 1614 }; 1615 1616 static void vm_destroy_work_func(struct work_struct *w); 1617 1618 /** 1619 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the 1620 * given tile and vm. 1621 * @xe: xe device. 1622 * @tile: tile to set up for. 1623 * @vm: vm to set up for. 1624 * 1625 * Sets up a pagetable tree with one page-table per level and a single 1626 * leaf PTE. All pagetable entries point to the single page-table or, 1627 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and 1628 * writes become NOPs. 1629 * 1630 * Return: 0 on success, negative error code on error. 
1631 */ 1632 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, 1633 struct xe_vm *vm) 1634 { 1635 u8 id = tile->id; 1636 int i; 1637 1638 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) { 1639 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i); 1640 if (IS_ERR(vm->scratch_pt[id][i])) { 1641 int err = PTR_ERR(vm->scratch_pt[id][i]); 1642 1643 vm->scratch_pt[id][i] = NULL; 1644 return err; 1645 } 1646 1647 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); 1648 } 1649 1650 return 0; 1651 } 1652 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO); 1653 1654 static void xe_vm_free_scratch(struct xe_vm *vm) 1655 { 1656 struct xe_tile *tile; 1657 u8 id; 1658 1659 if (!xe_vm_has_scratch(vm)) 1660 return; 1661 1662 for_each_tile(tile, vm->xe, id) { 1663 u32 i; 1664 1665 if (!vm->pt_root[id]) 1666 continue; 1667 1668 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i) 1669 if (vm->scratch_pt[id][i]) 1670 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL); 1671 } 1672 } 1673 1674 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) 1675 { 1676 struct drm_gem_object *vm_resv_obj; 1677 struct xe_vm *vm; 1678 int err, number_tiles = 0; 1679 struct xe_tile *tile; 1680 u8 id; 1681 1682 /* 1683 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to 1684 * ever be in faulting mode. 1685 */ 1686 xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE))); 1687 1688 vm = kzalloc(sizeof(*vm), GFP_KERNEL); 1689 if (!vm) 1690 return ERR_PTR(-ENOMEM); 1691 1692 vm->xe = xe; 1693 1694 vm->size = 1ull << xe->info.va_bits; 1695 vm->flags = flags; 1696 1697 if (xef) 1698 vm->xef = xe_file_get(xef); 1699 /** 1700 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be 1701 * manipulated under the PXP mutex. However, the PXP mutex can be taken 1702 * under a user-VM lock when the PXP session is started at exec_queue 1703 * creation time. Those are different VMs and therefore there is no risk 1704 * of deadlock, but we need to tell lockdep that this is the case or it 1705 * will print a warning. 1706 */ 1707 if (flags & XE_VM_FLAG_GSC) { 1708 static struct lock_class_key gsc_vm_key; 1709 1710 __init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key); 1711 } else { 1712 init_rwsem(&vm->lock); 1713 } 1714 mutex_init(&vm->snap_mutex); 1715 1716 INIT_LIST_HEAD(&vm->rebind_list); 1717 1718 INIT_LIST_HEAD(&vm->userptr.repin_list); 1719 INIT_LIST_HEAD(&vm->userptr.invalidated); 1720 init_rwsem(&vm->userptr.notifier_lock); 1721 spin_lock_init(&vm->userptr.invalidated_lock); 1722 1723 ttm_lru_bulk_move_init(&vm->lru_bulk_move); 1724 1725 INIT_WORK(&vm->destroy_work, vm_destroy_work_func); 1726 1727 INIT_LIST_HEAD(&vm->preempt.exec_queues); 1728 vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */ 1729 1730 for_each_tile(tile, xe, id) 1731 xe_range_fence_tree_init(&vm->rftree[id]); 1732 1733 vm->pt_ops = &xelp_pt_ops; 1734 1735 /* 1736 * Long-running workloads are not protected by the scheduler references. 1737 * By design, run_job for long-running workloads returns NULL and the 1738 * scheduler drops all the references of it, hence protecting the VM 1739 * for this case is necessary. 
1740 */ 1741 if (flags & XE_VM_FLAG_LR_MODE) { 1742 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); 1743 xe_pm_runtime_get_noresume(xe); 1744 } 1745 1746 if (flags & XE_VM_FLAG_FAULT_MODE) { 1747 err = xe_svm_init(vm); 1748 if (err) 1749 goto err_no_resv; 1750 } 1751 1752 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); 1753 if (!vm_resv_obj) { 1754 err = -ENOMEM; 1755 goto err_svm_fini; 1756 } 1757 1758 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm, 1759 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops); 1760 1761 drm_gem_object_put(vm_resv_obj); 1762 1763 err = xe_vm_lock(vm, true); 1764 if (err) 1765 goto err_close; 1766 1767 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) 1768 vm->flags |= XE_VM_FLAG_64K; 1769 1770 for_each_tile(tile, xe, id) { 1771 if (flags & XE_VM_FLAG_MIGRATION && 1772 tile->id != XE_VM_FLAG_TILE_ID(flags)) 1773 continue; 1774 1775 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level); 1776 if (IS_ERR(vm->pt_root[id])) { 1777 err = PTR_ERR(vm->pt_root[id]); 1778 vm->pt_root[id] = NULL; 1779 goto err_unlock_close; 1780 } 1781 } 1782 1783 if (xe_vm_has_scratch(vm)) { 1784 for_each_tile(tile, xe, id) { 1785 if (!vm->pt_root[id]) 1786 continue; 1787 1788 err = xe_vm_create_scratch(xe, tile, vm); 1789 if (err) 1790 goto err_unlock_close; 1791 } 1792 vm->batch_invalidate_tlb = true; 1793 } 1794 1795 if (vm->flags & XE_VM_FLAG_LR_MODE) 1796 vm->batch_invalidate_tlb = false; 1797 1798 /* Fill pt_root after allocating scratch tables */ 1799 for_each_tile(tile, xe, id) { 1800 if (!vm->pt_root[id]) 1801 continue; 1802 1803 xe_pt_populate_empty(tile, vm, vm->pt_root[id]); 1804 } 1805 xe_vm_unlock(vm); 1806 1807 /* Kernel migration VM shouldn't have a circular loop.. */ 1808 if (!(flags & XE_VM_FLAG_MIGRATION)) { 1809 for_each_tile(tile, xe, id) { 1810 struct xe_exec_queue *q; 1811 u32 create_flags = EXEC_QUEUE_FLAG_VM; 1812 1813 if (!vm->pt_root[id]) 1814 continue; 1815 1816 q = xe_exec_queue_create_bind(xe, tile, create_flags, 0); 1817 if (IS_ERR(q)) { 1818 err = PTR_ERR(q); 1819 goto err_close; 1820 } 1821 vm->q[id] = q; 1822 number_tiles++; 1823 } 1824 } 1825 1826 if (number_tiles > 1) 1827 vm->composite_fence_ctx = dma_fence_context_alloc(1); 1828 1829 if (xef && xe->info.has_asid) { 1830 u32 asid; 1831 1832 down_write(&xe->usm.lock); 1833 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, 1834 XA_LIMIT(1, XE_MAX_ASID - 1), 1835 &xe->usm.next_asid, GFP_KERNEL); 1836 up_write(&xe->usm.lock); 1837 if (err < 0) 1838 goto err_unlock_close; 1839 1840 vm->usm.asid = asid; 1841 } 1842 1843 trace_xe_vm_create(vm); 1844 1845 return vm; 1846 1847 err_unlock_close: 1848 xe_vm_unlock(vm); 1849 err_close: 1850 xe_vm_close_and_put(vm); 1851 return ERR_PTR(err); 1852 1853 err_svm_fini: 1854 if (flags & XE_VM_FLAG_FAULT_MODE) { 1855 vm->size = 0; /* close the vm */ 1856 xe_svm_fini(vm); 1857 } 1858 err_no_resv: 1859 mutex_destroy(&vm->snap_mutex); 1860 for_each_tile(tile, xe, id) 1861 xe_range_fence_tree_fini(&vm->rftree[id]); 1862 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 1863 if (vm->xef) 1864 xe_file_put(vm->xef); 1865 kfree(vm); 1866 if (flags & XE_VM_FLAG_LR_MODE) 1867 xe_pm_runtime_put(xe); 1868 return ERR_PTR(err); 1869 } 1870 1871 static void xe_vm_close(struct xe_vm *vm) 1872 { 1873 struct xe_device *xe = vm->xe; 1874 bool bound; 1875 int idx; 1876 1877 bound = drm_dev_enter(&xe->drm, &idx); 1878 1879 down_write(&vm->lock); 1880 if (xe_vm_in_fault_mode(vm)) 1881 xe_svm_notifier_lock(vm); 1882 1883 
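	/* A zero size marks the VM as closed from this point on. */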
vm->size = 0; 1884 1885 if (!((vm->flags & XE_VM_FLAG_MIGRATION))) { 1886 struct xe_tile *tile; 1887 struct xe_gt *gt; 1888 u8 id; 1889 1890 /* Wait for pending binds */ 1891 dma_resv_wait_timeout(xe_vm_resv(vm), 1892 DMA_RESV_USAGE_BOOKKEEP, 1893 false, MAX_SCHEDULE_TIMEOUT); 1894 1895 if (bound) { 1896 for_each_tile(tile, xe, id) 1897 if (vm->pt_root[id]) 1898 xe_pt_clear(xe, vm->pt_root[id]); 1899 1900 for_each_gt(gt, xe, id) 1901 xe_tlb_inval_vm(>->tlb_inval, vm); 1902 } 1903 } 1904 1905 if (xe_vm_in_fault_mode(vm)) 1906 xe_svm_notifier_unlock(vm); 1907 up_write(&vm->lock); 1908 1909 if (bound) 1910 drm_dev_exit(idx); 1911 } 1912 1913 void xe_vm_close_and_put(struct xe_vm *vm) 1914 { 1915 LIST_HEAD(contested); 1916 struct xe_device *xe = vm->xe; 1917 struct xe_tile *tile; 1918 struct xe_vma *vma, *next_vma; 1919 struct drm_gpuva *gpuva, *next; 1920 u8 id; 1921 1922 xe_assert(xe, !vm->preempt.num_exec_queues); 1923 1924 xe_vm_close(vm); 1925 if (xe_vm_in_preempt_fence_mode(vm)) 1926 flush_work(&vm->preempt.rebind_work); 1927 if (xe_vm_in_fault_mode(vm)) 1928 xe_svm_close(vm); 1929 1930 down_write(&vm->lock); 1931 for_each_tile(tile, xe, id) { 1932 if (vm->q[id]) 1933 xe_exec_queue_last_fence_put(vm->q[id], vm); 1934 } 1935 up_write(&vm->lock); 1936 1937 for_each_tile(tile, xe, id) { 1938 if (vm->q[id]) { 1939 xe_exec_queue_kill(vm->q[id]); 1940 xe_exec_queue_put(vm->q[id]); 1941 vm->q[id] = NULL; 1942 } 1943 } 1944 1945 down_write(&vm->lock); 1946 xe_vm_lock(vm, false); 1947 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) { 1948 vma = gpuva_to_vma(gpuva); 1949 1950 if (xe_vma_has_no_bo(vma)) { 1951 down_read(&vm->userptr.notifier_lock); 1952 vma->gpuva.flags |= XE_VMA_DESTROYED; 1953 up_read(&vm->userptr.notifier_lock); 1954 } 1955 1956 xe_vm_remove_vma(vm, vma); 1957 1958 /* easy case, remove from VMA? */ 1959 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) { 1960 list_del_init(&vma->combined_links.rebind); 1961 xe_vma_destroy(vma, NULL); 1962 continue; 1963 } 1964 1965 list_move_tail(&vma->combined_links.destroy, &contested); 1966 vma->gpuva.flags |= XE_VMA_DESTROYED; 1967 } 1968 1969 /* 1970 * All vm operations will add shared fences to resv. 1971 * The only exception is eviction for a shared object, 1972 * but even so, the unbind when evicted would still 1973 * install a fence to resv. Hence it's safe to 1974 * destroy the pagetables immediately. 1975 */ 1976 xe_vm_free_scratch(vm); 1977 1978 for_each_tile(tile, xe, id) { 1979 if (vm->pt_root[id]) { 1980 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); 1981 vm->pt_root[id] = NULL; 1982 } 1983 } 1984 xe_vm_unlock(vm); 1985 1986 /* 1987 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL 1988 * Since we hold a refcount to the bo, we can remove and free 1989 * the members safely without locking. 
1990 */ 1991 list_for_each_entry_safe(vma, next_vma, &contested, 1992 combined_links.destroy) { 1993 list_del_init(&vma->combined_links.destroy); 1994 xe_vma_destroy_unlocked(vma); 1995 } 1996 1997 if (xe_vm_in_fault_mode(vm)) 1998 xe_svm_fini(vm); 1999 2000 up_write(&vm->lock); 2001 2002 down_write(&xe->usm.lock); 2003 if (vm->usm.asid) { 2004 void *lookup; 2005 2006 xe_assert(xe, xe->info.has_asid); 2007 xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION)); 2008 2009 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); 2010 xe_assert(xe, lookup == vm); 2011 } 2012 up_write(&xe->usm.lock); 2013 2014 for_each_tile(tile, xe, id) 2015 xe_range_fence_tree_fini(&vm->rftree[id]); 2016 2017 xe_vm_put(vm); 2018 } 2019 2020 static void vm_destroy_work_func(struct work_struct *w) 2021 { 2022 struct xe_vm *vm = 2023 container_of(w, struct xe_vm, destroy_work); 2024 struct xe_device *xe = vm->xe; 2025 struct xe_tile *tile; 2026 u8 id; 2027 2028 /* xe_vm_close_and_put was not called? */ 2029 xe_assert(xe, !vm->size); 2030 2031 if (xe_vm_in_preempt_fence_mode(vm)) 2032 flush_work(&vm->preempt.rebind_work); 2033 2034 mutex_destroy(&vm->snap_mutex); 2035 2036 if (vm->flags & XE_VM_FLAG_LR_MODE) 2037 xe_pm_runtime_put(xe); 2038 2039 for_each_tile(tile, xe, id) 2040 XE_WARN_ON(vm->pt_root[id]); 2041 2042 trace_xe_vm_free(vm); 2043 2044 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 2045 2046 if (vm->xef) 2047 xe_file_put(vm->xef); 2048 2049 kfree(vm); 2050 } 2051 2052 static void xe_vm_free(struct drm_gpuvm *gpuvm) 2053 { 2054 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); 2055 2056 /* To destroy the VM we need to be able to sleep */ 2057 queue_work(system_unbound_wq, &vm->destroy_work); 2058 } 2059 2060 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) 2061 { 2062 struct xe_vm *vm; 2063 2064 mutex_lock(&xef->vm.lock); 2065 vm = xa_load(&xef->vm.xa, id); 2066 if (vm) 2067 xe_vm_get(vm); 2068 mutex_unlock(&xef->vm.lock); 2069 2070 return vm; 2071 } 2072 2073 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile) 2074 { 2075 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0); 2076 } 2077 2078 static struct xe_exec_queue * 2079 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 2080 { 2081 return q ? 
q : vm->q[0]; 2082 } 2083 2084 static struct xe_user_fence * 2085 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) 2086 { 2087 unsigned int i; 2088 2089 for (i = 0; i < num_syncs; i++) { 2090 struct xe_sync_entry *e = &syncs[i]; 2091 2092 if (xe_sync_is_ufence(e)) 2093 return xe_sync_ufence_get(e); 2094 } 2095 2096 return NULL; 2097 } 2098 2099 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ 2100 DRM_XE_VM_CREATE_FLAG_LR_MODE | \ 2101 DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 2102 2103 int xe_vm_create_ioctl(struct drm_device *dev, void *data, 2104 struct drm_file *file) 2105 { 2106 struct xe_device *xe = to_xe_device(dev); 2107 struct xe_file *xef = to_xe_file(file); 2108 struct drm_xe_vm_create *args = data; 2109 struct xe_vm *vm; 2110 u32 id; 2111 int err; 2112 u32 flags = 0; 2113 2114 if (XE_IOCTL_DBG(xe, args->extensions)) 2115 return -EINVAL; 2116 2117 if (XE_GT_WA(xe_root_mmio_gt(xe), 14016763929)) 2118 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE; 2119 2120 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && 2121 !xe->info.has_usm)) 2122 return -EINVAL; 2123 2124 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 2125 return -EINVAL; 2126 2127 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS)) 2128 return -EINVAL; 2129 2130 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE && 2131 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && 2132 !xe->info.needs_scratch)) 2133 return -EINVAL; 2134 2135 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) && 2136 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) 2137 return -EINVAL; 2138 2139 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE) 2140 flags |= XE_VM_FLAG_SCRATCH_PAGE; 2141 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) 2142 flags |= XE_VM_FLAG_LR_MODE; 2143 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 2144 flags |= XE_VM_FLAG_FAULT_MODE; 2145 2146 vm = xe_vm_create(xe, flags, xef); 2147 if (IS_ERR(vm)) 2148 return PTR_ERR(vm); 2149 2150 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) 2151 /* Warning: Security issue - never enable by default */ 2152 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); 2153 #endif 2154 2155 /* user id alloc must always be last in ioctl to prevent UAF */ 2156 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); 2157 if (err) 2158 goto err_close_and_put; 2159 2160 args->vm_id = id; 2161 2162 return 0; 2163 2164 err_close_and_put: 2165 xe_vm_close_and_put(vm); 2166 2167 return err; 2168 } 2169 2170 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, 2171 struct drm_file *file) 2172 { 2173 struct xe_device *xe = to_xe_device(dev); 2174 struct xe_file *xef = to_xe_file(file); 2175 struct drm_xe_vm_destroy *args = data; 2176 struct xe_vm *vm; 2177 int err = 0; 2178 2179 if (XE_IOCTL_DBG(xe, args->pad) || 2180 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 2181 return -EINVAL; 2182 2183 mutex_lock(&xef->vm.lock); 2184 vm = xa_load(&xef->vm.xa, args->vm_id); 2185 if (XE_IOCTL_DBG(xe, !vm)) 2186 err = -ENOENT; 2187 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues)) 2188 err = -EBUSY; 2189 else 2190 xa_erase(&xef->vm.xa, args->vm_id); 2191 mutex_unlock(&xef->vm.lock); 2192 2193 if (!err) 2194 xe_vm_close_and_put(vm); 2195 2196 return err; 2197 } 2198 2199 static int xe_vm_query_vmas(struct xe_vm *vm, u64 start, u64 end) 2200 { 2201 struct drm_gpuva *gpuva; 2202 u32 num_vmas = 0; 2203 2204 lockdep_assert_held(&vm->lock); 2205 
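	/* Count the VMAs overlapping the requested range. */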
drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) 2206 num_vmas++; 2207 2208 return num_vmas; 2209 } 2210 2211 static int get_mem_attrs(struct xe_vm *vm, u32 *num_vmas, u64 start, 2212 u64 end, struct drm_xe_mem_range_attr *attrs) 2213 { 2214 struct drm_gpuva *gpuva; 2215 int i = 0; 2216 2217 lockdep_assert_held(&vm->lock); 2218 2219 drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) { 2220 struct xe_vma *vma = gpuva_to_vma(gpuva); 2221 2222 if (i == *num_vmas) 2223 return -ENOSPC; 2224 2225 attrs[i].start = xe_vma_start(vma); 2226 attrs[i].end = xe_vma_end(vma); 2227 attrs[i].atomic.val = vma->attr.atomic_access; 2228 attrs[i].pat_index.val = vma->attr.pat_index; 2229 attrs[i].preferred_mem_loc.devmem_fd = vma->attr.preferred_loc.devmem_fd; 2230 attrs[i].preferred_mem_loc.migration_policy = 2231 vma->attr.preferred_loc.migration_policy; 2232 2233 i++; 2234 } 2235 2236 *num_vmas = i; 2237 return 0; 2238 } 2239 2240 int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 2241 { 2242 struct xe_device *xe = to_xe_device(dev); 2243 struct xe_file *xef = to_xe_file(file); 2244 struct drm_xe_mem_range_attr *mem_attrs; 2245 struct drm_xe_vm_query_mem_range_attr *args = data; 2246 u64 __user *attrs_user = u64_to_user_ptr(args->vector_of_mem_attr); 2247 struct xe_vm *vm; 2248 int err = 0; 2249 2250 if (XE_IOCTL_DBG(xe, 2251 ((args->num_mem_ranges == 0 && 2252 (attrs_user || args->sizeof_mem_range_attr != 0)) || 2253 (args->num_mem_ranges > 0 && 2254 (!attrs_user || 2255 args->sizeof_mem_range_attr != 2256 sizeof(struct drm_xe_mem_range_attr)))))) 2257 return -EINVAL; 2258 2259 vm = xe_vm_lookup(xef, args->vm_id); 2260 if (XE_IOCTL_DBG(xe, !vm)) 2261 return -EINVAL; 2262 2263 err = down_read_interruptible(&vm->lock); 2264 if (err) 2265 goto put_vm; 2266 2267 attrs_user = u64_to_user_ptr(args->vector_of_mem_attr); 2268 2269 if (args->num_mem_ranges == 0 && !attrs_user) { 2270 args->num_mem_ranges = xe_vm_query_vmas(vm, args->start, args->start + args->range); 2271 args->sizeof_mem_range_attr = sizeof(struct drm_xe_mem_range_attr); 2272 goto unlock_vm; 2273 } 2274 2275 mem_attrs = kvmalloc_array(args->num_mem_ranges, args->sizeof_mem_range_attr, 2276 GFP_KERNEL | __GFP_ACCOUNT | 2277 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 2278 if (!mem_attrs) { 2279 err = args->num_mem_ranges > 1 ? 
-ENOBUFS : -ENOMEM; 2280 goto unlock_vm; 2281 } 2282 2283 memset(mem_attrs, 0, args->num_mem_ranges * args->sizeof_mem_range_attr); 2284 err = get_mem_attrs(vm, &args->num_mem_ranges, args->start, 2285 args->start + args->range, mem_attrs); 2286 if (err) 2287 goto free_mem_attrs; 2288 2289 err = copy_to_user(attrs_user, mem_attrs, 2290 args->sizeof_mem_range_attr * args->num_mem_ranges); 2291 2292 free_mem_attrs: 2293 kvfree(mem_attrs); 2294 unlock_vm: 2295 up_read(&vm->lock); 2296 put_vm: 2297 xe_vm_put(vm); 2298 return err; 2299 } 2300 2301 static bool vma_matches(struct xe_vma *vma, u64 page_addr) 2302 { 2303 if (page_addr > xe_vma_end(vma) - 1 || 2304 page_addr + SZ_4K - 1 < xe_vma_start(vma)) 2305 return false; 2306 2307 return true; 2308 } 2309 2310 /** 2311 * xe_vm_find_vma_by_addr() - Find a VMA by its address 2312 * 2313 * @vm: the xe_vm the vma belongs to 2314 * @page_addr: address to look up 2315 */ 2316 struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr) 2317 { 2318 struct xe_vma *vma = NULL; 2319 2320 if (vm->usm.last_fault_vma) { /* Fast lookup */ 2321 if (vma_matches(vm->usm.last_fault_vma, page_addr)) 2322 vma = vm->usm.last_fault_vma; 2323 } 2324 if (!vma) 2325 vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K); 2326 2327 return vma; 2328 } 2329 2330 static const u32 region_to_mem_type[] = { 2331 XE_PL_TT, 2332 XE_PL_VRAM0, 2333 XE_PL_VRAM1, 2334 }; 2335 2336 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma, 2337 bool post_commit) 2338 { 2339 down_read(&vm->userptr.notifier_lock); 2340 vma->gpuva.flags |= XE_VMA_DESTROYED; 2341 up_read(&vm->userptr.notifier_lock); 2342 if (post_commit) 2343 xe_vm_remove_vma(vm, vma); 2344 } 2345 2346 #undef ULL 2347 #define ULL unsigned long long 2348 2349 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) 2350 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 2351 { 2352 struct xe_vma *vma; 2353 2354 switch (op->op) { 2355 case DRM_GPUVA_OP_MAP: 2356 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx", 2357 (ULL)op->map.va.addr, (ULL)op->map.va.range); 2358 break; 2359 case DRM_GPUVA_OP_REMAP: 2360 vma = gpuva_to_vma(op->remap.unmap->va); 2361 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 2362 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 2363 op->remap.unmap->keep ? 1 : 0); 2364 if (op->remap.prev) 2365 vm_dbg(&xe->drm, 2366 "REMAP:PREV: addr=0x%016llx, range=0x%016llx", 2367 (ULL)op->remap.prev->va.addr, 2368 (ULL)op->remap.prev->va.range); 2369 if (op->remap.next) 2370 vm_dbg(&xe->drm, 2371 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx", 2372 (ULL)op->remap.next->va.addr, 2373 (ULL)op->remap.next->va.range); 2374 break; 2375 case DRM_GPUVA_OP_UNMAP: 2376 vma = gpuva_to_vma(op->unmap.va); 2377 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 2378 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 2379 op->unmap.keep ? 
1 : 0); 2380 break; 2381 case DRM_GPUVA_OP_PREFETCH: 2382 vma = gpuva_to_vma(op->prefetch.va); 2383 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx", 2384 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma)); 2385 break; 2386 default: 2387 drm_warn(&xe->drm, "NOT POSSIBLE"); 2388 } 2389 } 2390 #else 2391 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 2392 { 2393 } 2394 #endif 2395 2396 static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags) 2397 { 2398 if (!xe_vm_in_fault_mode(vm)) 2399 return false; 2400 2401 if (!xe_vm_has_scratch(vm)) 2402 return false; 2403 2404 if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE) 2405 return false; 2406 2407 return true; 2408 } 2409 2410 static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops) 2411 { 2412 struct drm_gpuva_op *__op; 2413 2414 drm_gpuva_for_each_op(__op, ops) { 2415 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2416 2417 xe_vma_svm_prefetch_op_fini(op); 2418 } 2419 } 2420 2421 /* 2422 * Create operations list from IOCTL arguments, setup operations fields so parse 2423 * and commit steps are decoupled from IOCTL arguments. This step can fail. 2424 */ 2425 static struct drm_gpuva_ops * 2426 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, 2427 struct xe_bo *bo, u64 bo_offset_or_userptr, 2428 u64 addr, u64 range, 2429 u32 operation, u32 flags, 2430 u32 prefetch_region, u16 pat_index) 2431 { 2432 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL; 2433 struct drm_gpuva_ops *ops; 2434 struct drm_gpuva_op *__op; 2435 struct drm_gpuvm_bo *vm_bo; 2436 u64 range_end = addr + range; 2437 int err; 2438 2439 lockdep_assert_held_write(&vm->lock); 2440 2441 vm_dbg(&vm->xe->drm, 2442 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx", 2443 operation, (ULL)addr, (ULL)range, 2444 (ULL)bo_offset_or_userptr); 2445 2446 switch (operation) { 2447 case DRM_XE_VM_BIND_OP_MAP: 2448 case DRM_XE_VM_BIND_OP_MAP_USERPTR: { 2449 struct drm_gpuvm_map_req map_req = { 2450 .map.va.addr = addr, 2451 .map.va.range = range, 2452 .map.gem.obj = obj, 2453 .map.gem.offset = bo_offset_or_userptr, 2454 }; 2455 2456 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, &map_req); 2457 break; 2458 } 2459 case DRM_XE_VM_BIND_OP_UNMAP: 2460 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range); 2461 break; 2462 case DRM_XE_VM_BIND_OP_PREFETCH: 2463 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range); 2464 break; 2465 case DRM_XE_VM_BIND_OP_UNMAP_ALL: 2466 xe_assert(vm->xe, bo); 2467 2468 err = xe_bo_lock(bo, true); 2469 if (err) 2470 return ERR_PTR(err); 2471 2472 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj); 2473 if (IS_ERR(vm_bo)) { 2474 xe_bo_unlock(bo); 2475 return ERR_CAST(vm_bo); 2476 } 2477 2478 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo); 2479 drm_gpuvm_bo_put(vm_bo); 2480 xe_bo_unlock(bo); 2481 break; 2482 default: 2483 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2484 ops = ERR_PTR(-EINVAL); 2485 } 2486 if (IS_ERR(ops)) 2487 return ops; 2488 2489 drm_gpuva_for_each_op(__op, ops) { 2490 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2491 2492 if (__op->op == DRM_GPUVA_OP_MAP) { 2493 op->map.immediate = 2494 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; 2495 op->map.read_only = 2496 flags & DRM_XE_VM_BIND_FLAG_READONLY; 2497 op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 2498 op->map.is_cpu_addr_mirror = flags & 2499 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR; 2500 op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; 2501 op->map.pat_index = pat_index; 2502 
op->map.invalidate_on_bind = 2503 __xe_vm_needs_clear_scratch_pages(vm, flags); 2504 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { 2505 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2506 struct xe_tile *tile; 2507 struct xe_svm_range *svm_range; 2508 struct drm_gpusvm_ctx ctx = {}; 2509 struct drm_pagemap *dpagemap; 2510 u8 id, tile_mask = 0; 2511 u32 i; 2512 2513 if (!xe_vma_is_cpu_addr_mirror(vma)) { 2514 op->prefetch.region = prefetch_region; 2515 break; 2516 } 2517 2518 ctx.read_only = xe_vma_read_only(vma); 2519 ctx.devmem_possible = IS_DGFX(vm->xe) && 2520 IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); 2521 2522 for_each_tile(tile, vm->xe, id) 2523 tile_mask |= 0x1 << id; 2524 2525 xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC); 2526 op->prefetch_range.ranges_count = 0; 2527 tile = NULL; 2528 2529 if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) { 2530 dpagemap = xe_vma_resolve_pagemap(vma, 2531 xe_device_get_root_tile(vm->xe)); 2532 /* 2533 * TODO: Once multigpu support is enabled will need 2534 * something to dereference tile from dpagemap. 2535 */ 2536 if (dpagemap) 2537 tile = xe_device_get_root_tile(vm->xe); 2538 } else if (prefetch_region) { 2539 tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] - 2540 XE_PL_VRAM0]; 2541 } 2542 2543 op->prefetch_range.tile = tile; 2544 alloc_next_range: 2545 svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx); 2546 2547 if (PTR_ERR(svm_range) == -ENOENT) { 2548 u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma); 2549 2550 addr = ret == ULONG_MAX ? 0 : ret; 2551 if (addr) 2552 goto alloc_next_range; 2553 else 2554 goto print_op_label; 2555 } 2556 2557 if (IS_ERR(svm_range)) { 2558 err = PTR_ERR(svm_range); 2559 goto unwind_prefetch_ops; 2560 } 2561 2562 if (xe_svm_range_validate(vm, svm_range, tile_mask, !!tile)) { 2563 xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID"); 2564 goto check_next_range; 2565 } 2566 2567 err = xa_alloc(&op->prefetch_range.range, 2568 &i, svm_range, xa_limit_32b, 2569 GFP_KERNEL); 2570 2571 if (err) 2572 goto unwind_prefetch_ops; 2573 2574 op->prefetch_range.ranges_count++; 2575 vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH; 2576 xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED"); 2577 check_next_range: 2578 if (range_end > xe_svm_range_end(svm_range) && 2579 xe_svm_range_end(svm_range) < xe_vma_end(vma)) { 2580 addr = xe_svm_range_end(svm_range); 2581 goto alloc_next_range; 2582 } 2583 } 2584 print_op_label: 2585 print_op(vm->xe, __op); 2586 } 2587 2588 return ops; 2589 2590 unwind_prefetch_ops: 2591 xe_svm_prefetch_gpuva_ops_fini(ops); 2592 drm_gpuva_ops_free(&vm->gpuvm, ops); 2593 return ERR_PTR(err); 2594 } 2595 2596 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO); 2597 2598 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, 2599 struct xe_vma_mem_attr *attr, unsigned int flags) 2600 { 2601 struct xe_bo *bo = op->gem.obj ? 
gem_to_xe_bo(op->gem.obj) : NULL; 2602 struct drm_exec exec; 2603 struct xe_vma *vma; 2604 int err = 0; 2605 2606 lockdep_assert_held_write(&vm->lock); 2607 2608 if (bo) { 2609 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); 2610 drm_exec_until_all_locked(&exec) { 2611 err = 0; 2612 if (!bo->vm) { 2613 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm)); 2614 drm_exec_retry_on_contention(&exec); 2615 } 2616 if (!err) { 2617 err = drm_exec_lock_obj(&exec, &bo->ttm.base); 2618 drm_exec_retry_on_contention(&exec); 2619 } 2620 if (err) { 2621 drm_exec_fini(&exec); 2622 return ERR_PTR(err); 2623 } 2624 } 2625 } 2626 vma = xe_vma_create(vm, bo, op->gem.offset, 2627 op->va.addr, op->va.addr + 2628 op->va.range - 1, attr, flags); 2629 if (IS_ERR(vma)) 2630 goto err_unlock; 2631 2632 if (xe_vma_is_userptr(vma)) 2633 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2634 else if (!xe_vma_has_no_bo(vma) && !bo->vm) 2635 err = add_preempt_fences(vm, bo); 2636 2637 err_unlock: 2638 if (bo) 2639 drm_exec_fini(&exec); 2640 2641 if (err) { 2642 prep_vma_destroy(vm, vma, false); 2643 xe_vma_destroy_unlocked(vma); 2644 vma = ERR_PTR(err); 2645 } 2646 2647 return vma; 2648 } 2649 2650 static u64 xe_vma_max_pte_size(struct xe_vma *vma) 2651 { 2652 if (vma->gpuva.flags & XE_VMA_PTE_1G) 2653 return SZ_1G; 2654 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT)) 2655 return SZ_2M; 2656 else if (vma->gpuva.flags & XE_VMA_PTE_64K) 2657 return SZ_64K; 2658 else if (vma->gpuva.flags & XE_VMA_PTE_4K) 2659 return SZ_4K; 2660 2661 return SZ_1G; /* Uninitialized, used max size */ 2662 } 2663 2664 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size) 2665 { 2666 switch (size) { 2667 case SZ_1G: 2668 vma->gpuva.flags |= XE_VMA_PTE_1G; 2669 break; 2670 case SZ_2M: 2671 vma->gpuva.flags |= XE_VMA_PTE_2M; 2672 break; 2673 case SZ_64K: 2674 vma->gpuva.flags |= XE_VMA_PTE_64K; 2675 break; 2676 case SZ_4K: 2677 vma->gpuva.flags |= XE_VMA_PTE_4K; 2678 break; 2679 } 2680 } 2681 2682 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) 2683 { 2684 int err = 0; 2685 2686 lockdep_assert_held_write(&vm->lock); 2687 2688 switch (op->base.op) { 2689 case DRM_GPUVA_OP_MAP: 2690 err |= xe_vm_insert_vma(vm, op->map.vma); 2691 if (!err) 2692 op->flags |= XE_VMA_OP_COMMITTED; 2693 break; 2694 case DRM_GPUVA_OP_REMAP: 2695 { 2696 u8 tile_present = 2697 gpuva_to_vma(op->base.remap.unmap->va)->tile_present; 2698 2699 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va), 2700 true); 2701 op->flags |= XE_VMA_OP_COMMITTED; 2702 2703 if (op->remap.prev) { 2704 err |= xe_vm_insert_vma(vm, op->remap.prev); 2705 if (!err) 2706 op->flags |= XE_VMA_OP_PREV_COMMITTED; 2707 if (!err && op->remap.skip_prev) { 2708 op->remap.prev->tile_present = 2709 tile_present; 2710 op->remap.prev = NULL; 2711 } 2712 } 2713 if (op->remap.next) { 2714 err |= xe_vm_insert_vma(vm, op->remap.next); 2715 if (!err) 2716 op->flags |= XE_VMA_OP_NEXT_COMMITTED; 2717 if (!err && op->remap.skip_next) { 2718 op->remap.next->tile_present = 2719 tile_present; 2720 op->remap.next = NULL; 2721 } 2722 } 2723 2724 /* Adjust for partial unbind after removing VMA from VM */ 2725 if (!err) { 2726 op->base.remap.unmap->va->va.addr = op->remap.start; 2727 op->base.remap.unmap->va->va.range = op->remap.range; 2728 } 2729 break; 2730 } 2731 case DRM_GPUVA_OP_UNMAP: 2732 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true); 2733 op->flags |= XE_VMA_OP_COMMITTED; 2734 break; 2735 case DRM_GPUVA_OP_PREFETCH: 2736 op->flags |= 
XE_VMA_OP_COMMITTED; 2737 break; 2738 default: 2739 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2740 } 2741 2742 return err; 2743 } 2744 2745 /** 2746 * xe_vma_has_default_mem_attrs - Check if a VMA has default memory attributes 2747 * @vma: Pointer to the xe_vma structure to check 2748 * 2749 * This function determines whether the given VMA (Virtual Memory Area) 2750 * has its memory attributes set to their default values. Specifically, 2751 * it checks the following conditions: 2752 * 2753 * - `atomic_access` is `DRM_XE_VMA_ATOMIC_UNDEFINED` 2754 * - `pat_index` is equal to `default_pat_index` 2755 * - `preferred_loc.devmem_fd` is `DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE` 2756 * - `preferred_loc.migration_policy` is `DRM_XE_MIGRATE_ALL_PAGES` 2757 * 2758 * Return: true if all attributes are at their default values, false otherwise. 2759 */ 2760 bool xe_vma_has_default_mem_attrs(struct xe_vma *vma) 2761 { 2762 return (vma->attr.atomic_access == DRM_XE_ATOMIC_UNDEFINED && 2763 vma->attr.pat_index == vma->attr.default_pat_index && 2764 vma->attr.preferred_loc.devmem_fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE && 2765 vma->attr.preferred_loc.migration_policy == DRM_XE_MIGRATE_ALL_PAGES); 2766 } 2767 2768 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, 2769 struct xe_vma_ops *vops) 2770 { 2771 struct xe_device *xe = vm->xe; 2772 struct drm_gpuva_op *__op; 2773 struct xe_tile *tile; 2774 u8 id, tile_mask = 0; 2775 int err = 0; 2776 2777 lockdep_assert_held_write(&vm->lock); 2778 2779 for_each_tile(tile, vm->xe, id) 2780 tile_mask |= 0x1 << id; 2781 2782 drm_gpuva_for_each_op(__op, ops) { 2783 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2784 struct xe_vma *vma; 2785 unsigned int flags = 0; 2786 2787 INIT_LIST_HEAD(&op->link); 2788 list_add_tail(&op->link, &vops->list); 2789 op->tile_mask = tile_mask; 2790 2791 switch (op->base.op) { 2792 case DRM_GPUVA_OP_MAP: 2793 { 2794 struct xe_vma_mem_attr default_attr = { 2795 .preferred_loc = { 2796 .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE, 2797 .migration_policy = DRM_XE_MIGRATE_ALL_PAGES, 2798 }, 2799 .atomic_access = DRM_XE_ATOMIC_UNDEFINED, 2800 .default_pat_index = op->map.pat_index, 2801 .pat_index = op->map.pat_index, 2802 }; 2803 2804 flags |= op->map.read_only ? 2805 VMA_CREATE_FLAG_READ_ONLY : 0; 2806 flags |= op->map.is_null ? 2807 VMA_CREATE_FLAG_IS_NULL : 0; 2808 flags |= op->map.dumpable ? 2809 VMA_CREATE_FLAG_DUMPABLE : 0; 2810 flags |= op->map.is_cpu_addr_mirror ? 
2811 VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0; 2812 2813 vma = new_vma(vm, &op->base.map, &default_attr, 2814 flags); 2815 if (IS_ERR(vma)) 2816 return PTR_ERR(vma); 2817 2818 op->map.vma = vma; 2819 if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) && 2820 !op->map.is_cpu_addr_mirror) || 2821 op->map.invalidate_on_bind) 2822 xe_vma_ops_incr_pt_update_ops(vops, 2823 op->tile_mask, 1); 2824 break; 2825 } 2826 case DRM_GPUVA_OP_REMAP: 2827 { 2828 struct xe_vma *old = 2829 gpuva_to_vma(op->base.remap.unmap->va); 2830 bool skip = xe_vma_is_cpu_addr_mirror(old); 2831 u64 start = xe_vma_start(old), end = xe_vma_end(old); 2832 int num_remap_ops = 0; 2833 2834 if (op->base.remap.prev) 2835 start = op->base.remap.prev->va.addr + 2836 op->base.remap.prev->va.range; 2837 if (op->base.remap.next) 2838 end = op->base.remap.next->va.addr; 2839 2840 if (xe_vma_is_cpu_addr_mirror(old) && 2841 xe_svm_has_mapping(vm, start, end)) { 2842 if (vops->flags & XE_VMA_OPS_FLAG_MADVISE) 2843 xe_svm_unmap_address_range(vm, start, end); 2844 else 2845 return -EBUSY; 2846 } 2847 2848 op->remap.start = xe_vma_start(old); 2849 op->remap.range = xe_vma_size(old); 2850 2851 flags |= op->base.remap.unmap->va->flags & 2852 XE_VMA_READ_ONLY ? 2853 VMA_CREATE_FLAG_READ_ONLY : 0; 2854 flags |= op->base.remap.unmap->va->flags & 2855 DRM_GPUVA_SPARSE ? 2856 VMA_CREATE_FLAG_IS_NULL : 0; 2857 flags |= op->base.remap.unmap->va->flags & 2858 XE_VMA_DUMPABLE ? 2859 VMA_CREATE_FLAG_DUMPABLE : 0; 2860 flags |= xe_vma_is_cpu_addr_mirror(old) ? 2861 VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0; 2862 2863 if (op->base.remap.prev) { 2864 vma = new_vma(vm, op->base.remap.prev, 2865 &old->attr, flags); 2866 if (IS_ERR(vma)) 2867 return PTR_ERR(vma); 2868 2869 op->remap.prev = vma; 2870 2871 /* 2872 * Userptr creates a new SG mapping so 2873 * we must also rebind. 2874 */ 2875 op->remap.skip_prev = skip || 2876 (!xe_vma_is_userptr(old) && 2877 IS_ALIGNED(xe_vma_end(vma), 2878 xe_vma_max_pte_size(old))); 2879 if (op->remap.skip_prev) { 2880 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2881 op->remap.range -= 2882 xe_vma_end(vma) - 2883 xe_vma_start(old); 2884 op->remap.start = xe_vma_end(vma); 2885 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx", 2886 (ULL)op->remap.start, 2887 (ULL)op->remap.range); 2888 } else { 2889 num_remap_ops++; 2890 } 2891 } 2892 2893 if (op->base.remap.next) { 2894 vma = new_vma(vm, op->base.remap.next, 2895 &old->attr, flags); 2896 if (IS_ERR(vma)) 2897 return PTR_ERR(vma); 2898 2899 op->remap.next = vma; 2900 2901 /* 2902 * Userptr creates a new SG mapping so 2903 * we must also rebind. 
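 * The rebind can also be skipped for non-userptr VMAs when the new
 * VMA's start remains aligned to the old VMA's maximum PTE size,
 * since the existing page-table layout stays usable.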
2904 */ 2905 op->remap.skip_next = skip || 2906 (!xe_vma_is_userptr(old) && 2907 IS_ALIGNED(xe_vma_start(vma), 2908 xe_vma_max_pte_size(old))); 2909 if (op->remap.skip_next) { 2910 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2911 op->remap.range -= 2912 xe_vma_end(old) - 2913 xe_vma_start(vma); 2914 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx", 2915 (ULL)op->remap.start, 2916 (ULL)op->remap.range); 2917 } else { 2918 num_remap_ops++; 2919 } 2920 } 2921 if (!skip) 2922 num_remap_ops++; 2923 2924 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops); 2925 break; 2926 } 2927 case DRM_GPUVA_OP_UNMAP: 2928 vma = gpuva_to_vma(op->base.unmap.va); 2929 2930 if (xe_vma_is_cpu_addr_mirror(vma) && 2931 xe_svm_has_mapping(vm, xe_vma_start(vma), 2932 xe_vma_end(vma))) 2933 return -EBUSY; 2934 2935 if (!xe_vma_is_cpu_addr_mirror(vma)) 2936 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2937 break; 2938 case DRM_GPUVA_OP_PREFETCH: 2939 vma = gpuva_to_vma(op->base.prefetch.va); 2940 2941 if (xe_vma_is_userptr(vma)) { 2942 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2943 if (err) 2944 return err; 2945 } 2946 2947 if (xe_vma_is_cpu_addr_mirror(vma)) 2948 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 2949 op->prefetch_range.ranges_count); 2950 else 2951 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2952 2953 break; 2954 default: 2955 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2956 } 2957 2958 err = xe_vma_op_commit(vm, op); 2959 if (err) 2960 return err; 2961 } 2962 2963 return 0; 2964 } 2965 2966 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, 2967 bool post_commit, bool prev_post_commit, 2968 bool next_post_commit) 2969 { 2970 lockdep_assert_held_write(&vm->lock); 2971 2972 switch (op->base.op) { 2973 case DRM_GPUVA_OP_MAP: 2974 if (op->map.vma) { 2975 prep_vma_destroy(vm, op->map.vma, post_commit); 2976 xe_vma_destroy_unlocked(op->map.vma); 2977 } 2978 break; 2979 case DRM_GPUVA_OP_UNMAP: 2980 { 2981 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va); 2982 2983 if (vma) { 2984 down_read(&vm->userptr.notifier_lock); 2985 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2986 up_read(&vm->userptr.notifier_lock); 2987 if (post_commit) 2988 xe_vm_insert_vma(vm, vma); 2989 } 2990 break; 2991 } 2992 case DRM_GPUVA_OP_REMAP: 2993 { 2994 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va); 2995 2996 if (op->remap.prev) { 2997 prep_vma_destroy(vm, op->remap.prev, prev_post_commit); 2998 xe_vma_destroy_unlocked(op->remap.prev); 2999 } 3000 if (op->remap.next) { 3001 prep_vma_destroy(vm, op->remap.next, next_post_commit); 3002 xe_vma_destroy_unlocked(op->remap.next); 3003 } 3004 if (vma) { 3005 down_read(&vm->userptr.notifier_lock); 3006 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 3007 up_read(&vm->userptr.notifier_lock); 3008 if (post_commit) 3009 xe_vm_insert_vma(vm, vma); 3010 } 3011 break; 3012 } 3013 case DRM_GPUVA_OP_PREFETCH: 3014 /* Nothing to do */ 3015 break; 3016 default: 3017 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 3018 } 3019 } 3020 3021 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, 3022 struct drm_gpuva_ops **ops, 3023 int num_ops_list) 3024 { 3025 int i; 3026 3027 for (i = num_ops_list - 1; i >= 0; --i) { 3028 struct drm_gpuva_ops *__ops = ops[i]; 3029 struct drm_gpuva_op *__op; 3030 3031 if (!__ops) 3032 continue; 3033 3034 drm_gpuva_for_each_op_reverse(__op, __ops) { 3035 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 3036 3037 xe_vma_op_unwind(vm, op, 3038 op->flags & XE_VMA_OP_COMMITTED, 3039 op->flags 
& XE_VMA_OP_PREV_COMMITTED, 3040 op->flags & XE_VMA_OP_NEXT_COMMITTED); 3041 } 3042 } 3043 } 3044 3045 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, 3046 bool validate) 3047 { 3048 struct xe_bo *bo = xe_vma_bo(vma); 3049 struct xe_vm *vm = xe_vma_vm(vma); 3050 int err = 0; 3051 3052 if (bo) { 3053 if (!bo->vm) 3054 err = drm_exec_lock_obj(exec, &bo->ttm.base); 3055 if (!err && validate) 3056 err = xe_bo_validate(bo, vm, 3057 !xe_vm_in_preempt_fence_mode(vm)); 3058 } 3059 3060 return err; 3061 } 3062 3063 static int check_ufence(struct xe_vma *vma) 3064 { 3065 if (vma->ufence) { 3066 struct xe_user_fence * const f = vma->ufence; 3067 3068 if (!xe_sync_ufence_get_status(f)) 3069 return -EBUSY; 3070 3071 vma->ufence = NULL; 3072 xe_sync_ufence_put(f); 3073 } 3074 3075 return 0; 3076 } 3077 3078 static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) 3079 { 3080 bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); 3081 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 3082 struct xe_tile *tile = op->prefetch_range.tile; 3083 int err = 0; 3084 3085 struct xe_svm_range *svm_range; 3086 struct drm_gpusvm_ctx ctx = {}; 3087 unsigned long i; 3088 3089 if (!xe_vma_is_cpu_addr_mirror(vma)) 3090 return 0; 3091 3092 ctx.read_only = xe_vma_read_only(vma); 3093 ctx.devmem_possible = devmem_possible; 3094 ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0; 3095 3096 /* TODO: Threading the migration */ 3097 xa_for_each(&op->prefetch_range.range, i, svm_range) { 3098 if (!tile) 3099 xe_svm_range_migrate_to_smem(vm, svm_range); 3100 3101 if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, !!tile)) { 3102 err = xe_svm_alloc_vram(tile, svm_range, &ctx); 3103 if (err) { 3104 drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n", 3105 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 3106 return -ENODATA; 3107 } 3108 xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM"); 3109 } 3110 3111 err = xe_svm_range_get_pages(vm, svm_range, &ctx); 3112 if (err) { 3113 drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n", 3114 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 3115 if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) 3116 err = -ENODATA; 3117 return err; 3118 } 3119 xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE"); 3120 } 3121 3122 return err; 3123 } 3124 3125 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, 3126 struct xe_vma_op *op) 3127 { 3128 int err = 0; 3129 3130 switch (op->base.op) { 3131 case DRM_GPUVA_OP_MAP: 3132 if (!op->map.invalidate_on_bind) 3133 err = vma_lock_and_validate(exec, op->map.vma, 3134 !xe_vm_in_fault_mode(vm) || 3135 op->map.immediate); 3136 break; 3137 case DRM_GPUVA_OP_REMAP: 3138 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va)); 3139 if (err) 3140 break; 3141 3142 err = vma_lock_and_validate(exec, 3143 gpuva_to_vma(op->base.remap.unmap->va), 3144 false); 3145 if (!err && op->remap.prev) 3146 err = vma_lock_and_validate(exec, op->remap.prev, true); 3147 if (!err && op->remap.next) 3148 err = vma_lock_and_validate(exec, op->remap.next, true); 3149 break; 3150 case DRM_GPUVA_OP_UNMAP: 3151 err = check_ufence(gpuva_to_vma(op->base.unmap.va)); 3152 if (err) 3153 break; 3154 3155 err = vma_lock_and_validate(exec, 3156 gpuva_to_vma(op->base.unmap.va), 3157 false); 3158 break; 3159 case DRM_GPUVA_OP_PREFETCH: 3160 { 3161 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 3162 
u32 region; 3163 3164 if (!xe_vma_is_cpu_addr_mirror(vma)) { 3165 region = op->prefetch.region; 3166 xe_assert(vm->xe, region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC || 3167 region <= ARRAY_SIZE(region_to_mem_type)); 3168 } 3169 3170 err = vma_lock_and_validate(exec, 3171 gpuva_to_vma(op->base.prefetch.va), 3172 false); 3173 if (!err && !xe_vma_has_no_bo(vma)) 3174 err = xe_bo_migrate(xe_vma_bo(vma), 3175 region_to_mem_type[region]); 3176 break; 3177 } 3178 default: 3179 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 3180 } 3181 3182 return err; 3183 } 3184 3185 static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops) 3186 { 3187 struct xe_vma_op *op; 3188 int err; 3189 3190 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) 3191 return 0; 3192 3193 list_for_each_entry(op, &vops->list, link) { 3194 if (op->base.op == DRM_GPUVA_OP_PREFETCH) { 3195 err = prefetch_ranges(vm, op); 3196 if (err) 3197 return err; 3198 } 3199 } 3200 3201 return 0; 3202 } 3203 3204 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, 3205 struct xe_vm *vm, 3206 struct xe_vma_ops *vops) 3207 { 3208 struct xe_vma_op *op; 3209 int err; 3210 3211 err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 3212 if (err) 3213 return err; 3214 3215 list_for_each_entry(op, &vops->list, link) { 3216 err = op_lock_and_prep(exec, vm, op); 3217 if (err) 3218 return err; 3219 } 3220 3221 #ifdef TEST_VM_OPS_ERROR 3222 if (vops->inject_error && 3223 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK) 3224 return -ENOSPC; 3225 #endif 3226 3227 return 0; 3228 } 3229 3230 static void op_trace(struct xe_vma_op *op) 3231 { 3232 switch (op->base.op) { 3233 case DRM_GPUVA_OP_MAP: 3234 trace_xe_vma_bind(op->map.vma); 3235 break; 3236 case DRM_GPUVA_OP_REMAP: 3237 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va)); 3238 if (op->remap.prev) 3239 trace_xe_vma_bind(op->remap.prev); 3240 if (op->remap.next) 3241 trace_xe_vma_bind(op->remap.next); 3242 break; 3243 case DRM_GPUVA_OP_UNMAP: 3244 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va)); 3245 break; 3246 case DRM_GPUVA_OP_PREFETCH: 3247 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va)); 3248 break; 3249 case DRM_GPUVA_OP_DRIVER: 3250 break; 3251 default: 3252 XE_WARN_ON("NOT POSSIBLE"); 3253 } 3254 } 3255 3256 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops) 3257 { 3258 struct xe_vma_op *op; 3259 3260 list_for_each_entry(op, &vops->list, link) 3261 op_trace(op); 3262 } 3263 3264 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops) 3265 { 3266 struct xe_exec_queue *q = vops->q; 3267 struct xe_tile *tile; 3268 int number_tiles = 0; 3269 u8 id; 3270 3271 for_each_tile(tile, vm->xe, id) { 3272 if (vops->pt_update_ops[id].num_ops) 3273 ++number_tiles; 3274 3275 if (vops->pt_update_ops[id].q) 3276 continue; 3277 3278 if (q) { 3279 vops->pt_update_ops[id].q = q; 3280 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list)) 3281 q = list_next_entry(q, multi_gt_list); 3282 } else { 3283 vops->pt_update_ops[id].q = vm->q[id]; 3284 } 3285 } 3286 3287 return number_tiles; 3288 } 3289 3290 static struct dma_fence *ops_execute(struct xe_vm *vm, 3291 struct xe_vma_ops *vops) 3292 { 3293 struct xe_tile *tile; 3294 struct dma_fence *fence = NULL; 3295 struct dma_fence **fences = NULL; 3296 struct dma_fence_array *cf = NULL; 3297 int number_tiles = 0, current_fence = 0, err; 3298 u8 id; 3299 3300 number_tiles = vm_ops_setup_tile_args(vm, vops); 3301 if (number_tiles == 0) 3302 return ERR_PTR(-ENODATA); 3303 3304 if 
(number_tiles > 1) { 3305 fences = kmalloc_array(number_tiles, sizeof(*fences), 3306 GFP_KERNEL); 3307 if (!fences) { 3308 fence = ERR_PTR(-ENOMEM); 3309 goto err_trace; 3310 } 3311 } 3312 3313 for_each_tile(tile, vm->xe, id) { 3314 if (!vops->pt_update_ops[id].num_ops) 3315 continue; 3316 3317 err = xe_pt_update_ops_prepare(tile, vops); 3318 if (err) { 3319 fence = ERR_PTR(err); 3320 goto err_out; 3321 } 3322 } 3323 3324 trace_xe_vm_ops_execute(vops); 3325 3326 for_each_tile(tile, vm->xe, id) { 3327 if (!vops->pt_update_ops[id].num_ops) 3328 continue; 3329 3330 fence = xe_pt_update_ops_run(tile, vops); 3331 if (IS_ERR(fence)) 3332 goto err_out; 3333 3334 if (fences) 3335 fences[current_fence++] = fence; 3336 } 3337 3338 if (fences) { 3339 cf = dma_fence_array_create(number_tiles, fences, 3340 vm->composite_fence_ctx, 3341 vm->composite_fence_seqno++, 3342 false); 3343 if (!cf) { 3344 --vm->composite_fence_seqno; 3345 fence = ERR_PTR(-ENOMEM); 3346 goto err_out; 3347 } 3348 fence = &cf->base; 3349 } 3350 3351 for_each_tile(tile, vm->xe, id) { 3352 if (!vops->pt_update_ops[id].num_ops) 3353 continue; 3354 3355 xe_pt_update_ops_fini(tile, vops); 3356 } 3357 3358 return fence; 3359 3360 err_out: 3361 for_each_tile(tile, vm->xe, id) { 3362 if (!vops->pt_update_ops[id].num_ops) 3363 continue; 3364 3365 xe_pt_update_ops_abort(tile, vops); 3366 } 3367 while (current_fence) 3368 dma_fence_put(fences[--current_fence]); 3369 kfree(fences); 3370 kfree(cf); 3371 3372 err_trace: 3373 trace_xe_vm_ops_fail(vm); 3374 return fence; 3375 } 3376 3377 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence) 3378 { 3379 if (vma->ufence) 3380 xe_sync_ufence_put(vma->ufence); 3381 vma->ufence = __xe_sync_ufence_get(ufence); 3382 } 3383 3384 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op, 3385 struct xe_user_fence *ufence) 3386 { 3387 switch (op->base.op) { 3388 case DRM_GPUVA_OP_MAP: 3389 vma_add_ufence(op->map.vma, ufence); 3390 break; 3391 case DRM_GPUVA_OP_REMAP: 3392 if (op->remap.prev) 3393 vma_add_ufence(op->remap.prev, ufence); 3394 if (op->remap.next) 3395 vma_add_ufence(op->remap.next, ufence); 3396 break; 3397 case DRM_GPUVA_OP_UNMAP: 3398 break; 3399 case DRM_GPUVA_OP_PREFETCH: 3400 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence); 3401 break; 3402 default: 3403 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 3404 } 3405 } 3406 3407 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, 3408 struct dma_fence *fence) 3409 { 3410 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q); 3411 struct xe_user_fence *ufence; 3412 struct xe_vma_op *op; 3413 int i; 3414 3415 ufence = find_ufence_get(vops->syncs, vops->num_syncs); 3416 list_for_each_entry(op, &vops->list, link) { 3417 if (ufence) 3418 op_add_ufence(vm, op, ufence); 3419 3420 if (op->base.op == DRM_GPUVA_OP_UNMAP) 3421 xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence); 3422 else if (op->base.op == DRM_GPUVA_OP_REMAP) 3423 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), 3424 fence); 3425 } 3426 if (ufence) 3427 xe_sync_ufence_put(ufence); 3428 if (fence) { 3429 for (i = 0; i < vops->num_syncs; i++) 3430 xe_sync_entry_signal(vops->syncs + i, fence); 3431 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); 3432 } 3433 } 3434 3435 static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm, 3436 struct xe_vma_ops *vops) 3437 { 3438 struct drm_exec exec; 3439 struct dma_fence *fence; 3440 int err; 3441 3442 lockdep_assert_held_write(&vm->lock); 
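	/*
	 * Lock the VM's dma-resv and every BO touched by the ops via
	 * drm_exec, retrying on contention, then run the page-table
	 * updates. ops_execute() returning -ENODATA means no tile had
	 * work to submit; the ops are still finalized, just without a
	 * fence.
	 */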
3443 3444 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | 3445 DRM_EXEC_IGNORE_DUPLICATES, 0); 3446 drm_exec_until_all_locked(&exec) { 3447 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops); 3448 drm_exec_retry_on_contention(&exec); 3449 if (err) { 3450 fence = ERR_PTR(err); 3451 goto unlock; 3452 } 3453 3454 fence = ops_execute(vm, vops); 3455 if (IS_ERR(fence)) { 3456 if (PTR_ERR(fence) == -ENODATA) 3457 vm_bind_ioctl_ops_fini(vm, vops, NULL); 3458 goto unlock; 3459 } 3460 3461 vm_bind_ioctl_ops_fini(vm, vops, fence); 3462 } 3463 3464 unlock: 3465 drm_exec_fini(&exec); 3466 return fence; 3467 } 3468 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); 3469 3470 #define SUPPORTED_FLAGS_STUB \ 3471 (DRM_XE_VM_BIND_FLAG_READONLY | \ 3472 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \ 3473 DRM_XE_VM_BIND_FLAG_NULL | \ 3474 DRM_XE_VM_BIND_FLAG_DUMPABLE | \ 3475 DRM_XE_VM_BIND_FLAG_CHECK_PXP | \ 3476 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) 3477 3478 #ifdef TEST_VM_OPS_ERROR 3479 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR) 3480 #else 3481 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB 3482 #endif 3483 3484 #define XE_64K_PAGE_MASK 0xffffull 3485 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) 3486 3487 static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, 3488 struct drm_xe_vm_bind *args, 3489 struct drm_xe_vm_bind_op **bind_ops) 3490 { 3491 int err; 3492 int i; 3493 3494 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) || 3495 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 3496 return -EINVAL; 3497 3498 if (XE_IOCTL_DBG(xe, args->extensions)) 3499 return -EINVAL; 3500 3501 if (args->num_binds > 1) { 3502 u64 __user *bind_user = 3503 u64_to_user_ptr(args->vector_of_binds); 3504 3505 *bind_ops = kvmalloc_array(args->num_binds, 3506 sizeof(struct drm_xe_vm_bind_op), 3507 GFP_KERNEL | __GFP_ACCOUNT | 3508 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3509 if (!*bind_ops) 3510 return args->num_binds > 1 ? 
-ENOBUFS : -ENOMEM; 3511 3512 err = copy_from_user(*bind_ops, bind_user, 3513 sizeof(struct drm_xe_vm_bind_op) * 3514 args->num_binds); 3515 if (XE_IOCTL_DBG(xe, err)) { 3516 err = -EFAULT; 3517 goto free_bind_ops; 3518 } 3519 } else { 3520 *bind_ops = &args->bind; 3521 } 3522 3523 for (i = 0; i < args->num_binds; ++i) { 3524 u64 range = (*bind_ops)[i].range; 3525 u64 addr = (*bind_ops)[i].addr; 3526 u32 op = (*bind_ops)[i].op; 3527 u32 flags = (*bind_ops)[i].flags; 3528 u32 obj = (*bind_ops)[i].obj; 3529 u64 obj_offset = (*bind_ops)[i].obj_offset; 3530 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance; 3531 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 3532 bool is_cpu_addr_mirror = flags & 3533 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR; 3534 u16 pat_index = (*bind_ops)[i].pat_index; 3535 u16 coh_mode; 3536 3537 if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror && 3538 (!xe_vm_in_fault_mode(vm) || 3539 !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) { 3540 err = -EINVAL; 3541 goto free_bind_ops; 3542 } 3543 3544 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) { 3545 err = -EINVAL; 3546 goto free_bind_ops; 3547 } 3548 3549 pat_index = array_index_nospec(pat_index, xe->pat.n_entries); 3550 (*bind_ops)[i].pat_index = pat_index; 3551 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); 3552 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */ 3553 err = -EINVAL; 3554 goto free_bind_ops; 3555 } 3556 3557 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) { 3558 err = -EINVAL; 3559 goto free_bind_ops; 3560 } 3561 3562 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) || 3563 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) || 3564 XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) || 3565 XE_IOCTL_DBG(xe, obj_offset && (is_null || 3566 is_cpu_addr_mirror)) || 3567 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP && 3568 (is_null || is_cpu_addr_mirror)) || 3569 XE_IOCTL_DBG(xe, !obj && 3570 op == DRM_XE_VM_BIND_OP_MAP && 3571 !is_null && !is_cpu_addr_mirror) || 3572 XE_IOCTL_DBG(xe, !obj && 3573 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3574 XE_IOCTL_DBG(xe, addr && 3575 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3576 XE_IOCTL_DBG(xe, range && 3577 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3578 XE_IOCTL_DBG(xe, obj && 3579 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 3580 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 3581 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 3582 XE_IOCTL_DBG(xe, obj && 3583 op == DRM_XE_VM_BIND_OP_PREFETCH) || 3584 XE_IOCTL_DBG(xe, prefetch_region && 3585 op != DRM_XE_VM_BIND_OP_PREFETCH) || 3586 XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC && 3587 !(BIT(prefetch_region) & xe->info.mem_region_mask))) || 3588 XE_IOCTL_DBG(xe, obj && 3589 op == DRM_XE_VM_BIND_OP_UNMAP)) { 3590 err = -EINVAL; 3591 goto free_bind_ops; 3592 } 3593 3594 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) || 3595 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) || 3596 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) || 3597 XE_IOCTL_DBG(xe, !range && 3598 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) { 3599 err = -EINVAL; 3600 goto free_bind_ops; 3601 } 3602 } 3603 3604 return 0; 3605 3606 free_bind_ops: 3607 if (args->num_binds > 1) 3608 kvfree(*bind_ops); 3609 *bind_ops = NULL; 3610 return err; 3611 } 3612 3613 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, 3614 struct xe_exec_queue *q, 3615 struct xe_sync_entry *syncs, 3616 int num_syncs) 3617 { 3618 struct dma_fence *fence; 3619 int i, err = 0; 3620 3621 fence = xe_sync_in_fence_get(syncs, num_syncs, 3622 to_wait_exec_queue(vm, q), vm); 3623 if (IS_ERR(fence)) 3624 
		return PTR_ERR(fence);
3625 
3626 	for (i = 0; i < num_syncs; i++)
3627 		xe_sync_entry_signal(&syncs[i], fence);
3628 
3629 	xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
3630 				     fence);
3631 	dma_fence_put(fence);
3632 
3633 	return err;
3634 }
3635 
3636 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
3637 			    struct xe_exec_queue *q,
3638 			    struct xe_sync_entry *syncs, u32 num_syncs)
3639 {
3640 	memset(vops, 0, sizeof(*vops));
3641 	INIT_LIST_HEAD(&vops->list);
3642 	vops->vm = vm;
3643 	vops->q = q;
3644 	vops->syncs = syncs;
3645 	vops->num_syncs = num_syncs;
3646 	vops->flags = 0;
3647 }
3648 
3649 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
3650 					u64 addr, u64 range, u64 obj_offset,
3651 					u16 pat_index, u32 op, u32 bind_flags)
3652 {
3653 	u16 coh_mode;
3654 
3655 	if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) ||
3656 	    XE_IOCTL_DBG(xe, obj_offset >
3657 			 xe_bo_size(bo) - range)) {
3658 		return -EINVAL;
3659 	}
3660 
3661 	/*
3662 	 * Some platforms require 64k VM_BIND alignment,
3663 	 * specifically those with XE_VRAM_FLAGS_NEED64K.
3664 	 *
3665 	 * Other platforms may have BOs set to 64k physical placement,
3666 	 * but can be mapped at 4k offsets anyway. This check is only
3667 	 * there for the former case.
3668 	 */
3669 	if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) &&
3670 	    (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) {
3671 		if (XE_IOCTL_DBG(xe, obj_offset &
3672 				 XE_64K_PAGE_MASK) ||
3673 		    XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
3674 		    XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
3675 			return -EINVAL;
3676 		}
3677 	}
3678 
3679 	coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
3680 	if (bo->cpu_caching) {
3681 		if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
3682 				 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
3683 			return -EINVAL;
3684 		}
3685 	} else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
3686 		/*
3687 		 * Imported dma-buf from a different device should
3688 		 * require 1way or 2way coherency since we don't know
3689 		 * how it was mapped on the CPU. Just assume it is
3690 		 * potentially cached on CPU side.
3691 */ 3692 return -EINVAL; 3693 } 3694 3695 /* If a BO is protected it can only be mapped if the key is still valid */ 3696 if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) && 3697 op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL) 3698 if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0)) 3699 return -ENOEXEC; 3700 3701 return 0; 3702 } 3703 3704 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3705 { 3706 struct xe_device *xe = to_xe_device(dev); 3707 struct xe_file *xef = to_xe_file(file); 3708 struct drm_xe_vm_bind *args = data; 3709 struct drm_xe_sync __user *syncs_user; 3710 struct xe_bo **bos = NULL; 3711 struct drm_gpuva_ops **ops = NULL; 3712 struct xe_vm *vm; 3713 struct xe_exec_queue *q = NULL; 3714 u32 num_syncs, num_ufence = 0; 3715 struct xe_sync_entry *syncs = NULL; 3716 struct drm_xe_vm_bind_op *bind_ops = NULL; 3717 struct xe_vma_ops vops; 3718 struct dma_fence *fence; 3719 int err; 3720 int i; 3721 3722 vm = xe_vm_lookup(xef, args->vm_id); 3723 if (XE_IOCTL_DBG(xe, !vm)) 3724 return -EINVAL; 3725 3726 err = vm_bind_ioctl_check_args(xe, vm, args, &bind_ops); 3727 if (err) 3728 goto put_vm; 3729 3730 if (args->exec_queue_id) { 3731 q = xe_exec_queue_lookup(xef, args->exec_queue_id); 3732 if (XE_IOCTL_DBG(xe, !q)) { 3733 err = -ENOENT; 3734 goto free_bind_ops; 3735 } 3736 3737 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) { 3738 err = -EINVAL; 3739 goto put_exec_queue; 3740 } 3741 } 3742 3743 /* Ensure all UNMAPs visible */ 3744 xe_svm_flush(vm); 3745 3746 err = down_write_killable(&vm->lock); 3747 if (err) 3748 goto put_exec_queue; 3749 3750 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { 3751 err = -ENOENT; 3752 goto release_vm_lock; 3753 } 3754 3755 for (i = 0; i < args->num_binds; ++i) { 3756 u64 range = bind_ops[i].range; 3757 u64 addr = bind_ops[i].addr; 3758 3759 if (XE_IOCTL_DBG(xe, range > vm->size) || 3760 XE_IOCTL_DBG(xe, addr > vm->size - range)) { 3761 err = -EINVAL; 3762 goto release_vm_lock; 3763 } 3764 } 3765 3766 if (args->num_binds) { 3767 bos = kvcalloc(args->num_binds, sizeof(*bos), 3768 GFP_KERNEL | __GFP_ACCOUNT | 3769 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3770 if (!bos) { 3771 err = -ENOMEM; 3772 goto release_vm_lock; 3773 } 3774 3775 ops = kvcalloc(args->num_binds, sizeof(*ops), 3776 GFP_KERNEL | __GFP_ACCOUNT | 3777 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3778 if (!ops) { 3779 err = -ENOMEM; 3780 goto free_bos; 3781 } 3782 } 3783 3784 for (i = 0; i < args->num_binds; ++i) { 3785 struct drm_gem_object *gem_obj; 3786 u64 range = bind_ops[i].range; 3787 u64 addr = bind_ops[i].addr; 3788 u32 obj = bind_ops[i].obj; 3789 u64 obj_offset = bind_ops[i].obj_offset; 3790 u16 pat_index = bind_ops[i].pat_index; 3791 u32 op = bind_ops[i].op; 3792 u32 bind_flags = bind_ops[i].flags; 3793 3794 if (!obj) 3795 continue; 3796 3797 gem_obj = drm_gem_object_lookup(file, obj); 3798 if (XE_IOCTL_DBG(xe, !gem_obj)) { 3799 err = -ENOENT; 3800 goto put_obj; 3801 } 3802 bos[i] = gem_to_xe_bo(gem_obj); 3803 3804 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range, 3805 obj_offset, pat_index, op, 3806 bind_flags); 3807 if (err) 3808 goto put_obj; 3809 } 3810 3811 if (args->num_syncs) { 3812 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); 3813 if (!syncs) { 3814 err = -ENOMEM; 3815 goto put_obj; 3816 } 3817 } 3818 3819 syncs_user = u64_to_user_ptr(args->syncs); 3820 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { 3821 err = xe_sync_entry_parse(xe, 
xef, &syncs[num_syncs], 3822 &syncs_user[num_syncs], 3823 (xe_vm_in_lr_mode(vm) ? 3824 SYNC_PARSE_FLAG_LR_MODE : 0) | 3825 (!args->num_binds ? 3826 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0)); 3827 if (err) 3828 goto free_syncs; 3829 3830 if (xe_sync_is_ufence(&syncs[num_syncs])) 3831 num_ufence++; 3832 } 3833 3834 if (XE_IOCTL_DBG(xe, num_ufence > 1)) { 3835 err = -EINVAL; 3836 goto free_syncs; 3837 } 3838 3839 if (!args->num_binds) { 3840 err = -ENODATA; 3841 goto free_syncs; 3842 } 3843 3844 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); 3845 for (i = 0; i < args->num_binds; ++i) { 3846 u64 range = bind_ops[i].range; 3847 u64 addr = bind_ops[i].addr; 3848 u32 op = bind_ops[i].op; 3849 u32 flags = bind_ops[i].flags; 3850 u64 obj_offset = bind_ops[i].obj_offset; 3851 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance; 3852 u16 pat_index = bind_ops[i].pat_index; 3853 3854 ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset, 3855 addr, range, op, flags, 3856 prefetch_region, pat_index); 3857 if (IS_ERR(ops[i])) { 3858 err = PTR_ERR(ops[i]); 3859 ops[i] = NULL; 3860 goto unwind_ops; 3861 } 3862 3863 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops); 3864 if (err) 3865 goto unwind_ops; 3866 3867 #ifdef TEST_VM_OPS_ERROR 3868 if (flags & FORCE_OP_ERROR) { 3869 vops.inject_error = true; 3870 vm->xe->vm_inject_error_position = 3871 (vm->xe->vm_inject_error_position + 1) % 3872 FORCE_OP_ERROR_COUNT; 3873 } 3874 #endif 3875 } 3876 3877 /* Nothing to do */ 3878 if (list_empty(&vops.list)) { 3879 err = -ENODATA; 3880 goto unwind_ops; 3881 } 3882 3883 err = xe_vma_ops_alloc(&vops, args->num_binds > 1); 3884 if (err) 3885 goto unwind_ops; 3886 3887 err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops); 3888 if (err) 3889 goto unwind_ops; 3890 3891 fence = vm_bind_ioctl_ops_execute(vm, &vops); 3892 if (IS_ERR(fence)) 3893 err = PTR_ERR(fence); 3894 else 3895 dma_fence_put(fence); 3896 3897 unwind_ops: 3898 if (err && err != -ENODATA) 3899 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); 3900 xe_vma_ops_fini(&vops); 3901 for (i = args->num_binds - 1; i >= 0; --i) 3902 if (ops[i]) 3903 drm_gpuva_ops_free(&vm->gpuvm, ops[i]); 3904 free_syncs: 3905 if (err == -ENODATA) 3906 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs); 3907 while (num_syncs--) 3908 xe_sync_entry_cleanup(&syncs[num_syncs]); 3909 3910 kfree(syncs); 3911 put_obj: 3912 for (i = 0; i < args->num_binds; ++i) 3913 xe_bo_put(bos[i]); 3914 3915 kvfree(ops); 3916 free_bos: 3917 kvfree(bos); 3918 release_vm_lock: 3919 up_write(&vm->lock); 3920 put_exec_queue: 3921 if (q) 3922 xe_exec_queue_put(q); 3923 free_bind_ops: 3924 if (args->num_binds > 1) 3925 kvfree(bind_ops); 3926 put_vm: 3927 xe_vm_put(vm); 3928 return err; 3929 } 3930 3931 /** 3932 * xe_vm_bind_kernel_bo - bind a kernel BO to a VM 3933 * @vm: VM to bind the BO to 3934 * @bo: BO to bind 3935 * @q: exec queue to use for the bind (optional) 3936 * @addr: address at which to bind the BO 3937 * @cache_lvl: PAT cache level to use 3938 * 3939 * Execute a VM bind map operation on a kernel-owned BO to bind it into a 3940 * kernel-owned VM. 3941 * 3942 * Returns a dma_fence to track the binding completion if the job to do so was 3943 * successfully submitted, an error pointer otherwise. 
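 *
 * A minimal usage sketch (illustrative only; error handling and the actual
 * cache level are up to the caller, XE_CACHE_WB is just an example value):
 *
 *	fence = xe_vm_bind_kernel_bo(vm, bo, NULL, addr, XE_CACHE_WB);
 *	if (IS_ERR(fence))
 *		return PTR_ERR(fence);
 *	dma_fence_wait(fence, false);
 *	dma_fence_put(fence);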
3944 */ 3945 struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo, 3946 struct xe_exec_queue *q, u64 addr, 3947 enum xe_cache_level cache_lvl) 3948 { 3949 struct xe_vma_ops vops; 3950 struct drm_gpuva_ops *ops = NULL; 3951 struct dma_fence *fence; 3952 int err; 3953 3954 xe_bo_get(bo); 3955 xe_vm_get(vm); 3956 if (q) 3957 xe_exec_queue_get(q); 3958 3959 down_write(&vm->lock); 3960 3961 xe_vma_ops_init(&vops, vm, q, NULL, 0); 3962 3963 ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo), 3964 DRM_XE_VM_BIND_OP_MAP, 0, 0, 3965 vm->xe->pat.idx[cache_lvl]); 3966 if (IS_ERR(ops)) { 3967 err = PTR_ERR(ops); 3968 goto release_vm_lock; 3969 } 3970 3971 err = vm_bind_ioctl_ops_parse(vm, ops, &vops); 3972 if (err) 3973 goto release_vm_lock; 3974 3975 xe_assert(vm->xe, !list_empty(&vops.list)); 3976 3977 err = xe_vma_ops_alloc(&vops, false); 3978 if (err) 3979 goto unwind_ops; 3980 3981 fence = vm_bind_ioctl_ops_execute(vm, &vops); 3982 if (IS_ERR(fence)) 3983 err = PTR_ERR(fence); 3984 3985 unwind_ops: 3986 if (err && err != -ENODATA) 3987 vm_bind_ioctl_ops_unwind(vm, &ops, 1); 3988 3989 xe_vma_ops_fini(&vops); 3990 drm_gpuva_ops_free(&vm->gpuvm, ops); 3991 3992 release_vm_lock: 3993 up_write(&vm->lock); 3994 3995 if (q) 3996 xe_exec_queue_put(q); 3997 xe_vm_put(vm); 3998 xe_bo_put(bo); 3999 4000 if (err) 4001 fence = ERR_PTR(err); 4002 4003 return fence; 4004 } 4005 4006 /** 4007 * xe_vm_lock() - Lock the vm's dma_resv object 4008 * @vm: The struct xe_vm whose lock is to be locked 4009 * @intr: Whether to perform any wait interruptible 4010 * 4011 * Return: 0 on success, -EINTR if @intr is true and the wait for a 4012 * contended lock was interrupted. If @intr is false, the function 4013 * always returns 0. 4014 */ 4015 int xe_vm_lock(struct xe_vm *vm, bool intr) 4016 { 4017 if (intr) 4018 return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); 4019 4020 return dma_resv_lock(xe_vm_resv(vm), NULL); 4021 } 4022 4023 /** 4024 * xe_vm_unlock() - Unlock the vm's dma_resv object 4025 * @vm: The struct xe_vm whose lock is to be released. 4026 * 4027 * Unlock a buffer object lock that was locked by xe_vm_lock(). 4028 */ 4029 void xe_vm_unlock(struct xe_vm *vm) 4030 { 4031 dma_resv_unlock(xe_vm_resv(vm)); 4032 } 4033 4034 /** 4035 * xe_vm_range_tilemask_tlb_inval - Issue a TLB invalidation on this tilemask for an 4036 * address range 4037 * @vm: The VM 4038 * @start: start address 4039 * @end: end address 4040 * @tile_mask: mask for which gt's issue tlb invalidation 4041 * 4042 * Issue a range based TLB invalidation for gt's in tilemask 4043 * 4044 * Returns 0 for success, negative error code otherwise. 
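 *
 * @tile_mask is a bitmask of tile ids; a caller invalidating on every tile
 * could, for example, build it the same way the bind path does:
 *
 *	for_each_tile(tile, vm->xe, id)
 *		tile_mask |= BIT(id);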
4045  */
4046 int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start,
4047 				   u64 end, u8 tile_mask)
4048 {
4049 	struct xe_tlb_inval_fence
4050 		fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
4051 	struct xe_tile *tile;
4052 	u32 fence_id = 0;
4053 	u8 id;
4054 	int err;
4055 
4056 	if (!tile_mask)
4057 		return 0;
4058 
4059 	for_each_tile(tile, vm->xe, id) {
4060 		if (!(tile_mask & BIT(id)))
4061 			continue;
4062 
4063 		xe_tlb_inval_fence_init(&tile->primary_gt->tlb_inval,
4064 					&fence[fence_id], true);
4065 
4066 		err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval,
4067 					 &fence[fence_id], start, end,
4068 					 vm->usm.asid);
4069 		if (err)
4070 			goto wait;
4071 		++fence_id;
4072 
4073 		if (!tile->media_gt)
4074 			continue;
4075 
4076 		xe_tlb_inval_fence_init(&tile->media_gt->tlb_inval,
4077 					&fence[fence_id], true);
4078 
4079 		err = xe_tlb_inval_range(&tile->media_gt->tlb_inval,
4080 					 &fence[fence_id], start, end,
4081 					 vm->usm.asid);
4082 		if (err)
4083 			goto wait;
4084 		++fence_id;
4085 	}
4086 
4087 wait:
4088 	for (id = 0; id < fence_id; ++id)
4089 		xe_tlb_inval_fence_wait(&fence[id]);
4090 
4091 	return err;
4092 }
4093 
4094 /**
4095  * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
4096  * @vma: VMA to invalidate
4097  *
4098  * Walks the list of page-table leaves, memsets the entries owned by this
4099  * VMA to zero, invalidates the TLBs, and blocks until the TLB invalidation
4100  * is complete.
4101  *
4102  * Returns 0 for success, negative error code otherwise.
4103  */
4104 int xe_vm_invalidate_vma(struct xe_vma *vma)
4105 {
4106 	struct xe_device *xe = xe_vma_vm(vma)->xe;
4107 	struct xe_vm *vm = xe_vma_vm(vma);
4108 	struct xe_tile *tile;
4109 	u8 tile_mask = 0;
4110 	int ret = 0;
4111 	u8 id;
4112 
4113 	xe_assert(xe, !xe_vma_is_null(vma));
4114 	xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma));
4115 	trace_xe_vma_invalidate(vma);
4116 
4117 	vm_dbg(&vm->xe->drm,
4118 	       "INVALIDATE: addr=0x%016llx, range=0x%016llx",
4119 		xe_vma_start(vma), xe_vma_size(vma));
4120 
4121 	/*
4122 	 * Check that we don't race with page-table updates, tile_invalidated
4123 	 * update is safe
4124 	 */
4125 	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
4126 		if (xe_vma_is_userptr(vma)) {
4127 			lockdep_assert(lockdep_is_held_type(&vm->userptr.notifier_lock, 0) ||
4128 				       (lockdep_is_held_type(&vm->userptr.notifier_lock, 1) &&
4129 					lockdep_is_held(&xe_vm_resv(vm)->lock.base)));
4130 
4131 			WARN_ON_ONCE(!mmu_interval_check_retry
4132 				     (&to_userptr_vma(vma)->userptr.notifier,
4133 				      to_userptr_vma(vma)->userptr.notifier_seq));
4134 			WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm),
4135 							     DMA_RESV_USAGE_BOOKKEEP));
4136 
4137 		} else {
4138 			xe_bo_assert_held(xe_vma_bo(vma));
4139 		}
4140 	}
4141 
4142 	for_each_tile(tile, xe, id)
4143 		if (xe_pt_zap_ptes(tile, vma))
4144 			tile_mask |= BIT(id);
4145 
4146 	xe_device_wmb(xe);
4147 
4148 	ret = xe_vm_range_tilemask_tlb_inval(xe_vma_vm(vma), xe_vma_start(vma),
4149 					     xe_vma_end(vma), tile_mask);
4150 
4151 	/* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
4152 	WRITE_ONCE(vma->tile_invalidated, vma->tile_mask);
4153 
4154 	return ret;
4155 }
4156 
4157 int xe_vm_validate_protected(struct xe_vm *vm)
4158 {
4159 	struct drm_gpuva *gpuva;
4160 	int err = 0;
4161 
4162 	if (!vm)
4163 		return -ENODEV;
4164 
4165 	mutex_lock(&vm->snap_mutex);
4166 
4167 	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
4168 		struct xe_vma *vma = gpuva_to_vma(gpuva);
4169 		struct xe_bo *bo = vma->gpuva.gem.obj ?
4170 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 4171 4172 if (!bo) 4173 continue; 4174 4175 if (xe_bo_is_protected(bo)) { 4176 err = xe_pxp_bo_key_check(vm->xe->pxp, bo); 4177 if (err) 4178 break; 4179 } 4180 } 4181 4182 mutex_unlock(&vm->snap_mutex); 4183 return err; 4184 } 4185 4186 struct xe_vm_snapshot { 4187 unsigned long num_snaps; 4188 struct { 4189 u64 ofs, bo_ofs; 4190 unsigned long len; 4191 struct xe_bo *bo; 4192 void *data; 4193 struct mm_struct *mm; 4194 } snap[]; 4195 }; 4196 4197 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm) 4198 { 4199 unsigned long num_snaps = 0, i; 4200 struct xe_vm_snapshot *snap = NULL; 4201 struct drm_gpuva *gpuva; 4202 4203 if (!vm) 4204 return NULL; 4205 4206 mutex_lock(&vm->snap_mutex); 4207 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 4208 if (gpuva->flags & XE_VMA_DUMPABLE) 4209 num_snaps++; 4210 } 4211 4212 if (num_snaps) 4213 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT); 4214 if (!snap) { 4215 snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV); 4216 goto out_unlock; 4217 } 4218 4219 snap->num_snaps = num_snaps; 4220 i = 0; 4221 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 4222 struct xe_vma *vma = gpuva_to_vma(gpuva); 4223 struct xe_bo *bo = vma->gpuva.gem.obj ? 4224 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 4225 4226 if (!(gpuva->flags & XE_VMA_DUMPABLE)) 4227 continue; 4228 4229 snap->snap[i].ofs = xe_vma_start(vma); 4230 snap->snap[i].len = xe_vma_size(vma); 4231 if (bo) { 4232 snap->snap[i].bo = xe_bo_get(bo); 4233 snap->snap[i].bo_ofs = xe_vma_bo_offset(vma); 4234 } else if (xe_vma_is_userptr(vma)) { 4235 struct mm_struct *mm = 4236 to_userptr_vma(vma)->userptr.notifier.mm; 4237 4238 if (mmget_not_zero(mm)) 4239 snap->snap[i].mm = mm; 4240 else 4241 snap->snap[i].data = ERR_PTR(-EFAULT); 4242 4243 snap->snap[i].bo_ofs = xe_vma_userptr(vma); 4244 } else { 4245 snap->snap[i].data = ERR_PTR(-ENOENT); 4246 } 4247 i++; 4248 } 4249 4250 out_unlock: 4251 mutex_unlock(&vm->snap_mutex); 4252 return snap; 4253 } 4254 4255 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) 4256 { 4257 if (IS_ERR_OR_NULL(snap)) 4258 return; 4259 4260 for (int i = 0; i < snap->num_snaps; i++) { 4261 struct xe_bo *bo = snap->snap[i].bo; 4262 int err; 4263 4264 if (IS_ERR(snap->snap[i].data)) 4265 continue; 4266 4267 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER); 4268 if (!snap->snap[i].data) { 4269 snap->snap[i].data = ERR_PTR(-ENOMEM); 4270 goto cleanup_bo; 4271 } 4272 4273 if (bo) { 4274 err = xe_bo_read(bo, snap->snap[i].bo_ofs, 4275 snap->snap[i].data, snap->snap[i].len); 4276 } else { 4277 void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs; 4278 4279 kthread_use_mm(snap->snap[i].mm); 4280 if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len)) 4281 err = 0; 4282 else 4283 err = -EFAULT; 4284 kthread_unuse_mm(snap->snap[i].mm); 4285 4286 mmput(snap->snap[i].mm); 4287 snap->snap[i].mm = NULL; 4288 } 4289 4290 if (err) { 4291 kvfree(snap->snap[i].data); 4292 snap->snap[i].data = ERR_PTR(err); 4293 } 4294 4295 cleanup_bo: 4296 xe_bo_put(bo); 4297 snap->snap[i].bo = NULL; 4298 } 4299 } 4300 4301 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) 4302 { 4303 unsigned long i, j; 4304 4305 if (IS_ERR_OR_NULL(snap)) { 4306 drm_printf(p, "[0].error: %li\n", PTR_ERR(snap)); 4307 return; 4308 } 4309 4310 for (i = 0; i < snap->num_snaps; i++) { 4311 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len); 4312 4313 if 
void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
{
	unsigned long i, j;

	if (IS_ERR_OR_NULL(snap)) {
		drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
		return;
	}

	for (i = 0; i < snap->num_snaps; i++) {
		drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);

		if (IS_ERR(snap->snap[i].data)) {
			drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs,
				   PTR_ERR(snap->snap[i].data));
			continue;
		}

		drm_printf(p, "[%llx].data: ", snap->snap[i].ofs);

		for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
			u32 *val = snap->snap[i].data + j;
			char dumped[ASCII85_BUFSZ];

			drm_puts(p, ascii85_encode(*val, dumped));
		}

		drm_puts(p, "\n");

		if (drm_coredump_printer_is_full(p))
			return;
	}
}

void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
{
	unsigned long i;

	if (IS_ERR_OR_NULL(snap))
		return;

	for (i = 0; i < snap->num_snaps; i++) {
		if (!IS_ERR(snap->snap[i].data))
			kvfree(snap->snap[i].data);
		xe_bo_put(snap->snap[i].bo);
		if (snap->snap[i].mm)
			mmput(snap->snap[i].mm);
	}
	kvfree(snap);
}
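
/*
 * Illustrative sketch only: consuming a snapshot produced by the two-phase
 * capture sketched above. Each dumpable mapping is emitted as
 * ascii85-encoded data through a struct drm_printer; a dmesg printer is used
 * here purely for illustration. The function name is an assumption for the
 * example.
 */
static void __maybe_unused example_snapshot_emit(struct xe_vm *vm,
						 struct xe_vm_snapshot *snap)
{
	struct drm_printer p = drm_info_printer(vm->xe->drm.dev);

	xe_vm_snapshot_print(snap, &p);
	xe_vm_snapshot_free(snap);
}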
/**
 * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations
 * @xe: Pointer to the XE device structure
 * @vma: Pointer to the virtual memory area (VMA) structure
 * @is_atomic: True when called from the pagefault path for an atomic operation
 *
 * This function determines whether the given VMA needs to be migrated to
 * VRAM in order to perform an atomic GPU operation.
 *
 * Return:
 * 1 - Migration to VRAM is required
 * 0 - Migration is not required
 * -EACCES - Invalid access for atomic memory attr
 */
int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic)
{
	u32 atomic_access = xe_vma_bo(vma) ? xe_vma_bo(vma)->attr.atomic_access :
					     vma->attr.atomic_access;

	if (!IS_DGFX(xe) || !is_atomic)
		return false;

	/*
	 * NOTE: The checks implemented here are platform-specific. For
	 * instance, on a device supporting CXL atomics, these would ideally
	 * work universally without additional handling.
	 */
	switch (atomic_access) {
	case DRM_XE_ATOMIC_DEVICE:
		return !xe->info.has_device_atomics_on_smem;

	case DRM_XE_ATOMIC_CPU:
		return -EACCES;

	case DRM_XE_ATOMIC_UNDEFINED:
	case DRM_XE_ATOMIC_GLOBAL:
	default:
		return 1;
	}
}
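
/*
 * Illustrative sketch only, not driver code: how a fault handler might
 * interpret the tri-state result of xe_vma_need_vram_for_atomic(). The
 * function name is an assumption for the example; the actual migration step
 * is elided.
 */
static int __maybe_unused example_check_atomic_placement(struct xe_device *xe,
							  struct xe_vma *vma,
							  bool is_atomic)
{
	int ret = xe_vma_need_vram_for_atomic(xe, vma, is_atomic);

	if (ret < 0)
		return ret;	/* e.g. -EACCES: CPU-only atomic attribute */

	/*
	 * ret == 1: the caller must migrate the backing store to VRAM first;
	 * ret == 0: the fault can be serviced with the current placement.
	 */
	return ret;
}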
static int xe_vm_alloc_vma(struct xe_vm *vm,
			   struct drm_gpuvm_map_req *map_req,
			   bool is_madvise)
{
	struct xe_vma_ops vops;
	struct drm_gpuva_ops *ops = NULL;
	struct drm_gpuva_op *__op;
	bool is_cpu_addr_mirror = false;
	bool remap_op = false;
	struct xe_vma_mem_attr tmp_attr;
	u16 default_pat;
	int err;

	lockdep_assert_held_write(&vm->lock);

	if (is_madvise)
		ops = drm_gpuvm_madvise_ops_create(&vm->gpuvm, map_req);
	else
		ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, map_req);

	if (IS_ERR(ops))
		return PTR_ERR(ops);

	if (list_empty(&ops->list)) {
		err = 0;
		goto free_ops;
	}

	drm_gpuva_for_each_op(__op, ops) {
		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
		struct xe_vma *vma = NULL;

		if (!is_madvise) {
			if (__op->op == DRM_GPUVA_OP_UNMAP) {
				vma = gpuva_to_vma(op->base.unmap.va);
				XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma));
				default_pat = vma->attr.default_pat_index;
			}

			if (__op->op == DRM_GPUVA_OP_REMAP) {
				vma = gpuva_to_vma(op->base.remap.unmap->va);
				default_pat = vma->attr.default_pat_index;
			}

			if (__op->op == DRM_GPUVA_OP_MAP) {
				op->map.is_cpu_addr_mirror = true;
				op->map.pat_index = default_pat;
			}
		} else {
			if (__op->op == DRM_GPUVA_OP_REMAP) {
				vma = gpuva_to_vma(op->base.remap.unmap->va);
				xe_assert(vm->xe, !remap_op);
				xe_assert(vm->xe, xe_vma_has_no_bo(vma));
				remap_op = true;

				if (xe_vma_is_cpu_addr_mirror(vma))
					is_cpu_addr_mirror = true;
				else
					is_cpu_addr_mirror = false;
			}

			if (__op->op == DRM_GPUVA_OP_MAP) {
				xe_assert(vm->xe, remap_op);
				remap_op = false;
				/*
				 * For madvise ops a DRM_GPUVA_OP_MAP always
				 * follows a DRM_GPUVA_OP_REMAP, so set
				 * op->map.is_cpu_addr_mirror to true if the
				 * REMAP was for a xe_vma_is_cpu_addr_mirror()
				 * vma.
				 */
				op->map.is_cpu_addr_mirror = is_cpu_addr_mirror;
			}
		}
		print_op(vm->xe, __op);
	}

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);

	if (is_madvise)
		vops.flags |= XE_VMA_OPS_FLAG_MADVISE;

	err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
	if (err)
		goto unwind_ops;

	xe_vm_lock(vm, false);

	drm_gpuva_for_each_op(__op, ops) {
		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
		struct xe_vma *vma;

		if (__op->op == DRM_GPUVA_OP_UNMAP) {
			vma = gpuva_to_vma(op->base.unmap.va);
			/* There should be no unmap for madvise */
			if (is_madvise)
				XE_WARN_ON("UNEXPECTED UNMAP");

			xe_vma_destroy(vma, NULL);
		} else if (__op->op == DRM_GPUVA_OP_REMAP) {
			vma = gpuva_to_vma(op->base.remap.unmap->va);
			/*
			 * For madvise ops, store the attributes of the VMA
			 * being unmapped by the REMAP so they can be assigned
			 * to the newly created MAP vma.
			 */
			if (is_madvise)
				tmp_attr = vma->attr;

			xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL);
		} else if (__op->op == DRM_GPUVA_OP_MAP) {
			vma = op->map.vma;
			/*
			 * For madvise calls a MAP always follows a REMAP, so
			 * tmp_attr always holds sane values here, making it
			 * safe to copy them to the new vma.
			 */
			if (is_madvise)
				vma->attr = tmp_attr;
		}
	}

	xe_vm_unlock(vm);
	drm_gpuva_ops_free(&vm->gpuvm, ops);
	return 0;

unwind_ops:
	vm_bind_ioctl_ops_unwind(vm, &ops, 1);
free_ops:
	drm_gpuva_ops_free(&vm->gpuvm, ops);
	return err;
}

/**
 * xe_vm_alloc_madvise_vma - Allocate VMAs with madvise ops
 * @vm: Pointer to the xe_vm structure
 * @start: Starting input address
 * @range: Size of the input range
 *
 * This function splits existing VMAs to create new VMAs for the user-provided
 * input range.
 *
 * Return: 0 on success, negative error code on failure.
 */
int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
{
	struct drm_gpuvm_map_req map_req = {
		.map.va.addr = start,
		.map.va.range = range,
	};

	lockdep_assert_held_write(&vm->lock);

	vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range);

	return xe_vm_alloc_vma(vm, &map_req, true);
}

/**
 * xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU address mirror vma
 * @vm: Pointer to the xe_vm structure
 * @start: Starting input address
 * @range: Size of the input range
 *
 * This function splits/merges existing VMAs to create a new VMA for the
 * user-provided input range.
 *
 * Return: 0 on success, negative error code on failure.
 */
int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
{
	struct drm_gpuvm_map_req map_req = {
		.map.va.addr = start,
		.map.va.range = range,
	};

	lockdep_assert_held_write(&vm->lock);

	vm_dbg(&vm->xe->drm, "CPU_ADDR_MIRROR_VMA_OPS_CREATE: addr=0x%016llx, size=0x%016llx",
	       start, range);

	return xe_vm_alloc_vma(vm, &map_req, false);
}
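
/*
 * Illustrative sketch only, not driver code: both helpers above expect the
 * caller to hold the VM lock in write mode, as asserted via
 * lockdep_assert_held_write(). The wrapper name is an assumption for the
 * example.
 */
static int __maybe_unused example_split_for_madvise(struct xe_vm *vm,
						    u64 start, u64 range)
{
	int err;

	down_write(&vm->lock);
	err = xe_vm_alloc_madvise_vma(vm, start, range);
	up_write(&vm->lock);

	return err;
}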