1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2021 Intel Corporation 4 */ 5 6 #include "xe_vm.h" 7 8 #include <linux/dma-fence-array.h> 9 #include <linux/nospec.h> 10 11 #include <drm/drm_drv.h> 12 #include <drm/drm_exec.h> 13 #include <drm/drm_print.h> 14 #include <drm/ttm/ttm_tt.h> 15 #include <uapi/drm/xe_drm.h> 16 #include <linux/ascii85.h> 17 #include <linux/delay.h> 18 #include <linux/kthread.h> 19 #include <linux/mm.h> 20 #include <linux/swap.h> 21 22 #include <generated/xe_wa_oob.h> 23 24 #include "regs/xe_gtt_defs.h" 25 #include "xe_assert.h" 26 #include "xe_bo.h" 27 #include "xe_device.h" 28 #include "xe_drm_client.h" 29 #include "xe_exec_queue.h" 30 #include "xe_migrate.h" 31 #include "xe_pat.h" 32 #include "xe_pm.h" 33 #include "xe_preempt_fence.h" 34 #include "xe_pt.h" 35 #include "xe_pxp.h" 36 #include "xe_res_cursor.h" 37 #include "xe_sriov_vf.h" 38 #include "xe_svm.h" 39 #include "xe_sync.h" 40 #include "xe_tile.h" 41 #include "xe_tlb_inval.h" 42 #include "xe_trace_bo.h" 43 #include "xe_wa.h" 44 45 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) 46 { 47 return vm->gpuvm.r_obj; 48 } 49 50 /** 51 * xe_vm_drm_exec_lock() - Lock the vm's resv with a drm_exec transaction 52 * @vm: The vm whose resv is to be locked. 53 * @exec: The drm_exec transaction. 54 * 55 * Helper to lock the vm's resv as part of a drm_exec transaction. 56 * 57 * Return: %0 on success. See drm_exec_lock_obj() for error codes. 58 */ 59 int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec) 60 { 61 return drm_exec_lock_obj(exec, xe_vm_obj(vm)); 62 } 63 64 static bool preempt_fences_waiting(struct xe_vm *vm) 65 { 66 struct xe_exec_queue *q; 67 68 lockdep_assert_held(&vm->lock); 69 xe_vm_assert_held(vm); 70 71 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 72 if (!q->lr.pfence || 73 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, 74 &q->lr.pfence->flags)) { 75 return true; 76 } 77 } 78 79 return false; 80 } 81 82 static void free_preempt_fences(struct list_head *list) 83 { 84 struct list_head *link, *next; 85 86 list_for_each_safe(link, next, list) 87 xe_preempt_fence_free(to_preempt_fence_from_link(link)); 88 } 89 90 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list, 91 unsigned int *count) 92 { 93 lockdep_assert_held(&vm->lock); 94 xe_vm_assert_held(vm); 95 96 if (*count >= vm->preempt.num_exec_queues) 97 return 0; 98 99 for (; *count < vm->preempt.num_exec_queues; ++(*count)) { 100 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc(); 101 102 if (IS_ERR(pfence)) 103 return PTR_ERR(pfence); 104 105 list_move_tail(xe_preempt_fence_link(pfence), list); 106 } 107 108 return 0; 109 } 110 111 static int wait_for_existing_preempt_fences(struct xe_vm *vm) 112 { 113 struct xe_exec_queue *q; 114 bool vf_migration = IS_SRIOV_VF(vm->xe) && 115 xe_sriov_vf_migration_supported(vm->xe); 116 signed long wait_time = vf_migration ? 
HZ / 5 : MAX_SCHEDULE_TIMEOUT; 117 118 xe_vm_assert_held(vm); 119 120 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 121 if (q->lr.pfence) { 122 long timeout; 123 124 timeout = dma_fence_wait_timeout(q->lr.pfence, false, 125 wait_time); 126 if (!timeout) { 127 xe_assert(vm->xe, vf_migration); 128 return -EAGAIN; 129 } 130 131 /* Only -ETIME on fence indicates VM needs to be killed */ 132 if (timeout < 0 || q->lr.pfence->error == -ETIME) 133 return -ETIME; 134 135 dma_fence_put(q->lr.pfence); 136 q->lr.pfence = NULL; 137 } 138 } 139 140 return 0; 141 } 142 143 static bool xe_vm_is_idle(struct xe_vm *vm) 144 { 145 struct xe_exec_queue *q; 146 147 xe_vm_assert_held(vm); 148 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 149 if (!xe_exec_queue_is_idle(q)) 150 return false; 151 } 152 153 return true; 154 } 155 156 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list) 157 { 158 struct list_head *link; 159 struct xe_exec_queue *q; 160 161 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 162 struct dma_fence *fence; 163 164 link = list->next; 165 xe_assert(vm->xe, link != list); 166 167 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link), 168 q, q->lr.context, 169 ++q->lr.seqno); 170 dma_fence_put(q->lr.pfence); 171 q->lr.pfence = fence; 172 } 173 } 174 175 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) 176 { 177 struct xe_exec_queue *q; 178 int err; 179 180 xe_bo_assert_held(bo); 181 182 if (!vm->preempt.num_exec_queues) 183 return 0; 184 185 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues); 186 if (err) 187 return err; 188 189 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) 190 if (q->lr.pfence) { 191 dma_resv_add_fence(bo->ttm.base.resv, 192 q->lr.pfence, 193 DMA_RESV_USAGE_BOOKKEEP); 194 } 195 196 return 0; 197 } 198 199 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm, 200 struct drm_exec *exec) 201 { 202 struct xe_exec_queue *q; 203 204 lockdep_assert_held(&vm->lock); 205 xe_vm_assert_held(vm); 206 207 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 208 q->ops->resume(q); 209 210 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence, 211 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); 212 } 213 } 214 215 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 216 { 217 struct drm_gpuvm_exec vm_exec = { 218 .vm = &vm->gpuvm, 219 .flags = DRM_EXEC_INTERRUPTIBLE_WAIT, 220 .num_fences = 1, 221 }; 222 struct drm_exec *exec = &vm_exec.exec; 223 struct xe_validation_ctx ctx; 224 struct dma_fence *pfence; 225 int err; 226 bool wait; 227 228 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); 229 230 down_write(&vm->lock); 231 err = xe_validation_exec_lock(&ctx, &vm_exec, &vm->xe->val); 232 if (err) 233 goto out_up_write; 234 235 pfence = xe_preempt_fence_create(q, q->lr.context, 236 ++q->lr.seqno); 237 if (IS_ERR(pfence)) { 238 err = PTR_ERR(pfence); 239 goto out_fini; 240 } 241 242 list_add(&q->lr.link, &vm->preempt.exec_queues); 243 ++vm->preempt.num_exec_queues; 244 q->lr.pfence = pfence; 245 246 xe_svm_notifier_lock(vm); 247 248 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence, 249 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); 250 251 /* 252 * Check to see if a preemption on VM is in flight or userptr 253 * invalidation, if so trigger this preempt fence to sync state with 254 * other preempt fences on the VM. 
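	 * Enabling software signaling on the new fence below keeps it in step
	 * with fences that are already signaling instead of leaving it armed
	 * but idle.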
	 */
	wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
	if (wait)
		dma_fence_enable_sw_signaling(pfence);

	xe_svm_notifier_unlock(vm);

out_fini:
	xe_validation_ctx_fini(&ctx);
out_up_write:
	up_write(&vm->lock);

	return err;
}
ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO);

/**
 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
 * @vm: The VM.
 * @q: The exec_queue
 *
 * Note that this function might be called multiple times on the same queue.
 */
void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	if (!xe_vm_in_preempt_fence_mode(vm))
		return;

	down_write(&vm->lock);
	if (!list_empty(&q->lr.link)) {
		list_del_init(&q->lr.link);
		--vm->preempt.num_exec_queues;
	}
	if (q->lr.pfence) {
		dma_fence_enable_sw_signaling(q->lr.pfence);
		dma_fence_put(q->lr.pfence);
		q->lr.pfence = NULL;
	}
	up_write(&vm->lock);
}

#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000

/**
 * xe_vm_kill() - VM Kill
 * @vm: The VM.
 * @unlocked: Flag indicating the VM's dma-resv is not held
 *
 * Kill the VM by setting the banned flag, indicating the VM is no longer
 * available for use. If in preempt fence mode, also kill all exec queues
 * attached to the VM.
 */
void xe_vm_kill(struct xe_vm *vm, bool unlocked)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);

	if (unlocked)
		xe_vm_lock(vm, false);

	vm->flags |= XE_VM_FLAG_BANNED;
	trace_xe_vm_kill(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
		q->ops->kill(q);

	if (unlocked)
		xe_vm_unlock(vm);

	/* TODO: Inform user the VM is banned */
}

static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
{
	struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
	struct drm_gpuva *gpuva;
	int ret;

	lockdep_assert_held(&vm->lock);
	drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
		list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
			       &vm->rebind_list);

	if (!try_wait_for_completion(&vm->xe->pm_block))
		return -EAGAIN;

	ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false, exec);
	if (ret)
		return ret;

	vm_bo->evicted = false;
	return 0;
}

/**
 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
 * @vm: The vm for which we are rebinding.
 * @exec: The struct drm_exec with the locked GEM objects.
 * @num_fences: The number of fences to reserve for the operation, not
 * including rebinds and validations.
 *
 * Validates all evicted gem objects and rebinds their vmas. Note that
 * rebindings may cause evictions and hence the validation-rebind
 * sequence is rerun until there are no more objects to validate.
 *
 * Return: 0 on success, negative error code on error. In particular,
 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
 * the drm_exec transaction needs to be restarted.
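 *
 * A typical caller runs this from inside a drm_exec locking loop and restarts
 * on contention, roughly as follows (sketch mirroring xe_preempt_work_begin()
 * below, with caller-provided exec and num_fences):
 *
 *	drm_exec_until_all_locked(&exec) {
 *		err = drm_gpuvm_prepare_vm(&vm->gpuvm, &exec, 0);
 *		if (!err)
 *			err = xe_vm_validate_rebind(vm, &exec, num_fences);
 *		drm_exec_retry_on_contention(&exec);
 *	}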
 */
int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
			  unsigned int num_fences)
{
	struct drm_gem_object *obj;
	unsigned long index;
	int ret;

	do {
		ret = drm_gpuvm_validate(&vm->gpuvm, exec);
		if (ret)
			return ret;

		ret = xe_vm_rebind(vm, false);
		if (ret)
			return ret;
	} while (!list_empty(&vm->gpuvm.evict.list));

	drm_exec_for_each_locked_object(exec, index, obj) {
		ret = dma_resv_reserve_fences(obj->resv, num_fences);
		if (ret)
			return ret;
	}

	return 0;
}

static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
				 bool *done)
{
	int err;

	err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	if (xe_vm_is_idle(vm)) {
		vm->preempt.rebind_deactivated = true;
		*done = true;
		return 0;
	}

	if (!preempt_fences_waiting(vm)) {
		*done = true;
		return 0;
	}

	err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	err = wait_for_existing_preempt_fences(vm);
	if (err)
		return err;

	/*
	 * Add validation and rebinding to the locking loop since both can
	 * cause evictions which may require blocking dma_resv locks.
	 * The fence reservation here is intended for the new preempt fences
	 * we attach at the end of the rebind work.
	 */
	return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
}

static bool vm_suspend_rebind_worker(struct xe_vm *vm)
{
	struct xe_device *xe = vm->xe;
	bool ret = false;

	mutex_lock(&xe->rebind_resume_lock);
	if (!try_wait_for_completion(&vm->xe->pm_block)) {
		ret = true;
		list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list);
	}
	mutex_unlock(&xe->rebind_resume_lock);

	return ret;
}

/**
 * xe_vm_resume_rebind_worker() - Resume the rebind worker.
 * @vm: The vm whose preempt worker to resume.
 *
 * Resume a preempt worker that was previously suspended by
 * vm_suspend_rebind_worker().
 */
void xe_vm_resume_rebind_worker(struct xe_vm *vm)
{
	queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work);
}

static void preempt_rebind_work_func(struct work_struct *w)
{
	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	unsigned int fence_count = 0;
	LIST_HEAD(preempt_fences);
	int err = 0;
	long wait;
	int __maybe_unused tries = 0;

	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
	trace_xe_vm_rebind_worker_enter(vm);

	down_write(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm)) {
		up_write(&vm->lock);
		trace_xe_vm_rebind_worker_exit(vm);
		return;
	}

retry:
	if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) {
		up_write(&vm->lock);
		/* We don't actually block but don't make progress.
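		 * here, which is why the might-block annotation below is
		 * still warranted.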
*/ 480 xe_pm_might_block_on_suspend(); 481 return; 482 } 483 484 if (xe_vm_userptr_check_repin(vm)) { 485 err = xe_vm_userptr_pin(vm); 486 if (err) 487 goto out_unlock_outer; 488 } 489 490 err = xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, 491 (struct xe_val_flags) {.interruptible = true}); 492 if (err) 493 goto out_unlock_outer; 494 495 drm_exec_until_all_locked(&exec) { 496 bool done = false; 497 498 err = xe_preempt_work_begin(&exec, vm, &done); 499 drm_exec_retry_on_contention(&exec); 500 xe_validation_retry_on_oom(&ctx, &err); 501 if (err || done) { 502 xe_validation_ctx_fini(&ctx); 503 goto out_unlock_outer; 504 } 505 } 506 507 err = alloc_preempt_fences(vm, &preempt_fences, &fence_count); 508 if (err) 509 goto out_unlock; 510 511 xe_vm_set_validation_exec(vm, &exec); 512 err = xe_vm_rebind(vm, true); 513 xe_vm_set_validation_exec(vm, NULL); 514 if (err) 515 goto out_unlock; 516 517 /* Wait on rebinds and munmap style VM unbinds */ 518 wait = dma_resv_wait_timeout(xe_vm_resv(vm), 519 DMA_RESV_USAGE_KERNEL, 520 false, MAX_SCHEDULE_TIMEOUT); 521 if (wait <= 0) { 522 err = -ETIME; 523 goto out_unlock; 524 } 525 526 #define retry_required(__tries, __vm) \ 527 (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \ 528 (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \ 529 __xe_vm_userptr_needs_repin(__vm)) 530 531 xe_svm_notifier_lock(vm); 532 if (retry_required(tries, vm)) { 533 xe_svm_notifier_unlock(vm); 534 err = -EAGAIN; 535 goto out_unlock; 536 } 537 538 #undef retry_required 539 540 spin_lock(&vm->xe->ttm.lru_lock); 541 ttm_lru_bulk_move_tail(&vm->lru_bulk_move); 542 spin_unlock(&vm->xe->ttm.lru_lock); 543 544 /* Point of no return. */ 545 arm_preempt_fences(vm, &preempt_fences); 546 resume_and_reinstall_preempt_fences(vm, &exec); 547 xe_svm_notifier_unlock(vm); 548 549 out_unlock: 550 xe_validation_ctx_fini(&ctx); 551 out_unlock_outer: 552 if (err == -EAGAIN) { 553 trace_xe_vm_rebind_worker_retry(vm); 554 555 /* 556 * We can't block in workers on a VF which supports migration 557 * given this can block the VF post-migration workers from 558 * getting scheduled. 559 */ 560 if (IS_SRIOV_VF(vm->xe) && 561 xe_sriov_vf_migration_supported(vm->xe)) { 562 up_write(&vm->lock); 563 xe_vm_queue_rebind_worker(vm); 564 return; 565 } 566 567 goto retry; 568 } 569 570 if (err) { 571 drm_warn(&vm->xe->drm, "VM worker error: %d\n", err); 572 xe_vm_kill(vm, true); 573 } 574 up_write(&vm->lock); 575 576 free_preempt_fences(&preempt_fences); 577 578 trace_xe_vm_rebind_worker_exit(vm); 579 } 580 581 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) 582 { 583 int i; 584 585 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) { 586 if (!vops->pt_update_ops[i].num_ops) 587 continue; 588 589 vops->pt_update_ops[i].ops = 590 kmalloc_array(vops->pt_update_ops[i].num_ops, 591 sizeof(*vops->pt_update_ops[i].ops), 592 GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 593 if (!vops->pt_update_ops[i].ops) 594 return array_of_binds ? 
-ENOBUFS : -ENOMEM; 595 } 596 597 return 0; 598 } 599 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO); 600 601 static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op) 602 { 603 struct xe_vma *vma; 604 605 vma = gpuva_to_vma(op->base.prefetch.va); 606 607 if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma)) 608 xa_destroy(&op->prefetch_range.range); 609 } 610 611 static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops) 612 { 613 struct xe_vma_op *op; 614 615 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) 616 return; 617 618 list_for_each_entry(op, &vops->list, link) 619 xe_vma_svm_prefetch_op_fini(op); 620 } 621 622 static void xe_vma_ops_fini(struct xe_vma_ops *vops) 623 { 624 int i; 625 626 xe_vma_svm_prefetch_ops_fini(vops); 627 628 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 629 kfree(vops->pt_update_ops[i].ops); 630 } 631 632 static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val) 633 { 634 int i; 635 636 if (!inc_val) 637 return; 638 639 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 640 if (BIT(i) & tile_mask) 641 vops->pt_update_ops[i].num_ops += inc_val; 642 } 643 644 #define XE_VMA_CREATE_MASK ( \ 645 XE_VMA_READ_ONLY | \ 646 XE_VMA_DUMPABLE | \ 647 XE_VMA_SYSTEM_ALLOCATOR | \ 648 DRM_GPUVA_SPARSE | \ 649 XE_VMA_MADV_AUTORESET) 650 651 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, 652 u8 tile_mask) 653 { 654 INIT_LIST_HEAD(&op->link); 655 op->tile_mask = tile_mask; 656 op->base.op = DRM_GPUVA_OP_MAP; 657 op->base.map.va.addr = vma->gpuva.va.addr; 658 op->base.map.va.range = vma->gpuva.va.range; 659 op->base.map.gem.obj = vma->gpuva.gem.obj; 660 op->base.map.gem.offset = vma->gpuva.gem.offset; 661 op->map.vma = vma; 662 op->map.immediate = true; 663 op->map.vma_flags = vma->gpuva.flags & XE_VMA_CREATE_MASK; 664 } 665 666 static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, 667 u8 tile_mask) 668 { 669 struct xe_vma_op *op; 670 671 op = kzalloc(sizeof(*op), GFP_KERNEL); 672 if (!op) 673 return -ENOMEM; 674 675 xe_vm_populate_rebind(op, vma, tile_mask); 676 list_add_tail(&op->link, &vops->list); 677 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); 678 679 return 0; 680 } 681 682 static struct dma_fence *ops_execute(struct xe_vm *vm, 683 struct xe_vma_ops *vops); 684 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 685 struct xe_exec_queue *q, 686 struct xe_sync_entry *syncs, u32 num_syncs); 687 688 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) 689 { 690 struct dma_fence *fence; 691 struct xe_vma *vma, *next; 692 struct xe_vma_ops vops; 693 struct xe_vma_op *op, *next_op; 694 int err, i; 695 696 lockdep_assert_held(&vm->lock); 697 if ((xe_vm_in_lr_mode(vm) && !rebind_worker) || 698 list_empty(&vm->rebind_list)) 699 return 0; 700 701 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 702 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 703 vops.pt_update_ops[i].wait_vm_bookkeep = true; 704 705 xe_vm_assert_held(vm); 706 list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { 707 xe_assert(vm->xe, vma->tile_present); 708 709 if (rebind_worker) 710 trace_xe_vma_rebind_worker(vma); 711 else 712 trace_xe_vma_rebind_exec(vma); 713 714 err = xe_vm_ops_add_rebind(&vops, vma, 715 vma->tile_present); 716 if (err) 717 goto free_ops; 718 } 719 720 err = xe_vma_ops_alloc(&vops, false); 721 if (err) 722 goto free_ops; 723 724 fence = ops_execute(vm, &vops); 725 if (IS_ERR(fence)) { 726 err = PTR_ERR(fence); 727 } else { 728 
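		/* Rebind ops submitted; drop our fence reference and empty the rebind list. */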
dma_fence_put(fence); 729 list_for_each_entry_safe(vma, next, &vm->rebind_list, 730 combined_links.rebind) 731 list_del_init(&vma->combined_links.rebind); 732 } 733 free_ops: 734 list_for_each_entry_safe(op, next_op, &vops.list, link) { 735 list_del(&op->link); 736 kfree(op); 737 } 738 xe_vma_ops_fini(&vops); 739 740 return err; 741 } 742 743 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask) 744 { 745 struct dma_fence *fence = NULL; 746 struct xe_vma_ops vops; 747 struct xe_vma_op *op, *next_op; 748 struct xe_tile *tile; 749 u8 id; 750 int err; 751 752 lockdep_assert_held(&vm->lock); 753 xe_vm_assert_held(vm); 754 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 755 756 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 757 vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT; 758 for_each_tile(tile, vm->xe, id) { 759 vops.pt_update_ops[id].wait_vm_bookkeep = true; 760 vops.pt_update_ops[tile->id].q = 761 xe_migrate_exec_queue(tile->migrate); 762 } 763 764 err = xe_vm_ops_add_rebind(&vops, vma, tile_mask); 765 if (err) 766 return ERR_PTR(err); 767 768 err = xe_vma_ops_alloc(&vops, false); 769 if (err) { 770 fence = ERR_PTR(err); 771 goto free_ops; 772 } 773 774 fence = ops_execute(vm, &vops); 775 776 free_ops: 777 list_for_each_entry_safe(op, next_op, &vops.list, link) { 778 list_del(&op->link); 779 kfree(op); 780 } 781 xe_vma_ops_fini(&vops); 782 783 return fence; 784 } 785 786 static void xe_vm_populate_range_rebind(struct xe_vma_op *op, 787 struct xe_vma *vma, 788 struct xe_svm_range *range, 789 u8 tile_mask) 790 { 791 INIT_LIST_HEAD(&op->link); 792 op->tile_mask = tile_mask; 793 op->base.op = DRM_GPUVA_OP_DRIVER; 794 op->subop = XE_VMA_SUBOP_MAP_RANGE; 795 op->map_range.vma = vma; 796 op->map_range.range = range; 797 } 798 799 static int 800 xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops, 801 struct xe_vma *vma, 802 struct xe_svm_range *range, 803 u8 tile_mask) 804 { 805 struct xe_vma_op *op; 806 807 op = kzalloc(sizeof(*op), GFP_KERNEL); 808 if (!op) 809 return -ENOMEM; 810 811 xe_vm_populate_range_rebind(op, vma, range, tile_mask); 812 list_add_tail(&op->link, &vops->list); 813 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); 814 815 return 0; 816 } 817 818 /** 819 * xe_vm_range_rebind() - VM range (re)bind 820 * @vm: The VM which the range belongs to. 821 * @vma: The VMA which the range belongs to. 822 * @range: SVM range to rebind. 823 * @tile_mask: Tile mask to bind the range to. 824 * 825 * (re)bind SVM range setting up GPU page tables for the range. 
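 * Expects the vm->lock and the vm dma-resv to be held, and is only valid for
 * VMs in fault mode (see the asserts at the top of the function).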
 *
 * Return: dma fence for rebind to signal completion on success, ERR_PTR on
 * failure
 */
struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
				     struct xe_vma *vma,
				     struct xe_svm_range *range,
				     u8 tile_mask)
{
	struct dma_fence *fence = NULL;
	struct xe_vma_ops vops;
	struct xe_vma_op *op, *next_op;
	struct xe_tile *tile;
	u8 id;
	int err;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);
	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
	xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT;
	for_each_tile(tile, vm->xe, id) {
		vops.pt_update_ops[id].wait_vm_bookkeep = true;
		vops.pt_update_ops[tile->id].q =
			xe_migrate_exec_queue(tile->migrate);
	}

	err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask);
	if (err)
		return ERR_PTR(err);

	err = xe_vma_ops_alloc(&vops, false);
	if (err) {
		fence = ERR_PTR(err);
		goto free_ops;
	}

	fence = ops_execute(vm, &vops);

free_ops:
	list_for_each_entry_safe(op, next_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return fence;
}

static void xe_vm_populate_range_unbind(struct xe_vma_op *op,
					struct xe_svm_range *range)
{
	INIT_LIST_HEAD(&op->link);
	op->tile_mask = range->tile_present;
	op->base.op = DRM_GPUVA_OP_DRIVER;
	op->subop = XE_VMA_SUBOP_UNMAP_RANGE;
	op->unmap_range.range = range;
}

static int
xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops,
			   struct xe_svm_range *range)
{
	struct xe_vma_op *op;

	op = kzalloc(sizeof(*op), GFP_KERNEL);
	if (!op)
		return -ENOMEM;

	xe_vm_populate_range_unbind(op, range);
	list_add_tail(&op->link, &vops->list);
	xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1);

	return 0;
}

/**
 * xe_vm_range_unbind() - VM range unbind
 * @vm: The VM which the range belongs to.
 * @range: SVM range to unbind.
 *
 * Unbind SVM range removing the GPU page tables for the range.
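 * If the range is not bound on any tile, an already signaled stub fence is
 * returned instead of queuing page-table work.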
910 * 911 * Return: dma fence for unbind to signal completion on success, ERR_PTR on 912 * failure 913 */ 914 struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm, 915 struct xe_svm_range *range) 916 { 917 struct dma_fence *fence = NULL; 918 struct xe_vma_ops vops; 919 struct xe_vma_op *op, *next_op; 920 struct xe_tile *tile; 921 u8 id; 922 int err; 923 924 lockdep_assert_held(&vm->lock); 925 xe_vm_assert_held(vm); 926 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 927 928 if (!range->tile_present) 929 return dma_fence_get_stub(); 930 931 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 932 for_each_tile(tile, vm->xe, id) { 933 vops.pt_update_ops[id].wait_vm_bookkeep = true; 934 vops.pt_update_ops[tile->id].q = 935 xe_migrate_exec_queue(tile->migrate); 936 } 937 938 err = xe_vm_ops_add_range_unbind(&vops, range); 939 if (err) 940 return ERR_PTR(err); 941 942 err = xe_vma_ops_alloc(&vops, false); 943 if (err) { 944 fence = ERR_PTR(err); 945 goto free_ops; 946 } 947 948 fence = ops_execute(vm, &vops); 949 950 free_ops: 951 list_for_each_entry_safe(op, next_op, &vops.list, link) { 952 list_del(&op->link); 953 kfree(op); 954 } 955 xe_vma_ops_fini(&vops); 956 957 return fence; 958 } 959 960 static void xe_vma_free(struct xe_vma *vma) 961 { 962 if (xe_vma_is_userptr(vma)) 963 kfree(to_userptr_vma(vma)); 964 else 965 kfree(vma); 966 } 967 968 static struct xe_vma *xe_vma_create(struct xe_vm *vm, 969 struct xe_bo *bo, 970 u64 bo_offset_or_userptr, 971 u64 start, u64 end, 972 struct xe_vma_mem_attr *attr, 973 unsigned int flags) 974 { 975 struct xe_vma *vma; 976 struct xe_tile *tile; 977 u8 id; 978 bool is_null = (flags & DRM_GPUVA_SPARSE); 979 bool is_cpu_addr_mirror = (flags & XE_VMA_SYSTEM_ALLOCATOR); 980 981 xe_assert(vm->xe, start < end); 982 xe_assert(vm->xe, end < vm->size); 983 984 /* 985 * Allocate and ensure that the xe_vma_is_userptr() return 986 * matches what was allocated. 
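	 * A VMA with no backing BO that is neither sparse (NULL) nor a CPU
	 * address mirror is a userptr VMA and needs the larger
	 * struct xe_userptr_vma.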
987 */ 988 if (!bo && !is_null && !is_cpu_addr_mirror) { 989 struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL); 990 991 if (!uvma) 992 return ERR_PTR(-ENOMEM); 993 994 vma = &uvma->vma; 995 } else { 996 vma = kzalloc(sizeof(*vma), GFP_KERNEL); 997 if (!vma) 998 return ERR_PTR(-ENOMEM); 999 1000 if (bo) 1001 vma->gpuva.gem.obj = &bo->ttm.base; 1002 } 1003 1004 INIT_LIST_HEAD(&vma->combined_links.rebind); 1005 1006 INIT_LIST_HEAD(&vma->gpuva.gem.entry); 1007 vma->gpuva.vm = &vm->gpuvm; 1008 vma->gpuva.va.addr = start; 1009 vma->gpuva.va.range = end - start + 1; 1010 vma->gpuva.flags = flags; 1011 1012 for_each_tile(tile, vm->xe, id) 1013 vma->tile_mask |= 0x1 << id; 1014 1015 if (vm->xe->info.has_atomic_enable_pte_bit) 1016 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; 1017 1018 vma->attr = *attr; 1019 1020 if (bo) { 1021 struct drm_gpuvm_bo *vm_bo; 1022 1023 xe_bo_assert_held(bo); 1024 1025 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base); 1026 if (IS_ERR(vm_bo)) { 1027 xe_vma_free(vma); 1028 return ERR_CAST(vm_bo); 1029 } 1030 1031 drm_gpuvm_bo_extobj_add(vm_bo); 1032 drm_gem_object_get(&bo->ttm.base); 1033 vma->gpuva.gem.offset = bo_offset_or_userptr; 1034 drm_gpuva_link(&vma->gpuva, vm_bo); 1035 drm_gpuvm_bo_put(vm_bo); 1036 } else /* userptr or null */ { 1037 if (!is_null && !is_cpu_addr_mirror) { 1038 struct xe_userptr_vma *uvma = to_userptr_vma(vma); 1039 u64 size = end - start + 1; 1040 int err; 1041 1042 vma->gpuva.gem.offset = bo_offset_or_userptr; 1043 1044 err = xe_userptr_setup(uvma, xe_vma_userptr(vma), size); 1045 if (err) { 1046 xe_vma_free(vma); 1047 return ERR_PTR(err); 1048 } 1049 } 1050 1051 xe_vm_get(vm); 1052 } 1053 1054 return vma; 1055 } 1056 1057 static void xe_vma_destroy_late(struct xe_vma *vma) 1058 { 1059 struct xe_vm *vm = xe_vma_vm(vma); 1060 1061 if (vma->ufence) { 1062 xe_sync_ufence_put(vma->ufence); 1063 vma->ufence = NULL; 1064 } 1065 1066 if (xe_vma_is_userptr(vma)) { 1067 struct xe_userptr_vma *uvma = to_userptr_vma(vma); 1068 1069 xe_userptr_remove(uvma); 1070 xe_vm_put(vm); 1071 } else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) { 1072 xe_vm_put(vm); 1073 } else { 1074 xe_bo_put(xe_vma_bo(vma)); 1075 } 1076 1077 xe_vma_free(vma); 1078 } 1079 1080 static void vma_destroy_work_func(struct work_struct *w) 1081 { 1082 struct xe_vma *vma = 1083 container_of(w, struct xe_vma, destroy_work); 1084 1085 xe_vma_destroy_late(vma); 1086 } 1087 1088 static void vma_destroy_cb(struct dma_fence *fence, 1089 struct dma_fence_cb *cb) 1090 { 1091 struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb); 1092 1093 INIT_WORK(&vma->destroy_work, vma_destroy_work_func); 1094 queue_work(system_unbound_wq, &vma->destroy_work); 1095 } 1096 1097 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) 1098 { 1099 struct xe_vm *vm = xe_vma_vm(vma); 1100 1101 lockdep_assert_held_write(&vm->lock); 1102 xe_assert(vm->xe, list_empty(&vma->combined_links.destroy)); 1103 1104 if (xe_vma_is_userptr(vma)) { 1105 xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED); 1106 xe_userptr_destroy(to_userptr_vma(vma)); 1107 } else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) { 1108 xe_bo_assert_held(xe_vma_bo(vma)); 1109 1110 drm_gpuva_unlink(&vma->gpuva); 1111 } 1112 1113 xe_vm_assert_held(vm); 1114 if (fence) { 1115 int ret = dma_fence_add_callback(fence, &vma->destroy_cb, 1116 vma_destroy_cb); 1117 1118 if (ret) { 1119 XE_WARN_ON(ret != -ENOENT); 1120 xe_vma_destroy_late(vma); 1121 } 1122 } else { 1123 
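		/* No fence to wait for, free the VMA immediately. */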
		xe_vma_destroy_late(vma);
	}
}

/**
 * xe_vm_lock_vma() - drm_exec utility to lock a vma
 * @exec: The drm_exec object we're currently locking for.
 * @vma: The vma for which we want to lock the vm resv and any attached
 * object's resv.
 *
 * Return: 0 on success, negative error code on error. In particular
 * may return -EDEADLK on WW transaction contention and -EINTR if
 * an interruptible wait is terminated by a signal.
 */
int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
{
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_bo *bo = xe_vma_bo(vma);
	int err;

	XE_WARN_ON(!vm);

	err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
	if (!err && bo && !bo->vm)
		err = drm_exec_lock_obj(exec, &bo->ttm.base);

	return err;
}

static void xe_vma_destroy_unlocked(struct xe_vma *vma)
{
	struct xe_device *xe = xe_vma_vm(vma)->xe;
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	int err = 0;

	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
		err = xe_vm_lock_vma(&exec, vma);
		drm_exec_retry_on_contention(&exec);
		if (XE_WARN_ON(err))
			break;
		xe_vma_destroy(vma, NULL);
	}
	xe_assert(xe, !err);
}

struct xe_vma *
xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
{
	struct drm_gpuva *gpuva;

	lockdep_assert_held(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm))
		return NULL;

	xe_assert(vm->xe, start + range <= vm->size);

	gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);

	return gpuva ? gpuva_to_vma(gpuva) : NULL;
}

static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
{
	int err;

	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
	lockdep_assert_held(&vm->lock);

	mutex_lock(&vm->snap_mutex);
	err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
	mutex_unlock(&vm->snap_mutex);
	XE_WARN_ON(err);	/* Shouldn't be possible */

	return err;
}

static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
{
	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
	lockdep_assert_held(&vm->lock);

	mutex_lock(&vm->snap_mutex);
	drm_gpuva_remove(&vma->gpuva);
	mutex_unlock(&vm->snap_mutex);
	if (vm->usm.last_fault_vma == vma)
		vm->usm.last_fault_vma = NULL;
}

static struct drm_gpuva_op *xe_vm_op_alloc(void)
{
	struct xe_vma_op *op;

	op = kzalloc(sizeof(*op), GFP_KERNEL);

	if (unlikely(!op))
		return NULL;

	return &op->base;
}

static void xe_vm_free(struct drm_gpuvm *gpuvm);

static const struct drm_gpuvm_ops gpuvm_ops = {
	.op_alloc = xe_vm_op_alloc,
	.vm_bo_validate = xe_gpuvm_validate,
	.vm_free = xe_vm_free,
};

static u64 pde_encode_pat_index(u16 pat_index)
{
	u64 pte = 0;

	if (pat_index & BIT(0))
		pte |= XE_PPGTT_PTE_PAT0;

	if (pat_index & BIT(1))
		pte |= XE_PPGTT_PTE_PAT1;

	return pte;
}

static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level)
{
	u64 pte = 0;

	if (pat_index & BIT(0))
		pte |= XE_PPGTT_PTE_PAT0;

	if (pat_index & BIT(1))
		pte |= XE_PPGTT_PTE_PAT1;

	if (pat_index & BIT(2)) {
		if (pt_level)
			pte |= XE_PPGTT_PDE_PDPE_PAT2;
		else
			pte |= XE_PPGTT_PTE_PAT2;
1261 } 1262 1263 if (pat_index & BIT(3)) 1264 pte |= XELPG_PPGTT_PTE_PAT3; 1265 1266 if (pat_index & (BIT(4))) 1267 pte |= XE2_PPGTT_PTE_PAT4; 1268 1269 return pte; 1270 } 1271 1272 static u64 pte_encode_ps(u32 pt_level) 1273 { 1274 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL); 1275 1276 if (pt_level == 1) 1277 return XE_PDE_PS_2M; 1278 else if (pt_level == 2) 1279 return XE_PDPE_PS_1G; 1280 1281 return 0; 1282 } 1283 1284 static u16 pde_pat_index(struct xe_bo *bo) 1285 { 1286 struct xe_device *xe = xe_bo_device(bo); 1287 u16 pat_index; 1288 1289 /* 1290 * We only have two bits to encode the PAT index in non-leaf nodes, but 1291 * these only point to other paging structures so we only need a minimal 1292 * selection of options. The user PAT index is only for encoding leaf 1293 * nodes, where we have use of more bits to do the encoding. The 1294 * non-leaf nodes are instead under driver control so the chosen index 1295 * here should be distinct from the user PAT index. Also the 1296 * corresponding coherency of the PAT index should be tied to the 1297 * allocation type of the page table (or at least we should pick 1298 * something which is always safe). 1299 */ 1300 if (!xe_bo_is_vram(bo) && bo->ttm.ttm->caching == ttm_cached) 1301 pat_index = xe->pat.idx[XE_CACHE_WB]; 1302 else 1303 pat_index = xe->pat.idx[XE_CACHE_NONE]; 1304 1305 xe_assert(xe, pat_index <= 3); 1306 1307 return pat_index; 1308 } 1309 1310 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset) 1311 { 1312 u64 pde; 1313 1314 pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1315 pde |= XE_PAGE_PRESENT | XE_PAGE_RW; 1316 pde |= pde_encode_pat_index(pde_pat_index(bo)); 1317 1318 return pde; 1319 } 1320 1321 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, 1322 u16 pat_index, u32 pt_level) 1323 { 1324 u64 pte; 1325 1326 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1327 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1328 pte |= pte_encode_pat_index(pat_index, pt_level); 1329 pte |= pte_encode_ps(pt_level); 1330 1331 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) 1332 pte |= XE_PPGTT_PTE_DM; 1333 1334 return pte; 1335 } 1336 1337 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, 1338 u16 pat_index, u32 pt_level) 1339 { 1340 pte |= XE_PAGE_PRESENT; 1341 1342 if (likely(!xe_vma_read_only(vma))) 1343 pte |= XE_PAGE_RW; 1344 1345 pte |= pte_encode_pat_index(pat_index, pt_level); 1346 pte |= pte_encode_ps(pt_level); 1347 1348 if (unlikely(xe_vma_is_null(vma))) 1349 pte |= XE_PTE_NULL; 1350 1351 return pte; 1352 } 1353 1354 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, 1355 u16 pat_index, 1356 u32 pt_level, bool devmem, u64 flags) 1357 { 1358 u64 pte; 1359 1360 /* Avoid passing random bits directly as flags */ 1361 xe_assert(xe, !(flags & ~XE_PTE_PS64)); 1362 1363 pte = addr; 1364 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1365 pte |= pte_encode_pat_index(pat_index, pt_level); 1366 pte |= pte_encode_ps(pt_level); 1367 1368 if (devmem) 1369 pte |= XE_PPGTT_PTE_DM; 1370 1371 pte |= flags; 1372 1373 return pte; 1374 } 1375 1376 static const struct xe_pt_ops xelp_pt_ops = { 1377 .pte_encode_bo = xelp_pte_encode_bo, 1378 .pte_encode_vma = xelp_pte_encode_vma, 1379 .pte_encode_addr = xelp_pte_encode_addr, 1380 .pde_encode_bo = xelp_pde_encode_bo, 1381 }; 1382 1383 static void vm_destroy_work_func(struct work_struct *w); 1384 1385 /** 1386 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the 1387 * given tile and vm. 1388 * @xe: xe device. 1389 * @tile: tile to set up for. 
1390 * @vm: vm to set up for. 1391 * @exec: The struct drm_exec object used to lock the vm resv. 1392 * 1393 * Sets up a pagetable tree with one page-table per level and a single 1394 * leaf PTE. All pagetable entries point to the single page-table or, 1395 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and 1396 * writes become NOPs. 1397 * 1398 * Return: 0 on success, negative error code on error. 1399 */ 1400 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, 1401 struct xe_vm *vm, struct drm_exec *exec) 1402 { 1403 u8 id = tile->id; 1404 int i; 1405 1406 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) { 1407 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i, exec); 1408 if (IS_ERR(vm->scratch_pt[id][i])) { 1409 int err = PTR_ERR(vm->scratch_pt[id][i]); 1410 1411 vm->scratch_pt[id][i] = NULL; 1412 return err; 1413 } 1414 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); 1415 } 1416 1417 return 0; 1418 } 1419 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO); 1420 1421 static void xe_vm_free_scratch(struct xe_vm *vm) 1422 { 1423 struct xe_tile *tile; 1424 u8 id; 1425 1426 if (!xe_vm_has_scratch(vm)) 1427 return; 1428 1429 for_each_tile(tile, vm->xe, id) { 1430 u32 i; 1431 1432 if (!vm->pt_root[id]) 1433 continue; 1434 1435 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i) 1436 if (vm->scratch_pt[id][i]) 1437 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL); 1438 } 1439 } 1440 1441 static void xe_vm_pt_destroy(struct xe_vm *vm) 1442 { 1443 struct xe_tile *tile; 1444 u8 id; 1445 1446 xe_vm_assert_held(vm); 1447 1448 for_each_tile(tile, vm->xe, id) { 1449 if (vm->pt_root[id]) { 1450 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); 1451 vm->pt_root[id] = NULL; 1452 } 1453 } 1454 } 1455 1456 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) 1457 { 1458 struct drm_gem_object *vm_resv_obj; 1459 struct xe_validation_ctx ctx; 1460 struct drm_exec exec; 1461 struct xe_vm *vm; 1462 int err; 1463 struct xe_tile *tile; 1464 u8 id; 1465 1466 /* 1467 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to 1468 * ever be in faulting mode. 1469 */ 1470 xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE))); 1471 1472 vm = kzalloc(sizeof(*vm), GFP_KERNEL); 1473 if (!vm) 1474 return ERR_PTR(-ENOMEM); 1475 1476 vm->xe = xe; 1477 1478 vm->size = 1ull << xe->info.va_bits; 1479 vm->flags = flags; 1480 1481 if (xef) 1482 vm->xef = xe_file_get(xef); 1483 /** 1484 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be 1485 * manipulated under the PXP mutex. However, the PXP mutex can be taken 1486 * under a user-VM lock when the PXP session is started at exec_queue 1487 * creation time. Those are different VMs and therefore there is no risk 1488 * of deadlock, but we need to tell lockdep that this is the case or it 1489 * will print a warning. 
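	 * Giving GSC VMs their own lock class below keeps lockdep from
	 * conflating the two locks.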
1490 */ 1491 if (flags & XE_VM_FLAG_GSC) { 1492 static struct lock_class_key gsc_vm_key; 1493 1494 __init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key); 1495 } else { 1496 init_rwsem(&vm->lock); 1497 } 1498 mutex_init(&vm->snap_mutex); 1499 1500 INIT_LIST_HEAD(&vm->rebind_list); 1501 1502 INIT_LIST_HEAD(&vm->userptr.repin_list); 1503 INIT_LIST_HEAD(&vm->userptr.invalidated); 1504 spin_lock_init(&vm->userptr.invalidated_lock); 1505 1506 ttm_lru_bulk_move_init(&vm->lru_bulk_move); 1507 1508 INIT_WORK(&vm->destroy_work, vm_destroy_work_func); 1509 1510 INIT_LIST_HEAD(&vm->preempt.exec_queues); 1511 if (flags & XE_VM_FLAG_FAULT_MODE) 1512 vm->preempt.min_run_period_ms = 0; 1513 else 1514 vm->preempt.min_run_period_ms = 5; 1515 1516 for_each_tile(tile, xe, id) 1517 xe_range_fence_tree_init(&vm->rftree[id]); 1518 1519 vm->pt_ops = &xelp_pt_ops; 1520 1521 /* 1522 * Long-running workloads are not protected by the scheduler references. 1523 * By design, run_job for long-running workloads returns NULL and the 1524 * scheduler drops all the references of it, hence protecting the VM 1525 * for this case is necessary. 1526 */ 1527 if (flags & XE_VM_FLAG_LR_MODE) { 1528 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); 1529 xe_pm_runtime_get_noresume(xe); 1530 INIT_LIST_HEAD(&vm->preempt.pm_activate_link); 1531 } 1532 1533 err = xe_svm_init(vm); 1534 if (err) 1535 goto err_no_resv; 1536 1537 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); 1538 if (!vm_resv_obj) { 1539 err = -ENOMEM; 1540 goto err_svm_fini; 1541 } 1542 1543 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm, 1544 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops); 1545 1546 drm_gem_object_put(vm_resv_obj); 1547 1548 err = 0; 1549 xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true}, 1550 err) { 1551 err = xe_vm_drm_exec_lock(vm, &exec); 1552 drm_exec_retry_on_contention(&exec); 1553 1554 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) 1555 vm->flags |= XE_VM_FLAG_64K; 1556 1557 for_each_tile(tile, xe, id) { 1558 if (flags & XE_VM_FLAG_MIGRATION && 1559 tile->id != XE_VM_FLAG_TILE_ID(flags)) 1560 continue; 1561 1562 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level, 1563 &exec); 1564 if (IS_ERR(vm->pt_root[id])) { 1565 err = PTR_ERR(vm->pt_root[id]); 1566 vm->pt_root[id] = NULL; 1567 xe_vm_pt_destroy(vm); 1568 drm_exec_retry_on_contention(&exec); 1569 xe_validation_retry_on_oom(&ctx, &err); 1570 break; 1571 } 1572 } 1573 if (err) 1574 break; 1575 1576 if (xe_vm_has_scratch(vm)) { 1577 for_each_tile(tile, xe, id) { 1578 if (!vm->pt_root[id]) 1579 continue; 1580 1581 err = xe_vm_create_scratch(xe, tile, vm, &exec); 1582 if (err) { 1583 xe_vm_free_scratch(vm); 1584 xe_vm_pt_destroy(vm); 1585 drm_exec_retry_on_contention(&exec); 1586 xe_validation_retry_on_oom(&ctx, &err); 1587 break; 1588 } 1589 } 1590 if (err) 1591 break; 1592 vm->batch_invalidate_tlb = true; 1593 } 1594 1595 if (vm->flags & XE_VM_FLAG_LR_MODE) { 1596 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); 1597 vm->batch_invalidate_tlb = false; 1598 } 1599 1600 /* Fill pt_root after allocating scratch tables */ 1601 for_each_tile(tile, xe, id) { 1602 if (!vm->pt_root[id]) 1603 continue; 1604 1605 xe_pt_populate_empty(tile, vm, vm->pt_root[id]); 1606 } 1607 } 1608 if (err) 1609 goto err_close; 1610 1611 /* Kernel migration VM shouldn't have a circular loop.. 
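	 * so per-tile bind exec_queues are only created for non-migration VMs.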
*/ 1612 if (!(flags & XE_VM_FLAG_MIGRATION)) { 1613 for_each_tile(tile, xe, id) { 1614 struct xe_exec_queue *q; 1615 u32 create_flags = EXEC_QUEUE_FLAG_VM; 1616 1617 if (!vm->pt_root[id]) 1618 continue; 1619 1620 q = xe_exec_queue_create_bind(xe, tile, create_flags, 0); 1621 if (IS_ERR(q)) { 1622 err = PTR_ERR(q); 1623 goto err_close; 1624 } 1625 vm->q[id] = q; 1626 } 1627 } 1628 1629 if (xef && xe->info.has_asid) { 1630 u32 asid; 1631 1632 down_write(&xe->usm.lock); 1633 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, 1634 XA_LIMIT(1, XE_MAX_ASID - 1), 1635 &xe->usm.next_asid, GFP_KERNEL); 1636 up_write(&xe->usm.lock); 1637 if (err < 0) 1638 goto err_close; 1639 1640 vm->usm.asid = asid; 1641 } 1642 1643 trace_xe_vm_create(vm); 1644 1645 return vm; 1646 1647 err_close: 1648 xe_vm_close_and_put(vm); 1649 return ERR_PTR(err); 1650 1651 err_svm_fini: 1652 if (flags & XE_VM_FLAG_FAULT_MODE) { 1653 vm->size = 0; /* close the vm */ 1654 xe_svm_fini(vm); 1655 } 1656 err_no_resv: 1657 mutex_destroy(&vm->snap_mutex); 1658 for_each_tile(tile, xe, id) 1659 xe_range_fence_tree_fini(&vm->rftree[id]); 1660 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 1661 if (vm->xef) 1662 xe_file_put(vm->xef); 1663 kfree(vm); 1664 if (flags & XE_VM_FLAG_LR_MODE) 1665 xe_pm_runtime_put(xe); 1666 return ERR_PTR(err); 1667 } 1668 1669 static void xe_vm_close(struct xe_vm *vm) 1670 { 1671 struct xe_device *xe = vm->xe; 1672 bool bound; 1673 int idx; 1674 1675 bound = drm_dev_enter(&xe->drm, &idx); 1676 1677 down_write(&vm->lock); 1678 if (xe_vm_in_fault_mode(vm)) 1679 xe_svm_notifier_lock(vm); 1680 1681 vm->size = 0; 1682 1683 if (!((vm->flags & XE_VM_FLAG_MIGRATION))) { 1684 struct xe_tile *tile; 1685 struct xe_gt *gt; 1686 u8 id; 1687 1688 /* Wait for pending binds */ 1689 dma_resv_wait_timeout(xe_vm_resv(vm), 1690 DMA_RESV_USAGE_BOOKKEEP, 1691 false, MAX_SCHEDULE_TIMEOUT); 1692 1693 if (bound) { 1694 for_each_tile(tile, xe, id) 1695 if (vm->pt_root[id]) 1696 xe_pt_clear(xe, vm->pt_root[id]); 1697 1698 for_each_gt(gt, xe, id) 1699 xe_tlb_inval_vm(>->tlb_inval, vm); 1700 } 1701 } 1702 1703 if (xe_vm_in_fault_mode(vm)) 1704 xe_svm_notifier_unlock(vm); 1705 up_write(&vm->lock); 1706 1707 if (bound) 1708 drm_dev_exit(idx); 1709 } 1710 1711 void xe_vm_close_and_put(struct xe_vm *vm) 1712 { 1713 LIST_HEAD(contested); 1714 struct xe_device *xe = vm->xe; 1715 struct xe_tile *tile; 1716 struct xe_vma *vma, *next_vma; 1717 struct drm_gpuva *gpuva, *next; 1718 u8 id; 1719 1720 xe_assert(xe, !vm->preempt.num_exec_queues); 1721 1722 xe_vm_close(vm); 1723 if (xe_vm_in_preempt_fence_mode(vm)) { 1724 mutex_lock(&xe->rebind_resume_lock); 1725 list_del_init(&vm->preempt.pm_activate_link); 1726 mutex_unlock(&xe->rebind_resume_lock); 1727 flush_work(&vm->preempt.rebind_work); 1728 } 1729 if (xe_vm_in_fault_mode(vm)) 1730 xe_svm_close(vm); 1731 1732 down_write(&vm->lock); 1733 for_each_tile(tile, xe, id) { 1734 if (vm->q[id]) { 1735 int i; 1736 1737 xe_exec_queue_last_fence_put(vm->q[id], vm); 1738 for_each_tlb_inval(i) 1739 xe_exec_queue_tlb_inval_last_fence_put(vm->q[id], vm, i); 1740 } 1741 } 1742 up_write(&vm->lock); 1743 1744 for_each_tile(tile, xe, id) { 1745 if (vm->q[id]) { 1746 xe_exec_queue_kill(vm->q[id]); 1747 xe_exec_queue_put(vm->q[id]); 1748 vm->q[id] = NULL; 1749 } 1750 } 1751 1752 down_write(&vm->lock); 1753 xe_vm_lock(vm, false); 1754 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) { 1755 vma = gpuva_to_vma(gpuva); 1756 1757 if (xe_vma_has_no_bo(vma)) { 1758 xe_svm_notifier_lock(vm); 1759 
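			/*
			 * Set the destroyed flag under the notifier lock so
			 * invalidation paths see a consistent state.
			 */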
vma->gpuva.flags |= XE_VMA_DESTROYED; 1760 xe_svm_notifier_unlock(vm); 1761 } 1762 1763 xe_vm_remove_vma(vm, vma); 1764 1765 /* easy case, remove from VMA? */ 1766 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) { 1767 list_del_init(&vma->combined_links.rebind); 1768 xe_vma_destroy(vma, NULL); 1769 continue; 1770 } 1771 1772 list_move_tail(&vma->combined_links.destroy, &contested); 1773 vma->gpuva.flags |= XE_VMA_DESTROYED; 1774 } 1775 1776 /* 1777 * All vm operations will add shared fences to resv. 1778 * The only exception is eviction for a shared object, 1779 * but even so, the unbind when evicted would still 1780 * install a fence to resv. Hence it's safe to 1781 * destroy the pagetables immediately. 1782 */ 1783 xe_vm_free_scratch(vm); 1784 xe_vm_pt_destroy(vm); 1785 xe_vm_unlock(vm); 1786 1787 /* 1788 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL 1789 * Since we hold a refcount to the bo, we can remove and free 1790 * the members safely without locking. 1791 */ 1792 list_for_each_entry_safe(vma, next_vma, &contested, 1793 combined_links.destroy) { 1794 list_del_init(&vma->combined_links.destroy); 1795 xe_vma_destroy_unlocked(vma); 1796 } 1797 1798 xe_svm_fini(vm); 1799 1800 up_write(&vm->lock); 1801 1802 down_write(&xe->usm.lock); 1803 if (vm->usm.asid) { 1804 void *lookup; 1805 1806 xe_assert(xe, xe->info.has_asid); 1807 xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION)); 1808 1809 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); 1810 xe_assert(xe, lookup == vm); 1811 } 1812 up_write(&xe->usm.lock); 1813 1814 for_each_tile(tile, xe, id) 1815 xe_range_fence_tree_fini(&vm->rftree[id]); 1816 1817 xe_vm_put(vm); 1818 } 1819 1820 static void vm_destroy_work_func(struct work_struct *w) 1821 { 1822 struct xe_vm *vm = 1823 container_of(w, struct xe_vm, destroy_work); 1824 struct xe_device *xe = vm->xe; 1825 struct xe_tile *tile; 1826 u8 id; 1827 1828 /* xe_vm_close_and_put was not called? */ 1829 xe_assert(xe, !vm->size); 1830 1831 if (xe_vm_in_preempt_fence_mode(vm)) 1832 flush_work(&vm->preempt.rebind_work); 1833 1834 mutex_destroy(&vm->snap_mutex); 1835 1836 if (vm->flags & XE_VM_FLAG_LR_MODE) 1837 xe_pm_runtime_put(xe); 1838 1839 for_each_tile(tile, xe, id) 1840 XE_WARN_ON(vm->pt_root[id]); 1841 1842 trace_xe_vm_free(vm); 1843 1844 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 1845 1846 if (vm->xef) 1847 xe_file_put(vm->xef); 1848 1849 kfree(vm); 1850 } 1851 1852 static void xe_vm_free(struct drm_gpuvm *gpuvm) 1853 { 1854 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); 1855 1856 /* To destroy the VM we need to be able to sleep */ 1857 queue_work(system_unbound_wq, &vm->destroy_work); 1858 } 1859 1860 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) 1861 { 1862 struct xe_vm *vm; 1863 1864 mutex_lock(&xef->vm.lock); 1865 vm = xa_load(&xef->vm.xa, id); 1866 if (vm) 1867 xe_vm_get(vm); 1868 mutex_unlock(&xef->vm.lock); 1869 1870 return vm; 1871 } 1872 1873 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile) 1874 { 1875 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0); 1876 } 1877 1878 static struct xe_exec_queue * 1879 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 1880 { 1881 return q ? 
q : vm->q[0]; 1882 } 1883 1884 static struct xe_user_fence * 1885 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) 1886 { 1887 unsigned int i; 1888 1889 for (i = 0; i < num_syncs; i++) { 1890 struct xe_sync_entry *e = &syncs[i]; 1891 1892 if (xe_sync_is_ufence(e)) 1893 return xe_sync_ufence_get(e); 1894 } 1895 1896 return NULL; 1897 } 1898 1899 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ 1900 DRM_XE_VM_CREATE_FLAG_LR_MODE | \ 1901 DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 1902 1903 int xe_vm_create_ioctl(struct drm_device *dev, void *data, 1904 struct drm_file *file) 1905 { 1906 struct xe_device *xe = to_xe_device(dev); 1907 struct xe_file *xef = to_xe_file(file); 1908 struct drm_xe_vm_create *args = data; 1909 struct xe_gt *wa_gt = xe_root_mmio_gt(xe); 1910 struct xe_vm *vm; 1911 u32 id; 1912 int err; 1913 u32 flags = 0; 1914 1915 if (XE_IOCTL_DBG(xe, args->extensions)) 1916 return -EINVAL; 1917 1918 if (wa_gt && XE_GT_WA(wa_gt, 22014953428)) 1919 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE; 1920 1921 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && 1922 !xe->info.has_usm)) 1923 return -EINVAL; 1924 1925 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 1926 return -EINVAL; 1927 1928 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS)) 1929 return -EINVAL; 1930 1931 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE && 1932 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && 1933 !xe->info.needs_scratch)) 1934 return -EINVAL; 1935 1936 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) && 1937 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) 1938 return -EINVAL; 1939 1940 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE) 1941 flags |= XE_VM_FLAG_SCRATCH_PAGE; 1942 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) 1943 flags |= XE_VM_FLAG_LR_MODE; 1944 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 1945 flags |= XE_VM_FLAG_FAULT_MODE; 1946 1947 vm = xe_vm_create(xe, flags, xef); 1948 if (IS_ERR(vm)) 1949 return PTR_ERR(vm); 1950 1951 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) 1952 /* Warning: Security issue - never enable by default */ 1953 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); 1954 #endif 1955 1956 /* user id alloc must always be last in ioctl to prevent UAF */ 1957 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); 1958 if (err) 1959 goto err_close_and_put; 1960 1961 args->vm_id = id; 1962 1963 return 0; 1964 1965 err_close_and_put: 1966 xe_vm_close_and_put(vm); 1967 1968 return err; 1969 } 1970 1971 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, 1972 struct drm_file *file) 1973 { 1974 struct xe_device *xe = to_xe_device(dev); 1975 struct xe_file *xef = to_xe_file(file); 1976 struct drm_xe_vm_destroy *args = data; 1977 struct xe_vm *vm; 1978 int err = 0; 1979 1980 if (XE_IOCTL_DBG(xe, args->pad) || 1981 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 1982 return -EINVAL; 1983 1984 mutex_lock(&xef->vm.lock); 1985 vm = xa_load(&xef->vm.xa, args->vm_id); 1986 if (XE_IOCTL_DBG(xe, !vm)) 1987 err = -ENOENT; 1988 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues)) 1989 err = -EBUSY; 1990 else 1991 xa_erase(&xef->vm.xa, args->vm_id); 1992 mutex_unlock(&xef->vm.lock); 1993 1994 if (!err) 1995 xe_vm_close_and_put(vm); 1996 1997 return err; 1998 } 1999 2000 static int xe_vm_query_vmas(struct xe_vm *vm, u64 start, u64 end) 2001 { 2002 struct drm_gpuva *gpuva; 2003 u32 num_vmas = 0; 2004 2005 
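	/* Count the VMAs overlapping [start, end) in the GPU VA tree. */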
lockdep_assert_held(&vm->lock); 2006 drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) 2007 num_vmas++; 2008 2009 return num_vmas; 2010 } 2011 2012 static int get_mem_attrs(struct xe_vm *vm, u32 *num_vmas, u64 start, 2013 u64 end, struct drm_xe_mem_range_attr *attrs) 2014 { 2015 struct drm_gpuva *gpuva; 2016 int i = 0; 2017 2018 lockdep_assert_held(&vm->lock); 2019 2020 drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) { 2021 struct xe_vma *vma = gpuva_to_vma(gpuva); 2022 2023 if (i == *num_vmas) 2024 return -ENOSPC; 2025 2026 attrs[i].start = xe_vma_start(vma); 2027 attrs[i].end = xe_vma_end(vma); 2028 attrs[i].atomic.val = vma->attr.atomic_access; 2029 attrs[i].pat_index.val = vma->attr.pat_index; 2030 attrs[i].preferred_mem_loc.devmem_fd = vma->attr.preferred_loc.devmem_fd; 2031 attrs[i].preferred_mem_loc.migration_policy = 2032 vma->attr.preferred_loc.migration_policy; 2033 2034 i++; 2035 } 2036 2037 *num_vmas = i; 2038 return 0; 2039 } 2040 2041 int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 2042 { 2043 struct xe_device *xe = to_xe_device(dev); 2044 struct xe_file *xef = to_xe_file(file); 2045 struct drm_xe_mem_range_attr *mem_attrs; 2046 struct drm_xe_vm_query_mem_range_attr *args = data; 2047 u64 __user *attrs_user = u64_to_user_ptr(args->vector_of_mem_attr); 2048 struct xe_vm *vm; 2049 int err = 0; 2050 2051 if (XE_IOCTL_DBG(xe, 2052 ((args->num_mem_ranges == 0 && 2053 (attrs_user || args->sizeof_mem_range_attr != 0)) || 2054 (args->num_mem_ranges > 0 && 2055 (!attrs_user || 2056 args->sizeof_mem_range_attr != 2057 sizeof(struct drm_xe_mem_range_attr)))))) 2058 return -EINVAL; 2059 2060 vm = xe_vm_lookup(xef, args->vm_id); 2061 if (XE_IOCTL_DBG(xe, !vm)) 2062 return -EINVAL; 2063 2064 err = down_read_interruptible(&vm->lock); 2065 if (err) 2066 goto put_vm; 2067 2068 attrs_user = u64_to_user_ptr(args->vector_of_mem_attr); 2069 2070 if (args->num_mem_ranges == 0 && !attrs_user) { 2071 args->num_mem_ranges = xe_vm_query_vmas(vm, args->start, args->start + args->range); 2072 args->sizeof_mem_range_attr = sizeof(struct drm_xe_mem_range_attr); 2073 goto unlock_vm; 2074 } 2075 2076 mem_attrs = kvmalloc_array(args->num_mem_ranges, args->sizeof_mem_range_attr, 2077 GFP_KERNEL | __GFP_ACCOUNT | 2078 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 2079 if (!mem_attrs) { 2080 err = args->num_mem_ranges > 1 ? 
-ENOBUFS : -ENOMEM; 2081 goto unlock_vm; 2082 } 2083 2084 memset(mem_attrs, 0, args->num_mem_ranges * args->sizeof_mem_range_attr); 2085 err = get_mem_attrs(vm, &args->num_mem_ranges, args->start, 2086 args->start + args->range, mem_attrs); 2087 if (err) 2088 goto free_mem_attrs; 2089 2090 err = copy_to_user(attrs_user, mem_attrs, 2091 args->sizeof_mem_range_attr * args->num_mem_ranges); 2092 if (err) 2093 err = -EFAULT; 2094 2095 free_mem_attrs: 2096 kvfree(mem_attrs); 2097 unlock_vm: 2098 up_read(&vm->lock); 2099 put_vm: 2100 xe_vm_put(vm); 2101 return err; 2102 } 2103 2104 static bool vma_matches(struct xe_vma *vma, u64 page_addr) 2105 { 2106 if (page_addr > xe_vma_end(vma) - 1 || 2107 page_addr + SZ_4K - 1 < xe_vma_start(vma)) 2108 return false; 2109 2110 return true; 2111 } 2112 2113 /** 2114 * xe_vm_find_vma_by_addr() - Find a VMA by its address 2115 * 2116 * @vm: the xe_vm the vma belongs to 2117 * @page_addr: address to look up 2118 */ 2119 struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr) 2120 { 2121 struct xe_vma *vma = NULL; 2122 2123 if (vm->usm.last_fault_vma) { /* Fast lookup */ 2124 if (vma_matches(vm->usm.last_fault_vma, page_addr)) 2125 vma = vm->usm.last_fault_vma; 2126 } 2127 if (!vma) 2128 vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K); 2129 2130 return vma; 2131 } 2132 2133 static const u32 region_to_mem_type[] = { 2134 XE_PL_TT, 2135 XE_PL_VRAM0, 2136 XE_PL_VRAM1, 2137 }; 2138 2139 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma, 2140 bool post_commit) 2141 { 2142 xe_svm_notifier_lock(vm); 2143 vma->gpuva.flags |= XE_VMA_DESTROYED; 2144 xe_svm_notifier_unlock(vm); 2145 if (post_commit) 2146 xe_vm_remove_vma(vm, vma); 2147 } 2148 2149 #undef ULL 2150 #define ULL unsigned long long 2151 2152 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) 2153 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 2154 { 2155 struct xe_vma *vma; 2156 2157 switch (op->op) { 2158 case DRM_GPUVA_OP_MAP: 2159 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx", 2160 (ULL)op->map.va.addr, (ULL)op->map.va.range); 2161 break; 2162 case DRM_GPUVA_OP_REMAP: 2163 vma = gpuva_to_vma(op->remap.unmap->va); 2164 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 2165 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 2166 op->remap.unmap->keep ? 1 : 0); 2167 if (op->remap.prev) 2168 vm_dbg(&xe->drm, 2169 "REMAP:PREV: addr=0x%016llx, range=0x%016llx", 2170 (ULL)op->remap.prev->va.addr, 2171 (ULL)op->remap.prev->va.range); 2172 if (op->remap.next) 2173 vm_dbg(&xe->drm, 2174 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx", 2175 (ULL)op->remap.next->va.addr, 2176 (ULL)op->remap.next->va.range); 2177 break; 2178 case DRM_GPUVA_OP_UNMAP: 2179 vma = gpuva_to_vma(op->unmap.va); 2180 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 2181 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 2182 op->unmap.keep ? 
1 : 0); 2183 break; 2184 case DRM_GPUVA_OP_PREFETCH: 2185 vma = gpuva_to_vma(op->prefetch.va); 2186 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx", 2187 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma)); 2188 break; 2189 default: 2190 drm_warn(&xe->drm, "NOT POSSIBLE"); 2191 } 2192 } 2193 #else 2194 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 2195 { 2196 } 2197 #endif 2198 2199 static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags) 2200 { 2201 if (!xe_vm_in_fault_mode(vm)) 2202 return false; 2203 2204 if (!xe_vm_has_scratch(vm)) 2205 return false; 2206 2207 if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE) 2208 return false; 2209 2210 return true; 2211 } 2212 2213 static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops) 2214 { 2215 struct drm_gpuva_op *__op; 2216 2217 drm_gpuva_for_each_op(__op, ops) { 2218 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2219 2220 xe_vma_svm_prefetch_op_fini(op); 2221 } 2222 } 2223 2224 /* 2225 * Create operations list from IOCTL arguments, setup operations fields so parse 2226 * and commit steps are decoupled from IOCTL arguments. This step can fail. 2227 */ 2228 static struct drm_gpuva_ops * 2229 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, 2230 struct xe_bo *bo, u64 bo_offset_or_userptr, 2231 u64 addr, u64 range, 2232 u32 operation, u32 flags, 2233 u32 prefetch_region, u16 pat_index) 2234 { 2235 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL; 2236 struct drm_gpuva_ops *ops; 2237 struct drm_gpuva_op *__op; 2238 struct drm_gpuvm_bo *vm_bo; 2239 u64 range_end = addr + range; 2240 int err; 2241 2242 lockdep_assert_held_write(&vm->lock); 2243 2244 vm_dbg(&vm->xe->drm, 2245 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx", 2246 operation, (ULL)addr, (ULL)range, 2247 (ULL)bo_offset_or_userptr); 2248 2249 switch (operation) { 2250 case DRM_XE_VM_BIND_OP_MAP: 2251 case DRM_XE_VM_BIND_OP_MAP_USERPTR: { 2252 struct drm_gpuvm_map_req map_req = { 2253 .map.va.addr = addr, 2254 .map.va.range = range, 2255 .map.gem.obj = obj, 2256 .map.gem.offset = bo_offset_or_userptr, 2257 }; 2258 2259 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, &map_req); 2260 break; 2261 } 2262 case DRM_XE_VM_BIND_OP_UNMAP: 2263 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range); 2264 break; 2265 case DRM_XE_VM_BIND_OP_PREFETCH: 2266 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range); 2267 break; 2268 case DRM_XE_VM_BIND_OP_UNMAP_ALL: 2269 xe_assert(vm->xe, bo); 2270 2271 err = xe_bo_lock(bo, true); 2272 if (err) 2273 return ERR_PTR(err); 2274 2275 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj); 2276 if (IS_ERR(vm_bo)) { 2277 xe_bo_unlock(bo); 2278 return ERR_CAST(vm_bo); 2279 } 2280 2281 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo); 2282 drm_gpuvm_bo_put(vm_bo); 2283 xe_bo_unlock(bo); 2284 break; 2285 default: 2286 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2287 ops = ERR_PTR(-EINVAL); 2288 } 2289 if (IS_ERR(ops)) 2290 return ops; 2291 2292 drm_gpuva_for_each_op(__op, ops) { 2293 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2294 2295 if (__op->op == DRM_GPUVA_OP_MAP) { 2296 op->map.immediate = 2297 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; 2298 if (flags & DRM_XE_VM_BIND_FLAG_READONLY) 2299 op->map.vma_flags |= XE_VMA_READ_ONLY; 2300 if (flags & DRM_XE_VM_BIND_FLAG_NULL) 2301 op->map.vma_flags |= DRM_GPUVA_SPARSE; 2302 if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) 2303 op->map.vma_flags |= XE_VMA_SYSTEM_ALLOCATOR; 2304 if (flags & 
DRM_XE_VM_BIND_FLAG_DUMPABLE) 2305 op->map.vma_flags |= XE_VMA_DUMPABLE; 2306 if (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) 2307 op->map.vma_flags |= XE_VMA_MADV_AUTORESET; 2308 op->map.pat_index = pat_index; 2309 op->map.invalidate_on_bind = 2310 __xe_vm_needs_clear_scratch_pages(vm, flags); 2311 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { 2312 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2313 struct xe_tile *tile; 2314 struct xe_svm_range *svm_range; 2315 struct drm_gpusvm_ctx ctx = {}; 2316 struct drm_pagemap *dpagemap; 2317 u8 id, tile_mask = 0; 2318 u32 i; 2319 2320 if (!xe_vma_is_cpu_addr_mirror(vma)) { 2321 op->prefetch.region = prefetch_region; 2322 break; 2323 } 2324 2325 ctx.read_only = xe_vma_read_only(vma); 2326 ctx.devmem_possible = IS_DGFX(vm->xe) && 2327 IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); 2328 2329 for_each_tile(tile, vm->xe, id) 2330 tile_mask |= 0x1 << id; 2331 2332 xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC); 2333 op->prefetch_range.ranges_count = 0; 2334 tile = NULL; 2335 2336 if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) { 2337 dpagemap = xe_vma_resolve_pagemap(vma, 2338 xe_device_get_root_tile(vm->xe)); 2339 /* 2340 * TODO: Once multigpu support is enabled will need 2341 * something to dereference tile from dpagemap. 2342 */ 2343 if (dpagemap) 2344 tile = xe_device_get_root_tile(vm->xe); 2345 } else if (prefetch_region) { 2346 tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] - 2347 XE_PL_VRAM0]; 2348 } 2349 2350 op->prefetch_range.tile = tile; 2351 alloc_next_range: 2352 svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx); 2353 2354 if (PTR_ERR(svm_range) == -ENOENT) { 2355 u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma); 2356 2357 addr = ret == ULONG_MAX ? 0 : ret; 2358 if (addr) 2359 goto alloc_next_range; 2360 else 2361 goto print_op_label; 2362 } 2363 2364 if (IS_ERR(svm_range)) { 2365 err = PTR_ERR(svm_range); 2366 goto unwind_prefetch_ops; 2367 } 2368 2369 if (xe_svm_range_validate(vm, svm_range, tile_mask, !!tile)) { 2370 xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID"); 2371 goto check_next_range; 2372 } 2373 2374 err = xa_alloc(&op->prefetch_range.range, 2375 &i, svm_range, xa_limit_32b, 2376 GFP_KERNEL); 2377 2378 if (err) 2379 goto unwind_prefetch_ops; 2380 2381 op->prefetch_range.ranges_count++; 2382 vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH; 2383 xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED"); 2384 check_next_range: 2385 if (range_end > xe_svm_range_end(svm_range) && 2386 xe_svm_range_end(svm_range) < xe_vma_end(vma)) { 2387 addr = xe_svm_range_end(svm_range); 2388 goto alloc_next_range; 2389 } 2390 } 2391 print_op_label: 2392 print_op(vm->xe, __op); 2393 } 2394 2395 return ops; 2396 2397 unwind_prefetch_ops: 2398 xe_svm_prefetch_gpuva_ops_fini(ops); 2399 drm_gpuva_ops_free(&vm->gpuvm, ops); 2400 return ERR_PTR(err); 2401 } 2402 2403 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO); 2404 2405 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, 2406 struct xe_vma_mem_attr *attr, unsigned int flags) 2407 { 2408 struct xe_bo *bo = op->gem.obj ? 
gem_to_xe_bo(op->gem.obj) : NULL; 2409 struct xe_validation_ctx ctx; 2410 struct drm_exec exec; 2411 struct xe_vma *vma; 2412 int err = 0; 2413 2414 lockdep_assert_held_write(&vm->lock); 2415 2416 if (bo) { 2417 err = 0; 2418 xe_validation_guard(&ctx, &vm->xe->val, &exec, 2419 (struct xe_val_flags) {.interruptible = true}, err) { 2420 if (!bo->vm) { 2421 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm)); 2422 drm_exec_retry_on_contention(&exec); 2423 } 2424 if (!err) { 2425 err = drm_exec_lock_obj(&exec, &bo->ttm.base); 2426 drm_exec_retry_on_contention(&exec); 2427 } 2428 if (err) 2429 return ERR_PTR(err); 2430 2431 vma = xe_vma_create(vm, bo, op->gem.offset, 2432 op->va.addr, op->va.addr + 2433 op->va.range - 1, attr, flags); 2434 if (IS_ERR(vma)) 2435 return vma; 2436 2437 if (!bo->vm) { 2438 err = add_preempt_fences(vm, bo); 2439 if (err) { 2440 prep_vma_destroy(vm, vma, false); 2441 xe_vma_destroy(vma, NULL); 2442 } 2443 } 2444 } 2445 if (err) 2446 return ERR_PTR(err); 2447 } else { 2448 vma = xe_vma_create(vm, NULL, op->gem.offset, 2449 op->va.addr, op->va.addr + 2450 op->va.range - 1, attr, flags); 2451 if (IS_ERR(vma)) 2452 return vma; 2453 2454 if (xe_vma_is_userptr(vma)) 2455 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2456 } 2457 if (err) { 2458 prep_vma_destroy(vm, vma, false); 2459 xe_vma_destroy_unlocked(vma); 2460 vma = ERR_PTR(err); 2461 } 2462 2463 return vma; 2464 } 2465 2466 static u64 xe_vma_max_pte_size(struct xe_vma *vma) 2467 { 2468 if (vma->gpuva.flags & XE_VMA_PTE_1G) 2469 return SZ_1G; 2470 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT)) 2471 return SZ_2M; 2472 else if (vma->gpuva.flags & XE_VMA_PTE_64K) 2473 return SZ_64K; 2474 else if (vma->gpuva.flags & XE_VMA_PTE_4K) 2475 return SZ_4K; 2476 2477 return SZ_1G; /* Uninitialized, used max size */ 2478 } 2479 2480 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size) 2481 { 2482 switch (size) { 2483 case SZ_1G: 2484 vma->gpuva.flags |= XE_VMA_PTE_1G; 2485 break; 2486 case SZ_2M: 2487 vma->gpuva.flags |= XE_VMA_PTE_2M; 2488 break; 2489 case SZ_64K: 2490 vma->gpuva.flags |= XE_VMA_PTE_64K; 2491 break; 2492 case SZ_4K: 2493 vma->gpuva.flags |= XE_VMA_PTE_4K; 2494 break; 2495 } 2496 } 2497 2498 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) 2499 { 2500 int err = 0; 2501 2502 lockdep_assert_held_write(&vm->lock); 2503 2504 switch (op->base.op) { 2505 case DRM_GPUVA_OP_MAP: 2506 err |= xe_vm_insert_vma(vm, op->map.vma); 2507 if (!err) 2508 op->flags |= XE_VMA_OP_COMMITTED; 2509 break; 2510 case DRM_GPUVA_OP_REMAP: 2511 { 2512 u8 tile_present = 2513 gpuva_to_vma(op->base.remap.unmap->va)->tile_present; 2514 2515 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va), 2516 true); 2517 op->flags |= XE_VMA_OP_COMMITTED; 2518 2519 if (op->remap.prev) { 2520 err |= xe_vm_insert_vma(vm, op->remap.prev); 2521 if (!err) 2522 op->flags |= XE_VMA_OP_PREV_COMMITTED; 2523 if (!err && op->remap.skip_prev) { 2524 op->remap.prev->tile_present = 2525 tile_present; 2526 op->remap.prev = NULL; 2527 } 2528 } 2529 if (op->remap.next) { 2530 err |= xe_vm_insert_vma(vm, op->remap.next); 2531 if (!err) 2532 op->flags |= XE_VMA_OP_NEXT_COMMITTED; 2533 if (!err && op->remap.skip_next) { 2534 op->remap.next->tile_present = 2535 tile_present; 2536 op->remap.next = NULL; 2537 } 2538 } 2539 2540 /* Adjust for partial unbind after removing VMA from VM */ 2541 if (!err) { 2542 op->base.remap.unmap->va->va.addr = op->remap.start; 2543 op->base.remap.unmap->va->va.range = op->remap.range; 2544 } 
2545 break; 2546 } 2547 case DRM_GPUVA_OP_UNMAP: 2548 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true); 2549 op->flags |= XE_VMA_OP_COMMITTED; 2550 break; 2551 case DRM_GPUVA_OP_PREFETCH: 2552 op->flags |= XE_VMA_OP_COMMITTED; 2553 break; 2554 default: 2555 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2556 } 2557 2558 return err; 2559 } 2560 2561 /** 2562 * xe_vma_has_default_mem_attrs - Check if a VMA has default memory attributes 2563 * @vma: Pointer to the xe_vma structure to check 2564 * 2565 * This function determines whether the given VMA (Virtual Memory Area) 2566 * has its memory attributes set to their default values. Specifically, 2567 * it checks the following conditions: 2568 * 2569 * - `atomic_access` is `DRM_XE_VMA_ATOMIC_UNDEFINED` 2570 * - `pat_index` is equal to `default_pat_index` 2571 * - `preferred_loc.devmem_fd` is `DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE` 2572 * - `preferred_loc.migration_policy` is `DRM_XE_MIGRATE_ALL_PAGES` 2573 * 2574 * Return: true if all attributes are at their default values, false otherwise. 2575 */ 2576 bool xe_vma_has_default_mem_attrs(struct xe_vma *vma) 2577 { 2578 return (vma->attr.atomic_access == DRM_XE_ATOMIC_UNDEFINED && 2579 vma->attr.pat_index == vma->attr.default_pat_index && 2580 vma->attr.preferred_loc.devmem_fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE && 2581 vma->attr.preferred_loc.migration_policy == DRM_XE_MIGRATE_ALL_PAGES); 2582 } 2583 2584 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, 2585 struct xe_vma_ops *vops) 2586 { 2587 struct xe_device *xe = vm->xe; 2588 struct drm_gpuva_op *__op; 2589 struct xe_tile *tile; 2590 u8 id, tile_mask = 0; 2591 int err = 0; 2592 2593 lockdep_assert_held_write(&vm->lock); 2594 2595 for_each_tile(tile, vm->xe, id) 2596 tile_mask |= 0x1 << id; 2597 2598 drm_gpuva_for_each_op(__op, ops) { 2599 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2600 struct xe_vma *vma; 2601 unsigned int flags = 0; 2602 2603 INIT_LIST_HEAD(&op->link); 2604 list_add_tail(&op->link, &vops->list); 2605 op->tile_mask = tile_mask; 2606 2607 switch (op->base.op) { 2608 case DRM_GPUVA_OP_MAP: 2609 { 2610 struct xe_vma_mem_attr default_attr = { 2611 .preferred_loc = { 2612 .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE, 2613 .migration_policy = DRM_XE_MIGRATE_ALL_PAGES, 2614 }, 2615 .atomic_access = DRM_XE_ATOMIC_UNDEFINED, 2616 .default_pat_index = op->map.pat_index, 2617 .pat_index = op->map.pat_index, 2618 }; 2619 2620 flags |= op->map.vma_flags & XE_VMA_CREATE_MASK; 2621 2622 vma = new_vma(vm, &op->base.map, &default_attr, 2623 flags); 2624 if (IS_ERR(vma)) 2625 return PTR_ERR(vma); 2626 2627 op->map.vma = vma; 2628 if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) && 2629 !(op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) || 2630 op->map.invalidate_on_bind) 2631 xe_vma_ops_incr_pt_update_ops(vops, 2632 op->tile_mask, 1); 2633 break; 2634 } 2635 case DRM_GPUVA_OP_REMAP: 2636 { 2637 struct xe_vma *old = 2638 gpuva_to_vma(op->base.remap.unmap->va); 2639 bool skip = xe_vma_is_cpu_addr_mirror(old); 2640 u64 start = xe_vma_start(old), end = xe_vma_end(old); 2641 int num_remap_ops = 0; 2642 2643 if (op->base.remap.prev) 2644 start = op->base.remap.prev->va.addr + 2645 op->base.remap.prev->va.range; 2646 if (op->base.remap.next) 2647 end = op->base.remap.next->va.addr; 2648 2649 if (xe_vma_is_cpu_addr_mirror(old) && 2650 xe_svm_has_mapping(vm, start, end)) { 2651 if (vops->flags & XE_VMA_OPS_FLAG_MADVISE) 2652 xe_svm_unmap_address_range(vm, start, end); 2653 else 2654 return -EBUSY; 2655 } 
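			/*
			 * Descriptive note (editorial addition): start by assuming the
			 * whole old VMA is unbound; the skip_prev/skip_next paths below
			 * trim this range whenever the prev/next split does not require
			 * a rebind.
			 */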
2656 2657 op->remap.start = xe_vma_start(old); 2658 op->remap.range = xe_vma_size(old); 2659 2660 flags |= op->base.remap.unmap->va->flags & XE_VMA_CREATE_MASK; 2661 if (op->base.remap.prev) { 2662 vma = new_vma(vm, op->base.remap.prev, 2663 &old->attr, flags); 2664 if (IS_ERR(vma)) 2665 return PTR_ERR(vma); 2666 2667 op->remap.prev = vma; 2668 2669 /* 2670 * Userptr creates a new SG mapping so 2671 * we must also rebind. 2672 */ 2673 op->remap.skip_prev = skip || 2674 (!xe_vma_is_userptr(old) && 2675 IS_ALIGNED(xe_vma_end(vma), 2676 xe_vma_max_pte_size(old))); 2677 if (op->remap.skip_prev) { 2678 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2679 op->remap.range -= 2680 xe_vma_end(vma) - 2681 xe_vma_start(old); 2682 op->remap.start = xe_vma_end(vma); 2683 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx", 2684 (ULL)op->remap.start, 2685 (ULL)op->remap.range); 2686 } else { 2687 num_remap_ops++; 2688 } 2689 } 2690 2691 if (op->base.remap.next) { 2692 vma = new_vma(vm, op->base.remap.next, 2693 &old->attr, flags); 2694 if (IS_ERR(vma)) 2695 return PTR_ERR(vma); 2696 2697 op->remap.next = vma; 2698 2699 /* 2700 * Userptr creates a new SG mapping so 2701 * we must also rebind. 2702 */ 2703 op->remap.skip_next = skip || 2704 (!xe_vma_is_userptr(old) && 2705 IS_ALIGNED(xe_vma_start(vma), 2706 xe_vma_max_pte_size(old))); 2707 if (op->remap.skip_next) { 2708 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2709 op->remap.range -= 2710 xe_vma_end(old) - 2711 xe_vma_start(vma); 2712 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx", 2713 (ULL)op->remap.start, 2714 (ULL)op->remap.range); 2715 } else { 2716 num_remap_ops++; 2717 } 2718 } 2719 if (!skip) 2720 num_remap_ops++; 2721 2722 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops); 2723 break; 2724 } 2725 case DRM_GPUVA_OP_UNMAP: 2726 vma = gpuva_to_vma(op->base.unmap.va); 2727 2728 if (xe_vma_is_cpu_addr_mirror(vma) && 2729 xe_svm_has_mapping(vm, xe_vma_start(vma), 2730 xe_vma_end(vma))) 2731 return -EBUSY; 2732 2733 if (!xe_vma_is_cpu_addr_mirror(vma)) 2734 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2735 break; 2736 case DRM_GPUVA_OP_PREFETCH: 2737 vma = gpuva_to_vma(op->base.prefetch.va); 2738 2739 if (xe_vma_is_userptr(vma)) { 2740 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2741 if (err) 2742 return err; 2743 } 2744 2745 if (xe_vma_is_cpu_addr_mirror(vma)) 2746 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 2747 op->prefetch_range.ranges_count); 2748 else 2749 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2750 2751 break; 2752 default: 2753 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2754 } 2755 2756 err = xe_vma_op_commit(vm, op); 2757 if (err) 2758 return err; 2759 } 2760 2761 return 0; 2762 } 2763 2764 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, 2765 bool post_commit, bool prev_post_commit, 2766 bool next_post_commit) 2767 { 2768 lockdep_assert_held_write(&vm->lock); 2769 2770 switch (op->base.op) { 2771 case DRM_GPUVA_OP_MAP: 2772 if (op->map.vma) { 2773 prep_vma_destroy(vm, op->map.vma, post_commit); 2774 xe_vma_destroy_unlocked(op->map.vma); 2775 } 2776 break; 2777 case DRM_GPUVA_OP_UNMAP: 2778 { 2779 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va); 2780 2781 if (vma) { 2782 xe_svm_notifier_lock(vm); 2783 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2784 xe_svm_notifier_unlock(vm); 2785 if (post_commit) 2786 xe_vm_insert_vma(vm, vma); 2787 } 2788 break; 2789 } 2790 case DRM_GPUVA_OP_REMAP: 2791 { 2792 struct xe_vma *vma = 
gpuva_to_vma(op->base.remap.unmap->va); 2793 2794 if (op->remap.prev) { 2795 prep_vma_destroy(vm, op->remap.prev, prev_post_commit); 2796 xe_vma_destroy_unlocked(op->remap.prev); 2797 } 2798 if (op->remap.next) { 2799 prep_vma_destroy(vm, op->remap.next, next_post_commit); 2800 xe_vma_destroy_unlocked(op->remap.next); 2801 } 2802 if (vma) { 2803 xe_svm_notifier_lock(vm); 2804 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2805 xe_svm_notifier_unlock(vm); 2806 if (post_commit) 2807 xe_vm_insert_vma(vm, vma); 2808 } 2809 break; 2810 } 2811 case DRM_GPUVA_OP_PREFETCH: 2812 /* Nothing to do */ 2813 break; 2814 default: 2815 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2816 } 2817 } 2818 2819 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, 2820 struct drm_gpuva_ops **ops, 2821 int num_ops_list) 2822 { 2823 int i; 2824 2825 for (i = num_ops_list - 1; i >= 0; --i) { 2826 struct drm_gpuva_ops *__ops = ops[i]; 2827 struct drm_gpuva_op *__op; 2828 2829 if (!__ops) 2830 continue; 2831 2832 drm_gpuva_for_each_op_reverse(__op, __ops) { 2833 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2834 2835 xe_vma_op_unwind(vm, op, 2836 op->flags & XE_VMA_OP_COMMITTED, 2837 op->flags & XE_VMA_OP_PREV_COMMITTED, 2838 op->flags & XE_VMA_OP_NEXT_COMMITTED); 2839 } 2840 } 2841 } 2842 2843 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, 2844 bool res_evict, bool validate) 2845 { 2846 struct xe_bo *bo = xe_vma_bo(vma); 2847 struct xe_vm *vm = xe_vma_vm(vma); 2848 int err = 0; 2849 2850 if (bo) { 2851 if (!bo->vm) 2852 err = drm_exec_lock_obj(exec, &bo->ttm.base); 2853 if (!err && validate) 2854 err = xe_bo_validate(bo, vm, 2855 !xe_vm_in_preempt_fence_mode(vm) && 2856 res_evict, exec); 2857 } 2858 2859 return err; 2860 } 2861 2862 static int check_ufence(struct xe_vma *vma) 2863 { 2864 if (vma->ufence) { 2865 struct xe_user_fence * const f = vma->ufence; 2866 2867 if (!xe_sync_ufence_get_status(f)) 2868 return -EBUSY; 2869 2870 vma->ufence = NULL; 2871 xe_sync_ufence_put(f); 2872 } 2873 2874 return 0; 2875 } 2876 2877 static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) 2878 { 2879 bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); 2880 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2881 struct xe_tile *tile = op->prefetch_range.tile; 2882 int err = 0; 2883 2884 struct xe_svm_range *svm_range; 2885 struct drm_gpusvm_ctx ctx = {}; 2886 unsigned long i; 2887 2888 if (!xe_vma_is_cpu_addr_mirror(vma)) 2889 return 0; 2890 2891 ctx.read_only = xe_vma_read_only(vma); 2892 ctx.devmem_possible = devmem_possible; 2893 ctx.check_pages_threshold = devmem_possible ? 
SZ_64K : 0; 2894 ctx.device_private_page_owner = xe_svm_devm_owner(vm->xe); 2895 2896 /* TODO: Threading the migration */ 2897 xa_for_each(&op->prefetch_range.range, i, svm_range) { 2898 if (!tile) 2899 xe_svm_range_migrate_to_smem(vm, svm_range); 2900 2901 if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, !!tile)) { 2902 err = xe_svm_alloc_vram(tile, svm_range, &ctx); 2903 if (err) { 2904 drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n", 2905 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 2906 return -ENODATA; 2907 } 2908 xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM"); 2909 } 2910 2911 err = xe_svm_range_get_pages(vm, svm_range, &ctx); 2912 if (err) { 2913 drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n", 2914 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 2915 if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) 2916 err = -ENODATA; 2917 return err; 2918 } 2919 xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE"); 2920 } 2921 2922 return err; 2923 } 2924 2925 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, 2926 struct xe_vma_ops *vops, struct xe_vma_op *op) 2927 { 2928 int err = 0; 2929 bool res_evict; 2930 2931 /* 2932 * We only allow evicting a BO within the VM if it is not part of an 2933 * array of binds, as an array of binds can evict another BO within the 2934 * bind. 2935 */ 2936 res_evict = !(vops->flags & XE_VMA_OPS_ARRAY_OF_BINDS); 2937 2938 switch (op->base.op) { 2939 case DRM_GPUVA_OP_MAP: 2940 if (!op->map.invalidate_on_bind) 2941 err = vma_lock_and_validate(exec, op->map.vma, 2942 res_evict, 2943 !xe_vm_in_fault_mode(vm) || 2944 op->map.immediate); 2945 break; 2946 case DRM_GPUVA_OP_REMAP: 2947 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va)); 2948 if (err) 2949 break; 2950 2951 err = vma_lock_and_validate(exec, 2952 gpuva_to_vma(op->base.remap.unmap->va), 2953 res_evict, false); 2954 if (!err && op->remap.prev) 2955 err = vma_lock_and_validate(exec, op->remap.prev, 2956 res_evict, true); 2957 if (!err && op->remap.next) 2958 err = vma_lock_and_validate(exec, op->remap.next, 2959 res_evict, true); 2960 break; 2961 case DRM_GPUVA_OP_UNMAP: 2962 err = check_ufence(gpuva_to_vma(op->base.unmap.va)); 2963 if (err) 2964 break; 2965 2966 err = vma_lock_and_validate(exec, 2967 gpuva_to_vma(op->base.unmap.va), 2968 res_evict, false); 2969 break; 2970 case DRM_GPUVA_OP_PREFETCH: 2971 { 2972 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2973 u32 region; 2974 2975 if (!xe_vma_is_cpu_addr_mirror(vma)) { 2976 region = op->prefetch.region; 2977 xe_assert(vm->xe, region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC || 2978 region <= ARRAY_SIZE(region_to_mem_type)); 2979 } 2980 2981 err = vma_lock_and_validate(exec, 2982 gpuva_to_vma(op->base.prefetch.va), 2983 res_evict, false); 2984 if (!err && !xe_vma_has_no_bo(vma)) 2985 err = xe_bo_migrate(xe_vma_bo(vma), 2986 region_to_mem_type[region], 2987 NULL, 2988 exec); 2989 break; 2990 } 2991 default: 2992 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2993 } 2994 2995 return err; 2996 } 2997 2998 static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops) 2999 { 3000 struct xe_vma_op *op; 3001 int err; 3002 3003 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) 3004 return 0; 3005 3006 list_for_each_entry(op, &vops->list, link) { 3007 if (op->base.op == DRM_GPUVA_OP_PREFETCH) { 3008 err = prefetch_ranges(vm, op); 3009 if (err) 3010 return err; 3011 } 3012 } 3013 3014 
return 0; 3015 } 3016 3017 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, 3018 struct xe_vm *vm, 3019 struct xe_vma_ops *vops) 3020 { 3021 struct xe_vma_op *op; 3022 int err; 3023 3024 err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 3025 if (err) 3026 return err; 3027 3028 list_for_each_entry(op, &vops->list, link) { 3029 err = op_lock_and_prep(exec, vm, vops, op); 3030 if (err) 3031 return err; 3032 } 3033 3034 #ifdef TEST_VM_OPS_ERROR 3035 if (vops->inject_error && 3036 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK) 3037 return -ENOSPC; 3038 #endif 3039 3040 return 0; 3041 } 3042 3043 static void op_trace(struct xe_vma_op *op) 3044 { 3045 switch (op->base.op) { 3046 case DRM_GPUVA_OP_MAP: 3047 trace_xe_vma_bind(op->map.vma); 3048 break; 3049 case DRM_GPUVA_OP_REMAP: 3050 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va)); 3051 if (op->remap.prev) 3052 trace_xe_vma_bind(op->remap.prev); 3053 if (op->remap.next) 3054 trace_xe_vma_bind(op->remap.next); 3055 break; 3056 case DRM_GPUVA_OP_UNMAP: 3057 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va)); 3058 break; 3059 case DRM_GPUVA_OP_PREFETCH: 3060 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va)); 3061 break; 3062 case DRM_GPUVA_OP_DRIVER: 3063 break; 3064 default: 3065 XE_WARN_ON("NOT POSSIBLE"); 3066 } 3067 } 3068 3069 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops) 3070 { 3071 struct xe_vma_op *op; 3072 3073 list_for_each_entry(op, &vops->list, link) 3074 op_trace(op); 3075 } 3076 3077 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops) 3078 { 3079 struct xe_exec_queue *q = vops->q; 3080 struct xe_tile *tile; 3081 int number_tiles = 0; 3082 u8 id; 3083 3084 for_each_tile(tile, vm->xe, id) { 3085 if (vops->pt_update_ops[id].num_ops) 3086 ++number_tiles; 3087 3088 if (vops->pt_update_ops[id].q) 3089 continue; 3090 3091 if (q) { 3092 vops->pt_update_ops[id].q = q; 3093 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list)) 3094 q = list_next_entry(q, multi_gt_list); 3095 } else { 3096 vops->pt_update_ops[id].q = vm->q[id]; 3097 } 3098 } 3099 3100 return number_tiles; 3101 } 3102 3103 static struct dma_fence *ops_execute(struct xe_vm *vm, 3104 struct xe_vma_ops *vops) 3105 { 3106 struct xe_tile *tile; 3107 struct dma_fence *fence = NULL; 3108 struct dma_fence **fences = NULL; 3109 struct dma_fence_array *cf = NULL; 3110 int number_tiles = 0, current_fence = 0, n_fence = 0, err; 3111 u8 id; 3112 3113 number_tiles = vm_ops_setup_tile_args(vm, vops); 3114 if (number_tiles == 0) 3115 return ERR_PTR(-ENODATA); 3116 3117 if (vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT) { 3118 for_each_tile(tile, vm->xe, id) 3119 ++n_fence; 3120 } else { 3121 for_each_tile(tile, vm->xe, id) 3122 n_fence += (1 + XE_MAX_GT_PER_TILE); 3123 } 3124 3125 fences = kmalloc_array(n_fence, sizeof(*fences), GFP_KERNEL); 3126 if (!fences) { 3127 fence = ERR_PTR(-ENOMEM); 3128 goto err_trace; 3129 } 3130 3131 cf = dma_fence_array_alloc(n_fence); 3132 if (!cf) { 3133 fence = ERR_PTR(-ENOMEM); 3134 goto err_out; 3135 } 3136 3137 for_each_tile(tile, vm->xe, id) { 3138 if (!vops->pt_update_ops[id].num_ops) 3139 continue; 3140 3141 err = xe_pt_update_ops_prepare(tile, vops); 3142 if (err) { 3143 fence = ERR_PTR(err); 3144 goto err_out; 3145 } 3146 } 3147 3148 trace_xe_vm_ops_execute(vops); 3149 3150 for_each_tile(tile, vm->xe, id) { 3151 struct xe_exec_queue *q = vops->pt_update_ops[tile->id].q; 3152 int i; 3153 3154 fence = NULL; 3155 if (!vops->pt_update_ops[id].num_ops) 3156 goto collect_fences; 3157 
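		/*
		 * Descriptive note (editorial addition): run this tile's PT update
		 * job; its fence is collected below, together with the queue's last
		 * TLB-invalidation fences unless XE_VMA_OPS_FLAG_SKIP_TLB_WAIT is set.
		 */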
3158 fence = xe_pt_update_ops_run(tile, vops); 3159 if (IS_ERR(fence)) 3160 goto err_out; 3161 3162 collect_fences: 3163 fences[current_fence++] = fence ?: dma_fence_get_stub(); 3164 if (vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT) 3165 continue; 3166 3167 xe_migrate_job_lock(tile->migrate, q); 3168 for_each_tlb_inval(i) 3169 fences[current_fence++] = 3170 xe_exec_queue_tlb_inval_last_fence_get(q, vm, i); 3171 xe_migrate_job_unlock(tile->migrate, q); 3172 } 3173 3174 xe_assert(vm->xe, current_fence == n_fence); 3175 dma_fence_array_init(cf, n_fence, fences, dma_fence_context_alloc(1), 3176 1, false); 3177 fence = &cf->base; 3178 3179 for_each_tile(tile, vm->xe, id) { 3180 if (!vops->pt_update_ops[id].num_ops) 3181 continue; 3182 3183 xe_pt_update_ops_fini(tile, vops); 3184 } 3185 3186 return fence; 3187 3188 err_out: 3189 for_each_tile(tile, vm->xe, id) { 3190 if (!vops->pt_update_ops[id].num_ops) 3191 continue; 3192 3193 xe_pt_update_ops_abort(tile, vops); 3194 } 3195 while (current_fence) 3196 dma_fence_put(fences[--current_fence]); 3197 kfree(fences); 3198 kfree(cf); 3199 3200 err_trace: 3201 trace_xe_vm_ops_fail(vm); 3202 return fence; 3203 } 3204 3205 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence) 3206 { 3207 if (vma->ufence) 3208 xe_sync_ufence_put(vma->ufence); 3209 vma->ufence = __xe_sync_ufence_get(ufence); 3210 } 3211 3212 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op, 3213 struct xe_user_fence *ufence) 3214 { 3215 switch (op->base.op) { 3216 case DRM_GPUVA_OP_MAP: 3217 vma_add_ufence(op->map.vma, ufence); 3218 break; 3219 case DRM_GPUVA_OP_REMAP: 3220 if (op->remap.prev) 3221 vma_add_ufence(op->remap.prev, ufence); 3222 if (op->remap.next) 3223 vma_add_ufence(op->remap.next, ufence); 3224 break; 3225 case DRM_GPUVA_OP_UNMAP: 3226 break; 3227 case DRM_GPUVA_OP_PREFETCH: 3228 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence); 3229 break; 3230 default: 3231 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 3232 } 3233 } 3234 3235 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, 3236 struct dma_fence *fence) 3237 { 3238 struct xe_user_fence *ufence; 3239 struct xe_vma_op *op; 3240 int i; 3241 3242 ufence = find_ufence_get(vops->syncs, vops->num_syncs); 3243 list_for_each_entry(op, &vops->list, link) { 3244 if (ufence) 3245 op_add_ufence(vm, op, ufence); 3246 3247 if (op->base.op == DRM_GPUVA_OP_UNMAP) 3248 xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence); 3249 else if (op->base.op == DRM_GPUVA_OP_REMAP) 3250 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), 3251 fence); 3252 } 3253 if (ufence) 3254 xe_sync_ufence_put(ufence); 3255 if (fence) { 3256 for (i = 0; i < vops->num_syncs; i++) 3257 xe_sync_entry_signal(vops->syncs + i, fence); 3258 } 3259 } 3260 3261 static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm, 3262 struct xe_vma_ops *vops) 3263 { 3264 struct xe_validation_ctx ctx; 3265 struct drm_exec exec; 3266 struct dma_fence *fence; 3267 int err = 0; 3268 3269 lockdep_assert_held_write(&vm->lock); 3270 3271 xe_validation_guard(&ctx, &vm->xe->val, &exec, 3272 ((struct xe_val_flags) { 3273 .interruptible = true, 3274 .exec_ignore_duplicates = true, 3275 }), err) { 3276 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops); 3277 drm_exec_retry_on_contention(&exec); 3278 xe_validation_retry_on_oom(&ctx, &err); 3279 if (err) 3280 return ERR_PTR(err); 3281 3282 xe_vm_set_validation_exec(vm, &exec); 3283 fence = ops_execute(vm, vops); 3284 xe_vm_set_validation_exec(vm, NULL); 3285 
if (IS_ERR(fence)) { 3286 if (PTR_ERR(fence) == -ENODATA) 3287 vm_bind_ioctl_ops_fini(vm, vops, NULL); 3288 return fence; 3289 } 3290 3291 vm_bind_ioctl_ops_fini(vm, vops, fence); 3292 } 3293 3294 return err ? ERR_PTR(err) : fence; 3295 } 3296 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); 3297 3298 #define SUPPORTED_FLAGS_STUB \ 3299 (DRM_XE_VM_BIND_FLAG_READONLY | \ 3300 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \ 3301 DRM_XE_VM_BIND_FLAG_NULL | \ 3302 DRM_XE_VM_BIND_FLAG_DUMPABLE | \ 3303 DRM_XE_VM_BIND_FLAG_CHECK_PXP | \ 3304 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR | \ 3305 DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) 3306 3307 #ifdef TEST_VM_OPS_ERROR 3308 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR) 3309 #else 3310 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB 3311 #endif 3312 3313 #define XE_64K_PAGE_MASK 0xffffull 3314 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) 3315 3316 static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, 3317 struct drm_xe_vm_bind *args, 3318 struct drm_xe_vm_bind_op **bind_ops) 3319 { 3320 int err; 3321 int i; 3322 3323 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) || 3324 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 3325 return -EINVAL; 3326 3327 if (XE_IOCTL_DBG(xe, args->extensions)) 3328 return -EINVAL; 3329 3330 if (XE_IOCTL_DBG(xe, args->num_syncs > DRM_XE_MAX_SYNCS)) 3331 return -EINVAL; 3332 3333 if (args->num_binds > 1) { 3334 u64 __user *bind_user = 3335 u64_to_user_ptr(args->vector_of_binds); 3336 3337 *bind_ops = kvmalloc_array(args->num_binds, 3338 sizeof(struct drm_xe_vm_bind_op), 3339 GFP_KERNEL | __GFP_ACCOUNT | 3340 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3341 if (!*bind_ops) 3342 return args->num_binds > 1 ? -ENOBUFS : -ENOMEM; 3343 3344 err = copy_from_user(*bind_ops, bind_user, 3345 sizeof(struct drm_xe_vm_bind_op) * 3346 args->num_binds); 3347 if (XE_IOCTL_DBG(xe, err)) { 3348 err = -EFAULT; 3349 goto free_bind_ops; 3350 } 3351 } else { 3352 *bind_ops = &args->bind; 3353 } 3354 3355 for (i = 0; i < args->num_binds; ++i) { 3356 u64 range = (*bind_ops)[i].range; 3357 u64 addr = (*bind_ops)[i].addr; 3358 u32 op = (*bind_ops)[i].op; 3359 u32 flags = (*bind_ops)[i].flags; 3360 u32 obj = (*bind_ops)[i].obj; 3361 u64 obj_offset = (*bind_ops)[i].obj_offset; 3362 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance; 3363 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 3364 bool is_cpu_addr_mirror = flags & 3365 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR; 3366 u16 pat_index = (*bind_ops)[i].pat_index; 3367 u16 coh_mode; 3368 3369 if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror && 3370 (!xe_vm_in_fault_mode(vm) || 3371 !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) { 3372 err = -EINVAL; 3373 goto free_bind_ops; 3374 } 3375 3376 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) { 3377 err = -EINVAL; 3378 goto free_bind_ops; 3379 } 3380 3381 pat_index = array_index_nospec(pat_index, xe->pat.n_entries); 3382 (*bind_ops)[i].pat_index = pat_index; 3383 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); 3384 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */ 3385 err = -EINVAL; 3386 goto free_bind_ops; 3387 } 3388 3389 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) { 3390 err = -EINVAL; 3391 goto free_bind_ops; 3392 } 3393 3394 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) || 3395 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) || 3396 XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) || 3397 XE_IOCTL_DBG(xe, obj_offset && (is_null || 3398 is_cpu_addr_mirror)) || 3399 XE_IOCTL_DBG(xe, op 
!= DRM_XE_VM_BIND_OP_MAP && 3400 (is_null || is_cpu_addr_mirror)) || 3401 XE_IOCTL_DBG(xe, !obj && 3402 op == DRM_XE_VM_BIND_OP_MAP && 3403 !is_null && !is_cpu_addr_mirror) || 3404 XE_IOCTL_DBG(xe, !obj && 3405 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3406 XE_IOCTL_DBG(xe, addr && 3407 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3408 XE_IOCTL_DBG(xe, range && 3409 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3410 XE_IOCTL_DBG(xe, obj && 3411 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 3412 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 3413 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 3414 XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR && 3415 !IS_ENABLED(CONFIG_DRM_GPUSVM)) || 3416 XE_IOCTL_DBG(xe, obj && 3417 op == DRM_XE_VM_BIND_OP_PREFETCH) || 3418 XE_IOCTL_DBG(xe, prefetch_region && 3419 op != DRM_XE_VM_BIND_OP_PREFETCH) || 3420 XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC && 3421 /* Guard against undefined shift in BIT(prefetch_region) */ 3422 (prefetch_region >= (sizeof(xe->info.mem_region_mask) * 8) || 3423 !(BIT(prefetch_region) & xe->info.mem_region_mask)))) || 3424 XE_IOCTL_DBG(xe, obj && 3425 op == DRM_XE_VM_BIND_OP_UNMAP) || 3426 XE_IOCTL_DBG(xe, (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) && 3427 (!is_cpu_addr_mirror || op != DRM_XE_VM_BIND_OP_MAP))) { 3428 err = -EINVAL; 3429 goto free_bind_ops; 3430 } 3431 3432 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) || 3433 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) || 3434 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) || 3435 XE_IOCTL_DBG(xe, !range && 3436 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) { 3437 err = -EINVAL; 3438 goto free_bind_ops; 3439 } 3440 } 3441 3442 return 0; 3443 3444 free_bind_ops: 3445 if (args->num_binds > 1) 3446 kvfree(*bind_ops); 3447 *bind_ops = NULL; 3448 return err; 3449 } 3450 3451 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, 3452 struct xe_exec_queue *q, 3453 struct xe_sync_entry *syncs, 3454 int num_syncs) 3455 { 3456 struct dma_fence *fence = NULL; 3457 int i, err = 0; 3458 3459 if (num_syncs) { 3460 fence = xe_sync_in_fence_get(syncs, num_syncs, 3461 to_wait_exec_queue(vm, q), vm); 3462 if (IS_ERR(fence)) 3463 return PTR_ERR(fence); 3464 3465 for (i = 0; i < num_syncs; i++) 3466 xe_sync_entry_signal(&syncs[i], fence); 3467 } 3468 3469 dma_fence_put(fence); 3470 3471 return err; 3472 } 3473 3474 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 3475 struct xe_exec_queue *q, 3476 struct xe_sync_entry *syncs, u32 num_syncs) 3477 { 3478 memset(vops, 0, sizeof(*vops)); 3479 INIT_LIST_HEAD(&vops->list); 3480 vops->vm = vm; 3481 vops->q = q; 3482 vops->syncs = syncs; 3483 vops->num_syncs = num_syncs; 3484 vops->flags = 0; 3485 } 3486 3487 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, 3488 u64 addr, u64 range, u64 obj_offset, 3489 u16 pat_index, u32 op, u32 bind_flags) 3490 { 3491 u16 coh_mode; 3492 3493 if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) || 3494 XE_IOCTL_DBG(xe, obj_offset > 3495 xe_bo_size(bo) - range)) { 3496 return -EINVAL; 3497 } 3498 3499 /* 3500 * Some platforms require 64k VM_BIND alignment, 3501 * specifically those with XE_VRAM_FLAGS_NEED64K. 3502 * 3503 * Other platforms may have BO's set to 64k physical placement, 3504 * but can be mapped at 4k offsets anyway. This check is only 3505 * there for the former case. 
3506 */ 3507 if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) && 3508 (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) { 3509 if (XE_IOCTL_DBG(xe, obj_offset & 3510 XE_64K_PAGE_MASK) || 3511 XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || 3512 XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) { 3513 return -EINVAL; 3514 } 3515 } 3516 3517 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); 3518 if (bo->cpu_caching) { 3519 if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 3520 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) { 3521 return -EINVAL; 3522 } 3523 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) { 3524 /* 3525 * Imported dma-buf from a different device should 3526 * require 1way or 2way coherency since we don't know 3527 * how it was mapped on the CPU. Just assume is it 3528 * potentially cached on CPU side. 3529 */ 3530 return -EINVAL; 3531 } 3532 3533 /* If a BO is protected it can only be mapped if the key is still valid */ 3534 if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) && 3535 op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL) 3536 if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0)) 3537 return -ENOEXEC; 3538 3539 return 0; 3540 } 3541 3542 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3543 { 3544 struct xe_device *xe = to_xe_device(dev); 3545 struct xe_file *xef = to_xe_file(file); 3546 struct drm_xe_vm_bind *args = data; 3547 struct drm_xe_sync __user *syncs_user; 3548 struct xe_bo **bos = NULL; 3549 struct drm_gpuva_ops **ops = NULL; 3550 struct xe_vm *vm; 3551 struct xe_exec_queue *q = NULL; 3552 u32 num_syncs, num_ufence = 0; 3553 struct xe_sync_entry *syncs = NULL; 3554 struct drm_xe_vm_bind_op *bind_ops = NULL; 3555 struct xe_vma_ops vops; 3556 struct dma_fence *fence; 3557 int err; 3558 int i; 3559 3560 vm = xe_vm_lookup(xef, args->vm_id); 3561 if (XE_IOCTL_DBG(xe, !vm)) 3562 return -EINVAL; 3563 3564 err = vm_bind_ioctl_check_args(xe, vm, args, &bind_ops); 3565 if (err) 3566 goto put_vm; 3567 3568 if (args->exec_queue_id) { 3569 q = xe_exec_queue_lookup(xef, args->exec_queue_id); 3570 if (XE_IOCTL_DBG(xe, !q)) { 3571 err = -ENOENT; 3572 goto free_bind_ops; 3573 } 3574 3575 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) { 3576 err = -EINVAL; 3577 goto put_exec_queue; 3578 } 3579 } 3580 3581 /* Ensure all UNMAPs visible */ 3582 xe_svm_flush(vm); 3583 3584 err = down_write_killable(&vm->lock); 3585 if (err) 3586 goto put_exec_queue; 3587 3588 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { 3589 err = -ENOENT; 3590 goto release_vm_lock; 3591 } 3592 3593 for (i = 0; i < args->num_binds; ++i) { 3594 u64 range = bind_ops[i].range; 3595 u64 addr = bind_ops[i].addr; 3596 3597 if (XE_IOCTL_DBG(xe, range > vm->size) || 3598 XE_IOCTL_DBG(xe, addr > vm->size - range)) { 3599 err = -EINVAL; 3600 goto release_vm_lock; 3601 } 3602 } 3603 3604 if (args->num_binds) { 3605 bos = kvcalloc(args->num_binds, sizeof(*bos), 3606 GFP_KERNEL | __GFP_ACCOUNT | 3607 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3608 if (!bos) { 3609 err = -ENOMEM; 3610 goto release_vm_lock; 3611 } 3612 3613 ops = kvcalloc(args->num_binds, sizeof(*ops), 3614 GFP_KERNEL | __GFP_ACCOUNT | 3615 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3616 if (!ops) { 3617 err = -ENOMEM; 3618 goto free_bos; 3619 } 3620 } 3621 3622 for (i = 0; i < args->num_binds; ++i) { 3623 struct drm_gem_object *gem_obj; 3624 u64 range = bind_ops[i].range; 3625 u64 addr = bind_ops[i].addr; 3626 u32 obj = bind_ops[i].obj; 3627 u64 obj_offset = bind_ops[i].obj_offset; 
3628 u16 pat_index = bind_ops[i].pat_index; 3629 u32 op = bind_ops[i].op; 3630 u32 bind_flags = bind_ops[i].flags; 3631 3632 if (!obj) 3633 continue; 3634 3635 gem_obj = drm_gem_object_lookup(file, obj); 3636 if (XE_IOCTL_DBG(xe, !gem_obj)) { 3637 err = -ENOENT; 3638 goto put_obj; 3639 } 3640 bos[i] = gem_to_xe_bo(gem_obj); 3641 3642 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range, 3643 obj_offset, pat_index, op, 3644 bind_flags); 3645 if (err) 3646 goto put_obj; 3647 } 3648 3649 if (args->num_syncs) { 3650 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); 3651 if (!syncs) { 3652 err = -ENOMEM; 3653 goto put_obj; 3654 } 3655 } 3656 3657 syncs_user = u64_to_user_ptr(args->syncs); 3658 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { 3659 struct xe_exec_queue *__q = q ?: vm->q[0]; 3660 3661 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], 3662 &syncs_user[num_syncs], 3663 __q->ufence_syncobj, 3664 ++__q->ufence_timeline_value, 3665 (xe_vm_in_lr_mode(vm) ? 3666 SYNC_PARSE_FLAG_LR_MODE : 0) | 3667 (!args->num_binds ? 3668 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0)); 3669 if (err) 3670 goto free_syncs; 3671 3672 if (xe_sync_is_ufence(&syncs[num_syncs])) 3673 num_ufence++; 3674 } 3675 3676 if (XE_IOCTL_DBG(xe, num_ufence > 1)) { 3677 err = -EINVAL; 3678 goto free_syncs; 3679 } 3680 3681 if (!args->num_binds) { 3682 err = -ENODATA; 3683 goto free_syncs; 3684 } 3685 3686 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); 3687 if (args->num_binds > 1) 3688 vops.flags |= XE_VMA_OPS_ARRAY_OF_BINDS; 3689 for (i = 0; i < args->num_binds; ++i) { 3690 u64 range = bind_ops[i].range; 3691 u64 addr = bind_ops[i].addr; 3692 u32 op = bind_ops[i].op; 3693 u32 flags = bind_ops[i].flags; 3694 u64 obj_offset = bind_ops[i].obj_offset; 3695 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance; 3696 u16 pat_index = bind_ops[i].pat_index; 3697 3698 ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset, 3699 addr, range, op, flags, 3700 prefetch_region, pat_index); 3701 if (IS_ERR(ops[i])) { 3702 err = PTR_ERR(ops[i]); 3703 ops[i] = NULL; 3704 goto unwind_ops; 3705 } 3706 3707 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops); 3708 if (err) 3709 goto unwind_ops; 3710 3711 #ifdef TEST_VM_OPS_ERROR 3712 if (flags & FORCE_OP_ERROR) { 3713 vops.inject_error = true; 3714 vm->xe->vm_inject_error_position = 3715 (vm->xe->vm_inject_error_position + 1) % 3716 FORCE_OP_ERROR_COUNT; 3717 } 3718 #endif 3719 } 3720 3721 /* Nothing to do */ 3722 if (list_empty(&vops.list)) { 3723 err = -ENODATA; 3724 goto unwind_ops; 3725 } 3726 3727 err = xe_vma_ops_alloc(&vops, args->num_binds > 1); 3728 if (err) 3729 goto unwind_ops; 3730 3731 err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops); 3732 if (err) 3733 goto unwind_ops; 3734 3735 fence = vm_bind_ioctl_ops_execute(vm, &vops); 3736 if (IS_ERR(fence)) 3737 err = PTR_ERR(fence); 3738 else 3739 dma_fence_put(fence); 3740 3741 unwind_ops: 3742 if (err && err != -ENODATA) 3743 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); 3744 xe_vma_ops_fini(&vops); 3745 for (i = args->num_binds - 1; i >= 0; --i) 3746 if (ops[i]) 3747 drm_gpuva_ops_free(&vm->gpuvm, ops[i]); 3748 free_syncs: 3749 if (err == -ENODATA) 3750 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs); 3751 while (num_syncs--) 3752 xe_sync_entry_cleanup(&syncs[num_syncs]); 3753 3754 kfree(syncs); 3755 put_obj: 3756 for (i = 0; i < args->num_binds; ++i) 3757 xe_bo_put(bos[i]); 3758 3759 kvfree(ops); 3760 free_bos: 3761 kvfree(bos); 3762 release_vm_lock: 3763 
up_write(&vm->lock); 3764 put_exec_queue: 3765 if (q) 3766 xe_exec_queue_put(q); 3767 free_bind_ops: 3768 if (args->num_binds > 1) 3769 kvfree(bind_ops); 3770 put_vm: 3771 xe_vm_put(vm); 3772 return err; 3773 } 3774 3775 /** 3776 * xe_vm_bind_kernel_bo - bind a kernel BO to a VM 3777 * @vm: VM to bind the BO to 3778 * @bo: BO to bind 3779 * @q: exec queue to use for the bind (optional) 3780 * @addr: address at which to bind the BO 3781 * @cache_lvl: PAT cache level to use 3782 * 3783 * Execute a VM bind map operation on a kernel-owned BO to bind it into a 3784 * kernel-owned VM. 3785 * 3786 * Returns a dma_fence to track the binding completion if the job to do so was 3787 * successfully submitted, an error pointer otherwise. 3788 */ 3789 struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo, 3790 struct xe_exec_queue *q, u64 addr, 3791 enum xe_cache_level cache_lvl) 3792 { 3793 struct xe_vma_ops vops; 3794 struct drm_gpuva_ops *ops = NULL; 3795 struct dma_fence *fence; 3796 int err; 3797 3798 xe_bo_get(bo); 3799 xe_vm_get(vm); 3800 if (q) 3801 xe_exec_queue_get(q); 3802 3803 down_write(&vm->lock); 3804 3805 xe_vma_ops_init(&vops, vm, q, NULL, 0); 3806 3807 ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo), 3808 DRM_XE_VM_BIND_OP_MAP, 0, 0, 3809 vm->xe->pat.idx[cache_lvl]); 3810 if (IS_ERR(ops)) { 3811 err = PTR_ERR(ops); 3812 goto release_vm_lock; 3813 } 3814 3815 err = vm_bind_ioctl_ops_parse(vm, ops, &vops); 3816 if (err) 3817 goto release_vm_lock; 3818 3819 xe_assert(vm->xe, !list_empty(&vops.list)); 3820 3821 err = xe_vma_ops_alloc(&vops, false); 3822 if (err) 3823 goto unwind_ops; 3824 3825 fence = vm_bind_ioctl_ops_execute(vm, &vops); 3826 if (IS_ERR(fence)) 3827 err = PTR_ERR(fence); 3828 3829 unwind_ops: 3830 if (err && err != -ENODATA) 3831 vm_bind_ioctl_ops_unwind(vm, &ops, 1); 3832 3833 xe_vma_ops_fini(&vops); 3834 drm_gpuva_ops_free(&vm->gpuvm, ops); 3835 3836 release_vm_lock: 3837 up_write(&vm->lock); 3838 3839 if (q) 3840 xe_exec_queue_put(q); 3841 xe_vm_put(vm); 3842 xe_bo_put(bo); 3843 3844 if (err) 3845 fence = ERR_PTR(err); 3846 3847 return fence; 3848 } 3849 3850 /** 3851 * xe_vm_lock() - Lock the vm's dma_resv object 3852 * @vm: The struct xe_vm whose lock is to be locked 3853 * @intr: Whether to perform any wait interruptible 3854 * 3855 * Return: 0 on success, -EINTR if @intr is true and the wait for a 3856 * contended lock was interrupted. If @intr is false, the function 3857 * always returns 0. 3858 */ 3859 int xe_vm_lock(struct xe_vm *vm, bool intr) 3860 { 3861 int ret; 3862 3863 if (intr) 3864 ret = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); 3865 else 3866 ret = dma_resv_lock(xe_vm_resv(vm), NULL); 3867 3868 return ret; 3869 } 3870 3871 /** 3872 * xe_vm_unlock() - Unlock the vm's dma_resv object 3873 * @vm: The struct xe_vm whose lock is to be released. 3874 * 3875 * Unlock a buffer object lock that was locked by xe_vm_lock(). 3876 */ 3877 void xe_vm_unlock(struct xe_vm *vm) 3878 { 3879 dma_resv_unlock(xe_vm_resv(vm)); 3880 } 3881 3882 /** 3883 * xe_vm_range_tilemask_tlb_inval - Issue a TLB invalidation on this tilemask for an 3884 * address range 3885 * @vm: The VM 3886 * @start: start address 3887 * @end: end address 3888 * @tile_mask: mask for which gt's issue tlb invalidation 3889 * 3890 * Issue a range based TLB invalidation for gt's in tilemask 3891 * 3892 * Returns 0 for success, negative error code otherwise. 
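 *
 * An invalidation is issued on both the primary and, if present, the media
 * GT of every tile selected by @tile_mask. If issuing an invalidation fails,
 * the fences already issued are still waited upon before the error is
 * returned.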
3893 */ 3894 int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start, 3895 u64 end, u8 tile_mask) 3896 { 3897 struct xe_tlb_inval_fence 3898 fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; 3899 struct xe_tile *tile; 3900 u32 fence_id = 0; 3901 u8 id; 3902 int err; 3903 3904 if (!tile_mask) 3905 return 0; 3906 3907 for_each_tile(tile, vm->xe, id) { 3908 if (!(tile_mask & BIT(id))) 3909 continue; 3910 3911 xe_tlb_inval_fence_init(&tile->primary_gt->tlb_inval, 3912 &fence[fence_id], true); 3913 3914 err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval, 3915 &fence[fence_id], start, end, 3916 vm->usm.asid); 3917 if (err) 3918 goto wait; 3919 ++fence_id; 3920 3921 if (!tile->media_gt) 3922 continue; 3923 3924 xe_tlb_inval_fence_init(&tile->media_gt->tlb_inval, 3925 &fence[fence_id], true); 3926 3927 err = xe_tlb_inval_range(&tile->media_gt->tlb_inval, 3928 &fence[fence_id], start, end, 3929 vm->usm.asid); 3930 if (err) 3931 goto wait; 3932 ++fence_id; 3933 } 3934 3935 wait: 3936 for (id = 0; id < fence_id; ++id) 3937 xe_tlb_inval_fence_wait(&fence[id]); 3938 3939 return err; 3940 } 3941 3942 /** 3943 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock 3944 * @vma: VMA to invalidate 3945 * 3946 * Walks a list of page tables leaves which it memset the entries owned by this 3947 * VMA to zero, invalidates the TLBs, and block until TLBs invalidation is 3948 * complete. 3949 * 3950 * Returns 0 for success, negative error code otherwise. 3951 */ 3952 int xe_vm_invalidate_vma(struct xe_vma *vma) 3953 { 3954 struct xe_device *xe = xe_vma_vm(vma)->xe; 3955 struct xe_vm *vm = xe_vma_vm(vma); 3956 struct xe_tile *tile; 3957 u8 tile_mask = 0; 3958 int ret = 0; 3959 u8 id; 3960 3961 xe_assert(xe, !xe_vma_is_null(vma)); 3962 xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma)); 3963 trace_xe_vma_invalidate(vma); 3964 3965 vm_dbg(&vm->xe->drm, 3966 "INVALIDATE: addr=0x%016llx, range=0x%016llx", 3967 xe_vma_start(vma), xe_vma_size(vma)); 3968 3969 /* 3970 * Check that we don't race with page-table updates, tile_invalidated 3971 * update is safe 3972 */ 3973 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { 3974 if (xe_vma_is_userptr(vma)) { 3975 lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) || 3976 (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) && 3977 lockdep_is_held(&xe_vm_resv(vm)->lock.base))); 3978 3979 WARN_ON_ONCE(!mmu_interval_check_retry 3980 (&to_userptr_vma(vma)->userptr.notifier, 3981 to_userptr_vma(vma)->userptr.pages.notifier_seq)); 3982 WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm), 3983 DMA_RESV_USAGE_BOOKKEEP)); 3984 3985 } else { 3986 xe_bo_assert_held(xe_vma_bo(vma)); 3987 } 3988 } 3989 3990 for_each_tile(tile, xe, id) 3991 if (xe_pt_zap_ptes(tile, vma)) 3992 tile_mask |= BIT(id); 3993 3994 xe_device_wmb(xe); 3995 3996 ret = xe_vm_range_tilemask_tlb_inval(xe_vma_vm(vma), xe_vma_start(vma), 3997 xe_vma_end(vma), tile_mask); 3998 3999 /* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */ 4000 WRITE_ONCE(vma->tile_invalidated, vma->tile_mask); 4001 4002 return ret; 4003 } 4004 4005 int xe_vm_validate_protected(struct xe_vm *vm) 4006 { 4007 struct drm_gpuva *gpuva; 4008 int err = 0; 4009 4010 if (!vm) 4011 return -ENODEV; 4012 4013 mutex_lock(&vm->snap_mutex); 4014 4015 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 4016 struct xe_vma *vma = gpuva_to_vma(gpuva); 4017 struct xe_bo *bo = vma->gpuva.gem.obj ? 
4018 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 4019 4020 if (!bo) 4021 continue; 4022 4023 if (xe_bo_is_protected(bo)) { 4024 err = xe_pxp_bo_key_check(vm->xe->pxp, bo); 4025 if (err) 4026 break; 4027 } 4028 } 4029 4030 mutex_unlock(&vm->snap_mutex); 4031 return err; 4032 } 4033 4034 struct xe_vm_snapshot { 4035 unsigned long num_snaps; 4036 struct { 4037 u64 ofs, bo_ofs; 4038 unsigned long len; 4039 struct xe_bo *bo; 4040 void *data; 4041 struct mm_struct *mm; 4042 } snap[]; 4043 }; 4044 4045 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm) 4046 { 4047 unsigned long num_snaps = 0, i; 4048 struct xe_vm_snapshot *snap = NULL; 4049 struct drm_gpuva *gpuva; 4050 4051 if (!vm) 4052 return NULL; 4053 4054 mutex_lock(&vm->snap_mutex); 4055 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 4056 if (gpuva->flags & XE_VMA_DUMPABLE) 4057 num_snaps++; 4058 } 4059 4060 if (num_snaps) 4061 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT); 4062 if (!snap) { 4063 snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV); 4064 goto out_unlock; 4065 } 4066 4067 snap->num_snaps = num_snaps; 4068 i = 0; 4069 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 4070 struct xe_vma *vma = gpuva_to_vma(gpuva); 4071 struct xe_bo *bo = vma->gpuva.gem.obj ? 4072 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 4073 4074 if (!(gpuva->flags & XE_VMA_DUMPABLE)) 4075 continue; 4076 4077 snap->snap[i].ofs = xe_vma_start(vma); 4078 snap->snap[i].len = xe_vma_size(vma); 4079 if (bo) { 4080 snap->snap[i].bo = xe_bo_get(bo); 4081 snap->snap[i].bo_ofs = xe_vma_bo_offset(vma); 4082 } else if (xe_vma_is_userptr(vma)) { 4083 struct mm_struct *mm = 4084 to_userptr_vma(vma)->userptr.notifier.mm; 4085 4086 if (mmget_not_zero(mm)) 4087 snap->snap[i].mm = mm; 4088 else 4089 snap->snap[i].data = ERR_PTR(-EFAULT); 4090 4091 snap->snap[i].bo_ofs = xe_vma_userptr(vma); 4092 } else { 4093 snap->snap[i].data = ERR_PTR(-ENOENT); 4094 } 4095 i++; 4096 } 4097 4098 out_unlock: 4099 mutex_unlock(&vm->snap_mutex); 4100 return snap; 4101 } 4102 4103 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) 4104 { 4105 if (IS_ERR_OR_NULL(snap)) 4106 return; 4107 4108 for (int i = 0; i < snap->num_snaps; i++) { 4109 struct xe_bo *bo = snap->snap[i].bo; 4110 int err; 4111 4112 if (IS_ERR(snap->snap[i].data)) 4113 continue; 4114 4115 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER); 4116 if (!snap->snap[i].data) { 4117 snap->snap[i].data = ERR_PTR(-ENOMEM); 4118 goto cleanup_bo; 4119 } 4120 4121 if (bo) { 4122 err = xe_bo_read(bo, snap->snap[i].bo_ofs, 4123 snap->snap[i].data, snap->snap[i].len); 4124 } else { 4125 void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs; 4126 4127 kthread_use_mm(snap->snap[i].mm); 4128 if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len)) 4129 err = 0; 4130 else 4131 err = -EFAULT; 4132 kthread_unuse_mm(snap->snap[i].mm); 4133 4134 mmput(snap->snap[i].mm); 4135 snap->snap[i].mm = NULL; 4136 } 4137 4138 if (err) { 4139 kvfree(snap->snap[i].data); 4140 snap->snap[i].data = ERR_PTR(err); 4141 } 4142 4143 cleanup_bo: 4144 xe_bo_put(bo); 4145 snap->snap[i].bo = NULL; 4146 } 4147 } 4148 4149 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) 4150 { 4151 unsigned long i, j; 4152 4153 if (IS_ERR_OR_NULL(snap)) { 4154 drm_printf(p, "[0].error: %li\n", PTR_ERR(snap)); 4155 return; 4156 } 4157 4158 for (i = 0; i < snap->num_snaps; i++) { 4159 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len); 4160 4161 if 
(IS_ERR(snap->snap[i].data)) { 4162 drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs, 4163 PTR_ERR(snap->snap[i].data)); 4164 continue; 4165 } 4166 4167 drm_printf(p, "[%llx].data: ", snap->snap[i].ofs); 4168 4169 for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) { 4170 u32 *val = snap->snap[i].data + j; 4171 char dumped[ASCII85_BUFSZ]; 4172 4173 drm_puts(p, ascii85_encode(*val, dumped)); 4174 } 4175 4176 drm_puts(p, "\n"); 4177 4178 if (drm_coredump_printer_is_full(p)) 4179 return; 4180 } 4181 } 4182 4183 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap) 4184 { 4185 unsigned long i; 4186 4187 if (IS_ERR_OR_NULL(snap)) 4188 return; 4189 4190 for (i = 0; i < snap->num_snaps; i++) { 4191 if (!IS_ERR(snap->snap[i].data)) 4192 kvfree(snap->snap[i].data); 4193 xe_bo_put(snap->snap[i].bo); 4194 if (snap->snap[i].mm) 4195 mmput(snap->snap[i].mm); 4196 } 4197 kvfree(snap); 4198 } 4199 4200 /** 4201 * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations 4202 * @xe: Pointer to the Xe device structure 4203 * @vma: Pointer to the virtual memory area (VMA) structure 4204 * @is_atomic: True when servicing an atomic access on the page-fault path 4205 * 4206 * This function determines whether the given VMA needs to be migrated to 4207 * VRAM in order to perform atomic GPU operations. 4208 * 4209 * Return: 4210 * 1 - Migration to VRAM is required 4211 * 0 - Migration is not required 4212 * -EACCES - Atomic access is not allowed by the VMA's atomic memory attribute 4213 * 4214 */ 4215 int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic) 4216 { 4217 u32 atomic_access = xe_vma_bo(vma) ? xe_vma_bo(vma)->attr.atomic_access : 4218 vma->attr.atomic_access; 4219 4220 if (!IS_DGFX(xe) || !is_atomic) 4221 return false; 4222 4223 /* 4224 * NOTE: The checks implemented here are platform-specific. For 4225 * instance, on a device supporting CXL atomics, these would ideally 4226 * work universally without additional handling.
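	 *
	 * DRM_XE_ATOMIC_DEVICE only requires migration when the device cannot
	 * perform atomics on system memory, DRM_XE_ATOMIC_CPU can never be
	 * satisfied by a VRAM placement and is therefore rejected, while
	 * DRM_XE_ATOMIC_GLOBAL and an undefined attribute conservatively force
	 * migration to VRAM.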

void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
{
	unsigned long i;

	if (IS_ERR_OR_NULL(snap))
		return;

	for (i = 0; i < snap->num_snaps; i++) {
		if (!IS_ERR(snap->snap[i].data))
			kvfree(snap->snap[i].data);
		xe_bo_put(snap->snap[i].bo);
		if (snap->snap[i].mm)
			mmput(snap->snap[i].mm);
	}
	kvfree(snap);
}

/**
 * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations
 * @xe: Pointer to the Xe device structure
 * @vma: Pointer to the virtual memory area (VMA) structure
 * @is_atomic: True when called from the page-fault path for an atomic access
 *
 * This function determines whether the given VMA needs to be migrated to
 * VRAM in order to perform atomic GPU operations.
 *
 * Return:
 * 1 - Migration to VRAM is required
 * 0 - Migration is not required
 * -EACCES - Atomic access is not allowed by the VMA's atomic_access attribute
 */
int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic)
{
	u32 atomic_access = xe_vma_bo(vma) ? xe_vma_bo(vma)->attr.atomic_access :
					     vma->attr.atomic_access;

	if (!IS_DGFX(xe) || !is_atomic)
		return 0;

	/*
	 * NOTE: The checks implemented here are platform-specific. For
	 * instance, on a device supporting CXL atomics, these would ideally
	 * work universally without additional handling.
	 */
	switch (atomic_access) {
	case DRM_XE_ATOMIC_DEVICE:
		return !xe->info.has_device_atomics_on_smem;

	case DRM_XE_ATOMIC_CPU:
		return -EACCES;

	case DRM_XE_ATOMIC_UNDEFINED:
	case DRM_XE_ATOMIC_GLOBAL:
	default:
		return 1;
	}
}
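
/*
 * Illustrative sketch (hypothetical caller, not the driver's fault handler):
 * how the tri-state return value above is meant to be consumed. A positive
 * value requests migration to VRAM before retrying the access, zero means
 * the access can proceed in place, and a negative errno fails the fault.
 */
static int __maybe_unused example_handle_atomic_fault(struct xe_device *xe,
						      struct xe_vma *vma)
{
	int ret = xe_vma_need_vram_for_atomic(xe, vma, true);

	if (ret < 0)
		return ret;	/* e.g. -EACCES: atomic access not permitted */

	if (ret > 0) {
		/*
		 * Migration to VRAM is required before the atomic can
		 * succeed; a real caller would invoke its migration path
		 * here and retry.
		 */
		return -EAGAIN;	/* placeholder result for the sketch */
	}

	return 0;		/* atomic access can proceed in place */
}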

static int xe_vm_alloc_vma(struct xe_vm *vm,
			   struct drm_gpuvm_map_req *map_req,
			   bool is_madvise)
{
	struct xe_vma_ops vops;
	struct drm_gpuva_ops *ops = NULL;
	struct drm_gpuva_op *__op;
	unsigned int vma_flags = 0;
	bool remap_op = false;
	struct xe_vma_mem_attr tmp_attr;
	u16 default_pat;
	int err;

	lockdep_assert_held_write(&vm->lock);

	if (is_madvise)
		ops = drm_gpuvm_madvise_ops_create(&vm->gpuvm, map_req);
	else
		ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, map_req);

	if (IS_ERR(ops))
		return PTR_ERR(ops);

	if (list_empty(&ops->list)) {
		err = 0;
		goto free_ops;
	}

	drm_gpuva_for_each_op(__op, ops) {
		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
		struct xe_vma *vma = NULL;

		if (!is_madvise) {
			if (__op->op == DRM_GPUVA_OP_UNMAP) {
				vma = gpuva_to_vma(op->base.unmap.va);
				XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma));
				default_pat = vma->attr.default_pat_index;
				vma_flags = vma->gpuva.flags;
			}

			if (__op->op == DRM_GPUVA_OP_REMAP) {
				vma = gpuva_to_vma(op->base.remap.unmap->va);
				default_pat = vma->attr.default_pat_index;
				vma_flags = vma->gpuva.flags;
			}

			if (__op->op == DRM_GPUVA_OP_MAP) {
				op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK;
				op->map.pat_index = default_pat;
			}
		} else {
			if (__op->op == DRM_GPUVA_OP_REMAP) {
				vma = gpuva_to_vma(op->base.remap.unmap->va);
				xe_assert(vm->xe, !remap_op);
				xe_assert(vm->xe, xe_vma_has_no_bo(vma));
				remap_op = true;
				vma_flags = vma->gpuva.flags;
			}

			if (__op->op == DRM_GPUVA_OP_MAP) {
				xe_assert(vm->xe, remap_op);
				remap_op = false;
				/*
				 * For madvise ops, DRM_GPUVA_OP_MAP always
				 * follows DRM_GPUVA_OP_REMAP, so propagate
				 * the flags from the VMA being unmapped.
				 */
				op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK;
			}
		}
		print_op(vm->xe, __op);
	}

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);

	if (is_madvise)
		vops.flags |= XE_VMA_OPS_FLAG_MADVISE;

	err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
	if (err)
		goto unwind_ops;

	xe_vm_lock(vm, false);

	drm_gpuva_for_each_op(__op, ops) {
		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
		struct xe_vma *vma;

		if (__op->op == DRM_GPUVA_OP_UNMAP) {
			vma = gpuva_to_vma(op->base.unmap.va);
			/* There should be no unmap for madvise */
			if (is_madvise)
				XE_WARN_ON("UNEXPECTED UNMAP");

			xe_vma_destroy(vma, NULL);
		} else if (__op->op == DRM_GPUVA_OP_REMAP) {
			vma = gpuva_to_vma(op->base.remap.unmap->va);
			/*
			 * For madvise ops, store the attributes of the VMA
			 * unmapped by the REMAP so they can be assigned to
			 * the newly created MAP VMA.
			 */
			if (is_madvise)
				tmp_attr = vma->attr;

			xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL);
		} else if (__op->op == DRM_GPUVA_OP_MAP) {
			vma = op->map.vma;
			/*
			 * For a madvise call, MAP is always preceded by
			 * REMAP, so tmp_attr holds sane values and it is
			 * safe to copy them to the new VMA.
			 */
			if (is_madvise)
				vma->attr = tmp_attr;
		}
	}

	xe_vm_unlock(vm);
	drm_gpuva_ops_free(&vm->gpuvm, ops);
	return 0;

unwind_ops:
	vm_bind_ioctl_ops_unwind(vm, &ops, 1);
free_ops:
	drm_gpuva_ops_free(&vm->gpuvm, ops);
	return err;
}

/**
 * xe_vm_alloc_madvise_vma - Allocate VMAs with madvise ops
 * @vm: Pointer to the xe_vm structure
 * @start: Starting input address
 * @range: Size of the input range
 *
 * This function splits existing VMAs to create new VMAs matching the
 * user-provided input range.
 *
 * Return: 0 on success, negative error code on failure.
 */
int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
{
	struct drm_gpuvm_map_req map_req = {
		.map.va.addr = start,
		.map.va.range = range,
	};

	lockdep_assert_held_write(&vm->lock);

	vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range);

	return xe_vm_alloc_vma(vm, &map_req, true);
}

/**
 * xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU addr mirror vma
 * @vm: Pointer to the xe_vm structure
 * @start: Starting input address
 * @range: Size of the input range
 *
 * This function splits/merges existing VMAs to create new VMAs covering the
 * user-provided input range.
 *
 * Return: 0 on success, negative error code on failure.
 */
int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
{
	struct drm_gpuvm_map_req map_req = {
		.map.va.addr = start,
		.map.va.range = range,
	};

	lockdep_assert_held_write(&vm->lock);

	vm_dbg(&vm->xe->drm, "CPU_ADDR_MIRROR_VMA_OPS_CREATE: addr=0x%016llx, size=0x%016llx",
	       start, range);

	return xe_vm_alloc_vma(vm, &map_req, false);
}
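
/*
 * Illustrative sketch (hypothetical caller, not an existing driver path):
 * both helpers above require the VM lock held in write mode, as asserted by
 * lockdep_assert_held_write(). A caller carving out VMAs for a user-provided
 * range might look roughly like this; the start/range values and the helper
 * name are placeholders.
 */
static int __maybe_unused example_split_vma_for_range(struct xe_vm *vm,
						      u64 start, u64 range)
{
	int err;

	down_write(&vm->lock);
	err = xe_vm_alloc_madvise_vma(vm, start, range);
	up_write(&vm->lock);

	return err;
}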