// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_vm.h"

#include <linux/dma-fence-array.h>
#include <linux/nospec.h>

#include <drm/drm_drv.h>
#include <drm/drm_exec.h>
#include <drm/drm_print.h>
#include <drm/ttm/ttm_tt.h>
#include <uapi/drm/xe_drm.h>
#include <linux/ascii85.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/swap.h>

#include <generated/xe_wa_oob.h>

#include "regs/xe_gtt_defs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue.h"
#include "xe_migrate.h"
#include "xe_pat.h"
#include "xe_pm.h"
#include "xe_preempt_fence.h"
#include "xe_pt.h"
#include "xe_pxp.h"
#include "xe_sriov_vf.h"
#include "xe_svm.h"
#include "xe_sync.h"
#include "xe_tile.h"
#include "xe_tlb_inval.h"
#include "xe_trace_bo.h"
#include "xe_wa.h"

static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
{
	return vm->gpuvm.r_obj;
}

/**
 * xe_vm_drm_exec_lock() - Lock the vm's resv with a drm_exec transaction
 * @vm: The vm whose resv is to be locked.
 * @exec: The drm_exec transaction.
 *
 * Helper to lock the vm's resv as part of a drm_exec transaction.
 *
 * Return: %0 on success. See drm_exec_lock_obj() for error codes.
 */
int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec)
{
	return drm_exec_lock_obj(exec, xe_vm_obj(vm));
}
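
/*
 * Illustrative usage sketch (editor's note, not part of the driver): the
 * helper above is meant to be called from inside a drm_exec transaction so
 * that ww-mutex contention is handled by the retry loop. A hypothetical
 * caller could look like:
 *
 *	struct drm_exec exec;
 *	int err;
 *
 *	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 *	drm_exec_until_all_locked(&exec) {
 *		err = xe_vm_drm_exec_lock(vm, &exec);
 *		drm_exec_retry_on_contention(&exec);
 *		if (err)
 *			break;
 *	}
 *	// ... access state protected by the vm's resv ...
 *	drm_exec_fini(&exec);
 */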

static bool preempt_fences_waiting(struct xe_vm *vm)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		if (!q->lr.pfence ||
		    test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
			     &q->lr.pfence->flags)) {
			return true;
		}
	}

	return false;
}

static void free_preempt_fences(struct list_head *list)
{
	struct list_head *link, *next;

	list_for_each_safe(link, next, list)
		xe_preempt_fence_free(to_preempt_fence_from_link(link));
}

static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
				unsigned int *count)
{
	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);

	if (*count >= vm->preempt.num_exec_queues)
		return 0;

	for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
		struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();

		if (IS_ERR(pfence))
			return PTR_ERR(pfence);

		list_move_tail(xe_preempt_fence_link(pfence), list);
	}

	return 0;
}

static int wait_for_existing_preempt_fences(struct xe_vm *vm)
{
	struct xe_exec_queue *q;
	bool vf_migration = IS_SRIOV_VF(vm->xe) &&
		xe_sriov_vf_migration_supported(vm->xe);
	signed long wait_time = vf_migration ? HZ / 5 : MAX_SCHEDULE_TIMEOUT;

	xe_vm_assert_held(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		if (q->lr.pfence) {
			long timeout;

			timeout = dma_fence_wait_timeout(q->lr.pfence, false,
							 wait_time);
			if (!timeout) {
				xe_assert(vm->xe, vf_migration);
				return -EAGAIN;
			}

			/* Only -ETIME on fence indicates VM needs to be killed */
			if (timeout < 0 || q->lr.pfence->error == -ETIME)
				return -ETIME;

			dma_fence_put(q->lr.pfence);
			q->lr.pfence = NULL;
		}
	}

	return 0;
}

static bool xe_vm_is_idle(struct xe_vm *vm)
{
	struct xe_exec_queue *q;

	xe_vm_assert_held(vm);
	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		if (!xe_exec_queue_is_idle(q))
			return false;
	}

	return true;
}

static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
{
	struct list_head *link;
	struct xe_exec_queue *q;

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		struct dma_fence *fence;

		link = list->next;
		xe_assert(vm->xe, link != list);

		fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
					     q, q->lr.context,
					     ++q->lr.seqno);
		dma_fence_put(q->lr.pfence);
		q->lr.pfence = fence;
	}
}

static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
{
	struct xe_exec_queue *q;
	int err;

	xe_bo_assert_held(bo);

	if (!vm->preempt.num_exec_queues)
		return 0;

	err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
	if (err)
		return err;

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
		if (q->lr.pfence) {
			dma_resv_add_fence(bo->ttm.base.resv,
					   q->lr.pfence,
					   DMA_RESV_USAGE_BOOKKEEP);
		}

	return 0;
}

static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
						struct drm_exec *exec)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		q->ops->resume(q);

		drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
					 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
	}
}

int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	struct drm_gpuvm_exec vm_exec = {
		.vm = &vm->gpuvm,
		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
		.num_fences = 1,
	};
	struct drm_exec *exec = &vm_exec.exec;
	struct xe_validation_ctx ctx;
	struct dma_fence *pfence;
	int err;
	bool wait;

	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));

	down_write(&vm->lock);
	err = xe_validation_exec_lock(&ctx, &vm_exec, &vm->xe->val);
	if (err)
		goto out_up_write;

	pfence = xe_preempt_fence_create(q, q->lr.context,
					 ++q->lr.seqno);
	if (IS_ERR(pfence)) {
		err = PTR_ERR(pfence);
		goto out_fini;
	}

	list_add(&q->lr.link, &vm->preempt.exec_queues);
	++vm->preempt.num_exec_queues;
	q->lr.pfence = pfence;

	xe_svm_notifier_lock(vm);

	drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
				 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);

	/*
	 * Check to see if a preemption on the VM or a userptr invalidation is
	 * in flight; if so, trigger this preempt fence to sync state with the
	 * other preempt fences on the VM.
	 */
	wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
	if (wait)
		dma_fence_enable_sw_signaling(pfence);

	xe_svm_notifier_unlock(vm);

out_fini:
	xe_validation_ctx_fini(&ctx);
out_up_write:
	up_write(&vm->lock);

	return err;
}
ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO);
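
/*
 * Editor's sketch (illustrative, not driver code): for long-running compute
 * queues, the add/remove calls are expected to bracket the queue's lifetime,
 * roughly:
 *
 *	err = xe_vm_add_compute_exec_queue(vm, q);
 *	if (err)
 *		return err;
 *	// ... submit long-running work on q ...
 *	xe_vm_remove_compute_exec_queue(vm, q);
 *
 * The remove side is safe to call more than once, per its kernel-doc below.
 */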

/**
 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
 * @vm: The VM.
 * @q: The exec_queue
 *
 * Note that this function might be called multiple times on the same queue.
 */
void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	if (!xe_vm_in_preempt_fence_mode(vm))
		return;

	down_write(&vm->lock);
	if (!list_empty(&q->lr.link)) {
		list_del_init(&q->lr.link);
		--vm->preempt.num_exec_queues;
	}
	if (q->lr.pfence) {
		dma_fence_enable_sw_signaling(q->lr.pfence);
		dma_fence_put(q->lr.pfence);
		q->lr.pfence = NULL;
	}
	up_write(&vm->lock);
}

#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000

/**
 * xe_vm_kill() - VM Kill
 * @vm: The VM.
 * @unlocked: Flag indicating the VM's dma-resv is not held
 *
 * Kill the VM by setting the banned flag, indicating the VM is no longer
 * available for use. If in preempt fence mode, also kill all exec queues
 * attached to the VM.
 */
void xe_vm_kill(struct xe_vm *vm, bool unlocked)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);

	if (unlocked)
		xe_vm_lock(vm, false);

	vm->flags |= XE_VM_FLAG_BANNED;
	trace_xe_vm_kill(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
		q->ops->kill(q);

	if (unlocked)
		xe_vm_unlock(vm);

	/* TODO: Inform user the VM is banned */
}

static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
{
	struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
	struct drm_gpuva *gpuva;
	int ret;

	lockdep_assert_held(&vm->lock);
	drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
		list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
			       &vm->rebind_list);

	if (!try_wait_for_completion(&vm->xe->pm_block))
		return -EAGAIN;

	ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false, exec);
	if (ret)
		return ret;

	vm_bo->evicted = false;
	return 0;
}

/**
 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
 * @vm: The vm for which we are rebinding.
 * @exec: The struct drm_exec with the locked GEM objects.
 * @num_fences: The number of fences to reserve for the operation, not
 * including rebinds and validations.
 *
 * Validates all evicted gem objects and rebinds their vmas. Note that
 * rebindings may cause evictions and hence the validation-rebind
 * sequence is rerun until there are no more objects to validate.
 *
 * Return: 0 on success, negative error code on error. In particular,
 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
 * the drm_exec transaction needs to be restarted.
 */
int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
			  unsigned int num_fences)
{
	struct drm_gem_object *obj;
	unsigned long index;
	int ret;

	do {
		ret = drm_gpuvm_validate(&vm->gpuvm, exec);
		if (ret)
			return ret;

		ret = xe_vm_rebind(vm, false);
		if (ret)
			return ret;
	} while (!list_empty(&vm->gpuvm.evict.list));

	drm_exec_for_each_locked_object(exec, index, obj) {
		ret = dma_resv_reserve_fences(obj->resv, num_fences);
		if (ret)
			return ret;
	}

	return 0;
}
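
/*
 * Illustrative sketch (editor's addition): xe_vm_validate_rebind() is
 * designed to run inside a drm_exec locking loop, after the vm and its
 * external objects have been prepared, e.g.:
 *
 *	drm_exec_until_all_locked(&exec) {
 *		err = drm_gpuvm_prepare_vm(&vm->gpuvm, &exec, 0);
 *		if (!err)
 *			err = drm_gpuvm_prepare_objects(&vm->gpuvm, &exec, 0);
 *		if (!err)
 *			err = xe_vm_validate_rebind(vm, &exec, num_fences);
 *		drm_exec_retry_on_contention(&exec);
 *	}
 *
 * A returned -EDEADLK is consumed by drm_exec_retry_on_contention(), which
 * restarts the transaction as the kernel-doc above describes; this mirrors
 * how xe_preempt_work_begin() below uses the helper.
 */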

static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
				 bool *done)
{
	int err;

	err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	if (xe_vm_is_idle(vm)) {
		vm->preempt.rebind_deactivated = true;
		*done = true;
		return 0;
	}

	if (!preempt_fences_waiting(vm)) {
		*done = true;
		return 0;
	}

	err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	err = wait_for_existing_preempt_fences(vm);
	if (err)
		return err;

	/*
	 * Add validation and rebinding to the locking loop since both can
	 * cause evictions which may require blocking dma_resv locks.
	 * The fence reservation here is intended for the new preempt fences
	 * we attach at the end of the rebind work.
	 */
	return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
}

static bool vm_suspend_rebind_worker(struct xe_vm *vm)
{
	struct xe_device *xe = vm->xe;
	bool ret = false;

	mutex_lock(&xe->rebind_resume_lock);
	if (!try_wait_for_completion(&vm->xe->pm_block)) {
		ret = true;
		list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list);
	}
	mutex_unlock(&xe->rebind_resume_lock);

	return ret;
}

/**
 * xe_vm_resume_rebind_worker() - Resume the rebind worker.
 * @vm: The vm whose preempt worker to resume.
 *
 * Resume a preempt worker that was previously suspended by
 * vm_suspend_rebind_worker().
 */
void xe_vm_resume_rebind_worker(struct xe_vm *vm)
{
	queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work);
}

static void preempt_rebind_work_func(struct work_struct *w)
{
	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	unsigned int fence_count = 0;
	LIST_HEAD(preempt_fences);
	int err = 0;
	long wait;
	int __maybe_unused tries = 0;

	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
	trace_xe_vm_rebind_worker_enter(vm);

	down_write(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm)) {
		up_write(&vm->lock);
		trace_xe_vm_rebind_worker_exit(vm);
		return;
	}

retry:
	if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) {
		up_write(&vm->lock);
		/* We don't actually block, but we don't make progress either. */
		xe_pm_might_block_on_suspend();
		return;
	}

	if (xe_vm_userptr_check_repin(vm)) {
		err = xe_vm_userptr_pin(vm);
		if (err)
			goto out_unlock_outer;
	}

	err = xe_validation_ctx_init(&ctx, &vm->xe->val, &exec,
				     (struct xe_val_flags) {.interruptible = true});
	if (err)
		goto out_unlock_outer;

	drm_exec_until_all_locked(&exec) {
		bool done = false;

		err = xe_preempt_work_begin(&exec, vm, &done);
		drm_exec_retry_on_contention(&exec);
		xe_validation_retry_on_oom(&ctx, &err);
		if (err || done) {
			xe_validation_ctx_fini(&ctx);
			goto out_unlock_outer;
		}
	}

	err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
	if (err)
		goto out_unlock;

	xe_vm_set_validation_exec(vm, &exec);
	err = xe_vm_rebind(vm, true);
	xe_vm_set_validation_exec(vm, NULL);
	if (err)
		goto out_unlock;

	/* Wait on rebinds and munmap style VM unbinds */
	wait = dma_resv_wait_timeout(xe_vm_resv(vm),
				     DMA_RESV_USAGE_KERNEL,
				     false, MAX_SCHEDULE_TIMEOUT);
	if (wait <= 0) {
		err = -ETIME;
		goto out_unlock;
	}

#define retry_required(__tries, __vm) \
	(IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
	(!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
	__xe_vm_userptr_needs_repin(__vm))

	xe_svm_notifier_lock(vm);
	if (retry_required(tries, vm)) {
		xe_svm_notifier_unlock(vm);
		err = -EAGAIN;
		goto out_unlock;
	}

#undef retry_required

	spin_lock(&vm->xe->ttm.lru_lock);
	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
	spin_unlock(&vm->xe->ttm.lru_lock);

	/* Point of no return. */
	arm_preempt_fences(vm, &preempt_fences);
	resume_and_reinstall_preempt_fences(vm, &exec);
	xe_svm_notifier_unlock(vm);

out_unlock:
	xe_validation_ctx_fini(&ctx);
out_unlock_outer:
	if (err == -EAGAIN) {
		trace_xe_vm_rebind_worker_retry(vm);

		/*
		 * We can't block in workers on a VF which supports migration
		 * given this can block the VF post-migration workers from
		 * getting scheduled.
		 */
		if (IS_SRIOV_VF(vm->xe) &&
		    xe_sriov_vf_migration_supported(vm->xe)) {
			up_write(&vm->lock);
			xe_vm_queue_rebind_worker(vm);
			return;
		}

		goto retry;
	}

	if (err) {
		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
		xe_vm_kill(vm, true);
	}
	up_write(&vm->lock);

	free_preempt_fences(&preempt_fences);

	trace_xe_vm_rebind_worker_exit(vm);
}

static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
{
	int i;

	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) {
		if (!vops->pt_update_ops[i].num_ops)
			continue;

		vops->pt_update_ops[i].ops =
			kmalloc_objs(*vops->pt_update_ops[i].ops,
				     vops->pt_update_ops[i].num_ops,
				     GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
		if (!vops->pt_update_ops[i].ops)
			return array_of_binds ? -ENOBUFS : -ENOMEM;
	}

	return 0;
}
ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO);

static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op)
{
	struct xe_vma *vma;

	vma = gpuva_to_vma(op->base.prefetch.va);

	if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma))
		xa_destroy(&op->prefetch_range.range);
}

static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops)
{
	struct xe_vma_op *op;

	if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
		return;

	list_for_each_entry(op, &vops->list, link)
		xe_vma_svm_prefetch_op_fini(op);
}

static void xe_vma_ops_fini(struct xe_vma_ops *vops)
{
	int i;

	xe_vma_svm_prefetch_ops_fini(vops);

	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
		kfree(vops->pt_update_ops[i].ops);
}

static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val)
{
	int i;

	if (!inc_val)
		return;

	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
		if (BIT(i) & tile_mask)
			vops->pt_update_ops[i].num_ops += inc_val;
}
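
/*
 * Worked example (editor's note): tile_mask is a bitmask of tile ids, so
 * with tile_mask == 0x3 and inc_val == 1 the helper above bumps
 * pt_update_ops[0].num_ops and pt_update_ops[1].num_ops by one each,
 * sizing the per-tile arrays later allocated by xe_vma_ops_alloc().
 */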

#define XE_VMA_CREATE_MASK ( \
	XE_VMA_READ_ONLY | \
	XE_VMA_DUMPABLE | \
	XE_VMA_SYSTEM_ALLOCATOR | \
	DRM_GPUVA_SPARSE | \
	XE_VMA_MADV_AUTORESET)

static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
				  u8 tile_mask)
{
	INIT_LIST_HEAD(&op->link);
	op->tile_mask = tile_mask;
	op->base.op = DRM_GPUVA_OP_MAP;
	op->base.map.va.addr = vma->gpuva.va.addr;
	op->base.map.va.range = vma->gpuva.va.range;
	op->base.map.gem.obj = vma->gpuva.gem.obj;
	op->base.map.gem.offset = vma->gpuva.gem.offset;
	op->map.vma = vma;
	op->map.immediate = true;
	op->map.vma_flags = vma->gpuva.flags & XE_VMA_CREATE_MASK;
}

static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
				u8 tile_mask)
{
	struct xe_vma_op *op;

	op = kzalloc_obj(*op);
	if (!op)
		return -ENOMEM;

	xe_vm_populate_rebind(op, vma, tile_mask);
	list_add_tail(&op->link, &vops->list);
	xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);

	return 0;
}

static struct dma_fence *ops_execute(struct xe_vm *vm,
				     struct xe_vma_ops *vops);
static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
			    struct xe_exec_queue *q,
			    struct xe_sync_entry *syncs, u32 num_syncs);

int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
{
	struct dma_fence *fence;
	struct xe_vma *vma, *next;
	struct xe_vma_ops vops;
	struct xe_vma_op *op, *next_op;
	int err, i;

	lockdep_assert_held(&vm->lock);
	if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
	    list_empty(&vm->rebind_list))
		return 0;

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
		vops.pt_update_ops[i].wait_vm_bookkeep = true;

	xe_vm_assert_held(vm);
	list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
		xe_assert(vm->xe, vma->tile_present);

		if (rebind_worker)
			trace_xe_vma_rebind_worker(vma);
		else
			trace_xe_vma_rebind_exec(vma);

		err = xe_vm_ops_add_rebind(&vops, vma,
					   vma->tile_present);
		if (err)
			goto free_ops;
	}

	err = xe_vma_ops_alloc(&vops, false);
	if (err)
		goto free_ops;

	fence = ops_execute(vm, &vops);
	if (IS_ERR(fence)) {
		err = PTR_ERR(fence);
	} else {
		dma_fence_put(fence);
		list_for_each_entry_safe(vma, next, &vm->rebind_list,
					 combined_links.rebind)
			list_del_init(&vma->combined_links.rebind);
	}
free_ops:
	list_for_each_entry_safe(op, next_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return err;
}

struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask)
{
	struct dma_fence *fence = NULL;
	struct xe_vma_ops vops;
	struct xe_vma_op *op, *next_op;
	struct xe_tile *tile;
	u8 id;
	int err;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);
	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT;
	for_each_tile(tile, vm->xe, id) {
		vops.pt_update_ops[id].wait_vm_bookkeep = true;
		vops.pt_update_ops[tile->id].q =
			xe_migrate_exec_queue(tile->migrate);
	}

	err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
	if (err)
		return ERR_PTR(err);

	err = xe_vma_ops_alloc(&vops, false);
	if (err) {
		fence = ERR_PTR(err);
		goto free_ops;
	}

	fence = ops_execute(vm, &vops);

free_ops:
	list_for_each_entry_safe(op, next_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return fence;
}

static void xe_vm_populate_range_rebind(struct xe_vma_op *op,
					struct xe_vma *vma,
					struct xe_svm_range *range,
					u8 tile_mask)
{
	INIT_LIST_HEAD(&op->link);
	op->tile_mask = tile_mask;
	op->base.op = DRM_GPUVA_OP_DRIVER;
	op->subop = XE_VMA_SUBOP_MAP_RANGE;
	op->map_range.vma = vma;
	op->map_range.range = range;
}

static int
xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops,
			   struct xe_vma *vma,
			   struct xe_svm_range *range,
			   u8 tile_mask)
{
	struct xe_vma_op *op;

	op = kzalloc_obj(*op);
	if (!op)
		return -ENOMEM;

	xe_vm_populate_range_rebind(op, vma, range, tile_mask);
	list_add_tail(&op->link, &vops->list);
	xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);

	return 0;
}

/**
 * xe_vm_range_rebind() - VM range (re)bind
 * @vm: The VM which the range belongs to.
 * @vma: The VMA which the range belongs to.
 * @range: SVM range to rebind.
 * @tile_mask: Tile mask to bind the range to.
 *
 * (Re)bind an SVM range, setting up GPU page tables for the range.
 *
 * Return: dma fence for rebind to signal completion on success, ERR_PTR on
 * failure
 */
struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
				     struct xe_vma *vma,
				     struct xe_svm_range *range,
				     u8 tile_mask)
{
	struct dma_fence *fence = NULL;
	struct xe_vma_ops vops;
	struct xe_vma_op *op, *next_op;
	struct xe_tile *tile;
	u8 id;
	int err;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);
	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
	xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT;
	for_each_tile(tile, vm->xe, id) {
		vops.pt_update_ops[id].wait_vm_bookkeep = true;
		vops.pt_update_ops[tile->id].q =
			xe_migrate_exec_queue(tile->migrate);
	}

	err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask);
	if (err)
		return ERR_PTR(err);

	err = xe_vma_ops_alloc(&vops, false);
	if (err) {
		fence = ERR_PTR(err);
		goto free_ops;
	}

	fence = ops_execute(vm, &vops);

free_ops:
	list_for_each_entry_safe(op, next_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return fence;
}

static void xe_vm_populate_range_unbind(struct xe_vma_op *op,
					struct xe_svm_range *range)
{
	INIT_LIST_HEAD(&op->link);
	op->tile_mask = range->tile_present;
	op->base.op = DRM_GPUVA_OP_DRIVER;
	op->subop = XE_VMA_SUBOP_UNMAP_RANGE;
	op->unmap_range.range = range;
}

static int
xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops,
			   struct xe_svm_range *range)
{
	struct xe_vma_op *op;

	op = kzalloc_obj(*op);
	if (!op)
		return -ENOMEM;

	xe_vm_populate_range_unbind(op, range);
	list_add_tail(&op->link, &vops->list);
	xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1);

	return 0;
}

/**
 * xe_vm_range_unbind() - VM range unbind
 * @vm: The VM which the range belongs to.
 * @range: SVM range to unbind.
 *
 * Unbind an SVM range, removing the GPU page tables for the range.
 *
 * Return: dma fence for unbind to signal completion on success, ERR_PTR on
 * failure
 */
struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
				     struct xe_svm_range *range)
{
	struct dma_fence *fence = NULL;
	struct xe_vma_ops vops;
	struct xe_vma_op *op, *next_op;
	struct xe_tile *tile;
	u8 id;
	int err;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);
	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));

	if (!range->tile_present)
		return dma_fence_get_stub();

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	for_each_tile(tile, vm->xe, id) {
		vops.pt_update_ops[id].wait_vm_bookkeep = true;
		vops.pt_update_ops[tile->id].q =
			xe_migrate_exec_queue(tile->migrate);
	}

	err = xe_vm_ops_add_range_unbind(&vops, range);
	if (err)
		return ERR_PTR(err);

	err = xe_vma_ops_alloc(&vops, false);
	if (err) {
		fence = ERR_PTR(err);
		goto free_ops;
	}

	fence = ops_execute(vm, &vops);

free_ops:
	list_for_each_entry_safe(op, next_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return fence;
}

static void xe_vma_mem_attr_fini(struct xe_vma_mem_attr *attr)
{
	drm_pagemap_put(attr->preferred_loc.dpagemap);
}

static void xe_vma_free(struct xe_vma *vma)
{
	xe_vma_mem_attr_fini(&vma->attr);

	if (xe_vma_is_userptr(vma))
		kfree(to_userptr_vma(vma));
	else
		kfree(vma);
}

/**
 * xe_vma_mem_attr_copy() - copy an xe_vma_mem_attr structure.
 * @to: Destination.
 * @from: Source.
 *
 * Copies an xe_vma_mem_attr structure taking care to get reference
 * counting of individual members right.
 */
void xe_vma_mem_attr_copy(struct xe_vma_mem_attr *to, struct xe_vma_mem_attr *from)
{
	xe_vma_mem_attr_fini(to);
	*to = *from;
	if (to->preferred_loc.dpagemap)
		drm_pagemap_get(to->preferred_loc.dpagemap);
}

static struct xe_vma *xe_vma_create(struct xe_vm *vm,
				    struct xe_bo *bo,
				    u64 bo_offset_or_userptr,
				    u64 start, u64 end,
				    struct xe_vma_mem_attr *attr,
				    unsigned int flags)
{
	struct xe_vma *vma;
	struct xe_tile *tile;
	u8 id;
	bool is_null = (flags & DRM_GPUVA_SPARSE);
	bool is_cpu_addr_mirror = (flags & XE_VMA_SYSTEM_ALLOCATOR);

	xe_assert(vm->xe, start < end);
	xe_assert(vm->xe, end < vm->size);

	/*
	 * Allocate and ensure that the xe_vma_is_userptr() return value
	 * matches what was allocated.
	 */
	if (!bo && !is_null && !is_cpu_addr_mirror) {
		struct xe_userptr_vma *uvma = kzalloc_obj(*uvma);

		if (!uvma)
			return ERR_PTR(-ENOMEM);

		vma = &uvma->vma;
	} else {
		vma = kzalloc_obj(*vma);
		if (!vma)
			return ERR_PTR(-ENOMEM);

		if (bo)
			vma->gpuva.gem.obj = &bo->ttm.base;
	}

	INIT_LIST_HEAD(&vma->combined_links.rebind);

	INIT_LIST_HEAD(&vma->gpuva.gem.entry);
	vma->gpuva.vm = &vm->gpuvm;
	vma->gpuva.va.addr = start;
	vma->gpuva.va.range = end - start + 1;
	vma->gpuva.flags = flags;

	for_each_tile(tile, vm->xe, id)
		vma->tile_mask |= 0x1 << id;

	if (vm->xe->info.has_atomic_enable_pte_bit)
		vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;

	xe_vma_mem_attr_copy(&vma->attr, attr);
	if (bo) {
		struct drm_gpuvm_bo *vm_bo;

		xe_bo_assert_held(bo);

		vm_bo = drm_gpuvm_bo_obtain_locked(vma->gpuva.vm, &bo->ttm.base);
		if (IS_ERR(vm_bo)) {
			xe_vma_free(vma);
			return ERR_CAST(vm_bo);
		}

		drm_gpuvm_bo_extobj_add(vm_bo);
		drm_gem_object_get(&bo->ttm.base);
		vma->gpuva.gem.offset = bo_offset_or_userptr;
		drm_gpuva_link(&vma->gpuva, vm_bo);
		drm_gpuvm_bo_put(vm_bo);
	} else /* userptr or null */ {
		if (!is_null && !is_cpu_addr_mirror) {
			struct xe_userptr_vma *uvma = to_userptr_vma(vma);
			u64 size = end - start + 1;
			int err;

			vma->gpuva.gem.offset = bo_offset_or_userptr;

			err = xe_userptr_setup(uvma, xe_vma_userptr(vma), size);
			if (err) {
				xe_vma_free(vma);
				return ERR_PTR(err);
			}
		}

		xe_vm_get(vm);
	}

	return vma;
}

static void xe_vma_destroy_late(struct xe_vma *vma)
{
	struct xe_vm *vm = xe_vma_vm(vma);

	if (vma->ufence) {
		xe_sync_ufence_put(vma->ufence);
		vma->ufence = NULL;
	}

	if (xe_vma_is_userptr(vma)) {
		struct xe_userptr_vma *uvma = to_userptr_vma(vma);

		xe_userptr_remove(uvma);
		xe_vm_put(vm);
	} else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) {
		xe_vm_put(vm);
	} else {
		xe_bo_put(xe_vma_bo(vma));
	}

	xe_vma_free(vma);
}

static void vma_destroy_work_func(struct work_struct *w)
{
	struct xe_vma *vma =
		container_of(w, struct xe_vma, destroy_work);

	xe_vma_destroy_late(vma);
}

static void vma_destroy_cb(struct dma_fence *fence,
			   struct dma_fence_cb *cb)
{
	struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);

	INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
	queue_work(system_dfl_wq, &vma->destroy_work);
}

static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
{
	struct xe_vm *vm = xe_vma_vm(vma);

	lockdep_assert_held_write(&vm->lock);
	xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));

	if (xe_vma_is_userptr(vma)) {
		xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
		xe_userptr_destroy(to_userptr_vma(vma));
	} else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) {
		xe_bo_assert_held(xe_vma_bo(vma));

		drm_gpuva_unlink(&vma->gpuva);
	}

	xe_vm_assert_held(vm);
	if (fence) {
		int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
						 vma_destroy_cb);

		if (ret) {
			XE_WARN_ON(ret != -ENOENT);
			xe_vma_destroy_late(vma);
		}
	} else {
		xe_vma_destroy_late(vma);
	}
}

/**
 * xe_vm_lock_vma() - drm_exec utility to lock a vma
 * @exec: The drm_exec object we're currently locking for.
 * @vma: The vma for which we want to lock the vm resv and any attached
 * object's resv.
 *
 * Return: 0 on success, negative error code on error. In particular
 * may return -EDEADLK on WW transaction contention and -EINTR if
 * an interruptible wait is terminated by a signal.
 */
int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
{
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_bo *bo = xe_vma_bo(vma);
	int err;

	XE_WARN_ON(!vm);

	err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
	if (!err && bo && !bo->vm)
		err = drm_exec_lock_obj(exec, &bo->ttm.base);

	return err;
}

static void xe_vma_destroy_unlocked(struct xe_vma *vma)
{
	struct xe_device *xe = xe_vma_vm(vma)->xe;
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	int err = 0;

	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
		err = xe_vm_lock_vma(&exec, vma);
		drm_exec_retry_on_contention(&exec);
		if (XE_WARN_ON(err))
			break;
		xe_vma_destroy(vma, NULL);
	}
	xe_assert(xe, !err);
}

struct xe_vma *
xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
{
	struct drm_gpuva *gpuva;

	lockdep_assert_held(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm))
		return NULL;

	xe_assert(vm->xe, start + range <= vm->size);

	gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);

	return gpuva ? gpuva_to_vma(gpuva) : NULL;
}

static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
{
	int err;

	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
	lockdep_assert_held(&vm->lock);

	mutex_lock(&vm->snap_mutex);
	err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
	mutex_unlock(&vm->snap_mutex);
	XE_WARN_ON(err);	/* Shouldn't be possible */

	return err;
}

static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
{
	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
	lockdep_assert_held(&vm->lock);

	mutex_lock(&vm->snap_mutex);
	drm_gpuva_remove(&vma->gpuva);
	mutex_unlock(&vm->snap_mutex);
	if (vm->usm.last_fault_vma == vma)
		vm->usm.last_fault_vma = NULL;
}

static struct drm_gpuva_op *xe_vm_op_alloc(void)
{
	struct xe_vma_op *op;

	op = kzalloc_obj(*op);

	if (unlikely(!op))
		return NULL;

	return &op->base;
}

static void xe_vm_free(struct drm_gpuvm *gpuvm);

static const struct drm_gpuvm_ops gpuvm_ops = {
	.op_alloc = xe_vm_op_alloc,
	.vm_bo_validate = xe_gpuvm_validate,
	.vm_free = xe_vm_free,
};

static u64 pde_encode_pat_index(u16 pat_index)
{
	u64 pte = 0;

	if (pat_index & BIT(0))
		pte |= XE_PPGTT_PTE_PAT0;

	if (pat_index & BIT(1))
		pte |= XE_PPGTT_PTE_PAT1;

	return pte;
}

static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level)
{
	u64 pte = 0;

	if (pat_index & BIT(0))
		pte |= XE_PPGTT_PTE_PAT0;

	if (pat_index & BIT(1))
		pte |= XE_PPGTT_PTE_PAT1;

	if (pat_index & BIT(2)) {
		if (pt_level)
			pte |= XE_PPGTT_PDE_PDPE_PAT2;
		else
			pte |= XE_PPGTT_PTE_PAT2;
	}

	if (pat_index & BIT(3))
		pte |= XELPG_PPGTT_PTE_PAT3;

	if (pat_index & (BIT(4)))
		pte |= XE2_PPGTT_PTE_PAT4;

	return pte;
}

static u64 pte_encode_ps(u32 pt_level)
{
	XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);

	if (pt_level == 1)
		return XE_PDE_PS_2M;
	else if (pt_level == 2)
		return XE_PDPE_PS_1G;

	return 0;
}

static u16 pde_pat_index(struct xe_bo *bo)
{
	struct xe_device *xe = xe_bo_device(bo);
	u16 pat_index;

	/*
	 * We only have two bits to encode the PAT index in non-leaf nodes, but
	 * these only point to other paging structures so we only need a minimal
	 * selection of options. The user PAT index is only for encoding leaf
	 * nodes, where we have use of more bits to do the encoding. The
	 * non-leaf nodes are instead under driver control so the chosen index
	 * here should be distinct from the user PAT index. Also the
	 * corresponding coherency of the PAT index should be tied to the
	 * allocation type of the page table (or at least we should pick
	 * something which is always safe).
	 */
	if (!xe_bo_is_vram(bo) && bo->ttm.ttm->caching == ttm_cached)
		pat_index = xe->pat.idx[XE_CACHE_WB];
	else
		pat_index = xe->pat.idx[XE_CACHE_NONE];

	xe_assert(xe, pat_index <= 3);

	return pat_index;
}

static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset)
{
	u64 pde;

	pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
	pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
	pde |= pde_encode_pat_index(pde_pat_index(bo));

	return pde;
}

static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
			      u16 pat_index, u32 pt_level)
{
	u64 pte;

	pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
	pte |= pte_encode_pat_index(pat_index, pt_level);
	pte |= pte_encode_ps(pt_level);

	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
		pte |= XE_PPGTT_PTE_DM;

	return pte;
}

static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
			       u16 pat_index, u32 pt_level)
{
	pte |= XE_PAGE_PRESENT;

	if (likely(!xe_vma_read_only(vma)))
		pte |= XE_PAGE_RW;

	pte |= pte_encode_pat_index(pat_index, pt_level);
	pte |= pte_encode_ps(pt_level);

	if (unlikely(xe_vma_is_null(vma)))
		pte |= XE_PTE_NULL;

	return pte;
}

static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
				u16 pat_index,
				u32 pt_level, bool devmem, u64 flags)
{
	u64 pte;

	/* Avoid passing random bits directly as flags */
	xe_assert(xe, !(flags & ~XE_PTE_PS64));

	pte = addr;
	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
	pte |= pte_encode_pat_index(pat_index, pt_level);
	pte |= pte_encode_ps(pt_level);

	if (devmem)
		pte |= XE_PPGTT_PTE_DM;

	pte |= flags;

	return pte;
}

static const struct xe_pt_ops xelp_pt_ops = {
	.pte_encode_bo = xelp_pte_encode_bo,
	.pte_encode_vma = xelp_pte_encode_vma,
	.pte_encode_addr = xelp_pte_encode_addr,
	.pde_encode_bo = xelp_pde_encode_bo,
};
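
/*
 * Worked example (editor's note): for a leaf PTE (pt_level == 0) with
 * pat_index == 5 (0b0101), pte_encode_pat_index() sets XE_PPGTT_PTE_PAT0
 * (bit 0 of the index) and XE_PPGTT_PTE_PAT2 (bit 2), while PAT1/PAT3/PAT4
 * stay clear. At pt_level > 0 the PAT2 selection switches to
 * XE_PPGTT_PDE_PDPE_PAT2, since PDE/PDPE entries encode that bit in a
 * different position.
 */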

static void vm_destroy_work_func(struct work_struct *w);

/**
 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
 * given tile and vm.
 * @xe: xe device.
 * @tile: tile to set up for.
 * @vm: vm to set up for.
 * @exec: The struct drm_exec object used to lock the vm resv.
 *
 * Sets up a pagetable tree with one page-table per level and a single
 * leaf PTE. All pagetable entries point to the single page-table or,
 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE that returns 0 on read, while
 * writes become NOPs.
 *
 * Return: 0 on success, negative error code on error.
 */
static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
				struct xe_vm *vm, struct drm_exec *exec)
{
	u8 id = tile->id;
	int i;

	for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
		vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i, exec);
		if (IS_ERR(vm->scratch_pt[id][i])) {
			int err = PTR_ERR(vm->scratch_pt[id][i]);

			vm->scratch_pt[id][i] = NULL;
			return err;
		}
		xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
	}

	return 0;
}
ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO);

static void xe_vm_free_scratch(struct xe_vm *vm)
{
	struct xe_tile *tile;
	u8 id;

	if (!xe_vm_has_scratch(vm))
		return;

	for_each_tile(tile, vm->xe, id) {
		u32 i;

		if (!vm->pt_root[id])
			continue;

		for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
			if (vm->scratch_pt[id][i])
				xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
	}
}

static void xe_vm_pt_destroy(struct xe_vm *vm)
{
	struct xe_tile *tile;
	u8 id;

	xe_vm_assert_held(vm);

	for_each_tile(tile, vm->xe, id) {
		if (vm->pt_root[id]) {
			xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
			vm->pt_root[id] = NULL;
		}
	}
}

static void xe_vm_init_prove_locking(struct xe_device *xe, struct xe_vm *vm)
{
	if (!IS_ENABLED(CONFIG_PROVE_LOCKING))
		return;

	fs_reclaim_acquire(GFP_KERNEL);
	might_lock(&vm->exec_queues.lock);
	fs_reclaim_release(GFP_KERNEL);

	down_read(&vm->exec_queues.lock);
	might_lock(&xe_root_mmio_gt(xe)->uc.guc.ct.lock);
	up_read(&vm->exec_queues.lock);
}

struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
{
	struct drm_gem_object *vm_resv_obj;
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	struct xe_vm *vm;
	int err;
	struct xe_tile *tile;
	u8 id;

	/*
	 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to
	 * ever be in faulting mode.
	 */
	xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE)));

	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
	if (!vm)
		return ERR_PTR(-ENOMEM);

	vm->xe = xe;

	vm->size = 1ull << xe->info.va_bits;
	vm->flags = flags;

	if (xef)
		vm->xef = xe_file_get(xef);
	/*
	 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be
	 * manipulated under the PXP mutex. However, the PXP mutex can be taken
	 * under a user-VM lock when the PXP session is started at exec_queue
	 * creation time. Those are different VMs and therefore there is no risk
	 * of deadlock, but we need to tell lockdep that this is the case or it
	 * will print a warning.
	 */
	if (flags & XE_VM_FLAG_GSC) {
		static struct lock_class_key gsc_vm_key;

		__init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key);
	} else {
		init_rwsem(&vm->lock);
	}
	mutex_init(&vm->snap_mutex);

	INIT_LIST_HEAD(&vm->rebind_list);

	INIT_LIST_HEAD(&vm->userptr.repin_list);
	INIT_LIST_HEAD(&vm->userptr.invalidated);
	spin_lock_init(&vm->userptr.invalidated_lock);

	ttm_lru_bulk_move_init(&vm->lru_bulk_move);

	INIT_WORK(&vm->destroy_work, vm_destroy_work_func);

	INIT_LIST_HEAD(&vm->preempt.exec_queues);
	for (id = 0; id < XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE; ++id)
		INIT_LIST_HEAD(&vm->exec_queues.list[id]);
	if (flags & XE_VM_FLAG_FAULT_MODE)
		vm->preempt.min_run_period_ms = xe->min_run_period_pf_ms;
	else
		vm->preempt.min_run_period_ms = xe->min_run_period_lr_ms;

	init_rwsem(&vm->exec_queues.lock);
	xe_vm_init_prove_locking(xe, vm);

	for_each_tile(tile, xe, id)
		xe_range_fence_tree_init(&vm->rftree[id]);

	vm->pt_ops = &xelp_pt_ops;

	/*
	 * Long-running workloads are not protected by the scheduler
	 * references. By design, run_job for long-running workloads returns
	 * NULL and the scheduler drops all references to it, hence protecting
	 * the VM in this case is necessary.
	 */
	if (flags & XE_VM_FLAG_LR_MODE) {
		INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
		xe_pm_runtime_get_noresume(xe);
		INIT_LIST_HEAD(&vm->preempt.pm_activate_link);
	}

	err = xe_svm_init(vm);
	if (err)
		goto err_no_resv;

	vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
	if (!vm_resv_obj) {
		err = -ENOMEM;
		goto err_svm_fini;
	}

	drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
		       vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);

	drm_gem_object_put(vm_resv_obj);

	err = 0;
	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
			    err) {
		err = xe_vm_drm_exec_lock(vm, &exec);
		drm_exec_retry_on_contention(&exec);

		if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
			vm->flags |= XE_VM_FLAG_64K;

		for_each_tile(tile, xe, id) {
			if (flags & XE_VM_FLAG_MIGRATION &&
			    tile->id != XE_VM_FLAG_TILE_ID(flags))
				continue;

			vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level,
						       &exec);
			if (IS_ERR(vm->pt_root[id])) {
				err = PTR_ERR(vm->pt_root[id]);
				vm->pt_root[id] = NULL;
				xe_vm_pt_destroy(vm);
				drm_exec_retry_on_contention(&exec);
				xe_validation_retry_on_oom(&ctx, &err);
				break;
			}
		}
		if (err)
			break;

		if (xe_vm_has_scratch(vm)) {
			for_each_tile(tile, xe, id) {
				if (!vm->pt_root[id])
					continue;

				err = xe_vm_create_scratch(xe, tile, vm, &exec);
				if (err) {
					xe_vm_free_scratch(vm);
					xe_vm_pt_destroy(vm);
					drm_exec_retry_on_contention(&exec);
					xe_validation_retry_on_oom(&ctx, &err);
					break;
				}
			}
			if (err)
				break;
			vm->batch_invalidate_tlb = true;
		}

		if (vm->flags & XE_VM_FLAG_LR_MODE) {
			INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
			vm->batch_invalidate_tlb = false;
		}

		/* Fill pt_root after allocating scratch tables */
		for_each_tile(tile, xe, id) {
			if (!vm->pt_root[id])
				continue;

			xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
		}
	}
	if (err)
		goto err_close;

	/* Kernel migration VM shouldn't have a circular loop. */
	if (!(flags & XE_VM_FLAG_MIGRATION)) {
		for_each_tile(tile, xe, id) {
			struct xe_exec_queue *q;
			u32 create_flags = EXEC_QUEUE_FLAG_VM;

			if (!vm->pt_root[id])
				continue;

			if (!xef) /* Not from userspace */
				create_flags |= EXEC_QUEUE_FLAG_KERNEL;

			q = xe_exec_queue_create_bind(xe, tile, vm, create_flags, 0);
			if (IS_ERR(q)) {
				err = PTR_ERR(q);
				goto err_close;
			}
			vm->q[id] = q;
		}
	}

	if (xef && xe->info.has_asid) {
		u32 asid;

		down_write(&xe->usm.lock);
		err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
				      XA_LIMIT(1, XE_MAX_ASID - 1),
				      &xe->usm.next_asid, GFP_NOWAIT);
		up_write(&xe->usm.lock);
		if (err < 0)
			goto err_close;

		vm->usm.asid = asid;
	}

	trace_xe_vm_create(vm);

	return vm;

err_close:
	xe_vm_close_and_put(vm);
	return ERR_PTR(err);

err_svm_fini:
	if (flags & XE_VM_FLAG_FAULT_MODE) {
		vm->size = 0; /* close the vm */
		xe_svm_fini(vm);
	}
err_no_resv:
	mutex_destroy(&vm->snap_mutex);
	for_each_tile(tile, xe, id)
		xe_range_fence_tree_fini(&vm->rftree[id]);
	ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
	if (vm->xef)
		xe_file_put(vm->xef);
	kfree(vm);
	if (flags & XE_VM_FLAG_LR_MODE)
		xe_pm_runtime_put(xe);
	return ERR_PTR(err);
}

static void xe_vm_close(struct xe_vm *vm)
{
	struct xe_device *xe = vm->xe;
	bool bound;
	int idx;

	bound = drm_dev_enter(&xe->drm, &idx);

	down_write(&vm->lock);
	if (xe_vm_in_fault_mode(vm))
		xe_svm_notifier_lock(vm);

	vm->size = 0;

	if (!((vm->flags & XE_VM_FLAG_MIGRATION))) {
		struct xe_tile *tile;
		struct xe_gt *gt;
		u8 id;

		/* Wait for pending binds */
		dma_resv_wait_timeout(xe_vm_resv(vm),
				      DMA_RESV_USAGE_BOOKKEEP,
				      false, MAX_SCHEDULE_TIMEOUT);

		if (bound) {
			for_each_tile(tile, xe, id)
				if (vm->pt_root[id])
					xe_pt_clear(xe, vm->pt_root[id]);

			for_each_gt(gt, xe, id)
				xe_tlb_inval_vm(&gt->tlb_inval, vm);
		}
	}

	if (xe_vm_in_fault_mode(vm))
		xe_svm_notifier_unlock(vm);
	up_write(&vm->lock);

	if (bound)
		drm_dev_exit(idx);
}

void xe_vm_close_and_put(struct xe_vm *vm)
{
	LIST_HEAD(contested);
	struct xe_device *xe = vm->xe;
	struct xe_tile *tile;
	struct xe_vma *vma, *next_vma;
	struct drm_gpuva *gpuva, *next;
	u8 id;

	xe_assert(xe, !vm->preempt.num_exec_queues);

	xe_vm_close(vm);
	if (xe_vm_in_preempt_fence_mode(vm)) {
		mutex_lock(&xe->rebind_resume_lock);
		list_del_init(&vm->preempt.pm_activate_link);
		mutex_unlock(&xe->rebind_resume_lock);
		flush_work(&vm->preempt.rebind_work);
	}
	if (xe_vm_in_fault_mode(vm))
		xe_svm_close(vm);

	down_write(&vm->lock);
	for_each_tile(tile, xe, id) {
		if (vm->q[id]) {
			int i;

			xe_exec_queue_last_fence_put(vm->q[id], vm);
			for_each_tlb_inval(i)
				xe_exec_queue_tlb_inval_last_fence_put(vm->q[id], vm, i);
		}
	}
	up_write(&vm->lock);

	for_each_tile(tile, xe, id) {
		if (vm->q[id]) {
			xe_exec_queue_kill(vm->q[id]);
			xe_exec_queue_put(vm->q[id]);
			vm->q[id] = NULL;
		}
	}

	down_write(&vm->lock);
	xe_vm_lock(vm, false);
	drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
		vma = gpuva_to_vma(gpuva);

		if (xe_vma_has_no_bo(vma)) {
			xe_svm_notifier_lock(vm);
			vma->gpuva.flags |= XE_VMA_DESTROYED;
			xe_svm_notifier_unlock(vm);
		}

		xe_vm_remove_vma(vm, vma);

		/* easy case, remove from VMA? */
		if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
			list_del_init(&vma->combined_links.rebind);
			xe_vma_destroy(vma, NULL);
			continue;
		}

		list_move_tail(&vma->combined_links.destroy, &contested);
		vma->gpuva.flags |= XE_VMA_DESTROYED;
	}

	/*
	 * All vm operations will add shared fences to resv.
	 * The only exception is eviction for a shared object,
	 * but even so, the unbind when evicted would still
	 * install a fence to resv. Hence it's safe to
	 * destroy the pagetables immediately.
	 */
	xe_vm_free_scratch(vm);
	xe_vm_pt_destroy(vm);
	xe_vm_unlock(vm);

	/*
	 * VM is now dead, so we cannot re-add nodes to vm->vmas.
	 * Since we hold a refcount to the bo, we can remove and free
	 * the members safely without locking.
	 */
	list_for_each_entry_safe(vma, next_vma, &contested,
				 combined_links.destroy) {
		list_del_init(&vma->combined_links.destroy);
		xe_vma_destroy_unlocked(vma);
	}

	xe_svm_fini(vm);

	up_write(&vm->lock);

	down_write(&xe->usm.lock);
	if (vm->usm.asid) {
		void *lookup;

		xe_assert(xe, xe->info.has_asid);
		xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));

		lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
		xe_assert(xe, lookup == vm);
	}
	up_write(&xe->usm.lock);

	for_each_tile(tile, xe, id)
		xe_range_fence_tree_fini(&vm->rftree[id]);

	xe_vm_put(vm);
}

static void vm_destroy_work_func(struct work_struct *w)
{
	struct xe_vm *vm =
		container_of(w, struct xe_vm, destroy_work);
	struct xe_device *xe = vm->xe;
	struct xe_tile *tile;
	u8 id;

	/* xe_vm_close_and_put was not called? */
	xe_assert(xe, !vm->size);

	if (xe_vm_in_preempt_fence_mode(vm))
		flush_work(&vm->preempt.rebind_work);

	mutex_destroy(&vm->snap_mutex);

	if (vm->flags & XE_VM_FLAG_LR_MODE)
		xe_pm_runtime_put(xe);

	for_each_tile(tile, xe, id)
		XE_WARN_ON(vm->pt_root[id]);

	trace_xe_vm_free(vm);

	ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);

	if (vm->xef)
		xe_file_put(vm->xef);

	kfree(vm);
}

static void xe_vm_free(struct drm_gpuvm *gpuvm)
{
	struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);

	/* To destroy the VM we need to be able to sleep */
	queue_work(system_dfl_wq, &vm->destroy_work);
}

struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
{
	struct xe_vm *vm;

	mutex_lock(&xef->vm.lock);
	vm = xa_load(&xef->vm.xa, id);
	if (vm)
		xe_vm_get(vm);
	mutex_unlock(&xef->vm.lock);

	return vm;
}
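
/*
 * Editor's sketch (illustrative): xe_vm_lookup() returns a referenced VM, so
 * callers are expected to balance it with xe_vm_put(), e.g.:
 *
 *	struct xe_vm *vm = xe_vm_lookup(xef, args->vm_id);
 *
 *	if (!vm)
 *		return -EINVAL;
 *	// ... use vm ...
 *	xe_vm_put(vm);
 *
 * This is the pattern used by xe_vm_query_vmas_attrs_ioctl() below.
 */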

u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
{
	return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0);
}

static struct xe_exec_queue *
to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	return q ? q : vm->q[0];
}

static struct xe_user_fence *
find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
{
	unsigned int i;

	for (i = 0; i < num_syncs; i++) {
		struct xe_sync_entry *e = &syncs[i];

		if (xe_sync_is_ufence(e))
			return xe_sync_ufence_get(e);
	}

	return NULL;
}

#define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
				    DRM_XE_VM_CREATE_FLAG_LR_MODE | \
				    DRM_XE_VM_CREATE_FLAG_FAULT_MODE | \
				    DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT)

int xe_vm_create_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_vm_create *args = data;
	struct xe_gt *wa_gt = xe_root_mmio_gt(xe);
	struct xe_vm *vm;
	u32 id;
	int err;
	u32 flags = 0;

	if (XE_IOCTL_DBG(xe, args->extensions))
		return -EINVAL;

	if (wa_gt && XE_GT_WA(wa_gt, 22014953428))
		args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;

	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
			 !xe->info.has_usm))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
			 !xe->info.needs_scratch))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) &&
			 args->flags & DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT))
		return -EINVAL;

	if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
		flags |= XE_VM_FLAG_SCRATCH_PAGE;
	if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
		flags |= XE_VM_FLAG_LR_MODE;
	if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
		flags |= XE_VM_FLAG_FAULT_MODE;
	if (args->flags & DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT)
		flags |= XE_VM_FLAG_NO_VM_OVERCOMMIT;

	vm = xe_vm_create(xe, flags, xef);
	if (IS_ERR(vm))
		return PTR_ERR(vm);

#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
	/* Warning: Security issue - never enable by default */
	args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
#endif

	/* user id alloc must always be last in ioctl to prevent UAF */
	err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
	if (err)
		goto err_close_and_put;

	args->vm_id = id;

	return 0;

err_close_and_put:
	xe_vm_close_and_put(vm);

	return err;
}
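
/*
 * Illustrative userspace view (editor's addition, assuming libdrm-style
 * drmIoctl() plumbing): creating a long-running VM in fault mode through
 * the ioctl above. Note the validation above requires FAULT_MODE to be
 * paired with LR_MODE.
 *
 *	struct drm_xe_vm_create create = {
 *		.flags = DRM_XE_VM_CREATE_FLAG_LR_MODE |
 *			 DRM_XE_VM_CREATE_FLAG_FAULT_MODE,
 *	};
 *
 *	if (drmIoctl(fd, DRM_IOCTL_XE_VM_CREATE, &create) == 0)
 *		vm_id = create.vm_id;
 */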

int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_vm_destroy *args = data;
	struct xe_vm *vm;
	int err = 0;

	if (XE_IOCTL_DBG(xe, args->pad) ||
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	mutex_lock(&xef->vm.lock);
	vm = xa_load(&xef->vm.xa, args->vm_id);
	if (XE_IOCTL_DBG(xe, !vm))
		err = -ENOENT;
	else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
		err = -EBUSY;
	else
		xa_erase(&xef->vm.xa, args->vm_id);
	mutex_unlock(&xef->vm.lock);

	if (!err)
		xe_vm_close_and_put(vm);

	return err;
}

static int xe_vm_query_vmas(struct xe_vm *vm, u64 start, u64 end)
{
	struct drm_gpuva *gpuva;
	u32 num_vmas = 0;

	lockdep_assert_held(&vm->lock);
	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end)
		num_vmas++;

	return num_vmas;
}

static int get_mem_attrs(struct xe_vm *vm, u32 *num_vmas, u64 start,
			 u64 end, struct drm_xe_mem_range_attr *attrs)
{
	struct drm_gpuva *gpuva;
	int i = 0;

	lockdep_assert_held(&vm->lock);

	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);

		if (i == *num_vmas)
			return -ENOSPC;

		attrs[i].start = xe_vma_start(vma);
		attrs[i].end = xe_vma_end(vma);
		attrs[i].atomic.val = vma->attr.atomic_access;
		attrs[i].pat_index.val = vma->attr.pat_index;
		attrs[i].preferred_mem_loc.devmem_fd = vma->attr.preferred_loc.devmem_fd;
		attrs[i].preferred_mem_loc.migration_policy =
			vma->attr.preferred_loc.migration_policy;

		i++;
	}

	*num_vmas = i;
	return 0;
}

int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_mem_range_attr *mem_attrs;
	struct drm_xe_vm_query_mem_range_attr *args = data;
	u64 __user *attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);
	struct xe_vm *vm;
	int err = 0;

	if (XE_IOCTL_DBG(xe,
			 ((args->num_mem_ranges == 0 &&
			   (attrs_user || args->sizeof_mem_range_attr != 0)) ||
			  (args->num_mem_ranges > 0 &&
			   (!attrs_user ||
			    args->sizeof_mem_range_attr !=
			    sizeof(struct drm_xe_mem_range_attr))))))
		return -EINVAL;

	vm = xe_vm_lookup(xef, args->vm_id);
	if (XE_IOCTL_DBG(xe, !vm))
		return -EINVAL;

	err = down_read_interruptible(&vm->lock);
	if (err)
		goto put_vm;

	attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);

	if (args->num_mem_ranges == 0 && !attrs_user) {
		args->num_mem_ranges = xe_vm_query_vmas(vm, args->start, args->start + args->range);
		args->sizeof_mem_range_attr = sizeof(struct drm_xe_mem_range_attr);
		goto unlock_vm;
	}

	mem_attrs = kvmalloc_array(args->num_mem_ranges, args->sizeof_mem_range_attr,
				   GFP_KERNEL | __GFP_ACCOUNT |
				   __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
	if (!mem_attrs) {
		err = args->num_mem_ranges > 1 ? -ENOBUFS : -ENOMEM;
-ENOBUFS : -ENOMEM;
		goto unlock_vm;
	}

	memset(mem_attrs, 0, args->num_mem_ranges * args->sizeof_mem_range_attr);
	err = get_mem_attrs(vm, &args->num_mem_ranges, args->start,
			    args->start + args->range, mem_attrs);
	if (err)
		goto free_mem_attrs;

	err = copy_to_user(attrs_user, mem_attrs,
			   args->sizeof_mem_range_attr * args->num_mem_ranges);
	if (err)
		err = -EFAULT;

free_mem_attrs:
	kvfree(mem_attrs);
unlock_vm:
	up_read(&vm->lock);
put_vm:
	xe_vm_put(vm);
	return err;
}

static bool vma_matches(struct xe_vma *vma, u64 page_addr)
{
	if (page_addr > xe_vma_end(vma) - 1 ||
	    page_addr + SZ_4K - 1 < xe_vma_start(vma))
		return false;

	return true;
}

/**
 * xe_vm_find_vma_by_addr() - Find a VMA by its address
 * @vm: the xe_vm the vma belongs to
 * @page_addr: address to look up
 *
 * Return: the VMA covering @page_addr, or NULL if none is found.
 */
struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr)
{
	struct xe_vma *vma = NULL;

	if (vm->usm.last_fault_vma) { /* Fast lookup */
		if (vma_matches(vm->usm.last_fault_vma, page_addr))
			vma = vm->usm.last_fault_vma;
	}
	if (!vma)
		vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K);

	return vma;
}

static const u32 region_to_mem_type[] = {
	XE_PL_TT,
	XE_PL_VRAM0,
	XE_PL_VRAM1,
};

static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
			     bool post_commit)
{
	xe_svm_notifier_lock(vm);
	vma->gpuva.flags |= XE_VMA_DESTROYED;
	xe_svm_notifier_unlock(vm);
	if (post_commit)
		xe_vm_remove_vma(vm, vma);
}
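/*
 * Illustrative sketch (not driver code): xe_vm_find_vma_by_addr() is the
 * kind of lookup a GPU pagefault handler performs to resolve a faulting
 * address; "fault_addr" below is a hypothetical input:
 *
 *	struct xe_vma *vma;
 *
 *	vma = xe_vm_find_vma_by_addr(vm, fault_addr & ~((u64)SZ_4K - 1));
 *	if (!vma)
 *		return -EFAULT;
 *
 * The vm->usm.last_fault_vma cache makes back-to-back faults on the same
 * VMA a constant-time hit; anything else falls back to the GPUVA lookup in
 * xe_vm_find_overlapping_vma().
 */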
#undef ULL
#define ULL	unsigned long long

#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
{
	struct xe_vma *vma;

	switch (op->op) {
	case DRM_GPUVA_OP_MAP:
		vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
		       (ULL)op->map.va.addr, (ULL)op->map.va.range);
		break;
	case DRM_GPUVA_OP_REMAP:
		vma = gpuva_to_vma(op->remap.unmap->va);
		vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
		       op->remap.unmap->keep ? 1 : 0);
		if (op->remap.prev)
			vm_dbg(&xe->drm,
			       "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
			       (ULL)op->remap.prev->va.addr,
			       (ULL)op->remap.prev->va.range);
		if (op->remap.next)
			vm_dbg(&xe->drm,
			       "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
			       (ULL)op->remap.next->va.addr,
			       (ULL)op->remap.next->va.range);
		break;
	case DRM_GPUVA_OP_UNMAP:
		vma = gpuva_to_vma(op->unmap.va);
		vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
		       op->unmap.keep ? 1 : 0);
		break;
	case DRM_GPUVA_OP_PREFETCH:
		vma = gpuva_to_vma(op->prefetch.va);
		vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
		break;
	default:
		drm_warn(&xe->drm, "NOT POSSIBLE\n");
	}
}
#else
static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
{
}
#endif

static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags)
{
	if (!xe_vm_in_fault_mode(vm))
		return false;

	if (!xe_vm_has_scratch(vm))
		return false;

	if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE)
		return false;

	return true;
}

static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops)
{
	struct drm_gpuva_op *__op;

	drm_gpuva_for_each_op(__op, ops) {
		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);

		xe_vma_svm_prefetch_op_fini(op);
	}
}

/*
 * Create the operations list from the IOCTL arguments and set up the
 * operation fields so that the parse and commit steps are decoupled from
 * the IOCTL arguments. This step can fail.
 */
static struct drm_gpuva_ops *
vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
			 struct xe_bo *bo, u64 bo_offset_or_userptr,
			 u64 addr, u64 range,
			 u32 operation, u32 flags,
			 u32 prefetch_region, u16 pat_index)
{
	struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
	struct drm_gpuva_ops *ops;
	struct drm_gpuva_op *__op;
	struct drm_gpuvm_bo *vm_bo;
	u64 range_start = addr;
	u64 range_end = addr + range;
	int err;

	lockdep_assert_held_write(&vm->lock);

	vm_dbg(&vm->xe->drm,
	       "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
	       operation, (ULL)addr, (ULL)range,
	       (ULL)bo_offset_or_userptr);

	switch (operation) {
	case DRM_XE_VM_BIND_OP_MAP:
		if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) {
			xe_vm_find_cpu_addr_mirror_vma_range(vm, &range_start, &range_end);
			vops->flags |= XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP;
		}

		fallthrough;
	case DRM_XE_VM_BIND_OP_MAP_USERPTR: {
		struct drm_gpuvm_map_req map_req = {
			.map.va.addr = range_start,
			.map.va.range = range_end - range_start,
			.map.gem.obj = obj,
			.map.gem.offset = bo_offset_or_userptr,
		};

		ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, &map_req);
		break;
	}
	case DRM_XE_VM_BIND_OP_UNMAP:
		ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
		break;
	case DRM_XE_VM_BIND_OP_PREFETCH:
		ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
		break;
	case DRM_XE_VM_BIND_OP_UNMAP_ALL:
		xe_assert(vm->xe, bo);

		err = xe_bo_lock(bo, true);
		if (err)
			return ERR_PTR(err);

		vm_bo = drm_gpuvm_bo_obtain_locked(&vm->gpuvm, obj);
		if (IS_ERR(vm_bo)) {
			xe_bo_unlock(bo);
			return ERR_CAST(vm_bo);
		}

		ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
		drm_gpuvm_bo_put(vm_bo);
		xe_bo_unlock(bo);
		break;
	default:
		drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
		ops = ERR_PTR(-EINVAL);
	}
	if (IS_ERR(ops))
		return ops;

	drm_gpuva_for_each_op(__op, ops) {
		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);

		if (__op->op == DRM_GPUVA_OP_MAP) {
			op->map.immediate =
				flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
			if (flags & DRM_XE_VM_BIND_FLAG_READONLY)
				op->map.vma_flags |= XE_VMA_READ_ONLY;
			if (flags & DRM_XE_VM_BIND_FLAG_NULL)
				op->map.vma_flags |= DRM_GPUVA_SPARSE;
			if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR)
				op->map.vma_flags |= XE_VMA_SYSTEM_ALLOCATOR;
			if (flags & DRM_XE_VM_BIND_FLAG_DUMPABLE)
				op->map.vma_flags |= XE_VMA_DUMPABLE;
			if (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET)
				op->map.vma_flags |= XE_VMA_MADV_AUTORESET;
			op->map.request_decompress = flags & DRM_XE_VM_BIND_FLAG_DECOMPRESS;
			op->map.pat_index = pat_index;
			op->map.invalidate_on_bind =
				__xe_vm_needs_clear_scratch_pages(vm, flags);
		} else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
			struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
			struct xe_tile *tile;
			struct xe_svm_range *svm_range;
			struct drm_gpusvm_ctx ctx = {};
			struct drm_pagemap *dpagemap = NULL;
			u8 id, tile_mask = 0;
			u32 i;

			if (!xe_vma_is_cpu_addr_mirror(vma)) {
				op->prefetch.region = prefetch_region;
				break;
			}

			ctx.read_only = xe_vma_read_only(vma);
			ctx.devmem_possible = IS_DGFX(vm->xe) &&
				IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);

			for_each_tile(tile, vm->xe, id)
				tile_mask |= 0x1 << id;

			xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC);
			op->prefetch_range.ranges_count = 0;

			if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) {
				dpagemap = xe_vma_resolve_pagemap(vma,
								  xe_device_get_root_tile(vm->xe));
			} else if (prefetch_region) {
				tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] -
						      XE_PL_VRAM0];
				dpagemap = xe_tile_local_pagemap(tile);
			}

			op->prefetch_range.dpagemap = dpagemap;
alloc_next_range:
			svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx);

			if (PTR_ERR(svm_range) == -ENOENT) {
				u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma);

				addr = ret == ULONG_MAX ? 0 : ret;
				if (addr)
					goto alloc_next_range;
				else
					goto print_op_label;
			}

			if (IS_ERR(svm_range)) {
				err = PTR_ERR(svm_range);
				goto unwind_prefetch_ops;
			}

			if (xe_svm_range_validate(vm, svm_range, tile_mask, dpagemap)) {
				xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID");
				goto check_next_range;
			}

			err = xa_alloc(&op->prefetch_range.range,
				       &i, svm_range, xa_limit_32b,
				       GFP_KERNEL);
			if (err)
				goto unwind_prefetch_ops;

			op->prefetch_range.ranges_count++;
			vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH;
			xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED");
check_next_range:
			if (range_end > xe_svm_range_end(svm_range) &&
			    xe_svm_range_end(svm_range) < xe_vma_end(vma)) {
				addr = xe_svm_range_end(svm_range);
				goto alloc_next_range;
			}
		}
print_op_label:
		print_op(vm->xe, __op);
	}

	return ops;

unwind_prefetch_ops:
	xe_svm_prefetch_gpuva_ops_fini(ops);
	drm_gpuva_ops_free(&vm->gpuvm, ops);
	return ERR_PTR(err);
}

ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO);
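/*
 * Sketch of how the bind-op helpers fit together (illustrative only; see
 * xe_vm_bind_kernel_bo() further down for a real in-kernel sequence):
 *
 *	ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, range,
 *				       DRM_XE_VM_BIND_OP_MAP, 0, 0, pat_index);
 *	err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
 *	err = xe_vma_ops_alloc(&vops, false);
 *	fence = vm_bind_ioctl_ops_execute(vm, &vops);
 *
 * ops_create builds the drm_gpuva operations, ops_parse creates and commits
 * the VMAs, and ops_execute binds the page tables. On failure the committed
 * state is rolled back in reverse via vm_bind_ioctl_ops_unwind().
 */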
static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
			      struct xe_vma_mem_attr *attr, unsigned int flags)
{
	struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	struct xe_vma *vma;
	int err = 0;

	lockdep_assert_held_write(&vm->lock);

	if (bo) {
		err = 0;
		xe_validation_guard(&ctx, &vm->xe->val, &exec,
				    (struct xe_val_flags) {.interruptible = true}, err) {
			if (!bo->vm) {
				err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
				drm_exec_retry_on_contention(&exec);
			}
			if (!err) {
				err = drm_exec_lock_obj(&exec, &bo->ttm.base);
				drm_exec_retry_on_contention(&exec);
			}
			if (err)
				return ERR_PTR(err);

			vma = xe_vma_create(vm, bo, op->gem.offset,
					    op->va.addr, op->va.addr +
					    op->va.range - 1, attr, flags);
			if (IS_ERR(vma))
				return vma;

			if (!bo->vm) {
				err = add_preempt_fences(vm, bo);
				if (err) {
					prep_vma_destroy(vm, vma, false);
					xe_vma_destroy(vma, NULL);
				}
			}
		}
		if (err)
			return ERR_PTR(err);
	} else {
		vma = xe_vma_create(vm, NULL, op->gem.offset,
				    op->va.addr, op->va.addr +
				    op->va.range - 1, attr, flags);
		if (IS_ERR(vma))
			return vma;

		if (xe_vma_is_userptr(vma)) {
			err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
			/*
			 * -EBUSY is reserved for signalling that a user fence
			 * attached to the VMA is still busy. In practice
			 * xe_vma_userptr_pin_pages() can only fail with -EBUSY
			 * when we are low on memory, so convert it to -ENOMEM
			 * here to avoid that ambiguity.
			 */
			if (err == -EBUSY)
				err = -ENOMEM;
		}
	}
	if (err) {
		prep_vma_destroy(vm, vma, false);
		xe_vma_destroy_unlocked(vma);
		vma = ERR_PTR(err);
	}

	return vma;
}

static u64 xe_vma_max_pte_size(struct xe_vma *vma)
{
	if (vma->gpuva.flags & XE_VMA_PTE_1G)
		return SZ_1G;
	else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT))
		return SZ_2M;
	else if (vma->gpuva.flags & XE_VMA_PTE_64K)
		return SZ_64K;
	else if (vma->gpuva.flags & XE_VMA_PTE_4K)
		return SZ_4K;

	return SZ_1G;	/* Uninitialized, use max size */
}

static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
{
	switch (size) {
	case SZ_1G:
		vma->gpuva.flags |= XE_VMA_PTE_1G;
		break;
	case SZ_2M:
		vma->gpuva.flags |= XE_VMA_PTE_2M;
		break;
	case SZ_64K:
		vma->gpuva.flags |= XE_VMA_PTE_64K;
		break;
	case SZ_4K:
		vma->gpuva.flags |= XE_VMA_PTE_4K;
		break;
	}
}
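/*
 * Example (illustrative only): xe_vma_set_pte_size() and
 * xe_vma_max_pte_size() round-trip through the XE_VMA_PTE_* flags, with the
 * largest flag set winning on the way back out:
 *
 *	xe_vma_set_pte_size(vma, SZ_2M);
 *	size = xe_vma_max_pte_size(vma);	returns SZ_2M
 *
 * Nothing ever clears these flags, so a VMA that never had one set reports
 * the SZ_1G maximum.
 */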
static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
{
	int err = 0;

	lockdep_assert_held_write(&vm->lock);

	switch (op->base.op) {
	case DRM_GPUVA_OP_MAP:
		err |= xe_vm_insert_vma(vm, op->map.vma);
		if (!err)
			op->flags |= XE_VMA_OP_COMMITTED;
		break;
	case DRM_GPUVA_OP_REMAP:
	{
		u8 tile_present =
			gpuva_to_vma(op->base.remap.unmap->va)->tile_present;

		prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
				 true);
		op->flags |= XE_VMA_OP_COMMITTED;

		if (op->remap.prev) {
			err |= xe_vm_insert_vma(vm, op->remap.prev);
			if (!err)
				op->flags |= XE_VMA_OP_PREV_COMMITTED;
			if (!err && op->remap.skip_prev) {
				op->remap.prev->tile_present =
					tile_present;
				op->remap.prev = NULL;
			}
		}
		if (op->remap.next) {
			err |= xe_vm_insert_vma(vm, op->remap.next);
			if (!err)
				op->flags |= XE_VMA_OP_NEXT_COMMITTED;
			if (!err && op->remap.skip_next) {
				op->remap.next->tile_present =
					tile_present;
				op->remap.next = NULL;
			}
		}

		/* Adjust for partial unbind after removing VMA from VM */
		if (!err) {
			op->base.remap.unmap->va->va.addr = op->remap.start;
			op->base.remap.unmap->va->va.range = op->remap.range;
		}
		break;
	}
	case DRM_GPUVA_OP_UNMAP:
		prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
		op->flags |= XE_VMA_OP_COMMITTED;
		break;
	case DRM_GPUVA_OP_PREFETCH:
		op->flags |= XE_VMA_OP_COMMITTED;
		break;
	default:
		drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
	}

	return err;
}

/**
 * xe_vma_has_default_mem_attrs - Check if a VMA has default memory attributes
 * @vma: Pointer to the xe_vma structure to check
 *
 * This function determines whether the given VMA (Virtual Memory Area)
 * has its memory attributes set to their default values. Specifically,
 * it checks the following conditions:
 *
 * - `atomic_access` is `DRM_XE_ATOMIC_UNDEFINED`
 * - `pat_index` is equal to `default_pat_index`
 * - `preferred_loc.devmem_fd` is `DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE`
 * - `preferred_loc.migration_policy` is `DRM_XE_MIGRATE_ALL_PAGES`
 *
 * Return: true if all attributes are at their default values, false otherwise.
 */
bool xe_vma_has_default_mem_attrs(struct xe_vma *vma)
{
	return (vma->attr.atomic_access == DRM_XE_ATOMIC_UNDEFINED &&
		vma->attr.pat_index == vma->attr.default_pat_index &&
		vma->attr.preferred_loc.devmem_fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE &&
		vma->attr.preferred_loc.migration_policy == DRM_XE_MIGRATE_ALL_PAGES);
}

static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
				   struct xe_vma_ops *vops)
{
	struct xe_device *xe = vm->xe;
	struct drm_gpuva_op *__op;
	struct xe_tile *tile;
	u8 id, tile_mask = 0;
	int err = 0;

	lockdep_assert_held_write(&vm->lock);

	for_each_tile(tile, vm->xe, id)
		tile_mask |= 0x1 << id;

	drm_gpuva_for_each_op(__op, ops) {
		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
		struct xe_vma *vma;
		unsigned int flags = 0;

		INIT_LIST_HEAD(&op->link);
		list_add_tail(&op->link, &vops->list);
		op->tile_mask = tile_mask;

		switch (op->base.op) {
		case DRM_GPUVA_OP_MAP:
		{
			struct xe_vma_mem_attr default_attr = {
				.preferred_loc = {
					.devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE,
					.migration_policy = DRM_XE_MIGRATE_ALL_PAGES,
				},
				.atomic_access = DRM_XE_ATOMIC_UNDEFINED,
				.default_pat_index = op->map.pat_index,
				.pat_index = op->map.pat_index,
			};

			flags |= op->map.vma_flags & XE_VMA_CREATE_MASK;

			vma = new_vma(vm, &op->base.map, &default_attr,
				      flags);
			if (IS_ERR(vma))
				return PTR_ERR(vma);

			op->map.vma = vma;
			if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) &&
			     !(op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) ||
			    op->map.invalidate_on_bind)
				xe_vma_ops_incr_pt_update_ops(vops,
							      op->tile_mask, 1);
			break;
		}
		case DRM_GPUVA_OP_REMAP:
		{
			struct xe_vma *old =
				gpuva_to_vma(op->base.remap.unmap->va);
			bool skip = xe_vma_is_cpu_addr_mirror(old);
			u64 start = xe_vma_start(old), end = xe_vma_end(old);
			int num_remap_ops = 0;

			if (op->base.remap.prev)
				start = op->base.remap.prev->va.addr +
op->base.remap.prev->va.range; 2707 if (op->base.remap.next) 2708 end = op->base.remap.next->va.addr; 2709 2710 if (xe_vma_is_cpu_addr_mirror(old) && 2711 xe_svm_has_mapping(vm, start, end)) { 2712 if (vops->flags & XE_VMA_OPS_FLAG_MADVISE) 2713 xe_svm_unmap_address_range(vm, start, end); 2714 else 2715 return -EBUSY; 2716 } 2717 2718 op->remap.start = xe_vma_start(old); 2719 op->remap.range = xe_vma_size(old); 2720 2721 flags |= op->base.remap.unmap->va->flags & XE_VMA_CREATE_MASK; 2722 if (op->base.remap.prev) { 2723 vma = new_vma(vm, op->base.remap.prev, 2724 &old->attr, flags); 2725 if (IS_ERR(vma)) 2726 return PTR_ERR(vma); 2727 2728 op->remap.prev = vma; 2729 2730 /* 2731 * Userptr creates a new SG mapping so 2732 * we must also rebind. 2733 */ 2734 op->remap.skip_prev = skip || 2735 (!xe_vma_is_userptr(old) && 2736 IS_ALIGNED(xe_vma_end(vma), 2737 xe_vma_max_pte_size(old))); 2738 if (op->remap.skip_prev) { 2739 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2740 op->remap.range -= 2741 xe_vma_end(vma) - 2742 xe_vma_start(old); 2743 op->remap.start = xe_vma_end(vma); 2744 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx", 2745 (ULL)op->remap.start, 2746 (ULL)op->remap.range); 2747 } else { 2748 num_remap_ops++; 2749 } 2750 } 2751 2752 if (op->base.remap.next) { 2753 vma = new_vma(vm, op->base.remap.next, 2754 &old->attr, flags); 2755 if (IS_ERR(vma)) 2756 return PTR_ERR(vma); 2757 2758 op->remap.next = vma; 2759 2760 /* 2761 * Userptr creates a new SG mapping so 2762 * we must also rebind. 2763 */ 2764 op->remap.skip_next = skip || 2765 (!xe_vma_is_userptr(old) && 2766 IS_ALIGNED(xe_vma_start(vma), 2767 xe_vma_max_pte_size(old))); 2768 if (op->remap.skip_next) { 2769 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2770 op->remap.range -= 2771 xe_vma_end(old) - 2772 xe_vma_start(vma); 2773 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx", 2774 (ULL)op->remap.start, 2775 (ULL)op->remap.range); 2776 } else { 2777 num_remap_ops++; 2778 } 2779 } 2780 if (!skip) 2781 num_remap_ops++; 2782 2783 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops); 2784 break; 2785 } 2786 case DRM_GPUVA_OP_UNMAP: 2787 vma = gpuva_to_vma(op->base.unmap.va); 2788 2789 if (xe_vma_is_cpu_addr_mirror(vma) && 2790 xe_svm_has_mapping(vm, xe_vma_start(vma), 2791 xe_vma_end(vma)) && 2792 !(vops->flags & XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP)) 2793 return -EBUSY; 2794 2795 if (!xe_vma_is_cpu_addr_mirror(vma)) 2796 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2797 break; 2798 case DRM_GPUVA_OP_PREFETCH: 2799 vma = gpuva_to_vma(op->base.prefetch.va); 2800 2801 if (xe_vma_is_userptr(vma)) { 2802 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2803 if (err) 2804 return err; 2805 } 2806 2807 if (xe_vma_is_cpu_addr_mirror(vma)) 2808 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 2809 op->prefetch_range.ranges_count); 2810 else 2811 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2812 2813 break; 2814 default: 2815 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n"); 2816 } 2817 2818 err = xe_vma_op_commit(vm, op); 2819 if (err) 2820 return err; 2821 } 2822 2823 return 0; 2824 } 2825 2826 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, 2827 bool post_commit, bool prev_post_commit, 2828 bool next_post_commit) 2829 { 2830 lockdep_assert_held_write(&vm->lock); 2831 2832 switch (op->base.op) { 2833 case DRM_GPUVA_OP_MAP: 2834 if (op->map.vma) { 2835 prep_vma_destroy(vm, op->map.vma, post_commit); 2836 xe_vma_destroy_unlocked(op->map.vma); 
2837 } 2838 break; 2839 case DRM_GPUVA_OP_UNMAP: 2840 { 2841 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va); 2842 2843 if (vma) { 2844 xe_svm_notifier_lock(vm); 2845 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2846 xe_svm_notifier_unlock(vm); 2847 if (post_commit) 2848 xe_vm_insert_vma(vm, vma); 2849 } 2850 break; 2851 } 2852 case DRM_GPUVA_OP_REMAP: 2853 { 2854 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va); 2855 2856 if (op->remap.prev) { 2857 prep_vma_destroy(vm, op->remap.prev, prev_post_commit); 2858 xe_vma_destroy_unlocked(op->remap.prev); 2859 } 2860 if (op->remap.next) { 2861 prep_vma_destroy(vm, op->remap.next, next_post_commit); 2862 xe_vma_destroy_unlocked(op->remap.next); 2863 } 2864 if (vma) { 2865 xe_svm_notifier_lock(vm); 2866 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2867 xe_svm_notifier_unlock(vm); 2868 if (post_commit) 2869 xe_vm_insert_vma(vm, vma); 2870 } 2871 break; 2872 } 2873 case DRM_GPUVA_OP_PREFETCH: 2874 /* Nothing to do */ 2875 break; 2876 default: 2877 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n"); 2878 } 2879 } 2880 2881 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, 2882 struct drm_gpuva_ops **ops, 2883 int num_ops_list) 2884 { 2885 int i; 2886 2887 for (i = num_ops_list - 1; i >= 0; --i) { 2888 struct drm_gpuva_ops *__ops = ops[i]; 2889 struct drm_gpuva_op *__op; 2890 2891 if (!__ops) 2892 continue; 2893 2894 drm_gpuva_for_each_op_reverse(__op, __ops) { 2895 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2896 2897 xe_vma_op_unwind(vm, op, 2898 op->flags & XE_VMA_OP_COMMITTED, 2899 op->flags & XE_VMA_OP_PREV_COMMITTED, 2900 op->flags & XE_VMA_OP_NEXT_COMMITTED); 2901 } 2902 } 2903 } 2904 2905 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, 2906 bool res_evict, bool validate, bool request_decompress) 2907 { 2908 struct xe_bo *bo = xe_vma_bo(vma); 2909 struct xe_vm *vm = xe_vma_vm(vma); 2910 int err = 0; 2911 2912 if (bo) { 2913 if (!bo->vm) 2914 err = drm_exec_lock_obj(exec, &bo->ttm.base); 2915 if (!err && validate) 2916 err = xe_bo_validate(bo, vm, 2917 xe_vm_allow_vm_eviction(vm) && 2918 res_evict, exec); 2919 2920 if (err) 2921 return err; 2922 2923 if (request_decompress) 2924 err = xe_bo_decompress(bo); 2925 } 2926 2927 return err; 2928 } 2929 2930 static int check_ufence(struct xe_vma *vma) 2931 { 2932 if (vma->ufence) { 2933 struct xe_user_fence * const f = vma->ufence; 2934 2935 if (!xe_sync_ufence_get_status(f)) 2936 return -EBUSY; 2937 2938 vma->ufence = NULL; 2939 xe_sync_ufence_put(f); 2940 } 2941 2942 return 0; 2943 } 2944 2945 static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) 2946 { 2947 bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); 2948 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2949 struct drm_pagemap *dpagemap = op->prefetch_range.dpagemap; 2950 int err = 0; 2951 2952 struct xe_svm_range *svm_range; 2953 struct drm_gpusvm_ctx ctx = {}; 2954 unsigned long i; 2955 2956 if (!xe_vma_is_cpu_addr_mirror(vma)) 2957 return 0; 2958 2959 ctx.read_only = xe_vma_read_only(vma); 2960 ctx.devmem_possible = devmem_possible; 2961 ctx.check_pages_threshold = devmem_possible ? 
SZ_64K : 0;
	ctx.device_private_page_owner = xe_svm_private_page_owner(vm, !dpagemap);

	/* TODO: Thread the migration */
	xa_for_each(&op->prefetch_range.range, i, svm_range) {
		if (!dpagemap)
			xe_svm_range_migrate_to_smem(vm, svm_range);

		if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) {
			drm_dbg(&vm->xe->drm,
				"Prefetch pagemap is %s start 0x%016lx end 0x%016lx\n",
				dpagemap ? dpagemap->drm->unique : "system",
				xe_svm_range_start(svm_range), xe_svm_range_end(svm_range));
		}

		if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, dpagemap)) {
			err = xe_svm_alloc_vram(svm_range, &ctx, dpagemap);
			if (err) {
				drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
					vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
				return -ENODATA;
			}
			xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM");
		}

		err = xe_svm_range_get_pages(vm, svm_range, &ctx);
		if (err) {
			drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n",
				vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
			if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM)
				err = -ENODATA;
			return err;
		}
		xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE");
	}

	return err;
}

static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
			    struct xe_vma_ops *vops, struct xe_vma_op *op)
{
	int err = 0;
	bool res_evict;

	/*
	 * We only allow evicting a BO within the VM if it is not part of an
	 * array of binds, as an array of binds can evict another BO within the
	 * bind.
	 */
	res_evict = !(vops->flags & XE_VMA_OPS_ARRAY_OF_BINDS);

	switch (op->base.op) {
	case DRM_GPUVA_OP_MAP:
		if (!op->map.invalidate_on_bind)
			err = vma_lock_and_validate(exec, op->map.vma,
						    res_evict,
						    !xe_vm_in_fault_mode(vm) ||
						    op->map.immediate,
						    op->map.request_decompress);
		break;
	case DRM_GPUVA_OP_REMAP:
		err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
		if (err)
			break;

		err = vma_lock_and_validate(exec,
					    gpuva_to_vma(op->base.remap.unmap->va),
					    res_evict, false, false);
		if (!err && op->remap.prev)
			err = vma_lock_and_validate(exec, op->remap.prev,
						    res_evict, true, false);
		if (!err && op->remap.next)
			err = vma_lock_and_validate(exec, op->remap.next,
						    res_evict, true, false);
		break;
	case DRM_GPUVA_OP_UNMAP:
		err = check_ufence(gpuva_to_vma(op->base.unmap.va));
		if (err)
			break;

		err = vma_lock_and_validate(exec,
					    gpuva_to_vma(op->base.unmap.va),
					    res_evict, false, false);
		break;
	case DRM_GPUVA_OP_PREFETCH:
	{
		struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
		u32 region;

		if (!xe_vma_is_cpu_addr_mirror(vma)) {
			region = op->prefetch.region;
			/* Valid indices are 0..ARRAY_SIZE - 1, hence '<' */
			xe_assert(vm->xe, region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC ||
				  region < ARRAY_SIZE(region_to_mem_type));
		}

		err = vma_lock_and_validate(exec,
					    gpuva_to_vma(op->base.prefetch.va),
					    res_evict, false, false);
		if (!err && !xe_vma_has_no_bo(vma))
			err = xe_bo_migrate(xe_vma_bo(vma),
					    region_to_mem_type[region],
					    NULL,
					    exec);
		break;
	}
	default:
		drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
	}

	return err;
}
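/*
 * Illustrative example of the prefetch-region resolution above (using the
 * region_to_mem_type[] table defined earlier in this file): a
 * DRM_XE_VM_BIND_OP_PREFETCH with prefetch_mem_region_instance == 1
 * resolves to region_to_mem_type[1] == XE_PL_VRAM0, so a BO-backed VMA is
 * migrated with:
 *
 *	err = xe_bo_migrate(xe_vma_bo(vma), XE_PL_VRAM0, NULL, exec);
 *
 * while instance 0 resolves to XE_PL_TT, i.e. system memory managed by TTM.
 */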
static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm,
					     struct xe_vma_ops *vops)
{
	struct xe_vma_op *op;
	int err;

	if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
		return 0;

	list_for_each_entry(op, &vops->list, link) {
		if (op->base.op == DRM_GPUVA_OP_PREFETCH) {
			err = prefetch_ranges(vm, op);
			if (err)
				return err;
		}
	}

	return 0;
}

static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
					   struct xe_vm *vm,
					   struct xe_vma_ops *vops)
{
	struct xe_vma_op *op;
	int err;

	err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
	if (err)
		return err;

	list_for_each_entry(op, &vops->list, link) {
		err = op_lock_and_prep(exec, vm, vops, op);
		if (err)
			return err;
	}

#ifdef TEST_VM_OPS_ERROR
	if (vops->inject_error &&
	    vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK)
		return -ENOSPC;
#endif

	return 0;
}

static void op_trace(struct xe_vma_op *op)
{
	switch (op->base.op) {
	case DRM_GPUVA_OP_MAP:
		trace_xe_vma_bind(op->map.vma);
		break;
	case DRM_GPUVA_OP_REMAP:
		trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va));
		if (op->remap.prev)
			trace_xe_vma_bind(op->remap.prev);
		if (op->remap.next)
			trace_xe_vma_bind(op->remap.next);
		break;
	case DRM_GPUVA_OP_UNMAP:
		trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va));
		break;
	case DRM_GPUVA_OP_PREFETCH:
		trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va));
		break;
	case DRM_GPUVA_OP_DRIVER:
		break;
	default:
		XE_WARN_ON("NOT POSSIBLE");
	}
}

static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops)
{
	struct xe_vma_op *op;

	list_for_each_entry(op, &vops->list, link)
		op_trace(op);
}

static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops)
{
	struct xe_exec_queue *q = vops->q;
	struct xe_tile *tile;
	int number_tiles = 0;
	u8 id;

	for_each_tile(tile, vm->xe, id) {
		if (vops->pt_update_ops[id].num_ops)
			++number_tiles;

		if (vops->pt_update_ops[id].q)
			continue;

		if (q) {
			vops->pt_update_ops[id].q = q;
			if (vm->pt_root[id] && !list_empty(&q->multi_gt_list))
				q = list_next_entry(q, multi_gt_list);
		} else {
			vops->pt_update_ops[id].q = vm->q[id];
		}
	}

	return number_tiles;
}

static struct dma_fence *ops_execute(struct xe_vm *vm,
				     struct xe_vma_ops *vops)
{
	struct xe_tile *tile;
	struct dma_fence *fence = NULL;
	struct dma_fence **fences = NULL;
	struct dma_fence_array *cf = NULL;
	int number_tiles = 0, current_fence = 0, n_fence = 0, err, i;
	u8 id;

	number_tiles = vm_ops_setup_tile_args(vm, vops);
	if (number_tiles == 0)
		return ERR_PTR(-ENODATA);

	for_each_tile(tile, vm->xe, id) {
		++n_fence;

		if (!(vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT))
			for_each_tlb_inval(i)
				++n_fence;
	}

	fences = kmalloc_objs(*fences, n_fence);
	if (!fences) {
		fence = ERR_PTR(-ENOMEM);
		goto err_trace;
	}

	cf = dma_fence_array_alloc(n_fence);
	if (!cf) {
		fence = ERR_PTR(-ENOMEM);
		goto err_out;
	}

	for_each_tile(tile, vm->xe, id) {
		if (!vops->pt_update_ops[id].num_ops)
			continue;

		err = xe_pt_update_ops_prepare(tile, vops);
		if (err) {
			fence =
ERR_PTR(err); 3220 goto err_out; 3221 } 3222 } 3223 3224 trace_xe_vm_ops_execute(vops); 3225 3226 for_each_tile(tile, vm->xe, id) { 3227 struct xe_exec_queue *q = vops->pt_update_ops[tile->id].q; 3228 3229 fence = NULL; 3230 if (!vops->pt_update_ops[id].num_ops) 3231 goto collect_fences; 3232 3233 fence = xe_pt_update_ops_run(tile, vops); 3234 if (IS_ERR(fence)) 3235 goto err_out; 3236 3237 collect_fences: 3238 fences[current_fence++] = fence ?: dma_fence_get_stub(); 3239 if (vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT) 3240 continue; 3241 3242 xe_migrate_job_lock(tile->migrate, q); 3243 for_each_tlb_inval(i) 3244 fences[current_fence++] = 3245 xe_exec_queue_tlb_inval_last_fence_get(q, vm, i); 3246 xe_migrate_job_unlock(tile->migrate, q); 3247 } 3248 3249 xe_assert(vm->xe, current_fence == n_fence); 3250 dma_fence_array_init(cf, n_fence, fences, dma_fence_context_alloc(1), 3251 1, false); 3252 fence = &cf->base; 3253 3254 for_each_tile(tile, vm->xe, id) { 3255 if (!vops->pt_update_ops[id].num_ops) 3256 continue; 3257 3258 xe_pt_update_ops_fini(tile, vops); 3259 } 3260 3261 return fence; 3262 3263 err_out: 3264 for_each_tile(tile, vm->xe, id) { 3265 if (!vops->pt_update_ops[id].num_ops) 3266 continue; 3267 3268 xe_pt_update_ops_abort(tile, vops); 3269 } 3270 while (current_fence) 3271 dma_fence_put(fences[--current_fence]); 3272 kfree(fences); 3273 kfree(cf); 3274 3275 err_trace: 3276 trace_xe_vm_ops_fail(vm); 3277 return fence; 3278 } 3279 3280 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence) 3281 { 3282 if (vma->ufence) 3283 xe_sync_ufence_put(vma->ufence); 3284 vma->ufence = __xe_sync_ufence_get(ufence); 3285 } 3286 3287 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op, 3288 struct xe_user_fence *ufence) 3289 { 3290 switch (op->base.op) { 3291 case DRM_GPUVA_OP_MAP: 3292 if (!xe_vma_is_cpu_addr_mirror(op->map.vma)) 3293 vma_add_ufence(op->map.vma, ufence); 3294 break; 3295 case DRM_GPUVA_OP_REMAP: 3296 if (op->remap.prev) 3297 vma_add_ufence(op->remap.prev, ufence); 3298 if (op->remap.next) 3299 vma_add_ufence(op->remap.next, ufence); 3300 break; 3301 case DRM_GPUVA_OP_UNMAP: 3302 break; 3303 case DRM_GPUVA_OP_PREFETCH: 3304 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence); 3305 break; 3306 default: 3307 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n"); 3308 } 3309 } 3310 3311 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, 3312 struct dma_fence *fence) 3313 { 3314 struct xe_user_fence *ufence; 3315 struct xe_vma_op *op; 3316 int i; 3317 3318 ufence = find_ufence_get(vops->syncs, vops->num_syncs); 3319 list_for_each_entry(op, &vops->list, link) { 3320 if (ufence) 3321 op_add_ufence(vm, op, ufence); 3322 3323 if (op->base.op == DRM_GPUVA_OP_UNMAP) 3324 xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence); 3325 else if (op->base.op == DRM_GPUVA_OP_REMAP) 3326 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), 3327 fence); 3328 } 3329 if (ufence) 3330 xe_sync_ufence_put(ufence); 3331 if (fence) { 3332 for (i = 0; i < vops->num_syncs; i++) 3333 xe_sync_entry_signal(vops->syncs + i, fence); 3334 } 3335 } 3336 3337 static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm, 3338 struct xe_vma_ops *vops) 3339 { 3340 struct xe_validation_ctx ctx; 3341 struct drm_exec exec; 3342 struct dma_fence *fence; 3343 int err = 0; 3344 3345 lockdep_assert_held_write(&vm->lock); 3346 3347 xe_validation_guard(&ctx, &vm->xe->val, &exec, 3348 ((struct xe_val_flags) { 3349 .interruptible = true, 3350 
.exec_ignore_duplicates = true, 3351 }), err) { 3352 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops); 3353 drm_exec_retry_on_contention(&exec); 3354 xe_validation_retry_on_oom(&ctx, &err); 3355 if (err) 3356 return ERR_PTR(err); 3357 3358 xe_vm_set_validation_exec(vm, &exec); 3359 fence = ops_execute(vm, vops); 3360 xe_vm_set_validation_exec(vm, NULL); 3361 if (IS_ERR(fence)) { 3362 if (PTR_ERR(fence) == -ENODATA) 3363 vm_bind_ioctl_ops_fini(vm, vops, NULL); 3364 return fence; 3365 } 3366 3367 vm_bind_ioctl_ops_fini(vm, vops, fence); 3368 } 3369 3370 return err ? ERR_PTR(err) : fence; 3371 } 3372 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); 3373 3374 #define SUPPORTED_FLAGS_STUB \ 3375 (DRM_XE_VM_BIND_FLAG_READONLY | \ 3376 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \ 3377 DRM_XE_VM_BIND_FLAG_NULL | \ 3378 DRM_XE_VM_BIND_FLAG_DUMPABLE | \ 3379 DRM_XE_VM_BIND_FLAG_CHECK_PXP | \ 3380 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR | \ 3381 DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET | \ 3382 DRM_XE_VM_BIND_FLAG_DECOMPRESS) 3383 3384 #ifdef TEST_VM_OPS_ERROR 3385 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR) 3386 #else 3387 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB 3388 #endif 3389 3390 #define XE_64K_PAGE_MASK 0xffffull 3391 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) 3392 3393 static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, 3394 struct drm_xe_vm_bind *args, 3395 struct drm_xe_vm_bind_op **bind_ops) 3396 { 3397 int err; 3398 int i; 3399 3400 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) || 3401 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 3402 return -EINVAL; 3403 3404 if (XE_IOCTL_DBG(xe, args->extensions)) 3405 return -EINVAL; 3406 3407 if (XE_IOCTL_DBG(xe, args->num_syncs > DRM_XE_MAX_SYNCS)) 3408 return -EINVAL; 3409 3410 if (args->num_binds > 1) { 3411 u64 __user *bind_user = 3412 u64_to_user_ptr(args->vector_of_binds); 3413 3414 *bind_ops = kvmalloc_objs(struct drm_xe_vm_bind_op, 3415 args->num_binds, 3416 GFP_KERNEL | __GFP_ACCOUNT | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3417 if (!*bind_ops) 3418 return args->num_binds > 1 ? 
-ENOBUFS : -ENOMEM; 3419 3420 err = copy_from_user(*bind_ops, bind_user, 3421 sizeof(struct drm_xe_vm_bind_op) * 3422 args->num_binds); 3423 if (XE_IOCTL_DBG(xe, err)) { 3424 err = -EFAULT; 3425 goto free_bind_ops; 3426 } 3427 } else { 3428 *bind_ops = &args->bind; 3429 } 3430 3431 for (i = 0; i < args->num_binds; ++i) { 3432 u64 range = (*bind_ops)[i].range; 3433 u64 addr = (*bind_ops)[i].addr; 3434 u32 op = (*bind_ops)[i].op; 3435 u32 flags = (*bind_ops)[i].flags; 3436 u32 obj = (*bind_ops)[i].obj; 3437 u64 obj_offset = (*bind_ops)[i].obj_offset; 3438 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance; 3439 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 3440 bool is_cpu_addr_mirror = flags & 3441 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR; 3442 bool is_decompress = flags & DRM_XE_VM_BIND_FLAG_DECOMPRESS; 3443 u16 pat_index = (*bind_ops)[i].pat_index; 3444 u16 coh_mode; 3445 bool comp_en; 3446 3447 if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror && 3448 (!xe_vm_in_fault_mode(vm) || 3449 !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) { 3450 err = -EINVAL; 3451 goto free_bind_ops; 3452 } 3453 3454 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) { 3455 err = -EINVAL; 3456 goto free_bind_ops; 3457 } 3458 3459 pat_index = array_index_nospec(pat_index, xe->pat.n_entries); 3460 (*bind_ops)[i].pat_index = pat_index; 3461 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); 3462 comp_en = xe_pat_index_get_comp_en(xe, pat_index); 3463 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */ 3464 err = -EINVAL; 3465 goto free_bind_ops; 3466 } 3467 3468 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) { 3469 err = -EINVAL; 3470 goto free_bind_ops; 3471 } 3472 3473 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) || 3474 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) || 3475 XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) || 3476 XE_IOCTL_DBG(xe, obj_offset && (is_null || 3477 is_cpu_addr_mirror)) || 3478 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP && 3479 (is_decompress || is_null || is_cpu_addr_mirror)) || 3480 XE_IOCTL_DBG(xe, is_decompress && 3481 xe_pat_index_get_comp_en(xe, pat_index)) || 3482 XE_IOCTL_DBG(xe, !obj && 3483 op == DRM_XE_VM_BIND_OP_MAP && 3484 !is_null && !is_cpu_addr_mirror) || 3485 XE_IOCTL_DBG(xe, !obj && 3486 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3487 XE_IOCTL_DBG(xe, addr && 3488 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3489 XE_IOCTL_DBG(xe, range && 3490 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3491 XE_IOCTL_DBG(xe, obj && 3492 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 3493 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 3494 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 3495 XE_IOCTL_DBG(xe, comp_en && 3496 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 3497 XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR && 3498 !IS_ENABLED(CONFIG_DRM_GPUSVM)) || 3499 XE_IOCTL_DBG(xe, obj && 3500 op == DRM_XE_VM_BIND_OP_PREFETCH) || 3501 XE_IOCTL_DBG(xe, prefetch_region && 3502 op != DRM_XE_VM_BIND_OP_PREFETCH) || 3503 XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC && 3504 /* Guard against undefined shift in BIT(prefetch_region) */ 3505 (prefetch_region >= (sizeof(xe->info.mem_region_mask) * 8) || 3506 !(BIT(prefetch_region) & xe->info.mem_region_mask)))) || 3507 XE_IOCTL_DBG(xe, obj && 3508 op == DRM_XE_VM_BIND_OP_UNMAP) || 3509 XE_IOCTL_DBG(xe, (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) && 3510 (!is_cpu_addr_mirror || op != DRM_XE_VM_BIND_OP_MAP))) { 3511 err = -EINVAL; 3512 goto free_bind_ops; 3513 } 3514 3515 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) || 3516 
XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, !range &&
				 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
			err = -EINVAL;
			goto free_bind_ops;
		}

		if (is_decompress && (XE_IOCTL_DBG(xe, !xe_device_has_flat_ccs(xe)) ||
				      XE_IOCTL_DBG(xe, GRAPHICS_VER(xe) < 20) ||
				      XE_IOCTL_DBG(xe, !IS_DGFX(xe)))) {
			err = -EOPNOTSUPP;
			goto free_bind_ops;
		}
	}

	return 0;

free_bind_ops:
	if (args->num_binds > 1)
		kvfree(*bind_ops);
	*bind_ops = NULL;
	return err;
}

static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
				       struct xe_exec_queue *q,
				       struct xe_sync_entry *syncs,
				       int num_syncs)
{
	struct dma_fence *fence = NULL;
	int i, err = 0;

	if (num_syncs) {
		fence = xe_sync_in_fence_get(syncs, num_syncs,
					     to_wait_exec_queue(vm, q), vm);
		if (IS_ERR(fence))
			return PTR_ERR(fence);

		for (i = 0; i < num_syncs; i++)
			xe_sync_entry_signal(&syncs[i], fence);
	}

	dma_fence_put(fence);

	return err;
}

static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
			    struct xe_exec_queue *q,
			    struct xe_sync_entry *syncs, u32 num_syncs)
{
	memset(vops, 0, sizeof(*vops));
	INIT_LIST_HEAD(&vops->list);
	vops->vm = vm;
	vops->q = q;
	vops->syncs = syncs;
	vops->num_syncs = num_syncs;
	vops->flags = 0;
}

static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
					u64 addr, u64 range, u64 obj_offset,
					u16 pat_index, u32 op, u32 bind_flags)
{
	u16 coh_mode;
	bool comp_en;

	if (XE_IOCTL_DBG(xe, (bo->flags & XE_BO_FLAG_NO_COMPRESSION) &&
			 xe_pat_index_get_comp_en(xe, pat_index)))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) ||
	    XE_IOCTL_DBG(xe, obj_offset >
			 xe_bo_size(bo) - range)) {
		return -EINVAL;
	}

	/*
	 * Some platforms require 64k VM_BIND alignment,
	 * specifically those with XE_VRAM_FLAGS_NEED64K.
	 *
	 * Other platforms may have BO's set to 64k physical placement,
	 * but can be mapped at 4k offsets anyway. This check is only
	 * there for the former case.
	 */
	if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) &&
	    (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) {
		if (XE_IOCTL_DBG(xe, obj_offset &
				 XE_64K_PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
			return -EINVAL;
		}
	}

	coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
	if (bo->cpu_caching) {
		if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
				 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
			return -EINVAL;
		}
	} else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
		/*
		 * Imported dma-buf from a different device should
		 * require 1way or 2way coherency since we don't know
		 * how it was mapped on the CPU. Just assume it is
		 * potentially cached on the CPU side.
		 */
		return -EINVAL;
	}

	/*
	 * Ensures that imported buffer objects (dma-bufs) are not mapped
	 * with a PAT index that enables compression.
3631 */ 3632 comp_en = xe_pat_index_get_comp_en(xe, pat_index); 3633 if (XE_IOCTL_DBG(xe, bo->ttm.base.import_attach && comp_en)) 3634 return -EINVAL; 3635 3636 /* If a BO is protected it can only be mapped if the key is still valid */ 3637 if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) && 3638 op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL) 3639 if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0)) 3640 return -ENOEXEC; 3641 3642 return 0; 3643 } 3644 3645 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3646 { 3647 struct xe_device *xe = to_xe_device(dev); 3648 struct xe_file *xef = to_xe_file(file); 3649 struct drm_xe_vm_bind *args = data; 3650 struct drm_xe_sync __user *syncs_user; 3651 struct xe_bo **bos = NULL; 3652 struct drm_gpuva_ops **ops = NULL; 3653 struct xe_vm *vm; 3654 struct xe_exec_queue *q = NULL; 3655 u32 num_syncs, num_ufence = 0; 3656 struct xe_sync_entry *syncs = NULL; 3657 struct drm_xe_vm_bind_op *bind_ops = NULL; 3658 struct xe_vma_ops vops; 3659 struct dma_fence *fence; 3660 int err; 3661 int i; 3662 3663 vm = xe_vm_lookup(xef, args->vm_id); 3664 if (XE_IOCTL_DBG(xe, !vm)) 3665 return -EINVAL; 3666 3667 err = vm_bind_ioctl_check_args(xe, vm, args, &bind_ops); 3668 if (err) 3669 goto put_vm; 3670 3671 if (args->exec_queue_id) { 3672 q = xe_exec_queue_lookup(xef, args->exec_queue_id); 3673 if (XE_IOCTL_DBG(xe, !q)) { 3674 err = -ENOENT; 3675 goto free_bind_ops; 3676 } 3677 3678 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) { 3679 err = -EINVAL; 3680 goto put_exec_queue; 3681 } 3682 } 3683 3684 if (XE_IOCTL_DBG(xe, q && vm != q->user_vm)) { 3685 err = -EINVAL; 3686 goto put_exec_queue; 3687 } 3688 3689 /* Ensure all UNMAPs visible */ 3690 xe_svm_flush(vm); 3691 3692 err = down_write_killable(&vm->lock); 3693 if (err) 3694 goto put_exec_queue; 3695 3696 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { 3697 err = -ENOENT; 3698 goto release_vm_lock; 3699 } 3700 3701 for (i = 0; i < args->num_binds; ++i) { 3702 u64 range = bind_ops[i].range; 3703 u64 addr = bind_ops[i].addr; 3704 3705 if (XE_IOCTL_DBG(xe, range > vm->size) || 3706 XE_IOCTL_DBG(xe, addr > vm->size - range)) { 3707 err = -EINVAL; 3708 goto release_vm_lock; 3709 } 3710 } 3711 3712 if (args->num_binds) { 3713 bos = kvzalloc_objs(*bos, args->num_binds, 3714 GFP_KERNEL | __GFP_ACCOUNT | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3715 if (!bos) { 3716 err = -ENOMEM; 3717 goto release_vm_lock; 3718 } 3719 3720 ops = kvzalloc_objs(*ops, args->num_binds, 3721 GFP_KERNEL | __GFP_ACCOUNT | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3722 if (!ops) { 3723 err = -ENOMEM; 3724 goto free_bos; 3725 } 3726 } 3727 3728 for (i = 0; i < args->num_binds; ++i) { 3729 struct drm_gem_object *gem_obj; 3730 u64 range = bind_ops[i].range; 3731 u64 addr = bind_ops[i].addr; 3732 u32 obj = bind_ops[i].obj; 3733 u64 obj_offset = bind_ops[i].obj_offset; 3734 u16 pat_index = bind_ops[i].pat_index; 3735 u32 op = bind_ops[i].op; 3736 u32 bind_flags = bind_ops[i].flags; 3737 3738 if (!obj) 3739 continue; 3740 3741 gem_obj = drm_gem_object_lookup(file, obj); 3742 if (XE_IOCTL_DBG(xe, !gem_obj)) { 3743 err = -ENOENT; 3744 goto put_obj; 3745 } 3746 bos[i] = gem_to_xe_bo(gem_obj); 3747 3748 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range, 3749 obj_offset, pat_index, op, 3750 bind_flags); 3751 if (err) 3752 goto put_obj; 3753 } 3754 3755 if (args->num_syncs) { 3756 syncs = kzalloc_objs(*syncs, args->num_syncs); 3757 if (!syncs) { 3758 err = 
-ENOMEM; 3759 goto put_obj; 3760 } 3761 } 3762 3763 syncs_user = u64_to_user_ptr(args->syncs); 3764 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { 3765 struct xe_exec_queue *__q = q ?: vm->q[0]; 3766 3767 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], 3768 &syncs_user[num_syncs], 3769 __q->ufence_syncobj, 3770 ++__q->ufence_timeline_value, 3771 (xe_vm_in_lr_mode(vm) ? 3772 SYNC_PARSE_FLAG_LR_MODE : 0) | 3773 (!args->num_binds ? 3774 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0)); 3775 if (err) 3776 goto free_syncs; 3777 3778 if (xe_sync_is_ufence(&syncs[num_syncs])) 3779 num_ufence++; 3780 } 3781 3782 if (XE_IOCTL_DBG(xe, num_ufence > 1)) { 3783 err = -EINVAL; 3784 goto free_syncs; 3785 } 3786 3787 if (!args->num_binds) { 3788 err = -ENODATA; 3789 goto free_syncs; 3790 } 3791 3792 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); 3793 if (args->num_binds > 1) 3794 vops.flags |= XE_VMA_OPS_ARRAY_OF_BINDS; 3795 for (i = 0; i < args->num_binds; ++i) { 3796 u64 range = bind_ops[i].range; 3797 u64 addr = bind_ops[i].addr; 3798 u32 op = bind_ops[i].op; 3799 u32 flags = bind_ops[i].flags; 3800 u64 obj_offset = bind_ops[i].obj_offset; 3801 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance; 3802 u16 pat_index = bind_ops[i].pat_index; 3803 3804 ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset, 3805 addr, range, op, flags, 3806 prefetch_region, pat_index); 3807 if (IS_ERR(ops[i])) { 3808 err = PTR_ERR(ops[i]); 3809 ops[i] = NULL; 3810 goto unwind_ops; 3811 } 3812 3813 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops); 3814 if (err) 3815 goto unwind_ops; 3816 3817 #ifdef TEST_VM_OPS_ERROR 3818 if (flags & FORCE_OP_ERROR) { 3819 vops.inject_error = true; 3820 vm->xe->vm_inject_error_position = 3821 (vm->xe->vm_inject_error_position + 1) % 3822 FORCE_OP_ERROR_COUNT; 3823 } 3824 #endif 3825 } 3826 3827 /* Nothing to do */ 3828 if (list_empty(&vops.list)) { 3829 err = -ENODATA; 3830 goto unwind_ops; 3831 } 3832 3833 err = xe_vma_ops_alloc(&vops, args->num_binds > 1); 3834 if (err) 3835 goto unwind_ops; 3836 3837 err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops); 3838 if (err) 3839 goto unwind_ops; 3840 3841 fence = vm_bind_ioctl_ops_execute(vm, &vops); 3842 if (IS_ERR(fence)) 3843 err = PTR_ERR(fence); 3844 else 3845 dma_fence_put(fence); 3846 3847 unwind_ops: 3848 if (err && err != -ENODATA) 3849 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); 3850 xe_vma_ops_fini(&vops); 3851 for (i = args->num_binds - 1; i >= 0; --i) 3852 if (ops[i]) 3853 drm_gpuva_ops_free(&vm->gpuvm, ops[i]); 3854 free_syncs: 3855 if (err == -ENODATA) 3856 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs); 3857 while (num_syncs--) 3858 xe_sync_entry_cleanup(&syncs[num_syncs]); 3859 3860 kfree(syncs); 3861 put_obj: 3862 for (i = 0; i < args->num_binds; ++i) 3863 xe_bo_put(bos[i]); 3864 3865 kvfree(ops); 3866 free_bos: 3867 kvfree(bos); 3868 release_vm_lock: 3869 up_write(&vm->lock); 3870 put_exec_queue: 3871 if (q) 3872 xe_exec_queue_put(q); 3873 free_bind_ops: 3874 if (args->num_binds > 1) 3875 kvfree(bind_ops); 3876 put_vm: 3877 xe_vm_put(vm); 3878 return err; 3879 } 3880 3881 /** 3882 * xe_vm_bind_kernel_bo - bind a kernel BO to a VM 3883 * @vm: VM to bind the BO to 3884 * @bo: BO to bind 3885 * @q: exec queue to use for the bind (optional) 3886 * @addr: address at which to bind the BO 3887 * @cache_lvl: PAT cache level to use 3888 * 3889 * Execute a VM bind map operation on a kernel-owned BO to bind it into a 3890 * kernel-owned VM. 
 *
 * Return: a dma_fence to track the binding completion if the job to do so was
 * successfully submitted, an error pointer otherwise.
 */
struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo,
				       struct xe_exec_queue *q, u64 addr,
				       enum xe_cache_level cache_lvl)
{
	struct xe_vma_ops vops;
	struct drm_gpuva_ops *ops = NULL;
	struct dma_fence *fence;
	int err;

	xe_bo_get(bo);
	xe_vm_get(vm);
	if (q)
		xe_exec_queue_get(q);

	down_write(&vm->lock);

	xe_vma_ops_init(&vops, vm, q, NULL, 0);

	ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo),
				       DRM_XE_VM_BIND_OP_MAP, 0, 0,
				       vm->xe->pat.idx[cache_lvl]);
	if (IS_ERR(ops)) {
		err = PTR_ERR(ops);
		goto release_vm_lock;
	}

	err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
	if (err)
		goto release_vm_lock;

	xe_assert(vm->xe, !list_empty(&vops.list));

	err = xe_vma_ops_alloc(&vops, false);
	if (err)
		goto unwind_ops;

	fence = vm_bind_ioctl_ops_execute(vm, &vops);
	if (IS_ERR(fence))
		err = PTR_ERR(fence);

unwind_ops:
	if (err && err != -ENODATA)
		vm_bind_ioctl_ops_unwind(vm, &ops, 1);

	xe_vma_ops_fini(&vops);
	drm_gpuva_ops_free(&vm->gpuvm, ops);

release_vm_lock:
	up_write(&vm->lock);

	if (q)
		xe_exec_queue_put(q);
	xe_vm_put(vm);
	xe_bo_put(bo);

	if (err)
		fence = ERR_PTR(err);

	return fence;
}

/**
 * xe_vm_lock() - Lock the vm's dma_resv object
 * @vm: The struct xe_vm whose lock is to be locked
 * @intr: Whether to perform any wait interruptible
 *
 * Return: 0 on success, -EINTR if @intr is true and the wait for a
 * contended lock was interrupted. If @intr is false, the function
 * always returns 0.
 */
int xe_vm_lock(struct xe_vm *vm, bool intr)
{
	int ret;

	if (intr)
		ret = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
	else
		ret = dma_resv_lock(xe_vm_resv(vm), NULL);

	return ret;
}

/**
 * xe_vm_unlock() - Unlock the vm's dma_resv object
 * @vm: The struct xe_vm whose lock is to be released.
 *
 * Unlock the vm's dma_resv object that was locked by xe_vm_lock().
 */
void xe_vm_unlock(struct xe_vm *vm)
{
	dma_resv_unlock(xe_vm_resv(vm));
}
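/*
 * Example usage (sketch): callers bracket short sections that need the
 * VM's reservation object held, typically with an interruptible wait:
 *
 *	err = xe_vm_lock(vm, true);
 *	if (err)
 *		return err;
 *	... operate on state protected by the vm's resv ...
 *	xe_vm_unlock(vm);
 */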
/**
 * xe_vm_invalidate_vma_submit - Submit a job to invalidate GPU mappings for
 * VMA.
 * @vma: VMA to invalidate
 * @batch: TLB invalidation batch to populate; caller must later call
 * xe_tlb_inval_batch_wait() on it to wait for completion
 *
 * Walks the list of page-table leaves, zeroing the entries owned by this
 * VMA, and issues the TLB invalidations. It does not block waiting for the
 * invalidations to complete; instead it populates @batch, which can be
 * waited on using xe_tlb_inval_batch_wait().
 *
 * Return: 0 for success, negative error code otherwise.
 */
int xe_vm_invalidate_vma_submit(struct xe_vma *vma, struct xe_tlb_inval_batch *batch)
{
	struct xe_device *xe = xe_vma_vm(vma)->xe;
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_tile *tile;
	u8 tile_mask = 0;
	int ret = 0;
	u8 id;

	xe_assert(xe, !xe_vma_is_null(vma));
	xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma));
	trace_xe_vma_invalidate(vma);

	vm_dbg(&vm->xe->drm,
	       "INVALIDATE: addr=0x%016llx, range=0x%016llx",
	       xe_vma_start(vma), xe_vma_size(vma));

	/*
	 * Check that we don't race with page-table updates, tile_invalidated
	 * update is safe
	 */
	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
		if (xe_vma_is_userptr(vma)) {
			lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) ||
				       (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) &&
					lockdep_is_held(&xe_vm_resv(vm)->lock.base)));

			WARN_ON_ONCE(!mmu_interval_check_retry
				     (&to_userptr_vma(vma)->userptr.notifier,
				      to_userptr_vma(vma)->userptr.pages.notifier_seq));
			WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm),
							     DMA_RESV_USAGE_BOOKKEEP));

		} else {
			xe_bo_assert_held(xe_vma_bo(vma));
		}
	}

	for_each_tile(tile, xe, id)
		if (xe_pt_zap_ptes(tile, vma))
			tile_mask |= BIT(id);

	xe_device_wmb(xe);

	ret = xe_tlb_inval_range_tilemask_submit(xe, xe_vma_vm(vma)->usm.asid,
						 xe_vma_start(vma), xe_vma_end(vma),
						 tile_mask, batch);

	/* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
	WRITE_ONCE(vma->tile_invalidated, vma->tile_mask);
	return ret;
}

/**
 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
 * @vma: VMA to invalidate
 *
 * Walks the list of page-table leaves, zeroing the entries owned by this
 * VMA, invalidates the TLBs, and blocks until the TLB invalidation is
 * complete.
 *
 * Return: 0 for success, negative error code otherwise.
 */
int xe_vm_invalidate_vma(struct xe_vma *vma)
{
	struct xe_tlb_inval_batch batch;
	int ret;

	ret = xe_vm_invalidate_vma_submit(vma, &batch);
	if (ret)
		return ret;

	xe_tlb_inval_batch_wait(&batch);
	return ret;
}

int xe_vm_validate_protected(struct xe_vm *vm)
{
	struct drm_gpuva *gpuva;
	int err = 0;

	if (!vm)
		return -ENODEV;

	mutex_lock(&vm->snap_mutex);

	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);
		struct xe_bo *bo = vma->gpuva.gem.obj ?
4091 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 4092 4093 if (!bo) 4094 continue; 4095 4096 if (xe_bo_is_protected(bo)) { 4097 err = xe_pxp_bo_key_check(vm->xe->pxp, bo); 4098 if (err) 4099 break; 4100 } 4101 } 4102 4103 mutex_unlock(&vm->snap_mutex); 4104 return err; 4105 } 4106 4107 struct xe_vm_snapshot { 4108 int uapi_flags; 4109 unsigned long num_snaps; 4110 struct { 4111 u64 ofs, bo_ofs; 4112 unsigned long len; 4113 #define XE_VM_SNAP_FLAG_USERPTR BIT(0) 4114 #define XE_VM_SNAP_FLAG_READ_ONLY BIT(1) 4115 #define XE_VM_SNAP_FLAG_IS_NULL BIT(2) 4116 unsigned long flags; 4117 int uapi_mem_region; 4118 int pat_index; 4119 int cpu_caching; 4120 struct xe_bo *bo; 4121 void *data; 4122 struct mm_struct *mm; 4123 } snap[]; 4124 }; 4125 4126 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm) 4127 { 4128 unsigned long num_snaps = 0, i; 4129 struct xe_vm_snapshot *snap = NULL; 4130 struct drm_gpuva *gpuva; 4131 4132 if (!vm) 4133 return NULL; 4134 4135 mutex_lock(&vm->snap_mutex); 4136 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 4137 if (gpuva->flags & XE_VMA_DUMPABLE) 4138 num_snaps++; 4139 } 4140 4141 if (num_snaps) 4142 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT); 4143 if (!snap) { 4144 snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV); 4145 goto out_unlock; 4146 } 4147 4148 if (vm->flags & XE_VM_FLAG_FAULT_MODE) 4149 snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_FAULT_MODE; 4150 if (vm->flags & XE_VM_FLAG_LR_MODE) 4151 snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_LR_MODE; 4152 if (vm->flags & XE_VM_FLAG_SCRATCH_PAGE) 4153 snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE; 4154 4155 snap->num_snaps = num_snaps; 4156 i = 0; 4157 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 4158 struct xe_vma *vma = gpuva_to_vma(gpuva); 4159 struct xe_bo *bo = vma->gpuva.gem.obj ? 4160 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 4161 4162 if (!(gpuva->flags & XE_VMA_DUMPABLE)) 4163 continue; 4164 4165 snap->snap[i].ofs = xe_vma_start(vma); 4166 snap->snap[i].len = xe_vma_size(vma); 4167 snap->snap[i].flags = xe_vma_read_only(vma) ? 
struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
{
	unsigned long num_snaps = 0, i;
	struct xe_vm_snapshot *snap = NULL;
	struct drm_gpuva *gpuva;

	if (!vm)
		return NULL;

	mutex_lock(&vm->snap_mutex);
	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		if (gpuva->flags & XE_VMA_DUMPABLE)
			num_snaps++;
	}

	if (num_snaps)
		snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
	if (!snap) {
		snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV);
		goto out_unlock;
	}

	if (vm->flags & XE_VM_FLAG_FAULT_MODE)
		snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_FAULT_MODE;
	if (vm->flags & XE_VM_FLAG_LR_MODE)
		snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_LR_MODE;
	if (vm->flags & XE_VM_FLAG_SCRATCH_PAGE)
		snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;

	snap->num_snaps = num_snaps;
	i = 0;
	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);
		struct xe_bo *bo = vma->gpuva.gem.obj ?
			gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;

		if (!(gpuva->flags & XE_VMA_DUMPABLE))
			continue;

		snap->snap[i].ofs = xe_vma_start(vma);
		snap->snap[i].len = xe_vma_size(vma);
		snap->snap[i].flags = xe_vma_read_only(vma) ?
			XE_VM_SNAP_FLAG_READ_ONLY : 0;
		snap->snap[i].pat_index = vma->attr.pat_index;
		if (bo) {
			snap->snap[i].cpu_caching = bo->cpu_caching;
			snap->snap[i].bo = xe_bo_get(bo);
			snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
			switch (bo->ttm.resource->mem_type) {
			case XE_PL_SYSTEM:
			case XE_PL_TT:
				snap->snap[i].uapi_mem_region = 0;
				break;
			case XE_PL_VRAM0:
				snap->snap[i].uapi_mem_region = 1;
				break;
			case XE_PL_VRAM1:
				snap->snap[i].uapi_mem_region = 2;
				break;
			}
		} else if (xe_vma_is_userptr(vma)) {
			struct mm_struct *mm =
				to_userptr_vma(vma)->userptr.notifier.mm;

			if (mmget_not_zero(mm))
				snap->snap[i].mm = mm;
			else
				snap->snap[i].data = ERR_PTR(-EFAULT);

			snap->snap[i].bo_ofs = xe_vma_userptr(vma);
			snap->snap[i].flags |= XE_VM_SNAP_FLAG_USERPTR;
			snap->snap[i].uapi_mem_region = 0;
		} else if (xe_vma_is_null(vma)) {
			snap->snap[i].flags |= XE_VM_SNAP_FLAG_IS_NULL;
			snap->snap[i].uapi_mem_region = -1;
		} else {
			snap->snap[i].data = ERR_PTR(-ENOENT);
			snap->snap[i].uapi_mem_region = -1;
		}
		i++;
	}

out_unlock:
	mutex_unlock(&vm->snap_mutex);
	return snap;
}

void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
{
	if (IS_ERR_OR_NULL(snap))
		return;

	for (int i = 0; i < snap->num_snaps; i++) {
		struct xe_bo *bo = snap->snap[i].bo;
		int err;

		if (IS_ERR(snap->snap[i].data) ||
		    snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL)
			continue;

		snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
		if (!snap->snap[i].data) {
			snap->snap[i].data = ERR_PTR(-ENOMEM);
			goto cleanup_bo;
		}

		if (bo) {
			err = xe_bo_read(bo, snap->snap[i].bo_ofs,
					 snap->snap[i].data, snap->snap[i].len);
		} else {
			void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;

			kthread_use_mm(snap->snap[i].mm);
			if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
				err = 0;
			else
				err = -EFAULT;
			kthread_unuse_mm(snap->snap[i].mm);

			mmput(snap->snap[i].mm);
			snap->snap[i].mm = NULL;
		}

		if (err) {
			kvfree(snap->snap[i].data);
			snap->snap[i].data = ERR_PTR(err);
		}

cleanup_bo:
		xe_bo_put(bo);
		snap->snap[i].bo = NULL;
	}
}
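
/*
 * Lifecycle sketch (illustrative only, not driver code): capture is split so
 * that xe_vm_snapshot_capture() can run in a context that must not sleep for
 * allocations (GFP_NOWAIT under vm->snap_mutex), while the actual copying in
 * xe_vm_snapshot_capture_delayed() happens later from a context that may
 * fault and sleep. All four entry points tolerate an IS_ERR_OR_NULL snapshot,
 * so no intermediate error checks are needed:
 *
 *	struct xe_vm_snapshot *snap = xe_vm_snapshot_capture(vm);
 *
 *	... later, e.g. from a worker ...
 *	xe_vm_snapshot_capture_delayed(snap);
 *	xe_vm_snapshot_print(snap, p);
 *	xe_vm_snapshot_free(snap);
 */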
void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
{
	unsigned long i, j;

	if (IS_ERR_OR_NULL(snap)) {
		drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
		return;
	}

	drm_printf(p, "VM.uapi_flags: 0x%x\n", snap->uapi_flags);
	for (i = 0; i < snap->num_snaps; i++) {
		drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);

		drm_printf(p, "[%llx].properties: %s|%s|mem_region=0x%lx|pat_index=%d|cpu_caching=%d\n",
			   snap->snap[i].ofs,
			   snap->snap[i].flags & XE_VM_SNAP_FLAG_READ_ONLY ?
			   "read_only" : "read_write",
			   snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL ?
			   "null_sparse" :
			   snap->snap[i].flags & XE_VM_SNAP_FLAG_USERPTR ?
			   "userptr" : "bo",
			   snap->snap[i].uapi_mem_region == -1 ? 0 :
			   BIT(snap->snap[i].uapi_mem_region),
			   snap->snap[i].pat_index,
			   snap->snap[i].cpu_caching);

		if (IS_ERR(snap->snap[i].data)) {
			drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs,
				   PTR_ERR(snap->snap[i].data));
			continue;
		}

		if (snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL)
			continue;

		drm_printf(p, "[%llx].data: ", snap->snap[i].ofs);

		for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
			u32 *val = snap->snap[i].data + j;
			char dumped[ASCII85_BUFSZ];

			drm_puts(p, ascii85_encode(*val, dumped));
		}

		drm_puts(p, "\n");

		if (drm_coredump_printer_is_full(p))
			return;
	}
}

void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
{
	unsigned long i;

	if (IS_ERR_OR_NULL(snap))
		return;

	for (i = 0; i < snap->num_snaps; i++) {
		if (!IS_ERR(snap->snap[i].data))
			kvfree(snap->snap[i].data);
		xe_bo_put(snap->snap[i].bo);
		if (snap->snap[i].mm)
			mmput(snap->snap[i].mm);
	}
	kvfree(snap);
}

/**
 * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations
 * @xe: Pointer to the Xe device structure
 * @vma: Pointer to the virtual memory area (VMA) structure
 * @is_atomic: True if called from the pagefault path for an atomic access
 *
 * This function determines whether the given VMA needs to be migrated to
 * VRAM in order to perform an atomic GPU operation.
 *
 * Return:
 * 1 - Migration to VRAM is required
 * 0 - Migration is not required
 * -EACCES - Invalid access for atomic memory attr
 */
int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic)
{
	u32 atomic_access = xe_vma_bo(vma) ? xe_vma_bo(vma)->attr.atomic_access :
		vma->attr.atomic_access;

	if (!IS_DGFX(xe) || !is_atomic)
		return 0;

	/*
	 * NOTE: The checks implemented here are platform-specific. For
	 * instance, on a device supporting CXL atomics, these would ideally
	 * work universally without additional handling.
	 */
	switch (atomic_access) {
	case DRM_XE_ATOMIC_DEVICE:
		return !xe->info.has_device_atomics_on_smem;

	case DRM_XE_ATOMIC_CPU:
		return -EACCES;

	case DRM_XE_ATOMIC_UNDEFINED:
	case DRM_XE_ATOMIC_GLOBAL:
	default:
		return 1;
	}
}
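
/*
 * Caller sketch (illustrative only, not driver code): the return value is a
 * tri-state, so a pagefault-path caller must distinguish "reject the
 * access", "migrate" and "no migration needed":
 *
 *	ret = xe_vma_need_vram_for_atomic(xe, vma, is_atomic);
 *	if (ret < 0)
 *		return ret;	... e.g. -EACCES, reject the access ...
 *	if (ret)
 *		... migrate the backing store to VRAM before updating PTEs ...
 */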
static int xe_vm_alloc_vma(struct xe_vm *vm,
			   struct drm_gpuvm_map_req *map_req,
			   bool is_madvise)
{
	struct xe_vma_ops vops;
	struct drm_gpuva_ops *ops = NULL;
	struct drm_gpuva_op *__op;
	unsigned int vma_flags = 0;
	bool remap_op = false;
	struct xe_vma_mem_attr tmp_attr = {};
	u16 default_pat;
	int err;

	lockdep_assert_held_write(&vm->lock);

	if (is_madvise)
		ops = drm_gpuvm_madvise_ops_create(&vm->gpuvm, map_req);
	else
		ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, map_req);

	if (IS_ERR(ops))
		return PTR_ERR(ops);

	if (list_empty(&ops->list)) {
		err = 0;
		goto free_ops;
	}

	drm_gpuva_for_each_op(__op, ops) {
		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
		struct xe_vma *vma = NULL;

		if (!is_madvise) {
			if (__op->op == DRM_GPUVA_OP_UNMAP) {
				vma = gpuva_to_vma(op->base.unmap.va);
				XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma));
				default_pat = vma->attr.default_pat_index;
				vma_flags = vma->gpuva.flags;
			}

			if (__op->op == DRM_GPUVA_OP_REMAP) {
				vma = gpuva_to_vma(op->base.remap.unmap->va);
				default_pat = vma->attr.default_pat_index;
				vma_flags = vma->gpuva.flags;
			}

			if (__op->op == DRM_GPUVA_OP_MAP) {
				op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK;
				op->map.pat_index = default_pat;
			}
		} else {
			if (__op->op == DRM_GPUVA_OP_REMAP) {
				vma = gpuva_to_vma(op->base.remap.unmap->va);
				xe_assert(vm->xe, !remap_op);
				xe_assert(vm->xe, xe_vma_has_no_bo(vma));
				remap_op = true;
				vma_flags = vma->gpuva.flags;
			}

			if (__op->op == DRM_GPUVA_OP_MAP) {
				xe_assert(vm->xe, remap_op);
				remap_op = false;
				/*
				 * For madvise ops a DRM_GPUVA_OP_MAP always
				 * follows a DRM_GPUVA_OP_REMAP, so propagate
				 * the flags from the VMA we're unmapping.
				 */
				op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK;
			}
		}
		print_op(vm->xe, __op);
	}

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);

	if (is_madvise)
		vops.flags |= XE_VMA_OPS_FLAG_MADVISE;
	else
		vops.flags |= XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP;

	err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
	if (err)
		goto unwind_ops;

	xe_vm_lock(vm, false);

	drm_gpuva_for_each_op(__op, ops) {
		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
		struct xe_vma *vma;

		if (__op->op == DRM_GPUVA_OP_UNMAP) {
			vma = gpuva_to_vma(op->base.unmap.va);
			/* There should be no unmap for madvise */
			if (is_madvise)
				XE_WARN_ON("UNEXPECTED UNMAP");

			xe_vma_destroy(vma, NULL);
		} else if (__op->op == DRM_GPUVA_OP_REMAP) {
			vma = gpuva_to_vma(op->base.remap.unmap->va);
			/*
			 * For madvise ops, store the attributes of the VMA
			 * being unmapped by the REMAP so they can be assigned
			 * to the newly created MAP VMA.
			 */
			if (is_madvise)
				xe_vma_mem_attr_copy(&tmp_attr, &vma->attr);

			xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL);
		} else if (__op->op == DRM_GPUVA_OP_MAP) {
			vma = op->map.vma;
			/*
			 * For madvise ops a MAP is always preceded by a
			 * REMAP, so tmp_attr always holds sane values by now,
			 * making it safe to copy them to the new VMA.
			 */
			if (is_madvise)
				xe_vma_mem_attr_copy(&vma->attr, &tmp_attr);
		}
	}

	xe_vm_unlock(vm);
	drm_gpuva_ops_free(&vm->gpuvm, ops);
	xe_vma_mem_attr_fini(&tmp_attr);
	return 0;

unwind_ops:
	vm_bind_ioctl_ops_unwind(vm, &ops, 1);
free_ops:
	drm_gpuva_ops_free(&vm->gpuvm, ops);
	return err;
}

/**
 * xe_vm_alloc_madvise_vma - Allocate VMAs with madvise ops
 * @vm: Pointer to the xe_vm structure
 * @start: Starting input address
 * @range: Size of the input range
 *
 * This function splits existing VMAs as needed to create new VMAs covering
 * exactly the user-provided input range.
 *
 * Return: 0 on success, negative error code on failure.
 */
int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
{
	struct drm_gpuvm_map_req map_req = {
		.map.va.addr = start,
		.map.va.range = range,
	};

	lockdep_assert_held_write(&vm->lock);

	vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range);

	return xe_vm_alloc_vma(vm, &map_req, true);
}
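
/*
 * Caller sketch (illustrative only, not driver code): the function asserts
 * vm->lock is held for writing, so a madvise-style caller looks like:
 *
 *	down_write(&vm->lock);
 *	err = xe_vm_alloc_madvise_vma(vm, start, range);
 *	if (!err)
 *		... apply the new memory attributes to the VMAs within
 *		    [start, start + range) ...
 *	up_write(&vm->lock);
 */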
static bool is_cpu_addr_vma_with_default_attr(struct xe_vma *vma)
{
	return vma && xe_vma_is_cpu_addr_mirror(vma) &&
	       xe_vma_has_default_mem_attrs(vma);
}

/**
 * xe_vm_find_cpu_addr_mirror_vma_range - Extend a VMA range to include adjacent CPU-mirrored VMAs
 * @vm: VM to search within
 * @start: Input/output pointer to the starting address of the range
 * @end: Input/output pointer to the end address of the range
 *
 * Given a range defined by @start and @end, this function checks the VMAs
 * immediately before and after the range. If those neighboring VMAs are
 * CPU-address-mirrored and have default memory attributes, the function
 * updates @start and @end to include them. This extended range can then
 * be used for merging or other operations that require a unified VMA.
 *
 * The function does not perform the merge itself; it only computes the
 * mergeable boundaries.
 */
void xe_vm_find_cpu_addr_mirror_vma_range(struct xe_vm *vm, u64 *start, u64 *end)
{
	struct xe_vma *prev, *next;

	lockdep_assert_held(&vm->lock);

	if (*start >= SZ_4K) {
		prev = xe_vm_find_vma_by_addr(vm, *start - SZ_4K);
		if (is_cpu_addr_vma_with_default_attr(prev))
			*start = xe_vma_start(prev);
	}

	if (*end < vm->size) {
		next = xe_vm_find_vma_by_addr(vm, *end + 1);
		if (is_cpu_addr_vma_with_default_attr(next))
			*end = xe_vma_end(next);
	}
}

/**
 * xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU address mirror VMA
 * @vm: Pointer to the xe_vm structure
 * @start: Starting input address
 * @range: Size of the input range
 *
 * This function splits and/or merges existing VMAs to create a new VMA
 * covering the user-provided input range.
 *
 * Return: 0 on success, negative error code on failure.
 */
int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
{
	struct drm_gpuvm_map_req map_req = {
		.map.va.addr = start,
		.map.va.range = range,
	};

	lockdep_assert_held_write(&vm->lock);

	vm_dbg(&vm->xe->drm, "CPU_ADDR_MIRROR_VMA_OPS_CREATE: addr=0x%016llx, size=0x%016llx",
	       start, range);

	return xe_vm_alloc_vma(vm, &map_req, false);
}

/**
 * xe_vm_add_exec_queue() - Add exec queue to VM
 * @vm: The VM.
 * @q: The exec_queue
 *
 * Add exec queue to VM, skipped if the device does not have context based TLB
 * invalidations.
 */
void xe_vm_add_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	struct xe_device *xe = vm->xe;

	/* User VMs and queues only */
	xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_KERNEL));
	xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
	xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM));
	xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_MIGRATE));
	xe_assert(xe, vm->xef);
	xe_assert(xe, vm == q->vm);

	if (!xe->info.has_ctx_tlb_inval)
		return;

	down_write(&vm->exec_queues.lock);
	list_add(&q->vm_exec_queue_link, &vm->exec_queues.list[q->gt->info.id]);
	++vm->exec_queues.count[q->gt->info.id];
	up_write(&vm->exec_queues.lock);
}

/**
 * xe_vm_remove_exec_queue() - Remove exec queue from VM
 * @vm: The VM.
 * @q: The exec_queue
 *
 * Remove exec queue from VM, skipped if the device does not have context based
 * TLB invalidations.
 */
void xe_vm_remove_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	if (!vm->xe->info.has_ctx_tlb_inval)
		return;

	down_write(&vm->exec_queues.lock);
	if (!list_empty(&q->vm_exec_queue_link)) {
		list_del(&q->vm_exec_queue_link);
		--vm->exec_queues.count[q->gt->info.id];
	}
	up_write(&vm->exec_queues.lock);
}
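
/*
 * Combined sketch (illustrative only, not driver code): a caller that wants a
 * single CPU-address-mirror VMA covering a range plus any mergeable
 * neighbours can pair the two helpers above, with vm->lock held for write:
 *
 *	u64 start = addr, end = addr + range;
 *
 *	xe_vm_find_cpu_addr_mirror_vma_range(vm, &start, &end);
 *	err = xe_vm_alloc_cpu_addr_mirror_vma(vm, start, end - start);
 */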