// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_vm.h"

#include <linux/dma-fence-array.h>
#include <linux/nospec.h>

#include <drm/drm_drv.h>
#include <drm/drm_exec.h>
#include <drm/drm_print.h>
#include <drm/ttm/ttm_tt.h>
#include <uapi/drm/xe_drm.h>
#include <linux/ascii85.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/swap.h>

#include <generated/xe_wa_oob.h>

#include "regs/xe_gtt_defs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue.h"
#include "xe_migrate.h"
#include "xe_pat.h"
#include "xe_pm.h"
#include "xe_preempt_fence.h"
#include "xe_pt.h"
#include "xe_pxp.h"
#include "xe_res_cursor.h"
#include "xe_sriov_vf.h"
#include "xe_svm.h"
#include "xe_sync.h"
#include "xe_tile.h"
#include "xe_tlb_inval.h"
#include "xe_trace_bo.h"
#include "xe_wa.h"

/* Return the GEM object wrapping the VM's shared dma-resv. */
static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
{
	return vm->gpuvm.r_obj;
}

/**
 * xe_vm_drm_exec_lock() - Lock the vm's resv with a drm_exec transaction
 * @vm: The vm whose resv is to be locked.
 * @exec: The drm_exec transaction.
 *
 * Helper to lock the vm's resv as part of a drm_exec transaction.
 *
 * Return: %0 on success. See drm_exec_lock_obj() for error codes.
 */
int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec)
{
	return drm_exec_lock_obj(exec, xe_vm_obj(vm));
}

/*
 * Check whether any exec queue on the VM either has no preempt fence yet
 * or has a preempt fence with software signaling already enabled (i.e. a
 * preemption is pending).  Requires vm->lock and the vm resv held.
 */
static bool preempt_fences_waiting(struct xe_vm *vm)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		if (!q->lr.pfence ||
		    test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
			     &q->lr.pfence->flags)) {
			return true;
		}
	}

	return false;
}

/* Free every preallocated preempt fence left on @list. */
static void free_preempt_fences(struct list_head *list)
{
	struct list_head *link, *next;

	list_for_each_safe(link, next, list)
		xe_preempt_fence_free(to_preempt_fence_from_link(link));
}

/*
 * Top up @list so it holds one unarmed preempt fence per exec queue on
 * the VM.  @count is in/out: the number of fences already on the list.
 */
static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
				unsigned int *count)
{
	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);

	if (*count >= vm->preempt.num_exec_queues)
		return 0;

	for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
		struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();

		if (IS_ERR(pfence))
			return PTR_ERR(pfence);

		list_move_tail(xe_preempt_fence_link(pfence), list);
	}

	return 0;
}

/*
 * Wait for all installed preempt fences to signal and drop our references.
 * Return: 0 on success, -EAGAIN if a bounded wait timed out (only expected
 * on a VF supporting migration), -ETIME if the VM needs to be killed.
 */
static int wait_for_existing_preempt_fences(struct xe_vm *vm)
{
	struct xe_exec_queue *q;
	bool vf_migration = IS_SRIOV_VF(vm->xe) &&
		xe_sriov_vf_migration_supported(vm->xe);
	signed long wait_time = vf_migration ?
		HZ / 5 : MAX_SCHEDULE_TIMEOUT;

	xe_vm_assert_held(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		if (q->lr.pfence) {
			long timeout;

			/* Bounded wait on a migratable VF so post-migration
			 * work isn't starved; otherwise wait forever. */
			timeout = dma_fence_wait_timeout(q->lr.pfence, false,
							 wait_time);
			if (!timeout) {
				xe_assert(vm->xe, vf_migration);
				return -EAGAIN;
			}

			/* Only -ETIME on fence indicates VM needs to be killed */
			if (timeout < 0 || q->lr.pfence->error == -ETIME)
				return -ETIME;

			dma_fence_put(q->lr.pfence);
			q->lr.pfence = NULL;
		}
	}

	return 0;
}

/* True iff every exec queue attached to the VM is idle. */
static bool xe_vm_is_idle(struct xe_vm *vm)
{
	struct xe_exec_queue *q;

	xe_vm_assert_held(vm);
	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		if (!xe_exec_queue_is_idle(q))
			return false;
	}

	return true;
}

/*
 * Arm one preallocated fence from @list per exec queue and install it as
 * the queue's new preempt fence, dropping the old one.  The assert checks
 * that @list has an entry for each queue.
 */
static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
{
	struct list_head *link;
	struct xe_exec_queue *q;

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		struct dma_fence *fence;

		link = list->next;
		xe_assert(vm->xe, link != list);

		fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
					     q, q->lr.context,
					     ++q->lr.seqno);
		dma_fence_put(q->lr.pfence);
		q->lr.pfence = fence;
	}
}

/* Add the VM's current preempt fences to @bo's dma-resv as BOOKKEEP. */
static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
{
	struct xe_exec_queue *q;
	int err;

	xe_bo_assert_held(bo);

	if (!vm->preempt.num_exec_queues)
		return 0;

	err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
	if (err)
		return err;

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
		if (q->lr.pfence) {
			dma_resv_add_fence(bo->ttm.base.resv,
					   q->lr.pfence,
					   DMA_RESV_USAGE_BOOKKEEP);
		}

	return 0;
}

/*
 * Resume all exec queues and re-add their preempt fences to the VM resv
 * after a successful rebind.
 */
static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
						struct drm_exec *exec)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		q->ops->resume(q);

		drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
					 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
	}
}

/**
 * xe_vm_add_compute_exec_queue() - Add a compute exec queue to the VM
 * @vm: The VM, must be in preempt fence mode.
 * @q: The exec queue to attach.
 *
 * Creates a preempt fence for @q, installs it on the queue and in the
 * VM's resv, and enables software signaling on it immediately if a
 * preemption or userptr invalidation is already in flight.
 *
 * Return: 0 on success, negative error code on failure.
 */
int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	struct drm_gpuvm_exec vm_exec = {
		.vm = &vm->gpuvm,
		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
		.num_fences = 1,
	};
	struct drm_exec *exec = &vm_exec.exec;
	struct xe_validation_ctx ctx;
	struct dma_fence *pfence;
	int err;
	bool wait;

	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));

	down_write(&vm->lock);
	err = xe_validation_exec_lock(&ctx, &vm_exec, &vm->xe->val);
	if (err)
		goto out_up_write;

	pfence = xe_preempt_fence_create(q, q->lr.context,
					 ++q->lr.seqno);
	if (IS_ERR(pfence)) {
		err = PTR_ERR(pfence);
		goto out_fini;
	}

	list_add(&q->lr.link, &vm->preempt.exec_queues);
	++vm->preempt.num_exec_queues;
	q->lr.pfence = pfence;

	xe_svm_notifier_lock(vm);

	drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
				 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);

	/*
	 * Check to see if a preemption on VM is in flight or userptr
	 * invalidation, if so trigger this preempt fence to sync state with
	 * other preempt fences on the VM.
	 */
	wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
	if (wait)
		dma_fence_enable_sw_signaling(pfence);

	xe_svm_notifier_unlock(vm);

out_fini:
	xe_validation_ctx_fini(&ctx);
out_up_write:
	up_write(&vm->lock);

	return err;
}
ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO);

/**
 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
 * @vm: The VM.
 * @q: The exec_queue
 *
 * Note that this function might be called multiple times on the same queue.
 */
void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	if (!xe_vm_in_preempt_fence_mode(vm))
		return;

	down_write(&vm->lock);
	if (!list_empty(&q->lr.link)) {
		list_del_init(&q->lr.link);
		--vm->preempt.num_exec_queues;
	}
	if (q->lr.pfence) {
		/* Let waiters see the fence signal, then drop our reference */
		dma_fence_enable_sw_signaling(q->lr.pfence);
		dma_fence_put(q->lr.pfence);
		q->lr.pfence = NULL;
	}
	up_write(&vm->lock);
}

#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000

/**
 * xe_vm_kill() - VM Kill
 * @vm: The VM.
 * @unlocked: Flag indicates the VM's dma-resv is not held
 *
 * Kill the VM by setting the banned flag, indicating the VM is no longer
 * available for use. If in preempt fence mode, also kill all exec queues
 * attached to the VM.
 */
void xe_vm_kill(struct xe_vm *vm, bool unlocked)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);

	if (unlocked)
		xe_vm_lock(vm, false);

	vm->flags |= XE_VM_FLAG_BANNED;
	trace_xe_vm_kill(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
		q->ops->kill(q);

	if (unlocked)
		xe_vm_unlock(vm);

	/* TODO: Inform user the VM is banned */
}

/*
 * drm_gpuvm_ops::vm_bo_validate hook: queue the BO's VMAs for rebind and
 * revalidate the evicted BO.  Returns -EAGAIN while a suspend is pending.
 */
static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
{
	struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
	struct drm_gpuva *gpuva;
	int ret;

	lockdep_assert_held(&vm->lock);
	drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
		list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
			       &vm->rebind_list);

	if (!try_wait_for_completion(&vm->xe->pm_block))
		return -EAGAIN;

	ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false, exec);
	if (ret)
		return ret;

	vm_bo->evicted = false;
	return 0;
}

/**
 * xe_vm_validate_rebind() - Validate buffer objects
 * and rebind vmas
 * @vm: The vm for which we are rebinding.
 * @exec: The struct drm_exec with the locked GEM objects.
 * @num_fences: The number of fences to reserve for the operation, not
 * including rebinds and validations.
 *
 * Validates all evicted gem objects and rebinds their vmas. Note that
 * rebindings may cause evictions and hence the validation-rebind
 * sequence is rerun until there are no more objects to validate.
 *
 * Return: 0 on success, negative error code on error. In particular,
 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
 * the drm_exec transaction needs to be restarted.
 */
int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
			  unsigned int num_fences)
{
	struct drm_gem_object *obj;
	unsigned long index;
	int ret;

	do {
		ret = drm_gpuvm_validate(&vm->gpuvm, exec);
		if (ret)
			return ret;

		ret = xe_vm_rebind(vm, false);
		if (ret)
			return ret;
	} while (!list_empty(&vm->gpuvm.evict.list));

	drm_exec_for_each_locked_object(exec, index, obj) {
		ret = dma_resv_reserve_fences(obj->resv, num_fences);
		if (ret)
			return ret;
	}

	return 0;
}

/*
 * Locking/preparation step of the rebind worker.  Sets *done when there
 * is nothing to do (VM idle or no preemption pending).
 */
static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
				 bool *done)
{
	int err;

	err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	if (xe_vm_is_idle(vm)) {
		vm->preempt.rebind_deactivated = true;
		*done = true;
		return 0;
	}

	if (!preempt_fences_waiting(vm)) {
		*done = true;
		return 0;
	}

	err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	err = wait_for_existing_preempt_fences(vm);
	if (err)
		return err;

	/*
	 * Add validation and rebinding to the locking loop since both can
	 * cause evictions which may require blocking dma_resv locks.
	 * The fence reservation here is intended for the new preempt fences
	 * we attach at the end of the rebind work.
	 */
	return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
}

/*
 * If a suspend is pending, park this VM on the rebind-resume list rather
 * than letting the worker run.  Return: true if the worker was parked.
 */
static bool vm_suspend_rebind_worker(struct xe_vm *vm)
{
	struct xe_device *xe = vm->xe;
	bool ret = false;

	mutex_lock(&xe->rebind_resume_lock);
	if (!try_wait_for_completion(&vm->xe->pm_block)) {
		ret = true;
		list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list);
	}
	mutex_unlock(&xe->rebind_resume_lock);

	return ret;
}

/**
 * xe_vm_resume_rebind_worker() - Resume the rebind worker.
 * @vm: The vm whose preempt worker to resume.
 *
 * Resume a preempt worker that was previously suspended by
 * vm_suspend_rebind_worker().
 */
void xe_vm_resume_rebind_worker(struct xe_vm *vm)
{
	queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work);
}

/* Preempt-fence-mode rebind worker: revalidate, rebind, re-arm fences. */
static void preempt_rebind_work_func(struct work_struct *w)
{
	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	unsigned int fence_count = 0;
	LIST_HEAD(preempt_fences);
	int err = 0;
	long wait;
	int __maybe_unused tries = 0;

	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
	trace_xe_vm_rebind_worker_enter(vm);

	down_write(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm)) {
		up_write(&vm->lock);
		trace_xe_vm_rebind_worker_exit(vm);
		return;
	}

retry:
	if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) {
		up_write(&vm->lock);
		/* We don't actually block but don't make progress.
		 */
		xe_pm_might_block_on_suspend();
		return;
	}

	if (xe_vm_userptr_check_repin(vm)) {
		err = xe_vm_userptr_pin(vm);
		if (err)
			goto out_unlock_outer;
	}

	err = xe_validation_ctx_init(&ctx, &vm->xe->val, &exec,
				     (struct xe_val_flags) {.interruptible = true});
	if (err)
		goto out_unlock_outer;

	drm_exec_until_all_locked(&exec) {
		bool done = false;

		err = xe_preempt_work_begin(&exec, vm, &done);
		drm_exec_retry_on_contention(&exec);
		xe_validation_retry_on_oom(&ctx, &err);
		if (err || done) {
			xe_validation_ctx_fini(&ctx);
			goto out_unlock_outer;
		}
	}

	err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
	if (err)
		goto out_unlock;

	xe_vm_set_validation_exec(vm, &exec);
	err = xe_vm_rebind(vm, true);
	xe_vm_set_validation_exec(vm, NULL);
	if (err)
		goto out_unlock;

	/* Wait on rebinds and munmap style VM unbinds */
	wait = dma_resv_wait_timeout(xe_vm_resv(vm),
				     DMA_RESV_USAGE_KERNEL,
				     false, MAX_SCHEDULE_TIMEOUT);
	if (wait <= 0) {
		err = -ETIME;
		goto out_unlock;
	}

/* With userptr invalidation injection enabled, force one retry per run */
#define retry_required(__tries, __vm) \
	(IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
	(!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
	__xe_vm_userptr_needs_repin(__vm))

	xe_svm_notifier_lock(vm);
	if (retry_required(tries, vm)) {
		xe_svm_notifier_unlock(vm);
		err = -EAGAIN;
		goto out_unlock;
	}

#undef retry_required

	spin_lock(&vm->xe->ttm.lru_lock);
	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
	spin_unlock(&vm->xe->ttm.lru_lock);

	/* Point of no return.
	 */
	arm_preempt_fences(vm, &preempt_fences);
	resume_and_reinstall_preempt_fences(vm, &exec);
	xe_svm_notifier_unlock(vm);

out_unlock:
	xe_validation_ctx_fini(&ctx);
out_unlock_outer:
	if (err == -EAGAIN) {
		trace_xe_vm_rebind_worker_retry(vm);

		/*
		 * We can't block in workers on a VF which supports migration
		 * given this can block the VF post-migration workers from
		 * getting scheduled.
		 */
		if (IS_SRIOV_VF(vm->xe) &&
		    xe_sriov_vf_migration_supported(vm->xe)) {
			up_write(&vm->lock);
			xe_vm_queue_rebind_worker(vm);
			return;
		}

		goto retry;
	}

	if (err) {
		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
		xe_vm_kill(vm, true);
	}
	up_write(&vm->lock);

	free_preempt_fences(&preempt_fences);

	trace_xe_vm_rebind_worker_exit(vm);
}

/*
 * Allocate the per-tile PT update op arrays sized by num_ops.
 * Return: 0 on success; on allocation failure -ENOBUFS for an array of
 * binds (so the caller can fall back to one-by-one), -ENOMEM otherwise.
 */
static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
{
	int i;

	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) {
		if (!vops->pt_update_ops[i].num_ops)
			continue;

		vops->pt_update_ops[i].ops =
			kmalloc_array(vops->pt_update_ops[i].num_ops,
				      sizeof(*vops->pt_update_ops[i].ops),
				      GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
		if (!vops->pt_update_ops[i].ops)
			return array_of_binds ?
				-ENOBUFS : -ENOMEM;
	}

	return 0;
}
ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO);

/* Tear down the prefetch-range xarray of an SVM prefetch op. */
static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op)
{
	struct xe_vma *vma;

	vma = gpuva_to_vma(op->base.prefetch.va);

	if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma))
		xa_destroy(&op->prefetch_range.range);
}

/* Finalize all SVM prefetch ops in @vops, if any were added. */
static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops)
{
	struct xe_vma_op *op;

	if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
		return;

	list_for_each_entry(op, &vops->list, link)
		xe_vma_svm_prefetch_op_fini(op);
}

/* Release all resources held by @vops (prefetch state + PT op arrays). */
static void xe_vma_ops_fini(struct xe_vma_ops *vops)
{
	int i;

	xe_vma_svm_prefetch_ops_fini(vops);

	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
		kfree(vops->pt_update_ops[i].ops);
}

/* Bump num_ops by @inc_val for every tile selected in @tile_mask. */
static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val)
{
	int i;

	if (!inc_val)
		return;

	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
		if (BIT(i) & tile_mask)
			vops->pt_update_ops[i].num_ops += inc_val;
}

/* VMA flags that carry over when re-creating a mapping. */
#define XE_VMA_CREATE_MASK ( \
	XE_VMA_READ_ONLY | \
	XE_VMA_DUMPABLE | \
	XE_VMA_SYSTEM_ALLOCATOR | \
	DRM_GPUVA_SPARSE | \
	XE_VMA_MADV_AUTORESET)

/* Fill @op as an immediate MAP of an existing @vma (a rebind). */
static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
				  u8 tile_mask)
{
	INIT_LIST_HEAD(&op->link);
	op->tile_mask = tile_mask;
	op->base.op = DRM_GPUVA_OP_MAP;
	op->base.map.va.addr = vma->gpuva.va.addr;
	op->base.map.va.range = vma->gpuva.va.range;
	op->base.map.gem.obj = vma->gpuva.gem.obj;
	op->base.map.gem.offset = vma->gpuva.gem.offset;
	op->map.vma = vma;
	op->map.immediate = true;
	op->map.vma_flags = vma->gpuva.flags & XE_VMA_CREATE_MASK;
}

/* Allocate and queue a rebind op for @vma onto @vops. */
static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
				u8 tile_mask)
{
	struct xe_vma_op *op;

	op = kzalloc(sizeof(*op), GFP_KERNEL);
	if (!op)
		return -ENOMEM;

	xe_vm_populate_rebind(op, vma, tile_mask);
	list_add_tail(&op->link, &vops->list);
	xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);

	return 0;
}

static struct dma_fence *ops_execute(struct xe_vm *vm,
				     struct xe_vma_ops *vops);
static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
			    struct xe_exec_queue *q,
			    struct xe_sync_entry *syncs, u32 num_syncs);

/*
 * Rebind every VMA on the VM's rebind list.  @rebind_worker selects the
 * tracepoint flavor and allows rebinds in long-running mode.
 * Return: 0 on success, negative error code on failure.
 */
int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
{
	struct dma_fence *fence;
	struct xe_vma *vma, *next;
	struct xe_vma_ops vops;
	struct xe_vma_op *op, *next_op;
	int err, i;

	lockdep_assert_held(&vm->lock);
	if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
	    list_empty(&vm->rebind_list))
		return 0;

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
		vops.pt_update_ops[i].wait_vm_bookkeep = true;

	xe_vm_assert_held(vm);
	list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
		xe_assert(vm->xe, vma->tile_present);

		if (rebind_worker)
			trace_xe_vma_rebind_worker(vma);
		else
			trace_xe_vma_rebind_exec(vma);

		err = xe_vm_ops_add_rebind(&vops, vma,
					   vma->tile_present);
		if (err)
			goto free_ops;
	}

	err = xe_vma_ops_alloc(&vops, false);
	if (err)
		goto free_ops;

	fence = ops_execute(vm, &vops);
	if (IS_ERR(fence)) {
		err = PTR_ERR(fence);
	} else {
		/* All rebinds are issued; clear the list */
		dma_fence_put(fence);
		list_for_each_entry_safe(vma, next, &vm->rebind_list,
					 combined_links.rebind)
			list_del_init(&vma->combined_links.rebind);
	}
free_ops:
	list_for_each_entry_safe(op, next_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return err;
}

struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask)
{
	struct
dma_fence *fence = NULL;
	struct xe_vma_ops vops;
	struct xe_vma_op *op, *next_op;
	struct xe_tile *tile;
	u8 id;
	int err;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);
	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT;
	for_each_tile(tile, vm->xe, id) {
		vops.pt_update_ops[id].wait_vm_bookkeep = true;
		vops.pt_update_ops[tile->id].q =
			xe_migrate_exec_queue(tile->migrate);
	}

	err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
	if (err)
		return ERR_PTR(err);

	err = xe_vma_ops_alloc(&vops, false);
	if (err) {
		fence = ERR_PTR(err);
		goto free_ops;
	}

	fence = ops_execute(vm, &vops);

free_ops:
	list_for_each_entry_safe(op, next_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return fence;
}

/* Fill @op as a driver-subop MAP_RANGE of SVM @range under @vma. */
static void xe_vm_populate_range_rebind(struct xe_vma_op *op,
					struct xe_vma *vma,
					struct xe_svm_range *range,
					u8 tile_mask)
{
	INIT_LIST_HEAD(&op->link);
	op->tile_mask = tile_mask;
	op->base.op = DRM_GPUVA_OP_DRIVER;
	op->subop = XE_VMA_SUBOP_MAP_RANGE;
	op->map_range.vma = vma;
	op->map_range.range = range;
}

/* Allocate and queue a MAP_RANGE op for @range onto @vops. */
static int
xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops,
			   struct xe_vma *vma,
			   struct xe_svm_range *range,
			   u8 tile_mask)
{
	struct xe_vma_op *op;

	op = kzalloc(sizeof(*op), GFP_KERNEL);
	if (!op)
		return -ENOMEM;

	xe_vm_populate_range_rebind(op, vma, range, tile_mask);
	list_add_tail(&op->link, &vops->list);
	xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);

	return 0;
}

/**
 * xe_vm_range_rebind() - VM range (re)bind
 * @vm: The VM which the range belongs to.
 * @vma: The VMA which the range belongs to.
 * @range: SVM range to rebind.
 * @tile_mask: Tile mask to bind the range to.
 *
 * (re)bind SVM range setting up GPU page tables for the range.
 *
 * Return: dma fence for rebind to signal completion on success, ERR_PTR on
 * failure
 */
struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
				     struct xe_vma *vma,
				     struct xe_svm_range *range,
				     u8 tile_mask)
{
	struct dma_fence *fence = NULL;
	struct xe_vma_ops vops;
	struct xe_vma_op *op, *next_op;
	struct xe_tile *tile;
	u8 id;
	int err;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);
	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
	xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT;
	for_each_tile(tile, vm->xe, id) {
		vops.pt_update_ops[id].wait_vm_bookkeep = true;
		vops.pt_update_ops[tile->id].q =
			xe_migrate_exec_queue(tile->migrate);
	}

	err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask);
	if (err)
		return ERR_PTR(err);

	err = xe_vma_ops_alloc(&vops, false);
	if (err) {
		fence = ERR_PTR(err);
		goto free_ops;
	}

	fence = ops_execute(vm, &vops);

free_ops:
	list_for_each_entry_safe(op, next_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return fence;
}

/* Fill @op as a driver-subop UNMAP_RANGE of SVM @range. */
static void xe_vm_populate_range_unbind(struct xe_vma_op *op,
					struct xe_svm_range *range)
{
	INIT_LIST_HEAD(&op->link);
	op->tile_mask = range->tile_present;
	op->base.op = DRM_GPUVA_OP_DRIVER;
	op->subop = XE_VMA_SUBOP_UNMAP_RANGE;
	op->unmap_range.range = range;
}

/* Allocate and queue an UNMAP_RANGE op for @range onto @vops. */
static int
xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops,
			   struct xe_svm_range *range)
{
	struct xe_vma_op *op;

	op = kzalloc(sizeof(*op), GFP_KERNEL);
	if (!op)
		return -ENOMEM;

	xe_vm_populate_range_unbind(op, range);
	list_add_tail(&op->link, &vops->list);
	xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1);

	return 0;
}

/**
 * xe_vm_range_unbind() - VM range unbind
 * @vm: The VM which the range belongs to.
 * @range: SVM range to unbind.
 *
 * Unbind SVM range removing the GPU page tables for the range.
 *
 * Return: dma fence for unbind to signal completion on success, ERR_PTR on
 * failure
 */
struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
				     struct xe_svm_range *range)
{
	struct dma_fence *fence = NULL;
	struct xe_vma_ops vops;
	struct xe_vma_op *op, *next_op;
	struct xe_tile *tile;
	u8 id;
	int err;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);
	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));

	/* Not bound on any tile -> nothing to unbind */
	if (!range->tile_present)
		return dma_fence_get_stub();

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	for_each_tile(tile, vm->xe, id) {
		vops.pt_update_ops[id].wait_vm_bookkeep = true;
		vops.pt_update_ops[tile->id].q =
			xe_migrate_exec_queue(tile->migrate);
	}

	err = xe_vm_ops_add_range_unbind(&vops, range);
	if (err)
		return ERR_PTR(err);

	err = xe_vma_ops_alloc(&vops, false);
	if (err) {
		fence = ERR_PTR(err);
		goto free_ops;
	}

	fence = ops_execute(vm, &vops);

free_ops:
	list_for_each_entry_safe(op, next_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return fence;
}

/* Free a VMA using the size matching its userptr subclass (or not). */
static void xe_vma_free(struct xe_vma *vma)
{
	if (xe_vma_is_userptr(vma))
		kfree(to_userptr_vma(vma));
	else
		kfree(vma);
}

static struct xe_vma *xe_vma_create(struct xe_vm *vm,
				    struct xe_bo *bo,
				    u64 bo_offset_or_userptr,
				    u64 start, u64 end,
				    struct xe_vma_mem_attr *attr,
				    unsigned int flags)
{
	struct xe_vma *vma;
	struct xe_tile *tile;
	u8 id;
	bool is_null = (flags & DRM_GPUVA_SPARSE);
	bool
is_cpu_addr_mirror = (flags & XE_VMA_SYSTEM_ALLOCATOR);

	xe_assert(vm->xe, start < end);
	xe_assert(vm->xe, end < vm->size);

	/*
	 * Allocate and ensure that the xe_vma_is_userptr() return
	 * matches what was allocated.
	 */
	if (!bo && !is_null && !is_cpu_addr_mirror) {
		struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL);

		if (!uvma)
			return ERR_PTR(-ENOMEM);

		vma = &uvma->vma;
	} else {
		vma = kzalloc(sizeof(*vma), GFP_KERNEL);
		if (!vma)
			return ERR_PTR(-ENOMEM);

		if (bo)
			vma->gpuva.gem.obj = &bo->ttm.base;
	}

	INIT_LIST_HEAD(&vma->combined_links.rebind);

	INIT_LIST_HEAD(&vma->gpuva.gem.entry);
	vma->gpuva.vm = &vm->gpuvm;
	vma->gpuva.va.addr = start;
	vma->gpuva.va.range = end - start + 1;
	vma->gpuva.flags = flags;

	/* New VMAs start out present on every tile */
	for_each_tile(tile, vm->xe, id)
		vma->tile_mask |= 0x1 << id;

	if (vm->xe->info.has_atomic_enable_pte_bit)
		vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;

	vma->attr = *attr;

	if (bo) {
		struct drm_gpuvm_bo *vm_bo;

		xe_bo_assert_held(bo);

		vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
		if (IS_ERR(vm_bo)) {
			xe_vma_free(vma);
			return ERR_CAST(vm_bo);
		}

		drm_gpuvm_bo_extobj_add(vm_bo);
		drm_gem_object_get(&bo->ttm.base);
		vma->gpuva.gem.offset = bo_offset_or_userptr;
		drm_gpuva_link(&vma->gpuva, vm_bo);
		drm_gpuvm_bo_put(vm_bo);
	} else /* userptr or null */ {
		if (!is_null && !is_cpu_addr_mirror) {
			struct xe_userptr_vma *uvma = to_userptr_vma(vma);
			u64 size = end - start + 1;
			int err;

			vma->gpuva.gem.offset = bo_offset_or_userptr;

			err = xe_userptr_setup(uvma, xe_vma_userptr(vma), size);
			if (err) {
				xe_vma_free(vma);
				return ERR_PTR(err);
			}
		}

		/* BO-less VMAs hold a VM reference, dropped on destroy */
		xe_vm_get(vm);
	}

	return vma;
}

static void xe_vma_destroy_late(struct
xe_vma *vma)
{
	struct xe_vm *vm = xe_vma_vm(vma);

	if (vma->ufence) {
		xe_sync_ufence_put(vma->ufence);
		vma->ufence = NULL;
	}

	/* Drop the reference taken at creation: VM for BO-less VMAs, BO otherwise */
	if (xe_vma_is_userptr(vma)) {
		struct xe_userptr_vma *uvma = to_userptr_vma(vma);

		xe_userptr_remove(uvma);
		xe_vm_put(vm);
	} else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) {
		xe_vm_put(vm);
	} else {
		xe_bo_put(xe_vma_bo(vma));
	}

	xe_vma_free(vma);
}

/* Deferred final destruction, run from the unbound workqueue. */
static void vma_destroy_work_func(struct work_struct *w)
{
	struct xe_vma *vma =
		container_of(w, struct xe_vma, destroy_work);

	xe_vma_destroy_late(vma);
}

/* Fence callback: punt final destruction to a worker (fence ctx is atomic). */
static void vma_destroy_cb(struct dma_fence *fence,
			   struct dma_fence_cb *cb)
{
	struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);

	INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
	queue_work(system_unbound_wq, &vma->destroy_work);
}

/*
 * Unlink @vma and destroy it, deferring the final teardown until @fence
 * signals when a fence is supplied.
 */
static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
{
	struct xe_vm *vm = xe_vma_vm(vma);

	lockdep_assert_held_write(&vm->lock);
	xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));

	if (xe_vma_is_userptr(vma)) {
		xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
		xe_userptr_destroy(to_userptr_vma(vma));
	} else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) {
		xe_bo_assert_held(xe_vma_bo(vma));

		drm_gpuva_unlink(&vma->gpuva);
	}

	xe_vm_assert_held(vm);
	if (fence) {
		int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
						 vma_destroy_cb);

		if (ret) {
			/* -ENOENT: fence already signaled, destroy now */
			XE_WARN_ON(ret != -ENOENT);
			xe_vma_destroy_late(vma);
		}
	} else {
		xe_vma_destroy_late(vma);
	}
}

/**
 * xe_vm_lock_vma() - drm_exec utility to lock a vma
 * @exec: The drm_exec object we're currently locking for.
 * @vma: The vma for which we want to lock the vm resv and any attached
 * object's resv.
 *
 * Return: 0 on success, negative error code on error. In particular
 * may return -EDEADLK on WW transaction contention and -EINTR if
 * an interruptible wait is terminated by a signal.
 */
int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
{
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_bo *bo = xe_vma_bo(vma);
	int err;

	XE_WARN_ON(!vm);

	err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
	/* A VM-private BO shares the VM's resv, so no extra lock needed */
	if (!err && bo && !bo->vm)
		err = drm_exec_lock_obj(exec, &bo->ttm.base);

	return err;
}

/* Take the locks required for xe_vma_destroy() and destroy @vma. */
static void xe_vma_destroy_unlocked(struct xe_vma *vma)
{
	struct xe_device *xe = xe_vma_vm(vma)->xe;
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	int err = 0;

	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
		err = xe_vm_lock_vma(&exec, vma);
		drm_exec_retry_on_contention(&exec);
		if (XE_WARN_ON(err))
			break;
		xe_vma_destroy(vma, NULL);
	}
	xe_assert(xe, !err);
}

/*
 * Find the first VMA overlapping [start, start + range) or NULL if none
 * exists or the VM is closed/banned.  Requires vm->lock.
 */
struct xe_vma *
xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
{
	struct drm_gpuva *gpuva;

	lockdep_assert_held(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm))
		return NULL;

	xe_assert(vm->xe, start + range <= vm->size);

	gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);

	return gpuva ?
gpuva_to_vma(gpuva) : NULL;
}

/* Insert @vma into the GPUVA tree under snap_mutex. */
static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
{
	int err;

	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
	lockdep_assert_held(&vm->lock);

	mutex_lock(&vm->snap_mutex);
	err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
	mutex_unlock(&vm->snap_mutex);
	XE_WARN_ON(err);	/* Shouldn't be possible */

	return err;
}

/* Remove @vma from the GPUVA tree and clear the last-fault cache. */
static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
{
	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
	lockdep_assert_held(&vm->lock);

	mutex_lock(&vm->snap_mutex);
	drm_gpuva_remove(&vma->gpuva);
	mutex_unlock(&vm->snap_mutex);
	if (vm->usm.last_fault_vma == vma)
		vm->usm.last_fault_vma = NULL;
}

/* drm_gpuvm_ops::op_alloc: allocate an xe_vma_op wrapping a drm_gpuva_op. */
static struct drm_gpuva_op *xe_vm_op_alloc(void)
{
	struct xe_vma_op *op;

	op = kzalloc(sizeof(*op), GFP_KERNEL);

	if (unlikely(!op))
		return NULL;

	return &op->base;
}

static void xe_vm_free(struct drm_gpuvm *gpuvm);

static const struct drm_gpuvm_ops gpuvm_ops = {
	.op_alloc = xe_vm_op_alloc,
	.vm_bo_validate = xe_gpuvm_validate,
	.vm_free = xe_vm_free,
};

/* Encode the 2-bit PAT index carried by non-leaf (PDE) entries. */
static u64 pde_encode_pat_index(u16 pat_index)
{
	u64 pte = 0;

	if (pat_index & BIT(0))
		pte |= XE_PPGTT_PTE_PAT0;

	if (pat_index & BIT(1))
		pte |= XE_PPGTT_PTE_PAT1;

	return pte;
}

/* Encode up to 5 PAT index bits into PTE bits for the given level. */
static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level)
{
	u64 pte = 0;

	if (pat_index & BIT(0))
		pte |= XE_PPGTT_PTE_PAT0;

	if (pat_index & BIT(1))
		pte |= XE_PPGTT_PTE_PAT1;

	if (pat_index & BIT(2)) {
		/* PAT2 lives at a different bit position in PDE/PDPE entries */
		if (pt_level)
			pte |= XE_PPGTT_PDE_PDPE_PAT2;
		else
			pte |= XE_PPGTT_PTE_PAT2;
	}

	if (pat_index & BIT(3))
		pte |= XELPG_PPGTT_PTE_PAT3;

	if (pat_index & (BIT(4)))
		pte |= XE2_PPGTT_PTE_PAT4;

	return
pte; 1270 } 1271 1272 static u64 pte_encode_ps(u32 pt_level) 1273 { 1274 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL); 1275 1276 if (pt_level == 1) 1277 return XE_PDE_PS_2M; 1278 else if (pt_level == 2) 1279 return XE_PDPE_PS_1G; 1280 1281 return 0; 1282 } 1283 1284 static u16 pde_pat_index(struct xe_bo *bo) 1285 { 1286 struct xe_device *xe = xe_bo_device(bo); 1287 u16 pat_index; 1288 1289 /* 1290 * We only have two bits to encode the PAT index in non-leaf nodes, but 1291 * these only point to other paging structures so we only need a minimal 1292 * selection of options. The user PAT index is only for encoding leaf 1293 * nodes, where we have use of more bits to do the encoding. The 1294 * non-leaf nodes are instead under driver control so the chosen index 1295 * here should be distinct from the user PAT index. Also the 1296 * corresponding coherency of the PAT index should be tied to the 1297 * allocation type of the page table (or at least we should pick 1298 * something which is always safe). 
1299 */ 1300 if (!xe_bo_is_vram(bo) && bo->ttm.ttm->caching == ttm_cached) 1301 pat_index = xe->pat.idx[XE_CACHE_WB]; 1302 else 1303 pat_index = xe->pat.idx[XE_CACHE_NONE]; 1304 1305 xe_assert(xe, pat_index <= 3); 1306 1307 return pat_index; 1308 } 1309 1310 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset) 1311 { 1312 u64 pde; 1313 1314 pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1315 pde |= XE_PAGE_PRESENT | XE_PAGE_RW; 1316 pde |= pde_encode_pat_index(pde_pat_index(bo)); 1317 1318 return pde; 1319 } 1320 1321 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, 1322 u16 pat_index, u32 pt_level) 1323 { 1324 u64 pte; 1325 1326 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1327 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1328 pte |= pte_encode_pat_index(pat_index, pt_level); 1329 pte |= pte_encode_ps(pt_level); 1330 1331 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) 1332 pte |= XE_PPGTT_PTE_DM; 1333 1334 return pte; 1335 } 1336 1337 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, 1338 u16 pat_index, u32 pt_level) 1339 { 1340 pte |= XE_PAGE_PRESENT; 1341 1342 if (likely(!xe_vma_read_only(vma))) 1343 pte |= XE_PAGE_RW; 1344 1345 pte |= pte_encode_pat_index(pat_index, pt_level); 1346 pte |= pte_encode_ps(pt_level); 1347 1348 if (unlikely(xe_vma_is_null(vma))) 1349 pte |= XE_PTE_NULL; 1350 1351 return pte; 1352 } 1353 1354 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, 1355 u16 pat_index, 1356 u32 pt_level, bool devmem, u64 flags) 1357 { 1358 u64 pte; 1359 1360 /* Avoid passing random bits directly as flags */ 1361 xe_assert(xe, !(flags & ~XE_PTE_PS64)); 1362 1363 pte = addr; 1364 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1365 pte |= pte_encode_pat_index(pat_index, pt_level); 1366 pte |= pte_encode_ps(pt_level); 1367 1368 if (devmem) 1369 pte |= XE_PPGTT_PTE_DM; 1370 1371 pte |= flags; 1372 1373 return pte; 1374 } 1375 1376 static const struct xe_pt_ops xelp_pt_ops = { 1377 .pte_encode_bo = xelp_pte_encode_bo, 1378 
	.pte_encode_vma = xelp_pte_encode_vma,
	.pte_encode_addr = xelp_pte_encode_addr,
	.pde_encode_bo = xelp_pde_encode_bo,
};

static void vm_destroy_work_func(struct work_struct *w);

/**
 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
 * given tile and vm.
 * @xe: xe device.
 * @tile: tile to set up for.
 * @vm: vm to set up for.
 * @exec: The struct drm_exec object used to lock the vm resv.
 *
 * Sets up a pagetable tree with one page-table per level and a single
 * leaf PTE. All pagetable entries point to the single page-table or,
 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and
 * writes become NOPs.
 *
 * Return: 0 on success, negative error code on error.
 */
static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
				struct xe_vm *vm, struct drm_exec *exec)
{
	u8 id = tile->id;
	int i;

	for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
		vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i, exec);
		if (IS_ERR(vm->scratch_pt[id][i])) {
			int err = PTR_ERR(vm->scratch_pt[id][i]);

			/* Don't leave an ERR_PTR behind for later cleanup */
			vm->scratch_pt[id][i] = NULL;
			return err;
		}
		xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
	}

	return 0;
}
ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO);

/* Destroy every allocated scratch page-table on every tile of @vm. */
static void xe_vm_free_scratch(struct xe_vm *vm)
{
	struct xe_tile *tile;
	u8 id;

	if (!xe_vm_has_scratch(vm))
		return;

	for_each_tile(tile, vm->xe, id) {
		u32 i;

		if (!vm->pt_root[id])
			continue;

		for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
			if (vm->scratch_pt[id][i])
				xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
	}
}

/* Destroy and NULL each tile's page-table root; vm resv must be held. */
static void xe_vm_pt_destroy(struct xe_vm *vm)
{
	struct xe_tile *tile;
	u8 id;

	xe_vm_assert_held(vm);

	for_each_tile(tile, vm->xe, id) {
		if (vm->pt_root[id]) {
			xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
			vm->pt_root[id] = NULL;
		}
	}
}

/*
 * Create a VM spanning 1 << va_bits. Allocates per-tile page-table roots
 * (and scratch tables when enabled) under a validation/drm_exec transaction,
 * creates per-tile bind queues for non-migration VMs, and allocates a fault
 * ASID when the device supports it.
 */
struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
{
	struct drm_gem_object *vm_resv_obj;
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	struct xe_vm *vm;
	int err;
	struct xe_tile *tile;
	u8 id;

	/*
	 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to
	 * ever be in faulting mode.
	 */
	xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE)));

	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
	if (!vm)
		return ERR_PTR(-ENOMEM);

	vm->xe = xe;

	vm->size = 1ull << xe->info.va_bits;
	vm->flags = flags;

	if (xef)
		vm->xef = xe_file_get(xef);
	/*
	 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be
	 * manipulated under the PXP mutex. However, the PXP mutex can be taken
	 * under a user-VM lock when the PXP session is started at exec_queue
	 * creation time. Those are different VMs and therefore there is no risk
	 * of deadlock, but we need to tell lockdep that this is the case or it
	 * will print a warning.
	 */
	if (flags & XE_VM_FLAG_GSC) {
		static struct lock_class_key gsc_vm_key;

		__init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key);
	} else {
		init_rwsem(&vm->lock);
	}
	mutex_init(&vm->snap_mutex);

	INIT_LIST_HEAD(&vm->rebind_list);

	INIT_LIST_HEAD(&vm->userptr.repin_list);
	INIT_LIST_HEAD(&vm->userptr.invalidated);
	spin_lock_init(&vm->userptr.invalidated_lock);

	ttm_lru_bulk_move_init(&vm->lru_bulk_move);

	INIT_WORK(&vm->destroy_work, vm_destroy_work_func);

	INIT_LIST_HEAD(&vm->preempt.exec_queues);
	vm->preempt.min_run_period_ms = 10;	/* FIXME: Wire up to uAPI */

	for_each_tile(tile, xe, id)
		xe_range_fence_tree_init(&vm->rftree[id]);

	vm->pt_ops = &xelp_pt_ops;

	/*
	 * Long-running workloads are not protected by the scheduler references.
	 * By design, run_job for long-running workloads returns NULL and the
	 * scheduler drops all the references of it, hence protecting the VM
	 * for this case is necessary.
	 */
	if (flags & XE_VM_FLAG_LR_MODE) {
		INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
		xe_pm_runtime_get_noresume(xe);
		INIT_LIST_HEAD(&vm->preempt.pm_activate_link);
	}

	err = xe_svm_init(vm);
	if (err)
		goto err_no_resv;

	vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
	if (!vm_resv_obj) {
		err = -ENOMEM;
		goto err_svm_fini;
	}

	drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
		       vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);

	/* gpuvm holds its own reference to the resv object now */
	drm_gem_object_put(vm_resv_obj);

	err = 0;
	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
			    err) {
		err = xe_vm_drm_exec_lock(vm, &exec);
		drm_exec_retry_on_contention(&exec);

		if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
			vm->flags |= XE_VM_FLAG_64K;

		for_each_tile(tile, xe, id) {
			/* Migration VMs only get a root for their own tile */
			if (flags & XE_VM_FLAG_MIGRATION &&
			    tile->id != XE_VM_FLAG_TILE_ID(flags))
				continue;

			vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level,
						       &exec);
			if (IS_ERR(vm->pt_root[id])) {
				err = PTR_ERR(vm->pt_root[id]);
				vm->pt_root[id] = NULL;
				xe_vm_pt_destroy(vm);
				drm_exec_retry_on_contention(&exec);
				xe_validation_retry_on_oom(&ctx, &err);
				break;
			}
		}
		if (err)
			break;

		if (xe_vm_has_scratch(vm)) {
			for_each_tile(tile, xe, id) {
				if (!vm->pt_root[id])
					continue;

				err = xe_vm_create_scratch(xe, tile, vm, &exec);
				if (err) {
					xe_vm_free_scratch(vm);
					xe_vm_pt_destroy(vm);
					drm_exec_retry_on_contention(&exec);
					xe_validation_retry_on_oom(&ctx, &err);
					break;
				}
			}
			if (err)
				break;
			vm->batch_invalidate_tlb = true;
		}

		if (vm->flags & XE_VM_FLAG_LR_MODE) {
			INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
			vm->batch_invalidate_tlb = false;
		}

		/* Fill pt_root after allocating scratch tables */
		for_each_tile(tile, xe, id) {
			if (!vm->pt_root[id])
				continue;

			xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
		}
	}
	if (err)
		goto err_close;

	/* Kernel migration VM shouldn't have a circular loop.. */
	if (!(flags & XE_VM_FLAG_MIGRATION)) {
		for_each_tile(tile, xe, id) {
			struct xe_exec_queue *q;
			u32 create_flags = EXEC_QUEUE_FLAG_VM;

			if (!vm->pt_root[id])
				continue;

			q = xe_exec_queue_create_bind(xe, tile, create_flags, 0);
			if (IS_ERR(q)) {
				err = PTR_ERR(q);
				goto err_close;
			}
			vm->q[id] = q;
		}
	}

	if (xef && xe->info.has_asid) {
		u32 asid;

		down_write(&xe->usm.lock);
		err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
				      XA_LIMIT(1, XE_MAX_ASID - 1),
				      &xe->usm.next_asid, GFP_KERNEL);
		up_write(&xe->usm.lock);
		if (err < 0)
			goto err_close;

		vm->usm.asid = asid;
	}

	trace_xe_vm_create(vm);

	return vm;

err_close:
	/* Full teardown path once the gpuvm/resv exists */
	xe_vm_close_and_put(vm);
	return ERR_PTR(err);

err_svm_fini:
	if (flags & XE_VM_FLAG_FAULT_MODE) {
		vm->size = 0; /* close the vm */
		xe_svm_fini(vm);
	}
err_no_resv:
	mutex_destroy(&vm->snap_mutex);
	for_each_tile(tile, xe, id)
		xe_range_fence_tree_fini(&vm->rftree[id]);
	ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
	if (vm->xef)
		xe_file_put(vm->xef);
	kfree(vm);
	if (flags & XE_VM_FLAG_LR_MODE)
		xe_pm_runtime_put(xe);
	return ERR_PTR(err);
}

/*
 * Mark the VM closed (size = 0) and, while the device is still alive,
 * clear the page-table roots and invalidate TLBs after pending binds
 * complete.
 */
static void xe_vm_close(struct xe_vm *vm)
{
	struct xe_device *xe = vm->xe;
	bool bound;
	int idx;

	bound = drm_dev_enter(&xe->drm, &idx);

	down_write(&vm->lock);
	if (xe_vm_in_fault_mode(vm))
		xe_svm_notifier_lock(vm);

	vm->size = 0;

	if (!((vm->flags & XE_VM_FLAG_MIGRATION))) {
		struct xe_tile *tile;
		struct xe_gt *gt;
		u8 id;

		/* Wait for pending binds */
		dma_resv_wait_timeout(xe_vm_resv(vm),
				      DMA_RESV_USAGE_BOOKKEEP,
				      false, MAX_SCHEDULE_TIMEOUT);

		if (bound) {
			for_each_tile(tile, xe, id)
				if (vm->pt_root[id])
					xe_pt_clear(xe, vm->pt_root[id]);

			for_each_gt(gt, xe, id)
				xe_tlb_inval_vm(&gt->tlb_inval, vm);
		}
	}

	if (xe_vm_in_fault_mode(vm))
		xe_svm_notifier_unlock(vm);
	up_write(&vm->lock);

	if (bound)
		drm_dev_exit(idx);
}

/*
 * Close the VM and drop the creation reference: kill/put the bind queues,
 * tear down every VMA and the page tables, release the fault ASID, then
 * drop the VM reference (final free happens via vm_destroy_work_func).
 */
void xe_vm_close_and_put(struct xe_vm *vm)
{
	LIST_HEAD(contested);
	struct xe_device *xe = vm->xe;
	struct xe_tile *tile;
	struct xe_vma *vma, *next_vma;
	struct drm_gpuva *gpuva, *next;
	u8 id;

	xe_assert(xe, !vm->preempt.num_exec_queues);

	xe_vm_close(vm);
	if (xe_vm_in_preempt_fence_mode(vm)) {
		mutex_lock(&xe->rebind_resume_lock);
		list_del_init(&vm->preempt.pm_activate_link);
		mutex_unlock(&xe->rebind_resume_lock);
		flush_work(&vm->preempt.rebind_work);
	}
	if (xe_vm_in_fault_mode(vm))
		xe_svm_close(vm);

	down_write(&vm->lock);
	for_each_tile(tile, xe, id) {
		if (vm->q[id]) {
			int i;

			xe_exec_queue_last_fence_put(vm->q[id], vm);
			for_each_tlb_inval(i)
				xe_exec_queue_tlb_inval_last_fence_put(vm->q[id], vm, i);
		}
	}
	up_write(&vm->lock);

	for_each_tile(tile, xe, id) {
		if (vm->q[id]) {
			xe_exec_queue_kill(vm->q[id]);
			xe_exec_queue_put(vm->q[id]);
			vm->q[id] = NULL;
		}
	}

	down_write(&vm->lock);
	xe_vm_lock(vm, false);
	drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
		vma = gpuva_to_vma(gpuva);

		if (xe_vma_has_no_bo(vma)) {
			xe_svm_notifier_lock(vm);
			vma->gpuva.flags |= XE_VMA_DESTROYED;
			xe_svm_notifier_unlock(vm);
		}

		xe_vm_remove_vma(vm, vma);

		/* easy case, remove from VMA? */
		if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
			list_del_init(&vma->combined_links.rebind);
			xe_vma_destroy(vma, NULL);
			continue;
		}

		/* External BO: defer destruction until after unlock */
		list_move_tail(&vma->combined_links.destroy, &contested);
		vma->gpuva.flags |= XE_VMA_DESTROYED;
	}

	/*
	 * All vm operations will add shared fences to resv.
	 * The only exception is eviction for a shared object,
	 * but even so, the unbind when evicted would still
	 * install a fence to resv. Hence it's safe to
	 * destroy the pagetables immediately.
	 */
	xe_vm_free_scratch(vm);
	xe_vm_pt_destroy(vm);
	xe_vm_unlock(vm);

	/*
	 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
	 * Since we hold a refcount to the bo, we can remove and free
	 * the members safely without locking.
	 */
	list_for_each_entry_safe(vma, next_vma, &contested,
				 combined_links.destroy) {
		list_del_init(&vma->combined_links.destroy);
		xe_vma_destroy_unlocked(vma);
	}

	xe_svm_fini(vm);

	up_write(&vm->lock);

	down_write(&xe->usm.lock);
	if (vm->usm.asid) {
		void *lookup;

		xe_assert(xe, xe->info.has_asid);
		xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));

		lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
		xe_assert(xe, lookup == vm);
	}
	up_write(&xe->usm.lock);

	for_each_tile(tile, xe, id)
		xe_range_fence_tree_fini(&vm->rftree[id]);

	xe_vm_put(vm);
}

/* Deferred final free of a VM; runs from system_unbound_wq (see xe_vm_free). */
static void vm_destroy_work_func(struct work_struct *w)
{
	struct xe_vm *vm =
		container_of(w, struct xe_vm, destroy_work);
	struct xe_device *xe = vm->xe;
	struct xe_tile *tile;
	u8 id;

	/* xe_vm_close_and_put was not called? */
	xe_assert(xe, !vm->size);

	if (xe_vm_in_preempt_fence_mode(vm))
		flush_work(&vm->preempt.rebind_work);

	mutex_destroy(&vm->snap_mutex);

	if (vm->flags & XE_VM_FLAG_LR_MODE)
		xe_pm_runtime_put(xe);

	for_each_tile(tile, xe, id)
		XE_WARN_ON(vm->pt_root[id]);

	trace_xe_vm_free(vm);

	ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);

	if (vm->xef)
		xe_file_put(vm->xef);

	kfree(vm);
}

/* gpuvm .vm_free hook: last reference dropped, defer teardown to a worker. */
static void xe_vm_free(struct drm_gpuvm *gpuvm)
{
	struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);

	/* To destroy the VM we need to be able to sleep */
	queue_work(system_unbound_wq, &vm->destroy_work);
}

/* Look up a VM by uAPI id for @xef, returning it with a reference or NULL. */
struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
{
	struct xe_vm *vm;

	mutex_lock(&xef->vm.lock);
	vm = xa_load(&xef->vm.xa, id);
	if (vm)
		xe_vm_get(vm);
	mutex_unlock(&xef->vm.lock);

	return vm;
}

/* Encode the PDP4 descriptor (PDE for the tile's page-table root BO). */
u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
{
	return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0);
}

/* Pick the queue to wait on: the caller's @q, else the VM's default queue. */
static struct xe_exec_queue *
to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	return q ?
		   q : vm->q[0];
}

/* Return the first user-fence sync in @syncs with a reference, or NULL. */
static struct xe_user_fence *
find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
{
	unsigned int i;

	for (i = 0; i < num_syncs; i++) {
		struct xe_sync_entry *e = &syncs[i];

		if (xe_sync_is_ufence(e))
			return xe_sync_ufence_get(e);
	}

	return NULL;
}

#define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
				    DRM_XE_VM_CREATE_FLAG_LR_MODE | \
				    DRM_XE_VM_CREATE_FLAG_FAULT_MODE)

int xe_vm_create_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_vm_create *args = data;
	struct xe_gt *wa_gt = xe_root_mmio_gt(xe);
	struct xe_vm *vm;
	u32 id;
	int err;
	u32 flags = 0;

	if (XE_IOCTL_DBG(xe, args->extensions))
		return -EINVAL;

	/* Workaround forces scratch pages on for affected platforms */
	if (wa_gt && XE_GT_WA(wa_gt, 22014953428))
		args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;

	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
			 !xe->info.has_usm))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
			 !xe->info.needs_scratch))
		return -EINVAL;

	/* Fault mode requires LR mode */
	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
		return -EINVAL;

	if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
		flags |= XE_VM_FLAG_SCRATCH_PAGE;
	if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
		flags |= XE_VM_FLAG_LR_MODE;
	if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
		flags |= XE_VM_FLAG_FAULT_MODE;

	vm = xe_vm_create(xe, flags, xef);
	if (IS_ERR(vm))
		return PTR_ERR(vm);

#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
	/* Warning: Security issue - never enable by default */
	args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
#endif

	/* user id alloc must always be last in ioctl to prevent UAF */
	err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
	if (err)
		goto err_close_and_put;

	args->vm_id = id;

	return 0;

err_close_and_put:
	xe_vm_close_and_put(vm);

	return err;
}

int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_vm_destroy *args = data;
	struct xe_vm *vm;
	int err = 0;

	if (XE_IOCTL_DBG(xe, args->pad) ||
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	/* Erase the id under the lock so concurrent lookups can't race */
	mutex_lock(&xef->vm.lock);
	vm = xa_load(&xef->vm.xa, args->vm_id);
	if (XE_IOCTL_DBG(xe, !vm))
		err = -ENOENT;
	else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
		err = -EBUSY;
	else
		xa_erase(&xef->vm.xa, args->vm_id);
	mutex_unlock(&xef->vm.lock);

	if (!err)
		xe_vm_close_and_put(vm);

	return err;
}

/* Count VMAs overlapping [start, end) in @vm's GPUVA tree. */
static int xe_vm_query_vmas(struct xe_vm *vm, u64 start, u64 end)
{
	struct drm_gpuva *gpuva;
	u32 num_vmas = 0;

	lockdep_assert_held(&vm->lock);
	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end)
		num_vmas++;

	return num_vmas;
}

/*
 * Fill @attrs with the memory attributes of VMAs in [start, end).
 * @num_vmas is both the capacity in and the number written out;
 * returns -ENOSPC when more VMAs exist than fit.
 */
static int get_mem_attrs(struct xe_vm *vm, u32 *num_vmas, u64 start,
			 u64 end, struct drm_xe_mem_range_attr *attrs)
{
	struct drm_gpuva *gpuva;
	int i = 0;

	lockdep_assert_held(&vm->lock);

	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);

		if (i == *num_vmas)
			return -ENOSPC;

		attrs[i].start = xe_vma_start(vma);
		attrs[i].end = xe_vma_end(vma);
		attrs[i].atomic.val = vma->attr.atomic_access;
		attrs[i].pat_index.val = vma->attr.pat_index;
		attrs[i].preferred_mem_loc.devmem_fd = vma->attr.preferred_loc.devmem_fd;
		attrs[i].preferred_mem_loc.migration_policy =
			vma->attr.preferred_loc.migration_policy;

		i++;
	}

	*num_vmas = i;
	return 0;
}

int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_mem_range_attr *mem_attrs;
	struct drm_xe_vm_query_mem_range_attr *args = data;
	u64 __user *attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);
	struct xe_vm *vm;
	int err = 0;

	if (XE_IOCTL_DBG(xe,
			 ((args->num_mem_ranges == 0 &&
			   (attrs_user || args->sizeof_mem_range_attr != 0)) ||
			  (args->num_mem_ranges > 0 &&
			   (!attrs_user ||
			    args->sizeof_mem_range_attr !=
			    sizeof(struct drm_xe_mem_range_attr))))))
		return -EINVAL;

	vm = xe_vm_lookup(xef, args->vm_id);
	if (XE_IOCTL_DBG(xe, !vm))
		return -EINVAL;

	err = down_read_interruptible(&vm->lock);
	if (err)
		goto put_vm;

	attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);

	/* First call with num_mem_ranges == 0 is a size query */
	if (args->num_mem_ranges == 0 && !attrs_user) {
		args->num_mem_ranges = xe_vm_query_vmas(vm, args->start, args->start + args->range);
		args->sizeof_mem_range_attr = sizeof(struct drm_xe_mem_range_attr);
		goto unlock_vm;
	}

	mem_attrs = kvmalloc_array(args->num_mem_ranges, args->sizeof_mem_range_attr,
				   GFP_KERNEL | __GFP_ACCOUNT |
				   __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
	if (!mem_attrs) {
		err = args->num_mem_ranges > 1 ?
			-ENOBUFS : -ENOMEM;
		goto unlock_vm;
	}

	memset(mem_attrs, 0, args->num_mem_ranges * args->sizeof_mem_range_attr);
	err = get_mem_attrs(vm, &args->num_mem_ranges, args->start,
			    args->start + args->range, mem_attrs);
	if (err)
		goto free_mem_attrs;

	err = copy_to_user(attrs_user, mem_attrs,
			   args->sizeof_mem_range_attr * args->num_mem_ranges);
	if (err)
		err = -EFAULT;

free_mem_attrs:
	kvfree(mem_attrs);
unlock_vm:
	up_read(&vm->lock);
put_vm:
	xe_vm_put(vm);
	return err;
}

/* True if the 4K page at @page_addr overlaps @vma. */
static bool vma_matches(struct xe_vma *vma, u64 page_addr)
{
	if (page_addr > xe_vma_end(vma) - 1 ||
	    page_addr + SZ_4K - 1 < xe_vma_start(vma))
		return false;

	return true;
}

/**
 * xe_vm_find_vma_by_addr() - Find a VMA by its address
 *
 * @vm: the xe_vm the vma belongs to
 * @page_addr: address to look up
 */
struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr)
{
	struct xe_vma *vma = NULL;

	if (vm->usm.last_fault_vma) {	/* Fast lookup */
		if (vma_matches(vm->usm.last_fault_vma, page_addr))
			vma = vm->usm.last_fault_vma;
	}
	if (!vma)
		vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K);

	return vma;
}

/* uAPI prefetch region index -> TTM placement */
static const u32 region_to_mem_type[] = {
	XE_PL_TT,
	XE_PL_VRAM0,
	XE_PL_VRAM1,
};

/* Flag @vma destroyed under the notifier lock; optionally unlink it. */
static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
			     bool post_commit)
{
	xe_svm_notifier_lock(vm);
	vma->gpuva.flags |= XE_VMA_DESTROYED;
	xe_svm_notifier_unlock(vm);
	if (post_commit)
		xe_vm_remove_vma(vm, vma);
}

#undef ULL
#define ULL	unsigned long long

#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
/* Debug dump of a single gpuva op; compiled out without CONFIG_DRM_XE_DEBUG_VM. */
static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
{
	struct xe_vma *vma;

	switch (op->op) {
	case DRM_GPUVA_OP_MAP:
		vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
		       (ULL)op->map.va.addr, (ULL)op->map.va.range);
		break;
	case DRM_GPUVA_OP_REMAP:
		vma = gpuva_to_vma(op->remap.unmap->va);
		vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
		       op->remap.unmap->keep ? 1 : 0);
		if (op->remap.prev)
			vm_dbg(&xe->drm,
			       "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
			       (ULL)op->remap.prev->va.addr,
			       (ULL)op->remap.prev->va.range);
		if (op->remap.next)
			vm_dbg(&xe->drm,
			       "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
			       (ULL)op->remap.next->va.addr,
			       (ULL)op->remap.next->va.range);
		break;
	case DRM_GPUVA_OP_UNMAP:
		vma = gpuva_to_vma(op->unmap.va);
		vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
		       op->unmap.keep ? 1 : 0);
		break;
	case DRM_GPUVA_OP_PREFETCH:
		vma = gpuva_to_vma(op->prefetch.va);
		vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
		break;
	default:
		drm_warn(&xe->drm, "NOT POSSIBLE");
	}
}
#else
static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
{
}
#endif

/*
 * Scratch-backed fault-mode binds without IMMEDIATE need their scratch
 * PTEs invalidated on bind.
 */
static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags)
{
	if (!xe_vm_in_fault_mode(vm))
		return false;

	if (!xe_vm_has_scratch(vm))
		return false;

	if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE)
		return false;

	return true;
}

/* Release per-op SVM prefetch state for every op in @ops. */
static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops)
{
	struct drm_gpuva_op *__op;

	drm_gpuva_for_each_op(__op, ops) {
		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);

		xe_vma_svm_prefetch_op_fini(op);
	}
}

/*
 * Create operations list from IOCTL arguments, setup operations fields so parse
 * and commit steps are decoupled from IOCTL arguments. This step can fail.
 */
static struct drm_gpuva_ops *
vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
			 struct xe_bo *bo, u64 bo_offset_or_userptr,
			 u64 addr, u64 range,
			 u32 operation, u32 flags,
			 u32 prefetch_region, u16 pat_index)
{
	struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
	struct drm_gpuva_ops *ops;
	struct drm_gpuva_op *__op;
	struct drm_gpuvm_bo *vm_bo;
	u64 range_start = addr;
	u64 range_end = addr + range;
	int err;

	lockdep_assert_held_write(&vm->lock);

	vm_dbg(&vm->xe->drm,
	       "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
	       operation, (ULL)addr, (ULL)range,
	       (ULL)bo_offset_or_userptr);

	switch (operation) {
	case DRM_XE_VM_BIND_OP_MAP:
		if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) {
			/* CPU-mirror maps may widen to the surrounding mirror range */
			xe_vm_find_cpu_addr_mirror_vma_range(vm, &range_start, &range_end);
			vops->flags |= XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP;
		}

		fallthrough;
	case DRM_XE_VM_BIND_OP_MAP_USERPTR: {
		struct drm_gpuvm_map_req map_req = {
			.map.va.addr = range_start,
			.map.va.range = range_end - range_start,
			.map.gem.obj = obj,
			.map.gem.offset = bo_offset_or_userptr,
		};

		ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, &map_req);
		break;
	}
	case DRM_XE_VM_BIND_OP_UNMAP:
		ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
		break;
	case DRM_XE_VM_BIND_OP_PREFETCH:
		ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
		break;
	case DRM_XE_VM_BIND_OP_UNMAP_ALL:
		xe_assert(vm->xe, bo);

		err = xe_bo_lock(bo, true);
		if (err)
			return ERR_PTR(err);

		vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj);
		if (IS_ERR(vm_bo)) {
			xe_bo_unlock(bo);
			return ERR_CAST(vm_bo);
		}

		ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
		drm_gpuvm_bo_put(vm_bo);
		xe_bo_unlock(bo);
		break;
	default:
		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
		ops = ERR_PTR(-EINVAL);
	}
	if (IS_ERR(ops))
		return ops;

	drm_gpuva_for_each_op(__op, ops) {
		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);

		if (__op->op == DRM_GPUVA_OP_MAP) {
			/* Translate uAPI bind flags onto the new mapping */
			op->map.immediate =
				flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
			if (flags & DRM_XE_VM_BIND_FLAG_READONLY)
				op->map.vma_flags |= XE_VMA_READ_ONLY;
			if (flags & DRM_XE_VM_BIND_FLAG_NULL)
				op->map.vma_flags |= DRM_GPUVA_SPARSE;
			if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR)
				op->map.vma_flags |= XE_VMA_SYSTEM_ALLOCATOR;
			if (flags & DRM_XE_VM_BIND_FLAG_DUMPABLE)
				op->map.vma_flags |= XE_VMA_DUMPABLE;
			if (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET)
				op->map.vma_flags |= XE_VMA_MADV_AUTORESET;
			op->map.pat_index = pat_index;
			op->map.invalidate_on_bind =
				__xe_vm_needs_clear_scratch_pages(vm, flags);
		} else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
			struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
			struct xe_tile *tile;
			struct xe_svm_range *svm_range;
			struct drm_gpusvm_ctx ctx = {};
			struct drm_pagemap *dpagemap;
			u8 id, tile_mask = 0;
			u32 i;

			if (!xe_vma_is_cpu_addr_mirror(vma)) {
				op->prefetch.region = prefetch_region;
				break;
			}

			ctx.read_only = xe_vma_read_only(vma);
			ctx.devmem_possible = IS_DGFX(vm->xe) &&
					      IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);

			for_each_tile(tile, vm->xe, id)
				tile_mask |= 0x1 << id;

			xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC);
			op->prefetch_range.ranges_count = 0;
			tile = NULL;

			if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) {
				dpagemap = xe_vma_resolve_pagemap(vma,
								  xe_device_get_root_tile(vm->xe));
				/*
				 * TODO: Once multigpu support is enabled will
				 need
				 * something to dereference tile from dpagemap.
				 */
				if (dpagemap)
					tile = xe_device_get_root_tile(vm->xe);
			} else if (prefetch_region) {
				tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] -
						      XE_PL_VRAM0];
			}

			op->prefetch_range.tile = tile;
alloc_next_range:
			svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx);

			if (PTR_ERR(svm_range) == -ENOENT) {
				/* Gap in the mirror space: skip to the next VMA start */
				u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma);

				addr = ret == ULONG_MAX ? 0 : ret;
				if (addr)
					goto alloc_next_range;
				else
					goto print_op_label;
			}

			if (IS_ERR(svm_range)) {
				err = PTR_ERR(svm_range);
				goto unwind_prefetch_ops;
			}

			if (xe_svm_range_validate(vm, svm_range, tile_mask, !!tile)) {
				xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID");
				goto check_next_range;
			}

			err = xa_alloc(&op->prefetch_range.range,
				       &i, svm_range, xa_limit_32b,
				       GFP_KERNEL);

			if (err)
				goto unwind_prefetch_ops;

			op->prefetch_range.ranges_count++;
			vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH;
			xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED");
check_next_range:
			if (range_end > xe_svm_range_end(svm_range) &&
			    xe_svm_range_end(svm_range) < xe_vma_end(vma)) {
				addr = xe_svm_range_end(svm_range);
				goto alloc_next_range;
			}
		}
print_op_label:
		print_op(vm->xe, __op);
	}

	return ops;

unwind_prefetch_ops:
	xe_svm_prefetch_gpuva_ops_fini(ops);
	drm_gpuva_ops_free(&vm->gpuvm, ops);
	return ERR_PTR(err);
}

ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO);

/*
 * Create an xe_vma for a MAP op: BO-backed VMAs are created under a
 * validation/drm_exec transaction (locking the vm resv and BO, and adding
 * preempt fences for external BOs); userptr VMAs additionally pin pages.
 */
static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
			      struct xe_vma_mem_attr *attr, unsigned int flags)
{
	struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	struct xe_vma *vma;
	int err = 0;

	lockdep_assert_held_write(&vm->lock);

	if (bo) {
		err = 0;
		xe_validation_guard(&ctx, &vm->xe->val, &exec,
				    (struct xe_val_flags) {.interruptible = true}, err) {
			/* External BO: the vm resv must be locked separately */
			if (!bo->vm) {
				err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
				drm_exec_retry_on_contention(&exec);
			}
			if (!err) {
				err = drm_exec_lock_obj(&exec, &bo->ttm.base);
				drm_exec_retry_on_contention(&exec);
			}
			if (err)
				return ERR_PTR(err);

			vma = xe_vma_create(vm, bo, op->gem.offset,
					    op->va.addr, op->va.addr +
					    op->va.range - 1, attr, flags);
			if (IS_ERR(vma))
				return vma;

			if (!bo->vm) {
				err = add_preempt_fences(vm, bo);
				if (err) {
					prep_vma_destroy(vm, vma, false);
					xe_vma_destroy(vma, NULL);
				}
			}
		}
		if (err)
			return ERR_PTR(err);
	} else {
		vma = xe_vma_create(vm, NULL, op->gem.offset,
				    op->va.addr, op->va.addr +
				    op->va.range - 1, attr, flags);
		if (IS_ERR(vma))
			return vma;

		if (xe_vma_is_userptr(vma)) {
			err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
			/*
			 * -EBUSY has dedicated meaning that a user fence
			 * attached to the VMA is busy, in practice
			 * xe_vma_userptr_pin_pages can only fail with -EBUSY if
			 * we are low on memory so convert this to -ENOMEM.
			 */
			if (err == -EBUSY)
				err = -ENOMEM;
		}
	}
	if (err) {
		prep_vma_destroy(vm, vma, false);
		xe_vma_destroy_unlocked(vma);
		vma = ERR_PTR(err);
	}

	return vma;
}

/* Largest PTE size recorded on @vma, defaulting to 1G when unset. */
static u64 xe_vma_max_pte_size(struct xe_vma *vma)
{
	if (vma->gpuva.flags & XE_VMA_PTE_1G)
		return SZ_1G;
	else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT))
		return SZ_2M;
	else if (vma->gpuva.flags & XE_VMA_PTE_64K)
		return SZ_64K;
	else if (vma->gpuva.flags & XE_VMA_PTE_4K)
		return SZ_4K;

	return SZ_1G;	/* Uninitialized, used max size */
}

/* Record @size as a PTE-size flag on @vma; unknown sizes are ignored. */
static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
{
	switch (size) {
	case SZ_1G:
		vma->gpuva.flags |= XE_VMA_PTE_1G;
		break;
	case SZ_2M:
		vma->gpuva.flags |= XE_VMA_PTE_2M;
		break;
	case SZ_64K:
		vma->gpuva.flags |= XE_VMA_PTE_64K;
		break;
	case SZ_4K:
		vma->gpuva.flags |= XE_VMA_PTE_4K;
		break;
	}
}

/*
 * Commit a parsed VMA op into the GPUVA tree, recording COMMITTED flags so
 * a failed bind can be unwound precisely.
 */
static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
{
	int err = 0;

	lockdep_assert_held_write(&vm->lock);

	switch (op->base.op) {
	case DRM_GPUVA_OP_MAP:
		err |= xe_vm_insert_vma(vm, op->map.vma);
		if (!err)
			op->flags |= XE_VMA_OP_COMMITTED;
		break;
	case DRM_GPUVA_OP_REMAP:
	{
		u8 tile_present =
			gpuva_to_vma(op->base.remap.unmap->va)->tile_present;

		prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
				 true);
		op->flags |= XE_VMA_OP_COMMITTED;

		if (op->remap.prev) {
			err |= xe_vm_insert_vma(vm, op->remap.prev);
			if (!err)
				op->flags |= XE_VMA_OP_PREV_COMMITTED;
			if (!err && op->remap.skip_prev) {
				op->remap.prev->tile_present =
					tile_present;
				op->remap.prev = NULL;
			}
		}
		if (op->remap.next) {
			err |= xe_vm_insert_vma(vm, op->remap.next);
			if (!err)
				op->flags |=
XE_VMA_OP_NEXT_COMMITTED; 2546 if (!err && op->remap.skip_next) { 2547 op->remap.next->tile_present = 2548 tile_present; 2549 op->remap.next = NULL; 2550 } 2551 } 2552 2553 /* Adjust for partial unbind after removing VMA from VM */ 2554 if (!err) { 2555 op->base.remap.unmap->va->va.addr = op->remap.start; 2556 op->base.remap.unmap->va->va.range = op->remap.range; 2557 } 2558 break; 2559 } 2560 case DRM_GPUVA_OP_UNMAP: 2561 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true); 2562 op->flags |= XE_VMA_OP_COMMITTED; 2563 break; 2564 case DRM_GPUVA_OP_PREFETCH: 2565 op->flags |= XE_VMA_OP_COMMITTED; 2566 break; 2567 default: 2568 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2569 } 2570 2571 return err; 2572 } 2573 2574 /** 2575 * xe_vma_has_default_mem_attrs - Check if a VMA has default memory attributes 2576 * @vma: Pointer to the xe_vma structure to check 2577 * 2578 * This function determines whether the given VMA (Virtual Memory Area) 2579 * has its memory attributes set to their default values. Specifically, 2580 * it checks the following conditions: 2581 * 2582 * - `atomic_access` is `DRM_XE_VMA_ATOMIC_UNDEFINED` 2583 * - `pat_index` is equal to `default_pat_index` 2584 * - `preferred_loc.devmem_fd` is `DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE` 2585 * - `preferred_loc.migration_policy` is `DRM_XE_MIGRATE_ALL_PAGES` 2586 * 2587 * Return: true if all attributes are at their default values, false otherwise. 
2588 */ 2589 bool xe_vma_has_default_mem_attrs(struct xe_vma *vma) 2590 { 2591 return (vma->attr.atomic_access == DRM_XE_ATOMIC_UNDEFINED && 2592 vma->attr.pat_index == vma->attr.default_pat_index && 2593 vma->attr.preferred_loc.devmem_fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE && 2594 vma->attr.preferred_loc.migration_policy == DRM_XE_MIGRATE_ALL_PAGES); 2595 } 2596 2597 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, 2598 struct xe_vma_ops *vops) 2599 { 2600 struct xe_device *xe = vm->xe; 2601 struct drm_gpuva_op *__op; 2602 struct xe_tile *tile; 2603 u8 id, tile_mask = 0; 2604 int err = 0; 2605 2606 lockdep_assert_held_write(&vm->lock); 2607 2608 for_each_tile(tile, vm->xe, id) 2609 tile_mask |= 0x1 << id; 2610 2611 drm_gpuva_for_each_op(__op, ops) { 2612 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2613 struct xe_vma *vma; 2614 unsigned int flags = 0; 2615 2616 INIT_LIST_HEAD(&op->link); 2617 list_add_tail(&op->link, &vops->list); 2618 op->tile_mask = tile_mask; 2619 2620 switch (op->base.op) { 2621 case DRM_GPUVA_OP_MAP: 2622 { 2623 struct xe_vma_mem_attr default_attr = { 2624 .preferred_loc = { 2625 .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE, 2626 .migration_policy = DRM_XE_MIGRATE_ALL_PAGES, 2627 }, 2628 .atomic_access = DRM_XE_ATOMIC_UNDEFINED, 2629 .default_pat_index = op->map.pat_index, 2630 .pat_index = op->map.pat_index, 2631 }; 2632 2633 flags |= op->map.vma_flags & XE_VMA_CREATE_MASK; 2634 2635 vma = new_vma(vm, &op->base.map, &default_attr, 2636 flags); 2637 if (IS_ERR(vma)) 2638 return PTR_ERR(vma); 2639 2640 op->map.vma = vma; 2641 if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) && 2642 !(op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) || 2643 op->map.invalidate_on_bind) 2644 xe_vma_ops_incr_pt_update_ops(vops, 2645 op->tile_mask, 1); 2646 break; 2647 } 2648 case DRM_GPUVA_OP_REMAP: 2649 { 2650 struct xe_vma *old = 2651 gpuva_to_vma(op->base.remap.unmap->va); 2652 bool skip = 
xe_vma_is_cpu_addr_mirror(old); 2653 u64 start = xe_vma_start(old), end = xe_vma_end(old); 2654 int num_remap_ops = 0; 2655 2656 if (op->base.remap.prev) 2657 start = op->base.remap.prev->va.addr + 2658 op->base.remap.prev->va.range; 2659 if (op->base.remap.next) 2660 end = op->base.remap.next->va.addr; 2661 2662 if (xe_vma_is_cpu_addr_mirror(old) && 2663 xe_svm_has_mapping(vm, start, end)) { 2664 if (vops->flags & XE_VMA_OPS_FLAG_MADVISE) 2665 xe_svm_unmap_address_range(vm, start, end); 2666 else 2667 return -EBUSY; 2668 } 2669 2670 op->remap.start = xe_vma_start(old); 2671 op->remap.range = xe_vma_size(old); 2672 2673 flags |= op->base.remap.unmap->va->flags & XE_VMA_CREATE_MASK; 2674 if (op->base.remap.prev) { 2675 vma = new_vma(vm, op->base.remap.prev, 2676 &old->attr, flags); 2677 if (IS_ERR(vma)) 2678 return PTR_ERR(vma); 2679 2680 op->remap.prev = vma; 2681 2682 /* 2683 * Userptr creates a new SG mapping so 2684 * we must also rebind. 2685 */ 2686 op->remap.skip_prev = skip || 2687 (!xe_vma_is_userptr(old) && 2688 IS_ALIGNED(xe_vma_end(vma), 2689 xe_vma_max_pte_size(old))); 2690 if (op->remap.skip_prev) { 2691 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2692 op->remap.range -= 2693 xe_vma_end(vma) - 2694 xe_vma_start(old); 2695 op->remap.start = xe_vma_end(vma); 2696 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx", 2697 (ULL)op->remap.start, 2698 (ULL)op->remap.range); 2699 } else { 2700 num_remap_ops++; 2701 } 2702 } 2703 2704 if (op->base.remap.next) { 2705 vma = new_vma(vm, op->base.remap.next, 2706 &old->attr, flags); 2707 if (IS_ERR(vma)) 2708 return PTR_ERR(vma); 2709 2710 op->remap.next = vma; 2711 2712 /* 2713 * Userptr creates a new SG mapping so 2714 * we must also rebind. 
2715 */ 2716 op->remap.skip_next = skip || 2717 (!xe_vma_is_userptr(old) && 2718 IS_ALIGNED(xe_vma_start(vma), 2719 xe_vma_max_pte_size(old))); 2720 if (op->remap.skip_next) { 2721 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2722 op->remap.range -= 2723 xe_vma_end(old) - 2724 xe_vma_start(vma); 2725 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx", 2726 (ULL)op->remap.start, 2727 (ULL)op->remap.range); 2728 } else { 2729 num_remap_ops++; 2730 } 2731 } 2732 if (!skip) 2733 num_remap_ops++; 2734 2735 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops); 2736 break; 2737 } 2738 case DRM_GPUVA_OP_UNMAP: 2739 vma = gpuva_to_vma(op->base.unmap.va); 2740 2741 if (xe_vma_is_cpu_addr_mirror(vma) && 2742 xe_svm_has_mapping(vm, xe_vma_start(vma), 2743 xe_vma_end(vma)) && 2744 !(vops->flags & XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP)) 2745 return -EBUSY; 2746 2747 if (!xe_vma_is_cpu_addr_mirror(vma)) 2748 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2749 break; 2750 case DRM_GPUVA_OP_PREFETCH: 2751 vma = gpuva_to_vma(op->base.prefetch.va); 2752 2753 if (xe_vma_is_userptr(vma)) { 2754 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2755 if (err) 2756 return err; 2757 } 2758 2759 if (xe_vma_is_cpu_addr_mirror(vma)) 2760 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 2761 op->prefetch_range.ranges_count); 2762 else 2763 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2764 2765 break; 2766 default: 2767 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2768 } 2769 2770 err = xe_vma_op_commit(vm, op); 2771 if (err) 2772 return err; 2773 } 2774 2775 return 0; 2776 } 2777 2778 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, 2779 bool post_commit, bool prev_post_commit, 2780 bool next_post_commit) 2781 { 2782 lockdep_assert_held_write(&vm->lock); 2783 2784 switch (op->base.op) { 2785 case DRM_GPUVA_OP_MAP: 2786 if (op->map.vma) { 2787 prep_vma_destroy(vm, op->map.vma, post_commit); 2788 
			xe_vma_destroy_unlocked(op->map.vma);
		}
		break;
	case DRM_GPUVA_OP_UNMAP:
	{
		struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);

		/*
		 * Clear the DESTROYED marker under the SVM notifier lock
		 * and, if the unmap had been committed, re-insert the VMA
		 * into the GPUVA tree to restore the pre-ioctl state.
		 */
		if (vma) {
			xe_svm_notifier_lock(vm);
			vma->gpuva.flags &= ~XE_VMA_DESTROYED;
			xe_svm_notifier_unlock(vm);
			if (post_commit)
				xe_vm_insert_vma(vm, vma);
		}
		break;
	}
	case DRM_GPUVA_OP_REMAP:
	{
		struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);

		/* Tear down any prev/next split VMAs that were created. */
		if (op->remap.prev) {
			prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
			xe_vma_destroy_unlocked(op->remap.prev);
		}
		if (op->remap.next) {
			prep_vma_destroy(vm, op->remap.next, next_post_commit);
			xe_vma_destroy_unlocked(op->remap.next);
		}
		/* Resurrect the original VMA, as in the UNMAP case above. */
		if (vma) {
			xe_svm_notifier_lock(vm);
			vma->gpuva.flags &= ~XE_VMA_DESTROYED;
			xe_svm_notifier_unlock(vm);
			if (post_commit)
				xe_vm_insert_vma(vm, vma);
		}
		break;
	}
	case DRM_GPUVA_OP_PREFETCH:
		/* Nothing to do */
		break;
	default:
		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
	}
}

/*
 * Unwind all committed GPUVA operations, newest ops list first and in
 * reverse op order within each list, undoing what xe_vma_op_commit()
 * did. The per-op XE_VMA_OP_*COMMITTED flags tell xe_vma_op_unwind()
 * which parts of the op actually need undoing.
 */
static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
				     struct drm_gpuva_ops **ops,
				     int num_ops_list)
{
	int i;

	for (i = num_ops_list - 1; i >= 0; --i) {
		struct drm_gpuva_ops *__ops = ops[i];
		struct drm_gpuva_op *__op;

		if (!__ops)
			continue;

		drm_gpuva_for_each_op_reverse(__op, __ops) {
			struct xe_vma_op *op = gpuva_op_to_vma_op(__op);

			xe_vma_op_unwind(vm, op,
					 op->flags & XE_VMA_OP_COMMITTED,
					 op->flags & XE_VMA_OP_PREV_COMMITTED,
					 op->flags & XE_VMA_OP_NEXT_COMMITTED);
		}
	}
}

/*
 * Lock the VMA's backing BO via drm_exec — only needed when the BO does
 * not share the VM's reservation object (!bo->vm) — and optionally
 * validate (place) it. NOTE(review): eviction during validation is
 * additionally gated by @res_evict and by preempt-fence mode.
 */
static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
				 bool res_evict, bool validate)
{
	struct xe_bo *bo = xe_vma_bo(vma);
	struct xe_vm *vm = xe_vma_vm(vma);
	int err = 0;

	if (bo) {
		if (!bo->vm)
			err = drm_exec_lock_obj(exec,
						&bo->ttm.base);
		if (!err && validate)
			err = xe_bo_validate(bo, vm,
					     !xe_vm_in_preempt_fence_mode(vm) &&
					     res_evict, exec);
	}

	return err;
}

/*
 * If a user fence is attached to the VMA, return -EBUSY while it is
 * still unsignaled; once signaled, detach it and drop the reference so
 * the pending bind op may proceed.
 */
static int check_ufence(struct xe_vma *vma)
{
	if (vma->ufence) {
		struct xe_user_fence * const f = vma->ufence;

		if (!xe_sync_ufence_get_status(f))
			return -EBUSY;

		vma->ufence = NULL;
		xe_sync_ufence_put(f);
	}

	return 0;
}

/*
 * For a CPU-address-mirror (SVM) VMA, walk the SVM ranges collected in
 * op->prefetch_range and migrate/populate their pages ahead of the
 * bind. A NULL op->prefetch_range.tile means the destination is system
 * memory rather than VRAM.
 */
static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
{
	bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
	struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
	struct xe_tile *tile = op->prefetch_range.tile;
	int err = 0;

	struct xe_svm_range *svm_range;
	struct drm_gpusvm_ctx ctx = {};
	unsigned long i;

	/* Only CPU-address-mirror VMAs carry prefetch ranges. */
	if (!xe_vma_is_cpu_addr_mirror(vma))
		return 0;

	ctx.read_only = xe_vma_read_only(vma);
	ctx.devmem_possible = devmem_possible;
	ctx.check_pages_threshold = devmem_possible ?
SZ_64K : 0; 2908 ctx.device_private_page_owner = xe_svm_devm_owner(vm->xe); 2909 2910 /* TODO: Threading the migration */ 2911 xa_for_each(&op->prefetch_range.range, i, svm_range) { 2912 if (!tile) 2913 xe_svm_range_migrate_to_smem(vm, svm_range); 2914 2915 if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, !!tile)) { 2916 err = xe_svm_alloc_vram(tile, svm_range, &ctx); 2917 if (err) { 2918 drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n", 2919 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 2920 return -ENODATA; 2921 } 2922 xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM"); 2923 } 2924 2925 err = xe_svm_range_get_pages(vm, svm_range, &ctx); 2926 if (err) { 2927 drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n", 2928 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 2929 if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) 2930 err = -ENODATA; 2931 return err; 2932 } 2933 xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE"); 2934 } 2935 2936 return err; 2937 } 2938 2939 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, 2940 struct xe_vma_ops *vops, struct xe_vma_op *op) 2941 { 2942 int err = 0; 2943 bool res_evict; 2944 2945 /* 2946 * We only allow evicting a BO within the VM if it is not part of an 2947 * array of binds, as an array of binds can evict another BO within the 2948 * bind. 
2949 */ 2950 res_evict = !(vops->flags & XE_VMA_OPS_ARRAY_OF_BINDS); 2951 2952 switch (op->base.op) { 2953 case DRM_GPUVA_OP_MAP: 2954 if (!op->map.invalidate_on_bind) 2955 err = vma_lock_and_validate(exec, op->map.vma, 2956 res_evict, 2957 !xe_vm_in_fault_mode(vm) || 2958 op->map.immediate); 2959 break; 2960 case DRM_GPUVA_OP_REMAP: 2961 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va)); 2962 if (err) 2963 break; 2964 2965 err = vma_lock_and_validate(exec, 2966 gpuva_to_vma(op->base.remap.unmap->va), 2967 res_evict, false); 2968 if (!err && op->remap.prev) 2969 err = vma_lock_and_validate(exec, op->remap.prev, 2970 res_evict, true); 2971 if (!err && op->remap.next) 2972 err = vma_lock_and_validate(exec, op->remap.next, 2973 res_evict, true); 2974 break; 2975 case DRM_GPUVA_OP_UNMAP: 2976 err = check_ufence(gpuva_to_vma(op->base.unmap.va)); 2977 if (err) 2978 break; 2979 2980 err = vma_lock_and_validate(exec, 2981 gpuva_to_vma(op->base.unmap.va), 2982 res_evict, false); 2983 break; 2984 case DRM_GPUVA_OP_PREFETCH: 2985 { 2986 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2987 u32 region; 2988 2989 if (!xe_vma_is_cpu_addr_mirror(vma)) { 2990 region = op->prefetch.region; 2991 xe_assert(vm->xe, region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC || 2992 region <= ARRAY_SIZE(region_to_mem_type)); 2993 } 2994 2995 err = vma_lock_and_validate(exec, 2996 gpuva_to_vma(op->base.prefetch.va), 2997 res_evict, false); 2998 if (!err && !xe_vma_has_no_bo(vma)) 2999 err = xe_bo_migrate(xe_vma_bo(vma), 3000 region_to_mem_type[region], 3001 NULL, 3002 exec); 3003 break; 3004 } 3005 default: 3006 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 3007 } 3008 3009 return err; 3010 } 3011 3012 static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops) 3013 { 3014 struct xe_vma_op *op; 3015 int err; 3016 3017 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) 3018 return 0; 3019 3020 list_for_each_entry(op, &vops->list, link) { 3021 if (op->base.op 
		    == DRM_GPUVA_OP_PREFETCH) {
			err = prefetch_ranges(vm, op);
			if (err)
				return err;
		}
	}

	return 0;
}

/*
 * Lock the VM's reservation plus every object touched by the pending
 * ops, validating/migrating BOs as needed, so ops_execute() can run
 * without further locking. Called inside the drm_exec/validation retry
 * loop of vm_bind_ioctl_ops_execute().
 */
static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
					   struct xe_vm *vm,
					   struct xe_vma_ops *vops)
{
	struct xe_vma_op *op;
	int err;

	err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
	if (err)
		return err;

	list_for_each_entry(op, &vops->list, link) {
		err = op_lock_and_prep(exec, vm, vops, op);
		if (err)
			return err;
	}

#ifdef TEST_VM_OPS_ERROR
	/* Test-only error injection point, see FORCE_OP_ERROR. */
	if (vops->inject_error &&
	    vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK)
		return -ENOSPC;
#endif

	return 0;
}

/* Emit bind/unbind tracepoints matching the GPUVA op type. */
static void op_trace(struct xe_vma_op *op)
{
	switch (op->base.op) {
	case DRM_GPUVA_OP_MAP:
		trace_xe_vma_bind(op->map.vma);
		break;
	case DRM_GPUVA_OP_REMAP:
		/* A remap is an unbind of the old VMA plus up to two binds. */
		trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va));
		if (op->remap.prev)
			trace_xe_vma_bind(op->remap.prev);
		if (op->remap.next)
			trace_xe_vma_bind(op->remap.next);
		break;
	case DRM_GPUVA_OP_UNMAP:
		trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va));
		break;
	case DRM_GPUVA_OP_PREFETCH:
		trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va));
		break;
	case DRM_GPUVA_OP_DRIVER:
		break;
	default:
		XE_WARN_ON("NOT POSSIBLE");
	}
}

/* Trace every op in the list prior to execution. */
static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops)
{
	struct xe_vma_op *op;

	list_for_each_entry(op, &vops->list, link)
		op_trace(op);
}

/*
 * Assign an exec queue to each tile's pt_update_ops slot (falling back
 * to the VM's default queue when none was supplied) and count the tiles
 * that actually have ops to run. Returns that tile count.
 */
static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops)
{
	struct xe_exec_queue *q = vops->q;
	struct xe_tile *tile;
	int number_tiles = 0;
	u8 id;

	for_each_tile(tile, vm->xe, id) {
		if (vops->pt_update_ops[id].num_ops)
			++number_tiles;

		if (vops->pt_update_ops[id].q)
			continue;

if (q) { 3106 vops->pt_update_ops[id].q = q; 3107 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list)) 3108 q = list_next_entry(q, multi_gt_list); 3109 } else { 3110 vops->pt_update_ops[id].q = vm->q[id]; 3111 } 3112 } 3113 3114 return number_tiles; 3115 } 3116 3117 static struct dma_fence *ops_execute(struct xe_vm *vm, 3118 struct xe_vma_ops *vops) 3119 { 3120 struct xe_tile *tile; 3121 struct dma_fence *fence = NULL; 3122 struct dma_fence **fences = NULL; 3123 struct dma_fence_array *cf = NULL; 3124 int number_tiles = 0, current_fence = 0, n_fence = 0, err, i; 3125 u8 id; 3126 3127 number_tiles = vm_ops_setup_tile_args(vm, vops); 3128 if (number_tiles == 0) 3129 return ERR_PTR(-ENODATA); 3130 3131 for_each_tile(tile, vm->xe, id) { 3132 ++n_fence; 3133 3134 if (!(vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT)) 3135 for_each_tlb_inval(i) 3136 ++n_fence; 3137 } 3138 3139 fences = kmalloc_array(n_fence, sizeof(*fences), GFP_KERNEL); 3140 if (!fences) { 3141 fence = ERR_PTR(-ENOMEM); 3142 goto err_trace; 3143 } 3144 3145 cf = dma_fence_array_alloc(n_fence); 3146 if (!cf) { 3147 fence = ERR_PTR(-ENOMEM); 3148 goto err_out; 3149 } 3150 3151 for_each_tile(tile, vm->xe, id) { 3152 if (!vops->pt_update_ops[id].num_ops) 3153 continue; 3154 3155 err = xe_pt_update_ops_prepare(tile, vops); 3156 if (err) { 3157 fence = ERR_PTR(err); 3158 goto err_out; 3159 } 3160 } 3161 3162 trace_xe_vm_ops_execute(vops); 3163 3164 for_each_tile(tile, vm->xe, id) { 3165 struct xe_exec_queue *q = vops->pt_update_ops[tile->id].q; 3166 3167 fence = NULL; 3168 if (!vops->pt_update_ops[id].num_ops) 3169 goto collect_fences; 3170 3171 fence = xe_pt_update_ops_run(tile, vops); 3172 if (IS_ERR(fence)) 3173 goto err_out; 3174 3175 collect_fences: 3176 fences[current_fence++] = fence ?: dma_fence_get_stub(); 3177 if (vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT) 3178 continue; 3179 3180 xe_migrate_job_lock(tile->migrate, q); 3181 for_each_tlb_inval(i) 3182 fences[current_fence++] = 3183 
				xe_exec_queue_tlb_inval_last_fence_get(q, vm, i);
		xe_migrate_job_unlock(tile->migrate, q);
	}

	xe_assert(vm->xe, current_fence == n_fence);
	/* Bundle all collected per-tile/TLB fences into one array fence. */
	dma_fence_array_init(cf, n_fence, fences, dma_fence_context_alloc(1),
			     1, false);
	fence = &cf->base;

	for_each_tile(tile, vm->xe, id) {
		if (!vops->pt_update_ops[id].num_ops)
			continue;

		xe_pt_update_ops_fini(tile, vops);
	}

	return fence;

err_out:
	for_each_tile(tile, vm->xe, id) {
		if (!vops->pt_update_ops[id].num_ops)
			continue;

		xe_pt_update_ops_abort(tile, vops);
	}
	/*
	 * Drop fences collected so far. cf was never initialized as a
	 * fence (dma_fence_array_init() not reached), so plain kfree()
	 * is presumably sufficient — no fence reference to put. TODO
	 * confirm against dma_fence_array_alloc() semantics.
	 */
	while (current_fence)
		dma_fence_put(fences[--current_fence]);
	kfree(fences);
	kfree(cf);

err_trace:
	trace_xe_vm_ops_fail(vm);
	return fence;
}

/* Attach @ufence to @vma, releasing any previously attached one. */
static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence)
{
	if (vma->ufence)
		xe_sync_ufence_put(vma->ufence);
	vma->ufence = __xe_sync_ufence_get(ufence);
}

/*
 * Attach the user fence to every VMA the op creates or touches.
 * CPU-address-mirror MAPs are skipped; a plain UNMAP leaves no VMA to
 * attach to.
 */
static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
			  struct xe_user_fence *ufence)
{
	switch (op->base.op) {
	case DRM_GPUVA_OP_MAP:
		if (!xe_vma_is_cpu_addr_mirror(op->map.vma))
			vma_add_ufence(op->map.vma, ufence);
		break;
	case DRM_GPUVA_OP_REMAP:
		if (op->remap.prev)
			vma_add_ufence(op->remap.prev, ufence);
		if (op->remap.next)
			vma_add_ufence(op->remap.next, ufence);
		break;
	case DRM_GPUVA_OP_UNMAP:
		break;
	case DRM_GPUVA_OP_PREFETCH:
		vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence);
		break;
	default:
		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
	}
}

/*
 * Post-execution fixups: propagate any user fence to the ops' VMAs,
 * destroy VMAs removed by UNMAP/REMAP (destruction deferred to @fence),
 * and signal the ioctl's sync entries.
 */
static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
				   struct dma_fence *fence)
{
	struct xe_user_fence *ufence;
	struct xe_vma_op *op;
	int i;

	ufence = find_ufence_get(vops->syncs, vops->num_syncs);
	list_for_each_entry(op,
&vops->list, link) { 3258 if (ufence) 3259 op_add_ufence(vm, op, ufence); 3260 3261 if (op->base.op == DRM_GPUVA_OP_UNMAP) 3262 xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence); 3263 else if (op->base.op == DRM_GPUVA_OP_REMAP) 3264 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), 3265 fence); 3266 } 3267 if (ufence) 3268 xe_sync_ufence_put(ufence); 3269 if (fence) { 3270 for (i = 0; i < vops->num_syncs; i++) 3271 xe_sync_entry_signal(vops->syncs + i, fence); 3272 } 3273 } 3274 3275 static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm, 3276 struct xe_vma_ops *vops) 3277 { 3278 struct xe_validation_ctx ctx; 3279 struct drm_exec exec; 3280 struct dma_fence *fence; 3281 int err = 0; 3282 3283 lockdep_assert_held_write(&vm->lock); 3284 3285 xe_validation_guard(&ctx, &vm->xe->val, &exec, 3286 ((struct xe_val_flags) { 3287 .interruptible = true, 3288 .exec_ignore_duplicates = true, 3289 }), err) { 3290 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops); 3291 drm_exec_retry_on_contention(&exec); 3292 xe_validation_retry_on_oom(&ctx, &err); 3293 if (err) 3294 return ERR_PTR(err); 3295 3296 xe_vm_set_validation_exec(vm, &exec); 3297 fence = ops_execute(vm, vops); 3298 xe_vm_set_validation_exec(vm, NULL); 3299 if (IS_ERR(fence)) { 3300 if (PTR_ERR(fence) == -ENODATA) 3301 vm_bind_ioctl_ops_fini(vm, vops, NULL); 3302 return fence; 3303 } 3304 3305 vm_bind_ioctl_ops_fini(vm, vops, fence); 3306 } 3307 3308 return err ? 
ERR_PTR(err) : fence; 3309 } 3310 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); 3311 3312 #define SUPPORTED_FLAGS_STUB \ 3313 (DRM_XE_VM_BIND_FLAG_READONLY | \ 3314 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \ 3315 DRM_XE_VM_BIND_FLAG_NULL | \ 3316 DRM_XE_VM_BIND_FLAG_DUMPABLE | \ 3317 DRM_XE_VM_BIND_FLAG_CHECK_PXP | \ 3318 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR | \ 3319 DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) 3320 3321 #ifdef TEST_VM_OPS_ERROR 3322 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR) 3323 #else 3324 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB 3325 #endif 3326 3327 #define XE_64K_PAGE_MASK 0xffffull 3328 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) 3329 3330 static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, 3331 struct drm_xe_vm_bind *args, 3332 struct drm_xe_vm_bind_op **bind_ops) 3333 { 3334 int err; 3335 int i; 3336 3337 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) || 3338 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 3339 return -EINVAL; 3340 3341 if (XE_IOCTL_DBG(xe, args->extensions)) 3342 return -EINVAL; 3343 3344 if (args->num_binds > 1) { 3345 u64 __user *bind_user = 3346 u64_to_user_ptr(args->vector_of_binds); 3347 3348 *bind_ops = kvmalloc_array(args->num_binds, 3349 sizeof(struct drm_xe_vm_bind_op), 3350 GFP_KERNEL | __GFP_ACCOUNT | 3351 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3352 if (!*bind_ops) 3353 return args->num_binds > 1 ? 
-ENOBUFS : -ENOMEM; 3354 3355 err = copy_from_user(*bind_ops, bind_user, 3356 sizeof(struct drm_xe_vm_bind_op) * 3357 args->num_binds); 3358 if (XE_IOCTL_DBG(xe, err)) { 3359 err = -EFAULT; 3360 goto free_bind_ops; 3361 } 3362 } else { 3363 *bind_ops = &args->bind; 3364 } 3365 3366 for (i = 0; i < args->num_binds; ++i) { 3367 u64 range = (*bind_ops)[i].range; 3368 u64 addr = (*bind_ops)[i].addr; 3369 u32 op = (*bind_ops)[i].op; 3370 u32 flags = (*bind_ops)[i].flags; 3371 u32 obj = (*bind_ops)[i].obj; 3372 u64 obj_offset = (*bind_ops)[i].obj_offset; 3373 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance; 3374 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 3375 bool is_cpu_addr_mirror = flags & 3376 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR; 3377 u16 pat_index = (*bind_ops)[i].pat_index; 3378 u16 coh_mode; 3379 3380 if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror && 3381 (!xe_vm_in_fault_mode(vm) || 3382 !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) { 3383 err = -EINVAL; 3384 goto free_bind_ops; 3385 } 3386 3387 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) { 3388 err = -EINVAL; 3389 goto free_bind_ops; 3390 } 3391 3392 pat_index = array_index_nospec(pat_index, xe->pat.n_entries); 3393 (*bind_ops)[i].pat_index = pat_index; 3394 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); 3395 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */ 3396 err = -EINVAL; 3397 goto free_bind_ops; 3398 } 3399 3400 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) { 3401 err = -EINVAL; 3402 goto free_bind_ops; 3403 } 3404 3405 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) || 3406 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) || 3407 XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) || 3408 XE_IOCTL_DBG(xe, obj_offset && (is_null || 3409 is_cpu_addr_mirror)) || 3410 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP && 3411 (is_null || is_cpu_addr_mirror)) || 3412 XE_IOCTL_DBG(xe, !obj && 3413 op == DRM_XE_VM_BIND_OP_MAP && 3414 !is_null && !is_cpu_addr_mirror) || 3415 
XE_IOCTL_DBG(xe, !obj && 3416 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3417 XE_IOCTL_DBG(xe, addr && 3418 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3419 XE_IOCTL_DBG(xe, range && 3420 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3421 XE_IOCTL_DBG(xe, obj && 3422 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 3423 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 3424 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 3425 XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR && 3426 !IS_ENABLED(CONFIG_DRM_GPUSVM)) || 3427 XE_IOCTL_DBG(xe, obj && 3428 op == DRM_XE_VM_BIND_OP_PREFETCH) || 3429 XE_IOCTL_DBG(xe, prefetch_region && 3430 op != DRM_XE_VM_BIND_OP_PREFETCH) || 3431 XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC && 3432 /* Guard against undefined shift in BIT(prefetch_region) */ 3433 (prefetch_region >= (sizeof(xe->info.mem_region_mask) * 8) || 3434 !(BIT(prefetch_region) & xe->info.mem_region_mask)))) || 3435 XE_IOCTL_DBG(xe, obj && 3436 op == DRM_XE_VM_BIND_OP_UNMAP) || 3437 XE_IOCTL_DBG(xe, (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) && 3438 (!is_cpu_addr_mirror || op != DRM_XE_VM_BIND_OP_MAP))) { 3439 err = -EINVAL; 3440 goto free_bind_ops; 3441 } 3442 3443 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) || 3444 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) || 3445 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) || 3446 XE_IOCTL_DBG(xe, !range && 3447 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) { 3448 err = -EINVAL; 3449 goto free_bind_ops; 3450 } 3451 } 3452 3453 return 0; 3454 3455 free_bind_ops: 3456 if (args->num_binds > 1) 3457 kvfree(*bind_ops); 3458 *bind_ops = NULL; 3459 return err; 3460 } 3461 3462 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, 3463 struct xe_exec_queue *q, 3464 struct xe_sync_entry *syncs, 3465 int num_syncs) 3466 { 3467 struct dma_fence *fence = NULL; 3468 int i, err = 0; 3469 3470 if (num_syncs) { 3471 fence = xe_sync_in_fence_get(syncs, num_syncs, 3472 to_wait_exec_queue(vm, q), vm); 3473 if (IS_ERR(fence)) 3474 return PTR_ERR(fence); 3475 3476 for (i = 
	     0; i < num_syncs; i++)
			xe_sync_entry_signal(&syncs[i], fence);
	}

	dma_fence_put(fence);

	return err;
}

/* Zero and initialize a struct xe_vma_ops before building its op list. */
static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
			    struct xe_exec_queue *q,
			    struct xe_sync_entry *syncs, u32 num_syncs)
{
	memset(vops, 0, sizeof(*vops));
	INIT_LIST_HEAD(&vops->list);
	vops->vm = vm;
	vops->q = q;
	vops->syncs = syncs;
	vops->num_syncs = num_syncs;
	vops->flags = 0;
}

/*
 * Validate a bind op's BO-related arguments: compression vs PAT index,
 * range/offset within the BO, 64k alignment where the platform needs
 * it, coherency mode vs CPU caching, and PXP key state for protected
 * BOs.
 */
static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
					u64 addr, u64 range, u64 obj_offset,
					u16 pat_index, u32 op, u32 bind_flags)
{
	u16 coh_mode;

	if (XE_IOCTL_DBG(xe, (bo->flags & XE_BO_FLAG_NO_COMPRESSION) &&
			 xe_pat_index_get_comp_en(xe, pat_index)))
		return -EINVAL;

	/* The mapped range must lie entirely within the BO. */
	if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) ||
	    XE_IOCTL_DBG(xe, obj_offset >
			 xe_bo_size(bo) - range)) {
		return -EINVAL;
	}

	/*
	 * Some platforms require 64k VM_BIND alignment,
	 * specifically those with XE_VRAM_FLAGS_NEED64K.
	 *
	 * Other platforms may have BO's set to 64k physical placement,
	 * but can be mapped at 4k offsets anyway. This check is only
	 * there for the former case.
3521 */ 3522 if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) && 3523 (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) { 3524 if (XE_IOCTL_DBG(xe, obj_offset & 3525 XE_64K_PAGE_MASK) || 3526 XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || 3527 XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) { 3528 return -EINVAL; 3529 } 3530 } 3531 3532 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); 3533 if (bo->cpu_caching) { 3534 if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 3535 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) { 3536 return -EINVAL; 3537 } 3538 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) { 3539 /* 3540 * Imported dma-buf from a different device should 3541 * require 1way or 2way coherency since we don't know 3542 * how it was mapped on the CPU. Just assume is it 3543 * potentially cached on CPU side. 3544 */ 3545 return -EINVAL; 3546 } 3547 3548 /* If a BO is protected it can only be mapped if the key is still valid */ 3549 if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) && 3550 op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL) 3551 if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0)) 3552 return -ENOEXEC; 3553 3554 return 0; 3555 } 3556 3557 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3558 { 3559 struct xe_device *xe = to_xe_device(dev); 3560 struct xe_file *xef = to_xe_file(file); 3561 struct drm_xe_vm_bind *args = data; 3562 struct drm_xe_sync __user *syncs_user; 3563 struct xe_bo **bos = NULL; 3564 struct drm_gpuva_ops **ops = NULL; 3565 struct xe_vm *vm; 3566 struct xe_exec_queue *q = NULL; 3567 u32 num_syncs, num_ufence = 0; 3568 struct xe_sync_entry *syncs = NULL; 3569 struct drm_xe_vm_bind_op *bind_ops = NULL; 3570 struct xe_vma_ops vops; 3571 struct dma_fence *fence; 3572 int err; 3573 int i; 3574 3575 vm = xe_vm_lookup(xef, args->vm_id); 3576 if (XE_IOCTL_DBG(xe, !vm)) 3577 return -EINVAL; 3578 3579 err = vm_bind_ioctl_check_args(xe, vm, args, &bind_ops); 3580 if 
(err) 3581 goto put_vm; 3582 3583 if (args->exec_queue_id) { 3584 q = xe_exec_queue_lookup(xef, args->exec_queue_id); 3585 if (XE_IOCTL_DBG(xe, !q)) { 3586 err = -ENOENT; 3587 goto free_bind_ops; 3588 } 3589 3590 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) { 3591 err = -EINVAL; 3592 goto put_exec_queue; 3593 } 3594 } 3595 3596 /* Ensure all UNMAPs visible */ 3597 xe_svm_flush(vm); 3598 3599 err = down_write_killable(&vm->lock); 3600 if (err) 3601 goto put_exec_queue; 3602 3603 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { 3604 err = -ENOENT; 3605 goto release_vm_lock; 3606 } 3607 3608 for (i = 0; i < args->num_binds; ++i) { 3609 u64 range = bind_ops[i].range; 3610 u64 addr = bind_ops[i].addr; 3611 3612 if (XE_IOCTL_DBG(xe, range > vm->size) || 3613 XE_IOCTL_DBG(xe, addr > vm->size - range)) { 3614 err = -EINVAL; 3615 goto release_vm_lock; 3616 } 3617 } 3618 3619 if (args->num_binds) { 3620 bos = kvcalloc(args->num_binds, sizeof(*bos), 3621 GFP_KERNEL | __GFP_ACCOUNT | 3622 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3623 if (!bos) { 3624 err = -ENOMEM; 3625 goto release_vm_lock; 3626 } 3627 3628 ops = kvcalloc(args->num_binds, sizeof(*ops), 3629 GFP_KERNEL | __GFP_ACCOUNT | 3630 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3631 if (!ops) { 3632 err = -ENOMEM; 3633 goto free_bos; 3634 } 3635 } 3636 3637 for (i = 0; i < args->num_binds; ++i) { 3638 struct drm_gem_object *gem_obj; 3639 u64 range = bind_ops[i].range; 3640 u64 addr = bind_ops[i].addr; 3641 u32 obj = bind_ops[i].obj; 3642 u64 obj_offset = bind_ops[i].obj_offset; 3643 u16 pat_index = bind_ops[i].pat_index; 3644 u32 op = bind_ops[i].op; 3645 u32 bind_flags = bind_ops[i].flags; 3646 3647 if (!obj) 3648 continue; 3649 3650 gem_obj = drm_gem_object_lookup(file, obj); 3651 if (XE_IOCTL_DBG(xe, !gem_obj)) { 3652 err = -ENOENT; 3653 goto put_obj; 3654 } 3655 bos[i] = gem_to_xe_bo(gem_obj); 3656 3657 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range, 3658 obj_offset, pat_index, op, 3659 
bind_flags); 3660 if (err) 3661 goto put_obj; 3662 } 3663 3664 if (args->num_syncs) { 3665 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); 3666 if (!syncs) { 3667 err = -ENOMEM; 3668 goto put_obj; 3669 } 3670 } 3671 3672 syncs_user = u64_to_user_ptr(args->syncs); 3673 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { 3674 struct xe_exec_queue *__q = q ?: vm->q[0]; 3675 3676 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], 3677 &syncs_user[num_syncs], 3678 __q->ufence_syncobj, 3679 ++__q->ufence_timeline_value, 3680 (xe_vm_in_lr_mode(vm) ? 3681 SYNC_PARSE_FLAG_LR_MODE : 0) | 3682 (!args->num_binds ? 3683 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0)); 3684 if (err) 3685 goto free_syncs; 3686 3687 if (xe_sync_is_ufence(&syncs[num_syncs])) 3688 num_ufence++; 3689 } 3690 3691 if (XE_IOCTL_DBG(xe, num_ufence > 1)) { 3692 err = -EINVAL; 3693 goto free_syncs; 3694 } 3695 3696 if (!args->num_binds) { 3697 err = -ENODATA; 3698 goto free_syncs; 3699 } 3700 3701 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); 3702 if (args->num_binds > 1) 3703 vops.flags |= XE_VMA_OPS_ARRAY_OF_BINDS; 3704 for (i = 0; i < args->num_binds; ++i) { 3705 u64 range = bind_ops[i].range; 3706 u64 addr = bind_ops[i].addr; 3707 u32 op = bind_ops[i].op; 3708 u32 flags = bind_ops[i].flags; 3709 u64 obj_offset = bind_ops[i].obj_offset; 3710 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance; 3711 u16 pat_index = bind_ops[i].pat_index; 3712 3713 ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset, 3714 addr, range, op, flags, 3715 prefetch_region, pat_index); 3716 if (IS_ERR(ops[i])) { 3717 err = PTR_ERR(ops[i]); 3718 ops[i] = NULL; 3719 goto unwind_ops; 3720 } 3721 3722 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops); 3723 if (err) 3724 goto unwind_ops; 3725 3726 #ifdef TEST_VM_OPS_ERROR 3727 if (flags & FORCE_OP_ERROR) { 3728 vops.inject_error = true; 3729 vm->xe->vm_inject_error_position = 3730 (vm->xe->vm_inject_error_position + 1) % 3731 
FORCE_OP_ERROR_COUNT; 3732 } 3733 #endif 3734 } 3735 3736 /* Nothing to do */ 3737 if (list_empty(&vops.list)) { 3738 err = -ENODATA; 3739 goto unwind_ops; 3740 } 3741 3742 err = xe_vma_ops_alloc(&vops, args->num_binds > 1); 3743 if (err) 3744 goto unwind_ops; 3745 3746 err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops); 3747 if (err) 3748 goto unwind_ops; 3749 3750 fence = vm_bind_ioctl_ops_execute(vm, &vops); 3751 if (IS_ERR(fence)) 3752 err = PTR_ERR(fence); 3753 else 3754 dma_fence_put(fence); 3755 3756 unwind_ops: 3757 if (err && err != -ENODATA) 3758 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); 3759 xe_vma_ops_fini(&vops); 3760 for (i = args->num_binds - 1; i >= 0; --i) 3761 if (ops[i]) 3762 drm_gpuva_ops_free(&vm->gpuvm, ops[i]); 3763 free_syncs: 3764 if (err == -ENODATA) 3765 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs); 3766 while (num_syncs--) 3767 xe_sync_entry_cleanup(&syncs[num_syncs]); 3768 3769 kfree(syncs); 3770 put_obj: 3771 for (i = 0; i < args->num_binds; ++i) 3772 xe_bo_put(bos[i]); 3773 3774 kvfree(ops); 3775 free_bos: 3776 kvfree(bos); 3777 release_vm_lock: 3778 up_write(&vm->lock); 3779 put_exec_queue: 3780 if (q) 3781 xe_exec_queue_put(q); 3782 free_bind_ops: 3783 if (args->num_binds > 1) 3784 kvfree(bind_ops); 3785 put_vm: 3786 xe_vm_put(vm); 3787 return err; 3788 } 3789 3790 /** 3791 * xe_vm_bind_kernel_bo - bind a kernel BO to a VM 3792 * @vm: VM to bind the BO to 3793 * @bo: BO to bind 3794 * @q: exec queue to use for the bind (optional) 3795 * @addr: address at which to bind the BO 3796 * @cache_lvl: PAT cache level to use 3797 * 3798 * Execute a VM bind map operation on a kernel-owned BO to bind it into a 3799 * kernel-owned VM. 3800 * 3801 * Returns a dma_fence to track the binding completion if the job to do so was 3802 * successfully submitted, an error pointer otherwise. 
3803 */ 3804 struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo, 3805 struct xe_exec_queue *q, u64 addr, 3806 enum xe_cache_level cache_lvl) 3807 { 3808 struct xe_vma_ops vops; 3809 struct drm_gpuva_ops *ops = NULL; 3810 struct dma_fence *fence; 3811 int err; 3812 3813 xe_bo_get(bo); 3814 xe_vm_get(vm); 3815 if (q) 3816 xe_exec_queue_get(q); 3817 3818 down_write(&vm->lock); 3819 3820 xe_vma_ops_init(&vops, vm, q, NULL, 0); 3821 3822 ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo), 3823 DRM_XE_VM_BIND_OP_MAP, 0, 0, 3824 vm->xe->pat.idx[cache_lvl]); 3825 if (IS_ERR(ops)) { 3826 err = PTR_ERR(ops); 3827 goto release_vm_lock; 3828 } 3829 3830 err = vm_bind_ioctl_ops_parse(vm, ops, &vops); 3831 if (err) 3832 goto release_vm_lock; 3833 3834 xe_assert(vm->xe, !list_empty(&vops.list)); 3835 3836 err = xe_vma_ops_alloc(&vops, false); 3837 if (err) 3838 goto unwind_ops; 3839 3840 fence = vm_bind_ioctl_ops_execute(vm, &vops); 3841 if (IS_ERR(fence)) 3842 err = PTR_ERR(fence); 3843 3844 unwind_ops: 3845 if (err && err != -ENODATA) 3846 vm_bind_ioctl_ops_unwind(vm, &ops, 1); 3847 3848 xe_vma_ops_fini(&vops); 3849 drm_gpuva_ops_free(&vm->gpuvm, ops); 3850 3851 release_vm_lock: 3852 up_write(&vm->lock); 3853 3854 if (q) 3855 xe_exec_queue_put(q); 3856 xe_vm_put(vm); 3857 xe_bo_put(bo); 3858 3859 if (err) 3860 fence = ERR_PTR(err); 3861 3862 return fence; 3863 } 3864 3865 /** 3866 * xe_vm_lock() - Lock the vm's dma_resv object 3867 * @vm: The struct xe_vm whose lock is to be locked 3868 * @intr: Whether to perform any wait interruptible 3869 * 3870 * Return: 0 on success, -EINTR if @intr is true and the wait for a 3871 * contended lock was interrupted. If @intr is false, the function 3872 * always returns 0. 
3873 */ 3874 int xe_vm_lock(struct xe_vm *vm, bool intr) 3875 { 3876 int ret; 3877 3878 if (intr) 3879 ret = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); 3880 else 3881 ret = dma_resv_lock(xe_vm_resv(vm), NULL); 3882 3883 return ret; 3884 } 3885 3886 /** 3887 * xe_vm_unlock() - Unlock the vm's dma_resv object 3888 * @vm: The struct xe_vm whose lock is to be released. 3889 * 3890 * Unlock a buffer object lock that was locked by xe_vm_lock(). 3891 */ 3892 void xe_vm_unlock(struct xe_vm *vm) 3893 { 3894 dma_resv_unlock(xe_vm_resv(vm)); 3895 } 3896 3897 /** 3898 * xe_vm_range_tilemask_tlb_inval - Issue a TLB invalidation on this tilemask for an 3899 * address range 3900 * @vm: The VM 3901 * @start: start address 3902 * @end: end address 3903 * @tile_mask: mask for which gt's issue tlb invalidation 3904 * 3905 * Issue a range based TLB invalidation for gt's in tilemask 3906 * 3907 * Returns 0 for success, negative error code otherwise. 3908 */ 3909 int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start, 3910 u64 end, u8 tile_mask) 3911 { 3912 struct xe_tlb_inval_fence 3913 fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; 3914 struct xe_tile *tile; 3915 u32 fence_id = 0; 3916 u8 id; 3917 int err; 3918 3919 if (!tile_mask) 3920 return 0; 3921 3922 for_each_tile(tile, vm->xe, id) { 3923 if (!(tile_mask & BIT(id))) 3924 continue; 3925 3926 xe_tlb_inval_fence_init(&tile->primary_gt->tlb_inval, 3927 &fence[fence_id], true); 3928 3929 err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval, 3930 &fence[fence_id], start, end, 3931 vm->usm.asid); 3932 if (err) 3933 goto wait; 3934 ++fence_id; 3935 3936 if (!tile->media_gt) 3937 continue; 3938 3939 xe_tlb_inval_fence_init(&tile->media_gt->tlb_inval, 3940 &fence[fence_id], true); 3941 3942 err = xe_tlb_inval_range(&tile->media_gt->tlb_inval, 3943 &fence[fence_id], start, end, 3944 vm->usm.asid); 3945 if (err) 3946 goto wait; 3947 ++fence_id; 3948 } 3949 3950 wait: 3951 for (id = 0; id < fence_id; ++id) 3952 
xe_tlb_inval_fence_wait(&fence[id]); 3953 3954 return err; 3955 } 3956 3957 /** 3958 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock 3959 * @vma: VMA to invalidate 3960 * 3961 * Walks a list of page tables leaves which it memset the entries owned by this 3962 * VMA to zero, invalidates the TLBs, and block until TLBs invalidation is 3963 * complete. 3964 * 3965 * Returns 0 for success, negative error code otherwise. 3966 */ 3967 int xe_vm_invalidate_vma(struct xe_vma *vma) 3968 { 3969 struct xe_device *xe = xe_vma_vm(vma)->xe; 3970 struct xe_vm *vm = xe_vma_vm(vma); 3971 struct xe_tile *tile; 3972 u8 tile_mask = 0; 3973 int ret = 0; 3974 u8 id; 3975 3976 xe_assert(xe, !xe_vma_is_null(vma)); 3977 xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma)); 3978 trace_xe_vma_invalidate(vma); 3979 3980 vm_dbg(&vm->xe->drm, 3981 "INVALIDATE: addr=0x%016llx, range=0x%016llx", 3982 xe_vma_start(vma), xe_vma_size(vma)); 3983 3984 /* 3985 * Check that we don't race with page-table updates, tile_invalidated 3986 * update is safe 3987 */ 3988 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { 3989 if (xe_vma_is_userptr(vma)) { 3990 lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) || 3991 (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) && 3992 lockdep_is_held(&xe_vm_resv(vm)->lock.base))); 3993 3994 WARN_ON_ONCE(!mmu_interval_check_retry 3995 (&to_userptr_vma(vma)->userptr.notifier, 3996 to_userptr_vma(vma)->userptr.pages.notifier_seq)); 3997 WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm), 3998 DMA_RESV_USAGE_BOOKKEEP)); 3999 4000 } else { 4001 xe_bo_assert_held(xe_vma_bo(vma)); 4002 } 4003 } 4004 4005 for_each_tile(tile, xe, id) 4006 if (xe_pt_zap_ptes(tile, vma)) 4007 tile_mask |= BIT(id); 4008 4009 xe_device_wmb(xe); 4010 4011 ret = xe_vm_range_tilemask_tlb_inval(xe_vma_vm(vma), xe_vma_start(vma), 4012 xe_vma_end(vma), tile_mask); 4013 4014 /* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */ 4015 
WRITE_ONCE(vma->tile_invalidated, vma->tile_mask); 4016 4017 return ret; 4018 } 4019 4020 int xe_vm_validate_protected(struct xe_vm *vm) 4021 { 4022 struct drm_gpuva *gpuva; 4023 int err = 0; 4024 4025 if (!vm) 4026 return -ENODEV; 4027 4028 mutex_lock(&vm->snap_mutex); 4029 4030 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 4031 struct xe_vma *vma = gpuva_to_vma(gpuva); 4032 struct xe_bo *bo = vma->gpuva.gem.obj ? 4033 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 4034 4035 if (!bo) 4036 continue; 4037 4038 if (xe_bo_is_protected(bo)) { 4039 err = xe_pxp_bo_key_check(vm->xe->pxp, bo); 4040 if (err) 4041 break; 4042 } 4043 } 4044 4045 mutex_unlock(&vm->snap_mutex); 4046 return err; 4047 } 4048 4049 struct xe_vm_snapshot { 4050 int uapi_flags; 4051 unsigned long num_snaps; 4052 struct { 4053 u64 ofs, bo_ofs; 4054 unsigned long len; 4055 #define XE_VM_SNAP_FLAG_USERPTR BIT(0) 4056 #define XE_VM_SNAP_FLAG_READ_ONLY BIT(1) 4057 #define XE_VM_SNAP_FLAG_IS_NULL BIT(2) 4058 unsigned long flags; 4059 int uapi_mem_region; 4060 int pat_index; 4061 int cpu_caching; 4062 struct xe_bo *bo; 4063 void *data; 4064 struct mm_struct *mm; 4065 } snap[]; 4066 }; 4067 4068 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm) 4069 { 4070 unsigned long num_snaps = 0, i; 4071 struct xe_vm_snapshot *snap = NULL; 4072 struct drm_gpuva *gpuva; 4073 4074 if (!vm) 4075 return NULL; 4076 4077 mutex_lock(&vm->snap_mutex); 4078 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 4079 if (gpuva->flags & XE_VMA_DUMPABLE) 4080 num_snaps++; 4081 } 4082 4083 if (num_snaps) 4084 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT); 4085 if (!snap) { 4086 snap = num_snaps ? 
ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV); 4087 goto out_unlock; 4088 } 4089 4090 if (vm->flags & XE_VM_FLAG_FAULT_MODE) 4091 snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_FAULT_MODE; 4092 if (vm->flags & XE_VM_FLAG_LR_MODE) 4093 snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_LR_MODE; 4094 if (vm->flags & XE_VM_FLAG_SCRATCH_PAGE) 4095 snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE; 4096 4097 snap->num_snaps = num_snaps; 4098 i = 0; 4099 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 4100 struct xe_vma *vma = gpuva_to_vma(gpuva); 4101 struct xe_bo *bo = vma->gpuva.gem.obj ? 4102 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 4103 4104 if (!(gpuva->flags & XE_VMA_DUMPABLE)) 4105 continue; 4106 4107 snap->snap[i].ofs = xe_vma_start(vma); 4108 snap->snap[i].len = xe_vma_size(vma); 4109 snap->snap[i].flags = xe_vma_read_only(vma) ? 4110 XE_VM_SNAP_FLAG_READ_ONLY : 0; 4111 snap->snap[i].pat_index = vma->attr.pat_index; 4112 if (bo) { 4113 snap->snap[i].cpu_caching = bo->cpu_caching; 4114 snap->snap[i].bo = xe_bo_get(bo); 4115 snap->snap[i].bo_ofs = xe_vma_bo_offset(vma); 4116 switch (bo->ttm.resource->mem_type) { 4117 case XE_PL_SYSTEM: 4118 case XE_PL_TT: 4119 snap->snap[i].uapi_mem_region = 0; 4120 break; 4121 case XE_PL_VRAM0: 4122 snap->snap[i].uapi_mem_region = 1; 4123 break; 4124 case XE_PL_VRAM1: 4125 snap->snap[i].uapi_mem_region = 2; 4126 break; 4127 } 4128 } else if (xe_vma_is_userptr(vma)) { 4129 struct mm_struct *mm = 4130 to_userptr_vma(vma)->userptr.notifier.mm; 4131 4132 if (mmget_not_zero(mm)) 4133 snap->snap[i].mm = mm; 4134 else 4135 snap->snap[i].data = ERR_PTR(-EFAULT); 4136 4137 snap->snap[i].bo_ofs = xe_vma_userptr(vma); 4138 snap->snap[i].flags |= XE_VM_SNAP_FLAG_USERPTR; 4139 snap->snap[i].uapi_mem_region = 0; 4140 } else if (xe_vma_is_null(vma)) { 4141 snap->snap[i].flags |= XE_VM_SNAP_FLAG_IS_NULL; 4142 snap->snap[i].uapi_mem_region = -1; 4143 } else { 4144 snap->snap[i].data = ERR_PTR(-ENOENT); 4145 snap->snap[i].uapi_mem_region = -1; 4146 } 4147 i++; 4148 } 
4149 4150 out_unlock: 4151 mutex_unlock(&vm->snap_mutex); 4152 return snap; 4153 } 4154 4155 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) 4156 { 4157 if (IS_ERR_OR_NULL(snap)) 4158 return; 4159 4160 for (int i = 0; i < snap->num_snaps; i++) { 4161 struct xe_bo *bo = snap->snap[i].bo; 4162 int err; 4163 4164 if (IS_ERR(snap->snap[i].data) || 4165 snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL) 4166 continue; 4167 4168 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER); 4169 if (!snap->snap[i].data) { 4170 snap->snap[i].data = ERR_PTR(-ENOMEM); 4171 goto cleanup_bo; 4172 } 4173 4174 if (bo) { 4175 err = xe_bo_read(bo, snap->snap[i].bo_ofs, 4176 snap->snap[i].data, snap->snap[i].len); 4177 } else { 4178 void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs; 4179 4180 kthread_use_mm(snap->snap[i].mm); 4181 if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len)) 4182 err = 0; 4183 else 4184 err = -EFAULT; 4185 kthread_unuse_mm(snap->snap[i].mm); 4186 4187 mmput(snap->snap[i].mm); 4188 snap->snap[i].mm = NULL; 4189 } 4190 4191 if (err) { 4192 kvfree(snap->snap[i].data); 4193 snap->snap[i].data = ERR_PTR(err); 4194 } 4195 4196 cleanup_bo: 4197 xe_bo_put(bo); 4198 snap->snap[i].bo = NULL; 4199 } 4200 } 4201 4202 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) 4203 { 4204 unsigned long i, j; 4205 4206 if (IS_ERR_OR_NULL(snap)) { 4207 drm_printf(p, "[0].error: %li\n", PTR_ERR(snap)); 4208 return; 4209 } 4210 4211 drm_printf(p, "VM.uapi_flags: 0x%x\n", snap->uapi_flags); 4212 for (i = 0; i < snap->num_snaps; i++) { 4213 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len); 4214 4215 drm_printf(p, "[%llx].properties: %s|%s|mem_region=0x%lx|pat_index=%d|cpu_caching=%d\n", 4216 snap->snap[i].ofs, 4217 snap->snap[i].flags & XE_VM_SNAP_FLAG_READ_ONLY ? 4218 "read_only" : "read_write", 4219 snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL ? 
4220 "null_sparse" : 4221 snap->snap[i].flags & XE_VM_SNAP_FLAG_USERPTR ? 4222 "userptr" : "bo", 4223 snap->snap[i].uapi_mem_region == -1 ? 0 : 4224 BIT(snap->snap[i].uapi_mem_region), 4225 snap->snap[i].pat_index, 4226 snap->snap[i].cpu_caching); 4227 4228 if (IS_ERR(snap->snap[i].data)) { 4229 drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs, 4230 PTR_ERR(snap->snap[i].data)); 4231 continue; 4232 } 4233 4234 if (snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL) 4235 continue; 4236 4237 drm_printf(p, "[%llx].data: ", snap->snap[i].ofs); 4238 4239 for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) { 4240 u32 *val = snap->snap[i].data + j; 4241 char dumped[ASCII85_BUFSZ]; 4242 4243 drm_puts(p, ascii85_encode(*val, dumped)); 4244 } 4245 4246 drm_puts(p, "\n"); 4247 4248 if (drm_coredump_printer_is_full(p)) 4249 return; 4250 } 4251 } 4252 4253 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap) 4254 { 4255 unsigned long i; 4256 4257 if (IS_ERR_OR_NULL(snap)) 4258 return; 4259 4260 for (i = 0; i < snap->num_snaps; i++) { 4261 if (!IS_ERR(snap->snap[i].data)) 4262 kvfree(snap->snap[i].data); 4263 xe_bo_put(snap->snap[i].bo); 4264 if (snap->snap[i].mm) 4265 mmput(snap->snap[i].mm); 4266 } 4267 kvfree(snap); 4268 } 4269 4270 /** 4271 * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations 4272 * @xe: Pointer to the Xe device structure 4273 * @vma: Pointer to the virtual memory area (VMA) structure 4274 * @is_atomic: In pagefault path and atomic operation 4275 * 4276 * This function determines whether the given VMA needs to be migrated to 4277 * VRAM in order to do atomic GPU operation. 4278 * 4279 * Return: 4280 * 1 - Migration to VRAM is required 4281 * 0 - Migration is not required 4282 * -EACCES - Invalid access for atomic memory attr 4283 * 4284 */ 4285 int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic) 4286 { 4287 u32 atomic_access = xe_vma_bo(vma) ? 
xe_vma_bo(vma)->attr.atomic_access : 4288 vma->attr.atomic_access; 4289 4290 if (!IS_DGFX(xe) || !is_atomic) 4291 return false; 4292 4293 /* 4294 * NOTE: The checks implemented here are platform-specific. For 4295 * instance, on a device supporting CXL atomics, these would ideally 4296 * work universally without additional handling. 4297 */ 4298 switch (atomic_access) { 4299 case DRM_XE_ATOMIC_DEVICE: 4300 return !xe->info.has_device_atomics_on_smem; 4301 4302 case DRM_XE_ATOMIC_CPU: 4303 return -EACCES; 4304 4305 case DRM_XE_ATOMIC_UNDEFINED: 4306 case DRM_XE_ATOMIC_GLOBAL: 4307 default: 4308 return 1; 4309 } 4310 } 4311 4312 static int xe_vm_alloc_vma(struct xe_vm *vm, 4313 struct drm_gpuvm_map_req *map_req, 4314 bool is_madvise) 4315 { 4316 struct xe_vma_ops vops; 4317 struct drm_gpuva_ops *ops = NULL; 4318 struct drm_gpuva_op *__op; 4319 unsigned int vma_flags = 0; 4320 bool remap_op = false; 4321 struct xe_vma_mem_attr tmp_attr; 4322 u16 default_pat; 4323 int err; 4324 4325 lockdep_assert_held_write(&vm->lock); 4326 4327 if (is_madvise) 4328 ops = drm_gpuvm_madvise_ops_create(&vm->gpuvm, map_req); 4329 else 4330 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, map_req); 4331 4332 if (IS_ERR(ops)) 4333 return PTR_ERR(ops); 4334 4335 if (list_empty(&ops->list)) { 4336 err = 0; 4337 goto free_ops; 4338 } 4339 4340 drm_gpuva_for_each_op(__op, ops) { 4341 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 4342 struct xe_vma *vma = NULL; 4343 4344 if (!is_madvise) { 4345 if (__op->op == DRM_GPUVA_OP_UNMAP) { 4346 vma = gpuva_to_vma(op->base.unmap.va); 4347 XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma)); 4348 default_pat = vma->attr.default_pat_index; 4349 vma_flags = vma->gpuva.flags; 4350 } 4351 4352 if (__op->op == DRM_GPUVA_OP_REMAP) { 4353 vma = gpuva_to_vma(op->base.remap.unmap->va); 4354 default_pat = vma->attr.default_pat_index; 4355 vma_flags = vma->gpuva.flags; 4356 } 4357 4358 if (__op->op == DRM_GPUVA_OP_MAP) { 4359 op->map.vma_flags |= vma_flags & 
XE_VMA_CREATE_MASK; 4360 op->map.pat_index = default_pat; 4361 } 4362 } else { 4363 if (__op->op == DRM_GPUVA_OP_REMAP) { 4364 vma = gpuva_to_vma(op->base.remap.unmap->va); 4365 xe_assert(vm->xe, !remap_op); 4366 xe_assert(vm->xe, xe_vma_has_no_bo(vma)); 4367 remap_op = true; 4368 vma_flags = vma->gpuva.flags; 4369 } 4370 4371 if (__op->op == DRM_GPUVA_OP_MAP) { 4372 xe_assert(vm->xe, remap_op); 4373 remap_op = false; 4374 /* 4375 * In case of madvise ops DRM_GPUVA_OP_MAP is 4376 * always after DRM_GPUVA_OP_REMAP, so ensure 4377 * to propagate the flags from the vma we're 4378 * unmapping. 4379 */ 4380 op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK; 4381 } 4382 } 4383 print_op(vm->xe, __op); 4384 } 4385 4386 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 4387 4388 if (is_madvise) 4389 vops.flags |= XE_VMA_OPS_FLAG_MADVISE; 4390 else 4391 vops.flags |= XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP; 4392 4393 err = vm_bind_ioctl_ops_parse(vm, ops, &vops); 4394 if (err) 4395 goto unwind_ops; 4396 4397 xe_vm_lock(vm, false); 4398 4399 drm_gpuva_for_each_op(__op, ops) { 4400 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 4401 struct xe_vma *vma; 4402 4403 if (__op->op == DRM_GPUVA_OP_UNMAP) { 4404 vma = gpuva_to_vma(op->base.unmap.va); 4405 /* There should be no unmap for madvise */ 4406 if (is_madvise) 4407 XE_WARN_ON("UNEXPECTED UNMAP"); 4408 4409 xe_vma_destroy(vma, NULL); 4410 } else if (__op->op == DRM_GPUVA_OP_REMAP) { 4411 vma = gpuva_to_vma(op->base.remap.unmap->va); 4412 /* In case of madvise ops Store attributes for REMAP UNMAPPED 4413 * VMA, so they can be assigned to newly MAP created vma. 4414 */ 4415 if (is_madvise) 4416 tmp_attr = vma->attr; 4417 4418 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL); 4419 } else if (__op->op == DRM_GPUVA_OP_MAP) { 4420 vma = op->map.vma; 4421 /* In case of madvise call, MAP will always be followed by REMAP. 4422 * Therefore temp_attr will always have sane values, making it safe to 4423 * copy them to new vma. 
4424 */ 4425 if (is_madvise) 4426 vma->attr = tmp_attr; 4427 } 4428 } 4429 4430 xe_vm_unlock(vm); 4431 drm_gpuva_ops_free(&vm->gpuvm, ops); 4432 return 0; 4433 4434 unwind_ops: 4435 vm_bind_ioctl_ops_unwind(vm, &ops, 1); 4436 free_ops: 4437 drm_gpuva_ops_free(&vm->gpuvm, ops); 4438 return err; 4439 } 4440 4441 /** 4442 * xe_vm_alloc_madvise_vma - Allocate VMA's with madvise ops 4443 * @vm: Pointer to the xe_vm structure 4444 * @start: Starting input address 4445 * @range: Size of the input range 4446 * 4447 * This function splits existing vma to create new vma for user provided input range 4448 * 4449 * Return: 0 if success 4450 */ 4451 int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range) 4452 { 4453 struct drm_gpuvm_map_req map_req = { 4454 .map.va.addr = start, 4455 .map.va.range = range, 4456 }; 4457 4458 lockdep_assert_held_write(&vm->lock); 4459 4460 vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range); 4461 4462 return xe_vm_alloc_vma(vm, &map_req, true); 4463 } 4464 4465 static bool is_cpu_addr_vma_with_default_attr(struct xe_vma *vma) 4466 { 4467 return vma && xe_vma_is_cpu_addr_mirror(vma) && 4468 xe_vma_has_default_mem_attrs(vma); 4469 } 4470 4471 /** 4472 * xe_vm_find_cpu_addr_mirror_vma_range - Extend a VMA range to include adjacent CPU-mirrored VMAs 4473 * @vm: VM to search within 4474 * @start: Input/output pointer to the starting address of the range 4475 * @end: Input/output pointer to the end address of the range 4476 * 4477 * Given a range defined by @start and @range, this function checks the VMAs 4478 * immediately before and after the range. If those neighboring VMAs are 4479 * CPU-address-mirrored and have default memory attributes, the function 4480 * updates @start and @range to include them. This extended range can then 4481 * be used for merging or other operations that require a unified VMA. 
4482 * 4483 * The function does not perform the merge itself; it only computes the 4484 * mergeable boundaries. 4485 */ 4486 void xe_vm_find_cpu_addr_mirror_vma_range(struct xe_vm *vm, u64 *start, u64 *end) 4487 { 4488 struct xe_vma *prev, *next; 4489 4490 lockdep_assert_held(&vm->lock); 4491 4492 if (*start >= SZ_4K) { 4493 prev = xe_vm_find_vma_by_addr(vm, *start - SZ_4K); 4494 if (is_cpu_addr_vma_with_default_attr(prev)) 4495 *start = xe_vma_start(prev); 4496 } 4497 4498 if (*end < vm->size) { 4499 next = xe_vm_find_vma_by_addr(vm, *end + 1); 4500 if (is_cpu_addr_vma_with_default_attr(next)) 4501 *end = xe_vma_end(next); 4502 } 4503 } 4504 4505 /** 4506 * xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU addr mirror vma 4507 * @vm: Pointer to the xe_vm structure 4508 * @start: Starting input address 4509 * @range: Size of the input range 4510 * 4511 * This function splits/merges existing vma to create new vma for user provided input range 4512 * 4513 * Return: 0 if success 4514 */ 4515 int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t range) 4516 { 4517 struct drm_gpuvm_map_req map_req = { 4518 .map.va.addr = start, 4519 .map.va.range = range, 4520 }; 4521 4522 lockdep_assert_held_write(&vm->lock); 4523 4524 vm_dbg(&vm->xe->drm, "CPU_ADDR_MIRROR_VMA_OPS_CREATE: addr=0x%016llx, size=0x%016llx", 4525 start, range); 4526 4527 return xe_vm_alloc_vma(vm, &map_req, false); 4528 } 4529