1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2021 Intel Corporation 4 */ 5 6 #include "xe_vm.h" 7 8 #include <linux/dma-fence-array.h> 9 #include <linux/nospec.h> 10 11 #include <drm/drm_drv.h> 12 #include <drm/drm_exec.h> 13 #include <drm/drm_print.h> 14 #include <drm/ttm/ttm_tt.h> 15 #include <uapi/drm/xe_drm.h> 16 #include <linux/ascii85.h> 17 #include <linux/delay.h> 18 #include <linux/kthread.h> 19 #include <linux/mm.h> 20 #include <linux/swap.h> 21 22 #include <generated/xe_wa_oob.h> 23 24 #include "regs/xe_gtt_defs.h" 25 #include "xe_assert.h" 26 #include "xe_bo.h" 27 #include "xe_device.h" 28 #include "xe_drm_client.h" 29 #include "xe_exec_queue.h" 30 #include "xe_gt_pagefault.h" 31 #include "xe_migrate.h" 32 #include "xe_pat.h" 33 #include "xe_pm.h" 34 #include "xe_preempt_fence.h" 35 #include "xe_pt.h" 36 #include "xe_pxp.h" 37 #include "xe_res_cursor.h" 38 #include "xe_sriov_vf.h" 39 #include "xe_svm.h" 40 #include "xe_sync.h" 41 #include "xe_tile.h" 42 #include "xe_tlb_inval.h" 43 #include "xe_trace_bo.h" 44 #include "xe_wa.h" 45 46 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) 47 { 48 return vm->gpuvm.r_obj; 49 } 50 51 /** 52 * xe_vm_drm_exec_lock() - Lock the vm's resv with a drm_exec transaction 53 * @vm: The vm whose resv is to be locked. 54 * @exec: The drm_exec transaction. 55 * 56 * Helper to lock the vm's resv as part of a drm_exec transaction. 57 * 58 * Return: %0 on success. See drm_exec_lock_obj() for error codes. 59 */ 60 int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec) 61 { 62 return drm_exec_lock_obj(exec, xe_vm_obj(vm)); 63 } 64 65 static bool preempt_fences_waiting(struct xe_vm *vm) 66 { 67 struct xe_exec_queue *q; 68 69 lockdep_assert_held(&vm->lock); 70 xe_vm_assert_held(vm); 71 72 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 73 if (!q->lr.pfence || 74 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, 75 &q->lr.pfence->flags)) { 76 return true; 77 } 78 } 79 80 return false; 81 } 82 83 static void free_preempt_fences(struct list_head *list) 84 { 85 struct list_head *link, *next; 86 87 list_for_each_safe(link, next, list) 88 xe_preempt_fence_free(to_preempt_fence_from_link(link)); 89 } 90 91 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list, 92 unsigned int *count) 93 { 94 lockdep_assert_held(&vm->lock); 95 xe_vm_assert_held(vm); 96 97 if (*count >= vm->preempt.num_exec_queues) 98 return 0; 99 100 for (; *count < vm->preempt.num_exec_queues; ++(*count)) { 101 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc(); 102 103 if (IS_ERR(pfence)) 104 return PTR_ERR(pfence); 105 106 list_move_tail(xe_preempt_fence_link(pfence), list); 107 } 108 109 return 0; 110 } 111 112 static int wait_for_existing_preempt_fences(struct xe_vm *vm) 113 { 114 struct xe_exec_queue *q; 115 bool vf_migration = IS_SRIOV_VF(vm->xe) && 116 xe_sriov_vf_migration_supported(vm->xe); 117 signed long wait_time = vf_migration ? 
HZ / 5 : MAX_SCHEDULE_TIMEOUT; 118 119 xe_vm_assert_held(vm); 120 121 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 122 if (q->lr.pfence) { 123 long timeout; 124 125 timeout = dma_fence_wait_timeout(q->lr.pfence, false, 126 wait_time); 127 if (!timeout) { 128 xe_assert(vm->xe, vf_migration); 129 return -EAGAIN; 130 } 131 132 /* Only -ETIME on fence indicates VM needs to be killed */ 133 if (timeout < 0 || q->lr.pfence->error == -ETIME) 134 return -ETIME; 135 136 dma_fence_put(q->lr.pfence); 137 q->lr.pfence = NULL; 138 } 139 } 140 141 return 0; 142 } 143 144 static bool xe_vm_is_idle(struct xe_vm *vm) 145 { 146 struct xe_exec_queue *q; 147 148 xe_vm_assert_held(vm); 149 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 150 if (!xe_exec_queue_is_idle(q)) 151 return false; 152 } 153 154 return true; 155 } 156 157 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list) 158 { 159 struct list_head *link; 160 struct xe_exec_queue *q; 161 162 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 163 struct dma_fence *fence; 164 165 link = list->next; 166 xe_assert(vm->xe, link != list); 167 168 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link), 169 q, q->lr.context, 170 ++q->lr.seqno); 171 dma_fence_put(q->lr.pfence); 172 q->lr.pfence = fence; 173 } 174 } 175 176 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) 177 { 178 struct xe_exec_queue *q; 179 int err; 180 181 xe_bo_assert_held(bo); 182 183 if (!vm->preempt.num_exec_queues) 184 return 0; 185 186 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues); 187 if (err) 188 return err; 189 190 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) 191 if (q->lr.pfence) { 192 dma_resv_add_fence(bo->ttm.base.resv, 193 q->lr.pfence, 194 DMA_RESV_USAGE_BOOKKEEP); 195 } 196 197 return 0; 198 } 199 200 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm, 201 struct drm_exec *exec) 202 { 203 struct xe_exec_queue *q; 204 205 lockdep_assert_held(&vm->lock); 206 xe_vm_assert_held(vm); 207 208 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 209 q->ops->resume(q); 210 211 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence, 212 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); 213 } 214 } 215 216 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 217 { 218 struct drm_gpuvm_exec vm_exec = { 219 .vm = &vm->gpuvm, 220 .flags = DRM_EXEC_INTERRUPTIBLE_WAIT, 221 .num_fences = 1, 222 }; 223 struct drm_exec *exec = &vm_exec.exec; 224 struct xe_validation_ctx ctx; 225 struct dma_fence *pfence; 226 int err; 227 bool wait; 228 229 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); 230 231 down_write(&vm->lock); 232 err = xe_validation_exec_lock(&ctx, &vm_exec, &vm->xe->val); 233 if (err) 234 goto out_up_write; 235 236 pfence = xe_preempt_fence_create(q, q->lr.context, 237 ++q->lr.seqno); 238 if (IS_ERR(pfence)) { 239 err = PTR_ERR(pfence); 240 goto out_fini; 241 } 242 243 list_add(&q->lr.link, &vm->preempt.exec_queues); 244 ++vm->preempt.num_exec_queues; 245 q->lr.pfence = pfence; 246 247 xe_svm_notifier_lock(vm); 248 249 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence, 250 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); 251 252 /* 253 * Check to see if a preemption on VM is in flight or userptr 254 * invalidation, if so trigger this preempt fence to sync state with 255 * other preempt fences on the VM. 
256 */ 257 wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm); 258 if (wait) 259 dma_fence_enable_sw_signaling(pfence); 260 261 xe_svm_notifier_unlock(vm); 262 263 out_fini: 264 xe_validation_ctx_fini(&ctx); 265 out_up_write: 266 up_write(&vm->lock); 267 268 return err; 269 } 270 ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO); 271 272 /** 273 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM 274 * @vm: The VM. 275 * @q: The exec_queue 276 * 277 * Note that this function might be called multiple times on the same queue. 278 */ 279 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 280 { 281 if (!xe_vm_in_preempt_fence_mode(vm)) 282 return; 283 284 down_write(&vm->lock); 285 if (!list_empty(&q->lr.link)) { 286 list_del_init(&q->lr.link); 287 --vm->preempt.num_exec_queues; 288 } 289 if (q->lr.pfence) { 290 dma_fence_enable_sw_signaling(q->lr.pfence); 291 dma_fence_put(q->lr.pfence); 292 q->lr.pfence = NULL; 293 } 294 up_write(&vm->lock); 295 } 296 297 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000 298 299 /** 300 * xe_vm_kill() - VM Kill 301 * @vm: The VM. 302 * @unlocked: Flag indicates the VM's dma-resv is not held 303 * 304 * Kill the VM by setting banned flag indicated VM is no longer available for 305 * use. If in preempt fence mode, also kill all exec queue attached to the VM. 306 */ 307 void xe_vm_kill(struct xe_vm *vm, bool unlocked) 308 { 309 struct xe_exec_queue *q; 310 311 lockdep_assert_held(&vm->lock); 312 313 if (unlocked) 314 xe_vm_lock(vm, false); 315 316 vm->flags |= XE_VM_FLAG_BANNED; 317 trace_xe_vm_kill(vm); 318 319 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) 320 q->ops->kill(q); 321 322 if (unlocked) 323 xe_vm_unlock(vm); 324 325 /* TODO: Inform user the VM is banned */ 326 } 327 328 static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) 329 { 330 struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm); 331 struct drm_gpuva *gpuva; 332 int ret; 333 334 lockdep_assert_held(&vm->lock); 335 drm_gpuvm_bo_for_each_va(gpuva, vm_bo) 336 list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind, 337 &vm->rebind_list); 338 339 if (!try_wait_for_completion(&vm->xe->pm_block)) 340 return -EAGAIN; 341 342 ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false, exec); 343 if (ret) 344 return ret; 345 346 vm_bo->evicted = false; 347 return 0; 348 } 349 350 /** 351 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas 352 * @vm: The vm for which we are rebinding. 353 * @exec: The struct drm_exec with the locked GEM objects. 354 * @num_fences: The number of fences to reserve for the operation, not 355 * including rebinds and validations. 356 * 357 * Validates all evicted gem objects and rebinds their vmas. Note that 358 * rebindings may cause evictions and hence the validation-rebind 359 * sequence is rerun until there are no more objects to validate. 360 * 361 * Return: 0 on success, negative error code on error. In particular, 362 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if 363 * the drm_exec transaction needs to be restarted. 
364 */ 365 int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec, 366 unsigned int num_fences) 367 { 368 struct drm_gem_object *obj; 369 unsigned long index; 370 int ret; 371 372 do { 373 ret = drm_gpuvm_validate(&vm->gpuvm, exec); 374 if (ret) 375 return ret; 376 377 ret = xe_vm_rebind(vm, false); 378 if (ret) 379 return ret; 380 } while (!list_empty(&vm->gpuvm.evict.list)); 381 382 drm_exec_for_each_locked_object(exec, index, obj) { 383 ret = dma_resv_reserve_fences(obj->resv, num_fences); 384 if (ret) 385 return ret; 386 } 387 388 return 0; 389 } 390 391 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, 392 bool *done) 393 { 394 int err; 395 396 err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0); 397 if (err) 398 return err; 399 400 if (xe_vm_is_idle(vm)) { 401 vm->preempt.rebind_deactivated = true; 402 *done = true; 403 return 0; 404 } 405 406 if (!preempt_fences_waiting(vm)) { 407 *done = true; 408 return 0; 409 } 410 411 err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0); 412 if (err) 413 return err; 414 415 err = wait_for_existing_preempt_fences(vm); 416 if (err) 417 return err; 418 419 /* 420 * Add validation and rebinding to the locking loop since both can 421 * cause evictions which may require blocing dma_resv locks. 422 * The fence reservation here is intended for the new preempt fences 423 * we attach at the end of the rebind work. 424 */ 425 return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues); 426 } 427 428 static bool vm_suspend_rebind_worker(struct xe_vm *vm) 429 { 430 struct xe_device *xe = vm->xe; 431 bool ret = false; 432 433 mutex_lock(&xe->rebind_resume_lock); 434 if (!try_wait_for_completion(&vm->xe->pm_block)) { 435 ret = true; 436 list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list); 437 } 438 mutex_unlock(&xe->rebind_resume_lock); 439 440 return ret; 441 } 442 443 /** 444 * xe_vm_resume_rebind_worker() - Resume the rebind worker. 445 * @vm: The vm whose preempt worker to resume. 446 * 447 * Resume a preempt worker that was previously suspended by 448 * vm_suspend_rebind_worker(). 449 */ 450 void xe_vm_resume_rebind_worker(struct xe_vm *vm) 451 { 452 queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work); 453 } 454 455 static void preempt_rebind_work_func(struct work_struct *w) 456 { 457 struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work); 458 struct xe_validation_ctx ctx; 459 struct drm_exec exec; 460 unsigned int fence_count = 0; 461 LIST_HEAD(preempt_fences); 462 int err = 0; 463 long wait; 464 int __maybe_unused tries = 0; 465 466 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); 467 trace_xe_vm_rebind_worker_enter(vm); 468 469 down_write(&vm->lock); 470 471 if (xe_vm_is_closed_or_banned(vm)) { 472 up_write(&vm->lock); 473 trace_xe_vm_rebind_worker_exit(vm); 474 return; 475 } 476 477 retry: 478 if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) { 479 up_write(&vm->lock); 480 /* We don't actually block but don't make progress. 
*/ 481 xe_pm_might_block_on_suspend(); 482 return; 483 } 484 485 if (xe_vm_userptr_check_repin(vm)) { 486 err = xe_vm_userptr_pin(vm); 487 if (err) 488 goto out_unlock_outer; 489 } 490 491 err = xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, 492 (struct xe_val_flags) {.interruptible = true}); 493 if (err) 494 goto out_unlock_outer; 495 496 drm_exec_until_all_locked(&exec) { 497 bool done = false; 498 499 err = xe_preempt_work_begin(&exec, vm, &done); 500 drm_exec_retry_on_contention(&exec); 501 xe_validation_retry_on_oom(&ctx, &err); 502 if (err || done) { 503 xe_validation_ctx_fini(&ctx); 504 goto out_unlock_outer; 505 } 506 } 507 508 err = alloc_preempt_fences(vm, &preempt_fences, &fence_count); 509 if (err) 510 goto out_unlock; 511 512 xe_vm_set_validation_exec(vm, &exec); 513 err = xe_vm_rebind(vm, true); 514 xe_vm_set_validation_exec(vm, NULL); 515 if (err) 516 goto out_unlock; 517 518 /* Wait on rebinds and munmap style VM unbinds */ 519 wait = dma_resv_wait_timeout(xe_vm_resv(vm), 520 DMA_RESV_USAGE_KERNEL, 521 false, MAX_SCHEDULE_TIMEOUT); 522 if (wait <= 0) { 523 err = -ETIME; 524 goto out_unlock; 525 } 526 527 #define retry_required(__tries, __vm) \ 528 (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \ 529 (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \ 530 __xe_vm_userptr_needs_repin(__vm)) 531 532 xe_svm_notifier_lock(vm); 533 if (retry_required(tries, vm)) { 534 xe_svm_notifier_unlock(vm); 535 err = -EAGAIN; 536 goto out_unlock; 537 } 538 539 #undef retry_required 540 541 spin_lock(&vm->xe->ttm.lru_lock); 542 ttm_lru_bulk_move_tail(&vm->lru_bulk_move); 543 spin_unlock(&vm->xe->ttm.lru_lock); 544 545 /* Point of no return. */ 546 arm_preempt_fences(vm, &preempt_fences); 547 resume_and_reinstall_preempt_fences(vm, &exec); 548 xe_svm_notifier_unlock(vm); 549 550 out_unlock: 551 xe_validation_ctx_fini(&ctx); 552 out_unlock_outer: 553 if (err == -EAGAIN) { 554 trace_xe_vm_rebind_worker_retry(vm); 555 556 /* 557 * We can't block in workers on a VF which supports migration 558 * given this can block the VF post-migration workers from 559 * getting scheduled. 560 */ 561 if (IS_SRIOV_VF(vm->xe) && 562 xe_sriov_vf_migration_supported(vm->xe)) { 563 up_write(&vm->lock); 564 xe_vm_queue_rebind_worker(vm); 565 return; 566 } 567 568 goto retry; 569 } 570 571 if (err) { 572 drm_warn(&vm->xe->drm, "VM worker error: %d\n", err); 573 xe_vm_kill(vm, true); 574 } 575 up_write(&vm->lock); 576 577 free_preempt_fences(&preempt_fences); 578 579 trace_xe_vm_rebind_worker_exit(vm); 580 } 581 582 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) 583 { 584 int i; 585 586 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) { 587 if (!vops->pt_update_ops[i].num_ops) 588 continue; 589 590 vops->pt_update_ops[i].ops = 591 kmalloc_array(vops->pt_update_ops[i].num_ops, 592 sizeof(*vops->pt_update_ops[i].ops), 593 GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 594 if (!vops->pt_update_ops[i].ops) 595 return array_of_binds ? 
-ENOBUFS : -ENOMEM; 596 } 597 598 return 0; 599 } 600 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO); 601 602 static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op) 603 { 604 struct xe_vma *vma; 605 606 vma = gpuva_to_vma(op->base.prefetch.va); 607 608 if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma)) 609 xa_destroy(&op->prefetch_range.range); 610 } 611 612 static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops) 613 { 614 struct xe_vma_op *op; 615 616 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) 617 return; 618 619 list_for_each_entry(op, &vops->list, link) 620 xe_vma_svm_prefetch_op_fini(op); 621 } 622 623 static void xe_vma_ops_fini(struct xe_vma_ops *vops) 624 { 625 int i; 626 627 xe_vma_svm_prefetch_ops_fini(vops); 628 629 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 630 kfree(vops->pt_update_ops[i].ops); 631 } 632 633 static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val) 634 { 635 int i; 636 637 if (!inc_val) 638 return; 639 640 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 641 if (BIT(i) & tile_mask) 642 vops->pt_update_ops[i].num_ops += inc_val; 643 } 644 645 #define XE_VMA_CREATE_MASK ( \ 646 XE_VMA_READ_ONLY | \ 647 XE_VMA_DUMPABLE | \ 648 XE_VMA_SYSTEM_ALLOCATOR | \ 649 DRM_GPUVA_SPARSE | \ 650 XE_VMA_MADV_AUTORESET) 651 652 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, 653 u8 tile_mask) 654 { 655 INIT_LIST_HEAD(&op->link); 656 op->tile_mask = tile_mask; 657 op->base.op = DRM_GPUVA_OP_MAP; 658 op->base.map.va.addr = vma->gpuva.va.addr; 659 op->base.map.va.range = vma->gpuva.va.range; 660 op->base.map.gem.obj = vma->gpuva.gem.obj; 661 op->base.map.gem.offset = vma->gpuva.gem.offset; 662 op->map.vma = vma; 663 op->map.immediate = true; 664 op->map.vma_flags = vma->gpuva.flags & XE_VMA_CREATE_MASK; 665 } 666 667 static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, 668 u8 tile_mask) 669 { 670 struct xe_vma_op *op; 671 672 op = kzalloc(sizeof(*op), GFP_KERNEL); 673 if (!op) 674 return -ENOMEM; 675 676 xe_vm_populate_rebind(op, vma, tile_mask); 677 list_add_tail(&op->link, &vops->list); 678 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); 679 680 return 0; 681 } 682 683 static struct dma_fence *ops_execute(struct xe_vm *vm, 684 struct xe_vma_ops *vops); 685 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 686 struct xe_exec_queue *q, 687 struct xe_sync_entry *syncs, u32 num_syncs); 688 689 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) 690 { 691 struct dma_fence *fence; 692 struct xe_vma *vma, *next; 693 struct xe_vma_ops vops; 694 struct xe_vma_op *op, *next_op; 695 int err, i; 696 697 lockdep_assert_held(&vm->lock); 698 if ((xe_vm_in_lr_mode(vm) && !rebind_worker) || 699 list_empty(&vm->rebind_list)) 700 return 0; 701 702 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 703 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 704 vops.pt_update_ops[i].wait_vm_bookkeep = true; 705 706 xe_vm_assert_held(vm); 707 list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { 708 xe_assert(vm->xe, vma->tile_present); 709 710 if (rebind_worker) 711 trace_xe_vma_rebind_worker(vma); 712 else 713 trace_xe_vma_rebind_exec(vma); 714 715 err = xe_vm_ops_add_rebind(&vops, vma, 716 vma->tile_present); 717 if (err) 718 goto free_ops; 719 } 720 721 err = xe_vma_ops_alloc(&vops, false); 722 if (err) 723 goto free_ops; 724 725 fence = ops_execute(vm, &vops); 726 if (IS_ERR(fence)) { 727 err = PTR_ERR(fence); 728 } else { 729 
dma_fence_put(fence); 730 list_for_each_entry_safe(vma, next, &vm->rebind_list, 731 combined_links.rebind) 732 list_del_init(&vma->combined_links.rebind); 733 } 734 free_ops: 735 list_for_each_entry_safe(op, next_op, &vops.list, link) { 736 list_del(&op->link); 737 kfree(op); 738 } 739 xe_vma_ops_fini(&vops); 740 741 return err; 742 } 743 744 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask) 745 { 746 struct dma_fence *fence = NULL; 747 struct xe_vma_ops vops; 748 struct xe_vma_op *op, *next_op; 749 struct xe_tile *tile; 750 u8 id; 751 int err; 752 753 lockdep_assert_held(&vm->lock); 754 xe_vm_assert_held(vm); 755 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 756 757 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 758 for_each_tile(tile, vm->xe, id) { 759 vops.pt_update_ops[id].wait_vm_bookkeep = true; 760 vops.pt_update_ops[tile->id].q = 761 xe_migrate_exec_queue(tile->migrate); 762 } 763 764 err = xe_vm_ops_add_rebind(&vops, vma, tile_mask); 765 if (err) 766 return ERR_PTR(err); 767 768 err = xe_vma_ops_alloc(&vops, false); 769 if (err) { 770 fence = ERR_PTR(err); 771 goto free_ops; 772 } 773 774 fence = ops_execute(vm, &vops); 775 776 free_ops: 777 list_for_each_entry_safe(op, next_op, &vops.list, link) { 778 list_del(&op->link); 779 kfree(op); 780 } 781 xe_vma_ops_fini(&vops); 782 783 return fence; 784 } 785 786 static void xe_vm_populate_range_rebind(struct xe_vma_op *op, 787 struct xe_vma *vma, 788 struct xe_svm_range *range, 789 u8 tile_mask) 790 { 791 INIT_LIST_HEAD(&op->link); 792 op->tile_mask = tile_mask; 793 op->base.op = DRM_GPUVA_OP_DRIVER; 794 op->subop = XE_VMA_SUBOP_MAP_RANGE; 795 op->map_range.vma = vma; 796 op->map_range.range = range; 797 } 798 799 static int 800 xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops, 801 struct xe_vma *vma, 802 struct xe_svm_range *range, 803 u8 tile_mask) 804 { 805 struct xe_vma_op *op; 806 807 op = kzalloc(sizeof(*op), GFP_KERNEL); 808 if (!op) 809 return -ENOMEM; 810 811 xe_vm_populate_range_rebind(op, vma, range, tile_mask); 812 list_add_tail(&op->link, &vops->list); 813 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); 814 815 return 0; 816 } 817 818 /** 819 * xe_vm_range_rebind() - VM range (re)bind 820 * @vm: The VM which the range belongs to. 821 * @vma: The VMA which the range belongs to. 822 * @range: SVM range to rebind. 823 * @tile_mask: Tile mask to bind the range to. 824 * 825 * (re)bind SVM range setting up GPU page tables for the range. 
826 * 827 * Return: dma fence for rebind to signal completion on success, ERR_PTR on 828 * failure 829 */ 830 struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm, 831 struct xe_vma *vma, 832 struct xe_svm_range *range, 833 u8 tile_mask) 834 { 835 struct dma_fence *fence = NULL; 836 struct xe_vma_ops vops; 837 struct xe_vma_op *op, *next_op; 838 struct xe_tile *tile; 839 u8 id; 840 int err; 841 842 lockdep_assert_held(&vm->lock); 843 xe_vm_assert_held(vm); 844 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 845 xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma)); 846 847 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 848 for_each_tile(tile, vm->xe, id) { 849 vops.pt_update_ops[id].wait_vm_bookkeep = true; 850 vops.pt_update_ops[tile->id].q = 851 xe_migrate_exec_queue(tile->migrate); 852 } 853 854 err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask); 855 if (err) 856 return ERR_PTR(err); 857 858 err = xe_vma_ops_alloc(&vops, false); 859 if (err) { 860 fence = ERR_PTR(err); 861 goto free_ops; 862 } 863 864 fence = ops_execute(vm, &vops); 865 866 free_ops: 867 list_for_each_entry_safe(op, next_op, &vops.list, link) { 868 list_del(&op->link); 869 kfree(op); 870 } 871 xe_vma_ops_fini(&vops); 872 873 return fence; 874 } 875 876 static void xe_vm_populate_range_unbind(struct xe_vma_op *op, 877 struct xe_svm_range *range) 878 { 879 INIT_LIST_HEAD(&op->link); 880 op->tile_mask = range->tile_present; 881 op->base.op = DRM_GPUVA_OP_DRIVER; 882 op->subop = XE_VMA_SUBOP_UNMAP_RANGE; 883 op->unmap_range.range = range; 884 } 885 886 static int 887 xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops, 888 struct xe_svm_range *range) 889 { 890 struct xe_vma_op *op; 891 892 op = kzalloc(sizeof(*op), GFP_KERNEL); 893 if (!op) 894 return -ENOMEM; 895 896 xe_vm_populate_range_unbind(op, range); 897 list_add_tail(&op->link, &vops->list); 898 xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1); 899 900 return 0; 901 } 902 903 /** 904 * xe_vm_range_unbind() - VM range unbind 905 * @vm: The VM which the range belongs to. 906 * @range: SVM range to rebind. 907 * 908 * Unbind SVM range removing the GPU page tables for the range. 
909 * 910 * Return: dma fence for unbind to signal completion on success, ERR_PTR on 911 * failure 912 */ 913 struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm, 914 struct xe_svm_range *range) 915 { 916 struct dma_fence *fence = NULL; 917 struct xe_vma_ops vops; 918 struct xe_vma_op *op, *next_op; 919 struct xe_tile *tile; 920 u8 id; 921 int err; 922 923 lockdep_assert_held(&vm->lock); 924 xe_vm_assert_held(vm); 925 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 926 927 if (!range->tile_present) 928 return dma_fence_get_stub(); 929 930 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 931 for_each_tile(tile, vm->xe, id) { 932 vops.pt_update_ops[id].wait_vm_bookkeep = true; 933 vops.pt_update_ops[tile->id].q = 934 xe_migrate_exec_queue(tile->migrate); 935 } 936 937 err = xe_vm_ops_add_range_unbind(&vops, range); 938 if (err) 939 return ERR_PTR(err); 940 941 err = xe_vma_ops_alloc(&vops, false); 942 if (err) { 943 fence = ERR_PTR(err); 944 goto free_ops; 945 } 946 947 fence = ops_execute(vm, &vops); 948 949 free_ops: 950 list_for_each_entry_safe(op, next_op, &vops.list, link) { 951 list_del(&op->link); 952 kfree(op); 953 } 954 xe_vma_ops_fini(&vops); 955 956 return fence; 957 } 958 959 static void xe_vma_free(struct xe_vma *vma) 960 { 961 if (xe_vma_is_userptr(vma)) 962 kfree(to_userptr_vma(vma)); 963 else 964 kfree(vma); 965 } 966 967 static struct xe_vma *xe_vma_create(struct xe_vm *vm, 968 struct xe_bo *bo, 969 u64 bo_offset_or_userptr, 970 u64 start, u64 end, 971 struct xe_vma_mem_attr *attr, 972 unsigned int flags) 973 { 974 struct xe_vma *vma; 975 struct xe_tile *tile; 976 u8 id; 977 bool is_null = (flags & DRM_GPUVA_SPARSE); 978 bool is_cpu_addr_mirror = (flags & XE_VMA_SYSTEM_ALLOCATOR); 979 980 xe_assert(vm->xe, start < end); 981 xe_assert(vm->xe, end < vm->size); 982 983 /* 984 * Allocate and ensure that the xe_vma_is_userptr() return 985 * matches what was allocated. 
986 */ 987 if (!bo && !is_null && !is_cpu_addr_mirror) { 988 struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL); 989 990 if (!uvma) 991 return ERR_PTR(-ENOMEM); 992 993 vma = &uvma->vma; 994 } else { 995 vma = kzalloc(sizeof(*vma), GFP_KERNEL); 996 if (!vma) 997 return ERR_PTR(-ENOMEM); 998 999 if (bo) 1000 vma->gpuva.gem.obj = &bo->ttm.base; 1001 } 1002 1003 INIT_LIST_HEAD(&vma->combined_links.rebind); 1004 1005 INIT_LIST_HEAD(&vma->gpuva.gem.entry); 1006 vma->gpuva.vm = &vm->gpuvm; 1007 vma->gpuva.va.addr = start; 1008 vma->gpuva.va.range = end - start + 1; 1009 vma->gpuva.flags = flags; 1010 1011 for_each_tile(tile, vm->xe, id) 1012 vma->tile_mask |= 0x1 << id; 1013 1014 if (vm->xe->info.has_atomic_enable_pte_bit) 1015 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; 1016 1017 vma->attr = *attr; 1018 1019 if (bo) { 1020 struct drm_gpuvm_bo *vm_bo; 1021 1022 xe_bo_assert_held(bo); 1023 1024 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base); 1025 if (IS_ERR(vm_bo)) { 1026 xe_vma_free(vma); 1027 return ERR_CAST(vm_bo); 1028 } 1029 1030 drm_gpuvm_bo_extobj_add(vm_bo); 1031 drm_gem_object_get(&bo->ttm.base); 1032 vma->gpuva.gem.offset = bo_offset_or_userptr; 1033 drm_gpuva_link(&vma->gpuva, vm_bo); 1034 drm_gpuvm_bo_put(vm_bo); 1035 } else /* userptr or null */ { 1036 if (!is_null && !is_cpu_addr_mirror) { 1037 struct xe_userptr_vma *uvma = to_userptr_vma(vma); 1038 u64 size = end - start + 1; 1039 int err; 1040 1041 vma->gpuva.gem.offset = bo_offset_or_userptr; 1042 1043 err = xe_userptr_setup(uvma, xe_vma_userptr(vma), size); 1044 if (err) { 1045 xe_vma_free(vma); 1046 return ERR_PTR(err); 1047 } 1048 } 1049 1050 xe_vm_get(vm); 1051 } 1052 1053 return vma; 1054 } 1055 1056 static void xe_vma_destroy_late(struct xe_vma *vma) 1057 { 1058 struct xe_vm *vm = xe_vma_vm(vma); 1059 1060 if (vma->ufence) { 1061 xe_sync_ufence_put(vma->ufence); 1062 vma->ufence = NULL; 1063 } 1064 1065 if (xe_vma_is_userptr(vma)) { 1066 struct xe_userptr_vma *uvma = to_userptr_vma(vma); 1067 1068 xe_userptr_remove(uvma); 1069 xe_vm_put(vm); 1070 } else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) { 1071 xe_vm_put(vm); 1072 } else { 1073 xe_bo_put(xe_vma_bo(vma)); 1074 } 1075 1076 xe_vma_free(vma); 1077 } 1078 1079 static void vma_destroy_work_func(struct work_struct *w) 1080 { 1081 struct xe_vma *vma = 1082 container_of(w, struct xe_vma, destroy_work); 1083 1084 xe_vma_destroy_late(vma); 1085 } 1086 1087 static void vma_destroy_cb(struct dma_fence *fence, 1088 struct dma_fence_cb *cb) 1089 { 1090 struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb); 1091 1092 INIT_WORK(&vma->destroy_work, vma_destroy_work_func); 1093 queue_work(system_unbound_wq, &vma->destroy_work); 1094 } 1095 1096 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) 1097 { 1098 struct xe_vm *vm = xe_vma_vm(vma); 1099 1100 lockdep_assert_held_write(&vm->lock); 1101 xe_assert(vm->xe, list_empty(&vma->combined_links.destroy)); 1102 1103 if (xe_vma_is_userptr(vma)) { 1104 xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED); 1105 xe_userptr_destroy(to_userptr_vma(vma)); 1106 } else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) { 1107 xe_bo_assert_held(xe_vma_bo(vma)); 1108 1109 drm_gpuva_unlink(&vma->gpuva); 1110 } 1111 1112 xe_vm_assert_held(vm); 1113 if (fence) { 1114 int ret = dma_fence_add_callback(fence, &vma->destroy_cb, 1115 vma_destroy_cb); 1116 1117 if (ret) { 1118 XE_WARN_ON(ret != -ENOENT); 1119 xe_vma_destroy_late(vma); 1120 } 1121 } else { 1122 
xe_vma_destroy_late(vma); 1123 } 1124 } 1125 1126 /** 1127 * xe_vm_lock_vma() - drm_exec utility to lock a vma 1128 * @exec: The drm_exec object we're currently locking for. 1129 * @vma: The vma for witch we want to lock the vm resv and any attached 1130 * object's resv. 1131 * 1132 * Return: 0 on success, negative error code on error. In particular 1133 * may return -EDEADLK on WW transaction contention and -EINTR if 1134 * an interruptible wait is terminated by a signal. 1135 */ 1136 int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma) 1137 { 1138 struct xe_vm *vm = xe_vma_vm(vma); 1139 struct xe_bo *bo = xe_vma_bo(vma); 1140 int err; 1141 1142 XE_WARN_ON(!vm); 1143 1144 err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 1145 if (!err && bo && !bo->vm) 1146 err = drm_exec_lock_obj(exec, &bo->ttm.base); 1147 1148 return err; 1149 } 1150 1151 static void xe_vma_destroy_unlocked(struct xe_vma *vma) 1152 { 1153 struct xe_device *xe = xe_vma_vm(vma)->xe; 1154 struct xe_validation_ctx ctx; 1155 struct drm_exec exec; 1156 int err = 0; 1157 1158 xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) { 1159 err = xe_vm_lock_vma(&exec, vma); 1160 drm_exec_retry_on_contention(&exec); 1161 if (XE_WARN_ON(err)) 1162 break; 1163 xe_vma_destroy(vma, NULL); 1164 } 1165 xe_assert(xe, !err); 1166 } 1167 1168 struct xe_vma * 1169 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range) 1170 { 1171 struct drm_gpuva *gpuva; 1172 1173 lockdep_assert_held(&vm->lock); 1174 1175 if (xe_vm_is_closed_or_banned(vm)) 1176 return NULL; 1177 1178 xe_assert(vm->xe, start + range <= vm->size); 1179 1180 gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range); 1181 1182 return gpuva ? gpuva_to_vma(gpuva) : NULL; 1183 } 1184 1185 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma) 1186 { 1187 int err; 1188 1189 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1190 lockdep_assert_held(&vm->lock); 1191 1192 mutex_lock(&vm->snap_mutex); 1193 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva); 1194 mutex_unlock(&vm->snap_mutex); 1195 XE_WARN_ON(err); /* Shouldn't be possible */ 1196 1197 return err; 1198 } 1199 1200 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma) 1201 { 1202 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1203 lockdep_assert_held(&vm->lock); 1204 1205 mutex_lock(&vm->snap_mutex); 1206 drm_gpuva_remove(&vma->gpuva); 1207 mutex_unlock(&vm->snap_mutex); 1208 if (vm->usm.last_fault_vma == vma) 1209 vm->usm.last_fault_vma = NULL; 1210 } 1211 1212 static struct drm_gpuva_op *xe_vm_op_alloc(void) 1213 { 1214 struct xe_vma_op *op; 1215 1216 op = kzalloc(sizeof(*op), GFP_KERNEL); 1217 1218 if (unlikely(!op)) 1219 return NULL; 1220 1221 return &op->base; 1222 } 1223 1224 static void xe_vm_free(struct drm_gpuvm *gpuvm); 1225 1226 static const struct drm_gpuvm_ops gpuvm_ops = { 1227 .op_alloc = xe_vm_op_alloc, 1228 .vm_bo_validate = xe_gpuvm_validate, 1229 .vm_free = xe_vm_free, 1230 }; 1231 1232 static u64 pde_encode_pat_index(u16 pat_index) 1233 { 1234 u64 pte = 0; 1235 1236 if (pat_index & BIT(0)) 1237 pte |= XE_PPGTT_PTE_PAT0; 1238 1239 if (pat_index & BIT(1)) 1240 pte |= XE_PPGTT_PTE_PAT1; 1241 1242 return pte; 1243 } 1244 1245 static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level) 1246 { 1247 u64 pte = 0; 1248 1249 if (pat_index & BIT(0)) 1250 pte |= XE_PPGTT_PTE_PAT0; 1251 1252 if (pat_index & BIT(1)) 1253 pte |= XE_PPGTT_PTE_PAT1; 1254 1255 if (pat_index & BIT(2)) { 1256 if (pt_level) 1257 pte |= XE_PPGTT_PDE_PDPE_PAT2; 1258 else 1259 pte |= XE_PPGTT_PTE_PAT2; 
1260 } 1261 1262 if (pat_index & BIT(3)) 1263 pte |= XELPG_PPGTT_PTE_PAT3; 1264 1265 if (pat_index & (BIT(4))) 1266 pte |= XE2_PPGTT_PTE_PAT4; 1267 1268 return pte; 1269 } 1270 1271 static u64 pte_encode_ps(u32 pt_level) 1272 { 1273 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL); 1274 1275 if (pt_level == 1) 1276 return XE_PDE_PS_2M; 1277 else if (pt_level == 2) 1278 return XE_PDPE_PS_1G; 1279 1280 return 0; 1281 } 1282 1283 static u16 pde_pat_index(struct xe_bo *bo) 1284 { 1285 struct xe_device *xe = xe_bo_device(bo); 1286 u16 pat_index; 1287 1288 /* 1289 * We only have two bits to encode the PAT index in non-leaf nodes, but 1290 * these only point to other paging structures so we only need a minimal 1291 * selection of options. The user PAT index is only for encoding leaf 1292 * nodes, where we have use of more bits to do the encoding. The 1293 * non-leaf nodes are instead under driver control so the chosen index 1294 * here should be distinct from the user PAT index. Also the 1295 * corresponding coherency of the PAT index should be tied to the 1296 * allocation type of the page table (or at least we should pick 1297 * something which is always safe). 1298 */ 1299 if (!xe_bo_is_vram(bo) && bo->ttm.ttm->caching == ttm_cached) 1300 pat_index = xe->pat.idx[XE_CACHE_WB]; 1301 else 1302 pat_index = xe->pat.idx[XE_CACHE_NONE]; 1303 1304 xe_assert(xe, pat_index <= 3); 1305 1306 return pat_index; 1307 } 1308 1309 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset) 1310 { 1311 u64 pde; 1312 1313 pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1314 pde |= XE_PAGE_PRESENT | XE_PAGE_RW; 1315 pde |= pde_encode_pat_index(pde_pat_index(bo)); 1316 1317 return pde; 1318 } 1319 1320 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, 1321 u16 pat_index, u32 pt_level) 1322 { 1323 u64 pte; 1324 1325 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1326 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1327 pte |= pte_encode_pat_index(pat_index, pt_level); 1328 pte |= pte_encode_ps(pt_level); 1329 1330 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) 1331 pte |= XE_PPGTT_PTE_DM; 1332 1333 return pte; 1334 } 1335 1336 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, 1337 u16 pat_index, u32 pt_level) 1338 { 1339 pte |= XE_PAGE_PRESENT; 1340 1341 if (likely(!xe_vma_read_only(vma))) 1342 pte |= XE_PAGE_RW; 1343 1344 pte |= pte_encode_pat_index(pat_index, pt_level); 1345 pte |= pte_encode_ps(pt_level); 1346 1347 if (unlikely(xe_vma_is_null(vma))) 1348 pte |= XE_PTE_NULL; 1349 1350 return pte; 1351 } 1352 1353 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, 1354 u16 pat_index, 1355 u32 pt_level, bool devmem, u64 flags) 1356 { 1357 u64 pte; 1358 1359 /* Avoid passing random bits directly as flags */ 1360 xe_assert(xe, !(flags & ~XE_PTE_PS64)); 1361 1362 pte = addr; 1363 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1364 pte |= pte_encode_pat_index(pat_index, pt_level); 1365 pte |= pte_encode_ps(pt_level); 1366 1367 if (devmem) 1368 pte |= XE_PPGTT_PTE_DM; 1369 1370 pte |= flags; 1371 1372 return pte; 1373 } 1374 1375 static const struct xe_pt_ops xelp_pt_ops = { 1376 .pte_encode_bo = xelp_pte_encode_bo, 1377 .pte_encode_vma = xelp_pte_encode_vma, 1378 .pte_encode_addr = xelp_pte_encode_addr, 1379 .pde_encode_bo = xelp_pde_encode_bo, 1380 }; 1381 1382 static void vm_destroy_work_func(struct work_struct *w); 1383 1384 /** 1385 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the 1386 * given tile and vm. 1387 * @xe: xe device. 1388 * @tile: tile to set up for. 
1389 * @vm: vm to set up for. 1390 * @exec: The struct drm_exec object used to lock the vm resv. 1391 * 1392 * Sets up a pagetable tree with one page-table per level and a single 1393 * leaf PTE. All pagetable entries point to the single page-table or, 1394 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and 1395 * writes become NOPs. 1396 * 1397 * Return: 0 on success, negative error code on error. 1398 */ 1399 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, 1400 struct xe_vm *vm, struct drm_exec *exec) 1401 { 1402 u8 id = tile->id; 1403 int i; 1404 1405 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) { 1406 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i, exec); 1407 if (IS_ERR(vm->scratch_pt[id][i])) { 1408 int err = PTR_ERR(vm->scratch_pt[id][i]); 1409 1410 vm->scratch_pt[id][i] = NULL; 1411 return err; 1412 } 1413 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); 1414 } 1415 1416 return 0; 1417 } 1418 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO); 1419 1420 static void xe_vm_free_scratch(struct xe_vm *vm) 1421 { 1422 struct xe_tile *tile; 1423 u8 id; 1424 1425 if (!xe_vm_has_scratch(vm)) 1426 return; 1427 1428 for_each_tile(tile, vm->xe, id) { 1429 u32 i; 1430 1431 if (!vm->pt_root[id]) 1432 continue; 1433 1434 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i) 1435 if (vm->scratch_pt[id][i]) 1436 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL); 1437 } 1438 } 1439 1440 static void xe_vm_pt_destroy(struct xe_vm *vm) 1441 { 1442 struct xe_tile *tile; 1443 u8 id; 1444 1445 xe_vm_assert_held(vm); 1446 1447 for_each_tile(tile, vm->xe, id) { 1448 if (vm->pt_root[id]) { 1449 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); 1450 vm->pt_root[id] = NULL; 1451 } 1452 } 1453 } 1454 1455 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) 1456 { 1457 struct drm_gem_object *vm_resv_obj; 1458 struct xe_validation_ctx ctx; 1459 struct drm_exec exec; 1460 struct xe_vm *vm; 1461 int err, number_tiles = 0; 1462 struct xe_tile *tile; 1463 u8 id; 1464 1465 /* 1466 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to 1467 * ever be in faulting mode. 1468 */ 1469 xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE))); 1470 1471 vm = kzalloc(sizeof(*vm), GFP_KERNEL); 1472 if (!vm) 1473 return ERR_PTR(-ENOMEM); 1474 1475 vm->xe = xe; 1476 1477 vm->size = 1ull << xe->info.va_bits; 1478 vm->flags = flags; 1479 1480 if (xef) 1481 vm->xef = xe_file_get(xef); 1482 /** 1483 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be 1484 * manipulated under the PXP mutex. However, the PXP mutex can be taken 1485 * under a user-VM lock when the PXP session is started at exec_queue 1486 * creation time. Those are different VMs and therefore there is no risk 1487 * of deadlock, but we need to tell lockdep that this is the case or it 1488 * will print a warning. 
1489 */ 1490 if (flags & XE_VM_FLAG_GSC) { 1491 static struct lock_class_key gsc_vm_key; 1492 1493 __init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key); 1494 } else { 1495 init_rwsem(&vm->lock); 1496 } 1497 mutex_init(&vm->snap_mutex); 1498 1499 INIT_LIST_HEAD(&vm->rebind_list); 1500 1501 INIT_LIST_HEAD(&vm->userptr.repin_list); 1502 INIT_LIST_HEAD(&vm->userptr.invalidated); 1503 spin_lock_init(&vm->userptr.invalidated_lock); 1504 1505 ttm_lru_bulk_move_init(&vm->lru_bulk_move); 1506 1507 INIT_WORK(&vm->destroy_work, vm_destroy_work_func); 1508 1509 INIT_LIST_HEAD(&vm->preempt.exec_queues); 1510 vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */ 1511 1512 for_each_tile(tile, xe, id) 1513 xe_range_fence_tree_init(&vm->rftree[id]); 1514 1515 vm->pt_ops = &xelp_pt_ops; 1516 1517 /* 1518 * Long-running workloads are not protected by the scheduler references. 1519 * By design, run_job for long-running workloads returns NULL and the 1520 * scheduler drops all the references of it, hence protecting the VM 1521 * for this case is necessary. 1522 */ 1523 if (flags & XE_VM_FLAG_LR_MODE) { 1524 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); 1525 xe_pm_runtime_get_noresume(xe); 1526 INIT_LIST_HEAD(&vm->preempt.pm_activate_link); 1527 } 1528 1529 err = xe_svm_init(vm); 1530 if (err) 1531 goto err_no_resv; 1532 1533 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); 1534 if (!vm_resv_obj) { 1535 err = -ENOMEM; 1536 goto err_svm_fini; 1537 } 1538 1539 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm, 1540 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops); 1541 1542 drm_gem_object_put(vm_resv_obj); 1543 1544 err = 0; 1545 xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true}, 1546 err) { 1547 err = xe_vm_drm_exec_lock(vm, &exec); 1548 drm_exec_retry_on_contention(&exec); 1549 1550 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) 1551 vm->flags |= XE_VM_FLAG_64K; 1552 1553 for_each_tile(tile, xe, id) { 1554 if (flags & XE_VM_FLAG_MIGRATION && 1555 tile->id != XE_VM_FLAG_TILE_ID(flags)) 1556 continue; 1557 1558 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level, 1559 &exec); 1560 if (IS_ERR(vm->pt_root[id])) { 1561 err = PTR_ERR(vm->pt_root[id]); 1562 vm->pt_root[id] = NULL; 1563 xe_vm_pt_destroy(vm); 1564 drm_exec_retry_on_contention(&exec); 1565 xe_validation_retry_on_oom(&ctx, &err); 1566 break; 1567 } 1568 } 1569 if (err) 1570 break; 1571 1572 if (xe_vm_has_scratch(vm)) { 1573 for_each_tile(tile, xe, id) { 1574 if (!vm->pt_root[id]) 1575 continue; 1576 1577 err = xe_vm_create_scratch(xe, tile, vm, &exec); 1578 if (err) { 1579 xe_vm_free_scratch(vm); 1580 xe_vm_pt_destroy(vm); 1581 drm_exec_retry_on_contention(&exec); 1582 xe_validation_retry_on_oom(&ctx, &err); 1583 break; 1584 } 1585 } 1586 if (err) 1587 break; 1588 vm->batch_invalidate_tlb = true; 1589 } 1590 1591 if (vm->flags & XE_VM_FLAG_LR_MODE) { 1592 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); 1593 vm->batch_invalidate_tlb = false; 1594 } 1595 1596 /* Fill pt_root after allocating scratch tables */ 1597 for_each_tile(tile, xe, id) { 1598 if (!vm->pt_root[id]) 1599 continue; 1600 1601 xe_pt_populate_empty(tile, vm, vm->pt_root[id]); 1602 } 1603 } 1604 if (err) 1605 goto err_close; 1606 1607 /* Kernel migration VM shouldn't have a circular loop.. 
*/ 1608 if (!(flags & XE_VM_FLAG_MIGRATION)) { 1609 for_each_tile(tile, xe, id) { 1610 struct xe_exec_queue *q; 1611 u32 create_flags = EXEC_QUEUE_FLAG_VM; 1612 1613 if (!vm->pt_root[id]) 1614 continue; 1615 1616 q = xe_exec_queue_create_bind(xe, tile, create_flags, 0); 1617 if (IS_ERR(q)) { 1618 err = PTR_ERR(q); 1619 goto err_close; 1620 } 1621 vm->q[id] = q; 1622 number_tiles++; 1623 } 1624 } 1625 1626 if (number_tiles > 1) 1627 vm->composite_fence_ctx = dma_fence_context_alloc(1); 1628 1629 if (xef && xe->info.has_asid) { 1630 u32 asid; 1631 1632 down_write(&xe->usm.lock); 1633 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, 1634 XA_LIMIT(1, XE_MAX_ASID - 1), 1635 &xe->usm.next_asid, GFP_KERNEL); 1636 up_write(&xe->usm.lock); 1637 if (err < 0) 1638 goto err_close; 1639 1640 vm->usm.asid = asid; 1641 } 1642 1643 trace_xe_vm_create(vm); 1644 1645 return vm; 1646 1647 err_close: 1648 xe_vm_close_and_put(vm); 1649 return ERR_PTR(err); 1650 1651 err_svm_fini: 1652 if (flags & XE_VM_FLAG_FAULT_MODE) { 1653 vm->size = 0; /* close the vm */ 1654 xe_svm_fini(vm); 1655 } 1656 err_no_resv: 1657 mutex_destroy(&vm->snap_mutex); 1658 for_each_tile(tile, xe, id) 1659 xe_range_fence_tree_fini(&vm->rftree[id]); 1660 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 1661 if (vm->xef) 1662 xe_file_put(vm->xef); 1663 kfree(vm); 1664 if (flags & XE_VM_FLAG_LR_MODE) 1665 xe_pm_runtime_put(xe); 1666 return ERR_PTR(err); 1667 } 1668 1669 static void xe_vm_close(struct xe_vm *vm) 1670 { 1671 struct xe_device *xe = vm->xe; 1672 bool bound; 1673 int idx; 1674 1675 bound = drm_dev_enter(&xe->drm, &idx); 1676 1677 down_write(&vm->lock); 1678 if (xe_vm_in_fault_mode(vm)) 1679 xe_svm_notifier_lock(vm); 1680 1681 vm->size = 0; 1682 1683 if (!((vm->flags & XE_VM_FLAG_MIGRATION))) { 1684 struct xe_tile *tile; 1685 struct xe_gt *gt; 1686 u8 id; 1687 1688 /* Wait for pending binds */ 1689 dma_resv_wait_timeout(xe_vm_resv(vm), 1690 DMA_RESV_USAGE_BOOKKEEP, 1691 false, MAX_SCHEDULE_TIMEOUT); 1692 1693 if (bound) { 1694 for_each_tile(tile, xe, id) 1695 if (vm->pt_root[id]) 1696 xe_pt_clear(xe, vm->pt_root[id]); 1697 1698 for_each_gt(gt, xe, id) 1699 xe_tlb_inval_vm(>->tlb_inval, vm); 1700 } 1701 } 1702 1703 if (xe_vm_in_fault_mode(vm)) 1704 xe_svm_notifier_unlock(vm); 1705 up_write(&vm->lock); 1706 1707 if (bound) 1708 drm_dev_exit(idx); 1709 } 1710 1711 void xe_vm_close_and_put(struct xe_vm *vm) 1712 { 1713 LIST_HEAD(contested); 1714 struct xe_device *xe = vm->xe; 1715 struct xe_tile *tile; 1716 struct xe_vma *vma, *next_vma; 1717 struct drm_gpuva *gpuva, *next; 1718 u8 id; 1719 1720 xe_assert(xe, !vm->preempt.num_exec_queues); 1721 1722 xe_vm_close(vm); 1723 if (xe_vm_in_preempt_fence_mode(vm)) { 1724 mutex_lock(&xe->rebind_resume_lock); 1725 list_del_init(&vm->preempt.pm_activate_link); 1726 mutex_unlock(&xe->rebind_resume_lock); 1727 flush_work(&vm->preempt.rebind_work); 1728 } 1729 if (xe_vm_in_fault_mode(vm)) 1730 xe_svm_close(vm); 1731 1732 down_write(&vm->lock); 1733 for_each_tile(tile, xe, id) { 1734 if (vm->q[id]) 1735 xe_exec_queue_last_fence_put(vm->q[id], vm); 1736 } 1737 up_write(&vm->lock); 1738 1739 for_each_tile(tile, xe, id) { 1740 if (vm->q[id]) { 1741 xe_exec_queue_kill(vm->q[id]); 1742 xe_exec_queue_put(vm->q[id]); 1743 vm->q[id] = NULL; 1744 } 1745 } 1746 1747 down_write(&vm->lock); 1748 xe_vm_lock(vm, false); 1749 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) { 1750 vma = gpuva_to_vma(gpuva); 1751 1752 if (xe_vma_has_no_bo(vma)) { 1753 xe_svm_notifier_lock(vm); 1754 vma->gpuva.flags |= 
XE_VMA_DESTROYED; 1755 xe_svm_notifier_unlock(vm); 1756 } 1757 1758 xe_vm_remove_vma(vm, vma); 1759 1760 /* easy case, remove from VMA? */ 1761 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) { 1762 list_del_init(&vma->combined_links.rebind); 1763 xe_vma_destroy(vma, NULL); 1764 continue; 1765 } 1766 1767 list_move_tail(&vma->combined_links.destroy, &contested); 1768 vma->gpuva.flags |= XE_VMA_DESTROYED; 1769 } 1770 1771 /* 1772 * All vm operations will add shared fences to resv. 1773 * The only exception is eviction for a shared object, 1774 * but even so, the unbind when evicted would still 1775 * install a fence to resv. Hence it's safe to 1776 * destroy the pagetables immediately. 1777 */ 1778 xe_vm_free_scratch(vm); 1779 xe_vm_pt_destroy(vm); 1780 xe_vm_unlock(vm); 1781 1782 /* 1783 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL 1784 * Since we hold a refcount to the bo, we can remove and free 1785 * the members safely without locking. 1786 */ 1787 list_for_each_entry_safe(vma, next_vma, &contested, 1788 combined_links.destroy) { 1789 list_del_init(&vma->combined_links.destroy); 1790 xe_vma_destroy_unlocked(vma); 1791 } 1792 1793 xe_svm_fini(vm); 1794 1795 up_write(&vm->lock); 1796 1797 down_write(&xe->usm.lock); 1798 if (vm->usm.asid) { 1799 void *lookup; 1800 1801 xe_assert(xe, xe->info.has_asid); 1802 xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION)); 1803 1804 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); 1805 xe_assert(xe, lookup == vm); 1806 } 1807 up_write(&xe->usm.lock); 1808 1809 for_each_tile(tile, xe, id) 1810 xe_range_fence_tree_fini(&vm->rftree[id]); 1811 1812 xe_vm_put(vm); 1813 } 1814 1815 static void vm_destroy_work_func(struct work_struct *w) 1816 { 1817 struct xe_vm *vm = 1818 container_of(w, struct xe_vm, destroy_work); 1819 struct xe_device *xe = vm->xe; 1820 struct xe_tile *tile; 1821 u8 id; 1822 1823 /* xe_vm_close_and_put was not called? */ 1824 xe_assert(xe, !vm->size); 1825 1826 if (xe_vm_in_preempt_fence_mode(vm)) 1827 flush_work(&vm->preempt.rebind_work); 1828 1829 mutex_destroy(&vm->snap_mutex); 1830 1831 if (vm->flags & XE_VM_FLAG_LR_MODE) 1832 xe_pm_runtime_put(xe); 1833 1834 for_each_tile(tile, xe, id) 1835 XE_WARN_ON(vm->pt_root[id]); 1836 1837 trace_xe_vm_free(vm); 1838 1839 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 1840 1841 if (vm->xef) 1842 xe_file_put(vm->xef); 1843 1844 kfree(vm); 1845 } 1846 1847 static void xe_vm_free(struct drm_gpuvm *gpuvm) 1848 { 1849 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); 1850 1851 /* To destroy the VM we need to be able to sleep */ 1852 queue_work(system_unbound_wq, &vm->destroy_work); 1853 } 1854 1855 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) 1856 { 1857 struct xe_vm *vm; 1858 1859 mutex_lock(&xef->vm.lock); 1860 vm = xa_load(&xef->vm.xa, id); 1861 if (vm) 1862 xe_vm_get(vm); 1863 mutex_unlock(&xef->vm.lock); 1864 1865 return vm; 1866 } 1867 1868 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile) 1869 { 1870 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0); 1871 } 1872 1873 static struct xe_exec_queue * 1874 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 1875 { 1876 return q ? 
q : vm->q[0]; 1877 } 1878 1879 static struct xe_user_fence * 1880 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) 1881 { 1882 unsigned int i; 1883 1884 for (i = 0; i < num_syncs; i++) { 1885 struct xe_sync_entry *e = &syncs[i]; 1886 1887 if (xe_sync_is_ufence(e)) 1888 return xe_sync_ufence_get(e); 1889 } 1890 1891 return NULL; 1892 } 1893 1894 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ 1895 DRM_XE_VM_CREATE_FLAG_LR_MODE | \ 1896 DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 1897 1898 int xe_vm_create_ioctl(struct drm_device *dev, void *data, 1899 struct drm_file *file) 1900 { 1901 struct xe_device *xe = to_xe_device(dev); 1902 struct xe_file *xef = to_xe_file(file); 1903 struct drm_xe_vm_create *args = data; 1904 struct xe_gt *wa_gt = xe_root_mmio_gt(xe); 1905 struct xe_vm *vm; 1906 u32 id; 1907 int err; 1908 u32 flags = 0; 1909 1910 if (XE_IOCTL_DBG(xe, args->extensions)) 1911 return -EINVAL; 1912 1913 if (wa_gt && XE_GT_WA(wa_gt, 22014953428)) 1914 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE; 1915 1916 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && 1917 !xe->info.has_usm)) 1918 return -EINVAL; 1919 1920 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 1921 return -EINVAL; 1922 1923 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS)) 1924 return -EINVAL; 1925 1926 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE && 1927 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && 1928 !xe->info.needs_scratch)) 1929 return -EINVAL; 1930 1931 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) && 1932 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) 1933 return -EINVAL; 1934 1935 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE) 1936 flags |= XE_VM_FLAG_SCRATCH_PAGE; 1937 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) 1938 flags |= XE_VM_FLAG_LR_MODE; 1939 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 1940 flags |= XE_VM_FLAG_FAULT_MODE; 1941 1942 vm = xe_vm_create(xe, flags, xef); 1943 if (IS_ERR(vm)) 1944 return PTR_ERR(vm); 1945 1946 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) 1947 /* Warning: Security issue - never enable by default */ 1948 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); 1949 #endif 1950 1951 /* user id alloc must always be last in ioctl to prevent UAF */ 1952 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); 1953 if (err) 1954 goto err_close_and_put; 1955 1956 args->vm_id = id; 1957 1958 return 0; 1959 1960 err_close_and_put: 1961 xe_vm_close_and_put(vm); 1962 1963 return err; 1964 } 1965 1966 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, 1967 struct drm_file *file) 1968 { 1969 struct xe_device *xe = to_xe_device(dev); 1970 struct xe_file *xef = to_xe_file(file); 1971 struct drm_xe_vm_destroy *args = data; 1972 struct xe_vm *vm; 1973 int err = 0; 1974 1975 if (XE_IOCTL_DBG(xe, args->pad) || 1976 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 1977 return -EINVAL; 1978 1979 mutex_lock(&xef->vm.lock); 1980 vm = xa_load(&xef->vm.xa, args->vm_id); 1981 if (XE_IOCTL_DBG(xe, !vm)) 1982 err = -ENOENT; 1983 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues)) 1984 err = -EBUSY; 1985 else 1986 xa_erase(&xef->vm.xa, args->vm_id); 1987 mutex_unlock(&xef->vm.lock); 1988 1989 if (!err) 1990 xe_vm_close_and_put(vm); 1991 1992 return err; 1993 } 1994 1995 static int xe_vm_query_vmas(struct xe_vm *vm, u64 start, u64 end) 1996 { 1997 struct drm_gpuva *gpuva; 1998 u32 num_vmas = 0; 1999 2000 
lockdep_assert_held(&vm->lock); 2001 drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) 2002 num_vmas++; 2003 2004 return num_vmas; 2005 } 2006 2007 static int get_mem_attrs(struct xe_vm *vm, u32 *num_vmas, u64 start, 2008 u64 end, struct drm_xe_mem_range_attr *attrs) 2009 { 2010 struct drm_gpuva *gpuva; 2011 int i = 0; 2012 2013 lockdep_assert_held(&vm->lock); 2014 2015 drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) { 2016 struct xe_vma *vma = gpuva_to_vma(gpuva); 2017 2018 if (i == *num_vmas) 2019 return -ENOSPC; 2020 2021 attrs[i].start = xe_vma_start(vma); 2022 attrs[i].end = xe_vma_end(vma); 2023 attrs[i].atomic.val = vma->attr.atomic_access; 2024 attrs[i].pat_index.val = vma->attr.pat_index; 2025 attrs[i].preferred_mem_loc.devmem_fd = vma->attr.preferred_loc.devmem_fd; 2026 attrs[i].preferred_mem_loc.migration_policy = 2027 vma->attr.preferred_loc.migration_policy; 2028 2029 i++; 2030 } 2031 2032 *num_vmas = i; 2033 return 0; 2034 } 2035 2036 int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 2037 { 2038 struct xe_device *xe = to_xe_device(dev); 2039 struct xe_file *xef = to_xe_file(file); 2040 struct drm_xe_mem_range_attr *mem_attrs; 2041 struct drm_xe_vm_query_mem_range_attr *args = data; 2042 u64 __user *attrs_user = u64_to_user_ptr(args->vector_of_mem_attr); 2043 struct xe_vm *vm; 2044 int err = 0; 2045 2046 if (XE_IOCTL_DBG(xe, 2047 ((args->num_mem_ranges == 0 && 2048 (attrs_user || args->sizeof_mem_range_attr != 0)) || 2049 (args->num_mem_ranges > 0 && 2050 (!attrs_user || 2051 args->sizeof_mem_range_attr != 2052 sizeof(struct drm_xe_mem_range_attr)))))) 2053 return -EINVAL; 2054 2055 vm = xe_vm_lookup(xef, args->vm_id); 2056 if (XE_IOCTL_DBG(xe, !vm)) 2057 return -EINVAL; 2058 2059 err = down_read_interruptible(&vm->lock); 2060 if (err) 2061 goto put_vm; 2062 2063 attrs_user = u64_to_user_ptr(args->vector_of_mem_attr); 2064 2065 if (args->num_mem_ranges == 0 && !attrs_user) { 2066 args->num_mem_ranges = xe_vm_query_vmas(vm, args->start, args->start + args->range); 2067 args->sizeof_mem_range_attr = sizeof(struct drm_xe_mem_range_attr); 2068 goto unlock_vm; 2069 } 2070 2071 mem_attrs = kvmalloc_array(args->num_mem_ranges, args->sizeof_mem_range_attr, 2072 GFP_KERNEL | __GFP_ACCOUNT | 2073 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 2074 if (!mem_attrs) { 2075 err = args->num_mem_ranges > 1 ? 
-ENOBUFS : -ENOMEM; 2076 goto unlock_vm; 2077 } 2078 2079 memset(mem_attrs, 0, args->num_mem_ranges * args->sizeof_mem_range_attr); 2080 err = get_mem_attrs(vm, &args->num_mem_ranges, args->start, 2081 args->start + args->range, mem_attrs); 2082 if (err) 2083 goto free_mem_attrs; 2084 2085 err = copy_to_user(attrs_user, mem_attrs, 2086 args->sizeof_mem_range_attr * args->num_mem_ranges); 2087 if (err) 2088 err = -EFAULT; 2089 2090 free_mem_attrs: 2091 kvfree(mem_attrs); 2092 unlock_vm: 2093 up_read(&vm->lock); 2094 put_vm: 2095 xe_vm_put(vm); 2096 return err; 2097 } 2098 2099 static bool vma_matches(struct xe_vma *vma, u64 page_addr) 2100 { 2101 if (page_addr > xe_vma_end(vma) - 1 || 2102 page_addr + SZ_4K - 1 < xe_vma_start(vma)) 2103 return false; 2104 2105 return true; 2106 } 2107 2108 /** 2109 * xe_vm_find_vma_by_addr() - Find a VMA by its address 2110 * 2111 * @vm: the xe_vm the vma belongs to 2112 * @page_addr: address to look up 2113 */ 2114 struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr) 2115 { 2116 struct xe_vma *vma = NULL; 2117 2118 if (vm->usm.last_fault_vma) { /* Fast lookup */ 2119 if (vma_matches(vm->usm.last_fault_vma, page_addr)) 2120 vma = vm->usm.last_fault_vma; 2121 } 2122 if (!vma) 2123 vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K); 2124 2125 return vma; 2126 } 2127 2128 static const u32 region_to_mem_type[] = { 2129 XE_PL_TT, 2130 XE_PL_VRAM0, 2131 XE_PL_VRAM1, 2132 }; 2133 2134 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma, 2135 bool post_commit) 2136 { 2137 xe_svm_notifier_lock(vm); 2138 vma->gpuva.flags |= XE_VMA_DESTROYED; 2139 xe_svm_notifier_unlock(vm); 2140 if (post_commit) 2141 xe_vm_remove_vma(vm, vma); 2142 } 2143 2144 #undef ULL 2145 #define ULL unsigned long long 2146 2147 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) 2148 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 2149 { 2150 struct xe_vma *vma; 2151 2152 switch (op->op) { 2153 case DRM_GPUVA_OP_MAP: 2154 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx", 2155 (ULL)op->map.va.addr, (ULL)op->map.va.range); 2156 break; 2157 case DRM_GPUVA_OP_REMAP: 2158 vma = gpuva_to_vma(op->remap.unmap->va); 2159 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 2160 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 2161 op->remap.unmap->keep ? 1 : 0); 2162 if (op->remap.prev) 2163 vm_dbg(&xe->drm, 2164 "REMAP:PREV: addr=0x%016llx, range=0x%016llx", 2165 (ULL)op->remap.prev->va.addr, 2166 (ULL)op->remap.prev->va.range); 2167 if (op->remap.next) 2168 vm_dbg(&xe->drm, 2169 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx", 2170 (ULL)op->remap.next->va.addr, 2171 (ULL)op->remap.next->va.range); 2172 break; 2173 case DRM_GPUVA_OP_UNMAP: 2174 vma = gpuva_to_vma(op->unmap.va); 2175 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 2176 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 2177 op->unmap.keep ? 
1 : 0); 2178 break; 2179 case DRM_GPUVA_OP_PREFETCH: 2180 vma = gpuva_to_vma(op->prefetch.va); 2181 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx", 2182 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma)); 2183 break; 2184 default: 2185 drm_warn(&xe->drm, "NOT POSSIBLE"); 2186 } 2187 } 2188 #else 2189 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 2190 { 2191 } 2192 #endif 2193 2194 static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags) 2195 { 2196 if (!xe_vm_in_fault_mode(vm)) 2197 return false; 2198 2199 if (!xe_vm_has_scratch(vm)) 2200 return false; 2201 2202 if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE) 2203 return false; 2204 2205 return true; 2206 } 2207 2208 static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops) 2209 { 2210 struct drm_gpuva_op *__op; 2211 2212 drm_gpuva_for_each_op(__op, ops) { 2213 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2214 2215 xe_vma_svm_prefetch_op_fini(op); 2216 } 2217 } 2218 2219 /* 2220 * Create operations list from IOCTL arguments, setup operations fields so parse 2221 * and commit steps are decoupled from IOCTL arguments. This step can fail. 2222 */ 2223 static struct drm_gpuva_ops * 2224 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, 2225 struct xe_bo *bo, u64 bo_offset_or_userptr, 2226 u64 addr, u64 range, 2227 u32 operation, u32 flags, 2228 u32 prefetch_region, u16 pat_index) 2229 { 2230 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL; 2231 struct drm_gpuva_ops *ops; 2232 struct drm_gpuva_op *__op; 2233 struct drm_gpuvm_bo *vm_bo; 2234 u64 range_end = addr + range; 2235 int err; 2236 2237 lockdep_assert_held_write(&vm->lock); 2238 2239 vm_dbg(&vm->xe->drm, 2240 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx", 2241 operation, (ULL)addr, (ULL)range, 2242 (ULL)bo_offset_or_userptr); 2243 2244 switch (operation) { 2245 case DRM_XE_VM_BIND_OP_MAP: 2246 case DRM_XE_VM_BIND_OP_MAP_USERPTR: { 2247 struct drm_gpuvm_map_req map_req = { 2248 .map.va.addr = addr, 2249 .map.va.range = range, 2250 .map.gem.obj = obj, 2251 .map.gem.offset = bo_offset_or_userptr, 2252 }; 2253 2254 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, &map_req); 2255 break; 2256 } 2257 case DRM_XE_VM_BIND_OP_UNMAP: 2258 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range); 2259 break; 2260 case DRM_XE_VM_BIND_OP_PREFETCH: 2261 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range); 2262 break; 2263 case DRM_XE_VM_BIND_OP_UNMAP_ALL: 2264 xe_assert(vm->xe, bo); 2265 2266 err = xe_bo_lock(bo, true); 2267 if (err) 2268 return ERR_PTR(err); 2269 2270 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj); 2271 if (IS_ERR(vm_bo)) { 2272 xe_bo_unlock(bo); 2273 return ERR_CAST(vm_bo); 2274 } 2275 2276 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo); 2277 drm_gpuvm_bo_put(vm_bo); 2278 xe_bo_unlock(bo); 2279 break; 2280 default: 2281 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2282 ops = ERR_PTR(-EINVAL); 2283 } 2284 if (IS_ERR(ops)) 2285 return ops; 2286 2287 drm_gpuva_for_each_op(__op, ops) { 2288 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2289 2290 if (__op->op == DRM_GPUVA_OP_MAP) { 2291 op->map.immediate = 2292 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; 2293 if (flags & DRM_XE_VM_BIND_FLAG_READONLY) 2294 op->map.vma_flags |= XE_VMA_READ_ONLY; 2295 if (flags & DRM_XE_VM_BIND_FLAG_NULL) 2296 op->map.vma_flags |= DRM_GPUVA_SPARSE; 2297 if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) 2298 op->map.vma_flags |= XE_VMA_SYSTEM_ALLOCATOR; 2299 if (flags & 
DRM_XE_VM_BIND_FLAG_DUMPABLE) 2300 op->map.vma_flags |= XE_VMA_DUMPABLE; 2301 if (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) 2302 op->map.vma_flags |= XE_VMA_MADV_AUTORESET; 2303 op->map.pat_index = pat_index; 2304 op->map.invalidate_on_bind = 2305 __xe_vm_needs_clear_scratch_pages(vm, flags); 2306 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { 2307 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2308 struct xe_tile *tile; 2309 struct xe_svm_range *svm_range; 2310 struct drm_gpusvm_ctx ctx = {}; 2311 struct drm_pagemap *dpagemap; 2312 u8 id, tile_mask = 0; 2313 u32 i; 2314 2315 if (!xe_vma_is_cpu_addr_mirror(vma)) { 2316 op->prefetch.region = prefetch_region; 2317 break; 2318 } 2319 2320 ctx.read_only = xe_vma_read_only(vma); 2321 ctx.devmem_possible = IS_DGFX(vm->xe) && 2322 IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); 2323 2324 for_each_tile(tile, vm->xe, id) 2325 tile_mask |= 0x1 << id; 2326 2327 xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC); 2328 op->prefetch_range.ranges_count = 0; 2329 tile = NULL; 2330 2331 if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) { 2332 dpagemap = xe_vma_resolve_pagemap(vma, 2333 xe_device_get_root_tile(vm->xe)); 2334 /* 2335 * TODO: Once multigpu support is enabled will need 2336 * something to dereference tile from dpagemap. 2337 */ 2338 if (dpagemap) 2339 tile = xe_device_get_root_tile(vm->xe); 2340 } else if (prefetch_region) { 2341 tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] - 2342 XE_PL_VRAM0]; 2343 } 2344 2345 op->prefetch_range.tile = tile; 2346 alloc_next_range: 2347 svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx); 2348 2349 if (PTR_ERR(svm_range) == -ENOENT) { 2350 u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma); 2351 2352 addr = ret == ULONG_MAX ? 0 : ret; 2353 if (addr) 2354 goto alloc_next_range; 2355 else 2356 goto print_op_label; 2357 } 2358 2359 if (IS_ERR(svm_range)) { 2360 err = PTR_ERR(svm_range); 2361 goto unwind_prefetch_ops; 2362 } 2363 2364 if (xe_svm_range_validate(vm, svm_range, tile_mask, !!tile)) { 2365 xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID"); 2366 goto check_next_range; 2367 } 2368 2369 err = xa_alloc(&op->prefetch_range.range, 2370 &i, svm_range, xa_limit_32b, 2371 GFP_KERNEL); 2372 2373 if (err) 2374 goto unwind_prefetch_ops; 2375 2376 op->prefetch_range.ranges_count++; 2377 vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH; 2378 xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED"); 2379 check_next_range: 2380 if (range_end > xe_svm_range_end(svm_range) && 2381 xe_svm_range_end(svm_range) < xe_vma_end(vma)) { 2382 addr = xe_svm_range_end(svm_range); 2383 goto alloc_next_range; 2384 } 2385 } 2386 print_op_label: 2387 print_op(vm->xe, __op); 2388 } 2389 2390 return ops; 2391 2392 unwind_prefetch_ops: 2393 xe_svm_prefetch_gpuva_ops_fini(ops); 2394 drm_gpuva_ops_free(&vm->gpuvm, ops); 2395 return ERR_PTR(err); 2396 } 2397 2398 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO); 2399 2400 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, 2401 struct xe_vma_mem_attr *attr, unsigned int flags) 2402 { 2403 struct xe_bo *bo = op->gem.obj ? 
gem_to_xe_bo(op->gem.obj) : NULL; 2404 struct xe_validation_ctx ctx; 2405 struct drm_exec exec; 2406 struct xe_vma *vma; 2407 int err = 0; 2408 2409 lockdep_assert_held_write(&vm->lock); 2410 2411 if (bo) { 2412 err = 0; 2413 xe_validation_guard(&ctx, &vm->xe->val, &exec, 2414 (struct xe_val_flags) {.interruptible = true}, err) { 2415 if (!bo->vm) { 2416 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm)); 2417 drm_exec_retry_on_contention(&exec); 2418 } 2419 if (!err) { 2420 err = drm_exec_lock_obj(&exec, &bo->ttm.base); 2421 drm_exec_retry_on_contention(&exec); 2422 } 2423 if (err) 2424 return ERR_PTR(err); 2425 2426 vma = xe_vma_create(vm, bo, op->gem.offset, 2427 op->va.addr, op->va.addr + 2428 op->va.range - 1, attr, flags); 2429 if (IS_ERR(vma)) 2430 return vma; 2431 2432 if (!bo->vm) { 2433 err = add_preempt_fences(vm, bo); 2434 if (err) { 2435 prep_vma_destroy(vm, vma, false); 2436 xe_vma_destroy(vma, NULL); 2437 } 2438 } 2439 } 2440 if (err) 2441 return ERR_PTR(err); 2442 } else { 2443 vma = xe_vma_create(vm, NULL, op->gem.offset, 2444 op->va.addr, op->va.addr + 2445 op->va.range - 1, attr, flags); 2446 if (IS_ERR(vma)) 2447 return vma; 2448 2449 if (xe_vma_is_userptr(vma)) 2450 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2451 } 2452 if (err) { 2453 prep_vma_destroy(vm, vma, false); 2454 xe_vma_destroy_unlocked(vma); 2455 vma = ERR_PTR(err); 2456 } 2457 2458 return vma; 2459 } 2460 2461 static u64 xe_vma_max_pte_size(struct xe_vma *vma) 2462 { 2463 if (vma->gpuva.flags & XE_VMA_PTE_1G) 2464 return SZ_1G; 2465 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT)) 2466 return SZ_2M; 2467 else if (vma->gpuva.flags & XE_VMA_PTE_64K) 2468 return SZ_64K; 2469 else if (vma->gpuva.flags & XE_VMA_PTE_4K) 2470 return SZ_4K; 2471 2472 return SZ_1G; /* Uninitialized, used max size */ 2473 } 2474 2475 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size) 2476 { 2477 switch (size) { 2478 case SZ_1G: 2479 vma->gpuva.flags |= XE_VMA_PTE_1G; 2480 break; 2481 case SZ_2M: 2482 vma->gpuva.flags |= XE_VMA_PTE_2M; 2483 break; 2484 case SZ_64K: 2485 vma->gpuva.flags |= XE_VMA_PTE_64K; 2486 break; 2487 case SZ_4K: 2488 vma->gpuva.flags |= XE_VMA_PTE_4K; 2489 break; 2490 } 2491 } 2492 2493 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) 2494 { 2495 int err = 0; 2496 2497 lockdep_assert_held_write(&vm->lock); 2498 2499 switch (op->base.op) { 2500 case DRM_GPUVA_OP_MAP: 2501 err |= xe_vm_insert_vma(vm, op->map.vma); 2502 if (!err) 2503 op->flags |= XE_VMA_OP_COMMITTED; 2504 break; 2505 case DRM_GPUVA_OP_REMAP: 2506 { 2507 u8 tile_present = 2508 gpuva_to_vma(op->base.remap.unmap->va)->tile_present; 2509 2510 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va), 2511 true); 2512 op->flags |= XE_VMA_OP_COMMITTED; 2513 2514 if (op->remap.prev) { 2515 err |= xe_vm_insert_vma(vm, op->remap.prev); 2516 if (!err) 2517 op->flags |= XE_VMA_OP_PREV_COMMITTED; 2518 if (!err && op->remap.skip_prev) { 2519 op->remap.prev->tile_present = 2520 tile_present; 2521 op->remap.prev = NULL; 2522 } 2523 } 2524 if (op->remap.next) { 2525 err |= xe_vm_insert_vma(vm, op->remap.next); 2526 if (!err) 2527 op->flags |= XE_VMA_OP_NEXT_COMMITTED; 2528 if (!err && op->remap.skip_next) { 2529 op->remap.next->tile_present = 2530 tile_present; 2531 op->remap.next = NULL; 2532 } 2533 } 2534 2535 /* Adjust for partial unbind after removing VMA from VM */ 2536 if (!err) { 2537 op->base.remap.unmap->va->va.addr = op->remap.start; 2538 op->base.remap.unmap->va->va.range = op->remap.range; 2539 } 
2540 break; 2541 } 2542 case DRM_GPUVA_OP_UNMAP: 2543 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true); 2544 op->flags |= XE_VMA_OP_COMMITTED; 2545 break; 2546 case DRM_GPUVA_OP_PREFETCH: 2547 op->flags |= XE_VMA_OP_COMMITTED; 2548 break; 2549 default: 2550 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2551 } 2552 2553 return err; 2554 } 2555 2556 /** 2557 * xe_vma_has_default_mem_attrs - Check if a VMA has default memory attributes 2558 * @vma: Pointer to the xe_vma structure to check 2559 * 2560 * This function determines whether the given VMA (Virtual Memory Area) 2561 * has its memory attributes set to their default values. Specifically, 2562 * it checks the following conditions: 2563 * 2564 * - `atomic_access` is `DRM_XE_VMA_ATOMIC_UNDEFINED` 2565 * - `pat_index` is equal to `default_pat_index` 2566 * - `preferred_loc.devmem_fd` is `DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE` 2567 * - `preferred_loc.migration_policy` is `DRM_XE_MIGRATE_ALL_PAGES` 2568 * 2569 * Return: true if all attributes are at their default values, false otherwise. 2570 */ 2571 bool xe_vma_has_default_mem_attrs(struct xe_vma *vma) 2572 { 2573 return (vma->attr.atomic_access == DRM_XE_ATOMIC_UNDEFINED && 2574 vma->attr.pat_index == vma->attr.default_pat_index && 2575 vma->attr.preferred_loc.devmem_fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE && 2576 vma->attr.preferred_loc.migration_policy == DRM_XE_MIGRATE_ALL_PAGES); 2577 } 2578 2579 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, 2580 struct xe_vma_ops *vops) 2581 { 2582 struct xe_device *xe = vm->xe; 2583 struct drm_gpuva_op *__op; 2584 struct xe_tile *tile; 2585 u8 id, tile_mask = 0; 2586 int err = 0; 2587 2588 lockdep_assert_held_write(&vm->lock); 2589 2590 for_each_tile(tile, vm->xe, id) 2591 tile_mask |= 0x1 << id; 2592 2593 drm_gpuva_for_each_op(__op, ops) { 2594 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2595 struct xe_vma *vma; 2596 unsigned int flags = 0; 2597 2598 INIT_LIST_HEAD(&op->link); 2599 list_add_tail(&op->link, &vops->list); 2600 op->tile_mask = tile_mask; 2601 2602 switch (op->base.op) { 2603 case DRM_GPUVA_OP_MAP: 2604 { 2605 struct xe_vma_mem_attr default_attr = { 2606 .preferred_loc = { 2607 .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE, 2608 .migration_policy = DRM_XE_MIGRATE_ALL_PAGES, 2609 }, 2610 .atomic_access = DRM_XE_ATOMIC_UNDEFINED, 2611 .default_pat_index = op->map.pat_index, 2612 .pat_index = op->map.pat_index, 2613 }; 2614 2615 flags |= op->map.vma_flags & XE_VMA_CREATE_MASK; 2616 2617 vma = new_vma(vm, &op->base.map, &default_attr, 2618 flags); 2619 if (IS_ERR(vma)) 2620 return PTR_ERR(vma); 2621 2622 op->map.vma = vma; 2623 if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) && 2624 !(op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) || 2625 op->map.invalidate_on_bind) 2626 xe_vma_ops_incr_pt_update_ops(vops, 2627 op->tile_mask, 1); 2628 break; 2629 } 2630 case DRM_GPUVA_OP_REMAP: 2631 { 2632 struct xe_vma *old = 2633 gpuva_to_vma(op->base.remap.unmap->va); 2634 bool skip = xe_vma_is_cpu_addr_mirror(old); 2635 u64 start = xe_vma_start(old), end = xe_vma_end(old); 2636 int num_remap_ops = 0; 2637 2638 if (op->base.remap.prev) 2639 start = op->base.remap.prev->va.addr + 2640 op->base.remap.prev->va.range; 2641 if (op->base.remap.next) 2642 end = op->base.remap.next->va.addr; 2643 2644 if (xe_vma_is_cpu_addr_mirror(old) && 2645 xe_svm_has_mapping(vm, start, end)) { 2646 if (vops->flags & XE_VMA_OPS_FLAG_MADVISE) 2647 xe_svm_unmap_address_range(vm, start, end); 2648 else 2649 return -EBUSY; 2650 } 
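		/*
		 * op->remap.start/range below initially cover the whole old
		 * VMA; they are trimmed further down when skip_prev/skip_next
		 * allow the prev/next portions to keep their existing
		 * page-table entries.
		 */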
2651 2652 op->remap.start = xe_vma_start(old); 2653 op->remap.range = xe_vma_size(old); 2654 2655 flags |= op->base.remap.unmap->va->flags & XE_VMA_CREATE_MASK; 2656 if (op->base.remap.prev) { 2657 vma = new_vma(vm, op->base.remap.prev, 2658 &old->attr, flags); 2659 if (IS_ERR(vma)) 2660 return PTR_ERR(vma); 2661 2662 op->remap.prev = vma; 2663 2664 /* 2665 * Userptr creates a new SG mapping so 2666 * we must also rebind. 2667 */ 2668 op->remap.skip_prev = skip || 2669 (!xe_vma_is_userptr(old) && 2670 IS_ALIGNED(xe_vma_end(vma), 2671 xe_vma_max_pte_size(old))); 2672 if (op->remap.skip_prev) { 2673 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2674 op->remap.range -= 2675 xe_vma_end(vma) - 2676 xe_vma_start(old); 2677 op->remap.start = xe_vma_end(vma); 2678 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx", 2679 (ULL)op->remap.start, 2680 (ULL)op->remap.range); 2681 } else { 2682 num_remap_ops++; 2683 } 2684 } 2685 2686 if (op->base.remap.next) { 2687 vma = new_vma(vm, op->base.remap.next, 2688 &old->attr, flags); 2689 if (IS_ERR(vma)) 2690 return PTR_ERR(vma); 2691 2692 op->remap.next = vma; 2693 2694 /* 2695 * Userptr creates a new SG mapping so 2696 * we must also rebind. 2697 */ 2698 op->remap.skip_next = skip || 2699 (!xe_vma_is_userptr(old) && 2700 IS_ALIGNED(xe_vma_start(vma), 2701 xe_vma_max_pte_size(old))); 2702 if (op->remap.skip_next) { 2703 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2704 op->remap.range -= 2705 xe_vma_end(old) - 2706 xe_vma_start(vma); 2707 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx", 2708 (ULL)op->remap.start, 2709 (ULL)op->remap.range); 2710 } else { 2711 num_remap_ops++; 2712 } 2713 } 2714 if (!skip) 2715 num_remap_ops++; 2716 2717 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops); 2718 break; 2719 } 2720 case DRM_GPUVA_OP_UNMAP: 2721 vma = gpuva_to_vma(op->base.unmap.va); 2722 2723 if (xe_vma_is_cpu_addr_mirror(vma) && 2724 xe_svm_has_mapping(vm, xe_vma_start(vma), 2725 xe_vma_end(vma))) 2726 return -EBUSY; 2727 2728 if (!xe_vma_is_cpu_addr_mirror(vma)) 2729 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2730 break; 2731 case DRM_GPUVA_OP_PREFETCH: 2732 vma = gpuva_to_vma(op->base.prefetch.va); 2733 2734 if (xe_vma_is_userptr(vma)) { 2735 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2736 if (err) 2737 return err; 2738 } 2739 2740 if (xe_vma_is_cpu_addr_mirror(vma)) 2741 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 2742 op->prefetch_range.ranges_count); 2743 else 2744 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2745 2746 break; 2747 default: 2748 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2749 } 2750 2751 err = xe_vma_op_commit(vm, op); 2752 if (err) 2753 return err; 2754 } 2755 2756 return 0; 2757 } 2758 2759 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, 2760 bool post_commit, bool prev_post_commit, 2761 bool next_post_commit) 2762 { 2763 lockdep_assert_held_write(&vm->lock); 2764 2765 switch (op->base.op) { 2766 case DRM_GPUVA_OP_MAP: 2767 if (op->map.vma) { 2768 prep_vma_destroy(vm, op->map.vma, post_commit); 2769 xe_vma_destroy_unlocked(op->map.vma); 2770 } 2771 break; 2772 case DRM_GPUVA_OP_UNMAP: 2773 { 2774 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va); 2775 2776 if (vma) { 2777 xe_svm_notifier_lock(vm); 2778 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2779 xe_svm_notifier_unlock(vm); 2780 if (post_commit) 2781 xe_vm_insert_vma(vm, vma); 2782 } 2783 break; 2784 } 2785 case DRM_GPUVA_OP_REMAP: 2786 { 2787 struct xe_vma *vma = 
gpuva_to_vma(op->base.remap.unmap->va); 2788 2789 if (op->remap.prev) { 2790 prep_vma_destroy(vm, op->remap.prev, prev_post_commit); 2791 xe_vma_destroy_unlocked(op->remap.prev); 2792 } 2793 if (op->remap.next) { 2794 prep_vma_destroy(vm, op->remap.next, next_post_commit); 2795 xe_vma_destroy_unlocked(op->remap.next); 2796 } 2797 if (vma) { 2798 xe_svm_notifier_lock(vm); 2799 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2800 xe_svm_notifier_unlock(vm); 2801 if (post_commit) 2802 xe_vm_insert_vma(vm, vma); 2803 } 2804 break; 2805 } 2806 case DRM_GPUVA_OP_PREFETCH: 2807 /* Nothing to do */ 2808 break; 2809 default: 2810 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2811 } 2812 } 2813 2814 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, 2815 struct drm_gpuva_ops **ops, 2816 int num_ops_list) 2817 { 2818 int i; 2819 2820 for (i = num_ops_list - 1; i >= 0; --i) { 2821 struct drm_gpuva_ops *__ops = ops[i]; 2822 struct drm_gpuva_op *__op; 2823 2824 if (!__ops) 2825 continue; 2826 2827 drm_gpuva_for_each_op_reverse(__op, __ops) { 2828 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2829 2830 xe_vma_op_unwind(vm, op, 2831 op->flags & XE_VMA_OP_COMMITTED, 2832 op->flags & XE_VMA_OP_PREV_COMMITTED, 2833 op->flags & XE_VMA_OP_NEXT_COMMITTED); 2834 } 2835 } 2836 } 2837 2838 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, 2839 bool res_evict, bool validate) 2840 { 2841 struct xe_bo *bo = xe_vma_bo(vma); 2842 struct xe_vm *vm = xe_vma_vm(vma); 2843 int err = 0; 2844 2845 if (bo) { 2846 if (!bo->vm) 2847 err = drm_exec_lock_obj(exec, &bo->ttm.base); 2848 if (!err && validate) 2849 err = xe_bo_validate(bo, vm, 2850 !xe_vm_in_preempt_fence_mode(vm) && 2851 res_evict, exec); 2852 } 2853 2854 return err; 2855 } 2856 2857 static int check_ufence(struct xe_vma *vma) 2858 { 2859 if (vma->ufence) { 2860 struct xe_user_fence * const f = vma->ufence; 2861 2862 if (!xe_sync_ufence_get_status(f)) 2863 return -EBUSY; 2864 2865 vma->ufence = NULL; 2866 xe_sync_ufence_put(f); 2867 } 2868 2869 return 0; 2870 } 2871 2872 static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) 2873 { 2874 bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); 2875 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2876 struct xe_tile *tile = op->prefetch_range.tile; 2877 int err = 0; 2878 2879 struct xe_svm_range *svm_range; 2880 struct drm_gpusvm_ctx ctx = {}; 2881 unsigned long i; 2882 2883 if (!xe_vma_is_cpu_addr_mirror(vma)) 2884 return 0; 2885 2886 ctx.read_only = xe_vma_read_only(vma); 2887 ctx.devmem_possible = devmem_possible; 2888 ctx.check_pages_threshold = devmem_possible ? 
SZ_64K : 0; 2889 ctx.device_private_page_owner = xe_svm_devm_owner(vm->xe); 2890 2891 /* TODO: Threading the migration */ 2892 xa_for_each(&op->prefetch_range.range, i, svm_range) { 2893 if (!tile) 2894 xe_svm_range_migrate_to_smem(vm, svm_range); 2895 2896 if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, !!tile)) { 2897 err = xe_svm_alloc_vram(tile, svm_range, &ctx); 2898 if (err) { 2899 drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n", 2900 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 2901 return -ENODATA; 2902 } 2903 xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM"); 2904 } 2905 2906 err = xe_svm_range_get_pages(vm, svm_range, &ctx); 2907 if (err) { 2908 drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n", 2909 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 2910 if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) 2911 err = -ENODATA; 2912 return err; 2913 } 2914 xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE"); 2915 } 2916 2917 return err; 2918 } 2919 2920 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, 2921 struct xe_vma_ops *vops, struct xe_vma_op *op) 2922 { 2923 int err = 0; 2924 bool res_evict; 2925 2926 /* 2927 * We only allow evicting a BO within the VM if it is not part of an 2928 * array of binds, as an array of binds can evict another BO within the 2929 * bind. 2930 */ 2931 res_evict = !(vops->flags & XE_VMA_OPS_ARRAY_OF_BINDS); 2932 2933 switch (op->base.op) { 2934 case DRM_GPUVA_OP_MAP: 2935 if (!op->map.invalidate_on_bind) 2936 err = vma_lock_and_validate(exec, op->map.vma, 2937 res_evict, 2938 !xe_vm_in_fault_mode(vm) || 2939 op->map.immediate); 2940 break; 2941 case DRM_GPUVA_OP_REMAP: 2942 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va)); 2943 if (err) 2944 break; 2945 2946 err = vma_lock_and_validate(exec, 2947 gpuva_to_vma(op->base.remap.unmap->va), 2948 res_evict, false); 2949 if (!err && op->remap.prev) 2950 err = vma_lock_and_validate(exec, op->remap.prev, 2951 res_evict, true); 2952 if (!err && op->remap.next) 2953 err = vma_lock_and_validate(exec, op->remap.next, 2954 res_evict, true); 2955 break; 2956 case DRM_GPUVA_OP_UNMAP: 2957 err = check_ufence(gpuva_to_vma(op->base.unmap.va)); 2958 if (err) 2959 break; 2960 2961 err = vma_lock_and_validate(exec, 2962 gpuva_to_vma(op->base.unmap.va), 2963 res_evict, false); 2964 break; 2965 case DRM_GPUVA_OP_PREFETCH: 2966 { 2967 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2968 u32 region; 2969 2970 if (!xe_vma_is_cpu_addr_mirror(vma)) { 2971 region = op->prefetch.region; 2972 xe_assert(vm->xe, region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC || 2973 region <= ARRAY_SIZE(region_to_mem_type)); 2974 } 2975 2976 err = vma_lock_and_validate(exec, 2977 gpuva_to_vma(op->base.prefetch.va), 2978 res_evict, false); 2979 if (!err && !xe_vma_has_no_bo(vma)) 2980 err = xe_bo_migrate(xe_vma_bo(vma), 2981 region_to_mem_type[region], 2982 NULL, 2983 exec); 2984 break; 2985 } 2986 default: 2987 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2988 } 2989 2990 return err; 2991 } 2992 2993 static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops) 2994 { 2995 struct xe_vma_op *op; 2996 int err; 2997 2998 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) 2999 return 0; 3000 3001 list_for_each_entry(op, &vops->list, link) { 3002 if (op->base.op == DRM_GPUVA_OP_PREFETCH) { 3003 err = prefetch_ranges(vm, op); 3004 if (err) 3005 return err; 3006 } 3007 } 3008 3009 
return 0; 3010 } 3011 3012 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, 3013 struct xe_vm *vm, 3014 struct xe_vma_ops *vops) 3015 { 3016 struct xe_vma_op *op; 3017 int err; 3018 3019 err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 3020 if (err) 3021 return err; 3022 3023 list_for_each_entry(op, &vops->list, link) { 3024 err = op_lock_and_prep(exec, vm, vops, op); 3025 if (err) 3026 return err; 3027 } 3028 3029 #ifdef TEST_VM_OPS_ERROR 3030 if (vops->inject_error && 3031 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK) 3032 return -ENOSPC; 3033 #endif 3034 3035 return 0; 3036 } 3037 3038 static void op_trace(struct xe_vma_op *op) 3039 { 3040 switch (op->base.op) { 3041 case DRM_GPUVA_OP_MAP: 3042 trace_xe_vma_bind(op->map.vma); 3043 break; 3044 case DRM_GPUVA_OP_REMAP: 3045 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va)); 3046 if (op->remap.prev) 3047 trace_xe_vma_bind(op->remap.prev); 3048 if (op->remap.next) 3049 trace_xe_vma_bind(op->remap.next); 3050 break; 3051 case DRM_GPUVA_OP_UNMAP: 3052 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va)); 3053 break; 3054 case DRM_GPUVA_OP_PREFETCH: 3055 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va)); 3056 break; 3057 case DRM_GPUVA_OP_DRIVER: 3058 break; 3059 default: 3060 XE_WARN_ON("NOT POSSIBLE"); 3061 } 3062 } 3063 3064 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops) 3065 { 3066 struct xe_vma_op *op; 3067 3068 list_for_each_entry(op, &vops->list, link) 3069 op_trace(op); 3070 } 3071 3072 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops) 3073 { 3074 struct xe_exec_queue *q = vops->q; 3075 struct xe_tile *tile; 3076 int number_tiles = 0; 3077 u8 id; 3078 3079 for_each_tile(tile, vm->xe, id) { 3080 if (vops->pt_update_ops[id].num_ops) 3081 ++number_tiles; 3082 3083 if (vops->pt_update_ops[id].q) 3084 continue; 3085 3086 if (q) { 3087 vops->pt_update_ops[id].q = q; 3088 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list)) 3089 q = list_next_entry(q, multi_gt_list); 3090 } else { 3091 vops->pt_update_ops[id].q = vm->q[id]; 3092 } 3093 } 3094 3095 return number_tiles; 3096 } 3097 3098 static struct dma_fence *ops_execute(struct xe_vm *vm, 3099 struct xe_vma_ops *vops) 3100 { 3101 struct xe_tile *tile; 3102 struct dma_fence *fence = NULL; 3103 struct dma_fence **fences = NULL; 3104 struct dma_fence_array *cf = NULL; 3105 int number_tiles = 0, current_fence = 0, err; 3106 u8 id; 3107 3108 number_tiles = vm_ops_setup_tile_args(vm, vops); 3109 if (number_tiles == 0) 3110 return ERR_PTR(-ENODATA); 3111 3112 if (number_tiles > 1) { 3113 fences = kmalloc_array(number_tiles, sizeof(*fences), 3114 GFP_KERNEL); 3115 if (!fences) { 3116 fence = ERR_PTR(-ENOMEM); 3117 goto err_trace; 3118 } 3119 } 3120 3121 for_each_tile(tile, vm->xe, id) { 3122 if (!vops->pt_update_ops[id].num_ops) 3123 continue; 3124 3125 err = xe_pt_update_ops_prepare(tile, vops); 3126 if (err) { 3127 fence = ERR_PTR(err); 3128 goto err_out; 3129 } 3130 } 3131 3132 trace_xe_vm_ops_execute(vops); 3133 3134 for_each_tile(tile, vm->xe, id) { 3135 if (!vops->pt_update_ops[id].num_ops) 3136 continue; 3137 3138 fence = xe_pt_update_ops_run(tile, vops); 3139 if (IS_ERR(fence)) 3140 goto err_out; 3141 3142 if (fences) 3143 fences[current_fence++] = fence; 3144 } 3145 3146 if (fences) { 3147 cf = dma_fence_array_create(number_tiles, fences, 3148 vm->composite_fence_ctx, 3149 vm->composite_fence_seqno++, 3150 false); 3151 if (!cf) { 3152 --vm->composite_fence_seqno; 3153 fence = ERR_PTR(-ENOMEM); 3154 goto 
err_out; 3155 } 3156 fence = &cf->base; 3157 } 3158 3159 for_each_tile(tile, vm->xe, id) { 3160 if (!vops->pt_update_ops[id].num_ops) 3161 continue; 3162 3163 xe_pt_update_ops_fini(tile, vops); 3164 } 3165 3166 return fence; 3167 3168 err_out: 3169 for_each_tile(tile, vm->xe, id) { 3170 if (!vops->pt_update_ops[id].num_ops) 3171 continue; 3172 3173 xe_pt_update_ops_abort(tile, vops); 3174 } 3175 while (current_fence) 3176 dma_fence_put(fences[--current_fence]); 3177 kfree(fences); 3178 kfree(cf); 3179 3180 err_trace: 3181 trace_xe_vm_ops_fail(vm); 3182 return fence; 3183 } 3184 3185 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence) 3186 { 3187 if (vma->ufence) 3188 xe_sync_ufence_put(vma->ufence); 3189 vma->ufence = __xe_sync_ufence_get(ufence); 3190 } 3191 3192 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op, 3193 struct xe_user_fence *ufence) 3194 { 3195 switch (op->base.op) { 3196 case DRM_GPUVA_OP_MAP: 3197 vma_add_ufence(op->map.vma, ufence); 3198 break; 3199 case DRM_GPUVA_OP_REMAP: 3200 if (op->remap.prev) 3201 vma_add_ufence(op->remap.prev, ufence); 3202 if (op->remap.next) 3203 vma_add_ufence(op->remap.next, ufence); 3204 break; 3205 case DRM_GPUVA_OP_UNMAP: 3206 break; 3207 case DRM_GPUVA_OP_PREFETCH: 3208 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence); 3209 break; 3210 default: 3211 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 3212 } 3213 } 3214 3215 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, 3216 struct dma_fence *fence) 3217 { 3218 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q); 3219 struct xe_user_fence *ufence; 3220 struct xe_vma_op *op; 3221 int i; 3222 3223 ufence = find_ufence_get(vops->syncs, vops->num_syncs); 3224 list_for_each_entry(op, &vops->list, link) { 3225 if (ufence) 3226 op_add_ufence(vm, op, ufence); 3227 3228 if (op->base.op == DRM_GPUVA_OP_UNMAP) 3229 xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence); 3230 else if (op->base.op == DRM_GPUVA_OP_REMAP) 3231 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), 3232 fence); 3233 } 3234 if (ufence) 3235 xe_sync_ufence_put(ufence); 3236 if (fence) { 3237 for (i = 0; i < vops->num_syncs; i++) 3238 xe_sync_entry_signal(vops->syncs + i, fence); 3239 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); 3240 } 3241 } 3242 3243 static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm, 3244 struct xe_vma_ops *vops) 3245 { 3246 struct xe_validation_ctx ctx; 3247 struct drm_exec exec; 3248 struct dma_fence *fence; 3249 int err = 0; 3250 3251 lockdep_assert_held_write(&vm->lock); 3252 3253 xe_validation_guard(&ctx, &vm->xe->val, &exec, 3254 ((struct xe_val_flags) { 3255 .interruptible = true, 3256 .exec_ignore_duplicates = true, 3257 }), err) { 3258 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops); 3259 drm_exec_retry_on_contention(&exec); 3260 xe_validation_retry_on_oom(&ctx, &err); 3261 if (err) 3262 return ERR_PTR(err); 3263 3264 xe_vm_set_validation_exec(vm, &exec); 3265 fence = ops_execute(vm, vops); 3266 xe_vm_set_validation_exec(vm, NULL); 3267 if (IS_ERR(fence)) { 3268 if (PTR_ERR(fence) == -ENODATA) 3269 vm_bind_ioctl_ops_fini(vm, vops, NULL); 3270 return fence; 3271 } 3272 3273 vm_bind_ioctl_ops_fini(vm, vops, fence); 3274 } 3275 3276 return err ? 
ERR_PTR(err) : fence; 3277 } 3278 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); 3279 3280 #define SUPPORTED_FLAGS_STUB \ 3281 (DRM_XE_VM_BIND_FLAG_READONLY | \ 3282 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \ 3283 DRM_XE_VM_BIND_FLAG_NULL | \ 3284 DRM_XE_VM_BIND_FLAG_DUMPABLE | \ 3285 DRM_XE_VM_BIND_FLAG_CHECK_PXP | \ 3286 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR | \ 3287 DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) 3288 3289 #ifdef TEST_VM_OPS_ERROR 3290 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR) 3291 #else 3292 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB 3293 #endif 3294 3295 #define XE_64K_PAGE_MASK 0xffffull 3296 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) 3297 3298 static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, 3299 struct drm_xe_vm_bind *args, 3300 struct drm_xe_vm_bind_op **bind_ops) 3301 { 3302 int err; 3303 int i; 3304 3305 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) || 3306 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 3307 return -EINVAL; 3308 3309 if (XE_IOCTL_DBG(xe, args->extensions)) 3310 return -EINVAL; 3311 3312 if (args->num_binds > 1) { 3313 u64 __user *bind_user = 3314 u64_to_user_ptr(args->vector_of_binds); 3315 3316 *bind_ops = kvmalloc_array(args->num_binds, 3317 sizeof(struct drm_xe_vm_bind_op), 3318 GFP_KERNEL | __GFP_ACCOUNT | 3319 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3320 if (!*bind_ops) 3321 return args->num_binds > 1 ? -ENOBUFS : -ENOMEM; 3322 3323 err = copy_from_user(*bind_ops, bind_user, 3324 sizeof(struct drm_xe_vm_bind_op) * 3325 args->num_binds); 3326 if (XE_IOCTL_DBG(xe, err)) { 3327 err = -EFAULT; 3328 goto free_bind_ops; 3329 } 3330 } else { 3331 *bind_ops = &args->bind; 3332 } 3333 3334 for (i = 0; i < args->num_binds; ++i) { 3335 u64 range = (*bind_ops)[i].range; 3336 u64 addr = (*bind_ops)[i].addr; 3337 u32 op = (*bind_ops)[i].op; 3338 u32 flags = (*bind_ops)[i].flags; 3339 u32 obj = (*bind_ops)[i].obj; 3340 u64 obj_offset = (*bind_ops)[i].obj_offset; 3341 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance; 3342 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 3343 bool is_cpu_addr_mirror = flags & 3344 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR; 3345 u16 pat_index = (*bind_ops)[i].pat_index; 3346 u16 coh_mode; 3347 3348 if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror && 3349 (!xe_vm_in_fault_mode(vm) || 3350 !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) { 3351 err = -EINVAL; 3352 goto free_bind_ops; 3353 } 3354 3355 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) { 3356 err = -EINVAL; 3357 goto free_bind_ops; 3358 } 3359 3360 pat_index = array_index_nospec(pat_index, xe->pat.n_entries); 3361 (*bind_ops)[i].pat_index = pat_index; 3362 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); 3363 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */ 3364 err = -EINVAL; 3365 goto free_bind_ops; 3366 } 3367 3368 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) { 3369 err = -EINVAL; 3370 goto free_bind_ops; 3371 } 3372 3373 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) || 3374 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) || 3375 XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) || 3376 XE_IOCTL_DBG(xe, obj_offset && (is_null || 3377 is_cpu_addr_mirror)) || 3378 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP && 3379 (is_null || is_cpu_addr_mirror)) || 3380 XE_IOCTL_DBG(xe, !obj && 3381 op == DRM_XE_VM_BIND_OP_MAP && 3382 !is_null && !is_cpu_addr_mirror) || 3383 XE_IOCTL_DBG(xe, !obj && 3384 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3385 XE_IOCTL_DBG(xe, addr && 3386 op == 
DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3387 XE_IOCTL_DBG(xe, range && 3388 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3389 XE_IOCTL_DBG(xe, obj && 3390 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 3391 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 3392 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 3393 XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR && 3394 !IS_ENABLED(CONFIG_DRM_GPUSVM)) || 3395 XE_IOCTL_DBG(xe, obj && 3396 op == DRM_XE_VM_BIND_OP_PREFETCH) || 3397 XE_IOCTL_DBG(xe, prefetch_region && 3398 op != DRM_XE_VM_BIND_OP_PREFETCH) || 3399 XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC && 3400 !(BIT(prefetch_region) & xe->info.mem_region_mask))) || 3401 XE_IOCTL_DBG(xe, obj && 3402 op == DRM_XE_VM_BIND_OP_UNMAP) || 3403 XE_IOCTL_DBG(xe, (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) && 3404 (!is_cpu_addr_mirror || op != DRM_XE_VM_BIND_OP_MAP))) { 3405 err = -EINVAL; 3406 goto free_bind_ops; 3407 } 3408 3409 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) || 3410 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) || 3411 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) || 3412 XE_IOCTL_DBG(xe, !range && 3413 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) { 3414 err = -EINVAL; 3415 goto free_bind_ops; 3416 } 3417 } 3418 3419 return 0; 3420 3421 free_bind_ops: 3422 if (args->num_binds > 1) 3423 kvfree(*bind_ops); 3424 *bind_ops = NULL; 3425 return err; 3426 } 3427 3428 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, 3429 struct xe_exec_queue *q, 3430 struct xe_sync_entry *syncs, 3431 int num_syncs) 3432 { 3433 struct dma_fence *fence; 3434 int i, err = 0; 3435 3436 fence = xe_sync_in_fence_get(syncs, num_syncs, 3437 to_wait_exec_queue(vm, q), vm); 3438 if (IS_ERR(fence)) 3439 return PTR_ERR(fence); 3440 3441 for (i = 0; i < num_syncs; i++) 3442 xe_sync_entry_signal(&syncs[i], fence); 3443 3444 xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm, 3445 fence); 3446 dma_fence_put(fence); 3447 3448 return err; 3449 } 3450 3451 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 3452 struct xe_exec_queue *q, 3453 struct xe_sync_entry *syncs, u32 num_syncs) 3454 { 3455 memset(vops, 0, sizeof(*vops)); 3456 INIT_LIST_HEAD(&vops->list); 3457 vops->vm = vm; 3458 vops->q = q; 3459 vops->syncs = syncs; 3460 vops->num_syncs = num_syncs; 3461 vops->flags = 0; 3462 } 3463 3464 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, 3465 u64 addr, u64 range, u64 obj_offset, 3466 u16 pat_index, u32 op, u32 bind_flags) 3467 { 3468 u16 coh_mode; 3469 3470 if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) || 3471 XE_IOCTL_DBG(xe, obj_offset > 3472 xe_bo_size(bo) - range)) { 3473 return -EINVAL; 3474 } 3475 3476 /* 3477 * Some platforms require 64k VM_BIND alignment, 3478 * specifically those with XE_VRAM_FLAGS_NEED64K. 3479 * 3480 * Other platforms may have BO's set to 64k physical placement, 3481 * but can be mapped at 4k offsets anyway. This check is only 3482 * there for the former case. 
3483 */ 3484 if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) && 3485 (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) { 3486 if (XE_IOCTL_DBG(xe, obj_offset & 3487 XE_64K_PAGE_MASK) || 3488 XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || 3489 XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) { 3490 return -EINVAL; 3491 } 3492 } 3493 3494 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); 3495 if (bo->cpu_caching) { 3496 if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 3497 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) { 3498 return -EINVAL; 3499 } 3500 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) { 3501 /* 3502 * Imported dma-buf from a different device should 3503 * require 1way or 2way coherency since we don't know 3504 * how it was mapped on the CPU. Just assume is it 3505 * potentially cached on CPU side. 3506 */ 3507 return -EINVAL; 3508 } 3509 3510 /* If a BO is protected it can only be mapped if the key is still valid */ 3511 if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) && 3512 op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL) 3513 if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0)) 3514 return -ENOEXEC; 3515 3516 return 0; 3517 } 3518 3519 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3520 { 3521 struct xe_device *xe = to_xe_device(dev); 3522 struct xe_file *xef = to_xe_file(file); 3523 struct drm_xe_vm_bind *args = data; 3524 struct drm_xe_sync __user *syncs_user; 3525 struct xe_bo **bos = NULL; 3526 struct drm_gpuva_ops **ops = NULL; 3527 struct xe_vm *vm; 3528 struct xe_exec_queue *q = NULL; 3529 u32 num_syncs, num_ufence = 0; 3530 struct xe_sync_entry *syncs = NULL; 3531 struct drm_xe_vm_bind_op *bind_ops = NULL; 3532 struct xe_vma_ops vops; 3533 struct dma_fence *fence; 3534 int err; 3535 int i; 3536 3537 vm = xe_vm_lookup(xef, args->vm_id); 3538 if (XE_IOCTL_DBG(xe, !vm)) 3539 return -EINVAL; 3540 3541 err = vm_bind_ioctl_check_args(xe, vm, args, &bind_ops); 3542 if (err) 3543 goto put_vm; 3544 3545 if (args->exec_queue_id) { 3546 q = xe_exec_queue_lookup(xef, args->exec_queue_id); 3547 if (XE_IOCTL_DBG(xe, !q)) { 3548 err = -ENOENT; 3549 goto free_bind_ops; 3550 } 3551 3552 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) { 3553 err = -EINVAL; 3554 goto put_exec_queue; 3555 } 3556 } 3557 3558 /* Ensure all UNMAPs visible */ 3559 xe_svm_flush(vm); 3560 3561 err = down_write_killable(&vm->lock); 3562 if (err) 3563 goto put_exec_queue; 3564 3565 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { 3566 err = -ENOENT; 3567 goto release_vm_lock; 3568 } 3569 3570 for (i = 0; i < args->num_binds; ++i) { 3571 u64 range = bind_ops[i].range; 3572 u64 addr = bind_ops[i].addr; 3573 3574 if (XE_IOCTL_DBG(xe, range > vm->size) || 3575 XE_IOCTL_DBG(xe, addr > vm->size - range)) { 3576 err = -EINVAL; 3577 goto release_vm_lock; 3578 } 3579 } 3580 3581 if (args->num_binds) { 3582 bos = kvcalloc(args->num_binds, sizeof(*bos), 3583 GFP_KERNEL | __GFP_ACCOUNT | 3584 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3585 if (!bos) { 3586 err = -ENOMEM; 3587 goto release_vm_lock; 3588 } 3589 3590 ops = kvcalloc(args->num_binds, sizeof(*ops), 3591 GFP_KERNEL | __GFP_ACCOUNT | 3592 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3593 if (!ops) { 3594 err = -ENOMEM; 3595 goto free_bos; 3596 } 3597 } 3598 3599 for (i = 0; i < args->num_binds; ++i) { 3600 struct drm_gem_object *gem_obj; 3601 u64 range = bind_ops[i].range; 3602 u64 addr = bind_ops[i].addr; 3603 u32 obj = bind_ops[i].obj; 3604 u64 obj_offset = bind_ops[i].obj_offset; 
3605 u16 pat_index = bind_ops[i].pat_index; 3606 u32 op = bind_ops[i].op; 3607 u32 bind_flags = bind_ops[i].flags; 3608 3609 if (!obj) 3610 continue; 3611 3612 gem_obj = drm_gem_object_lookup(file, obj); 3613 if (XE_IOCTL_DBG(xe, !gem_obj)) { 3614 err = -ENOENT; 3615 goto put_obj; 3616 } 3617 bos[i] = gem_to_xe_bo(gem_obj); 3618 3619 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range, 3620 obj_offset, pat_index, op, 3621 bind_flags); 3622 if (err) 3623 goto put_obj; 3624 } 3625 3626 if (args->num_syncs) { 3627 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); 3628 if (!syncs) { 3629 err = -ENOMEM; 3630 goto put_obj; 3631 } 3632 } 3633 3634 syncs_user = u64_to_user_ptr(args->syncs); 3635 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { 3636 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], 3637 &syncs_user[num_syncs], 3638 (xe_vm_in_lr_mode(vm) ? 3639 SYNC_PARSE_FLAG_LR_MODE : 0) | 3640 (!args->num_binds ? 3641 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0)); 3642 if (err) 3643 goto free_syncs; 3644 3645 if (xe_sync_is_ufence(&syncs[num_syncs])) 3646 num_ufence++; 3647 } 3648 3649 if (XE_IOCTL_DBG(xe, num_ufence > 1)) { 3650 err = -EINVAL; 3651 goto free_syncs; 3652 } 3653 3654 if (!args->num_binds) { 3655 err = -ENODATA; 3656 goto free_syncs; 3657 } 3658 3659 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); 3660 if (args->num_binds > 1) 3661 vops.flags |= XE_VMA_OPS_ARRAY_OF_BINDS; 3662 for (i = 0; i < args->num_binds; ++i) { 3663 u64 range = bind_ops[i].range; 3664 u64 addr = bind_ops[i].addr; 3665 u32 op = bind_ops[i].op; 3666 u32 flags = bind_ops[i].flags; 3667 u64 obj_offset = bind_ops[i].obj_offset; 3668 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance; 3669 u16 pat_index = bind_ops[i].pat_index; 3670 3671 ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset, 3672 addr, range, op, flags, 3673 prefetch_region, pat_index); 3674 if (IS_ERR(ops[i])) { 3675 err = PTR_ERR(ops[i]); 3676 ops[i] = NULL; 3677 goto unwind_ops; 3678 } 3679 3680 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops); 3681 if (err) 3682 goto unwind_ops; 3683 3684 #ifdef TEST_VM_OPS_ERROR 3685 if (flags & FORCE_OP_ERROR) { 3686 vops.inject_error = true; 3687 vm->xe->vm_inject_error_position = 3688 (vm->xe->vm_inject_error_position + 1) % 3689 FORCE_OP_ERROR_COUNT; 3690 } 3691 #endif 3692 } 3693 3694 /* Nothing to do */ 3695 if (list_empty(&vops.list)) { 3696 err = -ENODATA; 3697 goto unwind_ops; 3698 } 3699 3700 err = xe_vma_ops_alloc(&vops, args->num_binds > 1); 3701 if (err) 3702 goto unwind_ops; 3703 3704 err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops); 3705 if (err) 3706 goto unwind_ops; 3707 3708 fence = vm_bind_ioctl_ops_execute(vm, &vops); 3709 if (IS_ERR(fence)) 3710 err = PTR_ERR(fence); 3711 else 3712 dma_fence_put(fence); 3713 3714 unwind_ops: 3715 if (err && err != -ENODATA) 3716 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); 3717 xe_vma_ops_fini(&vops); 3718 for (i = args->num_binds - 1; i >= 0; --i) 3719 if (ops[i]) 3720 drm_gpuva_ops_free(&vm->gpuvm, ops[i]); 3721 free_syncs: 3722 if (err == -ENODATA) 3723 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs); 3724 while (num_syncs--) 3725 xe_sync_entry_cleanup(&syncs[num_syncs]); 3726 3727 kfree(syncs); 3728 put_obj: 3729 for (i = 0; i < args->num_binds; ++i) 3730 xe_bo_put(bos[i]); 3731 3732 kvfree(ops); 3733 free_bos: 3734 kvfree(bos); 3735 release_vm_lock: 3736 up_write(&vm->lock); 3737 put_exec_queue: 3738 if (q) 3739 xe_exec_queue_put(q); 3740 free_bind_ops: 3741 if 
(args->num_binds > 1) 3742 kvfree(bind_ops); 3743 put_vm: 3744 xe_vm_put(vm); 3745 return err; 3746 } 3747 3748 /** 3749 * xe_vm_bind_kernel_bo - bind a kernel BO to a VM 3750 * @vm: VM to bind the BO to 3751 * @bo: BO to bind 3752 * @q: exec queue to use for the bind (optional) 3753 * @addr: address at which to bind the BO 3754 * @cache_lvl: PAT cache level to use 3755 * 3756 * Execute a VM bind map operation on a kernel-owned BO to bind it into a 3757 * kernel-owned VM. 3758 * 3759 * Returns a dma_fence to track the binding completion if the job to do so was 3760 * successfully submitted, an error pointer otherwise. 3761 */ 3762 struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo, 3763 struct xe_exec_queue *q, u64 addr, 3764 enum xe_cache_level cache_lvl) 3765 { 3766 struct xe_vma_ops vops; 3767 struct drm_gpuva_ops *ops = NULL; 3768 struct dma_fence *fence; 3769 int err; 3770 3771 xe_bo_get(bo); 3772 xe_vm_get(vm); 3773 if (q) 3774 xe_exec_queue_get(q); 3775 3776 down_write(&vm->lock); 3777 3778 xe_vma_ops_init(&vops, vm, q, NULL, 0); 3779 3780 ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo), 3781 DRM_XE_VM_BIND_OP_MAP, 0, 0, 3782 vm->xe->pat.idx[cache_lvl]); 3783 if (IS_ERR(ops)) { 3784 err = PTR_ERR(ops); 3785 goto release_vm_lock; 3786 } 3787 3788 err = vm_bind_ioctl_ops_parse(vm, ops, &vops); 3789 if (err) 3790 goto release_vm_lock; 3791 3792 xe_assert(vm->xe, !list_empty(&vops.list)); 3793 3794 err = xe_vma_ops_alloc(&vops, false); 3795 if (err) 3796 goto unwind_ops; 3797 3798 fence = vm_bind_ioctl_ops_execute(vm, &vops); 3799 if (IS_ERR(fence)) 3800 err = PTR_ERR(fence); 3801 3802 unwind_ops: 3803 if (err && err != -ENODATA) 3804 vm_bind_ioctl_ops_unwind(vm, &ops, 1); 3805 3806 xe_vma_ops_fini(&vops); 3807 drm_gpuva_ops_free(&vm->gpuvm, ops); 3808 3809 release_vm_lock: 3810 up_write(&vm->lock); 3811 3812 if (q) 3813 xe_exec_queue_put(q); 3814 xe_vm_put(vm); 3815 xe_bo_put(bo); 3816 3817 if (err) 3818 fence = ERR_PTR(err); 3819 3820 return fence; 3821 } 3822 3823 /** 3824 * xe_vm_lock() - Lock the vm's dma_resv object 3825 * @vm: The struct xe_vm whose lock is to be locked 3826 * @intr: Whether to perform any wait interruptible 3827 * 3828 * Return: 0 on success, -EINTR if @intr is true and the wait for a 3829 * contended lock was interrupted. If @intr is false, the function 3830 * always returns 0. 3831 */ 3832 int xe_vm_lock(struct xe_vm *vm, bool intr) 3833 { 3834 int ret; 3835 3836 if (intr) 3837 ret = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); 3838 else 3839 ret = dma_resv_lock(xe_vm_resv(vm), NULL); 3840 3841 return ret; 3842 } 3843 3844 /** 3845 * xe_vm_unlock() - Unlock the vm's dma_resv object 3846 * @vm: The struct xe_vm whose lock is to be released. 3847 * 3848 * Unlock a buffer object lock that was locked by xe_vm_lock(). 3849 */ 3850 void xe_vm_unlock(struct xe_vm *vm) 3851 { 3852 dma_resv_unlock(xe_vm_resv(vm)); 3853 } 3854 3855 /** 3856 * xe_vm_range_tilemask_tlb_inval - Issue a TLB invalidation on this tilemask for an 3857 * address range 3858 * @vm: The VM 3859 * @start: start address 3860 * @end: end address 3861 * @tile_mask: mask for which gt's issue tlb invalidation 3862 * 3863 * Issue a range based TLB invalidation for gt's in tilemask 3864 * 3865 * Returns 0 for success, negative error code otherwise. 
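 *
 * Note: invalidations are issued for both the primary and the media GT of
 * each tile in @tile_mask, and the function waits for all issued
 * invalidation fences before returning.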
 */
int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start,
				   u64 end, u8 tile_mask)
{
	struct xe_tlb_inval_fence
		fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
	struct xe_tile *tile;
	u32 fence_id = 0;
	u8 id;
	int err = 0;

	if (!tile_mask)
		return 0;

	for_each_tile(tile, vm->xe, id) {
		if (!(tile_mask & BIT(id)))
			continue;

		xe_tlb_inval_fence_init(&tile->primary_gt->tlb_inval,
					&fence[fence_id], true);

		err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval,
					 &fence[fence_id], start, end,
					 vm->usm.asid);
		if (err)
			goto wait;
		++fence_id;

		if (!tile->media_gt)
			continue;

		xe_tlb_inval_fence_init(&tile->media_gt->tlb_inval,
					&fence[fence_id], true);

		err = xe_tlb_inval_range(&tile->media_gt->tlb_inval,
					 &fence[fence_id], start, end,
					 vm->usm.asid);
		if (err)
			goto wait;
		++fence_id;
	}

wait:
	for (id = 0; id < fence_id; ++id)
		xe_tlb_inval_fence_wait(&fence[id]);

	return err;
}

/**
 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
 * @vma: VMA to invalidate
 *
 * Walks the page-table leaves for this VMA, zeroes the entries owned by it,
 * then invalidates the TLBs and blocks until the TLB invalidation is
 * complete.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_vm_invalidate_vma(struct xe_vma *vma)
{
	struct xe_device *xe = xe_vma_vm(vma)->xe;
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_tile *tile;
	u8 tile_mask = 0;
	int ret = 0;
	u8 id;

	xe_assert(xe, !xe_vma_is_null(vma));
	xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma));
	trace_xe_vma_invalidate(vma);

	vm_dbg(&vm->xe->drm,
	       "INVALIDATE: addr=0x%016llx, range=0x%016llx",
		xe_vma_start(vma), xe_vma_size(vma));

	/*
	 * Check that we don't race with page-table updates, so that the
	 * tile_invalidated update below is safe.
	 */
	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
		if (xe_vma_is_userptr(vma)) {
			lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) ||
				       (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) &&
					lockdep_is_held(&xe_vm_resv(vm)->lock.base)));

			WARN_ON_ONCE(!mmu_interval_check_retry
				     (&to_userptr_vma(vma)->userptr.notifier,
				      to_userptr_vma(vma)->userptr.pages.notifier_seq));
			WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm),
							     DMA_RESV_USAGE_BOOKKEEP));

		} else {
			xe_bo_assert_held(xe_vma_bo(vma));
		}
	}

	for_each_tile(tile, xe, id)
		if (xe_pt_zap_ptes(tile, vma))
			tile_mask |= BIT(id);

	xe_device_wmb(xe);

	ret = xe_vm_range_tilemask_tlb_inval(xe_vma_vm(vma), xe_vma_start(vma),
					     xe_vma_end(vma), tile_mask);

	/* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
	WRITE_ONCE(vma->tile_invalidated, vma->tile_mask);

	return ret;
}

int xe_vm_validate_protected(struct xe_vm *vm)
{
	struct drm_gpuva *gpuva;
	int err = 0;

	if (!vm)
		return -ENODEV;

	mutex_lock(&vm->snap_mutex);

	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);
		struct xe_bo *bo = vma->gpuva.gem.obj ?
3991 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 3992 3993 if (!bo) 3994 continue; 3995 3996 if (xe_bo_is_protected(bo)) { 3997 err = xe_pxp_bo_key_check(vm->xe->pxp, bo); 3998 if (err) 3999 break; 4000 } 4001 } 4002 4003 mutex_unlock(&vm->snap_mutex); 4004 return err; 4005 } 4006 4007 struct xe_vm_snapshot { 4008 unsigned long num_snaps; 4009 struct { 4010 u64 ofs, bo_ofs; 4011 unsigned long len; 4012 struct xe_bo *bo; 4013 void *data; 4014 struct mm_struct *mm; 4015 } snap[]; 4016 }; 4017 4018 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm) 4019 { 4020 unsigned long num_snaps = 0, i; 4021 struct xe_vm_snapshot *snap = NULL; 4022 struct drm_gpuva *gpuva; 4023 4024 if (!vm) 4025 return NULL; 4026 4027 mutex_lock(&vm->snap_mutex); 4028 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 4029 if (gpuva->flags & XE_VMA_DUMPABLE) 4030 num_snaps++; 4031 } 4032 4033 if (num_snaps) 4034 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT); 4035 if (!snap) { 4036 snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV); 4037 goto out_unlock; 4038 } 4039 4040 snap->num_snaps = num_snaps; 4041 i = 0; 4042 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 4043 struct xe_vma *vma = gpuva_to_vma(gpuva); 4044 struct xe_bo *bo = vma->gpuva.gem.obj ? 4045 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 4046 4047 if (!(gpuva->flags & XE_VMA_DUMPABLE)) 4048 continue; 4049 4050 snap->snap[i].ofs = xe_vma_start(vma); 4051 snap->snap[i].len = xe_vma_size(vma); 4052 if (bo) { 4053 snap->snap[i].bo = xe_bo_get(bo); 4054 snap->snap[i].bo_ofs = xe_vma_bo_offset(vma); 4055 } else if (xe_vma_is_userptr(vma)) { 4056 struct mm_struct *mm = 4057 to_userptr_vma(vma)->userptr.notifier.mm; 4058 4059 if (mmget_not_zero(mm)) 4060 snap->snap[i].mm = mm; 4061 else 4062 snap->snap[i].data = ERR_PTR(-EFAULT); 4063 4064 snap->snap[i].bo_ofs = xe_vma_userptr(vma); 4065 } else { 4066 snap->snap[i].data = ERR_PTR(-ENOENT); 4067 } 4068 i++; 4069 } 4070 4071 out_unlock: 4072 mutex_unlock(&vm->snap_mutex); 4073 return snap; 4074 } 4075 4076 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) 4077 { 4078 if (IS_ERR_OR_NULL(snap)) 4079 return; 4080 4081 for (int i = 0; i < snap->num_snaps; i++) { 4082 struct xe_bo *bo = snap->snap[i].bo; 4083 int err; 4084 4085 if (IS_ERR(snap->snap[i].data)) 4086 continue; 4087 4088 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER); 4089 if (!snap->snap[i].data) { 4090 snap->snap[i].data = ERR_PTR(-ENOMEM); 4091 goto cleanup_bo; 4092 } 4093 4094 if (bo) { 4095 err = xe_bo_read(bo, snap->snap[i].bo_ofs, 4096 snap->snap[i].data, snap->snap[i].len); 4097 } else { 4098 void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs; 4099 4100 kthread_use_mm(snap->snap[i].mm); 4101 if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len)) 4102 err = 0; 4103 else 4104 err = -EFAULT; 4105 kthread_unuse_mm(snap->snap[i].mm); 4106 4107 mmput(snap->snap[i].mm); 4108 snap->snap[i].mm = NULL; 4109 } 4110 4111 if (err) { 4112 kvfree(snap->snap[i].data); 4113 snap->snap[i].data = ERR_PTR(err); 4114 } 4115 4116 cleanup_bo: 4117 xe_bo_put(bo); 4118 snap->snap[i].bo = NULL; 4119 } 4120 } 4121 4122 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) 4123 { 4124 unsigned long i, j; 4125 4126 if (IS_ERR_OR_NULL(snap)) { 4127 drm_printf(p, "[0].error: %li\n", PTR_ERR(snap)); 4128 return; 4129 } 4130 4131 for (i = 0; i < snap->num_snaps; i++) { 4132 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len); 4133 4134 if 
(IS_ERR(snap->snap[i].data)) { 4135 drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs, 4136 PTR_ERR(snap->snap[i].data)); 4137 continue; 4138 } 4139 4140 drm_printf(p, "[%llx].data: ", snap->snap[i].ofs); 4141 4142 for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) { 4143 u32 *val = snap->snap[i].data + j; 4144 char dumped[ASCII85_BUFSZ]; 4145 4146 drm_puts(p, ascii85_encode(*val, dumped)); 4147 } 4148 4149 drm_puts(p, "\n"); 4150 4151 if (drm_coredump_printer_is_full(p)) 4152 return; 4153 } 4154 } 4155 4156 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap) 4157 { 4158 unsigned long i; 4159 4160 if (IS_ERR_OR_NULL(snap)) 4161 return; 4162 4163 for (i = 0; i < snap->num_snaps; i++) { 4164 if (!IS_ERR(snap->snap[i].data)) 4165 kvfree(snap->snap[i].data); 4166 xe_bo_put(snap->snap[i].bo); 4167 if (snap->snap[i].mm) 4168 mmput(snap->snap[i].mm); 4169 } 4170 kvfree(snap); 4171 } 4172 4173 /** 4174 * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations 4175 * @xe: Pointer to the Xe device structure 4176 * @vma: Pointer to the virtual memory area (VMA) structure 4177 * @is_atomic: In pagefault path and atomic operation 4178 * 4179 * This function determines whether the given VMA needs to be migrated to 4180 * VRAM in order to do atomic GPU operation. 4181 * 4182 * Return: 4183 * 1 - Migration to VRAM is required 4184 * 0 - Migration is not required 4185 * -EACCES - Invalid access for atomic memory attr 4186 * 4187 */ 4188 int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic) 4189 { 4190 u32 atomic_access = xe_vma_bo(vma) ? xe_vma_bo(vma)->attr.atomic_access : 4191 vma->attr.atomic_access; 4192 4193 if (!IS_DGFX(xe) || !is_atomic) 4194 return false; 4195 4196 /* 4197 * NOTE: The checks implemented here are platform-specific. For 4198 * instance, on a device supporting CXL atomics, these would ideally 4199 * work universally without additional handling. 
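 *
 * With the mapping below, DRM_XE_ATOMIC_DEVICE only requires VRAM when the
 * platform lacks device atomics on system memory, DRM_XE_ATOMIC_CPU is an
 * invalid access from the GPU, and DRM_XE_ATOMIC_GLOBAL (as well as
 * DRM_XE_ATOMIC_UNDEFINED) conservatively migrates to VRAM.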
 */
	switch (atomic_access) {
	case DRM_XE_ATOMIC_DEVICE:
		return !xe->info.has_device_atomics_on_smem;

	case DRM_XE_ATOMIC_CPU:
		return -EACCES;

	case DRM_XE_ATOMIC_UNDEFINED:
	case DRM_XE_ATOMIC_GLOBAL:
	default:
		return 1;
	}
}

static int xe_vm_alloc_vma(struct xe_vm *vm,
			   struct drm_gpuvm_map_req *map_req,
			   bool is_madvise)
{
	struct xe_vma_ops vops;
	struct drm_gpuva_ops *ops = NULL;
	struct drm_gpuva_op *__op;
	unsigned int vma_flags = 0;
	bool remap_op = false;
	struct xe_vma_mem_attr tmp_attr;
	u16 default_pat;
	int err;

	lockdep_assert_held_write(&vm->lock);

	if (is_madvise)
		ops = drm_gpuvm_madvise_ops_create(&vm->gpuvm, map_req);
	else
		ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, map_req);

	if (IS_ERR(ops))
		return PTR_ERR(ops);

	if (list_empty(&ops->list)) {
		err = 0;
		goto free_ops;
	}

	drm_gpuva_for_each_op(__op, ops) {
		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
		struct xe_vma *vma = NULL;

		if (!is_madvise) {
			if (__op->op == DRM_GPUVA_OP_UNMAP) {
				vma = gpuva_to_vma(op->base.unmap.va);
				XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma));
				default_pat = vma->attr.default_pat_index;
				vma_flags = vma->gpuva.flags;
			}

			if (__op->op == DRM_GPUVA_OP_REMAP) {
				vma = gpuva_to_vma(op->base.remap.unmap->va);
				default_pat = vma->attr.default_pat_index;
				vma_flags = vma->gpuva.flags;
			}

			if (__op->op == DRM_GPUVA_OP_MAP) {
				op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK;
				op->map.pat_index = default_pat;
			}
		} else {
			if (__op->op == DRM_GPUVA_OP_REMAP) {
				vma = gpuva_to_vma(op->base.remap.unmap->va);
				xe_assert(vm->xe, !remap_op);
				xe_assert(vm->xe, xe_vma_has_no_bo(vma));
				remap_op = true;
				vma_flags = vma->gpuva.flags;
			}

			if (__op->op == DRM_GPUVA_OP_MAP) {
				xe_assert(vm->xe, remap_op);
				remap_op = false;
				/*
				 * In case of madvise ops DRM_GPUVA_OP_MAP is
				 * always after DRM_GPUVA_OP_REMAP, so ensure
				 * to propagate the flags from the vma we're
				 * unmapping.
				 */
				op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK;
			}
		}
		print_op(vm->xe, __op);
	}

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);

	if (is_madvise)
		vops.flags |= XE_VMA_OPS_FLAG_MADVISE;

	err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
	if (err)
		goto unwind_ops;

	xe_vm_lock(vm, false);

	/*
	 * Commit phase: destroy the VMAs that were unmapped or remapped and,
	 * for madvise, carry the attributes over to the newly created VMA.
	 */
	drm_gpuva_for_each_op(__op, ops) {
		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
		struct xe_vma *vma;

		if (__op->op == DRM_GPUVA_OP_UNMAP) {
			vma = gpuva_to_vma(op->base.unmap.va);
			/* There should be no unmap for madvise */
			if (is_madvise)
				XE_WARN_ON("UNEXPECTED UNMAP");

			xe_vma_destroy(vma, NULL);
		} else if (__op->op == DRM_GPUVA_OP_REMAP) {
			vma = gpuva_to_vma(op->base.remap.unmap->va);
			/*
			 * For madvise ops, store the attributes of the VMA
			 * unmapped by the REMAP so they can be assigned to
			 * the newly created MAP VMA.
			 */
			if (is_madvise)
				tmp_attr = vma->attr;

			xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL);
		} else if (__op->op == DRM_GPUVA_OP_MAP) {
			vma = op->map.vma;
			/*
			 * For madvise ops, a MAP always comes after the REMAP
			 * that split the range, so tmp_attr has already been
			 * captured above and holds sane values, making it
			 * safe to copy them to the new VMA.
			 */
			if (is_madvise)
				vma->attr = tmp_attr;
		}
	}

	xe_vm_unlock(vm);
	drm_gpuva_ops_free(&vm->gpuvm, ops);
	return 0;

unwind_ops:
	vm_bind_ioctl_ops_unwind(vm, &ops, 1);
free_ops:
	drm_gpuva_ops_free(&vm->gpuvm, ops);
	return err;
}

/**
 * xe_vm_alloc_madvise_vma - Allocate VMAs with madvise ops
 * @vm: Pointer to the xe_vm structure
 * @start: Starting input address
 * @range: Size of the input range
 *
 * This function splits existing VMAs so that new VMAs exactly cover the
 * user-provided input range.
 *
 * Return: 0 on success, negative error code on failure.
 */
int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
{
	struct drm_gpuvm_map_req map_req = {
		.map.va.addr = start,
		.map.va.range = range,
	};

	lockdep_assert_held_write(&vm->lock);

	vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range);

	return xe_vm_alloc_vma(vm, &map_req, true);
}

/**
 * xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU addr mirror vma
 * @vm: Pointer to the xe_vm structure
 * @start: Starting input address
 * @range: Size of the input range
 *
 * This function splits/merges existing VMAs so that a new VMA covers the
 * user-provided input range.
 *
 * Return: 0 on success, negative error code on failure.
 */
int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
{
	struct drm_gpuvm_map_req map_req = {
		.map.va.addr = start,
		.map.va.range = range,
	};

	lockdep_assert_held_write(&vm->lock);

	vm_dbg(&vm->xe->drm, "CPU_ADDR_MIRROR_VMA_OPS_CREATE: addr=0x%016llx, size=0x%016llx",
	       start, range);

	return xe_vm_alloc_vma(vm, &map_req, false);
}
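
/*
 * Example (illustrative sketch only, not part of the driver): callers such
 * as the madvise ioctl path are expected to split VMAs under the write-held
 * vm->lock before applying new attributes, roughly:
 *
 *	down_write(&vm->lock);
 *	err = xe_vm_alloc_madvise_vma(vm, start, range);
 *	if (!err)
 *		apply_madvise_attrs(vm, start, range);
 *	up_write(&vm->lock);
 *
 * apply_madvise_attrs() is a hypothetical placeholder for whatever attribute
 * update the caller performs on the VMAs that now exactly cover
 * [start, start + range).
 */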