1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2021 Intel Corporation 4 */ 5 6 #include "xe_vm.h" 7 8 #include <linux/dma-fence-array.h> 9 #include <linux/nospec.h> 10 11 #include <drm/drm_drv.h> 12 #include <drm/drm_exec.h> 13 #include <drm/drm_print.h> 14 #include <drm/ttm/ttm_tt.h> 15 #include <uapi/drm/xe_drm.h> 16 #include <linux/ascii85.h> 17 #include <linux/delay.h> 18 #include <linux/kthread.h> 19 #include <linux/mm.h> 20 #include <linux/swap.h> 21 22 #include <generated/xe_wa_oob.h> 23 24 #include "regs/xe_gtt_defs.h" 25 #include "xe_assert.h" 26 #include "xe_bo.h" 27 #include "xe_device.h" 28 #include "xe_drm_client.h" 29 #include "xe_exec_queue.h" 30 #include "xe_gt_pagefault.h" 31 #include "xe_migrate.h" 32 #include "xe_pat.h" 33 #include "xe_pm.h" 34 #include "xe_preempt_fence.h" 35 #include "xe_pt.h" 36 #include "xe_pxp.h" 37 #include "xe_res_cursor.h" 38 #include "xe_svm.h" 39 #include "xe_sync.h" 40 #include "xe_tile.h" 41 #include "xe_tlb_inval.h" 42 #include "xe_trace_bo.h" 43 #include "xe_wa.h" 44 45 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) 46 { 47 return vm->gpuvm.r_obj; 48 } 49 50 /** 51 * xe_vm_drm_exec_lock() - Lock the vm's resv with a drm_exec transaction 52 * @vm: The vm whose resv is to be locked. 53 * @exec: The drm_exec transaction. 54 * 55 * Helper to lock the vm's resv as part of a drm_exec transaction. 56 * 57 * Return: %0 on success. See drm_exec_lock_obj() for error codes. 58 */ 59 int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec) 60 { 61 return drm_exec_lock_obj(exec, xe_vm_obj(vm)); 62 } 63 64 static bool preempt_fences_waiting(struct xe_vm *vm) 65 { 66 struct xe_exec_queue *q; 67 68 lockdep_assert_held(&vm->lock); 69 xe_vm_assert_held(vm); 70 71 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 72 if (!q->lr.pfence || 73 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, 74 &q->lr.pfence->flags)) { 75 return true; 76 } 77 } 78 79 return false; 80 } 81 82 static void free_preempt_fences(struct list_head *list) 83 { 84 struct list_head *link, *next; 85 86 list_for_each_safe(link, next, list) 87 xe_preempt_fence_free(to_preempt_fence_from_link(link)); 88 } 89 90 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list, 91 unsigned int *count) 92 { 93 lockdep_assert_held(&vm->lock); 94 xe_vm_assert_held(vm); 95 96 if (*count >= vm->preempt.num_exec_queues) 97 return 0; 98 99 for (; *count < vm->preempt.num_exec_queues; ++(*count)) { 100 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc(); 101 102 if (IS_ERR(pfence)) 103 return PTR_ERR(pfence); 104 105 list_move_tail(xe_preempt_fence_link(pfence), list); 106 } 107 108 return 0; 109 } 110 111 static int wait_for_existing_preempt_fences(struct xe_vm *vm) 112 { 113 struct xe_exec_queue *q; 114 115 xe_vm_assert_held(vm); 116 117 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 118 if (q->lr.pfence) { 119 long timeout = dma_fence_wait(q->lr.pfence, false); 120 121 /* Only -ETIME on fence indicates VM needs to be killed */ 122 if (timeout < 0 || q->lr.pfence->error == -ETIME) 123 return -ETIME; 124 125 dma_fence_put(q->lr.pfence); 126 q->lr.pfence = NULL; 127 } 128 } 129 130 return 0; 131 } 132 133 static bool xe_vm_is_idle(struct xe_vm *vm) 134 { 135 struct xe_exec_queue *q; 136 137 xe_vm_assert_held(vm); 138 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 139 if (!xe_exec_queue_is_idle(q)) 140 return false; 141 } 142 143 return true; 144 } 145 146 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list) 147 { 
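	/*
	 * Consume one pre-allocated preempt fence from @list per exec queue,
	 * arm it against that queue with the next seqno, and replace the
	 * queue's previous preempt fence. The caller is expected to have
	 * pre-allocated at least vm->preempt.num_exec_queues fences (see
	 * alloc_preempt_fences()); the assert below catches a short list.
	 */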
148 struct list_head *link; 149 struct xe_exec_queue *q; 150 151 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 152 struct dma_fence *fence; 153 154 link = list->next; 155 xe_assert(vm->xe, link != list); 156 157 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link), 158 q, q->lr.context, 159 ++q->lr.seqno); 160 dma_fence_put(q->lr.pfence); 161 q->lr.pfence = fence; 162 } 163 } 164 165 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) 166 { 167 struct xe_exec_queue *q; 168 int err; 169 170 xe_bo_assert_held(bo); 171 172 if (!vm->preempt.num_exec_queues) 173 return 0; 174 175 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues); 176 if (err) 177 return err; 178 179 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) 180 if (q->lr.pfence) { 181 dma_resv_add_fence(bo->ttm.base.resv, 182 q->lr.pfence, 183 DMA_RESV_USAGE_BOOKKEEP); 184 } 185 186 return 0; 187 } 188 189 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm, 190 struct drm_exec *exec) 191 { 192 struct xe_exec_queue *q; 193 194 lockdep_assert_held(&vm->lock); 195 xe_vm_assert_held(vm); 196 197 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 198 q->ops->resume(q); 199 200 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence, 201 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); 202 } 203 } 204 205 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 206 { 207 struct drm_gpuvm_exec vm_exec = { 208 .vm = &vm->gpuvm, 209 .flags = DRM_EXEC_INTERRUPTIBLE_WAIT, 210 .num_fences = 1, 211 }; 212 struct drm_exec *exec = &vm_exec.exec; 213 struct xe_validation_ctx ctx; 214 struct dma_fence *pfence; 215 int err; 216 bool wait; 217 218 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); 219 220 down_write(&vm->lock); 221 err = xe_validation_exec_lock(&ctx, &vm_exec, &vm->xe->val); 222 if (err) 223 goto out_up_write; 224 225 pfence = xe_preempt_fence_create(q, q->lr.context, 226 ++q->lr.seqno); 227 if (IS_ERR(pfence)) { 228 err = PTR_ERR(pfence); 229 goto out_fini; 230 } 231 232 list_add(&q->lr.link, &vm->preempt.exec_queues); 233 ++vm->preempt.num_exec_queues; 234 q->lr.pfence = pfence; 235 236 xe_svm_notifier_lock(vm); 237 238 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence, 239 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); 240 241 /* 242 * Check to see if a preemption on VM is in flight or userptr 243 * invalidation, if so trigger this preempt fence to sync state with 244 * other preempt fences on the VM. 245 */ 246 wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm); 247 if (wait) 248 dma_fence_enable_sw_signaling(pfence); 249 250 xe_svm_notifier_unlock(vm); 251 252 out_fini: 253 xe_validation_ctx_fini(&ctx); 254 out_up_write: 255 up_write(&vm->lock); 256 257 return err; 258 } 259 ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO); 260 261 /** 262 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM 263 * @vm: The VM. 264 * @q: The exec_queue 265 * 266 * Note that this function might be called multiple times on the same queue. 
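 *
 * Repeated calls are safe because the queue is unlinked and counted only
 * once (list_del_init() leaves an empty link) and the preempt fence
 * pointer is cleared after the first call. Illustrative pairing with the
 * add side (sketch only, error handling omitted):
 *
 *	err = xe_vm_add_compute_exec_queue(vm, q);
 *	...
 *	xe_vm_remove_compute_exec_queue(vm, q);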
267 */ 268 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 269 { 270 if (!xe_vm_in_preempt_fence_mode(vm)) 271 return; 272 273 down_write(&vm->lock); 274 if (!list_empty(&q->lr.link)) { 275 list_del_init(&q->lr.link); 276 --vm->preempt.num_exec_queues; 277 } 278 if (q->lr.pfence) { 279 dma_fence_enable_sw_signaling(q->lr.pfence); 280 dma_fence_put(q->lr.pfence); 281 q->lr.pfence = NULL; 282 } 283 up_write(&vm->lock); 284 } 285 286 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000 287 288 /** 289 * xe_vm_kill() - VM Kill 290 * @vm: The VM. 291 * @unlocked: Flag indicates the VM's dma-resv is not held 292 * 293 * Kill the VM by setting banned flag indicated VM is no longer available for 294 * use. If in preempt fence mode, also kill all exec queue attached to the VM. 295 */ 296 void xe_vm_kill(struct xe_vm *vm, bool unlocked) 297 { 298 struct xe_exec_queue *q; 299 300 lockdep_assert_held(&vm->lock); 301 302 if (unlocked) 303 xe_vm_lock(vm, false); 304 305 vm->flags |= XE_VM_FLAG_BANNED; 306 trace_xe_vm_kill(vm); 307 308 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) 309 q->ops->kill(q); 310 311 if (unlocked) 312 xe_vm_unlock(vm); 313 314 /* TODO: Inform user the VM is banned */ 315 } 316 317 static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) 318 { 319 struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm); 320 struct drm_gpuva *gpuva; 321 int ret; 322 323 lockdep_assert_held(&vm->lock); 324 drm_gpuvm_bo_for_each_va(gpuva, vm_bo) 325 list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind, 326 &vm->rebind_list); 327 328 if (!try_wait_for_completion(&vm->xe->pm_block)) 329 return -EAGAIN; 330 331 ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false, exec); 332 if (ret) 333 return ret; 334 335 vm_bo->evicted = false; 336 return 0; 337 } 338 339 /** 340 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas 341 * @vm: The vm for which we are rebinding. 342 * @exec: The struct drm_exec with the locked GEM objects. 343 * @num_fences: The number of fences to reserve for the operation, not 344 * including rebinds and validations. 345 * 346 * Validates all evicted gem objects and rebinds their vmas. Note that 347 * rebindings may cause evictions and hence the validation-rebind 348 * sequence is rerun until there are no more objects to validate. 349 * 350 * Return: 0 on success, negative error code on error. In particular, 351 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if 352 * the drm_exec transaction needs to be restarted. 
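 *
 * Illustrative call pattern (a sketch modelled on the preempt rebind
 * worker in this file; setup and error handling omitted):
 *
 *	drm_exec_until_all_locked(&exec) {
 *		err = drm_gpuvm_prepare_vm(&vm->gpuvm, &exec, 0);
 *		if (!err)
 *			err = xe_vm_validate_rebind(vm, &exec, num_fences);
 *		drm_exec_retry_on_contention(&exec);
 *	}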
353 */ 354 int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec, 355 unsigned int num_fences) 356 { 357 struct drm_gem_object *obj; 358 unsigned long index; 359 int ret; 360 361 do { 362 ret = drm_gpuvm_validate(&vm->gpuvm, exec); 363 if (ret) 364 return ret; 365 366 ret = xe_vm_rebind(vm, false); 367 if (ret) 368 return ret; 369 } while (!list_empty(&vm->gpuvm.evict.list)); 370 371 drm_exec_for_each_locked_object(exec, index, obj) { 372 ret = dma_resv_reserve_fences(obj->resv, num_fences); 373 if (ret) 374 return ret; 375 } 376 377 return 0; 378 } 379 380 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, 381 bool *done) 382 { 383 int err; 384 385 err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0); 386 if (err) 387 return err; 388 389 if (xe_vm_is_idle(vm)) { 390 vm->preempt.rebind_deactivated = true; 391 *done = true; 392 return 0; 393 } 394 395 if (!preempt_fences_waiting(vm)) { 396 *done = true; 397 return 0; 398 } 399 400 err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0); 401 if (err) 402 return err; 403 404 err = wait_for_existing_preempt_fences(vm); 405 if (err) 406 return err; 407 408 /* 409 * Add validation and rebinding to the locking loop since both can 410 * cause evictions which may require blocing dma_resv locks. 411 * The fence reservation here is intended for the new preempt fences 412 * we attach at the end of the rebind work. 413 */ 414 return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues); 415 } 416 417 static bool vm_suspend_rebind_worker(struct xe_vm *vm) 418 { 419 struct xe_device *xe = vm->xe; 420 bool ret = false; 421 422 mutex_lock(&xe->rebind_resume_lock); 423 if (!try_wait_for_completion(&vm->xe->pm_block)) { 424 ret = true; 425 list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list); 426 } 427 mutex_unlock(&xe->rebind_resume_lock); 428 429 return ret; 430 } 431 432 /** 433 * xe_vm_resume_rebind_worker() - Resume the rebind worker. 434 * @vm: The vm whose preempt worker to resume. 435 * 436 * Resume a preempt worker that was previously suspended by 437 * vm_suspend_rebind_worker(). 
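 *
 * This simply re-queues the rebind work on the device's ordered
 * workqueue, so preempt_rebind_work_func() runs again from the top and
 * re-checks whether it still needs to suspend.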
438 */ 439 void xe_vm_resume_rebind_worker(struct xe_vm *vm) 440 { 441 queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work); 442 } 443 444 static void preempt_rebind_work_func(struct work_struct *w) 445 { 446 struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work); 447 struct xe_validation_ctx ctx; 448 struct drm_exec exec; 449 unsigned int fence_count = 0; 450 LIST_HEAD(preempt_fences); 451 int err = 0; 452 long wait; 453 int __maybe_unused tries = 0; 454 455 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); 456 trace_xe_vm_rebind_worker_enter(vm); 457 458 down_write(&vm->lock); 459 460 if (xe_vm_is_closed_or_banned(vm)) { 461 up_write(&vm->lock); 462 trace_xe_vm_rebind_worker_exit(vm); 463 return; 464 } 465 466 retry: 467 if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) { 468 up_write(&vm->lock); 469 return; 470 } 471 472 if (xe_vm_userptr_check_repin(vm)) { 473 err = xe_vm_userptr_pin(vm); 474 if (err) 475 goto out_unlock_outer; 476 } 477 478 err = xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, 479 (struct xe_val_flags) {.interruptible = true}); 480 if (err) 481 goto out_unlock_outer; 482 483 drm_exec_until_all_locked(&exec) { 484 bool done = false; 485 486 err = xe_preempt_work_begin(&exec, vm, &done); 487 drm_exec_retry_on_contention(&exec); 488 xe_validation_retry_on_oom(&ctx, &err); 489 if (err || done) { 490 xe_validation_ctx_fini(&ctx); 491 goto out_unlock_outer; 492 } 493 } 494 495 err = alloc_preempt_fences(vm, &preempt_fences, &fence_count); 496 if (err) 497 goto out_unlock; 498 499 xe_vm_set_validation_exec(vm, &exec); 500 err = xe_vm_rebind(vm, true); 501 xe_vm_set_validation_exec(vm, NULL); 502 if (err) 503 goto out_unlock; 504 505 /* Wait on rebinds and munmap style VM unbinds */ 506 wait = dma_resv_wait_timeout(xe_vm_resv(vm), 507 DMA_RESV_USAGE_KERNEL, 508 false, MAX_SCHEDULE_TIMEOUT); 509 if (wait <= 0) { 510 err = -ETIME; 511 goto out_unlock; 512 } 513 514 #define retry_required(__tries, __vm) \ 515 (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \ 516 (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \ 517 __xe_vm_userptr_needs_repin(__vm)) 518 519 xe_svm_notifier_lock(vm); 520 if (retry_required(tries, vm)) { 521 xe_svm_notifier_unlock(vm); 522 err = -EAGAIN; 523 goto out_unlock; 524 } 525 526 #undef retry_required 527 528 spin_lock(&vm->xe->ttm.lru_lock); 529 ttm_lru_bulk_move_tail(&vm->lru_bulk_move); 530 spin_unlock(&vm->xe->ttm.lru_lock); 531 532 /* Point of no return. */ 533 arm_preempt_fences(vm, &preempt_fences); 534 resume_and_reinstall_preempt_fences(vm, &exec); 535 xe_svm_notifier_unlock(vm); 536 537 out_unlock: 538 xe_validation_ctx_fini(&ctx); 539 out_unlock_outer: 540 if (err == -EAGAIN) { 541 trace_xe_vm_rebind_worker_retry(vm); 542 goto retry; 543 } 544 545 if (err) { 546 drm_warn(&vm->xe->drm, "VM worker error: %d\n", err); 547 xe_vm_kill(vm, true); 548 } 549 up_write(&vm->lock); 550 551 free_preempt_fences(&preempt_fences); 552 553 trace_xe_vm_rebind_worker_exit(vm); 554 } 555 556 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) 557 { 558 int i; 559 560 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) { 561 if (!vops->pt_update_ops[i].num_ops) 562 continue; 563 564 vops->pt_update_ops[i].ops = 565 kmalloc_array(vops->pt_update_ops[i].num_ops, 566 sizeof(*vops->pt_update_ops[i].ops), 567 GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 568 if (!vops->pt_update_ops[i].ops) 569 return array_of_binds ? 
-ENOBUFS : -ENOMEM; 570 } 571 572 return 0; 573 } 574 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO); 575 576 static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op) 577 { 578 struct xe_vma *vma; 579 580 vma = gpuva_to_vma(op->base.prefetch.va); 581 582 if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma)) 583 xa_destroy(&op->prefetch_range.range); 584 } 585 586 static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops) 587 { 588 struct xe_vma_op *op; 589 590 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) 591 return; 592 593 list_for_each_entry(op, &vops->list, link) 594 xe_vma_svm_prefetch_op_fini(op); 595 } 596 597 static void xe_vma_ops_fini(struct xe_vma_ops *vops) 598 { 599 int i; 600 601 xe_vma_svm_prefetch_ops_fini(vops); 602 603 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 604 kfree(vops->pt_update_ops[i].ops); 605 } 606 607 static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val) 608 { 609 int i; 610 611 if (!inc_val) 612 return; 613 614 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 615 if (BIT(i) & tile_mask) 616 vops->pt_update_ops[i].num_ops += inc_val; 617 } 618 619 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, 620 u8 tile_mask) 621 { 622 INIT_LIST_HEAD(&op->link); 623 op->tile_mask = tile_mask; 624 op->base.op = DRM_GPUVA_OP_MAP; 625 op->base.map.va.addr = vma->gpuva.va.addr; 626 op->base.map.va.range = vma->gpuva.va.range; 627 op->base.map.gem.obj = vma->gpuva.gem.obj; 628 op->base.map.gem.offset = vma->gpuva.gem.offset; 629 op->map.vma = vma; 630 op->map.immediate = true; 631 op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE; 632 op->map.is_null = xe_vma_is_null(vma); 633 } 634 635 static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, 636 u8 tile_mask) 637 { 638 struct xe_vma_op *op; 639 640 op = kzalloc(sizeof(*op), GFP_KERNEL); 641 if (!op) 642 return -ENOMEM; 643 644 xe_vm_populate_rebind(op, vma, tile_mask); 645 list_add_tail(&op->link, &vops->list); 646 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); 647 648 return 0; 649 } 650 651 static struct dma_fence *ops_execute(struct xe_vm *vm, 652 struct xe_vma_ops *vops); 653 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 654 struct xe_exec_queue *q, 655 struct xe_sync_entry *syncs, u32 num_syncs); 656 657 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) 658 { 659 struct dma_fence *fence; 660 struct xe_vma *vma, *next; 661 struct xe_vma_ops vops; 662 struct xe_vma_op *op, *next_op; 663 int err, i; 664 665 lockdep_assert_held(&vm->lock); 666 if ((xe_vm_in_lr_mode(vm) && !rebind_worker) || 667 list_empty(&vm->rebind_list)) 668 return 0; 669 670 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 671 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 672 vops.pt_update_ops[i].wait_vm_bookkeep = true; 673 674 xe_vm_assert_held(vm); 675 list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { 676 xe_assert(vm->xe, vma->tile_present); 677 678 if (rebind_worker) 679 trace_xe_vma_rebind_worker(vma); 680 else 681 trace_xe_vma_rebind_exec(vma); 682 683 err = xe_vm_ops_add_rebind(&vops, vma, 684 vma->tile_present); 685 if (err) 686 goto free_ops; 687 } 688 689 err = xe_vma_ops_alloc(&vops, false); 690 if (err) 691 goto free_ops; 692 693 fence = ops_execute(vm, &vops); 694 if (IS_ERR(fence)) { 695 err = PTR_ERR(fence); 696 } else { 697 dma_fence_put(fence); 698 list_for_each_entry_safe(vma, next, &vm->rebind_list, 699 combined_links.rebind) 700 
list_del_init(&vma->combined_links.rebind); 701 } 702 free_ops: 703 list_for_each_entry_safe(op, next_op, &vops.list, link) { 704 list_del(&op->link); 705 kfree(op); 706 } 707 xe_vma_ops_fini(&vops); 708 709 return err; 710 } 711 712 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask) 713 { 714 struct dma_fence *fence = NULL; 715 struct xe_vma_ops vops; 716 struct xe_vma_op *op, *next_op; 717 struct xe_tile *tile; 718 u8 id; 719 int err; 720 721 lockdep_assert_held(&vm->lock); 722 xe_vm_assert_held(vm); 723 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 724 725 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 726 for_each_tile(tile, vm->xe, id) { 727 vops.pt_update_ops[id].wait_vm_bookkeep = true; 728 vops.pt_update_ops[tile->id].q = 729 xe_migrate_exec_queue(tile->migrate); 730 } 731 732 err = xe_vm_ops_add_rebind(&vops, vma, tile_mask); 733 if (err) 734 return ERR_PTR(err); 735 736 err = xe_vma_ops_alloc(&vops, false); 737 if (err) { 738 fence = ERR_PTR(err); 739 goto free_ops; 740 } 741 742 fence = ops_execute(vm, &vops); 743 744 free_ops: 745 list_for_each_entry_safe(op, next_op, &vops.list, link) { 746 list_del(&op->link); 747 kfree(op); 748 } 749 xe_vma_ops_fini(&vops); 750 751 return fence; 752 } 753 754 static void xe_vm_populate_range_rebind(struct xe_vma_op *op, 755 struct xe_vma *vma, 756 struct xe_svm_range *range, 757 u8 tile_mask) 758 { 759 INIT_LIST_HEAD(&op->link); 760 op->tile_mask = tile_mask; 761 op->base.op = DRM_GPUVA_OP_DRIVER; 762 op->subop = XE_VMA_SUBOP_MAP_RANGE; 763 op->map_range.vma = vma; 764 op->map_range.range = range; 765 } 766 767 static int 768 xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops, 769 struct xe_vma *vma, 770 struct xe_svm_range *range, 771 u8 tile_mask) 772 { 773 struct xe_vma_op *op; 774 775 op = kzalloc(sizeof(*op), GFP_KERNEL); 776 if (!op) 777 return -ENOMEM; 778 779 xe_vm_populate_range_rebind(op, vma, range, tile_mask); 780 list_add_tail(&op->link, &vops->list); 781 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); 782 783 return 0; 784 } 785 786 /** 787 * xe_vm_range_rebind() - VM range (re)bind 788 * @vm: The VM which the range belongs to. 789 * @vma: The VMA which the range belongs to. 790 * @range: SVM range to rebind. 791 * @tile_mask: Tile mask to bind the range to. 792 * 793 * (re)bind SVM range setting up GPU page tables for the range. 
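 * Per the asserts in the implementation, the caller must hold vm->lock
 * and the vm dma-resv, the VM must be in fault mode, and @vma must be a
 * CPU-address-mirror VMA.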
794 * 795 * Return: dma fence for rebind to signal completion on succees, ERR_PTR on 796 * failure 797 */ 798 struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm, 799 struct xe_vma *vma, 800 struct xe_svm_range *range, 801 u8 tile_mask) 802 { 803 struct dma_fence *fence = NULL; 804 struct xe_vma_ops vops; 805 struct xe_vma_op *op, *next_op; 806 struct xe_tile *tile; 807 u8 id; 808 int err; 809 810 lockdep_assert_held(&vm->lock); 811 xe_vm_assert_held(vm); 812 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 813 xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma)); 814 815 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 816 for_each_tile(tile, vm->xe, id) { 817 vops.pt_update_ops[id].wait_vm_bookkeep = true; 818 vops.pt_update_ops[tile->id].q = 819 xe_migrate_exec_queue(tile->migrate); 820 } 821 822 err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask); 823 if (err) 824 return ERR_PTR(err); 825 826 err = xe_vma_ops_alloc(&vops, false); 827 if (err) { 828 fence = ERR_PTR(err); 829 goto free_ops; 830 } 831 832 fence = ops_execute(vm, &vops); 833 834 free_ops: 835 list_for_each_entry_safe(op, next_op, &vops.list, link) { 836 list_del(&op->link); 837 kfree(op); 838 } 839 xe_vma_ops_fini(&vops); 840 841 return fence; 842 } 843 844 static void xe_vm_populate_range_unbind(struct xe_vma_op *op, 845 struct xe_svm_range *range) 846 { 847 INIT_LIST_HEAD(&op->link); 848 op->tile_mask = range->tile_present; 849 op->base.op = DRM_GPUVA_OP_DRIVER; 850 op->subop = XE_VMA_SUBOP_UNMAP_RANGE; 851 op->unmap_range.range = range; 852 } 853 854 static int 855 xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops, 856 struct xe_svm_range *range) 857 { 858 struct xe_vma_op *op; 859 860 op = kzalloc(sizeof(*op), GFP_KERNEL); 861 if (!op) 862 return -ENOMEM; 863 864 xe_vm_populate_range_unbind(op, range); 865 list_add_tail(&op->link, &vops->list); 866 xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1); 867 868 return 0; 869 } 870 871 /** 872 * xe_vm_range_unbind() - VM range unbind 873 * @vm: The VM which the range belongs to. 874 * @range: SVM range to rebind. 875 * 876 * Unbind SVM range removing the GPU page tables for the range. 
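 * If the range has no GPU bindings (no tile_present bits set), a
 * signalled stub fence is returned immediately. Per the asserts in the
 * implementation, the caller must hold vm->lock and the vm dma-resv, and
 * the VM must be in fault mode.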
877 * 878 * Return: dma fence for unbind to signal completion on succees, ERR_PTR on 879 * failure 880 */ 881 struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm, 882 struct xe_svm_range *range) 883 { 884 struct dma_fence *fence = NULL; 885 struct xe_vma_ops vops; 886 struct xe_vma_op *op, *next_op; 887 struct xe_tile *tile; 888 u8 id; 889 int err; 890 891 lockdep_assert_held(&vm->lock); 892 xe_vm_assert_held(vm); 893 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 894 895 if (!range->tile_present) 896 return dma_fence_get_stub(); 897 898 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 899 for_each_tile(tile, vm->xe, id) { 900 vops.pt_update_ops[id].wait_vm_bookkeep = true; 901 vops.pt_update_ops[tile->id].q = 902 xe_migrate_exec_queue(tile->migrate); 903 } 904 905 err = xe_vm_ops_add_range_unbind(&vops, range); 906 if (err) 907 return ERR_PTR(err); 908 909 err = xe_vma_ops_alloc(&vops, false); 910 if (err) { 911 fence = ERR_PTR(err); 912 goto free_ops; 913 } 914 915 fence = ops_execute(vm, &vops); 916 917 free_ops: 918 list_for_each_entry_safe(op, next_op, &vops.list, link) { 919 list_del(&op->link); 920 kfree(op); 921 } 922 xe_vma_ops_fini(&vops); 923 924 return fence; 925 } 926 927 static void xe_vma_free(struct xe_vma *vma) 928 { 929 if (xe_vma_is_userptr(vma)) 930 kfree(to_userptr_vma(vma)); 931 else 932 kfree(vma); 933 } 934 935 #define VMA_CREATE_FLAG_READ_ONLY BIT(0) 936 #define VMA_CREATE_FLAG_IS_NULL BIT(1) 937 #define VMA_CREATE_FLAG_DUMPABLE BIT(2) 938 #define VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR BIT(3) 939 940 static struct xe_vma *xe_vma_create(struct xe_vm *vm, 941 struct xe_bo *bo, 942 u64 bo_offset_or_userptr, 943 u64 start, u64 end, 944 struct xe_vma_mem_attr *attr, 945 unsigned int flags) 946 { 947 struct xe_vma *vma; 948 struct xe_tile *tile; 949 u8 id; 950 bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY); 951 bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL); 952 bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE); 953 bool is_cpu_addr_mirror = 954 (flags & VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR); 955 956 xe_assert(vm->xe, start < end); 957 xe_assert(vm->xe, end < vm->size); 958 959 /* 960 * Allocate and ensure that the xe_vma_is_userptr() return 961 * matches what was allocated. 
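	 * Only a VMA that is neither BO-backed, nor NULL/sparse, nor a CPU
	 * address mirror is a userptr VMA and needs the larger struct
	 * xe_userptr_vma; every other kind uses a plain struct xe_vma.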
962 */ 963 if (!bo && !is_null && !is_cpu_addr_mirror) { 964 struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL); 965 966 if (!uvma) 967 return ERR_PTR(-ENOMEM); 968 969 vma = &uvma->vma; 970 } else { 971 vma = kzalloc(sizeof(*vma), GFP_KERNEL); 972 if (!vma) 973 return ERR_PTR(-ENOMEM); 974 975 if (is_cpu_addr_mirror) 976 vma->gpuva.flags |= XE_VMA_SYSTEM_ALLOCATOR; 977 if (is_null) 978 vma->gpuva.flags |= DRM_GPUVA_SPARSE; 979 if (bo) 980 vma->gpuva.gem.obj = &bo->ttm.base; 981 } 982 983 INIT_LIST_HEAD(&vma->combined_links.rebind); 984 985 INIT_LIST_HEAD(&vma->gpuva.gem.entry); 986 vma->gpuva.vm = &vm->gpuvm; 987 vma->gpuva.va.addr = start; 988 vma->gpuva.va.range = end - start + 1; 989 if (read_only) 990 vma->gpuva.flags |= XE_VMA_READ_ONLY; 991 if (dumpable) 992 vma->gpuva.flags |= XE_VMA_DUMPABLE; 993 994 for_each_tile(tile, vm->xe, id) 995 vma->tile_mask |= 0x1 << id; 996 997 if (vm->xe->info.has_atomic_enable_pte_bit) 998 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; 999 1000 vma->attr = *attr; 1001 1002 if (bo) { 1003 struct drm_gpuvm_bo *vm_bo; 1004 1005 xe_bo_assert_held(bo); 1006 1007 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base); 1008 if (IS_ERR(vm_bo)) { 1009 xe_vma_free(vma); 1010 return ERR_CAST(vm_bo); 1011 } 1012 1013 drm_gpuvm_bo_extobj_add(vm_bo); 1014 drm_gem_object_get(&bo->ttm.base); 1015 vma->gpuva.gem.offset = bo_offset_or_userptr; 1016 drm_gpuva_link(&vma->gpuva, vm_bo); 1017 drm_gpuvm_bo_put(vm_bo); 1018 } else /* userptr or null */ { 1019 if (!is_null && !is_cpu_addr_mirror) { 1020 struct xe_userptr_vma *uvma = to_userptr_vma(vma); 1021 u64 size = end - start + 1; 1022 int err; 1023 1024 vma->gpuva.gem.offset = bo_offset_or_userptr; 1025 1026 err = xe_userptr_setup(uvma, xe_vma_userptr(vma), size); 1027 if (err) { 1028 xe_vma_free(vma); 1029 return ERR_PTR(err); 1030 } 1031 } 1032 1033 xe_vm_get(vm); 1034 } 1035 1036 return vma; 1037 } 1038 1039 static void xe_vma_destroy_late(struct xe_vma *vma) 1040 { 1041 struct xe_vm *vm = xe_vma_vm(vma); 1042 1043 if (vma->ufence) { 1044 xe_sync_ufence_put(vma->ufence); 1045 vma->ufence = NULL; 1046 } 1047 1048 if (xe_vma_is_userptr(vma)) { 1049 struct xe_userptr_vma *uvma = to_userptr_vma(vma); 1050 1051 xe_userptr_remove(uvma); 1052 xe_vm_put(vm); 1053 } else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) { 1054 xe_vm_put(vm); 1055 } else { 1056 xe_bo_put(xe_vma_bo(vma)); 1057 } 1058 1059 xe_vma_free(vma); 1060 } 1061 1062 static void vma_destroy_work_func(struct work_struct *w) 1063 { 1064 struct xe_vma *vma = 1065 container_of(w, struct xe_vma, destroy_work); 1066 1067 xe_vma_destroy_late(vma); 1068 } 1069 1070 static void vma_destroy_cb(struct dma_fence *fence, 1071 struct dma_fence_cb *cb) 1072 { 1073 struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb); 1074 1075 INIT_WORK(&vma->destroy_work, vma_destroy_work_func); 1076 queue_work(system_unbound_wq, &vma->destroy_work); 1077 } 1078 1079 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) 1080 { 1081 struct xe_vm *vm = xe_vma_vm(vma); 1082 1083 lockdep_assert_held_write(&vm->lock); 1084 xe_assert(vm->xe, list_empty(&vma->combined_links.destroy)); 1085 1086 if (xe_vma_is_userptr(vma)) { 1087 xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED); 1088 xe_userptr_destroy(to_userptr_vma(vma)); 1089 } else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) { 1090 xe_bo_assert_held(xe_vma_bo(vma)); 1091 1092 drm_gpuva_unlink(&vma->gpuva); 1093 } 1094 1095 xe_vm_assert_held(vm); 1096 if (fence) { 
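		/*
		 * Defer the final teardown until @fence signals. If the fence
		 * has already signalled, dma_fence_add_callback() returns
		 * -ENOENT and we tear the VMA down immediately instead.
		 */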
1097 int ret = dma_fence_add_callback(fence, &vma->destroy_cb, 1098 vma_destroy_cb); 1099 1100 if (ret) { 1101 XE_WARN_ON(ret != -ENOENT); 1102 xe_vma_destroy_late(vma); 1103 } 1104 } else { 1105 xe_vma_destroy_late(vma); 1106 } 1107 } 1108 1109 /** 1110 * xe_vm_lock_vma() - drm_exec utility to lock a vma 1111 * @exec: The drm_exec object we're currently locking for. 1112 * @vma: The vma for witch we want to lock the vm resv and any attached 1113 * object's resv. 1114 * 1115 * Return: 0 on success, negative error code on error. In particular 1116 * may return -EDEADLK on WW transaction contention and -EINTR if 1117 * an interruptible wait is terminated by a signal. 1118 */ 1119 int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma) 1120 { 1121 struct xe_vm *vm = xe_vma_vm(vma); 1122 struct xe_bo *bo = xe_vma_bo(vma); 1123 int err; 1124 1125 XE_WARN_ON(!vm); 1126 1127 err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 1128 if (!err && bo && !bo->vm) 1129 err = drm_exec_lock_obj(exec, &bo->ttm.base); 1130 1131 return err; 1132 } 1133 1134 static void xe_vma_destroy_unlocked(struct xe_vma *vma) 1135 { 1136 struct xe_device *xe = xe_vma_vm(vma)->xe; 1137 struct xe_validation_ctx ctx; 1138 struct drm_exec exec; 1139 int err = 0; 1140 1141 xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) { 1142 err = xe_vm_lock_vma(&exec, vma); 1143 drm_exec_retry_on_contention(&exec); 1144 if (XE_WARN_ON(err)) 1145 break; 1146 xe_vma_destroy(vma, NULL); 1147 } 1148 xe_assert(xe, !err); 1149 } 1150 1151 struct xe_vma * 1152 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range) 1153 { 1154 struct drm_gpuva *gpuva; 1155 1156 lockdep_assert_held(&vm->lock); 1157 1158 if (xe_vm_is_closed_or_banned(vm)) 1159 return NULL; 1160 1161 xe_assert(vm->xe, start + range <= vm->size); 1162 1163 gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range); 1164 1165 return gpuva ? 
gpuva_to_vma(gpuva) : NULL; 1166 } 1167 1168 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma) 1169 { 1170 int err; 1171 1172 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1173 lockdep_assert_held(&vm->lock); 1174 1175 mutex_lock(&vm->snap_mutex); 1176 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva); 1177 mutex_unlock(&vm->snap_mutex); 1178 XE_WARN_ON(err); /* Shouldn't be possible */ 1179 1180 return err; 1181 } 1182 1183 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma) 1184 { 1185 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1186 lockdep_assert_held(&vm->lock); 1187 1188 mutex_lock(&vm->snap_mutex); 1189 drm_gpuva_remove(&vma->gpuva); 1190 mutex_unlock(&vm->snap_mutex); 1191 if (vm->usm.last_fault_vma == vma) 1192 vm->usm.last_fault_vma = NULL; 1193 } 1194 1195 static struct drm_gpuva_op *xe_vm_op_alloc(void) 1196 { 1197 struct xe_vma_op *op; 1198 1199 op = kzalloc(sizeof(*op), GFP_KERNEL); 1200 1201 if (unlikely(!op)) 1202 return NULL; 1203 1204 return &op->base; 1205 } 1206 1207 static void xe_vm_free(struct drm_gpuvm *gpuvm); 1208 1209 static const struct drm_gpuvm_ops gpuvm_ops = { 1210 .op_alloc = xe_vm_op_alloc, 1211 .vm_bo_validate = xe_gpuvm_validate, 1212 .vm_free = xe_vm_free, 1213 }; 1214 1215 static u64 pde_encode_pat_index(u16 pat_index) 1216 { 1217 u64 pte = 0; 1218 1219 if (pat_index & BIT(0)) 1220 pte |= XE_PPGTT_PTE_PAT0; 1221 1222 if (pat_index & BIT(1)) 1223 pte |= XE_PPGTT_PTE_PAT1; 1224 1225 return pte; 1226 } 1227 1228 static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level) 1229 { 1230 u64 pte = 0; 1231 1232 if (pat_index & BIT(0)) 1233 pte |= XE_PPGTT_PTE_PAT0; 1234 1235 if (pat_index & BIT(1)) 1236 pte |= XE_PPGTT_PTE_PAT1; 1237 1238 if (pat_index & BIT(2)) { 1239 if (pt_level) 1240 pte |= XE_PPGTT_PDE_PDPE_PAT2; 1241 else 1242 pte |= XE_PPGTT_PTE_PAT2; 1243 } 1244 1245 if (pat_index & BIT(3)) 1246 pte |= XELPG_PPGTT_PTE_PAT3; 1247 1248 if (pat_index & (BIT(4))) 1249 pte |= XE2_PPGTT_PTE_PAT4; 1250 1251 return pte; 1252 } 1253 1254 static u64 pte_encode_ps(u32 pt_level) 1255 { 1256 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL); 1257 1258 if (pt_level == 1) 1259 return XE_PDE_PS_2M; 1260 else if (pt_level == 2) 1261 return XE_PDPE_PS_1G; 1262 1263 return 0; 1264 } 1265 1266 static u16 pde_pat_index(struct xe_bo *bo) 1267 { 1268 struct xe_device *xe = xe_bo_device(bo); 1269 u16 pat_index; 1270 1271 /* 1272 * We only have two bits to encode the PAT index in non-leaf nodes, but 1273 * these only point to other paging structures so we only need a minimal 1274 * selection of options. The user PAT index is only for encoding leaf 1275 * nodes, where we have use of more bits to do the encoding. The 1276 * non-leaf nodes are instead under driver control so the chosen index 1277 * here should be distict from the user PAT index. Also the 1278 * corresponding coherency of the PAT index should be tied to the 1279 * allocation type of the page table (or at least we should pick 1280 * something which is always safe). 
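	 *
	 * Only PAT indices 0-3 are representable here (two bits, see
	 * pde_encode_pat_index()), which is what the assert below checks.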
1281 */ 1282 if (!xe_bo_is_vram(bo) && bo->ttm.ttm->caching == ttm_cached) 1283 pat_index = xe->pat.idx[XE_CACHE_WB]; 1284 else 1285 pat_index = xe->pat.idx[XE_CACHE_NONE]; 1286 1287 xe_assert(xe, pat_index <= 3); 1288 1289 return pat_index; 1290 } 1291 1292 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset) 1293 { 1294 u64 pde; 1295 1296 pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1297 pde |= XE_PAGE_PRESENT | XE_PAGE_RW; 1298 pde |= pde_encode_pat_index(pde_pat_index(bo)); 1299 1300 return pde; 1301 } 1302 1303 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, 1304 u16 pat_index, u32 pt_level) 1305 { 1306 u64 pte; 1307 1308 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1309 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1310 pte |= pte_encode_pat_index(pat_index, pt_level); 1311 pte |= pte_encode_ps(pt_level); 1312 1313 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) 1314 pte |= XE_PPGTT_PTE_DM; 1315 1316 return pte; 1317 } 1318 1319 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, 1320 u16 pat_index, u32 pt_level) 1321 { 1322 pte |= XE_PAGE_PRESENT; 1323 1324 if (likely(!xe_vma_read_only(vma))) 1325 pte |= XE_PAGE_RW; 1326 1327 pte |= pte_encode_pat_index(pat_index, pt_level); 1328 pte |= pte_encode_ps(pt_level); 1329 1330 if (unlikely(xe_vma_is_null(vma))) 1331 pte |= XE_PTE_NULL; 1332 1333 return pte; 1334 } 1335 1336 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, 1337 u16 pat_index, 1338 u32 pt_level, bool devmem, u64 flags) 1339 { 1340 u64 pte; 1341 1342 /* Avoid passing random bits directly as flags */ 1343 xe_assert(xe, !(flags & ~XE_PTE_PS64)); 1344 1345 pte = addr; 1346 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1347 pte |= pte_encode_pat_index(pat_index, pt_level); 1348 pte |= pte_encode_ps(pt_level); 1349 1350 if (devmem) 1351 pte |= XE_PPGTT_PTE_DM; 1352 1353 pte |= flags; 1354 1355 return pte; 1356 } 1357 1358 static const struct xe_pt_ops xelp_pt_ops = { 1359 .pte_encode_bo = xelp_pte_encode_bo, 1360 .pte_encode_vma = xelp_pte_encode_vma, 1361 .pte_encode_addr = xelp_pte_encode_addr, 1362 .pde_encode_bo = xelp_pde_encode_bo, 1363 }; 1364 1365 static void vm_destroy_work_func(struct work_struct *w); 1366 1367 /** 1368 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the 1369 * given tile and vm. 1370 * @xe: xe device. 1371 * @tile: tile to set up for. 1372 * @vm: vm to set up for. 1373 * @exec: The struct drm_exec object used to lock the vm resv. 1374 * 1375 * Sets up a pagetable tree with one page-table per level and a single 1376 * leaf PTE. All pagetable entries point to the single page-table or, 1377 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and 1378 * writes become NOPs. 1379 * 1380 * Return: 0 on success, negative error code on error. 
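 *
 * The scratch tables are created one per level, from MAX_HUGEPTE_LEVEL up
 * to one level below the VM's root; the root entries themselves are
 * filled in later by xe_pt_populate_empty() in xe_vm_create() (see
 * "Fill pt_root after allocating scratch tables").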
1381 */ 1382 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, 1383 struct xe_vm *vm, struct drm_exec *exec) 1384 { 1385 u8 id = tile->id; 1386 int i; 1387 1388 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) { 1389 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i, exec); 1390 if (IS_ERR(vm->scratch_pt[id][i])) { 1391 int err = PTR_ERR(vm->scratch_pt[id][i]); 1392 1393 vm->scratch_pt[id][i] = NULL; 1394 return err; 1395 } 1396 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); 1397 } 1398 1399 return 0; 1400 } 1401 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO); 1402 1403 static void xe_vm_free_scratch(struct xe_vm *vm) 1404 { 1405 struct xe_tile *tile; 1406 u8 id; 1407 1408 if (!xe_vm_has_scratch(vm)) 1409 return; 1410 1411 for_each_tile(tile, vm->xe, id) { 1412 u32 i; 1413 1414 if (!vm->pt_root[id]) 1415 continue; 1416 1417 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i) 1418 if (vm->scratch_pt[id][i]) 1419 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL); 1420 } 1421 } 1422 1423 static void xe_vm_pt_destroy(struct xe_vm *vm) 1424 { 1425 struct xe_tile *tile; 1426 u8 id; 1427 1428 xe_vm_assert_held(vm); 1429 1430 for_each_tile(tile, vm->xe, id) { 1431 if (vm->pt_root[id]) { 1432 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); 1433 vm->pt_root[id] = NULL; 1434 } 1435 } 1436 } 1437 1438 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) 1439 { 1440 struct drm_gem_object *vm_resv_obj; 1441 struct xe_validation_ctx ctx; 1442 struct drm_exec exec; 1443 struct xe_vm *vm; 1444 int err, number_tiles = 0; 1445 struct xe_tile *tile; 1446 u8 id; 1447 1448 /* 1449 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to 1450 * ever be in faulting mode. 1451 */ 1452 xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE))); 1453 1454 vm = kzalloc(sizeof(*vm), GFP_KERNEL); 1455 if (!vm) 1456 return ERR_PTR(-ENOMEM); 1457 1458 vm->xe = xe; 1459 1460 vm->size = 1ull << xe->info.va_bits; 1461 vm->flags = flags; 1462 1463 if (xef) 1464 vm->xef = xe_file_get(xef); 1465 /** 1466 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be 1467 * manipulated under the PXP mutex. However, the PXP mutex can be taken 1468 * under a user-VM lock when the PXP session is started at exec_queue 1469 * creation time. Those are different VMs and therefore there is no risk 1470 * of deadlock, but we need to tell lockdep that this is the case or it 1471 * will print a warning. 1472 */ 1473 if (flags & XE_VM_FLAG_GSC) { 1474 static struct lock_class_key gsc_vm_key; 1475 1476 __init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key); 1477 } else { 1478 init_rwsem(&vm->lock); 1479 } 1480 mutex_init(&vm->snap_mutex); 1481 1482 INIT_LIST_HEAD(&vm->rebind_list); 1483 1484 INIT_LIST_HEAD(&vm->userptr.repin_list); 1485 INIT_LIST_HEAD(&vm->userptr.invalidated); 1486 spin_lock_init(&vm->userptr.invalidated_lock); 1487 1488 ttm_lru_bulk_move_init(&vm->lru_bulk_move); 1489 1490 INIT_WORK(&vm->destroy_work, vm_destroy_work_func); 1491 1492 INIT_LIST_HEAD(&vm->preempt.exec_queues); 1493 vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */ 1494 1495 for_each_tile(tile, xe, id) 1496 xe_range_fence_tree_init(&vm->rftree[id]); 1497 1498 vm->pt_ops = &xelp_pt_ops; 1499 1500 /* 1501 * Long-running workloads are not protected by the scheduler references. 
1502 * By design, run_job for long-running workloads returns NULL and the 1503 * scheduler drops all the references of it, hence protecting the VM 1504 * for this case is necessary. 1505 */ 1506 if (flags & XE_VM_FLAG_LR_MODE) { 1507 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); 1508 xe_pm_runtime_get_noresume(xe); 1509 INIT_LIST_HEAD(&vm->preempt.pm_activate_link); 1510 } 1511 1512 err = xe_svm_init(vm); 1513 if (err) 1514 goto err_no_resv; 1515 1516 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); 1517 if (!vm_resv_obj) { 1518 err = -ENOMEM; 1519 goto err_svm_fini; 1520 } 1521 1522 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm, 1523 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops); 1524 1525 drm_gem_object_put(vm_resv_obj); 1526 1527 err = 0; 1528 xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true}, 1529 err) { 1530 err = xe_vm_drm_exec_lock(vm, &exec); 1531 drm_exec_retry_on_contention(&exec); 1532 1533 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) 1534 vm->flags |= XE_VM_FLAG_64K; 1535 1536 for_each_tile(tile, xe, id) { 1537 if (flags & XE_VM_FLAG_MIGRATION && 1538 tile->id != XE_VM_FLAG_TILE_ID(flags)) 1539 continue; 1540 1541 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level, 1542 &exec); 1543 if (IS_ERR(vm->pt_root[id])) { 1544 err = PTR_ERR(vm->pt_root[id]); 1545 vm->pt_root[id] = NULL; 1546 xe_vm_pt_destroy(vm); 1547 drm_exec_retry_on_contention(&exec); 1548 xe_validation_retry_on_oom(&ctx, &err); 1549 break; 1550 } 1551 } 1552 if (err) 1553 break; 1554 1555 if (xe_vm_has_scratch(vm)) { 1556 for_each_tile(tile, xe, id) { 1557 if (!vm->pt_root[id]) 1558 continue; 1559 1560 err = xe_vm_create_scratch(xe, tile, vm, &exec); 1561 if (err) { 1562 xe_vm_free_scratch(vm); 1563 xe_vm_pt_destroy(vm); 1564 drm_exec_retry_on_contention(&exec); 1565 xe_validation_retry_on_oom(&ctx, &err); 1566 break; 1567 } 1568 } 1569 if (err) 1570 break; 1571 vm->batch_invalidate_tlb = true; 1572 } 1573 1574 if (vm->flags & XE_VM_FLAG_LR_MODE) { 1575 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); 1576 vm->batch_invalidate_tlb = false; 1577 } 1578 1579 /* Fill pt_root after allocating scratch tables */ 1580 for_each_tile(tile, xe, id) { 1581 if (!vm->pt_root[id]) 1582 continue; 1583 1584 xe_pt_populate_empty(tile, vm, vm->pt_root[id]); 1585 } 1586 } 1587 if (err) 1588 goto err_close; 1589 1590 /* Kernel migration VM shouldn't have a circular loop.. 
*/ 1591 if (!(flags & XE_VM_FLAG_MIGRATION)) { 1592 for_each_tile(tile, xe, id) { 1593 struct xe_exec_queue *q; 1594 u32 create_flags = EXEC_QUEUE_FLAG_VM; 1595 1596 if (!vm->pt_root[id]) 1597 continue; 1598 1599 q = xe_exec_queue_create_bind(xe, tile, create_flags, 0); 1600 if (IS_ERR(q)) { 1601 err = PTR_ERR(q); 1602 goto err_close; 1603 } 1604 vm->q[id] = q; 1605 number_tiles++; 1606 } 1607 } 1608 1609 if (number_tiles > 1) 1610 vm->composite_fence_ctx = dma_fence_context_alloc(1); 1611 1612 if (xef && xe->info.has_asid) { 1613 u32 asid; 1614 1615 down_write(&xe->usm.lock); 1616 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, 1617 XA_LIMIT(1, XE_MAX_ASID - 1), 1618 &xe->usm.next_asid, GFP_KERNEL); 1619 up_write(&xe->usm.lock); 1620 if (err < 0) 1621 goto err_close; 1622 1623 vm->usm.asid = asid; 1624 } 1625 1626 trace_xe_vm_create(vm); 1627 1628 return vm; 1629 1630 err_close: 1631 xe_vm_close_and_put(vm); 1632 return ERR_PTR(err); 1633 1634 err_svm_fini: 1635 if (flags & XE_VM_FLAG_FAULT_MODE) { 1636 vm->size = 0; /* close the vm */ 1637 xe_svm_fini(vm); 1638 } 1639 err_no_resv: 1640 mutex_destroy(&vm->snap_mutex); 1641 for_each_tile(tile, xe, id) 1642 xe_range_fence_tree_fini(&vm->rftree[id]); 1643 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 1644 if (vm->xef) 1645 xe_file_put(vm->xef); 1646 kfree(vm); 1647 if (flags & XE_VM_FLAG_LR_MODE) 1648 xe_pm_runtime_put(xe); 1649 return ERR_PTR(err); 1650 } 1651 1652 static void xe_vm_close(struct xe_vm *vm) 1653 { 1654 struct xe_device *xe = vm->xe; 1655 bool bound; 1656 int idx; 1657 1658 bound = drm_dev_enter(&xe->drm, &idx); 1659 1660 down_write(&vm->lock); 1661 if (xe_vm_in_fault_mode(vm)) 1662 xe_svm_notifier_lock(vm); 1663 1664 vm->size = 0; 1665 1666 if (!((vm->flags & XE_VM_FLAG_MIGRATION))) { 1667 struct xe_tile *tile; 1668 struct xe_gt *gt; 1669 u8 id; 1670 1671 /* Wait for pending binds */ 1672 dma_resv_wait_timeout(xe_vm_resv(vm), 1673 DMA_RESV_USAGE_BOOKKEEP, 1674 false, MAX_SCHEDULE_TIMEOUT); 1675 1676 if (bound) { 1677 for_each_tile(tile, xe, id) 1678 if (vm->pt_root[id]) 1679 xe_pt_clear(xe, vm->pt_root[id]); 1680 1681 for_each_gt(gt, xe, id) 1682 xe_tlb_inval_vm(>->tlb_inval, vm); 1683 } 1684 } 1685 1686 if (xe_vm_in_fault_mode(vm)) 1687 xe_svm_notifier_unlock(vm); 1688 up_write(&vm->lock); 1689 1690 if (bound) 1691 drm_dev_exit(idx); 1692 } 1693 1694 void xe_vm_close_and_put(struct xe_vm *vm) 1695 { 1696 LIST_HEAD(contested); 1697 struct xe_device *xe = vm->xe; 1698 struct xe_tile *tile; 1699 struct xe_vma *vma, *next_vma; 1700 struct drm_gpuva *gpuva, *next; 1701 u8 id; 1702 1703 xe_assert(xe, !vm->preempt.num_exec_queues); 1704 1705 xe_vm_close(vm); 1706 if (xe_vm_in_preempt_fence_mode(vm)) { 1707 mutex_lock(&xe->rebind_resume_lock); 1708 list_del_init(&vm->preempt.pm_activate_link); 1709 mutex_unlock(&xe->rebind_resume_lock); 1710 flush_work(&vm->preempt.rebind_work); 1711 } 1712 if (xe_vm_in_fault_mode(vm)) 1713 xe_svm_close(vm); 1714 1715 down_write(&vm->lock); 1716 for_each_tile(tile, xe, id) { 1717 if (vm->q[id]) 1718 xe_exec_queue_last_fence_put(vm->q[id], vm); 1719 } 1720 up_write(&vm->lock); 1721 1722 for_each_tile(tile, xe, id) { 1723 if (vm->q[id]) { 1724 xe_exec_queue_kill(vm->q[id]); 1725 xe_exec_queue_put(vm->q[id]); 1726 vm->q[id] = NULL; 1727 } 1728 } 1729 1730 down_write(&vm->lock); 1731 xe_vm_lock(vm, false); 1732 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) { 1733 vma = gpuva_to_vma(gpuva); 1734 1735 if (xe_vma_has_no_bo(vma)) { 1736 xe_svm_notifier_lock(vm); 1737 vma->gpuva.flags |= 
XE_VMA_DESTROYED; 1738 xe_svm_notifier_unlock(vm); 1739 } 1740 1741 xe_vm_remove_vma(vm, vma); 1742 1743 /* easy case, remove from VMA? */ 1744 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) { 1745 list_del_init(&vma->combined_links.rebind); 1746 xe_vma_destroy(vma, NULL); 1747 continue; 1748 } 1749 1750 list_move_tail(&vma->combined_links.destroy, &contested); 1751 vma->gpuva.flags |= XE_VMA_DESTROYED; 1752 } 1753 1754 /* 1755 * All vm operations will add shared fences to resv. 1756 * The only exception is eviction for a shared object, 1757 * but even so, the unbind when evicted would still 1758 * install a fence to resv. Hence it's safe to 1759 * destroy the pagetables immediately. 1760 */ 1761 xe_vm_free_scratch(vm); 1762 xe_vm_pt_destroy(vm); 1763 xe_vm_unlock(vm); 1764 1765 /* 1766 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL 1767 * Since we hold a refcount to the bo, we can remove and free 1768 * the members safely without locking. 1769 */ 1770 list_for_each_entry_safe(vma, next_vma, &contested, 1771 combined_links.destroy) { 1772 list_del_init(&vma->combined_links.destroy); 1773 xe_vma_destroy_unlocked(vma); 1774 } 1775 1776 xe_svm_fini(vm); 1777 1778 up_write(&vm->lock); 1779 1780 down_write(&xe->usm.lock); 1781 if (vm->usm.asid) { 1782 void *lookup; 1783 1784 xe_assert(xe, xe->info.has_asid); 1785 xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION)); 1786 1787 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); 1788 xe_assert(xe, lookup == vm); 1789 } 1790 up_write(&xe->usm.lock); 1791 1792 for_each_tile(tile, xe, id) 1793 xe_range_fence_tree_fini(&vm->rftree[id]); 1794 1795 xe_vm_put(vm); 1796 } 1797 1798 static void vm_destroy_work_func(struct work_struct *w) 1799 { 1800 struct xe_vm *vm = 1801 container_of(w, struct xe_vm, destroy_work); 1802 struct xe_device *xe = vm->xe; 1803 struct xe_tile *tile; 1804 u8 id; 1805 1806 /* xe_vm_close_and_put was not called? */ 1807 xe_assert(xe, !vm->size); 1808 1809 if (xe_vm_in_preempt_fence_mode(vm)) 1810 flush_work(&vm->preempt.rebind_work); 1811 1812 mutex_destroy(&vm->snap_mutex); 1813 1814 if (vm->flags & XE_VM_FLAG_LR_MODE) 1815 xe_pm_runtime_put(xe); 1816 1817 for_each_tile(tile, xe, id) 1818 XE_WARN_ON(vm->pt_root[id]); 1819 1820 trace_xe_vm_free(vm); 1821 1822 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 1823 1824 if (vm->xef) 1825 xe_file_put(vm->xef); 1826 1827 kfree(vm); 1828 } 1829 1830 static void xe_vm_free(struct drm_gpuvm *gpuvm) 1831 { 1832 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); 1833 1834 /* To destroy the VM we need to be able to sleep */ 1835 queue_work(system_unbound_wq, &vm->destroy_work); 1836 } 1837 1838 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) 1839 { 1840 struct xe_vm *vm; 1841 1842 mutex_lock(&xef->vm.lock); 1843 vm = xa_load(&xef->vm.xa, id); 1844 if (vm) 1845 xe_vm_get(vm); 1846 mutex_unlock(&xef->vm.lock); 1847 1848 return vm; 1849 } 1850 1851 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile) 1852 { 1853 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0); 1854 } 1855 1856 static struct xe_exec_queue * 1857 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 1858 { 1859 return q ? 
q : vm->q[0]; 1860 } 1861 1862 static struct xe_user_fence * 1863 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) 1864 { 1865 unsigned int i; 1866 1867 for (i = 0; i < num_syncs; i++) { 1868 struct xe_sync_entry *e = &syncs[i]; 1869 1870 if (xe_sync_is_ufence(e)) 1871 return xe_sync_ufence_get(e); 1872 } 1873 1874 return NULL; 1875 } 1876 1877 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ 1878 DRM_XE_VM_CREATE_FLAG_LR_MODE | \ 1879 DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 1880 1881 int xe_vm_create_ioctl(struct drm_device *dev, void *data, 1882 struct drm_file *file) 1883 { 1884 struct xe_device *xe = to_xe_device(dev); 1885 struct xe_file *xef = to_xe_file(file); 1886 struct drm_xe_vm_create *args = data; 1887 struct xe_vm *vm; 1888 u32 id; 1889 int err; 1890 u32 flags = 0; 1891 1892 if (XE_IOCTL_DBG(xe, args->extensions)) 1893 return -EINVAL; 1894 1895 if (XE_GT_WA(xe_root_mmio_gt(xe), 14016763929)) 1896 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE; 1897 1898 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && 1899 !xe->info.has_usm)) 1900 return -EINVAL; 1901 1902 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 1903 return -EINVAL; 1904 1905 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS)) 1906 return -EINVAL; 1907 1908 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE && 1909 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && 1910 !xe->info.needs_scratch)) 1911 return -EINVAL; 1912 1913 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) && 1914 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) 1915 return -EINVAL; 1916 1917 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE) 1918 flags |= XE_VM_FLAG_SCRATCH_PAGE; 1919 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) 1920 flags |= XE_VM_FLAG_LR_MODE; 1921 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 1922 flags |= XE_VM_FLAG_FAULT_MODE; 1923 1924 vm = xe_vm_create(xe, flags, xef); 1925 if (IS_ERR(vm)) 1926 return PTR_ERR(vm); 1927 1928 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) 1929 /* Warning: Security issue - never enable by default */ 1930 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); 1931 #endif 1932 1933 /* user id alloc must always be last in ioctl to prevent UAF */ 1934 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); 1935 if (err) 1936 goto err_close_and_put; 1937 1938 args->vm_id = id; 1939 1940 return 0; 1941 1942 err_close_and_put: 1943 xe_vm_close_and_put(vm); 1944 1945 return err; 1946 } 1947 1948 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, 1949 struct drm_file *file) 1950 { 1951 struct xe_device *xe = to_xe_device(dev); 1952 struct xe_file *xef = to_xe_file(file); 1953 struct drm_xe_vm_destroy *args = data; 1954 struct xe_vm *vm; 1955 int err = 0; 1956 1957 if (XE_IOCTL_DBG(xe, args->pad) || 1958 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 1959 return -EINVAL; 1960 1961 mutex_lock(&xef->vm.lock); 1962 vm = xa_load(&xef->vm.xa, args->vm_id); 1963 if (XE_IOCTL_DBG(xe, !vm)) 1964 err = -ENOENT; 1965 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues)) 1966 err = -EBUSY; 1967 else 1968 xa_erase(&xef->vm.xa, args->vm_id); 1969 mutex_unlock(&xef->vm.lock); 1970 1971 if (!err) 1972 xe_vm_close_and_put(vm); 1973 1974 return err; 1975 } 1976 1977 static int xe_vm_query_vmas(struct xe_vm *vm, u64 start, u64 end) 1978 { 1979 struct drm_gpuva *gpuva; 1980 u32 num_vmas = 0; 1981 1982 lockdep_assert_held(&vm->lock); 1983 
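	/*
	 * Count the VMAs overlapping the queried range; the ioctl below uses
	 * this to report how many drm_xe_mem_range_attr entries userspace
	 * should allocate when it passes num_mem_ranges == 0.
	 */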
drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) 1984 num_vmas++; 1985 1986 return num_vmas; 1987 } 1988 1989 static int get_mem_attrs(struct xe_vm *vm, u32 *num_vmas, u64 start, 1990 u64 end, struct drm_xe_mem_range_attr *attrs) 1991 { 1992 struct drm_gpuva *gpuva; 1993 int i = 0; 1994 1995 lockdep_assert_held(&vm->lock); 1996 1997 drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) { 1998 struct xe_vma *vma = gpuva_to_vma(gpuva); 1999 2000 if (i == *num_vmas) 2001 return -ENOSPC; 2002 2003 attrs[i].start = xe_vma_start(vma); 2004 attrs[i].end = xe_vma_end(vma); 2005 attrs[i].atomic.val = vma->attr.atomic_access; 2006 attrs[i].pat_index.val = vma->attr.pat_index; 2007 attrs[i].preferred_mem_loc.devmem_fd = vma->attr.preferred_loc.devmem_fd; 2008 attrs[i].preferred_mem_loc.migration_policy = 2009 vma->attr.preferred_loc.migration_policy; 2010 2011 i++; 2012 } 2013 2014 *num_vmas = i; 2015 return 0; 2016 } 2017 2018 int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 2019 { 2020 struct xe_device *xe = to_xe_device(dev); 2021 struct xe_file *xef = to_xe_file(file); 2022 struct drm_xe_mem_range_attr *mem_attrs; 2023 struct drm_xe_vm_query_mem_range_attr *args = data; 2024 u64 __user *attrs_user = u64_to_user_ptr(args->vector_of_mem_attr); 2025 struct xe_vm *vm; 2026 int err = 0; 2027 2028 if (XE_IOCTL_DBG(xe, 2029 ((args->num_mem_ranges == 0 && 2030 (attrs_user || args->sizeof_mem_range_attr != 0)) || 2031 (args->num_mem_ranges > 0 && 2032 (!attrs_user || 2033 args->sizeof_mem_range_attr != 2034 sizeof(struct drm_xe_mem_range_attr)))))) 2035 return -EINVAL; 2036 2037 vm = xe_vm_lookup(xef, args->vm_id); 2038 if (XE_IOCTL_DBG(xe, !vm)) 2039 return -EINVAL; 2040 2041 err = down_read_interruptible(&vm->lock); 2042 if (err) 2043 goto put_vm; 2044 2045 attrs_user = u64_to_user_ptr(args->vector_of_mem_attr); 2046 2047 if (args->num_mem_ranges == 0 && !attrs_user) { 2048 args->num_mem_ranges = xe_vm_query_vmas(vm, args->start, args->start + args->range); 2049 args->sizeof_mem_range_attr = sizeof(struct drm_xe_mem_range_attr); 2050 goto unlock_vm; 2051 } 2052 2053 mem_attrs = kvmalloc_array(args->num_mem_ranges, args->sizeof_mem_range_attr, 2054 GFP_KERNEL | __GFP_ACCOUNT | 2055 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 2056 if (!mem_attrs) { 2057 err = args->num_mem_ranges > 1 ? 
-ENOBUFS : -ENOMEM; 2058 goto unlock_vm; 2059 } 2060 2061 memset(mem_attrs, 0, args->num_mem_ranges * args->sizeof_mem_range_attr); 2062 err = get_mem_attrs(vm, &args->num_mem_ranges, args->start, 2063 args->start + args->range, mem_attrs); 2064 if (err) 2065 goto free_mem_attrs; 2066 2067 err = copy_to_user(attrs_user, mem_attrs, 2068 args->sizeof_mem_range_attr * args->num_mem_ranges); 2069 if (err) 2070 err = -EFAULT; 2071 2072 free_mem_attrs: 2073 kvfree(mem_attrs); 2074 unlock_vm: 2075 up_read(&vm->lock); 2076 put_vm: 2077 xe_vm_put(vm); 2078 return err; 2079 } 2080 2081 static bool vma_matches(struct xe_vma *vma, u64 page_addr) 2082 { 2083 if (page_addr > xe_vma_end(vma) - 1 || 2084 page_addr + SZ_4K - 1 < xe_vma_start(vma)) 2085 return false; 2086 2087 return true; 2088 } 2089 2090 /** 2091 * xe_vm_find_vma_by_addr() - Find a VMA by its address 2092 * 2093 * @vm: the xe_vm the vma belongs to 2094 * @page_addr: address to look up 2095 */ 2096 struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr) 2097 { 2098 struct xe_vma *vma = NULL; 2099 2100 if (vm->usm.last_fault_vma) { /* Fast lookup */ 2101 if (vma_matches(vm->usm.last_fault_vma, page_addr)) 2102 vma = vm->usm.last_fault_vma; 2103 } 2104 if (!vma) 2105 vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K); 2106 2107 return vma; 2108 } 2109 2110 static const u32 region_to_mem_type[] = { 2111 XE_PL_TT, 2112 XE_PL_VRAM0, 2113 XE_PL_VRAM1, 2114 }; 2115 2116 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma, 2117 bool post_commit) 2118 { 2119 xe_svm_notifier_lock(vm); 2120 vma->gpuva.flags |= XE_VMA_DESTROYED; 2121 xe_svm_notifier_unlock(vm); 2122 if (post_commit) 2123 xe_vm_remove_vma(vm, vma); 2124 } 2125 2126 #undef ULL 2127 #define ULL unsigned long long 2128 2129 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) 2130 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 2131 { 2132 struct xe_vma *vma; 2133 2134 switch (op->op) { 2135 case DRM_GPUVA_OP_MAP: 2136 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx", 2137 (ULL)op->map.va.addr, (ULL)op->map.va.range); 2138 break; 2139 case DRM_GPUVA_OP_REMAP: 2140 vma = gpuva_to_vma(op->remap.unmap->va); 2141 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 2142 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 2143 op->remap.unmap->keep ? 1 : 0); 2144 if (op->remap.prev) 2145 vm_dbg(&xe->drm, 2146 "REMAP:PREV: addr=0x%016llx, range=0x%016llx", 2147 (ULL)op->remap.prev->va.addr, 2148 (ULL)op->remap.prev->va.range); 2149 if (op->remap.next) 2150 vm_dbg(&xe->drm, 2151 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx", 2152 (ULL)op->remap.next->va.addr, 2153 (ULL)op->remap.next->va.range); 2154 break; 2155 case DRM_GPUVA_OP_UNMAP: 2156 vma = gpuva_to_vma(op->unmap.va); 2157 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 2158 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 2159 op->unmap.keep ? 
1 : 0); 2160 break; 2161 case DRM_GPUVA_OP_PREFETCH: 2162 vma = gpuva_to_vma(op->prefetch.va); 2163 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx", 2164 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma)); 2165 break; 2166 default: 2167 drm_warn(&xe->drm, "NOT POSSIBLE"); 2168 } 2169 } 2170 #else 2171 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 2172 { 2173 } 2174 #endif 2175 2176 static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags) 2177 { 2178 if (!xe_vm_in_fault_mode(vm)) 2179 return false; 2180 2181 if (!xe_vm_has_scratch(vm)) 2182 return false; 2183 2184 if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE) 2185 return false; 2186 2187 return true; 2188 } 2189 2190 static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops) 2191 { 2192 struct drm_gpuva_op *__op; 2193 2194 drm_gpuva_for_each_op(__op, ops) { 2195 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2196 2197 xe_vma_svm_prefetch_op_fini(op); 2198 } 2199 } 2200 2201 /* 2202 * Create operations list from IOCTL arguments, setup operations fields so parse 2203 * and commit steps are decoupled from IOCTL arguments. This step can fail. 2204 */ 2205 static struct drm_gpuva_ops * 2206 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, 2207 struct xe_bo *bo, u64 bo_offset_or_userptr, 2208 u64 addr, u64 range, 2209 u32 operation, u32 flags, 2210 u32 prefetch_region, u16 pat_index) 2211 { 2212 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL; 2213 struct drm_gpuva_ops *ops; 2214 struct drm_gpuva_op *__op; 2215 struct drm_gpuvm_bo *vm_bo; 2216 u64 range_end = addr + range; 2217 int err; 2218 2219 lockdep_assert_held_write(&vm->lock); 2220 2221 vm_dbg(&vm->xe->drm, 2222 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx", 2223 operation, (ULL)addr, (ULL)range, 2224 (ULL)bo_offset_or_userptr); 2225 2226 switch (operation) { 2227 case DRM_XE_VM_BIND_OP_MAP: 2228 case DRM_XE_VM_BIND_OP_MAP_USERPTR: { 2229 struct drm_gpuvm_map_req map_req = { 2230 .map.va.addr = addr, 2231 .map.va.range = range, 2232 .map.gem.obj = obj, 2233 .map.gem.offset = bo_offset_or_userptr, 2234 }; 2235 2236 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, &map_req); 2237 break; 2238 } 2239 case DRM_XE_VM_BIND_OP_UNMAP: 2240 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range); 2241 break; 2242 case DRM_XE_VM_BIND_OP_PREFETCH: 2243 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range); 2244 break; 2245 case DRM_XE_VM_BIND_OP_UNMAP_ALL: 2246 xe_assert(vm->xe, bo); 2247 2248 err = xe_bo_lock(bo, true); 2249 if (err) 2250 return ERR_PTR(err); 2251 2252 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj); 2253 if (IS_ERR(vm_bo)) { 2254 xe_bo_unlock(bo); 2255 return ERR_CAST(vm_bo); 2256 } 2257 2258 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo); 2259 drm_gpuvm_bo_put(vm_bo); 2260 xe_bo_unlock(bo); 2261 break; 2262 default: 2263 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2264 ops = ERR_PTR(-EINVAL); 2265 } 2266 if (IS_ERR(ops)) 2267 return ops; 2268 2269 drm_gpuva_for_each_op(__op, ops) { 2270 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2271 2272 if (__op->op == DRM_GPUVA_OP_MAP) { 2273 op->map.immediate = 2274 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; 2275 op->map.read_only = 2276 flags & DRM_XE_VM_BIND_FLAG_READONLY; 2277 op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 2278 op->map.is_cpu_addr_mirror = flags & 2279 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR; 2280 op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; 2281 op->map.pat_index = pat_index; 2282 
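/*
 * Set when the VM is in fault mode with scratch pages enabled and this
 * bind is not DRM_XE_VM_BIND_FLAG_IMMEDIATE, see
 * __xe_vm_needs_clear_scratch_pages(); such a bind is expected to clear
 * and invalidate the scratch entries covering the range when it is
 * first bound.
 */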
op->map.invalidate_on_bind = 2283 __xe_vm_needs_clear_scratch_pages(vm, flags); 2284 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { 2285 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2286 struct xe_tile *tile; 2287 struct xe_svm_range *svm_range; 2288 struct drm_gpusvm_ctx ctx = {}; 2289 struct drm_pagemap *dpagemap; 2290 u8 id, tile_mask = 0; 2291 u32 i; 2292 2293 if (!xe_vma_is_cpu_addr_mirror(vma)) { 2294 op->prefetch.region = prefetch_region; 2295 break; 2296 } 2297 2298 ctx.read_only = xe_vma_read_only(vma); 2299 ctx.devmem_possible = IS_DGFX(vm->xe) && 2300 IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); 2301 2302 for_each_tile(tile, vm->xe, id) 2303 tile_mask |= 0x1 << id; 2304 2305 xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC); 2306 op->prefetch_range.ranges_count = 0; 2307 tile = NULL; 2308 2309 if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) { 2310 dpagemap = xe_vma_resolve_pagemap(vma, 2311 xe_device_get_root_tile(vm->xe)); 2312 /* 2313 * TODO: Once multigpu support is enabled will need 2314 * something to dereference tile from dpagemap. 2315 */ 2316 if (dpagemap) 2317 tile = xe_device_get_root_tile(vm->xe); 2318 } else if (prefetch_region) { 2319 tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] - 2320 XE_PL_VRAM0]; 2321 } 2322 2323 op->prefetch_range.tile = tile; 2324 alloc_next_range: 2325 svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx); 2326 2327 if (PTR_ERR(svm_range) == -ENOENT) { 2328 u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma); 2329 2330 addr = ret == ULONG_MAX ? 0 : ret; 2331 if (addr) 2332 goto alloc_next_range; 2333 else 2334 goto print_op_label; 2335 } 2336 2337 if (IS_ERR(svm_range)) { 2338 err = PTR_ERR(svm_range); 2339 goto unwind_prefetch_ops; 2340 } 2341 2342 if (xe_svm_range_validate(vm, svm_range, tile_mask, !!tile)) { 2343 xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID"); 2344 goto check_next_range; 2345 } 2346 2347 err = xa_alloc(&op->prefetch_range.range, 2348 &i, svm_range, xa_limit_32b, 2349 GFP_KERNEL); 2350 2351 if (err) 2352 goto unwind_prefetch_ops; 2353 2354 op->prefetch_range.ranges_count++; 2355 vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH; 2356 xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED"); 2357 check_next_range: 2358 if (range_end > xe_svm_range_end(svm_range) && 2359 xe_svm_range_end(svm_range) < xe_vma_end(vma)) { 2360 addr = xe_svm_range_end(svm_range); 2361 goto alloc_next_range; 2362 } 2363 } 2364 print_op_label: 2365 print_op(vm->xe, __op); 2366 } 2367 2368 return ops; 2369 2370 unwind_prefetch_ops: 2371 xe_svm_prefetch_gpuva_ops_fini(ops); 2372 drm_gpuva_ops_free(&vm->gpuvm, ops); 2373 return ERR_PTR(err); 2374 } 2375 2376 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO); 2377 2378 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, 2379 struct xe_vma_mem_attr *attr, unsigned int flags) 2380 { 2381 struct xe_bo *bo = op->gem.obj ? 
gem_to_xe_bo(op->gem.obj) : NULL; 2382 struct xe_validation_ctx ctx; 2383 struct drm_exec exec; 2384 struct xe_vma *vma; 2385 int err = 0; 2386 2387 lockdep_assert_held_write(&vm->lock); 2388 2389 if (bo) { 2390 err = 0; 2391 xe_validation_guard(&ctx, &vm->xe->val, &exec, 2392 (struct xe_val_flags) {.interruptible = true}, err) { 2393 if (!bo->vm) { 2394 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm)); 2395 drm_exec_retry_on_contention(&exec); 2396 } 2397 if (!err) { 2398 err = drm_exec_lock_obj(&exec, &bo->ttm.base); 2399 drm_exec_retry_on_contention(&exec); 2400 } 2401 if (err) 2402 return ERR_PTR(err); 2403 2404 vma = xe_vma_create(vm, bo, op->gem.offset, 2405 op->va.addr, op->va.addr + 2406 op->va.range - 1, attr, flags); 2407 if (IS_ERR(vma)) 2408 return vma; 2409 2410 if (!bo->vm) { 2411 err = add_preempt_fences(vm, bo); 2412 if (err) { 2413 prep_vma_destroy(vm, vma, false); 2414 xe_vma_destroy(vma, NULL); 2415 } 2416 } 2417 } 2418 if (err) 2419 return ERR_PTR(err); 2420 } else { 2421 vma = xe_vma_create(vm, NULL, op->gem.offset, 2422 op->va.addr, op->va.addr + 2423 op->va.range - 1, attr, flags); 2424 if (IS_ERR(vma)) 2425 return vma; 2426 2427 if (xe_vma_is_userptr(vma)) 2428 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2429 } 2430 if (err) { 2431 prep_vma_destroy(vm, vma, false); 2432 xe_vma_destroy_unlocked(vma); 2433 vma = ERR_PTR(err); 2434 } 2435 2436 return vma; 2437 } 2438 2439 static u64 xe_vma_max_pte_size(struct xe_vma *vma) 2440 { 2441 if (vma->gpuva.flags & XE_VMA_PTE_1G) 2442 return SZ_1G; 2443 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT)) 2444 return SZ_2M; 2445 else if (vma->gpuva.flags & XE_VMA_PTE_64K) 2446 return SZ_64K; 2447 else if (vma->gpuva.flags & XE_VMA_PTE_4K) 2448 return SZ_4K; 2449 2450 return SZ_1G; /* Uninitialized, used max size */ 2451 } 2452 2453 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size) 2454 { 2455 switch (size) { 2456 case SZ_1G: 2457 vma->gpuva.flags |= XE_VMA_PTE_1G; 2458 break; 2459 case SZ_2M: 2460 vma->gpuva.flags |= XE_VMA_PTE_2M; 2461 break; 2462 case SZ_64K: 2463 vma->gpuva.flags |= XE_VMA_PTE_64K; 2464 break; 2465 case SZ_4K: 2466 vma->gpuva.flags |= XE_VMA_PTE_4K; 2467 break; 2468 } 2469 } 2470 2471 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) 2472 { 2473 int err = 0; 2474 2475 lockdep_assert_held_write(&vm->lock); 2476 2477 switch (op->base.op) { 2478 case DRM_GPUVA_OP_MAP: 2479 err |= xe_vm_insert_vma(vm, op->map.vma); 2480 if (!err) 2481 op->flags |= XE_VMA_OP_COMMITTED; 2482 break; 2483 case DRM_GPUVA_OP_REMAP: 2484 { 2485 u8 tile_present = 2486 gpuva_to_vma(op->base.remap.unmap->va)->tile_present; 2487 2488 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va), 2489 true); 2490 op->flags |= XE_VMA_OP_COMMITTED; 2491 2492 if (op->remap.prev) { 2493 err |= xe_vm_insert_vma(vm, op->remap.prev); 2494 if (!err) 2495 op->flags |= XE_VMA_OP_PREV_COMMITTED; 2496 if (!err && op->remap.skip_prev) { 2497 op->remap.prev->tile_present = 2498 tile_present; 2499 op->remap.prev = NULL; 2500 } 2501 } 2502 if (op->remap.next) { 2503 err |= xe_vm_insert_vma(vm, op->remap.next); 2504 if (!err) 2505 op->flags |= XE_VMA_OP_NEXT_COMMITTED; 2506 if (!err && op->remap.skip_next) { 2507 op->remap.next->tile_present = 2508 tile_present; 2509 op->remap.next = NULL; 2510 } 2511 } 2512 2513 /* Adjust for partial unbind after removing VMA from VM */ 2514 if (!err) { 2515 op->base.remap.unmap->va->va.addr = op->remap.start; 2516 op->base.remap.unmap->va->va.range = op->remap.range; 2517 } 
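/*
 * From here on the unmapped gpuva describes only the region actually
 * being unbound; any prev/next remainders were inserted above as
 * their own VMAs.
 */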
2518 break; 2519 } 2520 case DRM_GPUVA_OP_UNMAP: 2521 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true); 2522 op->flags |= XE_VMA_OP_COMMITTED; 2523 break; 2524 case DRM_GPUVA_OP_PREFETCH: 2525 op->flags |= XE_VMA_OP_COMMITTED; 2526 break; 2527 default: 2528 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2529 } 2530 2531 return err; 2532 } 2533 2534 /** 2535 * xe_vma_has_default_mem_attrs - Check if a VMA has default memory attributes 2536 * @vma: Pointer to the xe_vma structure to check 2537 * 2538 * This function determines whether the given VMA (Virtual Memory Area) 2539 * has its memory attributes set to their default values. Specifically, 2540 * it checks the following conditions: 2541 * 2542 * - `atomic_access` is `DRM_XE_ATOMIC_UNDEFINED` 2543 * - `pat_index` is equal to `default_pat_index` 2544 * - `preferred_loc.devmem_fd` is `DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE` 2545 * - `preferred_loc.migration_policy` is `DRM_XE_MIGRATE_ALL_PAGES` 2546 * 2547 * Return: true if all attributes are at their default values, false otherwise. 2548 */ 2549 bool xe_vma_has_default_mem_attrs(struct xe_vma *vma) 2550 { 2551 return (vma->attr.atomic_access == DRM_XE_ATOMIC_UNDEFINED && 2552 vma->attr.pat_index == vma->attr.default_pat_index && 2553 vma->attr.preferred_loc.devmem_fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE && 2554 vma->attr.preferred_loc.migration_policy == DRM_XE_MIGRATE_ALL_PAGES); 2555 } 2556 2557 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, 2558 struct xe_vma_ops *vops) 2559 { 2560 struct xe_device *xe = vm->xe; 2561 struct drm_gpuva_op *__op; 2562 struct xe_tile *tile; 2563 u8 id, tile_mask = 0; 2564 int err = 0; 2565 2566 lockdep_assert_held_write(&vm->lock); 2567 2568 for_each_tile(tile, vm->xe, id) 2569 tile_mask |= 0x1 << id; 2570 2571 drm_gpuva_for_each_op(__op, ops) { 2572 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2573 struct xe_vma *vma; 2574 unsigned int flags = 0; 2575 2576 INIT_LIST_HEAD(&op->link); 2577 list_add_tail(&op->link, &vops->list); 2578 op->tile_mask = tile_mask; 2579 2580 switch (op->base.op) { 2581 case DRM_GPUVA_OP_MAP: 2582 { 2583 struct xe_vma_mem_attr default_attr = { 2584 .preferred_loc = { 2585 .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE, 2586 .migration_policy = DRM_XE_MIGRATE_ALL_PAGES, 2587 }, 2588 .atomic_access = DRM_XE_ATOMIC_UNDEFINED, 2589 .default_pat_index = op->map.pat_index, 2590 .pat_index = op->map.pat_index, 2591 }; 2592 2593 flags |= op->map.read_only ? 2594 VMA_CREATE_FLAG_READ_ONLY : 0; 2595 flags |= op->map.is_null ? 2596 VMA_CREATE_FLAG_IS_NULL : 0; 2597 flags |= op->map.dumpable ? 2598 VMA_CREATE_FLAG_DUMPABLE : 0; 2599 flags |= op->map.is_cpu_addr_mirror ?
2600 VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0; 2601 2602 vma = new_vma(vm, &op->base.map, &default_attr, 2603 flags); 2604 if (IS_ERR(vma)) 2605 return PTR_ERR(vma); 2606 2607 op->map.vma = vma; 2608 if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) && 2609 !op->map.is_cpu_addr_mirror) || 2610 op->map.invalidate_on_bind) 2611 xe_vma_ops_incr_pt_update_ops(vops, 2612 op->tile_mask, 1); 2613 break; 2614 } 2615 case DRM_GPUVA_OP_REMAP: 2616 { 2617 struct xe_vma *old = 2618 gpuva_to_vma(op->base.remap.unmap->va); 2619 bool skip = xe_vma_is_cpu_addr_mirror(old); 2620 u64 start = xe_vma_start(old), end = xe_vma_end(old); 2621 int num_remap_ops = 0; 2622 2623 if (op->base.remap.prev) 2624 start = op->base.remap.prev->va.addr + 2625 op->base.remap.prev->va.range; 2626 if (op->base.remap.next) 2627 end = op->base.remap.next->va.addr; 2628 2629 if (xe_vma_is_cpu_addr_mirror(old) && 2630 xe_svm_has_mapping(vm, start, end)) { 2631 if (vops->flags & XE_VMA_OPS_FLAG_MADVISE) 2632 xe_svm_unmap_address_range(vm, start, end); 2633 else 2634 return -EBUSY; 2635 } 2636 2637 op->remap.start = xe_vma_start(old); 2638 op->remap.range = xe_vma_size(old); 2639 2640 flags |= op->base.remap.unmap->va->flags & 2641 XE_VMA_READ_ONLY ? 2642 VMA_CREATE_FLAG_READ_ONLY : 0; 2643 flags |= op->base.remap.unmap->va->flags & 2644 DRM_GPUVA_SPARSE ? 2645 VMA_CREATE_FLAG_IS_NULL : 0; 2646 flags |= op->base.remap.unmap->va->flags & 2647 XE_VMA_DUMPABLE ? 2648 VMA_CREATE_FLAG_DUMPABLE : 0; 2649 flags |= xe_vma_is_cpu_addr_mirror(old) ? 2650 VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0; 2651 2652 if (op->base.remap.prev) { 2653 vma = new_vma(vm, op->base.remap.prev, 2654 &old->attr, flags); 2655 if (IS_ERR(vma)) 2656 return PTR_ERR(vma); 2657 2658 op->remap.prev = vma; 2659 2660 /* 2661 * Userptr creates a new SG mapping so 2662 * we must also rebind. 2663 */ 2664 op->remap.skip_prev = skip || 2665 (!xe_vma_is_userptr(old) && 2666 IS_ALIGNED(xe_vma_end(vma), 2667 xe_vma_max_pte_size(old))); 2668 if (op->remap.skip_prev) { 2669 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2670 op->remap.range -= 2671 xe_vma_end(vma) - 2672 xe_vma_start(old); 2673 op->remap.start = xe_vma_end(vma); 2674 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx", 2675 (ULL)op->remap.start, 2676 (ULL)op->remap.range); 2677 } else { 2678 num_remap_ops++; 2679 } 2680 } 2681 2682 if (op->base.remap.next) { 2683 vma = new_vma(vm, op->base.remap.next, 2684 &old->attr, flags); 2685 if (IS_ERR(vma)) 2686 return PTR_ERR(vma); 2687 2688 op->remap.next = vma; 2689 2690 /* 2691 * Userptr creates a new SG mapping so 2692 * we must also rebind. 
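 * The rebind is skipped when the old VMA is a CPU address mirror, or
 * when it is not a userptr and the new VMA's boundary is aligned to
 * the old VMA's maximum PTE size.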
2693 */ 2694 op->remap.skip_next = skip || 2695 (!xe_vma_is_userptr(old) && 2696 IS_ALIGNED(xe_vma_start(vma), 2697 xe_vma_max_pte_size(old))); 2698 if (op->remap.skip_next) { 2699 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2700 op->remap.range -= 2701 xe_vma_end(old) - 2702 xe_vma_start(vma); 2703 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx", 2704 (ULL)op->remap.start, 2705 (ULL)op->remap.range); 2706 } else { 2707 num_remap_ops++; 2708 } 2709 } 2710 if (!skip) 2711 num_remap_ops++; 2712 2713 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops); 2714 break; 2715 } 2716 case DRM_GPUVA_OP_UNMAP: 2717 vma = gpuva_to_vma(op->base.unmap.va); 2718 2719 if (xe_vma_is_cpu_addr_mirror(vma) && 2720 xe_svm_has_mapping(vm, xe_vma_start(vma), 2721 xe_vma_end(vma))) 2722 return -EBUSY; 2723 2724 if (!xe_vma_is_cpu_addr_mirror(vma)) 2725 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2726 break; 2727 case DRM_GPUVA_OP_PREFETCH: 2728 vma = gpuva_to_vma(op->base.prefetch.va); 2729 2730 if (xe_vma_is_userptr(vma)) { 2731 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2732 if (err) 2733 return err; 2734 } 2735 2736 if (xe_vma_is_cpu_addr_mirror(vma)) 2737 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 2738 op->prefetch_range.ranges_count); 2739 else 2740 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2741 2742 break; 2743 default: 2744 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2745 } 2746 2747 err = xe_vma_op_commit(vm, op); 2748 if (err) 2749 return err; 2750 } 2751 2752 return 0; 2753 } 2754 2755 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, 2756 bool post_commit, bool prev_post_commit, 2757 bool next_post_commit) 2758 { 2759 lockdep_assert_held_write(&vm->lock); 2760 2761 switch (op->base.op) { 2762 case DRM_GPUVA_OP_MAP: 2763 if (op->map.vma) { 2764 prep_vma_destroy(vm, op->map.vma, post_commit); 2765 xe_vma_destroy_unlocked(op->map.vma); 2766 } 2767 break; 2768 case DRM_GPUVA_OP_UNMAP: 2769 { 2770 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va); 2771 2772 if (vma) { 2773 xe_svm_notifier_lock(vm); 2774 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2775 xe_svm_notifier_unlock(vm); 2776 if (post_commit) 2777 xe_vm_insert_vma(vm, vma); 2778 } 2779 break; 2780 } 2781 case DRM_GPUVA_OP_REMAP: 2782 { 2783 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va); 2784 2785 if (op->remap.prev) { 2786 prep_vma_destroy(vm, op->remap.prev, prev_post_commit); 2787 xe_vma_destroy_unlocked(op->remap.prev); 2788 } 2789 if (op->remap.next) { 2790 prep_vma_destroy(vm, op->remap.next, next_post_commit); 2791 xe_vma_destroy_unlocked(op->remap.next); 2792 } 2793 if (vma) { 2794 xe_svm_notifier_lock(vm); 2795 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2796 xe_svm_notifier_unlock(vm); 2797 if (post_commit) 2798 xe_vm_insert_vma(vm, vma); 2799 } 2800 break; 2801 } 2802 case DRM_GPUVA_OP_PREFETCH: 2803 /* Nothing to do */ 2804 break; 2805 default: 2806 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2807 } 2808 } 2809 2810 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, 2811 struct drm_gpuva_ops **ops, 2812 int num_ops_list) 2813 { 2814 int i; 2815 2816 for (i = num_ops_list - 1; i >= 0; --i) { 2817 struct drm_gpuva_ops *__ops = ops[i]; 2818 struct drm_gpuva_op *__op; 2819 2820 if (!__ops) 2821 continue; 2822 2823 drm_gpuva_for_each_op_reverse(__op, __ops) { 2824 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2825 2826 xe_vma_op_unwind(vm, op, 2827 op->flags & XE_VMA_OP_COMMITTED, 2828 op->flags & XE_VMA_OP_PREV_COMMITTED, 2829 op->flags 
& XE_VMA_OP_NEXT_COMMITTED); 2830 } 2831 } 2832 } 2833 2834 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, 2835 bool validate) 2836 { 2837 struct xe_bo *bo = xe_vma_bo(vma); 2838 struct xe_vm *vm = xe_vma_vm(vma); 2839 int err = 0; 2840 2841 if (bo) { 2842 if (!bo->vm) 2843 err = drm_exec_lock_obj(exec, &bo->ttm.base); 2844 if (!err && validate) 2845 err = xe_bo_validate(bo, vm, 2846 !xe_vm_in_preempt_fence_mode(vm), exec); 2847 } 2848 2849 return err; 2850 } 2851 2852 static int check_ufence(struct xe_vma *vma) 2853 { 2854 if (vma->ufence) { 2855 struct xe_user_fence * const f = vma->ufence; 2856 2857 if (!xe_sync_ufence_get_status(f)) 2858 return -EBUSY; 2859 2860 vma->ufence = NULL; 2861 xe_sync_ufence_put(f); 2862 } 2863 2864 return 0; 2865 } 2866 2867 static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) 2868 { 2869 bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); 2870 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2871 struct xe_tile *tile = op->prefetch_range.tile; 2872 int err = 0; 2873 2874 struct xe_svm_range *svm_range; 2875 struct drm_gpusvm_ctx ctx = {}; 2876 unsigned long i; 2877 2878 if (!xe_vma_is_cpu_addr_mirror(vma)) 2879 return 0; 2880 2881 ctx.read_only = xe_vma_read_only(vma); 2882 ctx.devmem_possible = devmem_possible; 2883 ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0; 2884 ctx.device_private_page_owner = xe_svm_devm_owner(vm->xe); 2885 2886 /* TODO: Threading the migration */ 2887 xa_for_each(&op->prefetch_range.range, i, svm_range) { 2888 if (!tile) 2889 xe_svm_range_migrate_to_smem(vm, svm_range); 2890 2891 if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, !!tile)) { 2892 err = xe_svm_alloc_vram(tile, svm_range, &ctx); 2893 if (err) { 2894 drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n", 2895 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 2896 return -ENODATA; 2897 } 2898 xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM"); 2899 } 2900 2901 err = xe_svm_range_get_pages(vm, svm_range, &ctx); 2902 if (err) { 2903 drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n", 2904 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 2905 if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) 2906 err = -ENODATA; 2907 return err; 2908 } 2909 xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE"); 2910 } 2911 2912 return err; 2913 } 2914 2915 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, 2916 struct xe_vma_op *op) 2917 { 2918 int err = 0; 2919 2920 switch (op->base.op) { 2921 case DRM_GPUVA_OP_MAP: 2922 if (!op->map.invalidate_on_bind) 2923 err = vma_lock_and_validate(exec, op->map.vma, 2924 !xe_vm_in_fault_mode(vm) || 2925 op->map.immediate); 2926 break; 2927 case DRM_GPUVA_OP_REMAP: 2928 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va)); 2929 if (err) 2930 break; 2931 2932 err = vma_lock_and_validate(exec, 2933 gpuva_to_vma(op->base.remap.unmap->va), 2934 false); 2935 if (!err && op->remap.prev) 2936 err = vma_lock_and_validate(exec, op->remap.prev, true); 2937 if (!err && op->remap.next) 2938 err = vma_lock_and_validate(exec, op->remap.next, true); 2939 break; 2940 case DRM_GPUVA_OP_UNMAP: 2941 err = check_ufence(gpuva_to_vma(op->base.unmap.va)); 2942 if (err) 2943 break; 2944 2945 err = vma_lock_and_validate(exec, 2946 gpuva_to_vma(op->base.unmap.va), 2947 false); 2948 break; 2949 case DRM_GPUVA_OP_PREFETCH: 2950 { 2951 struct xe_vma *vma = 
gpuva_to_vma(op->base.prefetch.va); 2952 u32 region; 2953 2954 if (!xe_vma_is_cpu_addr_mirror(vma)) { 2955 region = op->prefetch.region; 2956 xe_assert(vm->xe, region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC || 2957 region <= ARRAY_SIZE(region_to_mem_type)); 2958 } 2959 2960 err = vma_lock_and_validate(exec, 2961 gpuva_to_vma(op->base.prefetch.va), 2962 false); 2963 if (!err && !xe_vma_has_no_bo(vma)) 2964 err = xe_bo_migrate(xe_vma_bo(vma), 2965 region_to_mem_type[region], 2966 NULL, 2967 exec); 2968 break; 2969 } 2970 default: 2971 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2972 } 2973 2974 return err; 2975 } 2976 2977 static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops) 2978 { 2979 struct xe_vma_op *op; 2980 int err; 2981 2982 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) 2983 return 0; 2984 2985 list_for_each_entry(op, &vops->list, link) { 2986 if (op->base.op == DRM_GPUVA_OP_PREFETCH) { 2987 err = prefetch_ranges(vm, op); 2988 if (err) 2989 return err; 2990 } 2991 } 2992 2993 return 0; 2994 } 2995 2996 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, 2997 struct xe_vm *vm, 2998 struct xe_vma_ops *vops) 2999 { 3000 struct xe_vma_op *op; 3001 int err; 3002 3003 err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 3004 if (err) 3005 return err; 3006 3007 list_for_each_entry(op, &vops->list, link) { 3008 err = op_lock_and_prep(exec, vm, op); 3009 if (err) 3010 return err; 3011 } 3012 3013 #ifdef TEST_VM_OPS_ERROR 3014 if (vops->inject_error && 3015 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK) 3016 return -ENOSPC; 3017 #endif 3018 3019 return 0; 3020 } 3021 3022 static void op_trace(struct xe_vma_op *op) 3023 { 3024 switch (op->base.op) { 3025 case DRM_GPUVA_OP_MAP: 3026 trace_xe_vma_bind(op->map.vma); 3027 break; 3028 case DRM_GPUVA_OP_REMAP: 3029 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va)); 3030 if (op->remap.prev) 3031 trace_xe_vma_bind(op->remap.prev); 3032 if (op->remap.next) 3033 trace_xe_vma_bind(op->remap.next); 3034 break; 3035 case DRM_GPUVA_OP_UNMAP: 3036 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va)); 3037 break; 3038 case DRM_GPUVA_OP_PREFETCH: 3039 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va)); 3040 break; 3041 case DRM_GPUVA_OP_DRIVER: 3042 break; 3043 default: 3044 XE_WARN_ON("NOT POSSIBLE"); 3045 } 3046 } 3047 3048 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops) 3049 { 3050 struct xe_vma_op *op; 3051 3052 list_for_each_entry(op, &vops->list, link) 3053 op_trace(op); 3054 } 3055 3056 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops) 3057 { 3058 struct xe_exec_queue *q = vops->q; 3059 struct xe_tile *tile; 3060 int number_tiles = 0; 3061 u8 id; 3062 3063 for_each_tile(tile, vm->xe, id) { 3064 if (vops->pt_update_ops[id].num_ops) 3065 ++number_tiles; 3066 3067 if (vops->pt_update_ops[id].q) 3068 continue; 3069 3070 if (q) { 3071 vops->pt_update_ops[id].q = q; 3072 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list)) 3073 q = list_next_entry(q, multi_gt_list); 3074 } else { 3075 vops->pt_update_ops[id].q = vm->q[id]; 3076 } 3077 } 3078 3079 return number_tiles; 3080 } 3081 3082 static struct dma_fence *ops_execute(struct xe_vm *vm, 3083 struct xe_vma_ops *vops) 3084 { 3085 struct xe_tile *tile; 3086 struct dma_fence *fence = NULL; 3087 struct dma_fence **fences = NULL; 3088 struct dma_fence_array *cf = NULL; 3089 int number_tiles = 0, current_fence = 0, err; 3090 u8 id; 3091 3092 number_tiles = vm_ops_setup_tile_args(vm, vops); 3093 if 
(number_tiles == 0) 3094 return ERR_PTR(-ENODATA); 3095 3096 if (number_tiles > 1) { 3097 fences = kmalloc_array(number_tiles, sizeof(*fences), 3098 GFP_KERNEL); 3099 if (!fences) { 3100 fence = ERR_PTR(-ENOMEM); 3101 goto err_trace; 3102 } 3103 } 3104 3105 for_each_tile(tile, vm->xe, id) { 3106 if (!vops->pt_update_ops[id].num_ops) 3107 continue; 3108 3109 err = xe_pt_update_ops_prepare(tile, vops); 3110 if (err) { 3111 fence = ERR_PTR(err); 3112 goto err_out; 3113 } 3114 } 3115 3116 trace_xe_vm_ops_execute(vops); 3117 3118 for_each_tile(tile, vm->xe, id) { 3119 if (!vops->pt_update_ops[id].num_ops) 3120 continue; 3121 3122 fence = xe_pt_update_ops_run(tile, vops); 3123 if (IS_ERR(fence)) 3124 goto err_out; 3125 3126 if (fences) 3127 fences[current_fence++] = fence; 3128 } 3129 3130 if (fences) { 3131 cf = dma_fence_array_create(number_tiles, fences, 3132 vm->composite_fence_ctx, 3133 vm->composite_fence_seqno++, 3134 false); 3135 if (!cf) { 3136 --vm->composite_fence_seqno; 3137 fence = ERR_PTR(-ENOMEM); 3138 goto err_out; 3139 } 3140 fence = &cf->base; 3141 } 3142 3143 for_each_tile(tile, vm->xe, id) { 3144 if (!vops->pt_update_ops[id].num_ops) 3145 continue; 3146 3147 xe_pt_update_ops_fini(tile, vops); 3148 } 3149 3150 return fence; 3151 3152 err_out: 3153 for_each_tile(tile, vm->xe, id) { 3154 if (!vops->pt_update_ops[id].num_ops) 3155 continue; 3156 3157 xe_pt_update_ops_abort(tile, vops); 3158 } 3159 while (current_fence) 3160 dma_fence_put(fences[--current_fence]); 3161 kfree(fences); 3162 kfree(cf); 3163 3164 err_trace: 3165 trace_xe_vm_ops_fail(vm); 3166 return fence; 3167 } 3168 3169 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence) 3170 { 3171 if (vma->ufence) 3172 xe_sync_ufence_put(vma->ufence); 3173 vma->ufence = __xe_sync_ufence_get(ufence); 3174 } 3175 3176 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op, 3177 struct xe_user_fence *ufence) 3178 { 3179 switch (op->base.op) { 3180 case DRM_GPUVA_OP_MAP: 3181 vma_add_ufence(op->map.vma, ufence); 3182 break; 3183 case DRM_GPUVA_OP_REMAP: 3184 if (op->remap.prev) 3185 vma_add_ufence(op->remap.prev, ufence); 3186 if (op->remap.next) 3187 vma_add_ufence(op->remap.next, ufence); 3188 break; 3189 case DRM_GPUVA_OP_UNMAP: 3190 break; 3191 case DRM_GPUVA_OP_PREFETCH: 3192 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence); 3193 break; 3194 default: 3195 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 3196 } 3197 } 3198 3199 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, 3200 struct dma_fence *fence) 3201 { 3202 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q); 3203 struct xe_user_fence *ufence; 3204 struct xe_vma_op *op; 3205 int i; 3206 3207 ufence = find_ufence_get(vops->syncs, vops->num_syncs); 3208 list_for_each_entry(op, &vops->list, link) { 3209 if (ufence) 3210 op_add_ufence(vm, op, ufence); 3211 3212 if (op->base.op == DRM_GPUVA_OP_UNMAP) 3213 xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence); 3214 else if (op->base.op == DRM_GPUVA_OP_REMAP) 3215 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), 3216 fence); 3217 } 3218 if (ufence) 3219 xe_sync_ufence_put(ufence); 3220 if (fence) { 3221 for (i = 0; i < vops->num_syncs; i++) 3222 xe_sync_entry_signal(vops->syncs + i, fence); 3223 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); 3224 } 3225 } 3226 3227 static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm, 3228 struct xe_vma_ops *vops) 3229 { 3230 struct xe_validation_ctx ctx; 3231 struct drm_exec 
exec; 3232 struct dma_fence *fence; 3233 int err = 0; 3234 3235 lockdep_assert_held_write(&vm->lock); 3236 3237 xe_validation_guard(&ctx, &vm->xe->val, &exec, 3238 ((struct xe_val_flags) { 3239 .interruptible = true, 3240 .exec_ignore_duplicates = true, 3241 }), err) { 3242 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops); 3243 drm_exec_retry_on_contention(&exec); 3244 xe_validation_retry_on_oom(&ctx, &err); 3245 if (err) 3246 return ERR_PTR(err); 3247 3248 xe_vm_set_validation_exec(vm, &exec); 3249 fence = ops_execute(vm, vops); 3250 xe_vm_set_validation_exec(vm, NULL); 3251 if (IS_ERR(fence)) { 3252 if (PTR_ERR(fence) == -ENODATA) 3253 vm_bind_ioctl_ops_fini(vm, vops, NULL); 3254 return fence; 3255 } 3256 3257 vm_bind_ioctl_ops_fini(vm, vops, fence); 3258 } 3259 3260 return err ? ERR_PTR(err) : fence; 3261 } 3262 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); 3263 3264 #define SUPPORTED_FLAGS_STUB \ 3265 (DRM_XE_VM_BIND_FLAG_READONLY | \ 3266 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \ 3267 DRM_XE_VM_BIND_FLAG_NULL | \ 3268 DRM_XE_VM_BIND_FLAG_DUMPABLE | \ 3269 DRM_XE_VM_BIND_FLAG_CHECK_PXP | \ 3270 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) 3271 3272 #ifdef TEST_VM_OPS_ERROR 3273 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR) 3274 #else 3275 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB 3276 #endif 3277 3278 #define XE_64K_PAGE_MASK 0xffffull 3279 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) 3280 3281 static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, 3282 struct drm_xe_vm_bind *args, 3283 struct drm_xe_vm_bind_op **bind_ops) 3284 { 3285 int err; 3286 int i; 3287 3288 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) || 3289 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 3290 return -EINVAL; 3291 3292 if (XE_IOCTL_DBG(xe, args->extensions)) 3293 return -EINVAL; 3294 3295 if (args->num_binds > 1) { 3296 u64 __user *bind_user = 3297 u64_to_user_ptr(args->vector_of_binds); 3298 3299 *bind_ops = kvmalloc_array(args->num_binds, 3300 sizeof(struct drm_xe_vm_bind_op), 3301 GFP_KERNEL | __GFP_ACCOUNT | 3302 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3303 if (!*bind_ops) 3304 return args->num_binds > 1 ? 
-ENOBUFS : -ENOMEM; 3305 3306 err = copy_from_user(*bind_ops, bind_user, 3307 sizeof(struct drm_xe_vm_bind_op) * 3308 args->num_binds); 3309 if (XE_IOCTL_DBG(xe, err)) { 3310 err = -EFAULT; 3311 goto free_bind_ops; 3312 } 3313 } else { 3314 *bind_ops = &args->bind; 3315 } 3316 3317 for (i = 0; i < args->num_binds; ++i) { 3318 u64 range = (*bind_ops)[i].range; 3319 u64 addr = (*bind_ops)[i].addr; 3320 u32 op = (*bind_ops)[i].op; 3321 u32 flags = (*bind_ops)[i].flags; 3322 u32 obj = (*bind_ops)[i].obj; 3323 u64 obj_offset = (*bind_ops)[i].obj_offset; 3324 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance; 3325 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 3326 bool is_cpu_addr_mirror = flags & 3327 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR; 3328 u16 pat_index = (*bind_ops)[i].pat_index; 3329 u16 coh_mode; 3330 3331 if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror && 3332 (!xe_vm_in_fault_mode(vm) || 3333 !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) { 3334 err = -EINVAL; 3335 goto free_bind_ops; 3336 } 3337 3338 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) { 3339 err = -EINVAL; 3340 goto free_bind_ops; 3341 } 3342 3343 pat_index = array_index_nospec(pat_index, xe->pat.n_entries); 3344 (*bind_ops)[i].pat_index = pat_index; 3345 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); 3346 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */ 3347 err = -EINVAL; 3348 goto free_bind_ops; 3349 } 3350 3351 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) { 3352 err = -EINVAL; 3353 goto free_bind_ops; 3354 } 3355 3356 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) || 3357 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) || 3358 XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) || 3359 XE_IOCTL_DBG(xe, obj_offset && (is_null || 3360 is_cpu_addr_mirror)) || 3361 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP && 3362 (is_null || is_cpu_addr_mirror)) || 3363 XE_IOCTL_DBG(xe, !obj && 3364 op == DRM_XE_VM_BIND_OP_MAP && 3365 !is_null && !is_cpu_addr_mirror) || 3366 XE_IOCTL_DBG(xe, !obj && 3367 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3368 XE_IOCTL_DBG(xe, addr && 3369 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3370 XE_IOCTL_DBG(xe, range && 3371 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3372 XE_IOCTL_DBG(xe, obj && 3373 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 3374 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 3375 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 3376 XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR && 3377 !IS_ENABLED(CONFIG_DRM_GPUSVM)) || 3378 XE_IOCTL_DBG(xe, obj && 3379 op == DRM_XE_VM_BIND_OP_PREFETCH) || 3380 XE_IOCTL_DBG(xe, prefetch_region && 3381 op != DRM_XE_VM_BIND_OP_PREFETCH) || 3382 XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC && 3383 !(BIT(prefetch_region) & xe->info.mem_region_mask))) || 3384 XE_IOCTL_DBG(xe, obj && 3385 op == DRM_XE_VM_BIND_OP_UNMAP)) { 3386 err = -EINVAL; 3387 goto free_bind_ops; 3388 } 3389 3390 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) || 3391 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) || 3392 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) || 3393 XE_IOCTL_DBG(xe, !range && 3394 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) { 3395 err = -EINVAL; 3396 goto free_bind_ops; 3397 } 3398 } 3399 3400 return 0; 3401 3402 free_bind_ops: 3403 if (args->num_binds > 1) 3404 kvfree(*bind_ops); 3405 *bind_ops = NULL; 3406 return err; 3407 } 3408 3409 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, 3410 struct xe_exec_queue *q, 3411 struct xe_sync_entry *syncs, 3412 int num_syncs) 3413 { 3414 struct dma_fence *fence; 3415 int i, err = 0; 3416 3417 fence = 
xe_sync_in_fence_get(syncs, num_syncs, 3418 to_wait_exec_queue(vm, q), vm); 3419 if (IS_ERR(fence)) 3420 return PTR_ERR(fence); 3421 3422 for (i = 0; i < num_syncs; i++) 3423 xe_sync_entry_signal(&syncs[i], fence); 3424 3425 xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm, 3426 fence); 3427 dma_fence_put(fence); 3428 3429 return err; 3430 } 3431 3432 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 3433 struct xe_exec_queue *q, 3434 struct xe_sync_entry *syncs, u32 num_syncs) 3435 { 3436 memset(vops, 0, sizeof(*vops)); 3437 INIT_LIST_HEAD(&vops->list); 3438 vops->vm = vm; 3439 vops->q = q; 3440 vops->syncs = syncs; 3441 vops->num_syncs = num_syncs; 3442 vops->flags = 0; 3443 } 3444 3445 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, 3446 u64 addr, u64 range, u64 obj_offset, 3447 u16 pat_index, u32 op, u32 bind_flags) 3448 { 3449 u16 coh_mode; 3450 3451 if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) || 3452 XE_IOCTL_DBG(xe, obj_offset > 3453 xe_bo_size(bo) - range)) { 3454 return -EINVAL; 3455 } 3456 3457 /* 3458 * Some platforms require 64k VM_BIND alignment, 3459 * specifically those with XE_VRAM_FLAGS_NEED64K. 3460 * 3461 * Other platforms may have BO's set to 64k physical placement, 3462 * but can be mapped at 4k offsets anyway. This check is only 3463 * there for the former case. 3464 */ 3465 if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) && 3466 (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) { 3467 if (XE_IOCTL_DBG(xe, obj_offset & 3468 XE_64K_PAGE_MASK) || 3469 XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || 3470 XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) { 3471 return -EINVAL; 3472 } 3473 } 3474 3475 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); 3476 if (bo->cpu_caching) { 3477 if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 3478 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) { 3479 return -EINVAL; 3480 } 3481 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) { 3482 /* 3483 * Imported dma-buf from a different device should 3484 * require 1way or 2way coherency since we don't know 3485 * how it was mapped on the CPU. Just assume is it 3486 * potentially cached on CPU side. 
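 * Such imports are therefore only accepted with an at-least-1way
 * coherent PAT index.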
3487 */ 3488 return -EINVAL; 3489 } 3490 3491 /* If a BO is protected it can only be mapped if the key is still valid */ 3492 if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) && 3493 op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL) 3494 if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0)) 3495 return -ENOEXEC; 3496 3497 return 0; 3498 } 3499 3500 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3501 { 3502 struct xe_device *xe = to_xe_device(dev); 3503 struct xe_file *xef = to_xe_file(file); 3504 struct drm_xe_vm_bind *args = data; 3505 struct drm_xe_sync __user *syncs_user; 3506 struct xe_bo **bos = NULL; 3507 struct drm_gpuva_ops **ops = NULL; 3508 struct xe_vm *vm; 3509 struct xe_exec_queue *q = NULL; 3510 u32 num_syncs, num_ufence = 0; 3511 struct xe_sync_entry *syncs = NULL; 3512 struct drm_xe_vm_bind_op *bind_ops = NULL; 3513 struct xe_vma_ops vops; 3514 struct dma_fence *fence; 3515 int err; 3516 int i; 3517 3518 vm = xe_vm_lookup(xef, args->vm_id); 3519 if (XE_IOCTL_DBG(xe, !vm)) 3520 return -EINVAL; 3521 3522 err = vm_bind_ioctl_check_args(xe, vm, args, &bind_ops); 3523 if (err) 3524 goto put_vm; 3525 3526 if (args->exec_queue_id) { 3527 q = xe_exec_queue_lookup(xef, args->exec_queue_id); 3528 if (XE_IOCTL_DBG(xe, !q)) { 3529 err = -ENOENT; 3530 goto free_bind_ops; 3531 } 3532 3533 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) { 3534 err = -EINVAL; 3535 goto put_exec_queue; 3536 } 3537 } 3538 3539 /* Ensure all UNMAPs visible */ 3540 xe_svm_flush(vm); 3541 3542 err = down_write_killable(&vm->lock); 3543 if (err) 3544 goto put_exec_queue; 3545 3546 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { 3547 err = -ENOENT; 3548 goto release_vm_lock; 3549 } 3550 3551 for (i = 0; i < args->num_binds; ++i) { 3552 u64 range = bind_ops[i].range; 3553 u64 addr = bind_ops[i].addr; 3554 3555 if (XE_IOCTL_DBG(xe, range > vm->size) || 3556 XE_IOCTL_DBG(xe, addr > vm->size - range)) { 3557 err = -EINVAL; 3558 goto release_vm_lock; 3559 } 3560 } 3561 3562 if (args->num_binds) { 3563 bos = kvcalloc(args->num_binds, sizeof(*bos), 3564 GFP_KERNEL | __GFP_ACCOUNT | 3565 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3566 if (!bos) { 3567 err = -ENOMEM; 3568 goto release_vm_lock; 3569 } 3570 3571 ops = kvcalloc(args->num_binds, sizeof(*ops), 3572 GFP_KERNEL | __GFP_ACCOUNT | 3573 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3574 if (!ops) { 3575 err = -ENOMEM; 3576 goto free_bos; 3577 } 3578 } 3579 3580 for (i = 0; i < args->num_binds; ++i) { 3581 struct drm_gem_object *gem_obj; 3582 u64 range = bind_ops[i].range; 3583 u64 addr = bind_ops[i].addr; 3584 u32 obj = bind_ops[i].obj; 3585 u64 obj_offset = bind_ops[i].obj_offset; 3586 u16 pat_index = bind_ops[i].pat_index; 3587 u32 op = bind_ops[i].op; 3588 u32 bind_flags = bind_ops[i].flags; 3589 3590 if (!obj) 3591 continue; 3592 3593 gem_obj = drm_gem_object_lookup(file, obj); 3594 if (XE_IOCTL_DBG(xe, !gem_obj)) { 3595 err = -ENOENT; 3596 goto put_obj; 3597 } 3598 bos[i] = gem_to_xe_bo(gem_obj); 3599 3600 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range, 3601 obj_offset, pat_index, op, 3602 bind_flags); 3603 if (err) 3604 goto put_obj; 3605 } 3606 3607 if (args->num_syncs) { 3608 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); 3609 if (!syncs) { 3610 err = -ENOMEM; 3611 goto put_obj; 3612 } 3613 } 3614 3615 syncs_user = u64_to_user_ptr(args->syncs); 3616 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { 3617 err = xe_sync_entry_parse(xe, 
xef, &syncs[num_syncs], 3618 &syncs_user[num_syncs], 3619 (xe_vm_in_lr_mode(vm) ? 3620 SYNC_PARSE_FLAG_LR_MODE : 0) | 3621 (!args->num_binds ? 3622 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0)); 3623 if (err) 3624 goto free_syncs; 3625 3626 if (xe_sync_is_ufence(&syncs[num_syncs])) 3627 num_ufence++; 3628 } 3629 3630 if (XE_IOCTL_DBG(xe, num_ufence > 1)) { 3631 err = -EINVAL; 3632 goto free_syncs; 3633 } 3634 3635 if (!args->num_binds) { 3636 err = -ENODATA; 3637 goto free_syncs; 3638 } 3639 3640 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); 3641 for (i = 0; i < args->num_binds; ++i) { 3642 u64 range = bind_ops[i].range; 3643 u64 addr = bind_ops[i].addr; 3644 u32 op = bind_ops[i].op; 3645 u32 flags = bind_ops[i].flags; 3646 u64 obj_offset = bind_ops[i].obj_offset; 3647 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance; 3648 u16 pat_index = bind_ops[i].pat_index; 3649 3650 ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset, 3651 addr, range, op, flags, 3652 prefetch_region, pat_index); 3653 if (IS_ERR(ops[i])) { 3654 err = PTR_ERR(ops[i]); 3655 ops[i] = NULL; 3656 goto unwind_ops; 3657 } 3658 3659 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops); 3660 if (err) 3661 goto unwind_ops; 3662 3663 #ifdef TEST_VM_OPS_ERROR 3664 if (flags & FORCE_OP_ERROR) { 3665 vops.inject_error = true; 3666 vm->xe->vm_inject_error_position = 3667 (vm->xe->vm_inject_error_position + 1) % 3668 FORCE_OP_ERROR_COUNT; 3669 } 3670 #endif 3671 } 3672 3673 /* Nothing to do */ 3674 if (list_empty(&vops.list)) { 3675 err = -ENODATA; 3676 goto unwind_ops; 3677 } 3678 3679 err = xe_vma_ops_alloc(&vops, args->num_binds > 1); 3680 if (err) 3681 goto unwind_ops; 3682 3683 err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops); 3684 if (err) 3685 goto unwind_ops; 3686 3687 fence = vm_bind_ioctl_ops_execute(vm, &vops); 3688 if (IS_ERR(fence)) 3689 err = PTR_ERR(fence); 3690 else 3691 dma_fence_put(fence); 3692 3693 unwind_ops: 3694 if (err && err != -ENODATA) 3695 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); 3696 xe_vma_ops_fini(&vops); 3697 for (i = args->num_binds - 1; i >= 0; --i) 3698 if (ops[i]) 3699 drm_gpuva_ops_free(&vm->gpuvm, ops[i]); 3700 free_syncs: 3701 if (err == -ENODATA) 3702 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs); 3703 while (num_syncs--) 3704 xe_sync_entry_cleanup(&syncs[num_syncs]); 3705 3706 kfree(syncs); 3707 put_obj: 3708 for (i = 0; i < args->num_binds; ++i) 3709 xe_bo_put(bos[i]); 3710 3711 kvfree(ops); 3712 free_bos: 3713 kvfree(bos); 3714 release_vm_lock: 3715 up_write(&vm->lock); 3716 put_exec_queue: 3717 if (q) 3718 xe_exec_queue_put(q); 3719 free_bind_ops: 3720 if (args->num_binds > 1) 3721 kvfree(bind_ops); 3722 put_vm: 3723 xe_vm_put(vm); 3724 return err; 3725 } 3726 3727 /** 3728 * xe_vm_bind_kernel_bo - bind a kernel BO to a VM 3729 * @vm: VM to bind the BO to 3730 * @bo: BO to bind 3731 * @q: exec queue to use for the bind (optional) 3732 * @addr: address at which to bind the BO 3733 * @cache_lvl: PAT cache level to use 3734 * 3735 * Execute a VM bind map operation on a kernel-owned BO to bind it into a 3736 * kernel-owned VM. 3737 * 3738 * Returns a dma_fence to track the binding completion if the job to do so was 3739 * successfully submitted, an error pointer otherwise. 
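 *
 * If @q is NULL the VM's default bind exec queue is used.
 *
 * Illustrative usage sketch (the caller and its @vm, @bo and @addr values
 * are hypothetical, and the synchronous wait is optional):
 *
 *	fence = xe_vm_bind_kernel_bo(vm, bo, NULL, addr, XE_CACHE_WB);
 *	if (IS_ERR(fence))
 *		return PTR_ERR(fence);
 *	dma_fence_wait(fence, false);
 *	dma_fence_put(fence);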
3740 */ 3741 struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo, 3742 struct xe_exec_queue *q, u64 addr, 3743 enum xe_cache_level cache_lvl) 3744 { 3745 struct xe_vma_ops vops; 3746 struct drm_gpuva_ops *ops = NULL; 3747 struct dma_fence *fence; 3748 int err; 3749 3750 xe_bo_get(bo); 3751 xe_vm_get(vm); 3752 if (q) 3753 xe_exec_queue_get(q); 3754 3755 down_write(&vm->lock); 3756 3757 xe_vma_ops_init(&vops, vm, q, NULL, 0); 3758 3759 ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo), 3760 DRM_XE_VM_BIND_OP_MAP, 0, 0, 3761 vm->xe->pat.idx[cache_lvl]); 3762 if (IS_ERR(ops)) { 3763 err = PTR_ERR(ops); 3764 goto release_vm_lock; 3765 } 3766 3767 err = vm_bind_ioctl_ops_parse(vm, ops, &vops); 3768 if (err) 3769 goto release_vm_lock; 3770 3771 xe_assert(vm->xe, !list_empty(&vops.list)); 3772 3773 err = xe_vma_ops_alloc(&vops, false); 3774 if (err) 3775 goto unwind_ops; 3776 3777 fence = vm_bind_ioctl_ops_execute(vm, &vops); 3778 if (IS_ERR(fence)) 3779 err = PTR_ERR(fence); 3780 3781 unwind_ops: 3782 if (err && err != -ENODATA) 3783 vm_bind_ioctl_ops_unwind(vm, &ops, 1); 3784 3785 xe_vma_ops_fini(&vops); 3786 drm_gpuva_ops_free(&vm->gpuvm, ops); 3787 3788 release_vm_lock: 3789 up_write(&vm->lock); 3790 3791 if (q) 3792 xe_exec_queue_put(q); 3793 xe_vm_put(vm); 3794 xe_bo_put(bo); 3795 3796 if (err) 3797 fence = ERR_PTR(err); 3798 3799 return fence; 3800 } 3801 3802 /** 3803 * xe_vm_lock() - Lock the vm's dma_resv object 3804 * @vm: The struct xe_vm whose lock is to be locked 3805 * @intr: Whether to perform any wait interruptible 3806 * 3807 * Return: 0 on success, -EINTR if @intr is true and the wait for a 3808 * contended lock was interrupted. If @intr is false, the function 3809 * always returns 0. 3810 */ 3811 int xe_vm_lock(struct xe_vm *vm, bool intr) 3812 { 3813 int ret; 3814 3815 if (intr) 3816 ret = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); 3817 else 3818 ret = dma_resv_lock(xe_vm_resv(vm), NULL); 3819 3820 return ret; 3821 } 3822 3823 /** 3824 * xe_vm_unlock() - Unlock the vm's dma_resv object 3825 * @vm: The struct xe_vm whose lock is to be released. 3826 * 3827 * Unlock a buffer object lock that was locked by xe_vm_lock(). 3828 */ 3829 void xe_vm_unlock(struct xe_vm *vm) 3830 { 3831 dma_resv_unlock(xe_vm_resv(vm)); 3832 } 3833 3834 /** 3835 * xe_vm_range_tilemask_tlb_inval - Issue a TLB invalidation on this tilemask for an 3836 * address range 3837 * @vm: The VM 3838 * @start: start address 3839 * @end: end address 3840 * @tile_mask: mask for which gt's issue tlb invalidation 3841 * 3842 * Issue a range based TLB invalidation for gt's in tilemask 3843 * 3844 * Returns 0 for success, negative error code otherwise. 
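 *
 * @tile_mask uses the per-tile BIT(id) encoding; for each selected tile
 * both the primary GT and, if present, the media GT are invalidated,
 * and the function waits for all issued invalidation fences before
 * returning.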
3845 */ 3846 int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start, 3847 u64 end, u8 tile_mask) 3848 { 3849 struct xe_tlb_inval_fence 3850 fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; 3851 struct xe_tile *tile; 3852 u32 fence_id = 0; 3853 u8 id; 3854 int err; 3855 3856 if (!tile_mask) 3857 return 0; 3858 3859 for_each_tile(tile, vm->xe, id) { 3860 if (!(tile_mask & BIT(id))) 3861 continue; 3862 3863 xe_tlb_inval_fence_init(&tile->primary_gt->tlb_inval, 3864 &fence[fence_id], true); 3865 3866 err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval, 3867 &fence[fence_id], start, end, 3868 vm->usm.asid); 3869 if (err) 3870 goto wait; 3871 ++fence_id; 3872 3873 if (!tile->media_gt) 3874 continue; 3875 3876 xe_tlb_inval_fence_init(&tile->media_gt->tlb_inval, 3877 &fence[fence_id], true); 3878 3879 err = xe_tlb_inval_range(&tile->media_gt->tlb_inval, 3880 &fence[fence_id], start, end, 3881 vm->usm.asid); 3882 if (err) 3883 goto wait; 3884 ++fence_id; 3885 } 3886 3887 wait: 3888 for (id = 0; id < fence_id; ++id) 3889 xe_tlb_inval_fence_wait(&fence[id]); 3890 3891 return err; 3892 } 3893 3894 /** 3895 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock 3896 * @vma: VMA to invalidate 3897 * 3898 * Walks a list of page tables leaves which it memset the entries owned by this 3899 * VMA to zero, invalidates the TLBs, and block until TLBs invalidation is 3900 * complete. 3901 * 3902 * Returns 0 for success, negative error code otherwise. 3903 */ 3904 int xe_vm_invalidate_vma(struct xe_vma *vma) 3905 { 3906 struct xe_device *xe = xe_vma_vm(vma)->xe; 3907 struct xe_vm *vm = xe_vma_vm(vma); 3908 struct xe_tile *tile; 3909 u8 tile_mask = 0; 3910 int ret = 0; 3911 u8 id; 3912 3913 xe_assert(xe, !xe_vma_is_null(vma)); 3914 xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma)); 3915 trace_xe_vma_invalidate(vma); 3916 3917 vm_dbg(&vm->xe->drm, 3918 "INVALIDATE: addr=0x%016llx, range=0x%016llx", 3919 xe_vma_start(vma), xe_vma_size(vma)); 3920 3921 /* 3922 * Check that we don't race with page-table updates, tile_invalidated 3923 * update is safe 3924 */ 3925 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { 3926 if (xe_vma_is_userptr(vma)) { 3927 lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) || 3928 (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) && 3929 lockdep_is_held(&xe_vm_resv(vm)->lock.base))); 3930 3931 WARN_ON_ONCE(!mmu_interval_check_retry 3932 (&to_userptr_vma(vma)->userptr.notifier, 3933 to_userptr_vma(vma)->userptr.pages.notifier_seq)); 3934 WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm), 3935 DMA_RESV_USAGE_BOOKKEEP)); 3936 3937 } else { 3938 xe_bo_assert_held(xe_vma_bo(vma)); 3939 } 3940 } 3941 3942 for_each_tile(tile, xe, id) 3943 if (xe_pt_zap_ptes(tile, vma)) 3944 tile_mask |= BIT(id); 3945 3946 xe_device_wmb(xe); 3947 3948 ret = xe_vm_range_tilemask_tlb_inval(xe_vma_vm(vma), xe_vma_start(vma), 3949 xe_vma_end(vma), tile_mask); 3950 3951 /* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */ 3952 WRITE_ONCE(vma->tile_invalidated, vma->tile_mask); 3953 3954 return ret; 3955 } 3956 3957 int xe_vm_validate_protected(struct xe_vm *vm) 3958 { 3959 struct drm_gpuva *gpuva; 3960 int err = 0; 3961 3962 if (!vm) 3963 return -ENODEV; 3964 3965 mutex_lock(&vm->snap_mutex); 3966 3967 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 3968 struct xe_vma *vma = gpuva_to_vma(gpuva); 3969 struct xe_bo *bo = vma->gpuva.gem.obj ? 
3970 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 3971 3972 if (!bo) 3973 continue; 3974 3975 if (xe_bo_is_protected(bo)) { 3976 err = xe_pxp_bo_key_check(vm->xe->pxp, bo); 3977 if (err) 3978 break; 3979 } 3980 } 3981 3982 mutex_unlock(&vm->snap_mutex); 3983 return err; 3984 } 3985 3986 struct xe_vm_snapshot { 3987 unsigned long num_snaps; 3988 struct { 3989 u64 ofs, bo_ofs; 3990 unsigned long len; 3991 struct xe_bo *bo; 3992 void *data; 3993 struct mm_struct *mm; 3994 } snap[]; 3995 }; 3996 3997 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm) 3998 { 3999 unsigned long num_snaps = 0, i; 4000 struct xe_vm_snapshot *snap = NULL; 4001 struct drm_gpuva *gpuva; 4002 4003 if (!vm) 4004 return NULL; 4005 4006 mutex_lock(&vm->snap_mutex); 4007 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 4008 if (gpuva->flags & XE_VMA_DUMPABLE) 4009 num_snaps++; 4010 } 4011 4012 if (num_snaps) 4013 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT); 4014 if (!snap) { 4015 snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV); 4016 goto out_unlock; 4017 } 4018 4019 snap->num_snaps = num_snaps; 4020 i = 0; 4021 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 4022 struct xe_vma *vma = gpuva_to_vma(gpuva); 4023 struct xe_bo *bo = vma->gpuva.gem.obj ? 4024 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 4025 4026 if (!(gpuva->flags & XE_VMA_DUMPABLE)) 4027 continue; 4028 4029 snap->snap[i].ofs = xe_vma_start(vma); 4030 snap->snap[i].len = xe_vma_size(vma); 4031 if (bo) { 4032 snap->snap[i].bo = xe_bo_get(bo); 4033 snap->snap[i].bo_ofs = xe_vma_bo_offset(vma); 4034 } else if (xe_vma_is_userptr(vma)) { 4035 struct mm_struct *mm = 4036 to_userptr_vma(vma)->userptr.notifier.mm; 4037 4038 if (mmget_not_zero(mm)) 4039 snap->snap[i].mm = mm; 4040 else 4041 snap->snap[i].data = ERR_PTR(-EFAULT); 4042 4043 snap->snap[i].bo_ofs = xe_vma_userptr(vma); 4044 } else { 4045 snap->snap[i].data = ERR_PTR(-ENOENT); 4046 } 4047 i++; 4048 } 4049 4050 out_unlock: 4051 mutex_unlock(&vm->snap_mutex); 4052 return snap; 4053 } 4054 4055 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) 4056 { 4057 if (IS_ERR_OR_NULL(snap)) 4058 return; 4059 4060 for (int i = 0; i < snap->num_snaps; i++) { 4061 struct xe_bo *bo = snap->snap[i].bo; 4062 int err; 4063 4064 if (IS_ERR(snap->snap[i].data)) 4065 continue; 4066 4067 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER); 4068 if (!snap->snap[i].data) { 4069 snap->snap[i].data = ERR_PTR(-ENOMEM); 4070 goto cleanup_bo; 4071 } 4072 4073 if (bo) { 4074 err = xe_bo_read(bo, snap->snap[i].bo_ofs, 4075 snap->snap[i].data, snap->snap[i].len); 4076 } else { 4077 void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs; 4078 4079 kthread_use_mm(snap->snap[i].mm); 4080 if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len)) 4081 err = 0; 4082 else 4083 err = -EFAULT; 4084 kthread_unuse_mm(snap->snap[i].mm); 4085 4086 mmput(snap->snap[i].mm); 4087 snap->snap[i].mm = NULL; 4088 } 4089 4090 if (err) { 4091 kvfree(snap->snap[i].data); 4092 snap->snap[i].data = ERR_PTR(err); 4093 } 4094 4095 cleanup_bo: 4096 xe_bo_put(bo); 4097 snap->snap[i].bo = NULL; 4098 } 4099 } 4100 4101 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) 4102 { 4103 unsigned long i, j; 4104 4105 if (IS_ERR_OR_NULL(snap)) { 4106 drm_printf(p, "[0].error: %li\n", PTR_ERR(snap)); 4107 return; 4108 } 4109 4110 for (i = 0; i < snap->num_snaps; i++) { 4111 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len); 4112 4113 if 
(IS_ERR(snap->snap[i].data)) { 4114 drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs, 4115 PTR_ERR(snap->snap[i].data)); 4116 continue; 4117 } 4118 4119 drm_printf(p, "[%llx].data: ", snap->snap[i].ofs); 4120 4121 for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) { 4122 u32 *val = snap->snap[i].data + j; 4123 char dumped[ASCII85_BUFSZ]; 4124 4125 drm_puts(p, ascii85_encode(*val, dumped)); 4126 } 4127 4128 drm_puts(p, "\n"); 4129 4130 if (drm_coredump_printer_is_full(p)) 4131 return; 4132 } 4133 } 4134 4135 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap) 4136 { 4137 unsigned long i; 4138 4139 if (IS_ERR_OR_NULL(snap)) 4140 return; 4141 4142 for (i = 0; i < snap->num_snaps; i++) { 4143 if (!IS_ERR(snap->snap[i].data)) 4144 kvfree(snap->snap[i].data); 4145 xe_bo_put(snap->snap[i].bo); 4146 if (snap->snap[i].mm) 4147 mmput(snap->snap[i].mm); 4148 } 4149 kvfree(snap); 4150 } 4151 4152 /** 4153 * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations 4154 * @xe: Pointer to the XE device structure 4155 * @vma: Pointer to the virtual memory area (VMA) structure 4156 * @is_atomic: In pagefault path and atomic operation 4157 * 4158 * This function determines whether the given VMA needs to be migrated to 4159 * VRAM in order to do atomic GPU operation. 4160 * 4161 * Return: 4162 * 1 - Migration to VRAM is required 4163 * 0 - Migration is not required 4164 * -EACCES - Invalid access for atomic memory attr 4165 * 4166 */ 4167 int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic) 4168 { 4169 u32 atomic_access = xe_vma_bo(vma) ? xe_vma_bo(vma)->attr.atomic_access : 4170 vma->attr.atomic_access; 4171 4172 if (!IS_DGFX(xe) || !is_atomic) 4173 return false; 4174 4175 /* 4176 * NOTE: The checks implemented here are platform-specific. For 4177 * instance, on a device supporting CXL atomics, these would ideally 4178 * work universally without additional handling. 
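 *
 * With the checks below, DRM_XE_ATOMIC_DEVICE only requires migration
 * when the device lacks atomics on system memory, DRM_XE_ATOMIC_CPU is
 * an invalid access for GPU atomics, and DRM_XE_ATOMIC_GLOBAL (as well
 * as the undefined default) always requires VRAM.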
4179 */ 4180 switch (atomic_access) { 4181 case DRM_XE_ATOMIC_DEVICE: 4182 return !xe->info.has_device_atomics_on_smem; 4183 4184 case DRM_XE_ATOMIC_CPU: 4185 return -EACCES; 4186 4187 case DRM_XE_ATOMIC_UNDEFINED: 4188 case DRM_XE_ATOMIC_GLOBAL: 4189 default: 4190 return 1; 4191 } 4192 } 4193 4194 static int xe_vm_alloc_vma(struct xe_vm *vm, 4195 struct drm_gpuvm_map_req *map_req, 4196 bool is_madvise) 4197 { 4198 struct xe_vma_ops vops; 4199 struct drm_gpuva_ops *ops = NULL; 4200 struct drm_gpuva_op *__op; 4201 bool is_cpu_addr_mirror = false; 4202 bool remap_op = false; 4203 struct xe_vma_mem_attr tmp_attr; 4204 u16 default_pat; 4205 int err; 4206 4207 lockdep_assert_held_write(&vm->lock); 4208 4209 if (is_madvise) 4210 ops = drm_gpuvm_madvise_ops_create(&vm->gpuvm, map_req); 4211 else 4212 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, map_req); 4213 4214 if (IS_ERR(ops)) 4215 return PTR_ERR(ops); 4216 4217 if (list_empty(&ops->list)) { 4218 err = 0; 4219 goto free_ops; 4220 } 4221 4222 drm_gpuva_for_each_op(__op, ops) { 4223 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 4224 struct xe_vma *vma = NULL; 4225 4226 if (!is_madvise) { 4227 if (__op->op == DRM_GPUVA_OP_UNMAP) { 4228 vma = gpuva_to_vma(op->base.unmap.va); 4229 XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma)); 4230 default_pat = vma->attr.default_pat_index; 4231 } 4232 4233 if (__op->op == DRM_GPUVA_OP_REMAP) { 4234 vma = gpuva_to_vma(op->base.remap.unmap->va); 4235 default_pat = vma->attr.default_pat_index; 4236 } 4237 4238 if (__op->op == DRM_GPUVA_OP_MAP) { 4239 op->map.is_cpu_addr_mirror = true; 4240 op->map.pat_index = default_pat; 4241 } 4242 } else { 4243 if (__op->op == DRM_GPUVA_OP_REMAP) { 4244 vma = gpuva_to_vma(op->base.remap.unmap->va); 4245 xe_assert(vm->xe, !remap_op); 4246 xe_assert(vm->xe, xe_vma_has_no_bo(vma)); 4247 remap_op = true; 4248 4249 if (xe_vma_is_cpu_addr_mirror(vma)) 4250 is_cpu_addr_mirror = true; 4251 else 4252 is_cpu_addr_mirror = false; 4253 } 4254 4255 if (__op->op == DRM_GPUVA_OP_MAP) { 4256 xe_assert(vm->xe, remap_op); 4257 remap_op = false; 4258 /* 4259 * In case of madvise ops DRM_GPUVA_OP_MAP is 4260 * always after DRM_GPUVA_OP_REMAP, so ensure 4261 * we assign op->map.is_cpu_addr_mirror true 4262 * if REMAP is for xe_vma_is_cpu_addr_mirror vma 4263 */ 4264 op->map.is_cpu_addr_mirror = is_cpu_addr_mirror; 4265 } 4266 } 4267 print_op(vm->xe, __op); 4268 } 4269 4270 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 4271 4272 if (is_madvise) 4273 vops.flags |= XE_VMA_OPS_FLAG_MADVISE; 4274 4275 err = vm_bind_ioctl_ops_parse(vm, ops, &vops); 4276 if (err) 4277 goto unwind_ops; 4278 4279 xe_vm_lock(vm, false); 4280 4281 drm_gpuva_for_each_op(__op, ops) { 4282 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 4283 struct xe_vma *vma; 4284 4285 if (__op->op == DRM_GPUVA_OP_UNMAP) { 4286 vma = gpuva_to_vma(op->base.unmap.va); 4287 /* There should be no unmap for madvise */ 4288 if (is_madvise) 4289 XE_WARN_ON("UNEXPECTED UNMAP"); 4290 4291 xe_vma_destroy(vma, NULL); 4292 } else if (__op->op == DRM_GPUVA_OP_REMAP) { 4293 vma = gpuva_to_vma(op->base.remap.unmap->va); 4294 /* In case of madvise ops Store attributes for REMAP UNMAPPED 4295 * VMA, so they can be assigned to newly MAP created vma. 4296 */ 4297 if (is_madvise) 4298 tmp_attr = vma->attr; 4299 4300 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL); 4301 } else if (__op->op == DRM_GPUVA_OP_MAP) { 4302 vma = op->map.vma; 4303 /* In case of madvise call, MAP will always be follwed by REMAP. 
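 * (In the ops list generated for madvise the REMAP op is processed
 * first and is where the old VMA's attributes are saved in tmp_attr.)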
* Therefore tmp_attr will always have sane values, making it safe to 4305 * copy them to new vma. 4306 */ 4307 if (is_madvise) 4308 vma->attr = tmp_attr; 4309 } 4310 } 4311 4312 xe_vm_unlock(vm); 4313 drm_gpuva_ops_free(&vm->gpuvm, ops); 4314 return 0; 4315 4316 unwind_ops: 4317 vm_bind_ioctl_ops_unwind(vm, &ops, 1); 4318 free_ops: 4319 drm_gpuva_ops_free(&vm->gpuvm, ops); 4320 return err; 4321 } 4322 4323 /** 4324 * xe_vm_alloc_madvise_vma - Allocate VMAs with madvise ops 4325 * @vm: Pointer to the xe_vm structure 4326 * @start: Starting input address 4327 * @range: Size of the input range 4328 * 4329 * This function splits an existing VMA to create a new VMA for the user provided input range 4330 * 4331 * Return: 0 on success, negative error code on failure 4332 */ 4333 int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range) 4334 { 4335 struct drm_gpuvm_map_req map_req = { 4336 .map.va.addr = start, 4337 .map.va.range = range, 4338 }; 4339 4340 lockdep_assert_held_write(&vm->lock); 4341 4342 vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range); 4343 4344 return xe_vm_alloc_vma(vm, &map_req, true); 4345 } 4346 4347 /** 4348 * xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU addr mirror vma 4349 * @vm: Pointer to the xe_vm structure 4350 * @start: Starting input address 4351 * @range: Size of the input range 4352 * 4353 * This function splits/merges existing VMAs to create a new VMA for the user provided input range 4354 * 4355 * Return: 0 on success, negative error code on failure 4356 */ 4357 int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t range) 4358 { 4359 struct drm_gpuvm_map_req map_req = { 4360 .map.va.addr = start, 4361 .map.va.range = range, 4362 }; 4363 4364 lockdep_assert_held_write(&vm->lock); 4365 4366 vm_dbg(&vm->xe->drm, "CPU_ADDR_MIRROR_VMA_OPS_CREATE: addr=0x%016llx, size=0x%016llx", 4367 start, range); 4368 4369 return xe_vm_alloc_vma(vm, &map_req, false); 4370 } 4371