// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_vm.h"

#include <linux/dma-fence-array.h>
#include <linux/nospec.h>

#include <drm/drm_drv.h>
#include <drm/drm_exec.h>
#include <drm/drm_print.h>
#include <drm/ttm/ttm_tt.h>
#include <uapi/drm/xe_drm.h>
#include <linux/ascii85.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/swap.h>

#include <generated/xe_wa_oob.h>

#include "regs/xe_gtt_defs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue.h"
#include "xe_gt_pagefault.h"
#include "xe_migrate.h"
#include "xe_pat.h"
#include "xe_pm.h"
#include "xe_preempt_fence.h"
#include "xe_pt.h"
#include "xe_pxp.h"
#include "xe_res_cursor.h"
#include "xe_svm.h"
#include "xe_sync.h"
#include "xe_tile.h"
#include "xe_tlb_inval.h"
#include "xe_trace_bo.h"
#include "xe_wa.h"

static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
{
	return vm->gpuvm.r_obj;
}

/**
 * xe_vm_drm_exec_lock() - Lock the vm's resv with a drm_exec transaction
 * @vm: The vm whose resv is to be locked.
 * @exec: The drm_exec transaction.
 *
 * Helper to lock the vm's resv as part of a drm_exec transaction.
 *
 * Return: %0 on success. See drm_exec_lock_obj() for error codes.
 */
int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec)
{
	return drm_exec_lock_obj(exec, xe_vm_obj(vm));
}

static bool preempt_fences_waiting(struct xe_vm *vm)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		if (!q->lr.pfence ||
		    test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
			     &q->lr.pfence->flags)) {
			return true;
		}
	}

	return false;
}

static void free_preempt_fences(struct list_head *list)
{
	struct list_head *link, *next;

	list_for_each_safe(link, next, list)
		xe_preempt_fence_free(to_preempt_fence_from_link(link));
}

static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
				unsigned int *count)
{
	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);

	if (*count >= vm->preempt.num_exec_queues)
		return 0;

	for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
		struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();

		if (IS_ERR(pfence))
			return PTR_ERR(pfence);

		list_move_tail(xe_preempt_fence_link(pfence), list);
	}

	return 0;
}

static int wait_for_existing_preempt_fences(struct xe_vm *vm)
{
	struct xe_exec_queue *q;

	xe_vm_assert_held(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		if (q->lr.pfence) {
			long timeout = dma_fence_wait(q->lr.pfence, false);

			/* Only -ETIME on fence indicates VM needs to be killed */
			if (timeout < 0 || q->lr.pfence->error == -ETIME)
				return -ETIME;

			dma_fence_put(q->lr.pfence);
			q->lr.pfence = NULL;
		}
	}

	return 0;
}

static bool xe_vm_is_idle(struct xe_vm *vm)
{
	struct xe_exec_queue *q;

	xe_vm_assert_held(vm);
	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		if (!xe_exec_queue_is_idle(q))
			return false;
	}

	return true;
}

static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
{
	struct list_head *link;
	struct xe_exec_queue *q;

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		struct dma_fence *fence;

		link = list->next;
		xe_assert(vm->xe, link != list);

		fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
					     q, q->lr.context,
					     ++q->lr.seqno);
		dma_fence_put(q->lr.pfence);
		q->lr.pfence = fence;
	}
}

static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
{
	struct xe_exec_queue *q;
	int err;

	xe_bo_assert_held(bo);

	if (!vm->preempt.num_exec_queues)
		return 0;

	err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
	if (err)
		return err;

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
		if (q->lr.pfence) {
			dma_resv_add_fence(bo->ttm.base.resv,
					   q->lr.pfence,
					   DMA_RESV_USAGE_BOOKKEEP);
		}

	return 0;
}

static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
						struct drm_exec *exec)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		q->ops->resume(q);

		drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
					 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
	}
}

int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	struct drm_gpuvm_exec vm_exec = {
		.vm = &vm->gpuvm,
		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
		.num_fences = 1,
	};
	struct drm_exec *exec = &vm_exec.exec;
	struct xe_validation_ctx ctx;
	struct dma_fence *pfence;
	int err;
	bool wait;

	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));

	down_write(&vm->lock);
	err = xe_validation_exec_lock(&ctx, &vm_exec, &vm->xe->val);
	if (err)
		goto out_up_write;

	pfence = xe_preempt_fence_create(q, q->lr.context,
					 ++q->lr.seqno);
	if (IS_ERR(pfence)) {
		err = PTR_ERR(pfence);
		goto out_fini;
	}

	list_add(&q->lr.link, &vm->preempt.exec_queues);
	++vm->preempt.num_exec_queues;
	q->lr.pfence = pfence;

	xe_svm_notifier_lock(vm);

	drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
				 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);

	/*
	 * Check to see if a preemption on the VM or a userptr invalidation is
	 * in flight; if so, trigger this preempt fence to sync state with
	 * other preempt fences on the VM.
	 */
	wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
	if (wait)
		dma_fence_enable_sw_signaling(pfence);

	xe_svm_notifier_unlock(vm);

out_fini:
	xe_validation_ctx_fini(&ctx);
out_up_write:
	up_write(&vm->lock);

	return err;
}
ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO);

/**
 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
 * @vm: The VM.
 * @q: The exec_queue
 *
 * Note that this function might be called multiple times on the same queue.
 */
void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	if (!xe_vm_in_preempt_fence_mode(vm))
		return;

	down_write(&vm->lock);
	if (!list_empty(&q->lr.link)) {
		list_del_init(&q->lr.link);
		--vm->preempt.num_exec_queues;
	}
	if (q->lr.pfence) {
		dma_fence_enable_sw_signaling(q->lr.pfence);
		dma_fence_put(q->lr.pfence);
		q->lr.pfence = NULL;
	}
	up_write(&vm->lock);
}

#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000

/**
 * xe_vm_kill() - VM Kill
 * @vm: The VM.
 * @unlocked: Flag indicating that the VM's dma-resv is not held
 *
 * Kill the VM by setting the banned flag, indicating the VM is no longer
 * available for use. If in preempt fence mode, also kill all exec queues
 * attached to the VM.
 */
void xe_vm_kill(struct xe_vm *vm, bool unlocked)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);

	if (unlocked)
		xe_vm_lock(vm, false);

	vm->flags |= XE_VM_FLAG_BANNED;
	trace_xe_vm_kill(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
		q->ops->kill(q);

	if (unlocked)
		xe_vm_unlock(vm);

	/* TODO: Inform user the VM is banned */
}

static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
{
	struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
	struct drm_gpuva *gpuva;
	int ret;

	lockdep_assert_held(&vm->lock);
	drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
		list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
			       &vm->rebind_list);

	if (!try_wait_for_completion(&vm->xe->pm_block))
		return -EAGAIN;

	ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false, exec);
	if (ret)
		return ret;

	vm_bo->evicted = false;
	return 0;
}

/**
 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
 * @vm: The vm for which we are rebinding.
 * @exec: The struct drm_exec with the locked GEM objects.
 * @num_fences: The number of fences to reserve for the operation, not
 * including rebinds and validations.
 *
 * Validates all evicted gem objects and rebinds their vmas. Note that
 * rebindings may cause evictions and hence the validation-rebind
 * sequence is rerun until there are no more objects to validate.
 *
 * Return: 0 on success, negative error code on error. In particular,
 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
 * the drm_exec transaction needs to be restarted.
 */
int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
			  unsigned int num_fences)
{
	struct drm_gem_object *obj;
	unsigned long index;
	int ret;

	do {
		ret = drm_gpuvm_validate(&vm->gpuvm, exec);
		if (ret)
			return ret;

		ret = xe_vm_rebind(vm, false);
		if (ret)
			return ret;
	} while (!list_empty(&vm->gpuvm.evict.list));

	drm_exec_for_each_locked_object(exec, index, obj) {
		ret = dma_resv_reserve_fences(obj->resv, num_fences);
		if (ret)
			return ret;
	}

	return 0;
}

static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
				 bool *done)
{
	int err;

	err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	if (xe_vm_is_idle(vm)) {
		vm->preempt.rebind_deactivated = true;
		*done = true;
		return 0;
	}

	if (!preempt_fences_waiting(vm)) {
		*done = true;
		return 0;
	}

	err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	err = wait_for_existing_preempt_fences(vm);
	if (err)
		return err;

	/*
	 * Add validation and rebinding to the locking loop since both can
	 * cause evictions which may require blocking dma_resv locks.
	 * The fence reservation here is intended for the new preempt fences
	 * we attach at the end of the rebind work.
	 */
	return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
}

static bool vm_suspend_rebind_worker(struct xe_vm *vm)
{
	struct xe_device *xe = vm->xe;
	bool ret = false;

	mutex_lock(&xe->rebind_resume_lock);
	if (!try_wait_for_completion(&vm->xe->pm_block)) {
		ret = true;
		list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list);
	}
	mutex_unlock(&xe->rebind_resume_lock);

	return ret;
}

/**
 * xe_vm_resume_rebind_worker() - Resume the rebind worker.
 * @vm: The vm whose preempt worker to resume.
 *
 * Resume a preempt worker that was previously suspended by
 * vm_suspend_rebind_worker().
 */
void xe_vm_resume_rebind_worker(struct xe_vm *vm)
{
	queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work);
}

static void preempt_rebind_work_func(struct work_struct *w)
{
	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	unsigned int fence_count = 0;
	LIST_HEAD(preempt_fences);
	int err = 0;
	long wait;
	int __maybe_unused tries = 0;

	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
	trace_xe_vm_rebind_worker_enter(vm);

	down_write(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm)) {
		up_write(&vm->lock);
		trace_xe_vm_rebind_worker_exit(vm);
		return;
	}

retry:
	if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) {
		up_write(&vm->lock);
		return;
	}

	if (xe_vm_userptr_check_repin(vm)) {
		err = xe_vm_userptr_pin(vm);
		if (err)
			goto out_unlock_outer;
	}

	err = xe_validation_ctx_init(&ctx, &vm->xe->val, &exec,
				     (struct xe_val_flags) {.interruptible = true});
	if (err)
		goto out_unlock_outer;

	drm_exec_until_all_locked(&exec) {
		bool done = false;

		err = xe_preempt_work_begin(&exec, vm, &done);
		drm_exec_retry_on_contention(&exec);
		xe_validation_retry_on_oom(&ctx, &err);
		if (err || done) {
			xe_validation_ctx_fini(&ctx);
			goto out_unlock_outer;
		}
	}

	err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
	if (err)
		goto out_unlock;

	xe_vm_set_validation_exec(vm, &exec);
	err = xe_vm_rebind(vm, true);
	xe_vm_set_validation_exec(vm, NULL);
	if (err)
		goto out_unlock;

	/* Wait on rebinds and munmap style VM unbinds */
	wait = dma_resv_wait_timeout(xe_vm_resv(vm),
				     DMA_RESV_USAGE_KERNEL,
				     false, MAX_SCHEDULE_TIMEOUT);
	if (wait <= 0) {
		err = -ETIME;
		goto out_unlock;
	}

#define retry_required(__tries, __vm) \
	(IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
	(!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
	__xe_vm_userptr_needs_repin(__vm))

	xe_svm_notifier_lock(vm);
	if (retry_required(tries, vm)) {
		xe_svm_notifier_unlock(vm);
		err = -EAGAIN;
		goto out_unlock;
	}

#undef retry_required

	spin_lock(&vm->xe->ttm.lru_lock);
	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
	spin_unlock(&vm->xe->ttm.lru_lock);

	/* Point of no return. */
	arm_preempt_fences(vm, &preempt_fences);
	resume_and_reinstall_preempt_fences(vm, &exec);
	xe_svm_notifier_unlock(vm);

out_unlock:
	xe_validation_ctx_fini(&ctx);
out_unlock_outer:
	if (err == -EAGAIN) {
		trace_xe_vm_rebind_worker_retry(vm);
		goto retry;
	}

	if (err) {
		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
		xe_vm_kill(vm, true);
	}
	up_write(&vm->lock);

	free_preempt_fences(&preempt_fences);

	trace_xe_vm_rebind_worker_exit(vm);
}

static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
{
	int i;

	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) {
		if (!vops->pt_update_ops[i].num_ops)
			continue;

		vops->pt_update_ops[i].ops =
			kmalloc_array(vops->pt_update_ops[i].num_ops,
				      sizeof(*vops->pt_update_ops[i].ops),
				      GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
		if (!vops->pt_update_ops[i].ops)
			return array_of_binds ? -ENOBUFS : -ENOMEM;
	}

	return 0;
}
ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO);

static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op)
{
	struct xe_vma *vma;

	vma = gpuva_to_vma(op->base.prefetch.va);

	if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma))
		xa_destroy(&op->prefetch_range.range);
}

static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops)
{
	struct xe_vma_op *op;

	if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
		return;

	list_for_each_entry(op, &vops->list, link)
		xe_vma_svm_prefetch_op_fini(op);
}

static void xe_vma_ops_fini(struct xe_vma_ops *vops)
{
	int i;

	xe_vma_svm_prefetch_ops_fini(vops);

	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
		kfree(vops->pt_update_ops[i].ops);
}

static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val)
{
	int i;

	if (!inc_val)
		return;

	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
		if (BIT(i) & tile_mask)
			vops->pt_update_ops[i].num_ops += inc_val;
}

static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
				  u8 tile_mask)
{
	INIT_LIST_HEAD(&op->link);
	op->tile_mask = tile_mask;
	op->base.op = DRM_GPUVA_OP_MAP;
	op->base.map.va.addr = vma->gpuva.va.addr;
	op->base.map.va.range = vma->gpuva.va.range;
	op->base.map.gem.obj = vma->gpuva.gem.obj;
	op->base.map.gem.offset = vma->gpuva.gem.offset;
	op->map.vma = vma;
	op->map.immediate = true;
	op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE;
	op->map.is_null = xe_vma_is_null(vma);
}

static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
				u8 tile_mask)
{
	struct xe_vma_op *op;

	op = kzalloc(sizeof(*op), GFP_KERNEL);
	if (!op)
		return -ENOMEM;

	xe_vm_populate_rebind(op, vma, tile_mask);
	list_add_tail(&op->link, &vops->list);
	xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);

	return 0;
}

static struct dma_fence *ops_execute(struct xe_vm *vm,
				     struct xe_vma_ops *vops);
static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
			    struct xe_exec_queue *q,
			    struct xe_sync_entry *syncs, u32 num_syncs);
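
/*
 * Illustrative overview (sketch only, not a buildable caller): the rebind
 * helpers below all follow the same xe_vma_ops lifecycle:
 *
 *	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
 *	err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
 *	if (!err)
 *		err = xe_vma_ops_alloc(&vops, false);
 *	if (!err)
 *		fence = ops_execute(vm, &vops);
 *	... free the ops queued on vops.list ...
 *	xe_vma_ops_fini(&vops);
 */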

int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
{
	struct dma_fence *fence;
	struct xe_vma *vma, *next;
	struct xe_vma_ops vops;
	struct xe_vma_op *op, *next_op;
	int err, i;

	lockdep_assert_held(&vm->lock);
	if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
	    list_empty(&vm->rebind_list))
		return 0;

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
		vops.pt_update_ops[i].wait_vm_bookkeep = true;

	xe_vm_assert_held(vm);
	list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
		xe_assert(vm->xe, vma->tile_present);

		if (rebind_worker)
			trace_xe_vma_rebind_worker(vma);
		else
			trace_xe_vma_rebind_exec(vma);

		err = xe_vm_ops_add_rebind(&vops, vma,
					   vma->tile_present);
		if (err)
			goto free_ops;
	}

	err = xe_vma_ops_alloc(&vops, false);
	if (err)
		goto free_ops;

	fence = ops_execute(vm, &vops);
	if (IS_ERR(fence)) {
		err = PTR_ERR(fence);
	} else {
		dma_fence_put(fence);
		list_for_each_entry_safe(vma, next, &vm->rebind_list,
					 combined_links.rebind)
			list_del_init(&vma->combined_links.rebind);
	}
free_ops:
	list_for_each_entry_safe(op, next_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return err;
}

struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask)
{
	struct dma_fence *fence = NULL;
	struct xe_vma_ops vops;
	struct xe_vma_op *op, *next_op;
	struct xe_tile *tile;
	u8 id;
	int err;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);
	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	for_each_tile(tile, vm->xe, id) {
		vops.pt_update_ops[id].wait_vm_bookkeep = true;
		vops.pt_update_ops[tile->id].q =
			xe_migrate_exec_queue(tile->migrate);
	}

	err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
	if (err)
		return ERR_PTR(err);

	err = xe_vma_ops_alloc(&vops, false);
	if (err) {
		fence = ERR_PTR(err);
		goto free_ops;
	}

	fence = ops_execute(vm, &vops);

free_ops:
	list_for_each_entry_safe(op, next_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return fence;
}

static void xe_vm_populate_range_rebind(struct xe_vma_op *op,
					struct xe_vma *vma,
					struct xe_svm_range *range,
					u8 tile_mask)
{
	INIT_LIST_HEAD(&op->link);
	op->tile_mask = tile_mask;
	op->base.op = DRM_GPUVA_OP_DRIVER;
	op->subop = XE_VMA_SUBOP_MAP_RANGE;
	op->map_range.vma = vma;
	op->map_range.range = range;
}

static int
xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops,
			   struct xe_vma *vma,
			   struct xe_svm_range *range,
			   u8 tile_mask)
{
	struct xe_vma_op *op;

	op = kzalloc(sizeof(*op), GFP_KERNEL);
	if (!op)
		return -ENOMEM;

	xe_vm_populate_range_rebind(op, vma, range, tile_mask);
	list_add_tail(&op->link, &vops->list);
	xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);

	return 0;
}

/**
 * xe_vm_range_rebind() - VM range (re)bind
 * @vm: The VM which the range belongs to.
 * @vma: The VMA which the range belongs to.
 * @range: SVM range to rebind.
 * @tile_mask: Tile mask to bind the range to.
 *
 * (re)bind SVM range setting up GPU page tables for the range.
 *
 * Return: dma fence for rebind to signal completion on success, ERR_PTR on
 * failure
 */
struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
				     struct xe_vma *vma,
				     struct xe_svm_range *range,
				     u8 tile_mask)
{
	struct dma_fence *fence = NULL;
	struct xe_vma_ops vops;
	struct xe_vma_op *op, *next_op;
	struct xe_tile *tile;
	u8 id;
	int err;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);
	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
	xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	for_each_tile(tile, vm->xe, id) {
		vops.pt_update_ops[id].wait_vm_bookkeep = true;
		vops.pt_update_ops[tile->id].q =
			xe_migrate_exec_queue(tile->migrate);
	}

	err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask);
	if (err)
		return ERR_PTR(err);

	err = xe_vma_ops_alloc(&vops, false);
	if (err) {
		fence = ERR_PTR(err);
		goto free_ops;
	}

	fence = ops_execute(vm, &vops);

free_ops:
	list_for_each_entry_safe(op, next_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return fence;
}

static void xe_vm_populate_range_unbind(struct xe_vma_op *op,
					struct xe_svm_range *range)
{
	INIT_LIST_HEAD(&op->link);
	op->tile_mask = range->tile_present;
	op->base.op = DRM_GPUVA_OP_DRIVER;
	op->subop = XE_VMA_SUBOP_UNMAP_RANGE;
	op->unmap_range.range = range;
}

static int
xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops,
			   struct xe_svm_range *range)
{
	struct xe_vma_op *op;

	op = kzalloc(sizeof(*op), GFP_KERNEL);
	if (!op)
		return -ENOMEM;

	xe_vm_populate_range_unbind(op, range);
	list_add_tail(&op->link, &vops->list);
	xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1);

	return 0;
}

/**
 * xe_vm_range_unbind() - VM range unbind
 * @vm: The VM which the range belongs to.
 * @range: SVM range to unbind.
 *
 * Unbind SVM range removing the GPU page tables for the range.
 *
 * Return: dma fence for unbind to signal completion on success, ERR_PTR on
 * failure
 */
struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
				     struct xe_svm_range *range)
{
	struct dma_fence *fence = NULL;
	struct xe_vma_ops vops;
	struct xe_vma_op *op, *next_op;
	struct xe_tile *tile;
	u8 id;
	int err;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);
	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));

	if (!range->tile_present)
		return dma_fence_get_stub();

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	for_each_tile(tile, vm->xe, id) {
		vops.pt_update_ops[id].wait_vm_bookkeep = true;
		vops.pt_update_ops[tile->id].q =
			xe_migrate_exec_queue(tile->migrate);
	}

	err = xe_vm_ops_add_range_unbind(&vops, range);
	if (err)
		return ERR_PTR(err);

	err = xe_vma_ops_alloc(&vops, false);
	if (err) {
		fence = ERR_PTR(err);
		goto free_ops;
	}

	fence = ops_execute(vm, &vops);

free_ops:
	list_for_each_entry_safe(op, next_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return fence;
}

static void xe_vma_free(struct xe_vma *vma)
{
	if (xe_vma_is_userptr(vma))
		kfree(to_userptr_vma(vma));
	else
		kfree(vma);
}

#define VMA_CREATE_FLAG_READ_ONLY		BIT(0)
#define VMA_CREATE_FLAG_IS_NULL			BIT(1)
#define VMA_CREATE_FLAG_DUMPABLE		BIT(2)
#define VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR	BIT(3)

static struct xe_vma *xe_vma_create(struct xe_vm *vm,
				    struct xe_bo *bo,
				    u64 bo_offset_or_userptr,
				    u64 start, u64 end,
				    struct xe_vma_mem_attr *attr,
				    unsigned int flags)
{
	struct xe_vma *vma;
	struct xe_tile *tile;
	u8 id;
	bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
	bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
	bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
	bool is_cpu_addr_mirror =
		(flags & VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR);

	xe_assert(vm->xe, start < end);
	xe_assert(vm->xe, end < vm->size);

	/*
	 * Allocate and ensure that the xe_vma_is_userptr() return
	 * matches what was allocated.
	 */
	if (!bo && !is_null && !is_cpu_addr_mirror) {
		struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL);

		if (!uvma)
			return ERR_PTR(-ENOMEM);

		vma = &uvma->vma;
	} else {
		vma = kzalloc(sizeof(*vma), GFP_KERNEL);
		if (!vma)
			return ERR_PTR(-ENOMEM);

		if (is_cpu_addr_mirror)
			vma->gpuva.flags |= XE_VMA_SYSTEM_ALLOCATOR;
		if (is_null)
			vma->gpuva.flags |= DRM_GPUVA_SPARSE;
		if (bo)
			vma->gpuva.gem.obj = &bo->ttm.base;
	}

	INIT_LIST_HEAD(&vma->combined_links.rebind);

	INIT_LIST_HEAD(&vma->gpuva.gem.entry);
	vma->gpuva.vm = &vm->gpuvm;
	vma->gpuva.va.addr = start;
	vma->gpuva.va.range = end - start + 1;
	if (read_only)
		vma->gpuva.flags |= XE_VMA_READ_ONLY;
	if (dumpable)
		vma->gpuva.flags |= XE_VMA_DUMPABLE;

	for_each_tile(tile, vm->xe, id)
		vma->tile_mask |= 0x1 << id;

	if (vm->xe->info.has_atomic_enable_pte_bit)
		vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;

	vma->attr = *attr;

	if (bo) {
		struct drm_gpuvm_bo *vm_bo;

		xe_bo_assert_held(bo);

		vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
		if (IS_ERR(vm_bo)) {
			xe_vma_free(vma);
			return ERR_CAST(vm_bo);
		}

		drm_gpuvm_bo_extobj_add(vm_bo);
		drm_gem_object_get(&bo->ttm.base);
		vma->gpuva.gem.offset = bo_offset_or_userptr;
		drm_gpuva_link(&vma->gpuva, vm_bo);
		drm_gpuvm_bo_put(vm_bo);
	} else /* userptr or null */ {
		if (!is_null && !is_cpu_addr_mirror) {
			struct xe_userptr_vma *uvma = to_userptr_vma(vma);
			u64 size = end - start + 1;
			int err;

			vma->gpuva.gem.offset = bo_offset_or_userptr;

			err = xe_userptr_setup(uvma, xe_vma_userptr(vma), size);
			if (err) {
				xe_vma_free(vma);
				return ERR_PTR(err);
			}
		}

		xe_vm_get(vm);
	}

	return vma;
}

static void xe_vma_destroy_late(struct xe_vma *vma)
{
	struct xe_vm *vm = xe_vma_vm(vma);

	if (vma->ufence) {
		xe_sync_ufence_put(vma->ufence);
		vma->ufence = NULL;
	}

	if (xe_vma_is_userptr(vma)) {
		struct xe_userptr_vma *uvma = to_userptr_vma(vma);

		xe_userptr_remove(uvma);
		xe_vm_put(vm);
	} else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) {
		xe_vm_put(vm);
	} else {
		xe_bo_put(xe_vma_bo(vma));
	}

	xe_vma_free(vma);
}

static void vma_destroy_work_func(struct work_struct *w)
{
	struct xe_vma *vma =
		container_of(w, struct xe_vma, destroy_work);

	xe_vma_destroy_late(vma);
}

static void vma_destroy_cb(struct dma_fence *fence,
			   struct dma_fence_cb *cb)
{
	struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);

	INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
	queue_work(system_unbound_wq, &vma->destroy_work);
}

static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
{
	struct xe_vm *vm = xe_vma_vm(vma);

	lockdep_assert_held_write(&vm->lock);
	xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));

	if (xe_vma_is_userptr(vma)) {
		xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
		xe_userptr_destroy(to_userptr_vma(vma));
	} else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) {
		xe_bo_assert_held(xe_vma_bo(vma));

		drm_gpuva_unlink(&vma->gpuva);
	}

	xe_vm_assert_held(vm);
	if (fence) {
		int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
						 vma_destroy_cb);

		if (ret) {
			XE_WARN_ON(ret != -ENOENT);
			xe_vma_destroy_late(vma);
		}
	} else {
		xe_vma_destroy_late(vma);
	}
}

/**
 * xe_vm_lock_vma() - drm_exec utility to lock a vma
 * @exec: The drm_exec object we're currently locking for.
 * @vma: The vma for which we want to lock the vm resv and any attached
 * object's resv.
 *
 * Return: 0 on success, negative error code on error. In particular
 * may return -EDEADLK on WW transaction contention and -EINTR if
 * an interruptible wait is terminated by a signal.
 */
int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
{
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_bo *bo = xe_vma_bo(vma);
	int err;

	XE_WARN_ON(!vm);

	err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
	if (!err && bo && !bo->vm)
		err = drm_exec_lock_obj(exec, &bo->ttm.base);

	return err;
}

static void xe_vma_destroy_unlocked(struct xe_vma *vma)
{
	struct xe_device *xe = xe_vma_vm(vma)->xe;
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	int err = 0;

	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
		err = xe_vm_lock_vma(&exec, vma);
		drm_exec_retry_on_contention(&exec);
		if (XE_WARN_ON(err))
			break;
		xe_vma_destroy(vma, NULL);
	}
	xe_assert(xe, !err);
}

struct xe_vma *
xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
{
	struct drm_gpuva *gpuva;

	lockdep_assert_held(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm))
		return NULL;

	xe_assert(vm->xe, start + range <= vm->size);

	gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);

	return gpuva ? gpuva_to_vma(gpuva) : NULL;
}

static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
{
	int err;

	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
	lockdep_assert_held(&vm->lock);

	mutex_lock(&vm->snap_mutex);
	err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
	mutex_unlock(&vm->snap_mutex);
	XE_WARN_ON(err);	/* Shouldn't be possible */

	return err;
}

static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
{
	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
	lockdep_assert_held(&vm->lock);

	mutex_lock(&vm->snap_mutex);
	drm_gpuva_remove(&vma->gpuva);
	mutex_unlock(&vm->snap_mutex);
	if (vm->usm.last_fault_vma == vma)
		vm->usm.last_fault_vma = NULL;
}

static struct drm_gpuva_op *xe_vm_op_alloc(void)
{
	struct xe_vma_op *op;

	op = kzalloc(sizeof(*op), GFP_KERNEL);

	if (unlikely(!op))
		return NULL;

	return &op->base;
}

static void xe_vm_free(struct drm_gpuvm *gpuvm);

static const struct drm_gpuvm_ops gpuvm_ops = {
	.op_alloc = xe_vm_op_alloc,
	.vm_bo_validate = xe_gpuvm_validate,
	.vm_free = xe_vm_free,
};

static u64 pde_encode_pat_index(u16 pat_index)
{
	u64 pte = 0;

	if (pat_index & BIT(0))
		pte |= XE_PPGTT_PTE_PAT0;

	if (pat_index & BIT(1))
		pte |= XE_PPGTT_PTE_PAT1;

	return pte;
}

static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level)
{
	u64 pte = 0;

	if (pat_index & BIT(0))
		pte |= XE_PPGTT_PTE_PAT0;

	if (pat_index & BIT(1))
		pte |= XE_PPGTT_PTE_PAT1;

	if (pat_index & BIT(2)) {
		if (pt_level)
			pte |= XE_PPGTT_PDE_PDPE_PAT2;
		else
			pte |= XE_PPGTT_PTE_PAT2;
	}

	if (pat_index & BIT(3))
		pte |= XELPG_PPGTT_PTE_PAT3;

	if (pat_index & (BIT(4)))
		pte |= XE2_PPGTT_PTE_PAT4;

	return pte;
}

static u64 pte_encode_ps(u32 pt_level)
{
	XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);

	if (pt_level == 1)
		return XE_PDE_PS_2M;
	else if (pt_level == 2)
		return XE_PDPE_PS_1G;

	return 0;
}

static u16 pde_pat_index(struct xe_bo *bo)
{
	struct xe_device *xe = xe_bo_device(bo);
	u16 pat_index;

	/*
	 * We only have two bits to encode the PAT index in non-leaf nodes, but
	 * these only point to other paging structures so we only need a minimal
	 * selection of options. The user PAT index is only for encoding leaf
	 * nodes, where we have use of more bits to do the encoding. The
	 * non-leaf nodes are instead under driver control so the chosen index
	 * here should be distinct from the user PAT index. Also the
	 * corresponding coherency of the PAT index should be tied to the
	 * allocation type of the page table (or at least we should pick
	 * something which is always safe).
	 */
	if (!xe_bo_is_vram(bo) && bo->ttm.ttm->caching == ttm_cached)
		pat_index = xe->pat.idx[XE_CACHE_WB];
	else
		pat_index = xe->pat.idx[XE_CACHE_NONE];

	xe_assert(xe, pat_index <= 3);

	return pat_index;
}

static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset)
{
	u64 pde;

	pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
	pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
	pde |= pde_encode_pat_index(pde_pat_index(bo));

	return pde;
}

static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
			      u16 pat_index, u32 pt_level)
{
	u64 pte;

	pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
	pte |= pte_encode_pat_index(pat_index, pt_level);
	pte |= pte_encode_ps(pt_level);

	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
		pte |= XE_PPGTT_PTE_DM;

	return pte;
}

static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
			       u16 pat_index, u32 pt_level)
{
	pte |= XE_PAGE_PRESENT;

	if (likely(!xe_vma_read_only(vma)))
		pte |= XE_PAGE_RW;

	pte |= pte_encode_pat_index(pat_index, pt_level);
	pte |= pte_encode_ps(pt_level);

	if (unlikely(xe_vma_is_null(vma)))
		pte |= XE_PTE_NULL;

	return pte;
}

static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
				u16 pat_index,
				u32 pt_level, bool devmem, u64 flags)
{
	u64 pte;

	/* Avoid passing random bits directly as flags */
	xe_assert(xe, !(flags & ~XE_PTE_PS64));

	pte = addr;
	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
	pte |= pte_encode_pat_index(pat_index, pt_level);
	pte |= pte_encode_ps(pt_level);

	if (devmem)
		pte |= XE_PPGTT_PTE_DM;

	pte |= flags;

	return pte;
}

static const struct xe_pt_ops xelp_pt_ops = {
	.pte_encode_bo = xelp_pte_encode_bo,
	.pte_encode_vma = xelp_pte_encode_vma,
	.pte_encode_addr = xelp_pte_encode_addr,
	.pde_encode_bo = xelp_pde_encode_bo,
};

static void vm_destroy_work_func(struct work_struct *w);

/**
 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
 * given tile and vm.
 * @xe: xe device.
 * @tile: tile to set up for.
 * @vm: vm to set up for.
 * @exec: The struct drm_exec object used to lock the vm resv.
 *
 * Sets up a pagetable tree with one page-table per level and a single
 * leaf PTE. All pagetable entries point to the single page-table or,
 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and
 * writes become NOPs.
 *
 * Return: 0 on success, negative error code on error.
 */
static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
				struct xe_vm *vm, struct drm_exec *exec)
{
	u8 id = tile->id;
	int i;

	for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
		vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i, exec);
		if (IS_ERR(vm->scratch_pt[id][i])) {
			int err = PTR_ERR(vm->scratch_pt[id][i]);

			vm->scratch_pt[id][i] = NULL;
			return err;
		}
		xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
	}

	return 0;
}
ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO);

static void xe_vm_free_scratch(struct xe_vm *vm)
{
	struct xe_tile *tile;
	u8 id;

	if (!xe_vm_has_scratch(vm))
		return;

	for_each_tile(tile, vm->xe, id) {
		u32 i;

		if (!vm->pt_root[id])
			continue;

		for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
			if (vm->scratch_pt[id][i])
				xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
	}
}

static void xe_vm_pt_destroy(struct xe_vm *vm)
{
	struct xe_tile *tile;
	u8 id;

	xe_vm_assert_held(vm);

	for_each_tile(tile, vm->xe, id) {
		if (vm->pt_root[id]) {
			xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
			vm->pt_root[id] = NULL;
		}
	}
}

struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
{
	struct drm_gem_object *vm_resv_obj;
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	struct xe_vm *vm;
	int err, number_tiles = 0;
	struct xe_tile *tile;
	u8 id;

	/*
	 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to
	 * ever be in faulting mode.
	 */
	xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE)));

	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
	if (!vm)
		return ERR_PTR(-ENOMEM);

	vm->xe = xe;

	vm->size = 1ull << xe->info.va_bits;
	vm->flags = flags;

	if (xef)
		vm->xef = xe_file_get(xef);
	/*
	 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be
	 * manipulated under the PXP mutex. However, the PXP mutex can be taken
	 * under a user-VM lock when the PXP session is started at exec_queue
	 * creation time. Those are different VMs and therefore there is no risk
	 * of deadlock, but we need to tell lockdep that this is the case or it
	 * will print a warning.
	 */
	if (flags & XE_VM_FLAG_GSC) {
		static struct lock_class_key gsc_vm_key;

		__init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key);
	} else {
		init_rwsem(&vm->lock);
	}
	mutex_init(&vm->snap_mutex);

	INIT_LIST_HEAD(&vm->rebind_list);

	INIT_LIST_HEAD(&vm->userptr.repin_list);
	INIT_LIST_HEAD(&vm->userptr.invalidated);
	spin_lock_init(&vm->userptr.invalidated_lock);

	ttm_lru_bulk_move_init(&vm->lru_bulk_move);

	INIT_WORK(&vm->destroy_work, vm_destroy_work_func);

	INIT_LIST_HEAD(&vm->preempt.exec_queues);
	vm->preempt.min_run_period_ms = 10;	/* FIXME: Wire up to uAPI */

	for_each_tile(tile, xe, id)
		xe_range_fence_tree_init(&vm->rftree[id]);

	vm->pt_ops = &xelp_pt_ops;

	/*
	 * Long-running workloads are not protected by the scheduler
	 * references. By design, run_job for long-running workloads returns
	 * NULL and the scheduler drops all of its references, hence protecting
	 * the VM for this case is necessary.
	 */
	if (flags & XE_VM_FLAG_LR_MODE) {
		INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
		xe_pm_runtime_get_noresume(xe);
		INIT_LIST_HEAD(&vm->preempt.pm_activate_link);
	}

	err = xe_svm_init(vm);
	if (err)
		goto err_no_resv;

	vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
	if (!vm_resv_obj) {
		err = -ENOMEM;
		goto err_svm_fini;
	}

	drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
		       vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);

	drm_gem_object_put(vm_resv_obj);

	err = 0;
	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
			    err) {
		err = xe_vm_drm_exec_lock(vm, &exec);
		drm_exec_retry_on_contention(&exec);

		if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
			vm->flags |= XE_VM_FLAG_64K;

		for_each_tile(tile, xe, id) {
			if (flags & XE_VM_FLAG_MIGRATION &&
			    tile->id != XE_VM_FLAG_TILE_ID(flags))
				continue;

			vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level,
						       &exec);
			if (IS_ERR(vm->pt_root[id])) {
				err = PTR_ERR(vm->pt_root[id]);
				vm->pt_root[id] = NULL;
				xe_vm_pt_destroy(vm);
				drm_exec_retry_on_contention(&exec);
				xe_validation_retry_on_oom(&ctx, &err);
				break;
			}
		}
		if (err)
			break;

		if (xe_vm_has_scratch(vm)) {
			for_each_tile(tile, xe, id) {
				if (!vm->pt_root[id])
					continue;

				err = xe_vm_create_scratch(xe, tile, vm, &exec);
				if (err) {
					xe_vm_free_scratch(vm);
					xe_vm_pt_destroy(vm);
					drm_exec_retry_on_contention(&exec);
					xe_validation_retry_on_oom(&ctx, &err);
					break;
				}
			}
			if (err)
				break;
			vm->batch_invalidate_tlb = true;
		}

		if (vm->flags & XE_VM_FLAG_LR_MODE) {
			INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
			vm->batch_invalidate_tlb = false;
		}

		/* Fill pt_root after allocating scratch tables */
		for_each_tile(tile, xe, id) {
			if (!vm->pt_root[id])
				continue;

			xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
		}
	}
	if (err)
		goto err_close;

	/* Kernel migration VM shouldn't have a circular loop.. */
	if (!(flags & XE_VM_FLAG_MIGRATION)) {
		for_each_tile(tile, xe, id) {
			struct xe_exec_queue *q;
			u32 create_flags = EXEC_QUEUE_FLAG_VM;

			if (!vm->pt_root[id])
				continue;

			q = xe_exec_queue_create_bind(xe, tile, create_flags, 0);
			if (IS_ERR(q)) {
				err = PTR_ERR(q);
				goto err_close;
			}
			vm->q[id] = q;
			number_tiles++;
		}
	}

	if (number_tiles > 1)
		vm->composite_fence_ctx = dma_fence_context_alloc(1);

	if (xef && xe->info.has_asid) {
		u32 asid;

		down_write(&xe->usm.lock);
		err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
				      XA_LIMIT(1, XE_MAX_ASID - 1),
				      &xe->usm.next_asid, GFP_KERNEL);
		up_write(&xe->usm.lock);
		if (err < 0)
			goto err_close;

		vm->usm.asid = asid;
	}

	trace_xe_vm_create(vm);

	return vm;

err_close:
	xe_vm_close_and_put(vm);
	return ERR_PTR(err);

err_svm_fini:
	if (flags & XE_VM_FLAG_FAULT_MODE) {
		vm->size = 0; /* close the vm */
		xe_svm_fini(vm);
	}
err_no_resv:
	mutex_destroy(&vm->snap_mutex);
	for_each_tile(tile, xe, id)
		xe_range_fence_tree_fini(&vm->rftree[id]);
	ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
	if (vm->xef)
		xe_file_put(vm->xef);
	kfree(vm);
	if (flags & XE_VM_FLAG_LR_MODE)
		xe_pm_runtime_put(xe);
	return ERR_PTR(err);
}

static void xe_vm_close(struct xe_vm *vm)
{
	struct xe_device *xe = vm->xe;
	bool bound;
	int idx;

	bound = drm_dev_enter(&xe->drm, &idx);

	down_write(&vm->lock);
	if (xe_vm_in_fault_mode(vm))
		xe_svm_notifier_lock(vm);

	vm->size = 0;

	if (!((vm->flags & XE_VM_FLAG_MIGRATION))) {
		struct xe_tile *tile;
		struct xe_gt *gt;
		u8 id;

		/* Wait for pending binds */
		dma_resv_wait_timeout(xe_vm_resv(vm),
				      DMA_RESV_USAGE_BOOKKEEP,
				      false, MAX_SCHEDULE_TIMEOUT);

		if (bound) {
			for_each_tile(tile, xe, id)
				if (vm->pt_root[id])
					xe_pt_clear(xe, vm->pt_root[id]);

			for_each_gt(gt, xe, id)
				xe_tlb_inval_vm(&gt->tlb_inval, vm);
		}
	}

	if (xe_vm_in_fault_mode(vm))
		xe_svm_notifier_unlock(vm);
	up_write(&vm->lock);

	if (bound)
		drm_dev_exit(idx);
}

void xe_vm_close_and_put(struct xe_vm *vm)
{
	LIST_HEAD(contested);
	struct xe_device *xe = vm->xe;
	struct xe_tile *tile;
	struct xe_vma *vma, *next_vma;
	struct drm_gpuva *gpuva, *next;
	u8 id;

	xe_assert(xe, !vm->preempt.num_exec_queues);

	xe_vm_close(vm);
	if (xe_vm_in_preempt_fence_mode(vm)) {
		mutex_lock(&xe->rebind_resume_lock);
		list_del_init(&vm->preempt.pm_activate_link);
		mutex_unlock(&xe->rebind_resume_lock);
		flush_work(&vm->preempt.rebind_work);
	}
	if (xe_vm_in_fault_mode(vm))
		xe_svm_close(vm);

	down_write(&vm->lock);
	for_each_tile(tile, xe, id) {
		if (vm->q[id])
			xe_exec_queue_last_fence_put(vm->q[id], vm);
	}
	up_write(&vm->lock);

	for_each_tile(tile, xe, id) {
		if (vm->q[id]) {
			xe_exec_queue_kill(vm->q[id]);
			xe_exec_queue_put(vm->q[id]);
			vm->q[id] = NULL;
		}
	}

	down_write(&vm->lock);
	xe_vm_lock(vm, false);
	drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
		vma = gpuva_to_vma(gpuva);

		if (xe_vma_has_no_bo(vma)) {
			xe_svm_notifier_lock(vm);
			vma->gpuva.flags |= XE_VMA_DESTROYED;
			xe_svm_notifier_unlock(vm);
		}

		xe_vm_remove_vma(vm, vma);

		/* easy case, remove from VMA? */
		if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
			list_del_init(&vma->combined_links.rebind);
			xe_vma_destroy(vma, NULL);
			continue;
		}

		list_move_tail(&vma->combined_links.destroy, &contested);
		vma->gpuva.flags |= XE_VMA_DESTROYED;
	}

	/*
	 * All vm operations will add shared fences to resv.
	 * The only exception is eviction for a shared object,
	 * but even so, the unbind when evicted would still
	 * install a fence to resv. Hence it's safe to
	 * destroy the pagetables immediately.
	 */
	xe_vm_free_scratch(vm);
	xe_vm_pt_destroy(vm);
	xe_vm_unlock(vm);

	/*
	 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
	 * Since we hold a refcount to the bo, we can remove and free
	 * the members safely without locking.
	 */
	list_for_each_entry_safe(vma, next_vma, &contested,
				 combined_links.destroy) {
		list_del_init(&vma->combined_links.destroy);
		xe_vma_destroy_unlocked(vma);
	}

	xe_svm_fini(vm);

	up_write(&vm->lock);

	down_write(&xe->usm.lock);
	if (vm->usm.asid) {
		void *lookup;

		xe_assert(xe, xe->info.has_asid);
		xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));

		lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
		xe_assert(xe, lookup == vm);
	}
	up_write(&xe->usm.lock);

	for_each_tile(tile, xe, id)
		xe_range_fence_tree_fini(&vm->rftree[id]);

	xe_vm_put(vm);
}

static void vm_destroy_work_func(struct work_struct *w)
{
	struct xe_vm *vm =
		container_of(w, struct xe_vm, destroy_work);
	struct xe_device *xe = vm->xe;
	struct xe_tile *tile;
	u8 id;

	/* xe_vm_close_and_put was not called? */
	xe_assert(xe, !vm->size);

	if (xe_vm_in_preempt_fence_mode(vm))
		flush_work(&vm->preempt.rebind_work);

	mutex_destroy(&vm->snap_mutex);

	if (vm->flags & XE_VM_FLAG_LR_MODE)
		xe_pm_runtime_put(xe);

	for_each_tile(tile, xe, id)
		XE_WARN_ON(vm->pt_root[id]);

	trace_xe_vm_free(vm);

	ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);

	if (vm->xef)
		xe_file_put(vm->xef);

	kfree(vm);
}

static void xe_vm_free(struct drm_gpuvm *gpuvm)
{
	struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);

	/* To destroy the VM we need to be able to sleep */
	queue_work(system_unbound_wq, &vm->destroy_work);
}

struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
{
	struct xe_vm *vm;

	mutex_lock(&xef->vm.lock);
	vm = xa_load(&xef->vm.xa, id);
	if (vm)
		xe_vm_get(vm);
	mutex_unlock(&xef->vm.lock);

	return vm;
}

u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
{
	return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0);
}
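
/*
 * Usage sketch (illustrative only): xe_vm_lookup() above returns a referenced
 * VM or NULL, so callers are expected to pair it with xe_vm_put(), e.g.:
 *
 *	vm = xe_vm_lookup(xef, args->vm_id);
 *	if (!vm)
 *		return -EINVAL;
 *	...
 *	xe_vm_put(vm);
 */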

static struct xe_exec_queue *
to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	return q ? q : vm->q[0];
}

static struct xe_user_fence *
find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
{
	unsigned int i;

	for (i = 0; i < num_syncs; i++) {
		struct xe_sync_entry *e = &syncs[i];

		if (xe_sync_is_ufence(e))
			return xe_sync_ufence_get(e);
	}

	return NULL;
}

#define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
				    DRM_XE_VM_CREATE_FLAG_LR_MODE | \
				    DRM_XE_VM_CREATE_FLAG_FAULT_MODE)

int xe_vm_create_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_vm_create *args = data;
	struct xe_vm *vm;
	u32 id;
	int err;
	u32 flags = 0;

	if (XE_IOCTL_DBG(xe, args->extensions))
		return -EINVAL;

	if (XE_GT_WA(xe_root_mmio_gt(xe), 14016763929))
		args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;

	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
			 !xe->info.has_usm))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
			 !xe->info.needs_scratch))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
		return -EINVAL;

	if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
		flags |= XE_VM_FLAG_SCRATCH_PAGE;
	if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
		flags |= XE_VM_FLAG_LR_MODE;
	if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
		flags |= XE_VM_FLAG_FAULT_MODE;

	vm = xe_vm_create(xe, flags, xef);
	if (IS_ERR(vm))
		return PTR_ERR(vm);

#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
	/* Warning: Security issue - never enable by default */
	args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
#endif

	/* user id alloc must always be last in ioctl to prevent UAF */
	err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
	if (err)
		goto err_close_and_put;

	args->vm_id = id;

	return 0;

err_close_and_put:
	xe_vm_close_and_put(vm);

	return err;
}

int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_vm_destroy *args = data;
	struct xe_vm *vm;
	int err = 0;

	if (XE_IOCTL_DBG(xe, args->pad) ||
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	mutex_lock(&xef->vm.lock);
	vm = xa_load(&xef->vm.xa, args->vm_id);
	if (XE_IOCTL_DBG(xe, !vm))
		err = -ENOENT;
	else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
		err = -EBUSY;
	else
		xa_erase(&xef->vm.xa, args->vm_id);
	mutex_unlock(&xef->vm.lock);

	if (!err)
		xe_vm_close_and_put(vm);

	return err;
}

static int xe_vm_query_vmas(struct xe_vm *vm, u64 start, u64 end)
{
	struct drm_gpuva *gpuva;
	u32 num_vmas = 0;

	lockdep_assert_held(&vm->lock);

	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end)
		num_vmas++;

	return num_vmas;
}

static int get_mem_attrs(struct xe_vm *vm, u32 *num_vmas, u64 start,
			 u64 end, struct drm_xe_mem_range_attr *attrs)
{
	struct drm_gpuva *gpuva;
	int i = 0;

	lockdep_assert_held(&vm->lock);

	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);

		if (i == *num_vmas)
			return -ENOSPC;

		attrs[i].start = xe_vma_start(vma);
		attrs[i].end = xe_vma_end(vma);
		attrs[i].atomic.val = vma->attr.atomic_access;
		attrs[i].pat_index.val = vma->attr.pat_index;
		attrs[i].preferred_mem_loc.devmem_fd = vma->attr.preferred_loc.devmem_fd;
		attrs[i].preferred_mem_loc.migration_policy =
			vma->attr.preferred_loc.migration_policy;

		i++;
	}

	*num_vmas = i;
	return 0;
}
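
/*
 * Illustrative two-pass usage sketch for the query ioctl handled below. The
 * userspace ioctl wrapper name and error handling are assumptions and not
 * taken from this file; the struct and field names match the uapi used here:
 *
 *	struct drm_xe_vm_query_mem_range_attr q = {
 *		.vm_id = vm_id, .start = start, .range = range,
 *	};
 *
 *	// Pass 1: num_mem_ranges == 0 returns the range count and entry size.
 *	ioctl(fd, DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTR, &q);
 *
 *	// Pass 2: provide a buffer of q.num_mem_ranges entries to be filled.
 *	q.vector_of_mem_attr = (uintptr_t)calloc(q.num_mem_ranges,
 *						 q.sizeof_mem_range_attr);
 *	ioctl(fd, DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTR, &q);
 */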

int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_mem_range_attr *mem_attrs;
	struct drm_xe_vm_query_mem_range_attr *args = data;
	u64 __user *attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);
	struct xe_vm *vm;
	int err = 0;

	if (XE_IOCTL_DBG(xe,
			 ((args->num_mem_ranges == 0 &&
			   (attrs_user || args->sizeof_mem_range_attr != 0)) ||
			  (args->num_mem_ranges > 0 &&
			   (!attrs_user ||
			    args->sizeof_mem_range_attr !=
			    sizeof(struct drm_xe_mem_range_attr))))))
		return -EINVAL;

	vm = xe_vm_lookup(xef, args->vm_id);
	if (XE_IOCTL_DBG(xe, !vm))
		return -EINVAL;

	err = down_read_interruptible(&vm->lock);
	if (err)
		goto put_vm;

	attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);

	if (args->num_mem_ranges == 0 && !attrs_user) {
		args->num_mem_ranges = xe_vm_query_vmas(vm, args->start, args->start + args->range);
		args->sizeof_mem_range_attr = sizeof(struct drm_xe_mem_range_attr);
		goto unlock_vm;
	}

	mem_attrs = kvmalloc_array(args->num_mem_ranges, args->sizeof_mem_range_attr,
				   GFP_KERNEL | __GFP_ACCOUNT |
				   __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
	if (!mem_attrs) {
		err = args->num_mem_ranges > 1 ? -ENOBUFS : -ENOMEM;
		goto unlock_vm;
	}

	memset(mem_attrs, 0, args->num_mem_ranges * args->sizeof_mem_range_attr);
	err = get_mem_attrs(vm, &args->num_mem_ranges, args->start,
			    args->start + args->range, mem_attrs);
	if (err)
		goto free_mem_attrs;

	err = copy_to_user(attrs_user, mem_attrs,
			   args->sizeof_mem_range_attr * args->num_mem_ranges);
	if (err)
		err = -EFAULT;

free_mem_attrs:
	kvfree(mem_attrs);
unlock_vm:
	up_read(&vm->lock);
put_vm:
	xe_vm_put(vm);
	return err;
}

static bool vma_matches(struct xe_vma *vma, u64 page_addr)
{
	if (page_addr > xe_vma_end(vma) - 1 ||
	    page_addr + SZ_4K - 1 < xe_vma_start(vma))
		return false;

	return true;
}

/**
 * xe_vm_find_vma_by_addr() - Find a VMA by its address
 *
 * @vm: the xe_vm the vma belongs to
 * @page_addr: address to look up
 */
struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr)
{
	struct xe_vma *vma = NULL;

	if (vm->usm.last_fault_vma) {	/* Fast lookup */
		if (vma_matches(vm->usm.last_fault_vma, page_addr))
			vma = vm->usm.last_fault_vma;
	}
	if (!vma)
		vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K);

	return vma;
}

static const u32 region_to_mem_type[] = {
	XE_PL_TT,
	XE_PL_VRAM0,
	XE_PL_VRAM1,
};

static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
			     bool post_commit)
{
	xe_svm_notifier_lock(vm);
	vma->gpuva.flags |= XE_VMA_DESTROYED;
	xe_svm_notifier_unlock(vm);
	if (post_commit)
		xe_vm_remove_vma(vm, vma);
}

#undef ULL
#define ULL	unsigned long long

#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
{
	struct xe_vma *vma;

	switch (op->op) {
	case DRM_GPUVA_OP_MAP:
		vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
		       (ULL)op->map.va.addr, (ULL)op->map.va.range);
		break;
	case DRM_GPUVA_OP_REMAP:
		vma = gpuva_to_vma(op->remap.unmap->va);
		vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
		       op->remap.unmap->keep ? 1 : 0);
		if (op->remap.prev)
			vm_dbg(&xe->drm,
			       "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
			       (ULL)op->remap.prev->va.addr,
			       (ULL)op->remap.prev->va.range);
		if (op->remap.next)
			vm_dbg(&xe->drm,
			       "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
			       (ULL)op->remap.next->va.addr,
			       (ULL)op->remap.next->va.range);
		break;
	case DRM_GPUVA_OP_UNMAP:
		vma = gpuva_to_vma(op->unmap.va);
		vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
		       op->unmap.keep ? 1 : 0);
1 : 0); 2160 break; 2161 case DRM_GPUVA_OP_PREFETCH: 2162 vma = gpuva_to_vma(op->prefetch.va); 2163 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx", 2164 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma)); 2165 break; 2166 default: 2167 drm_warn(&xe->drm, "NOT POSSIBLE"); 2168 } 2169 } 2170 #else 2171 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 2172 { 2173 } 2174 #endif 2175 2176 static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags) 2177 { 2178 if (!xe_vm_in_fault_mode(vm)) 2179 return false; 2180 2181 if (!xe_vm_has_scratch(vm)) 2182 return false; 2183 2184 if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE) 2185 return false; 2186 2187 return true; 2188 } 2189 2190 static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops) 2191 { 2192 struct drm_gpuva_op *__op; 2193 2194 drm_gpuva_for_each_op(__op, ops) { 2195 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2196 2197 xe_vma_svm_prefetch_op_fini(op); 2198 } 2199 } 2200 2201 /* 2202 * Create operations list from IOCTL arguments, setup operations fields so parse 2203 * and commit steps are decoupled from IOCTL arguments. This step can fail. 2204 */ 2205 static struct drm_gpuva_ops * 2206 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, 2207 struct xe_bo *bo, u64 bo_offset_or_userptr, 2208 u64 addr, u64 range, 2209 u32 operation, u32 flags, 2210 u32 prefetch_region, u16 pat_index) 2211 { 2212 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL; 2213 struct drm_gpuva_ops *ops; 2214 struct drm_gpuva_op *__op; 2215 struct drm_gpuvm_bo *vm_bo; 2216 u64 range_end = addr + range; 2217 int err; 2218 2219 lockdep_assert_held_write(&vm->lock); 2220 2221 vm_dbg(&vm->xe->drm, 2222 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx", 2223 operation, (ULL)addr, (ULL)range, 2224 (ULL)bo_offset_or_userptr); 2225 2226 switch (operation) { 2227 case DRM_XE_VM_BIND_OP_MAP: 2228 case DRM_XE_VM_BIND_OP_MAP_USERPTR: { 2229 struct drm_gpuvm_map_req map_req = { 2230 .map.va.addr = addr, 2231 .map.va.range = range, 2232 .map.gem.obj = obj, 2233 .map.gem.offset = bo_offset_or_userptr, 2234 }; 2235 2236 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, &map_req); 2237 break; 2238 } 2239 case DRM_XE_VM_BIND_OP_UNMAP: 2240 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range); 2241 break; 2242 case DRM_XE_VM_BIND_OP_PREFETCH: 2243 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range); 2244 break; 2245 case DRM_XE_VM_BIND_OP_UNMAP_ALL: 2246 xe_assert(vm->xe, bo); 2247 2248 err = xe_bo_lock(bo, true); 2249 if (err) 2250 return ERR_PTR(err); 2251 2252 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj); 2253 if (IS_ERR(vm_bo)) { 2254 xe_bo_unlock(bo); 2255 return ERR_CAST(vm_bo); 2256 } 2257 2258 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo); 2259 drm_gpuvm_bo_put(vm_bo); 2260 xe_bo_unlock(bo); 2261 break; 2262 default: 2263 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2264 ops = ERR_PTR(-EINVAL); 2265 } 2266 if (IS_ERR(ops)) 2267 return ops; 2268 2269 drm_gpuva_for_each_op(__op, ops) { 2270 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2271 2272 if (__op->op == DRM_GPUVA_OP_MAP) { 2273 op->map.immediate = 2274 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; 2275 op->map.read_only = 2276 flags & DRM_XE_VM_BIND_FLAG_READONLY; 2277 op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 2278 op->map.is_cpu_addr_mirror = flags & 2279 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR; 2280 op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; 2281 op->map.pat_index = pat_index; 2282 
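		/*
		 * invalidate_on_bind (set just below) covers the fault-mode
		 * case with scratch pages enabled and no IMMEDIATE flag: the
		 * scratch PTEs covering the range must be cleared/invalidated
		 * on the first bind.
		 */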
op->map.invalidate_on_bind = 2283 __xe_vm_needs_clear_scratch_pages(vm, flags); 2284 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { 2285 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2286 struct xe_tile *tile; 2287 struct xe_svm_range *svm_range; 2288 struct drm_gpusvm_ctx ctx = {}; 2289 struct drm_pagemap *dpagemap; 2290 u8 id, tile_mask = 0; 2291 u32 i; 2292 2293 if (!xe_vma_is_cpu_addr_mirror(vma)) { 2294 op->prefetch.region = prefetch_region; 2295 break; 2296 } 2297 2298 ctx.read_only = xe_vma_read_only(vma); 2299 ctx.devmem_possible = IS_DGFX(vm->xe) && 2300 IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); 2301 2302 for_each_tile(tile, vm->xe, id) 2303 tile_mask |= 0x1 << id; 2304 2305 xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC); 2306 op->prefetch_range.ranges_count = 0; 2307 tile = NULL; 2308 2309 if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) { 2310 dpagemap = xe_vma_resolve_pagemap(vma, 2311 xe_device_get_root_tile(vm->xe)); 2312 /* 2313 * TODO: Once multigpu support is enabled will need 2314 * something to dereference tile from dpagemap. 2315 */ 2316 if (dpagemap) 2317 tile = xe_device_get_root_tile(vm->xe); 2318 } else if (prefetch_region) { 2319 tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] - 2320 XE_PL_VRAM0]; 2321 } 2322 2323 op->prefetch_range.tile = tile; 2324 alloc_next_range: 2325 svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx); 2326 2327 if (PTR_ERR(svm_range) == -ENOENT) { 2328 u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma); 2329 2330 addr = ret == ULONG_MAX ? 0 : ret; 2331 if (addr) 2332 goto alloc_next_range; 2333 else 2334 goto print_op_label; 2335 } 2336 2337 if (IS_ERR(svm_range)) { 2338 err = PTR_ERR(svm_range); 2339 goto unwind_prefetch_ops; 2340 } 2341 2342 if (xe_svm_range_validate(vm, svm_range, tile_mask, !!tile)) { 2343 xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID"); 2344 goto check_next_range; 2345 } 2346 2347 err = xa_alloc(&op->prefetch_range.range, 2348 &i, svm_range, xa_limit_32b, 2349 GFP_KERNEL); 2350 2351 if (err) 2352 goto unwind_prefetch_ops; 2353 2354 op->prefetch_range.ranges_count++; 2355 vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH; 2356 xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED"); 2357 check_next_range: 2358 if (range_end > xe_svm_range_end(svm_range) && 2359 xe_svm_range_end(svm_range) < xe_vma_end(vma)) { 2360 addr = xe_svm_range_end(svm_range); 2361 goto alloc_next_range; 2362 } 2363 } 2364 print_op_label: 2365 print_op(vm->xe, __op); 2366 } 2367 2368 return ops; 2369 2370 unwind_prefetch_ops: 2371 xe_svm_prefetch_gpuva_ops_fini(ops); 2372 drm_gpuva_ops_free(&vm->gpuvm, ops); 2373 return ERR_PTR(err); 2374 } 2375 2376 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO); 2377 2378 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, 2379 struct xe_vma_mem_attr *attr, unsigned int flags) 2380 { 2381 struct xe_bo *bo = op->gem.obj ? 
gem_to_xe_bo(op->gem.obj) : NULL; 2382 struct xe_validation_ctx ctx; 2383 struct drm_exec exec; 2384 struct xe_vma *vma; 2385 int err = 0; 2386 2387 lockdep_assert_held_write(&vm->lock); 2388 2389 if (bo) { 2390 err = 0; 2391 xe_validation_guard(&ctx, &vm->xe->val, &exec, 2392 (struct xe_val_flags) {.interruptible = true}, err) { 2393 if (!bo->vm) { 2394 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm)); 2395 drm_exec_retry_on_contention(&exec); 2396 } 2397 if (!err) { 2398 err = drm_exec_lock_obj(&exec, &bo->ttm.base); 2399 drm_exec_retry_on_contention(&exec); 2400 } 2401 if (err) 2402 return ERR_PTR(err); 2403 2404 vma = xe_vma_create(vm, bo, op->gem.offset, 2405 op->va.addr, op->va.addr + 2406 op->va.range - 1, attr, flags); 2407 if (IS_ERR(vma)) 2408 return vma; 2409 2410 if (!bo->vm) { 2411 err = add_preempt_fences(vm, bo); 2412 if (err) { 2413 prep_vma_destroy(vm, vma, false); 2414 xe_vma_destroy(vma, NULL); 2415 } 2416 } 2417 } 2418 if (err) 2419 return ERR_PTR(err); 2420 } else { 2421 vma = xe_vma_create(vm, NULL, op->gem.offset, 2422 op->va.addr, op->va.addr + 2423 op->va.range - 1, attr, flags); 2424 if (IS_ERR(vma)) 2425 return vma; 2426 2427 if (xe_vma_is_userptr(vma)) 2428 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2429 } 2430 if (err) { 2431 prep_vma_destroy(vm, vma, false); 2432 xe_vma_destroy_unlocked(vma); 2433 vma = ERR_PTR(err); 2434 } 2435 2436 return vma; 2437 } 2438 2439 static u64 xe_vma_max_pte_size(struct xe_vma *vma) 2440 { 2441 if (vma->gpuva.flags & XE_VMA_PTE_1G) 2442 return SZ_1G; 2443 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT)) 2444 return SZ_2M; 2445 else if (vma->gpuva.flags & XE_VMA_PTE_64K) 2446 return SZ_64K; 2447 else if (vma->gpuva.flags & XE_VMA_PTE_4K) 2448 return SZ_4K; 2449 2450 return SZ_1G; /* Uninitialized, used max size */ 2451 } 2452 2453 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size) 2454 { 2455 switch (size) { 2456 case SZ_1G: 2457 vma->gpuva.flags |= XE_VMA_PTE_1G; 2458 break; 2459 case SZ_2M: 2460 vma->gpuva.flags |= XE_VMA_PTE_2M; 2461 break; 2462 case SZ_64K: 2463 vma->gpuva.flags |= XE_VMA_PTE_64K; 2464 break; 2465 case SZ_4K: 2466 vma->gpuva.flags |= XE_VMA_PTE_4K; 2467 break; 2468 } 2469 } 2470 2471 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) 2472 { 2473 int err = 0; 2474 2475 lockdep_assert_held_write(&vm->lock); 2476 2477 switch (op->base.op) { 2478 case DRM_GPUVA_OP_MAP: 2479 err |= xe_vm_insert_vma(vm, op->map.vma); 2480 if (!err) 2481 op->flags |= XE_VMA_OP_COMMITTED; 2482 break; 2483 case DRM_GPUVA_OP_REMAP: 2484 { 2485 u8 tile_present = 2486 gpuva_to_vma(op->base.remap.unmap->va)->tile_present; 2487 2488 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va), 2489 true); 2490 op->flags |= XE_VMA_OP_COMMITTED; 2491 2492 if (op->remap.prev) { 2493 err |= xe_vm_insert_vma(vm, op->remap.prev); 2494 if (!err) 2495 op->flags |= XE_VMA_OP_PREV_COMMITTED; 2496 if (!err && op->remap.skip_prev) { 2497 op->remap.prev->tile_present = 2498 tile_present; 2499 op->remap.prev = NULL; 2500 } 2501 } 2502 if (op->remap.next) { 2503 err |= xe_vm_insert_vma(vm, op->remap.next); 2504 if (!err) 2505 op->flags |= XE_VMA_OP_NEXT_COMMITTED; 2506 if (!err && op->remap.skip_next) { 2507 op->remap.next->tile_present = 2508 tile_present; 2509 op->remap.next = NULL; 2510 } 2511 } 2512 2513 /* Adjust for partial unbind after removing VMA from VM */ 2514 if (!err) { 2515 op->base.remap.unmap->va->va.addr = op->remap.start; 2516 op->base.remap.unmap->va->va.range = op->remap.range; 2517 } 
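		/*
		 * From this point the REMAP op is committed; if a later op in
		 * the list fails, it is rolled back by xe_vma_op_unwind()
		 * based on the *_COMMITTED flags set above.
		 */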
2518 break; 2519 } 2520 case DRM_GPUVA_OP_UNMAP: 2521 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true); 2522 op->flags |= XE_VMA_OP_COMMITTED; 2523 break; 2524 case DRM_GPUVA_OP_PREFETCH: 2525 op->flags |= XE_VMA_OP_COMMITTED; 2526 break; 2527 default: 2528 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2529 } 2530 2531 return err; 2532 } 2533 2534 /** 2535 * xe_vma_has_default_mem_attrs - Check if a VMA has default memory attributes 2536 * @vma: Pointer to the xe_vma structure to check 2537 * 2538 * This function determines whether the given VMA (Virtual Memory Area) 2539 * has its memory attributes set to their default values. Specifically, 2540 * it checks the following conditions: 2541 * 2542 * - `atomic_access` is `DRM_XE_VMA_ATOMIC_UNDEFINED` 2543 * - `pat_index` is equal to `default_pat_index` 2544 * - `preferred_loc.devmem_fd` is `DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE` 2545 * - `preferred_loc.migration_policy` is `DRM_XE_MIGRATE_ALL_PAGES` 2546 * 2547 * Return: true if all attributes are at their default values, false otherwise. 2548 */ 2549 bool xe_vma_has_default_mem_attrs(struct xe_vma *vma) 2550 { 2551 return (vma->attr.atomic_access == DRM_XE_ATOMIC_UNDEFINED && 2552 vma->attr.pat_index == vma->attr.default_pat_index && 2553 vma->attr.preferred_loc.devmem_fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE && 2554 vma->attr.preferred_loc.migration_policy == DRM_XE_MIGRATE_ALL_PAGES); 2555 } 2556 2557 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, 2558 struct xe_vma_ops *vops) 2559 { 2560 struct xe_device *xe = vm->xe; 2561 struct drm_gpuva_op *__op; 2562 struct xe_tile *tile; 2563 u8 id, tile_mask = 0; 2564 int err = 0; 2565 2566 lockdep_assert_held_write(&vm->lock); 2567 2568 for_each_tile(tile, vm->xe, id) 2569 tile_mask |= 0x1 << id; 2570 2571 drm_gpuva_for_each_op(__op, ops) { 2572 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2573 struct xe_vma *vma; 2574 unsigned int flags = 0; 2575 2576 INIT_LIST_HEAD(&op->link); 2577 list_add_tail(&op->link, &vops->list); 2578 op->tile_mask = tile_mask; 2579 2580 switch (op->base.op) { 2581 case DRM_GPUVA_OP_MAP: 2582 { 2583 struct xe_vma_mem_attr default_attr = { 2584 .preferred_loc = { 2585 .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE, 2586 .migration_policy = DRM_XE_MIGRATE_ALL_PAGES, 2587 }, 2588 .atomic_access = DRM_XE_ATOMIC_UNDEFINED, 2589 .default_pat_index = op->map.pat_index, 2590 .pat_index = op->map.pat_index, 2591 }; 2592 2593 flags |= op->map.read_only ? 2594 VMA_CREATE_FLAG_READ_ONLY : 0; 2595 flags |= op->map.is_null ? 2596 VMA_CREATE_FLAG_IS_NULL : 0; 2597 flags |= op->map.dumpable ? 2598 VMA_CREATE_FLAG_DUMPABLE : 0; 2599 flags |= op->map.is_cpu_addr_mirror ? 
2600 VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0; 2601 2602 vma = new_vma(vm, &op->base.map, &default_attr, 2603 flags); 2604 if (IS_ERR(vma)) 2605 return PTR_ERR(vma); 2606 2607 op->map.vma = vma; 2608 if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) && 2609 !op->map.is_cpu_addr_mirror) || 2610 op->map.invalidate_on_bind) 2611 xe_vma_ops_incr_pt_update_ops(vops, 2612 op->tile_mask, 1); 2613 break; 2614 } 2615 case DRM_GPUVA_OP_REMAP: 2616 { 2617 struct xe_vma *old = 2618 gpuva_to_vma(op->base.remap.unmap->va); 2619 bool skip = xe_vma_is_cpu_addr_mirror(old); 2620 u64 start = xe_vma_start(old), end = xe_vma_end(old); 2621 int num_remap_ops = 0; 2622 2623 if (op->base.remap.prev) 2624 start = op->base.remap.prev->va.addr + 2625 op->base.remap.prev->va.range; 2626 if (op->base.remap.next) 2627 end = op->base.remap.next->va.addr; 2628 2629 if (xe_vma_is_cpu_addr_mirror(old) && 2630 xe_svm_has_mapping(vm, start, end)) { 2631 if (vops->flags & XE_VMA_OPS_FLAG_MADVISE) 2632 xe_svm_unmap_address_range(vm, start, end); 2633 else 2634 return -EBUSY; 2635 } 2636 2637 op->remap.start = xe_vma_start(old); 2638 op->remap.range = xe_vma_size(old); 2639 2640 flags |= op->base.remap.unmap->va->flags & 2641 XE_VMA_READ_ONLY ? 2642 VMA_CREATE_FLAG_READ_ONLY : 0; 2643 flags |= op->base.remap.unmap->va->flags & 2644 DRM_GPUVA_SPARSE ? 2645 VMA_CREATE_FLAG_IS_NULL : 0; 2646 flags |= op->base.remap.unmap->va->flags & 2647 XE_VMA_DUMPABLE ? 2648 VMA_CREATE_FLAG_DUMPABLE : 0; 2649 flags |= xe_vma_is_cpu_addr_mirror(old) ? 2650 VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0; 2651 2652 if (op->base.remap.prev) { 2653 vma = new_vma(vm, op->base.remap.prev, 2654 &old->attr, flags); 2655 if (IS_ERR(vma)) 2656 return PTR_ERR(vma); 2657 2658 op->remap.prev = vma; 2659 2660 /* 2661 * Userptr creates a new SG mapping so 2662 * we must also rebind. 2663 */ 2664 op->remap.skip_prev = skip || 2665 (!xe_vma_is_userptr(old) && 2666 IS_ALIGNED(xe_vma_end(vma), 2667 xe_vma_max_pte_size(old))); 2668 if (op->remap.skip_prev) { 2669 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2670 op->remap.range -= 2671 xe_vma_end(vma) - 2672 xe_vma_start(old); 2673 op->remap.start = xe_vma_end(vma); 2674 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx", 2675 (ULL)op->remap.start, 2676 (ULL)op->remap.range); 2677 } else { 2678 num_remap_ops++; 2679 } 2680 } 2681 2682 if (op->base.remap.next) { 2683 vma = new_vma(vm, op->base.remap.next, 2684 &old->attr, flags); 2685 if (IS_ERR(vma)) 2686 return PTR_ERR(vma); 2687 2688 op->remap.next = vma; 2689 2690 /* 2691 * Userptr creates a new SG mapping so 2692 * we must also rebind. 
2693 */ 2694 op->remap.skip_next = skip || 2695 (!xe_vma_is_userptr(old) && 2696 IS_ALIGNED(xe_vma_start(vma), 2697 xe_vma_max_pte_size(old))); 2698 if (op->remap.skip_next) { 2699 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2700 op->remap.range -= 2701 xe_vma_end(old) - 2702 xe_vma_start(vma); 2703 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx", 2704 (ULL)op->remap.start, 2705 (ULL)op->remap.range); 2706 } else { 2707 num_remap_ops++; 2708 } 2709 } 2710 if (!skip) 2711 num_remap_ops++; 2712 2713 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops); 2714 break; 2715 } 2716 case DRM_GPUVA_OP_UNMAP: 2717 vma = gpuva_to_vma(op->base.unmap.va); 2718 2719 if (xe_vma_is_cpu_addr_mirror(vma) && 2720 xe_svm_has_mapping(vm, xe_vma_start(vma), 2721 xe_vma_end(vma))) 2722 return -EBUSY; 2723 2724 if (!xe_vma_is_cpu_addr_mirror(vma)) 2725 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2726 break; 2727 case DRM_GPUVA_OP_PREFETCH: 2728 vma = gpuva_to_vma(op->base.prefetch.va); 2729 2730 if (xe_vma_is_userptr(vma)) { 2731 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2732 if (err) 2733 return err; 2734 } 2735 2736 if (xe_vma_is_cpu_addr_mirror(vma)) 2737 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 2738 op->prefetch_range.ranges_count); 2739 else 2740 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2741 2742 break; 2743 default: 2744 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2745 } 2746 2747 err = xe_vma_op_commit(vm, op); 2748 if (err) 2749 return err; 2750 } 2751 2752 return 0; 2753 } 2754 2755 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, 2756 bool post_commit, bool prev_post_commit, 2757 bool next_post_commit) 2758 { 2759 lockdep_assert_held_write(&vm->lock); 2760 2761 switch (op->base.op) { 2762 case DRM_GPUVA_OP_MAP: 2763 if (op->map.vma) { 2764 prep_vma_destroy(vm, op->map.vma, post_commit); 2765 xe_vma_destroy_unlocked(op->map.vma); 2766 } 2767 break; 2768 case DRM_GPUVA_OP_UNMAP: 2769 { 2770 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va); 2771 2772 if (vma) { 2773 xe_svm_notifier_lock(vm); 2774 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2775 xe_svm_notifier_unlock(vm); 2776 if (post_commit) 2777 xe_vm_insert_vma(vm, vma); 2778 } 2779 break; 2780 } 2781 case DRM_GPUVA_OP_REMAP: 2782 { 2783 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va); 2784 2785 if (op->remap.prev) { 2786 prep_vma_destroy(vm, op->remap.prev, prev_post_commit); 2787 xe_vma_destroy_unlocked(op->remap.prev); 2788 } 2789 if (op->remap.next) { 2790 prep_vma_destroy(vm, op->remap.next, next_post_commit); 2791 xe_vma_destroy_unlocked(op->remap.next); 2792 } 2793 if (vma) { 2794 xe_svm_notifier_lock(vm); 2795 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2796 xe_svm_notifier_unlock(vm); 2797 if (post_commit) 2798 xe_vm_insert_vma(vm, vma); 2799 } 2800 break; 2801 } 2802 case DRM_GPUVA_OP_PREFETCH: 2803 /* Nothing to do */ 2804 break; 2805 default: 2806 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2807 } 2808 } 2809 2810 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, 2811 struct drm_gpuva_ops **ops, 2812 int num_ops_list) 2813 { 2814 int i; 2815 2816 for (i = num_ops_list - 1; i >= 0; --i) { 2817 struct drm_gpuva_ops *__ops = ops[i]; 2818 struct drm_gpuva_op *__op; 2819 2820 if (!__ops) 2821 continue; 2822 2823 drm_gpuva_for_each_op_reverse(__op, __ops) { 2824 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2825 2826 xe_vma_op_unwind(vm, op, 2827 op->flags & XE_VMA_OP_COMMITTED, 2828 op->flags & XE_VMA_OP_PREV_COMMITTED, 2829 op->flags 
& XE_VMA_OP_NEXT_COMMITTED); 2830 } 2831 } 2832 } 2833 2834 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, 2835 bool validate) 2836 { 2837 struct xe_bo *bo = xe_vma_bo(vma); 2838 struct xe_vm *vm = xe_vma_vm(vma); 2839 int err = 0; 2840 2841 if (bo) { 2842 if (!bo->vm) 2843 err = drm_exec_lock_obj(exec, &bo->ttm.base); 2844 if (!err && validate) 2845 err = xe_bo_validate(bo, vm, 2846 !xe_vm_in_preempt_fence_mode(vm), exec); 2847 } 2848 2849 return err; 2850 } 2851 2852 static int check_ufence(struct xe_vma *vma) 2853 { 2854 if (vma->ufence) { 2855 struct xe_user_fence * const f = vma->ufence; 2856 2857 if (!xe_sync_ufence_get_status(f)) 2858 return -EBUSY; 2859 2860 vma->ufence = NULL; 2861 xe_sync_ufence_put(f); 2862 } 2863 2864 return 0; 2865 } 2866 2867 static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) 2868 { 2869 bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); 2870 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2871 struct xe_tile *tile = op->prefetch_range.tile; 2872 int err = 0; 2873 2874 struct xe_svm_range *svm_range; 2875 struct drm_gpusvm_ctx ctx = {}; 2876 unsigned long i; 2877 2878 if (!xe_vma_is_cpu_addr_mirror(vma)) 2879 return 0; 2880 2881 ctx.read_only = xe_vma_read_only(vma); 2882 ctx.devmem_possible = devmem_possible; 2883 ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0; 2884 2885 /* TODO: Threading the migration */ 2886 xa_for_each(&op->prefetch_range.range, i, svm_range) { 2887 if (!tile) 2888 xe_svm_range_migrate_to_smem(vm, svm_range); 2889 2890 if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, !!tile)) { 2891 err = xe_svm_alloc_vram(tile, svm_range, &ctx); 2892 if (err) { 2893 drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n", 2894 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 2895 return -ENODATA; 2896 } 2897 xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM"); 2898 } 2899 2900 err = xe_svm_range_get_pages(vm, svm_range, &ctx); 2901 if (err) { 2902 drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n", 2903 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 2904 if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) 2905 err = -ENODATA; 2906 return err; 2907 } 2908 xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE"); 2909 } 2910 2911 return err; 2912 } 2913 2914 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, 2915 struct xe_vma_op *op) 2916 { 2917 int err = 0; 2918 2919 switch (op->base.op) { 2920 case DRM_GPUVA_OP_MAP: 2921 if (!op->map.invalidate_on_bind) 2922 err = vma_lock_and_validate(exec, op->map.vma, 2923 !xe_vm_in_fault_mode(vm) || 2924 op->map.immediate); 2925 break; 2926 case DRM_GPUVA_OP_REMAP: 2927 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va)); 2928 if (err) 2929 break; 2930 2931 err = vma_lock_and_validate(exec, 2932 gpuva_to_vma(op->base.remap.unmap->va), 2933 false); 2934 if (!err && op->remap.prev) 2935 err = vma_lock_and_validate(exec, op->remap.prev, true); 2936 if (!err && op->remap.next) 2937 err = vma_lock_and_validate(exec, op->remap.next, true); 2938 break; 2939 case DRM_GPUVA_OP_UNMAP: 2940 err = check_ufence(gpuva_to_vma(op->base.unmap.va)); 2941 if (err) 2942 break; 2943 2944 err = vma_lock_and_validate(exec, 2945 gpuva_to_vma(op->base.unmap.va), 2946 false); 2947 break; 2948 case DRM_GPUVA_OP_PREFETCH: 2949 { 2950 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2951 u32 region; 2952 2953 if 
(!xe_vma_is_cpu_addr_mirror(vma)) { 2954 region = op->prefetch.region; 2955 xe_assert(vm->xe, region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC || 2956 region <= ARRAY_SIZE(region_to_mem_type)); 2957 } 2958 2959 err = vma_lock_and_validate(exec, 2960 gpuva_to_vma(op->base.prefetch.va), 2961 false); 2962 if (!err && !xe_vma_has_no_bo(vma)) 2963 err = xe_bo_migrate(xe_vma_bo(vma), 2964 region_to_mem_type[region], 2965 NULL, 2966 exec); 2967 break; 2968 } 2969 default: 2970 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2971 } 2972 2973 return err; 2974 } 2975 2976 static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops) 2977 { 2978 struct xe_vma_op *op; 2979 int err; 2980 2981 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) 2982 return 0; 2983 2984 list_for_each_entry(op, &vops->list, link) { 2985 if (op->base.op == DRM_GPUVA_OP_PREFETCH) { 2986 err = prefetch_ranges(vm, op); 2987 if (err) 2988 return err; 2989 } 2990 } 2991 2992 return 0; 2993 } 2994 2995 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, 2996 struct xe_vm *vm, 2997 struct xe_vma_ops *vops) 2998 { 2999 struct xe_vma_op *op; 3000 int err; 3001 3002 err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 3003 if (err) 3004 return err; 3005 3006 list_for_each_entry(op, &vops->list, link) { 3007 err = op_lock_and_prep(exec, vm, op); 3008 if (err) 3009 return err; 3010 } 3011 3012 #ifdef TEST_VM_OPS_ERROR 3013 if (vops->inject_error && 3014 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK) 3015 return -ENOSPC; 3016 #endif 3017 3018 return 0; 3019 } 3020 3021 static void op_trace(struct xe_vma_op *op) 3022 { 3023 switch (op->base.op) { 3024 case DRM_GPUVA_OP_MAP: 3025 trace_xe_vma_bind(op->map.vma); 3026 break; 3027 case DRM_GPUVA_OP_REMAP: 3028 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va)); 3029 if (op->remap.prev) 3030 trace_xe_vma_bind(op->remap.prev); 3031 if (op->remap.next) 3032 trace_xe_vma_bind(op->remap.next); 3033 break; 3034 case DRM_GPUVA_OP_UNMAP: 3035 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va)); 3036 break; 3037 case DRM_GPUVA_OP_PREFETCH: 3038 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va)); 3039 break; 3040 case DRM_GPUVA_OP_DRIVER: 3041 break; 3042 default: 3043 XE_WARN_ON("NOT POSSIBLE"); 3044 } 3045 } 3046 3047 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops) 3048 { 3049 struct xe_vma_op *op; 3050 3051 list_for_each_entry(op, &vops->list, link) 3052 op_trace(op); 3053 } 3054 3055 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops) 3056 { 3057 struct xe_exec_queue *q = vops->q; 3058 struct xe_tile *tile; 3059 int number_tiles = 0; 3060 u8 id; 3061 3062 for_each_tile(tile, vm->xe, id) { 3063 if (vops->pt_update_ops[id].num_ops) 3064 ++number_tiles; 3065 3066 if (vops->pt_update_ops[id].q) 3067 continue; 3068 3069 if (q) { 3070 vops->pt_update_ops[id].q = q; 3071 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list)) 3072 q = list_next_entry(q, multi_gt_list); 3073 } else { 3074 vops->pt_update_ops[id].q = vm->q[id]; 3075 } 3076 } 3077 3078 return number_tiles; 3079 } 3080 3081 static struct dma_fence *ops_execute(struct xe_vm *vm, 3082 struct xe_vma_ops *vops) 3083 { 3084 struct xe_tile *tile; 3085 struct dma_fence *fence = NULL; 3086 struct dma_fence **fences = NULL; 3087 struct dma_fence_array *cf = NULL; 3088 int number_tiles = 0, current_fence = 0, err; 3089 u8 id; 3090 3091 number_tiles = vm_ops_setup_tile_args(vm, vops); 3092 if (number_tiles == 0) 3093 return ERR_PTR(-ENODATA); 3094 3095 if (number_tiles 
> 1) { 3096 fences = kmalloc_array(number_tiles, sizeof(*fences), 3097 GFP_KERNEL); 3098 if (!fences) { 3099 fence = ERR_PTR(-ENOMEM); 3100 goto err_trace; 3101 } 3102 } 3103 3104 for_each_tile(tile, vm->xe, id) { 3105 if (!vops->pt_update_ops[id].num_ops) 3106 continue; 3107 3108 err = xe_pt_update_ops_prepare(tile, vops); 3109 if (err) { 3110 fence = ERR_PTR(err); 3111 goto err_out; 3112 } 3113 } 3114 3115 trace_xe_vm_ops_execute(vops); 3116 3117 for_each_tile(tile, vm->xe, id) { 3118 if (!vops->pt_update_ops[id].num_ops) 3119 continue; 3120 3121 fence = xe_pt_update_ops_run(tile, vops); 3122 if (IS_ERR(fence)) 3123 goto err_out; 3124 3125 if (fences) 3126 fences[current_fence++] = fence; 3127 } 3128 3129 if (fences) { 3130 cf = dma_fence_array_create(number_tiles, fences, 3131 vm->composite_fence_ctx, 3132 vm->composite_fence_seqno++, 3133 false); 3134 if (!cf) { 3135 --vm->composite_fence_seqno; 3136 fence = ERR_PTR(-ENOMEM); 3137 goto err_out; 3138 } 3139 fence = &cf->base; 3140 } 3141 3142 for_each_tile(tile, vm->xe, id) { 3143 if (!vops->pt_update_ops[id].num_ops) 3144 continue; 3145 3146 xe_pt_update_ops_fini(tile, vops); 3147 } 3148 3149 return fence; 3150 3151 err_out: 3152 for_each_tile(tile, vm->xe, id) { 3153 if (!vops->pt_update_ops[id].num_ops) 3154 continue; 3155 3156 xe_pt_update_ops_abort(tile, vops); 3157 } 3158 while (current_fence) 3159 dma_fence_put(fences[--current_fence]); 3160 kfree(fences); 3161 kfree(cf); 3162 3163 err_trace: 3164 trace_xe_vm_ops_fail(vm); 3165 return fence; 3166 } 3167 3168 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence) 3169 { 3170 if (vma->ufence) 3171 xe_sync_ufence_put(vma->ufence); 3172 vma->ufence = __xe_sync_ufence_get(ufence); 3173 } 3174 3175 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op, 3176 struct xe_user_fence *ufence) 3177 { 3178 switch (op->base.op) { 3179 case DRM_GPUVA_OP_MAP: 3180 vma_add_ufence(op->map.vma, ufence); 3181 break; 3182 case DRM_GPUVA_OP_REMAP: 3183 if (op->remap.prev) 3184 vma_add_ufence(op->remap.prev, ufence); 3185 if (op->remap.next) 3186 vma_add_ufence(op->remap.next, ufence); 3187 break; 3188 case DRM_GPUVA_OP_UNMAP: 3189 break; 3190 case DRM_GPUVA_OP_PREFETCH: 3191 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence); 3192 break; 3193 default: 3194 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 3195 } 3196 } 3197 3198 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, 3199 struct dma_fence *fence) 3200 { 3201 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q); 3202 struct xe_user_fence *ufence; 3203 struct xe_vma_op *op; 3204 int i; 3205 3206 ufence = find_ufence_get(vops->syncs, vops->num_syncs); 3207 list_for_each_entry(op, &vops->list, link) { 3208 if (ufence) 3209 op_add_ufence(vm, op, ufence); 3210 3211 if (op->base.op == DRM_GPUVA_OP_UNMAP) 3212 xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence); 3213 else if (op->base.op == DRM_GPUVA_OP_REMAP) 3214 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), 3215 fence); 3216 } 3217 if (ufence) 3218 xe_sync_ufence_put(ufence); 3219 if (fence) { 3220 for (i = 0; i < vops->num_syncs; i++) 3221 xe_sync_entry_signal(vops->syncs + i, fence); 3222 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); 3223 } 3224 } 3225 3226 static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm, 3227 struct xe_vma_ops *vops) 3228 { 3229 struct xe_validation_ctx ctx; 3230 struct drm_exec exec; 3231 struct dma_fence *fence; 3232 int err = 0; 3233 3234 
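	/*
	 * Everything the ops touch is locked (and validated where needed) in
	 * a single drm_exec transaction below, so a contention or OOM retry
	 * restarts the whole prepare/execute sequence.
	 */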
lockdep_assert_held_write(&vm->lock); 3235 3236 xe_validation_guard(&ctx, &vm->xe->val, &exec, 3237 ((struct xe_val_flags) { 3238 .interruptible = true, 3239 .exec_ignore_duplicates = true, 3240 }), err) { 3241 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops); 3242 drm_exec_retry_on_contention(&exec); 3243 xe_validation_retry_on_oom(&ctx, &err); 3244 if (err) 3245 return ERR_PTR(err); 3246 3247 xe_vm_set_validation_exec(vm, &exec); 3248 fence = ops_execute(vm, vops); 3249 xe_vm_set_validation_exec(vm, NULL); 3250 if (IS_ERR(fence)) { 3251 if (PTR_ERR(fence) == -ENODATA) 3252 vm_bind_ioctl_ops_fini(vm, vops, NULL); 3253 return fence; 3254 } 3255 3256 vm_bind_ioctl_ops_fini(vm, vops, fence); 3257 } 3258 3259 return err ? ERR_PTR(err) : fence; 3260 } 3261 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); 3262 3263 #define SUPPORTED_FLAGS_STUB \ 3264 (DRM_XE_VM_BIND_FLAG_READONLY | \ 3265 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \ 3266 DRM_XE_VM_BIND_FLAG_NULL | \ 3267 DRM_XE_VM_BIND_FLAG_DUMPABLE | \ 3268 DRM_XE_VM_BIND_FLAG_CHECK_PXP | \ 3269 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) 3270 3271 #ifdef TEST_VM_OPS_ERROR 3272 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR) 3273 #else 3274 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB 3275 #endif 3276 3277 #define XE_64K_PAGE_MASK 0xffffull 3278 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) 3279 3280 static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, 3281 struct drm_xe_vm_bind *args, 3282 struct drm_xe_vm_bind_op **bind_ops) 3283 { 3284 int err; 3285 int i; 3286 3287 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) || 3288 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 3289 return -EINVAL; 3290 3291 if (XE_IOCTL_DBG(xe, args->extensions)) 3292 return -EINVAL; 3293 3294 if (args->num_binds > 1) { 3295 u64 __user *bind_user = 3296 u64_to_user_ptr(args->vector_of_binds); 3297 3298 *bind_ops = kvmalloc_array(args->num_binds, 3299 sizeof(struct drm_xe_vm_bind_op), 3300 GFP_KERNEL | __GFP_ACCOUNT | 3301 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3302 if (!*bind_ops) 3303 return args->num_binds > 1 ? 
-ENOBUFS : -ENOMEM; 3304 3305 err = copy_from_user(*bind_ops, bind_user, 3306 sizeof(struct drm_xe_vm_bind_op) * 3307 args->num_binds); 3308 if (XE_IOCTL_DBG(xe, err)) { 3309 err = -EFAULT; 3310 goto free_bind_ops; 3311 } 3312 } else { 3313 *bind_ops = &args->bind; 3314 } 3315 3316 for (i = 0; i < args->num_binds; ++i) { 3317 u64 range = (*bind_ops)[i].range; 3318 u64 addr = (*bind_ops)[i].addr; 3319 u32 op = (*bind_ops)[i].op; 3320 u32 flags = (*bind_ops)[i].flags; 3321 u32 obj = (*bind_ops)[i].obj; 3322 u64 obj_offset = (*bind_ops)[i].obj_offset; 3323 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance; 3324 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 3325 bool is_cpu_addr_mirror = flags & 3326 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR; 3327 u16 pat_index = (*bind_ops)[i].pat_index; 3328 u16 coh_mode; 3329 3330 if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror && 3331 (!xe_vm_in_fault_mode(vm) || 3332 !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) { 3333 err = -EINVAL; 3334 goto free_bind_ops; 3335 } 3336 3337 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) { 3338 err = -EINVAL; 3339 goto free_bind_ops; 3340 } 3341 3342 pat_index = array_index_nospec(pat_index, xe->pat.n_entries); 3343 (*bind_ops)[i].pat_index = pat_index; 3344 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); 3345 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */ 3346 err = -EINVAL; 3347 goto free_bind_ops; 3348 } 3349 3350 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) { 3351 err = -EINVAL; 3352 goto free_bind_ops; 3353 } 3354 3355 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) || 3356 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) || 3357 XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) || 3358 XE_IOCTL_DBG(xe, obj_offset && (is_null || 3359 is_cpu_addr_mirror)) || 3360 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP && 3361 (is_null || is_cpu_addr_mirror)) || 3362 XE_IOCTL_DBG(xe, !obj && 3363 op == DRM_XE_VM_BIND_OP_MAP && 3364 !is_null && !is_cpu_addr_mirror) || 3365 XE_IOCTL_DBG(xe, !obj && 3366 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3367 XE_IOCTL_DBG(xe, addr && 3368 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3369 XE_IOCTL_DBG(xe, range && 3370 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3371 XE_IOCTL_DBG(xe, obj && 3372 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 3373 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 3374 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 3375 XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR && 3376 !IS_ENABLED(CONFIG_DRM_GPUSVM)) || 3377 XE_IOCTL_DBG(xe, obj && 3378 op == DRM_XE_VM_BIND_OP_PREFETCH) || 3379 XE_IOCTL_DBG(xe, prefetch_region && 3380 op != DRM_XE_VM_BIND_OP_PREFETCH) || 3381 XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC && 3382 !(BIT(prefetch_region) & xe->info.mem_region_mask))) || 3383 XE_IOCTL_DBG(xe, obj && 3384 op == DRM_XE_VM_BIND_OP_UNMAP)) { 3385 err = -EINVAL; 3386 goto free_bind_ops; 3387 } 3388 3389 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) || 3390 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) || 3391 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) || 3392 XE_IOCTL_DBG(xe, !range && 3393 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) { 3394 err = -EINVAL; 3395 goto free_bind_ops; 3396 } 3397 } 3398 3399 return 0; 3400 3401 free_bind_ops: 3402 if (args->num_binds > 1) 3403 kvfree(*bind_ops); 3404 *bind_ops = NULL; 3405 return err; 3406 } 3407 3408 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, 3409 struct xe_exec_queue *q, 3410 struct xe_sync_entry *syncs, 3411 int num_syncs) 3412 { 3413 struct dma_fence *fence; 3414 int i, err = 0; 3415 3416 fence = 
xe_sync_in_fence_get(syncs, num_syncs, 3417 to_wait_exec_queue(vm, q), vm); 3418 if (IS_ERR(fence)) 3419 return PTR_ERR(fence); 3420 3421 for (i = 0; i < num_syncs; i++) 3422 xe_sync_entry_signal(&syncs[i], fence); 3423 3424 xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm, 3425 fence); 3426 dma_fence_put(fence); 3427 3428 return err; 3429 } 3430 3431 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 3432 struct xe_exec_queue *q, 3433 struct xe_sync_entry *syncs, u32 num_syncs) 3434 { 3435 memset(vops, 0, sizeof(*vops)); 3436 INIT_LIST_HEAD(&vops->list); 3437 vops->vm = vm; 3438 vops->q = q; 3439 vops->syncs = syncs; 3440 vops->num_syncs = num_syncs; 3441 vops->flags = 0; 3442 } 3443 3444 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, 3445 u64 addr, u64 range, u64 obj_offset, 3446 u16 pat_index, u32 op, u32 bind_flags) 3447 { 3448 u16 coh_mode; 3449 3450 if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) || 3451 XE_IOCTL_DBG(xe, obj_offset > 3452 xe_bo_size(bo) - range)) { 3453 return -EINVAL; 3454 } 3455 3456 /* 3457 * Some platforms require 64k VM_BIND alignment, 3458 * specifically those with XE_VRAM_FLAGS_NEED64K. 3459 * 3460 * Other platforms may have BOs set to 64k physical placement, 3461 * but can be mapped at 4k offsets anyway. This check is only 3462 * there for the former case. 3463 */ 3464 if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) && 3465 (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) { 3466 if (XE_IOCTL_DBG(xe, obj_offset & 3467 XE_64K_PAGE_MASK) || 3468 XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || 3469 XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) { 3470 return -EINVAL; 3471 } 3472 } 3473 3474 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); 3475 if (bo->cpu_caching) { 3476 if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 3477 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) { 3478 return -EINVAL; 3479 } 3480 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) { 3481 /* 3482 * Imported dma-buf from a different device should 3483 * require 1way or 2way coherency since we don't know 3484 * how it was mapped on the CPU. Just assume it is 3485 * potentially cached on CPU side.
3486 */ 3487 return -EINVAL; 3488 } 3489 3490 /* If a BO is protected it can only be mapped if the key is still valid */ 3491 if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) && 3492 op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL) 3493 if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0)) 3494 return -ENOEXEC; 3495 3496 return 0; 3497 } 3498 3499 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3500 { 3501 struct xe_device *xe = to_xe_device(dev); 3502 struct xe_file *xef = to_xe_file(file); 3503 struct drm_xe_vm_bind *args = data; 3504 struct drm_xe_sync __user *syncs_user; 3505 struct xe_bo **bos = NULL; 3506 struct drm_gpuva_ops **ops = NULL; 3507 struct xe_vm *vm; 3508 struct xe_exec_queue *q = NULL; 3509 u32 num_syncs, num_ufence = 0; 3510 struct xe_sync_entry *syncs = NULL; 3511 struct drm_xe_vm_bind_op *bind_ops = NULL; 3512 struct xe_vma_ops vops; 3513 struct dma_fence *fence; 3514 int err; 3515 int i; 3516 3517 vm = xe_vm_lookup(xef, args->vm_id); 3518 if (XE_IOCTL_DBG(xe, !vm)) 3519 return -EINVAL; 3520 3521 err = vm_bind_ioctl_check_args(xe, vm, args, &bind_ops); 3522 if (err) 3523 goto put_vm; 3524 3525 if (args->exec_queue_id) { 3526 q = xe_exec_queue_lookup(xef, args->exec_queue_id); 3527 if (XE_IOCTL_DBG(xe, !q)) { 3528 err = -ENOENT; 3529 goto free_bind_ops; 3530 } 3531 3532 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) { 3533 err = -EINVAL; 3534 goto put_exec_queue; 3535 } 3536 } 3537 3538 /* Ensure all UNMAPs visible */ 3539 xe_svm_flush(vm); 3540 3541 err = down_write_killable(&vm->lock); 3542 if (err) 3543 goto put_exec_queue; 3544 3545 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { 3546 err = -ENOENT; 3547 goto release_vm_lock; 3548 } 3549 3550 for (i = 0; i < args->num_binds; ++i) { 3551 u64 range = bind_ops[i].range; 3552 u64 addr = bind_ops[i].addr; 3553 3554 if (XE_IOCTL_DBG(xe, range > vm->size) || 3555 XE_IOCTL_DBG(xe, addr > vm->size - range)) { 3556 err = -EINVAL; 3557 goto release_vm_lock; 3558 } 3559 } 3560 3561 if (args->num_binds) { 3562 bos = kvcalloc(args->num_binds, sizeof(*bos), 3563 GFP_KERNEL | __GFP_ACCOUNT | 3564 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3565 if (!bos) { 3566 err = -ENOMEM; 3567 goto release_vm_lock; 3568 } 3569 3570 ops = kvcalloc(args->num_binds, sizeof(*ops), 3571 GFP_KERNEL | __GFP_ACCOUNT | 3572 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3573 if (!ops) { 3574 err = -ENOMEM; 3575 goto free_bos; 3576 } 3577 } 3578 3579 for (i = 0; i < args->num_binds; ++i) { 3580 struct drm_gem_object *gem_obj; 3581 u64 range = bind_ops[i].range; 3582 u64 addr = bind_ops[i].addr; 3583 u32 obj = bind_ops[i].obj; 3584 u64 obj_offset = bind_ops[i].obj_offset; 3585 u16 pat_index = bind_ops[i].pat_index; 3586 u32 op = bind_ops[i].op; 3587 u32 bind_flags = bind_ops[i].flags; 3588 3589 if (!obj) 3590 continue; 3591 3592 gem_obj = drm_gem_object_lookup(file, obj); 3593 if (XE_IOCTL_DBG(xe, !gem_obj)) { 3594 err = -ENOENT; 3595 goto put_obj; 3596 } 3597 bos[i] = gem_to_xe_bo(gem_obj); 3598 3599 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range, 3600 obj_offset, pat_index, op, 3601 bind_flags); 3602 if (err) 3603 goto put_obj; 3604 } 3605 3606 if (args->num_syncs) { 3607 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); 3608 if (!syncs) { 3609 err = -ENOMEM; 3610 goto put_obj; 3611 } 3612 } 3613 3614 syncs_user = u64_to_user_ptr(args->syncs); 3615 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { 3616 err = xe_sync_entry_parse(xe, 
xef, &syncs[num_syncs], 3617 &syncs_user[num_syncs], 3618 (xe_vm_in_lr_mode(vm) ? 3619 SYNC_PARSE_FLAG_LR_MODE : 0) | 3620 (!args->num_binds ? 3621 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0)); 3622 if (err) 3623 goto free_syncs; 3624 3625 if (xe_sync_is_ufence(&syncs[num_syncs])) 3626 num_ufence++; 3627 } 3628 3629 if (XE_IOCTL_DBG(xe, num_ufence > 1)) { 3630 err = -EINVAL; 3631 goto free_syncs; 3632 } 3633 3634 if (!args->num_binds) { 3635 err = -ENODATA; 3636 goto free_syncs; 3637 } 3638 3639 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); 3640 for (i = 0; i < args->num_binds; ++i) { 3641 u64 range = bind_ops[i].range; 3642 u64 addr = bind_ops[i].addr; 3643 u32 op = bind_ops[i].op; 3644 u32 flags = bind_ops[i].flags; 3645 u64 obj_offset = bind_ops[i].obj_offset; 3646 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance; 3647 u16 pat_index = bind_ops[i].pat_index; 3648 3649 ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset, 3650 addr, range, op, flags, 3651 prefetch_region, pat_index); 3652 if (IS_ERR(ops[i])) { 3653 err = PTR_ERR(ops[i]); 3654 ops[i] = NULL; 3655 goto unwind_ops; 3656 } 3657 3658 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops); 3659 if (err) 3660 goto unwind_ops; 3661 3662 #ifdef TEST_VM_OPS_ERROR 3663 if (flags & FORCE_OP_ERROR) { 3664 vops.inject_error = true; 3665 vm->xe->vm_inject_error_position = 3666 (vm->xe->vm_inject_error_position + 1) % 3667 FORCE_OP_ERROR_COUNT; 3668 } 3669 #endif 3670 } 3671 3672 /* Nothing to do */ 3673 if (list_empty(&vops.list)) { 3674 err = -ENODATA; 3675 goto unwind_ops; 3676 } 3677 3678 err = xe_vma_ops_alloc(&vops, args->num_binds > 1); 3679 if (err) 3680 goto unwind_ops; 3681 3682 err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops); 3683 if (err) 3684 goto unwind_ops; 3685 3686 fence = vm_bind_ioctl_ops_execute(vm, &vops); 3687 if (IS_ERR(fence)) 3688 err = PTR_ERR(fence); 3689 else 3690 dma_fence_put(fence); 3691 3692 unwind_ops: 3693 if (err && err != -ENODATA) 3694 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); 3695 xe_vma_ops_fini(&vops); 3696 for (i = args->num_binds - 1; i >= 0; --i) 3697 if (ops[i]) 3698 drm_gpuva_ops_free(&vm->gpuvm, ops[i]); 3699 free_syncs: 3700 if (err == -ENODATA) 3701 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs); 3702 while (num_syncs--) 3703 xe_sync_entry_cleanup(&syncs[num_syncs]); 3704 3705 kfree(syncs); 3706 put_obj: 3707 for (i = 0; i < args->num_binds; ++i) 3708 xe_bo_put(bos[i]); 3709 3710 kvfree(ops); 3711 free_bos: 3712 kvfree(bos); 3713 release_vm_lock: 3714 up_write(&vm->lock); 3715 put_exec_queue: 3716 if (q) 3717 xe_exec_queue_put(q); 3718 free_bind_ops: 3719 if (args->num_binds > 1) 3720 kvfree(bind_ops); 3721 put_vm: 3722 xe_vm_put(vm); 3723 return err; 3724 } 3725 3726 /** 3727 * xe_vm_bind_kernel_bo - bind a kernel BO to a VM 3728 * @vm: VM to bind the BO to 3729 * @bo: BO to bind 3730 * @q: exec queue to use for the bind (optional) 3731 * @addr: address at which to bind the BO 3732 * @cache_lvl: PAT cache level to use 3733 * 3734 * Execute a VM bind map operation on a kernel-owned BO to bind it into a 3735 * kernel-owned VM. 3736 * 3737 * Returns a dma_fence to track the binding completion if the job to do so was 3738 * successfully submitted, an error pointer otherwise. 
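 *
 * A minimal usage sketch (hypothetical caller; the VM, BO and VM address are
 * assumed to be set up elsewhere and error handling is elided):
 *
 *	fence = xe_vm_bind_kernel_bo(vm, bo, NULL, addr, XE_CACHE_WB);
 *	if (!IS_ERR(fence)) {
 *		dma_fence_wait(fence, false);
 *		dma_fence_put(fence);
 *	}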
3739 */ 3740 struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo, 3741 struct xe_exec_queue *q, u64 addr, 3742 enum xe_cache_level cache_lvl) 3743 { 3744 struct xe_vma_ops vops; 3745 struct drm_gpuva_ops *ops = NULL; 3746 struct dma_fence *fence; 3747 int err; 3748 3749 xe_bo_get(bo); 3750 xe_vm_get(vm); 3751 if (q) 3752 xe_exec_queue_get(q); 3753 3754 down_write(&vm->lock); 3755 3756 xe_vma_ops_init(&vops, vm, q, NULL, 0); 3757 3758 ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo), 3759 DRM_XE_VM_BIND_OP_MAP, 0, 0, 3760 vm->xe->pat.idx[cache_lvl]); 3761 if (IS_ERR(ops)) { 3762 err = PTR_ERR(ops); 3763 goto release_vm_lock; 3764 } 3765 3766 err = vm_bind_ioctl_ops_parse(vm, ops, &vops); 3767 if (err) 3768 goto release_vm_lock; 3769 3770 xe_assert(vm->xe, !list_empty(&vops.list)); 3771 3772 err = xe_vma_ops_alloc(&vops, false); 3773 if (err) 3774 goto unwind_ops; 3775 3776 fence = vm_bind_ioctl_ops_execute(vm, &vops); 3777 if (IS_ERR(fence)) 3778 err = PTR_ERR(fence); 3779 3780 unwind_ops: 3781 if (err && err != -ENODATA) 3782 vm_bind_ioctl_ops_unwind(vm, &ops, 1); 3783 3784 xe_vma_ops_fini(&vops); 3785 drm_gpuva_ops_free(&vm->gpuvm, ops); 3786 3787 release_vm_lock: 3788 up_write(&vm->lock); 3789 3790 if (q) 3791 xe_exec_queue_put(q); 3792 xe_vm_put(vm); 3793 xe_bo_put(bo); 3794 3795 if (err) 3796 fence = ERR_PTR(err); 3797 3798 return fence; 3799 } 3800 3801 /** 3802 * xe_vm_lock() - Lock the vm's dma_resv object 3803 * @vm: The struct xe_vm whose lock is to be locked 3804 * @intr: Whether to perform any wait interruptible 3805 * 3806 * Return: 0 on success, -EINTR if @intr is true and the wait for a 3807 * contended lock was interrupted. If @intr is false, the function 3808 * always returns 0. 3809 */ 3810 int xe_vm_lock(struct xe_vm *vm, bool intr) 3811 { 3812 int ret; 3813 3814 if (intr) 3815 ret = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); 3816 else 3817 ret = dma_resv_lock(xe_vm_resv(vm), NULL); 3818 3819 return ret; 3820 } 3821 3822 /** 3823 * xe_vm_unlock() - Unlock the vm's dma_resv object 3824 * @vm: The struct xe_vm whose lock is to be released. 3825 * 3826 * Unlock a buffer object lock that was locked by xe_vm_lock(). 3827 */ 3828 void xe_vm_unlock(struct xe_vm *vm) 3829 { 3830 dma_resv_unlock(xe_vm_resv(vm)); 3831 } 3832 3833 /** 3834 * xe_vm_range_tilemask_tlb_inval - Issue a TLB invalidation on this tilemask for an 3835 * address range 3836 * @vm: The VM 3837 * @start: start address 3838 * @end: end address 3839 * @tile_mask: mask for which gt's issue tlb invalidation 3840 * 3841 * Issue a range based TLB invalidation for gt's in tilemask 3842 * 3843 * Returns 0 for success, negative error code otherwise. 
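 * Both the primary GT and, where present, the media GT of each tile selected
 * by @tile_mask are invalidated, and the call blocks until all issued
 * invalidation fences have signalled.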
3844 */ 3845 int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start, 3846 u64 end, u8 tile_mask) 3847 { 3848 struct xe_tlb_inval_fence 3849 fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; 3850 struct xe_tile *tile; 3851 u32 fence_id = 0; 3852 u8 id; 3853 int err; 3854 3855 if (!tile_mask) 3856 return 0; 3857 3858 for_each_tile(tile, vm->xe, id) { 3859 if (!(tile_mask & BIT(id))) 3860 continue; 3861 3862 xe_tlb_inval_fence_init(&tile->primary_gt->tlb_inval, 3863 &fence[fence_id], true); 3864 3865 err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval, 3866 &fence[fence_id], start, end, 3867 vm->usm.asid); 3868 if (err) 3869 goto wait; 3870 ++fence_id; 3871 3872 if (!tile->media_gt) 3873 continue; 3874 3875 xe_tlb_inval_fence_init(&tile->media_gt->tlb_inval, 3876 &fence[fence_id], true); 3877 3878 err = xe_tlb_inval_range(&tile->media_gt->tlb_inval, 3879 &fence[fence_id], start, end, 3880 vm->usm.asid); 3881 if (err) 3882 goto wait; 3883 ++fence_id; 3884 } 3885 3886 wait: 3887 for (id = 0; id < fence_id; ++id) 3888 xe_tlb_inval_fence_wait(&fence[id]); 3889 3890 return err; 3891 } 3892 3893 /** 3894 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock 3895 * @vma: VMA to invalidate 3896 * 3897 * Walks the list of page table leaves, zeroing the entries owned by this 3898 * VMA, invalidates the TLBs, and blocks until the TLB invalidation is 3899 * complete. 3900 * 3901 * Returns 0 for success, negative error code otherwise. 3902 */ 3903 int xe_vm_invalidate_vma(struct xe_vma *vma) 3904 { 3905 struct xe_device *xe = xe_vma_vm(vma)->xe; 3906 struct xe_vm *vm = xe_vma_vm(vma); 3907 struct xe_tile *tile; 3908 u8 tile_mask = 0; 3909 int ret = 0; 3910 u8 id; 3911 3912 xe_assert(xe, !xe_vma_is_null(vma)); 3913 xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma)); 3914 trace_xe_vma_invalidate(vma); 3915 3916 vm_dbg(&vm->xe->drm, 3917 "INVALIDATE: addr=0x%016llx, range=0x%016llx", 3918 xe_vma_start(vma), xe_vma_size(vma)); 3919 3920 /* 3921 * Check that we don't race with page-table updates; the tile_invalidated 3922 * update is safe 3923 */ 3924 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { 3925 if (xe_vma_is_userptr(vma)) { 3926 lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) || 3927 (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) && 3928 lockdep_is_held(&xe_vm_resv(vm)->lock.base))); 3929 3930 WARN_ON_ONCE(!mmu_interval_check_retry 3931 (&to_userptr_vma(vma)->userptr.notifier, 3932 to_userptr_vma(vma)->userptr.pages.notifier_seq)); 3933 WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm), 3934 DMA_RESV_USAGE_BOOKKEEP)); 3935 3936 } else { 3937 xe_bo_assert_held(xe_vma_bo(vma)); 3938 } 3939 } 3940 3941 for_each_tile(tile, xe, id) 3942 if (xe_pt_zap_ptes(tile, vma)) 3943 tile_mask |= BIT(id); 3944 3945 xe_device_wmb(xe); 3946 3947 ret = xe_vm_range_tilemask_tlb_inval(xe_vma_vm(vma), xe_vma_start(vma), 3948 xe_vma_end(vma), tile_mask); 3949 3950 /* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */ 3951 WRITE_ONCE(vma->tile_invalidated, vma->tile_mask); 3952 3953 return ret; 3954 } 3955 3956 int xe_vm_validate_protected(struct xe_vm *vm) 3957 { 3958 struct drm_gpuva *gpuva; 3959 int err = 0; 3960 3961 if (!vm) 3962 return -ENODEV; 3963 3964 mutex_lock(&vm->snap_mutex); 3965 3966 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 3967 struct xe_vma *vma = gpuva_to_vma(gpuva); 3968 struct xe_bo *bo = vma->gpuva.gem.obj ?
3969 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 3970 3971 if (!bo) 3972 continue; 3973 3974 if (xe_bo_is_protected(bo)) { 3975 err = xe_pxp_bo_key_check(vm->xe->pxp, bo); 3976 if (err) 3977 break; 3978 } 3979 } 3980 3981 mutex_unlock(&vm->snap_mutex); 3982 return err; 3983 } 3984 3985 struct xe_vm_snapshot { 3986 unsigned long num_snaps; 3987 struct { 3988 u64 ofs, bo_ofs; 3989 unsigned long len; 3990 struct xe_bo *bo; 3991 void *data; 3992 struct mm_struct *mm; 3993 } snap[]; 3994 }; 3995 3996 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm) 3997 { 3998 unsigned long num_snaps = 0, i; 3999 struct xe_vm_snapshot *snap = NULL; 4000 struct drm_gpuva *gpuva; 4001 4002 if (!vm) 4003 return NULL; 4004 4005 mutex_lock(&vm->snap_mutex); 4006 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 4007 if (gpuva->flags & XE_VMA_DUMPABLE) 4008 num_snaps++; 4009 } 4010 4011 if (num_snaps) 4012 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT); 4013 if (!snap) { 4014 snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV); 4015 goto out_unlock; 4016 } 4017 4018 snap->num_snaps = num_snaps; 4019 i = 0; 4020 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 4021 struct xe_vma *vma = gpuva_to_vma(gpuva); 4022 struct xe_bo *bo = vma->gpuva.gem.obj ? 4023 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 4024 4025 if (!(gpuva->flags & XE_VMA_DUMPABLE)) 4026 continue; 4027 4028 snap->snap[i].ofs = xe_vma_start(vma); 4029 snap->snap[i].len = xe_vma_size(vma); 4030 if (bo) { 4031 snap->snap[i].bo = xe_bo_get(bo); 4032 snap->snap[i].bo_ofs = xe_vma_bo_offset(vma); 4033 } else if (xe_vma_is_userptr(vma)) { 4034 struct mm_struct *mm = 4035 to_userptr_vma(vma)->userptr.notifier.mm; 4036 4037 if (mmget_not_zero(mm)) 4038 snap->snap[i].mm = mm; 4039 else 4040 snap->snap[i].data = ERR_PTR(-EFAULT); 4041 4042 snap->snap[i].bo_ofs = xe_vma_userptr(vma); 4043 } else { 4044 snap->snap[i].data = ERR_PTR(-ENOENT); 4045 } 4046 i++; 4047 } 4048 4049 out_unlock: 4050 mutex_unlock(&vm->snap_mutex); 4051 return snap; 4052 } 4053 4054 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) 4055 { 4056 if (IS_ERR_OR_NULL(snap)) 4057 return; 4058 4059 for (int i = 0; i < snap->num_snaps; i++) { 4060 struct xe_bo *bo = snap->snap[i].bo; 4061 int err; 4062 4063 if (IS_ERR(snap->snap[i].data)) 4064 continue; 4065 4066 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER); 4067 if (!snap->snap[i].data) { 4068 snap->snap[i].data = ERR_PTR(-ENOMEM); 4069 goto cleanup_bo; 4070 } 4071 4072 if (bo) { 4073 err = xe_bo_read(bo, snap->snap[i].bo_ofs, 4074 snap->snap[i].data, snap->snap[i].len); 4075 } else { 4076 void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs; 4077 4078 kthread_use_mm(snap->snap[i].mm); 4079 if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len)) 4080 err = 0; 4081 else 4082 err = -EFAULT; 4083 kthread_unuse_mm(snap->snap[i].mm); 4084 4085 mmput(snap->snap[i].mm); 4086 snap->snap[i].mm = NULL; 4087 } 4088 4089 if (err) { 4090 kvfree(snap->snap[i].data); 4091 snap->snap[i].data = ERR_PTR(err); 4092 } 4093 4094 cleanup_bo: 4095 xe_bo_put(bo); 4096 snap->snap[i].bo = NULL; 4097 } 4098 } 4099 4100 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) 4101 { 4102 unsigned long i, j; 4103 4104 if (IS_ERR_OR_NULL(snap)) { 4105 drm_printf(p, "[0].error: %li\n", PTR_ERR(snap)); 4106 return; 4107 } 4108 4109 for (i = 0; i < snap->num_snaps; i++) { 4110 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len); 4111 4112 if 
(IS_ERR(snap->snap[i].data)) { 4113 drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs, 4114 PTR_ERR(snap->snap[i].data)); 4115 continue; 4116 } 4117 4118 drm_printf(p, "[%llx].data: ", snap->snap[i].ofs); 4119 4120 for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) { 4121 u32 *val = snap->snap[i].data + j; 4122 char dumped[ASCII85_BUFSZ]; 4123 4124 drm_puts(p, ascii85_encode(*val, dumped)); 4125 } 4126 4127 drm_puts(p, "\n"); 4128 4129 if (drm_coredump_printer_is_full(p)) 4130 return; 4131 } 4132 } 4133 4134 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap) 4135 { 4136 unsigned long i; 4137 4138 if (IS_ERR_OR_NULL(snap)) 4139 return; 4140 4141 for (i = 0; i < snap->num_snaps; i++) { 4142 if (!IS_ERR(snap->snap[i].data)) 4143 kvfree(snap->snap[i].data); 4144 xe_bo_put(snap->snap[i].bo); 4145 if (snap->snap[i].mm) 4146 mmput(snap->snap[i].mm); 4147 } 4148 kvfree(snap); 4149 } 4150 4151 /** 4152 * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations 4153 * @xe: Pointer to the XE device structure 4154 * @vma: Pointer to the virtual memory area (VMA) structure 4155 * @is_atomic: In pagefault path and atomic operation 4156 * 4157 * This function determines whether the given VMA needs to be migrated to 4158 * VRAM in order to do atomic GPU operation. 4159 * 4160 * Return: 4161 * 1 - Migration to VRAM is required 4162 * 0 - Migration is not required 4163 * -EACCES - Invalid access for atomic memory attr 4164 * 4165 */ 4166 int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic) 4167 { 4168 u32 atomic_access = xe_vma_bo(vma) ? xe_vma_bo(vma)->attr.atomic_access : 4169 vma->attr.atomic_access; 4170 4171 if (!IS_DGFX(xe) || !is_atomic) 4172 return false; 4173 4174 /* 4175 * NOTE: The checks implemented here are platform-specific. For 4176 * instance, on a device supporting CXL atomics, these would ideally 4177 * work universally without additional handling. 
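	 * With the checks below, DRM_XE_ATOMIC_DEVICE only requires migration
	 * when the device cannot perform atomics on system memory, while
	 * DRM_XE_ATOMIC_CPU is rejected outright (-EACCES) because GPU atomics
	 * are not allowed on a CPU-only atomic mapping.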
4178 */ 4179 switch (atomic_access) { 4180 case DRM_XE_ATOMIC_DEVICE: 4181 return !xe->info.has_device_atomics_on_smem; 4182 4183 case DRM_XE_ATOMIC_CPU: 4184 return -EACCES; 4185 4186 case DRM_XE_ATOMIC_UNDEFINED: 4187 case DRM_XE_ATOMIC_GLOBAL: 4188 default: 4189 return 1; 4190 } 4191 } 4192 4193 static int xe_vm_alloc_vma(struct xe_vm *vm, 4194 struct drm_gpuvm_map_req *map_req, 4195 bool is_madvise) 4196 { 4197 struct xe_vma_ops vops; 4198 struct drm_gpuva_ops *ops = NULL; 4199 struct drm_gpuva_op *__op; 4200 bool is_cpu_addr_mirror = false; 4201 bool remap_op = false; 4202 struct xe_vma_mem_attr tmp_attr; 4203 u16 default_pat; 4204 int err; 4205 4206 lockdep_assert_held_write(&vm->lock); 4207 4208 if (is_madvise) 4209 ops = drm_gpuvm_madvise_ops_create(&vm->gpuvm, map_req); 4210 else 4211 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, map_req); 4212 4213 if (IS_ERR(ops)) 4214 return PTR_ERR(ops); 4215 4216 if (list_empty(&ops->list)) { 4217 err = 0; 4218 goto free_ops; 4219 } 4220 4221 drm_gpuva_for_each_op(__op, ops) { 4222 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 4223 struct xe_vma *vma = NULL; 4224 4225 if (!is_madvise) { 4226 if (__op->op == DRM_GPUVA_OP_UNMAP) { 4227 vma = gpuva_to_vma(op->base.unmap.va); 4228 XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma)); 4229 default_pat = vma->attr.default_pat_index; 4230 } 4231 4232 if (__op->op == DRM_GPUVA_OP_REMAP) { 4233 vma = gpuva_to_vma(op->base.remap.unmap->va); 4234 default_pat = vma->attr.default_pat_index; 4235 } 4236 4237 if (__op->op == DRM_GPUVA_OP_MAP) { 4238 op->map.is_cpu_addr_mirror = true; 4239 op->map.pat_index = default_pat; 4240 } 4241 } else { 4242 if (__op->op == DRM_GPUVA_OP_REMAP) { 4243 vma = gpuva_to_vma(op->base.remap.unmap->va); 4244 xe_assert(vm->xe, !remap_op); 4245 xe_assert(vm->xe, xe_vma_has_no_bo(vma)); 4246 remap_op = true; 4247 4248 if (xe_vma_is_cpu_addr_mirror(vma)) 4249 is_cpu_addr_mirror = true; 4250 else 4251 is_cpu_addr_mirror = false; 4252 } 4253 4254 if (__op->op == DRM_GPUVA_OP_MAP) { 4255 xe_assert(vm->xe, remap_op); 4256 remap_op = false; 4257 /* 4258 * In case of madvise ops DRM_GPUVA_OP_MAP is 4259 * always after DRM_GPUVA_OP_REMAP, so ensure 4260 * we assign op->map.is_cpu_addr_mirror true 4261 * if REMAP is for xe_vma_is_cpu_addr_mirror vma 4262 */ 4263 op->map.is_cpu_addr_mirror = is_cpu_addr_mirror; 4264 } 4265 } 4266 print_op(vm->xe, __op); 4267 } 4268 4269 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 4270 4271 if (is_madvise) 4272 vops.flags |= XE_VMA_OPS_FLAG_MADVISE; 4273 4274 err = vm_bind_ioctl_ops_parse(vm, ops, &vops); 4275 if (err) 4276 goto unwind_ops; 4277 4278 xe_vm_lock(vm, false); 4279 4280 drm_gpuva_for_each_op(__op, ops) { 4281 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 4282 struct xe_vma *vma; 4283 4284 if (__op->op == DRM_GPUVA_OP_UNMAP) { 4285 vma = gpuva_to_vma(op->base.unmap.va); 4286 /* There should be no unmap for madvise */ 4287 if (is_madvise) 4288 XE_WARN_ON("UNEXPECTED UNMAP"); 4289 4290 xe_vma_destroy(vma, NULL); 4291 } else if (__op->op == DRM_GPUVA_OP_REMAP) { 4292 vma = gpuva_to_vma(op->base.remap.unmap->va); 4293 /* In case of madvise ops Store attributes for REMAP UNMAPPED 4294 * VMA, so they can be assigned to newly MAP created vma. 4295 */ 4296 if (is_madvise) 4297 tmp_attr = vma->attr; 4298 4299 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL); 4300 } else if (__op->op == DRM_GPUVA_OP_MAP) { 4301 vma = op->map.vma; 4302 /* In case of madvise call, MAP will always be follwed by REMAP. 
4303 * Therefore tmp_attr will always have sane values, making it safe to 4304 * copy them to the new vma. 4305 */ 4306 if (is_madvise) 4307 vma->attr = tmp_attr; 4308 } 4309 } 4310 4311 xe_vm_unlock(vm); 4312 drm_gpuva_ops_free(&vm->gpuvm, ops); 4313 return 0; 4314 4315 unwind_ops: 4316 vm_bind_ioctl_ops_unwind(vm, &ops, 1); 4317 free_ops: 4318 drm_gpuva_ops_free(&vm->gpuvm, ops); 4319 return err; 4320 } 4321 4322 /** 4323 * xe_vm_alloc_madvise_vma - Allocate VMAs with madvise ops 4324 * @vm: Pointer to the xe_vm structure 4325 * @start: Starting input address 4326 * @range: Size of the input range 4327 * 4328 * This function splits existing VMAs to create new VMAs for the user-provided input range 4329 * 4330 * Return: 0 on success, negative error code on failure 4331 */ 4332 int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range) 4333 { 4334 struct drm_gpuvm_map_req map_req = { 4335 .map.va.addr = start, 4336 .map.va.range = range, 4337 }; 4338 4339 lockdep_assert_held_write(&vm->lock); 4340 4341 vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range); 4342 4343 return xe_vm_alloc_vma(vm, &map_req, true); 4344 } 4345 4346 /** 4347 * xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU addr mirror vma 4348 * @vm: Pointer to the xe_vm structure 4349 * @start: Starting input address 4350 * @range: Size of the input range 4351 * 4352 * This function splits/merges existing VMAs to create new VMAs for the user-provided input range 4353 * 4354 * Return: 0 on success, negative error code on failure 4355 */ 4356 int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t range) 4357 { 4358 struct drm_gpuvm_map_req map_req = { 4359 .map.va.addr = start, 4360 .map.va.range = range, 4361 }; 4362 4363 lockdep_assert_held_write(&vm->lock); 4364 4365 vm_dbg(&vm->xe->drm, "CPU_ADDR_MIRROR_VMA_OPS_CREATE: addr=0x%016llx, size=0x%016llx", 4366 start, range); 4367 4368 return xe_vm_alloc_vma(vm, &map_req, false); 4369 } 4370
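/*
 * Usage sketch for the helpers above (hypothetical caller, e.g. a
 * madvise-style ioctl path); vm->lock must already be held for writing:
 *
 *	down_write(&vm->lock);
 *	err = xe_vm_alloc_madvise_vma(vm, start, range);
 *	up_write(&vm->lock);
 */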