1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2021 Intel Corporation 4 */ 5 6 #include "xe_vm.h" 7 8 #include <linux/dma-fence-array.h> 9 #include <linux/nospec.h> 10 11 #include <drm/drm_drv.h> 12 #include <drm/drm_exec.h> 13 #include <drm/drm_print.h> 14 #include <drm/ttm/ttm_tt.h> 15 #include <uapi/drm/xe_drm.h> 16 #include <linux/ascii85.h> 17 #include <linux/delay.h> 18 #include <linux/kthread.h> 19 #include <linux/mm.h> 20 #include <linux/swap.h> 21 22 #include <generated/xe_wa_oob.h> 23 24 #include "regs/xe_gtt_defs.h" 25 #include "xe_assert.h" 26 #include "xe_bo.h" 27 #include "xe_device.h" 28 #include "xe_drm_client.h" 29 #include "xe_exec_queue.h" 30 #include "xe_gt_pagefault.h" 31 #include "xe_migrate.h" 32 #include "xe_pat.h" 33 #include "xe_pm.h" 34 #include "xe_preempt_fence.h" 35 #include "xe_pt.h" 36 #include "xe_pxp.h" 37 #include "xe_res_cursor.h" 38 #include "xe_svm.h" 39 #include "xe_sync.h" 40 #include "xe_tile.h" 41 #include "xe_tlb_inval.h" 42 #include "xe_trace_bo.h" 43 #include "xe_wa.h" 44 45 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) 46 { 47 return vm->gpuvm.r_obj; 48 } 49 50 /** 51 * xe_vm_drm_exec_lock() - Lock the vm's resv with a drm_exec transaction 52 * @vm: The vm whose resv is to be locked. 53 * @exec: The drm_exec transaction. 54 * 55 * Helper to lock the vm's resv as part of a drm_exec transaction. 56 * 57 * Return: %0 on success. See drm_exec_lock_obj() for error codes. 58 */ 59 int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec) 60 { 61 return drm_exec_lock_obj(exec, xe_vm_obj(vm)); 62 } 63 64 static bool preempt_fences_waiting(struct xe_vm *vm) 65 { 66 struct xe_exec_queue *q; 67 68 lockdep_assert_held(&vm->lock); 69 xe_vm_assert_held(vm); 70 71 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 72 if (!q->lr.pfence || 73 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, 74 &q->lr.pfence->flags)) { 75 return true; 76 } 77 } 78 79 return false; 80 } 81 82 static void free_preempt_fences(struct list_head *list) 83 { 84 struct list_head *link, *next; 85 86 list_for_each_safe(link, next, list) 87 xe_preempt_fence_free(to_preempt_fence_from_link(link)); 88 } 89 90 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list, 91 unsigned int *count) 92 { 93 lockdep_assert_held(&vm->lock); 94 xe_vm_assert_held(vm); 95 96 if (*count >= vm->preempt.num_exec_queues) 97 return 0; 98 99 for (; *count < vm->preempt.num_exec_queues; ++(*count)) { 100 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc(); 101 102 if (IS_ERR(pfence)) 103 return PTR_ERR(pfence); 104 105 list_move_tail(xe_preempt_fence_link(pfence), list); 106 } 107 108 return 0; 109 } 110 111 static int wait_for_existing_preempt_fences(struct xe_vm *vm) 112 { 113 struct xe_exec_queue *q; 114 115 xe_vm_assert_held(vm); 116 117 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 118 if (q->lr.pfence) { 119 long timeout = dma_fence_wait(q->lr.pfence, false); 120 121 /* Only -ETIME on fence indicates VM needs to be killed */ 122 if (timeout < 0 || q->lr.pfence->error == -ETIME) 123 return -ETIME; 124 125 dma_fence_put(q->lr.pfence); 126 q->lr.pfence = NULL; 127 } 128 } 129 130 return 0; 131 } 132 133 static bool xe_vm_is_idle(struct xe_vm *vm) 134 { 135 struct xe_exec_queue *q; 136 137 xe_vm_assert_held(vm); 138 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 139 if (!xe_exec_queue_is_idle(q)) 140 return false; 141 } 142 143 return true; 144 } 145 146 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list) 147 { 
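	/*
	 * Descriptive note (added): consume one pre-allocated fence from @list
	 * for each exec queue on the VM, arm it with the queue's preempt-fence
	 * context and next seqno, and install it as the queue's new preempt
	 * fence, dropping the reference to the previous one.
	 */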
148 struct list_head *link; 149 struct xe_exec_queue *q; 150 151 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 152 struct dma_fence *fence; 153 154 link = list->next; 155 xe_assert(vm->xe, link != list); 156 157 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link), 158 q, q->lr.context, 159 ++q->lr.seqno); 160 dma_fence_put(q->lr.pfence); 161 q->lr.pfence = fence; 162 } 163 } 164 165 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) 166 { 167 struct xe_exec_queue *q; 168 int err; 169 170 xe_bo_assert_held(bo); 171 172 if (!vm->preempt.num_exec_queues) 173 return 0; 174 175 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues); 176 if (err) 177 return err; 178 179 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) 180 if (q->lr.pfence) { 181 dma_resv_add_fence(bo->ttm.base.resv, 182 q->lr.pfence, 183 DMA_RESV_USAGE_BOOKKEEP); 184 } 185 186 return 0; 187 } 188 189 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm, 190 struct drm_exec *exec) 191 { 192 struct xe_exec_queue *q; 193 194 lockdep_assert_held(&vm->lock); 195 xe_vm_assert_held(vm); 196 197 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 198 q->ops->resume(q); 199 200 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence, 201 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); 202 } 203 } 204 205 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 206 { 207 struct drm_gpuvm_exec vm_exec = { 208 .vm = &vm->gpuvm, 209 .flags = DRM_EXEC_INTERRUPTIBLE_WAIT, 210 .num_fences = 1, 211 }; 212 struct drm_exec *exec = &vm_exec.exec; 213 struct xe_validation_ctx ctx; 214 struct dma_fence *pfence; 215 int err; 216 bool wait; 217 218 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); 219 220 down_write(&vm->lock); 221 err = xe_validation_exec_lock(&ctx, &vm_exec, &vm->xe->val); 222 if (err) 223 goto out_up_write; 224 225 pfence = xe_preempt_fence_create(q, q->lr.context, 226 ++q->lr.seqno); 227 if (IS_ERR(pfence)) { 228 err = PTR_ERR(pfence); 229 goto out_fini; 230 } 231 232 list_add(&q->lr.link, &vm->preempt.exec_queues); 233 ++vm->preempt.num_exec_queues; 234 q->lr.pfence = pfence; 235 236 xe_svm_notifier_lock(vm); 237 238 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence, 239 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); 240 241 /* 242 * Check to see if a preemption on VM is in flight or userptr 243 * invalidation, if so trigger this preempt fence to sync state with 244 * other preempt fences on the VM. 245 */ 246 wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm); 247 if (wait) 248 dma_fence_enable_sw_signaling(pfence); 249 250 xe_svm_notifier_unlock(vm); 251 252 out_fini: 253 xe_validation_ctx_fini(&ctx); 254 out_up_write: 255 up_write(&vm->lock); 256 257 return err; 258 } 259 ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO); 260 261 /** 262 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM 263 * @vm: The VM. 264 * @q: The exec_queue 265 * 266 * Note that this function might be called multiple times on the same queue. 
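 * Repeated calls are harmless: the queue is only unlinked while it is still on
 * the VM's list, and the preempt fence is only signaled and dropped if one is
 * still installed.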
267 */ 268 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 269 { 270 if (!xe_vm_in_preempt_fence_mode(vm)) 271 return; 272 273 down_write(&vm->lock); 274 if (!list_empty(&q->lr.link)) { 275 list_del_init(&q->lr.link); 276 --vm->preempt.num_exec_queues; 277 } 278 if (q->lr.pfence) { 279 dma_fence_enable_sw_signaling(q->lr.pfence); 280 dma_fence_put(q->lr.pfence); 281 q->lr.pfence = NULL; 282 } 283 up_write(&vm->lock); 284 } 285 286 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000 287 288 /** 289 * xe_vm_kill() - VM Kill 290 * @vm: The VM. 291 * @unlocked: Flag indicating the VM's dma-resv is not held 292 * 293 * Kill the VM by setting the banned flag, indicating the VM is no longer 294 * available for use. If in preempt fence mode, also kill all exec queues attached to the VM. 295 */ 296 void xe_vm_kill(struct xe_vm *vm, bool unlocked) 297 { 298 struct xe_exec_queue *q; 299 300 lockdep_assert_held(&vm->lock); 301 302 if (unlocked) 303 xe_vm_lock(vm, false); 304 305 vm->flags |= XE_VM_FLAG_BANNED; 306 trace_xe_vm_kill(vm); 307 308 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) 309 q->ops->kill(q); 310 311 if (unlocked) 312 xe_vm_unlock(vm); 313 314 /* TODO: Inform user the VM is banned */ 315 } 316 317 static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) 318 { 319 struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm); 320 struct drm_gpuva *gpuva; 321 int ret; 322 323 lockdep_assert_held(&vm->lock); 324 drm_gpuvm_bo_for_each_va(gpuva, vm_bo) 325 list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind, 326 &vm->rebind_list); 327 328 if (!try_wait_for_completion(&vm->xe->pm_block)) 329 return -EAGAIN; 330 331 ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false, exec); 332 if (ret) 333 return ret; 334 335 vm_bo->evicted = false; 336 return 0; 337 } 338 339 /** 340 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas 341 * @vm: The vm for which we are rebinding. 342 * @exec: The struct drm_exec with the locked GEM objects. 343 * @num_fences: The number of fences to reserve for the operation, not 344 * including rebinds and validations. 345 * 346 * Validates all evicted gem objects and rebinds their vmas. Note that 347 * rebindings may cause evictions and hence the validation-rebind 348 * sequence is rerun until there are no more objects to validate. 349 * 350 * Return: 0 on success, negative error code on error. In particular, 351 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if 352 * the drm_exec transaction needs to be restarted.
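 *
 * Intended to be called from within a drm_exec locking loop once the VM resv
 * and external objects have been prepared, e.g. (sketch mirroring
 * xe_preempt_work_begin()):
 *
 *	drm_exec_until_all_locked(&exec) {
 *		err = drm_gpuvm_prepare_vm(&vm->gpuvm, &exec, 0);
 *		if (!err)
 *			err = drm_gpuvm_prepare_objects(&vm->gpuvm, &exec, 0);
 *		if (!err)
 *			err = xe_vm_validate_rebind(vm, &exec, num_fences);
 *		drm_exec_retry_on_contention(&exec);
 *	}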
353 */ 354 int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec, 355 unsigned int num_fences) 356 { 357 struct drm_gem_object *obj; 358 unsigned long index; 359 int ret; 360 361 do { 362 ret = drm_gpuvm_validate(&vm->gpuvm, exec); 363 if (ret) 364 return ret; 365 366 ret = xe_vm_rebind(vm, false); 367 if (ret) 368 return ret; 369 } while (!list_empty(&vm->gpuvm.evict.list)); 370 371 drm_exec_for_each_locked_object(exec, index, obj) { 372 ret = dma_resv_reserve_fences(obj->resv, num_fences); 373 if (ret) 374 return ret; 375 } 376 377 return 0; 378 } 379 380 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, 381 bool *done) 382 { 383 int err; 384 385 err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0); 386 if (err) 387 return err; 388 389 if (xe_vm_is_idle(vm)) { 390 vm->preempt.rebind_deactivated = true; 391 *done = true; 392 return 0; 393 } 394 395 if (!preempt_fences_waiting(vm)) { 396 *done = true; 397 return 0; 398 } 399 400 err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0); 401 if (err) 402 return err; 403 404 err = wait_for_existing_preempt_fences(vm); 405 if (err) 406 return err; 407 408 /* 409 * Add validation and rebinding to the locking loop since both can 410 * cause evictions which may require blocking dma_resv locks. 411 * The fence reservation here is intended for the new preempt fences 412 * we attach at the end of the rebind work. 413 */ 414 return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues); 415 } 416 417 static bool vm_suspend_rebind_worker(struct xe_vm *vm) 418 { 419 struct xe_device *xe = vm->xe; 420 bool ret = false; 421 422 mutex_lock(&xe->rebind_resume_lock); 423 if (!try_wait_for_completion(&vm->xe->pm_block)) { 424 ret = true; 425 list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list); 426 } 427 mutex_unlock(&xe->rebind_resume_lock); 428 429 return ret; 430 } 431 432 /** 433 * xe_vm_resume_rebind_worker() - Resume the rebind worker. 434 * @vm: The vm whose preempt worker to resume. 435 * 436 * Resume a preempt worker that was previously suspended by 437 * vm_suspend_rebind_worker(). 438 */ 439 void xe_vm_resume_rebind_worker(struct xe_vm *vm) 440 { 441 queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work); 442 } 443 444 static void preempt_rebind_work_func(struct work_struct *w) 445 { 446 struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work); 447 struct xe_validation_ctx ctx; 448 struct drm_exec exec; 449 unsigned int fence_count = 0; 450 LIST_HEAD(preempt_fences); 451 int err = 0; 452 long wait; 453 int __maybe_unused tries = 0; 454 455 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); 456 trace_xe_vm_rebind_worker_enter(vm); 457 458 down_write(&vm->lock); 459 460 if (xe_vm_is_closed_or_banned(vm)) { 461 up_write(&vm->lock); 462 trace_xe_vm_rebind_worker_exit(vm); 463 return; 464 } 465 466 retry: 467 if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) { 468 up_write(&vm->lock); 469 /* We don't actually block but don't make progress.
*/ 470 xe_pm_might_block_on_suspend(); 471 return; 472 } 473 474 if (xe_vm_userptr_check_repin(vm)) { 475 err = xe_vm_userptr_pin(vm); 476 if (err) 477 goto out_unlock_outer; 478 } 479 480 err = xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, 481 (struct xe_val_flags) {.interruptible = true}); 482 if (err) 483 goto out_unlock_outer; 484 485 drm_exec_until_all_locked(&exec) { 486 bool done = false; 487 488 err = xe_preempt_work_begin(&exec, vm, &done); 489 drm_exec_retry_on_contention(&exec); 490 xe_validation_retry_on_oom(&ctx, &err); 491 if (err || done) { 492 xe_validation_ctx_fini(&ctx); 493 goto out_unlock_outer; 494 } 495 } 496 497 err = alloc_preempt_fences(vm, &preempt_fences, &fence_count); 498 if (err) 499 goto out_unlock; 500 501 xe_vm_set_validation_exec(vm, &exec); 502 err = xe_vm_rebind(vm, true); 503 xe_vm_set_validation_exec(vm, NULL); 504 if (err) 505 goto out_unlock; 506 507 /* Wait on rebinds and munmap style VM unbinds */ 508 wait = dma_resv_wait_timeout(xe_vm_resv(vm), 509 DMA_RESV_USAGE_KERNEL, 510 false, MAX_SCHEDULE_TIMEOUT); 511 if (wait <= 0) { 512 err = -ETIME; 513 goto out_unlock; 514 } 515 516 #define retry_required(__tries, __vm) \ 517 (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \ 518 (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \ 519 __xe_vm_userptr_needs_repin(__vm)) 520 521 xe_svm_notifier_lock(vm); 522 if (retry_required(tries, vm)) { 523 xe_svm_notifier_unlock(vm); 524 err = -EAGAIN; 525 goto out_unlock; 526 } 527 528 #undef retry_required 529 530 spin_lock(&vm->xe->ttm.lru_lock); 531 ttm_lru_bulk_move_tail(&vm->lru_bulk_move); 532 spin_unlock(&vm->xe->ttm.lru_lock); 533 534 /* Point of no return. */ 535 arm_preempt_fences(vm, &preempt_fences); 536 resume_and_reinstall_preempt_fences(vm, &exec); 537 xe_svm_notifier_unlock(vm); 538 539 out_unlock: 540 xe_validation_ctx_fini(&ctx); 541 out_unlock_outer: 542 if (err == -EAGAIN) { 543 trace_xe_vm_rebind_worker_retry(vm); 544 goto retry; 545 } 546 547 if (err) { 548 drm_warn(&vm->xe->drm, "VM worker error: %d\n", err); 549 xe_vm_kill(vm, true); 550 } 551 up_write(&vm->lock); 552 553 free_preempt_fences(&preempt_fences); 554 555 trace_xe_vm_rebind_worker_exit(vm); 556 } 557 558 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) 559 { 560 int i; 561 562 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) { 563 if (!vops->pt_update_ops[i].num_ops) 564 continue; 565 566 vops->pt_update_ops[i].ops = 567 kmalloc_array(vops->pt_update_ops[i].num_ops, 568 sizeof(*vops->pt_update_ops[i].ops), 569 GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 570 if (!vops->pt_update_ops[i].ops) 571 return array_of_binds ? 
-ENOBUFS : -ENOMEM; 572 } 573 574 return 0; 575 } 576 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO); 577 578 static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op) 579 { 580 struct xe_vma *vma; 581 582 vma = gpuva_to_vma(op->base.prefetch.va); 583 584 if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma)) 585 xa_destroy(&op->prefetch_range.range); 586 } 587 588 static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops) 589 { 590 struct xe_vma_op *op; 591 592 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) 593 return; 594 595 list_for_each_entry(op, &vops->list, link) 596 xe_vma_svm_prefetch_op_fini(op); 597 } 598 599 static void xe_vma_ops_fini(struct xe_vma_ops *vops) 600 { 601 int i; 602 603 xe_vma_svm_prefetch_ops_fini(vops); 604 605 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 606 kfree(vops->pt_update_ops[i].ops); 607 } 608 609 static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val) 610 { 611 int i; 612 613 if (!inc_val) 614 return; 615 616 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 617 if (BIT(i) & tile_mask) 618 vops->pt_update_ops[i].num_ops += inc_val; 619 } 620 621 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, 622 u8 tile_mask) 623 { 624 INIT_LIST_HEAD(&op->link); 625 op->tile_mask = tile_mask; 626 op->base.op = DRM_GPUVA_OP_MAP; 627 op->base.map.va.addr = vma->gpuva.va.addr; 628 op->base.map.va.range = vma->gpuva.va.range; 629 op->base.map.gem.obj = vma->gpuva.gem.obj; 630 op->base.map.gem.offset = vma->gpuva.gem.offset; 631 op->map.vma = vma; 632 op->map.immediate = true; 633 op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE; 634 op->map.is_null = xe_vma_is_null(vma); 635 } 636 637 static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, 638 u8 tile_mask) 639 { 640 struct xe_vma_op *op; 641 642 op = kzalloc(sizeof(*op), GFP_KERNEL); 643 if (!op) 644 return -ENOMEM; 645 646 xe_vm_populate_rebind(op, vma, tile_mask); 647 list_add_tail(&op->link, &vops->list); 648 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); 649 650 return 0; 651 } 652 653 static struct dma_fence *ops_execute(struct xe_vm *vm, 654 struct xe_vma_ops *vops); 655 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 656 struct xe_exec_queue *q, 657 struct xe_sync_entry *syncs, u32 num_syncs); 658 659 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) 660 { 661 struct dma_fence *fence; 662 struct xe_vma *vma, *next; 663 struct xe_vma_ops vops; 664 struct xe_vma_op *op, *next_op; 665 int err, i; 666 667 lockdep_assert_held(&vm->lock); 668 if ((xe_vm_in_lr_mode(vm) && !rebind_worker) || 669 list_empty(&vm->rebind_list)) 670 return 0; 671 672 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 673 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 674 vops.pt_update_ops[i].wait_vm_bookkeep = true; 675 676 xe_vm_assert_held(vm); 677 list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { 678 xe_assert(vm->xe, vma->tile_present); 679 680 if (rebind_worker) 681 trace_xe_vma_rebind_worker(vma); 682 else 683 trace_xe_vma_rebind_exec(vma); 684 685 err = xe_vm_ops_add_rebind(&vops, vma, 686 vma->tile_present); 687 if (err) 688 goto free_ops; 689 } 690 691 err = xe_vma_ops_alloc(&vops, false); 692 if (err) 693 goto free_ops; 694 695 fence = ops_execute(vm, &vops); 696 if (IS_ERR(fence)) { 697 err = PTR_ERR(fence); 698 } else { 699 dma_fence_put(fence); 700 list_for_each_entry_safe(vma, next, &vm->rebind_list, 701 combined_links.rebind) 702 
list_del_init(&vma->combined_links.rebind); 703 } 704 free_ops: 705 list_for_each_entry_safe(op, next_op, &vops.list, link) { 706 list_del(&op->link); 707 kfree(op); 708 } 709 xe_vma_ops_fini(&vops); 710 711 return err; 712 } 713 714 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask) 715 { 716 struct dma_fence *fence = NULL; 717 struct xe_vma_ops vops; 718 struct xe_vma_op *op, *next_op; 719 struct xe_tile *tile; 720 u8 id; 721 int err; 722 723 lockdep_assert_held(&vm->lock); 724 xe_vm_assert_held(vm); 725 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 726 727 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 728 for_each_tile(tile, vm->xe, id) { 729 vops.pt_update_ops[id].wait_vm_bookkeep = true; 730 vops.pt_update_ops[tile->id].q = 731 xe_migrate_exec_queue(tile->migrate); 732 } 733 734 err = xe_vm_ops_add_rebind(&vops, vma, tile_mask); 735 if (err) 736 return ERR_PTR(err); 737 738 err = xe_vma_ops_alloc(&vops, false); 739 if (err) { 740 fence = ERR_PTR(err); 741 goto free_ops; 742 } 743 744 fence = ops_execute(vm, &vops); 745 746 free_ops: 747 list_for_each_entry_safe(op, next_op, &vops.list, link) { 748 list_del(&op->link); 749 kfree(op); 750 } 751 xe_vma_ops_fini(&vops); 752 753 return fence; 754 } 755 756 static void xe_vm_populate_range_rebind(struct xe_vma_op *op, 757 struct xe_vma *vma, 758 struct xe_svm_range *range, 759 u8 tile_mask) 760 { 761 INIT_LIST_HEAD(&op->link); 762 op->tile_mask = tile_mask; 763 op->base.op = DRM_GPUVA_OP_DRIVER; 764 op->subop = XE_VMA_SUBOP_MAP_RANGE; 765 op->map_range.vma = vma; 766 op->map_range.range = range; 767 } 768 769 static int 770 xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops, 771 struct xe_vma *vma, 772 struct xe_svm_range *range, 773 u8 tile_mask) 774 { 775 struct xe_vma_op *op; 776 777 op = kzalloc(sizeof(*op), GFP_KERNEL); 778 if (!op) 779 return -ENOMEM; 780 781 xe_vm_populate_range_rebind(op, vma, range, tile_mask); 782 list_add_tail(&op->link, &vops->list); 783 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); 784 785 return 0; 786 } 787 788 /** 789 * xe_vm_range_rebind() - VM range (re)bind 790 * @vm: The VM which the range belongs to. 791 * @vma: The VMA which the range belongs to. 792 * @range: SVM range to rebind. 793 * @tile_mask: Tile mask to bind the range to. 794 * 795 * (re)bind SVM range setting up GPU page tables for the range. 
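 * Caller must hold vm->lock and the VM's dma-resv, the VM must be in fault
 * mode, and the VMA must be a CPU-address-mirror VMA (see the asserts in the
 * function body).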
796 * 797 * Return: dma fence for rebind to signal completion on success, ERR_PTR on 798 * failure 799 */ 800 struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm, 801 struct xe_vma *vma, 802 struct xe_svm_range *range, 803 u8 tile_mask) 804 { 805 struct dma_fence *fence = NULL; 806 struct xe_vma_ops vops; 807 struct xe_vma_op *op, *next_op; 808 struct xe_tile *tile; 809 u8 id; 810 int err; 811 812 lockdep_assert_held(&vm->lock); 813 xe_vm_assert_held(vm); 814 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 815 xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma)); 816 817 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 818 for_each_tile(tile, vm->xe, id) { 819 vops.pt_update_ops[id].wait_vm_bookkeep = true; 820 vops.pt_update_ops[tile->id].q = 821 xe_migrate_exec_queue(tile->migrate); 822 } 823 824 err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask); 825 if (err) 826 return ERR_PTR(err); 827 828 err = xe_vma_ops_alloc(&vops, false); 829 if (err) { 830 fence = ERR_PTR(err); 831 goto free_ops; 832 } 833 834 fence = ops_execute(vm, &vops); 835 836 free_ops: 837 list_for_each_entry_safe(op, next_op, &vops.list, link) { 838 list_del(&op->link); 839 kfree(op); 840 } 841 xe_vma_ops_fini(&vops); 842 843 return fence; 844 } 845 846 static void xe_vm_populate_range_unbind(struct xe_vma_op *op, 847 struct xe_svm_range *range) 848 { 849 INIT_LIST_HEAD(&op->link); 850 op->tile_mask = range->tile_present; 851 op->base.op = DRM_GPUVA_OP_DRIVER; 852 op->subop = XE_VMA_SUBOP_UNMAP_RANGE; 853 op->unmap_range.range = range; 854 } 855 856 static int 857 xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops, 858 struct xe_svm_range *range) 859 { 860 struct xe_vma_op *op; 861 862 op = kzalloc(sizeof(*op), GFP_KERNEL); 863 if (!op) 864 return -ENOMEM; 865 866 xe_vm_populate_range_unbind(op, range); 867 list_add_tail(&op->link, &vops->list); 868 xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1); 869 870 return 0; 871 } 872 873 /** 874 * xe_vm_range_unbind() - VM range unbind 875 * @vm: The VM which the range belongs to. 876 * @range: SVM range to unbind. 877 * 878 * Unbind SVM range removing the GPU page tables for the range.
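 * If the range is not currently bound to any tile, an already-signaled stub
 * fence is returned instead of issuing any page-table updates.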
879 * 880 * Return: dma fence for unbind to signal completion on success, ERR_PTR on 881 * failure 882 */ 883 struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm, 884 struct xe_svm_range *range) 885 { 886 struct dma_fence *fence = NULL; 887 struct xe_vma_ops vops; 888 struct xe_vma_op *op, *next_op; 889 struct xe_tile *tile; 890 u8 id; 891 int err; 892 893 lockdep_assert_held(&vm->lock); 894 xe_vm_assert_held(vm); 895 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 896 897 if (!range->tile_present) 898 return dma_fence_get_stub(); 899 900 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 901 for_each_tile(tile, vm->xe, id) { 902 vops.pt_update_ops[id].wait_vm_bookkeep = true; 903 vops.pt_update_ops[tile->id].q = 904 xe_migrate_exec_queue(tile->migrate); 905 } 906 907 err = xe_vm_ops_add_range_unbind(&vops, range); 908 if (err) 909 return ERR_PTR(err); 910 911 err = xe_vma_ops_alloc(&vops, false); 912 if (err) { 913 fence = ERR_PTR(err); 914 goto free_ops; 915 } 916 917 fence = ops_execute(vm, &vops); 918 919 free_ops: 920 list_for_each_entry_safe(op, next_op, &vops.list, link) { 921 list_del(&op->link); 922 kfree(op); 923 } 924 xe_vma_ops_fini(&vops); 925 926 return fence; 927 } 928 929 static void xe_vma_free(struct xe_vma *vma) 930 { 931 if (xe_vma_is_userptr(vma)) 932 kfree(to_userptr_vma(vma)); 933 else 934 kfree(vma); 935 } 936 937 #define VMA_CREATE_FLAG_READ_ONLY BIT(0) 938 #define VMA_CREATE_FLAG_IS_NULL BIT(1) 939 #define VMA_CREATE_FLAG_DUMPABLE BIT(2) 940 #define VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR BIT(3) 941 942 static struct xe_vma *xe_vma_create(struct xe_vm *vm, 943 struct xe_bo *bo, 944 u64 bo_offset_or_userptr, 945 u64 start, u64 end, 946 struct xe_vma_mem_attr *attr, 947 unsigned int flags) 948 { 949 struct xe_vma *vma; 950 struct xe_tile *tile; 951 u8 id; 952 bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY); 953 bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL); 954 bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE); 955 bool is_cpu_addr_mirror = 956 (flags & VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR); 957 958 xe_assert(vm->xe, start < end); 959 xe_assert(vm->xe, end < vm->size); 960 961 /* 962 * Allocate and ensure that the xe_vma_is_userptr() return 963 * matches what was allocated.
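 * Only a VMA with no backing object that is neither sparse (NULL) nor a
 * CPU-address-mirror is a userptr VMA and needs the larger struct
 * xe_userptr_vma; everything else gets a plain struct xe_vma.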
964 */ 965 if (!bo && !is_null && !is_cpu_addr_mirror) { 966 struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL); 967 968 if (!uvma) 969 return ERR_PTR(-ENOMEM); 970 971 vma = &uvma->vma; 972 } else { 973 vma = kzalloc(sizeof(*vma), GFP_KERNEL); 974 if (!vma) 975 return ERR_PTR(-ENOMEM); 976 977 if (is_cpu_addr_mirror) 978 vma->gpuva.flags |= XE_VMA_SYSTEM_ALLOCATOR; 979 if (is_null) 980 vma->gpuva.flags |= DRM_GPUVA_SPARSE; 981 if (bo) 982 vma->gpuva.gem.obj = &bo->ttm.base; 983 } 984 985 INIT_LIST_HEAD(&vma->combined_links.rebind); 986 987 INIT_LIST_HEAD(&vma->gpuva.gem.entry); 988 vma->gpuva.vm = &vm->gpuvm; 989 vma->gpuva.va.addr = start; 990 vma->gpuva.va.range = end - start + 1; 991 if (read_only) 992 vma->gpuva.flags |= XE_VMA_READ_ONLY; 993 if (dumpable) 994 vma->gpuva.flags |= XE_VMA_DUMPABLE; 995 996 for_each_tile(tile, vm->xe, id) 997 vma->tile_mask |= 0x1 << id; 998 999 if (vm->xe->info.has_atomic_enable_pte_bit) 1000 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; 1001 1002 vma->attr = *attr; 1003 1004 if (bo) { 1005 struct drm_gpuvm_bo *vm_bo; 1006 1007 xe_bo_assert_held(bo); 1008 1009 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base); 1010 if (IS_ERR(vm_bo)) { 1011 xe_vma_free(vma); 1012 return ERR_CAST(vm_bo); 1013 } 1014 1015 drm_gpuvm_bo_extobj_add(vm_bo); 1016 drm_gem_object_get(&bo->ttm.base); 1017 vma->gpuva.gem.offset = bo_offset_or_userptr; 1018 drm_gpuva_link(&vma->gpuva, vm_bo); 1019 drm_gpuvm_bo_put(vm_bo); 1020 } else /* userptr or null */ { 1021 if (!is_null && !is_cpu_addr_mirror) { 1022 struct xe_userptr_vma *uvma = to_userptr_vma(vma); 1023 u64 size = end - start + 1; 1024 int err; 1025 1026 vma->gpuva.gem.offset = bo_offset_or_userptr; 1027 1028 err = xe_userptr_setup(uvma, xe_vma_userptr(vma), size); 1029 if (err) { 1030 xe_vma_free(vma); 1031 return ERR_PTR(err); 1032 } 1033 } 1034 1035 xe_vm_get(vm); 1036 } 1037 1038 return vma; 1039 } 1040 1041 static void xe_vma_destroy_late(struct xe_vma *vma) 1042 { 1043 struct xe_vm *vm = xe_vma_vm(vma); 1044 1045 if (vma->ufence) { 1046 xe_sync_ufence_put(vma->ufence); 1047 vma->ufence = NULL; 1048 } 1049 1050 if (xe_vma_is_userptr(vma)) { 1051 struct xe_userptr_vma *uvma = to_userptr_vma(vma); 1052 1053 xe_userptr_remove(uvma); 1054 xe_vm_put(vm); 1055 } else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) { 1056 xe_vm_put(vm); 1057 } else { 1058 xe_bo_put(xe_vma_bo(vma)); 1059 } 1060 1061 xe_vma_free(vma); 1062 } 1063 1064 static void vma_destroy_work_func(struct work_struct *w) 1065 { 1066 struct xe_vma *vma = 1067 container_of(w, struct xe_vma, destroy_work); 1068 1069 xe_vma_destroy_late(vma); 1070 } 1071 1072 static void vma_destroy_cb(struct dma_fence *fence, 1073 struct dma_fence_cb *cb) 1074 { 1075 struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb); 1076 1077 INIT_WORK(&vma->destroy_work, vma_destroy_work_func); 1078 queue_work(system_unbound_wq, &vma->destroy_work); 1079 } 1080 1081 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) 1082 { 1083 struct xe_vm *vm = xe_vma_vm(vma); 1084 1085 lockdep_assert_held_write(&vm->lock); 1086 xe_assert(vm->xe, list_empty(&vma->combined_links.destroy)); 1087 1088 if (xe_vma_is_userptr(vma)) { 1089 xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED); 1090 xe_userptr_destroy(to_userptr_vma(vma)); 1091 } else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) { 1092 xe_bo_assert_held(xe_vma_bo(vma)); 1093 1094 drm_gpuva_unlink(&vma->gpuva); 1095 } 1096 1097 xe_vm_assert_held(vm); 1098 if (fence) { 
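		/*
		 * Defer the final teardown until the unbind fence signals;
		 * dma_fence_add_callback() returning -ENOENT means the fence
		 * has already signaled, in which case tear down immediately.
		 */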
1099 int ret = dma_fence_add_callback(fence, &vma->destroy_cb, 1100 vma_destroy_cb); 1101 1102 if (ret) { 1103 XE_WARN_ON(ret != -ENOENT); 1104 xe_vma_destroy_late(vma); 1105 } 1106 } else { 1107 xe_vma_destroy_late(vma); 1108 } 1109 } 1110 1111 /** 1112 * xe_vm_lock_vma() - drm_exec utility to lock a vma 1113 * @exec: The drm_exec object we're currently locking for. 1114 * @vma: The vma for which we want to lock the vm resv and any attached 1115 * object's resv. 1116 * 1117 * Return: 0 on success, negative error code on error. In particular 1118 * may return -EDEADLK on WW transaction contention and -EINTR if 1119 * an interruptible wait is terminated by a signal. 1120 */ 1121 int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma) 1122 { 1123 struct xe_vm *vm = xe_vma_vm(vma); 1124 struct xe_bo *bo = xe_vma_bo(vma); 1125 int err; 1126 1127 XE_WARN_ON(!vm); 1128 1129 err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 1130 if (!err && bo && !bo->vm) 1131 err = drm_exec_lock_obj(exec, &bo->ttm.base); 1132 1133 return err; 1134 } 1135 1136 static void xe_vma_destroy_unlocked(struct xe_vma *vma) 1137 { 1138 struct xe_device *xe = xe_vma_vm(vma)->xe; 1139 struct xe_validation_ctx ctx; 1140 struct drm_exec exec; 1141 int err = 0; 1142 1143 xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) { 1144 err = xe_vm_lock_vma(&exec, vma); 1145 drm_exec_retry_on_contention(&exec); 1146 if (XE_WARN_ON(err)) 1147 break; 1148 xe_vma_destroy(vma, NULL); 1149 } 1150 xe_assert(xe, !err); 1151 } 1152 1153 struct xe_vma * 1154 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range) 1155 { 1156 struct drm_gpuva *gpuva; 1157 1158 lockdep_assert_held(&vm->lock); 1159 1160 if (xe_vm_is_closed_or_banned(vm)) 1161 return NULL; 1162 1163 xe_assert(vm->xe, start + range <= vm->size); 1164 1165 gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range); 1166 1167 return gpuva ?
gpuva_to_vma(gpuva) : NULL; 1168 } 1169 1170 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma) 1171 { 1172 int err; 1173 1174 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1175 lockdep_assert_held(&vm->lock); 1176 1177 mutex_lock(&vm->snap_mutex); 1178 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva); 1179 mutex_unlock(&vm->snap_mutex); 1180 XE_WARN_ON(err); /* Shouldn't be possible */ 1181 1182 return err; 1183 } 1184 1185 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma) 1186 { 1187 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1188 lockdep_assert_held(&vm->lock); 1189 1190 mutex_lock(&vm->snap_mutex); 1191 drm_gpuva_remove(&vma->gpuva); 1192 mutex_unlock(&vm->snap_mutex); 1193 if (vm->usm.last_fault_vma == vma) 1194 vm->usm.last_fault_vma = NULL; 1195 } 1196 1197 static struct drm_gpuva_op *xe_vm_op_alloc(void) 1198 { 1199 struct xe_vma_op *op; 1200 1201 op = kzalloc(sizeof(*op), GFP_KERNEL); 1202 1203 if (unlikely(!op)) 1204 return NULL; 1205 1206 return &op->base; 1207 } 1208 1209 static void xe_vm_free(struct drm_gpuvm *gpuvm); 1210 1211 static const struct drm_gpuvm_ops gpuvm_ops = { 1212 .op_alloc = xe_vm_op_alloc, 1213 .vm_bo_validate = xe_gpuvm_validate, 1214 .vm_free = xe_vm_free, 1215 }; 1216 1217 static u64 pde_encode_pat_index(u16 pat_index) 1218 { 1219 u64 pte = 0; 1220 1221 if (pat_index & BIT(0)) 1222 pte |= XE_PPGTT_PTE_PAT0; 1223 1224 if (pat_index & BIT(1)) 1225 pte |= XE_PPGTT_PTE_PAT1; 1226 1227 return pte; 1228 } 1229 1230 static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level) 1231 { 1232 u64 pte = 0; 1233 1234 if (pat_index & BIT(0)) 1235 pte |= XE_PPGTT_PTE_PAT0; 1236 1237 if (pat_index & BIT(1)) 1238 pte |= XE_PPGTT_PTE_PAT1; 1239 1240 if (pat_index & BIT(2)) { 1241 if (pt_level) 1242 pte |= XE_PPGTT_PDE_PDPE_PAT2; 1243 else 1244 pte |= XE_PPGTT_PTE_PAT2; 1245 } 1246 1247 if (pat_index & BIT(3)) 1248 pte |= XELPG_PPGTT_PTE_PAT3; 1249 1250 if (pat_index & (BIT(4))) 1251 pte |= XE2_PPGTT_PTE_PAT4; 1252 1253 return pte; 1254 } 1255 1256 static u64 pte_encode_ps(u32 pt_level) 1257 { 1258 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL); 1259 1260 if (pt_level == 1) 1261 return XE_PDE_PS_2M; 1262 else if (pt_level == 2) 1263 return XE_PDPE_PS_1G; 1264 1265 return 0; 1266 } 1267 1268 static u16 pde_pat_index(struct xe_bo *bo) 1269 { 1270 struct xe_device *xe = xe_bo_device(bo); 1271 u16 pat_index; 1272 1273 /* 1274 * We only have two bits to encode the PAT index in non-leaf nodes, but 1275 * these only point to other paging structures so we only need a minimal 1276 * selection of options. The user PAT index is only for encoding leaf 1277 * nodes, where we have use of more bits to do the encoding. The 1278 * non-leaf nodes are instead under driver control so the chosen index 1279 * here should be distinct from the user PAT index. Also the 1280 * corresponding coherency of the PAT index should be tied to the 1281 * allocation type of the page table (or at least we should pick 1282 * something which is always safe).
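	 * With only PAT[1:0] available in a PDE, the encodable index range is
	 * 0-3, hence the assert on the chosen index below.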
1283 */ 1284 if (!xe_bo_is_vram(bo) && bo->ttm.ttm->caching == ttm_cached) 1285 pat_index = xe->pat.idx[XE_CACHE_WB]; 1286 else 1287 pat_index = xe->pat.idx[XE_CACHE_NONE]; 1288 1289 xe_assert(xe, pat_index <= 3); 1290 1291 return pat_index; 1292 } 1293 1294 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset) 1295 { 1296 u64 pde; 1297 1298 pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1299 pde |= XE_PAGE_PRESENT | XE_PAGE_RW; 1300 pde |= pde_encode_pat_index(pde_pat_index(bo)); 1301 1302 return pde; 1303 } 1304 1305 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, 1306 u16 pat_index, u32 pt_level) 1307 { 1308 u64 pte; 1309 1310 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1311 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1312 pte |= pte_encode_pat_index(pat_index, pt_level); 1313 pte |= pte_encode_ps(pt_level); 1314 1315 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) 1316 pte |= XE_PPGTT_PTE_DM; 1317 1318 return pte; 1319 } 1320 1321 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, 1322 u16 pat_index, u32 pt_level) 1323 { 1324 pte |= XE_PAGE_PRESENT; 1325 1326 if (likely(!xe_vma_read_only(vma))) 1327 pte |= XE_PAGE_RW; 1328 1329 pte |= pte_encode_pat_index(pat_index, pt_level); 1330 pte |= pte_encode_ps(pt_level); 1331 1332 if (unlikely(xe_vma_is_null(vma))) 1333 pte |= XE_PTE_NULL; 1334 1335 return pte; 1336 } 1337 1338 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, 1339 u16 pat_index, 1340 u32 pt_level, bool devmem, u64 flags) 1341 { 1342 u64 pte; 1343 1344 /* Avoid passing random bits directly as flags */ 1345 xe_assert(xe, !(flags & ~XE_PTE_PS64)); 1346 1347 pte = addr; 1348 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1349 pte |= pte_encode_pat_index(pat_index, pt_level); 1350 pte |= pte_encode_ps(pt_level); 1351 1352 if (devmem) 1353 pte |= XE_PPGTT_PTE_DM; 1354 1355 pte |= flags; 1356 1357 return pte; 1358 } 1359 1360 static const struct xe_pt_ops xelp_pt_ops = { 1361 .pte_encode_bo = xelp_pte_encode_bo, 1362 .pte_encode_vma = xelp_pte_encode_vma, 1363 .pte_encode_addr = xelp_pte_encode_addr, 1364 .pde_encode_bo = xelp_pde_encode_bo, 1365 }; 1366 1367 static void vm_destroy_work_func(struct work_struct *w); 1368 1369 /** 1370 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the 1371 * given tile and vm. 1372 * @xe: xe device. 1373 * @tile: tile to set up for. 1374 * @vm: vm to set up for. 1375 * @exec: The struct drm_exec object used to lock the vm resv. 1376 * 1377 * Sets up a pagetable tree with one page-table per level and a single 1378 * leaf PTE. All pagetable entries point to the single page-table or, 1379 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and 1380 * writes become NOPs. 1381 * 1382 * Return: 0 on success, negative error code on error. 
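 * One scratch page table is created per level, from MAX_HUGEPTE_LEVEL up to
 * (but not including) the root level; see the loop in the function body.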
1383 */ 1384 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, 1385 struct xe_vm *vm, struct drm_exec *exec) 1386 { 1387 u8 id = tile->id; 1388 int i; 1389 1390 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) { 1391 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i, exec); 1392 if (IS_ERR(vm->scratch_pt[id][i])) { 1393 int err = PTR_ERR(vm->scratch_pt[id][i]); 1394 1395 vm->scratch_pt[id][i] = NULL; 1396 return err; 1397 } 1398 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); 1399 } 1400 1401 return 0; 1402 } 1403 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO); 1404 1405 static void xe_vm_free_scratch(struct xe_vm *vm) 1406 { 1407 struct xe_tile *tile; 1408 u8 id; 1409 1410 if (!xe_vm_has_scratch(vm)) 1411 return; 1412 1413 for_each_tile(tile, vm->xe, id) { 1414 u32 i; 1415 1416 if (!vm->pt_root[id]) 1417 continue; 1418 1419 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i) 1420 if (vm->scratch_pt[id][i]) 1421 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL); 1422 } 1423 } 1424 1425 static void xe_vm_pt_destroy(struct xe_vm *vm) 1426 { 1427 struct xe_tile *tile; 1428 u8 id; 1429 1430 xe_vm_assert_held(vm); 1431 1432 for_each_tile(tile, vm->xe, id) { 1433 if (vm->pt_root[id]) { 1434 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); 1435 vm->pt_root[id] = NULL; 1436 } 1437 } 1438 } 1439 1440 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) 1441 { 1442 struct drm_gem_object *vm_resv_obj; 1443 struct xe_validation_ctx ctx; 1444 struct drm_exec exec; 1445 struct xe_vm *vm; 1446 int err, number_tiles = 0; 1447 struct xe_tile *tile; 1448 u8 id; 1449 1450 /* 1451 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to 1452 * ever be in faulting mode. 1453 */ 1454 xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE))); 1455 1456 vm = kzalloc(sizeof(*vm), GFP_KERNEL); 1457 if (!vm) 1458 return ERR_PTR(-ENOMEM); 1459 1460 vm->xe = xe; 1461 1462 vm->size = 1ull << xe->info.va_bits; 1463 vm->flags = flags; 1464 1465 if (xef) 1466 vm->xef = xe_file_get(xef); 1467 /** 1468 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be 1469 * manipulated under the PXP mutex. However, the PXP mutex can be taken 1470 * under a user-VM lock when the PXP session is started at exec_queue 1471 * creation time. Those are different VMs and therefore there is no risk 1472 * of deadlock, but we need to tell lockdep that this is the case or it 1473 * will print a warning. 1474 */ 1475 if (flags & XE_VM_FLAG_GSC) { 1476 static struct lock_class_key gsc_vm_key; 1477 1478 __init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key); 1479 } else { 1480 init_rwsem(&vm->lock); 1481 } 1482 mutex_init(&vm->snap_mutex); 1483 1484 INIT_LIST_HEAD(&vm->rebind_list); 1485 1486 INIT_LIST_HEAD(&vm->userptr.repin_list); 1487 INIT_LIST_HEAD(&vm->userptr.invalidated); 1488 spin_lock_init(&vm->userptr.invalidated_lock); 1489 1490 ttm_lru_bulk_move_init(&vm->lru_bulk_move); 1491 1492 INIT_WORK(&vm->destroy_work, vm_destroy_work_func); 1493 1494 INIT_LIST_HEAD(&vm->preempt.exec_queues); 1495 vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */ 1496 1497 for_each_tile(tile, xe, id) 1498 xe_range_fence_tree_init(&vm->rftree[id]); 1499 1500 vm->pt_ops = &xelp_pt_ops; 1501 1502 /* 1503 * Long-running workloads are not protected by the scheduler references. 
1504 * By design, run_job for long-running workloads returns NULL and the 1505 * scheduler drops all the references of it, hence protecting the VM 1506 * for this case is necessary. 1507 */ 1508 if (flags & XE_VM_FLAG_LR_MODE) { 1509 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); 1510 xe_pm_runtime_get_noresume(xe); 1511 INIT_LIST_HEAD(&vm->preempt.pm_activate_link); 1512 } 1513 1514 err = xe_svm_init(vm); 1515 if (err) 1516 goto err_no_resv; 1517 1518 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); 1519 if (!vm_resv_obj) { 1520 err = -ENOMEM; 1521 goto err_svm_fini; 1522 } 1523 1524 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm, 1525 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops); 1526 1527 drm_gem_object_put(vm_resv_obj); 1528 1529 err = 0; 1530 xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true}, 1531 err) { 1532 err = xe_vm_drm_exec_lock(vm, &exec); 1533 drm_exec_retry_on_contention(&exec); 1534 1535 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) 1536 vm->flags |= XE_VM_FLAG_64K; 1537 1538 for_each_tile(tile, xe, id) { 1539 if (flags & XE_VM_FLAG_MIGRATION && 1540 tile->id != XE_VM_FLAG_TILE_ID(flags)) 1541 continue; 1542 1543 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level, 1544 &exec); 1545 if (IS_ERR(vm->pt_root[id])) { 1546 err = PTR_ERR(vm->pt_root[id]); 1547 vm->pt_root[id] = NULL; 1548 xe_vm_pt_destroy(vm); 1549 drm_exec_retry_on_contention(&exec); 1550 xe_validation_retry_on_oom(&ctx, &err); 1551 break; 1552 } 1553 } 1554 if (err) 1555 break; 1556 1557 if (xe_vm_has_scratch(vm)) { 1558 for_each_tile(tile, xe, id) { 1559 if (!vm->pt_root[id]) 1560 continue; 1561 1562 err = xe_vm_create_scratch(xe, tile, vm, &exec); 1563 if (err) { 1564 xe_vm_free_scratch(vm); 1565 xe_vm_pt_destroy(vm); 1566 drm_exec_retry_on_contention(&exec); 1567 xe_validation_retry_on_oom(&ctx, &err); 1568 break; 1569 } 1570 } 1571 if (err) 1572 break; 1573 vm->batch_invalidate_tlb = true; 1574 } 1575 1576 if (vm->flags & XE_VM_FLAG_LR_MODE) { 1577 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); 1578 vm->batch_invalidate_tlb = false; 1579 } 1580 1581 /* Fill pt_root after allocating scratch tables */ 1582 for_each_tile(tile, xe, id) { 1583 if (!vm->pt_root[id]) 1584 continue; 1585 1586 xe_pt_populate_empty(tile, vm, vm->pt_root[id]); 1587 } 1588 } 1589 if (err) 1590 goto err_close; 1591 1592 /* Kernel migration VM shouldn't have a circular loop.. 
*/ 1593 if (!(flags & XE_VM_FLAG_MIGRATION)) { 1594 for_each_tile(tile, xe, id) { 1595 struct xe_exec_queue *q; 1596 u32 create_flags = EXEC_QUEUE_FLAG_VM; 1597 1598 if (!vm->pt_root[id]) 1599 continue; 1600 1601 q = xe_exec_queue_create_bind(xe, tile, create_flags, 0); 1602 if (IS_ERR(q)) { 1603 err = PTR_ERR(q); 1604 goto err_close; 1605 } 1606 vm->q[id] = q; 1607 number_tiles++; 1608 } 1609 } 1610 1611 if (number_tiles > 1) 1612 vm->composite_fence_ctx = dma_fence_context_alloc(1); 1613 1614 if (xef && xe->info.has_asid) { 1615 u32 asid; 1616 1617 down_write(&xe->usm.lock); 1618 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, 1619 XA_LIMIT(1, XE_MAX_ASID - 1), 1620 &xe->usm.next_asid, GFP_KERNEL); 1621 up_write(&xe->usm.lock); 1622 if (err < 0) 1623 goto err_close; 1624 1625 vm->usm.asid = asid; 1626 } 1627 1628 trace_xe_vm_create(vm); 1629 1630 return vm; 1631 1632 err_close: 1633 xe_vm_close_and_put(vm); 1634 return ERR_PTR(err); 1635 1636 err_svm_fini: 1637 if (flags & XE_VM_FLAG_FAULT_MODE) { 1638 vm->size = 0; /* close the vm */ 1639 xe_svm_fini(vm); 1640 } 1641 err_no_resv: 1642 mutex_destroy(&vm->snap_mutex); 1643 for_each_tile(tile, xe, id) 1644 xe_range_fence_tree_fini(&vm->rftree[id]); 1645 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 1646 if (vm->xef) 1647 xe_file_put(vm->xef); 1648 kfree(vm); 1649 if (flags & XE_VM_FLAG_LR_MODE) 1650 xe_pm_runtime_put(xe); 1651 return ERR_PTR(err); 1652 } 1653 1654 static void xe_vm_close(struct xe_vm *vm) 1655 { 1656 struct xe_device *xe = vm->xe; 1657 bool bound; 1658 int idx; 1659 1660 bound = drm_dev_enter(&xe->drm, &idx); 1661 1662 down_write(&vm->lock); 1663 if (xe_vm_in_fault_mode(vm)) 1664 xe_svm_notifier_lock(vm); 1665 1666 vm->size = 0; 1667 1668 if (!((vm->flags & XE_VM_FLAG_MIGRATION))) { 1669 struct xe_tile *tile; 1670 struct xe_gt *gt; 1671 u8 id; 1672 1673 /* Wait for pending binds */ 1674 dma_resv_wait_timeout(xe_vm_resv(vm), 1675 DMA_RESV_USAGE_BOOKKEEP, 1676 false, MAX_SCHEDULE_TIMEOUT); 1677 1678 if (bound) { 1679 for_each_tile(tile, xe, id) 1680 if (vm->pt_root[id]) 1681 xe_pt_clear(xe, vm->pt_root[id]); 1682 1683 for_each_gt(gt, xe, id) 1684 xe_tlb_inval_vm(>->tlb_inval, vm); 1685 } 1686 } 1687 1688 if (xe_vm_in_fault_mode(vm)) 1689 xe_svm_notifier_unlock(vm); 1690 up_write(&vm->lock); 1691 1692 if (bound) 1693 drm_dev_exit(idx); 1694 } 1695 1696 void xe_vm_close_and_put(struct xe_vm *vm) 1697 { 1698 LIST_HEAD(contested); 1699 struct xe_device *xe = vm->xe; 1700 struct xe_tile *tile; 1701 struct xe_vma *vma, *next_vma; 1702 struct drm_gpuva *gpuva, *next; 1703 u8 id; 1704 1705 xe_assert(xe, !vm->preempt.num_exec_queues); 1706 1707 xe_vm_close(vm); 1708 if (xe_vm_in_preempt_fence_mode(vm)) { 1709 mutex_lock(&xe->rebind_resume_lock); 1710 list_del_init(&vm->preempt.pm_activate_link); 1711 mutex_unlock(&xe->rebind_resume_lock); 1712 flush_work(&vm->preempt.rebind_work); 1713 } 1714 if (xe_vm_in_fault_mode(vm)) 1715 xe_svm_close(vm); 1716 1717 down_write(&vm->lock); 1718 for_each_tile(tile, xe, id) { 1719 if (vm->q[id]) 1720 xe_exec_queue_last_fence_put(vm->q[id], vm); 1721 } 1722 up_write(&vm->lock); 1723 1724 for_each_tile(tile, xe, id) { 1725 if (vm->q[id]) { 1726 xe_exec_queue_kill(vm->q[id]); 1727 xe_exec_queue_put(vm->q[id]); 1728 vm->q[id] = NULL; 1729 } 1730 } 1731 1732 down_write(&vm->lock); 1733 xe_vm_lock(vm, false); 1734 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) { 1735 vma = gpuva_to_vma(gpuva); 1736 1737 if (xe_vma_has_no_bo(vma)) { 1738 xe_svm_notifier_lock(vm); 1739 vma->gpuva.flags |= 
XE_VMA_DESTROYED; 1740 xe_svm_notifier_unlock(vm); 1741 } 1742 1743 xe_vm_remove_vma(vm, vma); 1744 1745 /* easy case, remove from VMA? */ 1746 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) { 1747 list_del_init(&vma->combined_links.rebind); 1748 xe_vma_destroy(vma, NULL); 1749 continue; 1750 } 1751 1752 list_move_tail(&vma->combined_links.destroy, &contested); 1753 vma->gpuva.flags |= XE_VMA_DESTROYED; 1754 } 1755 1756 /* 1757 * All vm operations will add shared fences to resv. 1758 * The only exception is eviction for a shared object, 1759 * but even so, the unbind when evicted would still 1760 * install a fence to resv. Hence it's safe to 1761 * destroy the pagetables immediately. 1762 */ 1763 xe_vm_free_scratch(vm); 1764 xe_vm_pt_destroy(vm); 1765 xe_vm_unlock(vm); 1766 1767 /* 1768 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL 1769 * Since we hold a refcount to the bo, we can remove and free 1770 * the members safely without locking. 1771 */ 1772 list_for_each_entry_safe(vma, next_vma, &contested, 1773 combined_links.destroy) { 1774 list_del_init(&vma->combined_links.destroy); 1775 xe_vma_destroy_unlocked(vma); 1776 } 1777 1778 xe_svm_fini(vm); 1779 1780 up_write(&vm->lock); 1781 1782 down_write(&xe->usm.lock); 1783 if (vm->usm.asid) { 1784 void *lookup; 1785 1786 xe_assert(xe, xe->info.has_asid); 1787 xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION)); 1788 1789 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); 1790 xe_assert(xe, lookup == vm); 1791 } 1792 up_write(&xe->usm.lock); 1793 1794 for_each_tile(tile, xe, id) 1795 xe_range_fence_tree_fini(&vm->rftree[id]); 1796 1797 xe_vm_put(vm); 1798 } 1799 1800 static void vm_destroy_work_func(struct work_struct *w) 1801 { 1802 struct xe_vm *vm = 1803 container_of(w, struct xe_vm, destroy_work); 1804 struct xe_device *xe = vm->xe; 1805 struct xe_tile *tile; 1806 u8 id; 1807 1808 /* xe_vm_close_and_put was not called? */ 1809 xe_assert(xe, !vm->size); 1810 1811 if (xe_vm_in_preempt_fence_mode(vm)) 1812 flush_work(&vm->preempt.rebind_work); 1813 1814 mutex_destroy(&vm->snap_mutex); 1815 1816 if (vm->flags & XE_VM_FLAG_LR_MODE) 1817 xe_pm_runtime_put(xe); 1818 1819 for_each_tile(tile, xe, id) 1820 XE_WARN_ON(vm->pt_root[id]); 1821 1822 trace_xe_vm_free(vm); 1823 1824 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 1825 1826 if (vm->xef) 1827 xe_file_put(vm->xef); 1828 1829 kfree(vm); 1830 } 1831 1832 static void xe_vm_free(struct drm_gpuvm *gpuvm) 1833 { 1834 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); 1835 1836 /* To destroy the VM we need to be able to sleep */ 1837 queue_work(system_unbound_wq, &vm->destroy_work); 1838 } 1839 1840 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) 1841 { 1842 struct xe_vm *vm; 1843 1844 mutex_lock(&xef->vm.lock); 1845 vm = xa_load(&xef->vm.xa, id); 1846 if (vm) 1847 xe_vm_get(vm); 1848 mutex_unlock(&xef->vm.lock); 1849 1850 return vm; 1851 } 1852 1853 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile) 1854 { 1855 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0); 1856 } 1857 1858 static struct xe_exec_queue * 1859 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 1860 { 1861 return q ? 
q : vm->q[0]; 1862 } 1863 1864 static struct xe_user_fence * 1865 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) 1866 { 1867 unsigned int i; 1868 1869 for (i = 0; i < num_syncs; i++) { 1870 struct xe_sync_entry *e = &syncs[i]; 1871 1872 if (xe_sync_is_ufence(e)) 1873 return xe_sync_ufence_get(e); 1874 } 1875 1876 return NULL; 1877 } 1878 1879 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ 1880 DRM_XE_VM_CREATE_FLAG_LR_MODE | \ 1881 DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 1882 1883 int xe_vm_create_ioctl(struct drm_device *dev, void *data, 1884 struct drm_file *file) 1885 { 1886 struct xe_device *xe = to_xe_device(dev); 1887 struct xe_file *xef = to_xe_file(file); 1888 struct drm_xe_vm_create *args = data; 1889 struct xe_vm *vm; 1890 u32 id; 1891 int err; 1892 u32 flags = 0; 1893 1894 if (XE_IOCTL_DBG(xe, args->extensions)) 1895 return -EINVAL; 1896 1897 if (XE_GT_WA(xe_root_mmio_gt(xe), 14016763929)) 1898 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE; 1899 1900 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && 1901 !xe->info.has_usm)) 1902 return -EINVAL; 1903 1904 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 1905 return -EINVAL; 1906 1907 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS)) 1908 return -EINVAL; 1909 1910 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE && 1911 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && 1912 !xe->info.needs_scratch)) 1913 return -EINVAL; 1914 1915 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) && 1916 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) 1917 return -EINVAL; 1918 1919 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE) 1920 flags |= XE_VM_FLAG_SCRATCH_PAGE; 1921 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) 1922 flags |= XE_VM_FLAG_LR_MODE; 1923 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 1924 flags |= XE_VM_FLAG_FAULT_MODE; 1925 1926 vm = xe_vm_create(xe, flags, xef); 1927 if (IS_ERR(vm)) 1928 return PTR_ERR(vm); 1929 1930 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) 1931 /* Warning: Security issue - never enable by default */ 1932 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); 1933 #endif 1934 1935 /* user id alloc must always be last in ioctl to prevent UAF */ 1936 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); 1937 if (err) 1938 goto err_close_and_put; 1939 1940 args->vm_id = id; 1941 1942 return 0; 1943 1944 err_close_and_put: 1945 xe_vm_close_and_put(vm); 1946 1947 return err; 1948 } 1949 1950 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, 1951 struct drm_file *file) 1952 { 1953 struct xe_device *xe = to_xe_device(dev); 1954 struct xe_file *xef = to_xe_file(file); 1955 struct drm_xe_vm_destroy *args = data; 1956 struct xe_vm *vm; 1957 int err = 0; 1958 1959 if (XE_IOCTL_DBG(xe, args->pad) || 1960 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 1961 return -EINVAL; 1962 1963 mutex_lock(&xef->vm.lock); 1964 vm = xa_load(&xef->vm.xa, args->vm_id); 1965 if (XE_IOCTL_DBG(xe, !vm)) 1966 err = -ENOENT; 1967 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues)) 1968 err = -EBUSY; 1969 else 1970 xa_erase(&xef->vm.xa, args->vm_id); 1971 mutex_unlock(&xef->vm.lock); 1972 1973 if (!err) 1974 xe_vm_close_and_put(vm); 1975 1976 return err; 1977 } 1978 1979 static int xe_vm_query_vmas(struct xe_vm *vm, u64 start, u64 end) 1980 { 1981 struct drm_gpuva *gpuva; 1982 u32 num_vmas = 0; 1983 1984 lockdep_assert_held(&vm->lock); 1985 
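	/*
	 * Count the VMAs overlapping [start, end) so the query ioctl can
	 * report how many mem-range attribute entries userspace needs to
	 * allocate.
	 */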
drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) 1986 num_vmas++; 1987 1988 return num_vmas; 1989 } 1990 1991 static int get_mem_attrs(struct xe_vm *vm, u32 *num_vmas, u64 start, 1992 u64 end, struct drm_xe_mem_range_attr *attrs) 1993 { 1994 struct drm_gpuva *gpuva; 1995 int i = 0; 1996 1997 lockdep_assert_held(&vm->lock); 1998 1999 drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) { 2000 struct xe_vma *vma = gpuva_to_vma(gpuva); 2001 2002 if (i == *num_vmas) 2003 return -ENOSPC; 2004 2005 attrs[i].start = xe_vma_start(vma); 2006 attrs[i].end = xe_vma_end(vma); 2007 attrs[i].atomic.val = vma->attr.atomic_access; 2008 attrs[i].pat_index.val = vma->attr.pat_index; 2009 attrs[i].preferred_mem_loc.devmem_fd = vma->attr.preferred_loc.devmem_fd; 2010 attrs[i].preferred_mem_loc.migration_policy = 2011 vma->attr.preferred_loc.migration_policy; 2012 2013 i++; 2014 } 2015 2016 *num_vmas = i; 2017 return 0; 2018 } 2019 2020 int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 2021 { 2022 struct xe_device *xe = to_xe_device(dev); 2023 struct xe_file *xef = to_xe_file(file); 2024 struct drm_xe_mem_range_attr *mem_attrs; 2025 struct drm_xe_vm_query_mem_range_attr *args = data; 2026 u64 __user *attrs_user = u64_to_user_ptr(args->vector_of_mem_attr); 2027 struct xe_vm *vm; 2028 int err = 0; 2029 2030 if (XE_IOCTL_DBG(xe, 2031 ((args->num_mem_ranges == 0 && 2032 (attrs_user || args->sizeof_mem_range_attr != 0)) || 2033 (args->num_mem_ranges > 0 && 2034 (!attrs_user || 2035 args->sizeof_mem_range_attr != 2036 sizeof(struct drm_xe_mem_range_attr)))))) 2037 return -EINVAL; 2038 2039 vm = xe_vm_lookup(xef, args->vm_id); 2040 if (XE_IOCTL_DBG(xe, !vm)) 2041 return -EINVAL; 2042 2043 err = down_read_interruptible(&vm->lock); 2044 if (err) 2045 goto put_vm; 2046 2047 attrs_user = u64_to_user_ptr(args->vector_of_mem_attr); 2048 2049 if (args->num_mem_ranges == 0 && !attrs_user) { 2050 args->num_mem_ranges = xe_vm_query_vmas(vm, args->start, args->start + args->range); 2051 args->sizeof_mem_range_attr = sizeof(struct drm_xe_mem_range_attr); 2052 goto unlock_vm; 2053 } 2054 2055 mem_attrs = kvmalloc_array(args->num_mem_ranges, args->sizeof_mem_range_attr, 2056 GFP_KERNEL | __GFP_ACCOUNT | 2057 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 2058 if (!mem_attrs) { 2059 err = args->num_mem_ranges > 1 ? 
-ENOBUFS : -ENOMEM; 2060 goto unlock_vm; 2061 } 2062 2063 memset(mem_attrs, 0, args->num_mem_ranges * args->sizeof_mem_range_attr); 2064 err = get_mem_attrs(vm, &args->num_mem_ranges, args->start, 2065 args->start + args->range, mem_attrs); 2066 if (err) 2067 goto free_mem_attrs; 2068 2069 err = copy_to_user(attrs_user, mem_attrs, 2070 args->sizeof_mem_range_attr * args->num_mem_ranges); 2071 if (err) 2072 err = -EFAULT; 2073 2074 free_mem_attrs: 2075 kvfree(mem_attrs); 2076 unlock_vm: 2077 up_read(&vm->lock); 2078 put_vm: 2079 xe_vm_put(vm); 2080 return err; 2081 } 2082 2083 static bool vma_matches(struct xe_vma *vma, u64 page_addr) 2084 { 2085 if (page_addr > xe_vma_end(vma) - 1 || 2086 page_addr + SZ_4K - 1 < xe_vma_start(vma)) 2087 return false; 2088 2089 return true; 2090 } 2091 2092 /** 2093 * xe_vm_find_vma_by_addr() - Find a VMA by its address 2094 * 2095 * @vm: the xe_vm the vma belongs to 2096 * @page_addr: address to look up 2097 */ 2098 struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr) 2099 { 2100 struct xe_vma *vma = NULL; 2101 2102 if (vm->usm.last_fault_vma) { /* Fast lookup */ 2103 if (vma_matches(vm->usm.last_fault_vma, page_addr)) 2104 vma = vm->usm.last_fault_vma; 2105 } 2106 if (!vma) 2107 vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K); 2108 2109 return vma; 2110 } 2111 2112 static const u32 region_to_mem_type[] = { 2113 XE_PL_TT, 2114 XE_PL_VRAM0, 2115 XE_PL_VRAM1, 2116 }; 2117 2118 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma, 2119 bool post_commit) 2120 { 2121 xe_svm_notifier_lock(vm); 2122 vma->gpuva.flags |= XE_VMA_DESTROYED; 2123 xe_svm_notifier_unlock(vm); 2124 if (post_commit) 2125 xe_vm_remove_vma(vm, vma); 2126 } 2127 2128 #undef ULL 2129 #define ULL unsigned long long 2130 2131 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) 2132 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 2133 { 2134 struct xe_vma *vma; 2135 2136 switch (op->op) { 2137 case DRM_GPUVA_OP_MAP: 2138 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx", 2139 (ULL)op->map.va.addr, (ULL)op->map.va.range); 2140 break; 2141 case DRM_GPUVA_OP_REMAP: 2142 vma = gpuva_to_vma(op->remap.unmap->va); 2143 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 2144 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 2145 op->remap.unmap->keep ? 1 : 0); 2146 if (op->remap.prev) 2147 vm_dbg(&xe->drm, 2148 "REMAP:PREV: addr=0x%016llx, range=0x%016llx", 2149 (ULL)op->remap.prev->va.addr, 2150 (ULL)op->remap.prev->va.range); 2151 if (op->remap.next) 2152 vm_dbg(&xe->drm, 2153 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx", 2154 (ULL)op->remap.next->va.addr, 2155 (ULL)op->remap.next->va.range); 2156 break; 2157 case DRM_GPUVA_OP_UNMAP: 2158 vma = gpuva_to_vma(op->unmap.va); 2159 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 2160 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 2161 op->unmap.keep ? 
1 : 0); 2162 break; 2163 case DRM_GPUVA_OP_PREFETCH: 2164 vma = gpuva_to_vma(op->prefetch.va); 2165 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx", 2166 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma)); 2167 break; 2168 default: 2169 drm_warn(&xe->drm, "NOT POSSIBLE"); 2170 } 2171 } 2172 #else 2173 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 2174 { 2175 } 2176 #endif 2177 2178 static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags) 2179 { 2180 if (!xe_vm_in_fault_mode(vm)) 2181 return false; 2182 2183 if (!xe_vm_has_scratch(vm)) 2184 return false; 2185 2186 if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE) 2187 return false; 2188 2189 return true; 2190 } 2191 2192 static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops) 2193 { 2194 struct drm_gpuva_op *__op; 2195 2196 drm_gpuva_for_each_op(__op, ops) { 2197 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2198 2199 xe_vma_svm_prefetch_op_fini(op); 2200 } 2201 } 2202 2203 /* 2204 * Create operations list from IOCTL arguments, setup operations fields so parse 2205 * and commit steps are decoupled from IOCTL arguments. This step can fail. 2206 */ 2207 static struct drm_gpuva_ops * 2208 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, 2209 struct xe_bo *bo, u64 bo_offset_or_userptr, 2210 u64 addr, u64 range, 2211 u32 operation, u32 flags, 2212 u32 prefetch_region, u16 pat_index) 2213 { 2214 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL; 2215 struct drm_gpuva_ops *ops; 2216 struct drm_gpuva_op *__op; 2217 struct drm_gpuvm_bo *vm_bo; 2218 u64 range_end = addr + range; 2219 int err; 2220 2221 lockdep_assert_held_write(&vm->lock); 2222 2223 vm_dbg(&vm->xe->drm, 2224 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx", 2225 operation, (ULL)addr, (ULL)range, 2226 (ULL)bo_offset_or_userptr); 2227 2228 switch (operation) { 2229 case DRM_XE_VM_BIND_OP_MAP: 2230 case DRM_XE_VM_BIND_OP_MAP_USERPTR: { 2231 struct drm_gpuvm_map_req map_req = { 2232 .map.va.addr = addr, 2233 .map.va.range = range, 2234 .map.gem.obj = obj, 2235 .map.gem.offset = bo_offset_or_userptr, 2236 }; 2237 2238 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, &map_req); 2239 break; 2240 } 2241 case DRM_XE_VM_BIND_OP_UNMAP: 2242 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range); 2243 break; 2244 case DRM_XE_VM_BIND_OP_PREFETCH: 2245 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range); 2246 break; 2247 case DRM_XE_VM_BIND_OP_UNMAP_ALL: 2248 xe_assert(vm->xe, bo); 2249 2250 err = xe_bo_lock(bo, true); 2251 if (err) 2252 return ERR_PTR(err); 2253 2254 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj); 2255 if (IS_ERR(vm_bo)) { 2256 xe_bo_unlock(bo); 2257 return ERR_CAST(vm_bo); 2258 } 2259 2260 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo); 2261 drm_gpuvm_bo_put(vm_bo); 2262 xe_bo_unlock(bo); 2263 break; 2264 default: 2265 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2266 ops = ERR_PTR(-EINVAL); 2267 } 2268 if (IS_ERR(ops)) 2269 return ops; 2270 2271 drm_gpuva_for_each_op(__op, ops) { 2272 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2273 2274 if (__op->op == DRM_GPUVA_OP_MAP) { 2275 op->map.immediate = 2276 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; 2277 op->map.read_only = 2278 flags & DRM_XE_VM_BIND_FLAG_READONLY; 2279 op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 2280 op->map.is_cpu_addr_mirror = flags & 2281 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR; 2282 op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; 2283 op->map.pat_index = pat_index; 2284 
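/*
 * A fault-mode VM with scratch pages needs its scratch PTEs over this
 * range invalidated when the bind is deferred (not IMMEDIATE); see
 * __xe_vm_needs_clear_scratch_pages().
 */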
op->map.invalidate_on_bind = 2285 __xe_vm_needs_clear_scratch_pages(vm, flags); 2286 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { 2287 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2288 struct xe_tile *tile; 2289 struct xe_svm_range *svm_range; 2290 struct drm_gpusvm_ctx ctx = {}; 2291 struct drm_pagemap *dpagemap; 2292 u8 id, tile_mask = 0; 2293 u32 i; 2294 2295 if (!xe_vma_is_cpu_addr_mirror(vma)) { 2296 op->prefetch.region = prefetch_region; 2297 break; 2298 } 2299 2300 ctx.read_only = xe_vma_read_only(vma); 2301 ctx.devmem_possible = IS_DGFX(vm->xe) && 2302 IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); 2303 2304 for_each_tile(tile, vm->xe, id) 2305 tile_mask |= 0x1 << id; 2306 2307 xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC); 2308 op->prefetch_range.ranges_count = 0; 2309 tile = NULL; 2310 2311 if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) { 2312 dpagemap = xe_vma_resolve_pagemap(vma, 2313 xe_device_get_root_tile(vm->xe)); 2314 /* 2315 * TODO: Once multigpu support is enabled will need 2316 * something to dereference tile from dpagemap. 2317 */ 2318 if (dpagemap) 2319 tile = xe_device_get_root_tile(vm->xe); 2320 } else if (prefetch_region) { 2321 tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] - 2322 XE_PL_VRAM0]; 2323 } 2324 2325 op->prefetch_range.tile = tile; 2326 alloc_next_range: 2327 svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx); 2328 2329 if (PTR_ERR(svm_range) == -ENOENT) { 2330 u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma); 2331 2332 addr = ret == ULONG_MAX ? 0 : ret; 2333 if (addr) 2334 goto alloc_next_range; 2335 else 2336 goto print_op_label; 2337 } 2338 2339 if (IS_ERR(svm_range)) { 2340 err = PTR_ERR(svm_range); 2341 goto unwind_prefetch_ops; 2342 } 2343 2344 if (xe_svm_range_validate(vm, svm_range, tile_mask, !!tile)) { 2345 xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID"); 2346 goto check_next_range; 2347 } 2348 2349 err = xa_alloc(&op->prefetch_range.range, 2350 &i, svm_range, xa_limit_32b, 2351 GFP_KERNEL); 2352 2353 if (err) 2354 goto unwind_prefetch_ops; 2355 2356 op->prefetch_range.ranges_count++; 2357 vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH; 2358 xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED"); 2359 check_next_range: 2360 if (range_end > xe_svm_range_end(svm_range) && 2361 xe_svm_range_end(svm_range) < xe_vma_end(vma)) { 2362 addr = xe_svm_range_end(svm_range); 2363 goto alloc_next_range; 2364 } 2365 } 2366 print_op_label: 2367 print_op(vm->xe, __op); 2368 } 2369 2370 return ops; 2371 2372 unwind_prefetch_ops: 2373 xe_svm_prefetch_gpuva_ops_fini(ops); 2374 drm_gpuva_ops_free(&vm->gpuvm, ops); 2375 return ERR_PTR(err); 2376 } 2377 2378 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO); 2379 2380 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, 2381 struct xe_vma_mem_attr *attr, unsigned int flags) 2382 { 2383 struct xe_bo *bo = op->gem.obj ? 
gem_to_xe_bo(op->gem.obj) : NULL; 2384 struct xe_validation_ctx ctx; 2385 struct drm_exec exec; 2386 struct xe_vma *vma; 2387 int err = 0; 2388 2389 lockdep_assert_held_write(&vm->lock); 2390 2391 if (bo) { 2392 err = 0; 2393 xe_validation_guard(&ctx, &vm->xe->val, &exec, 2394 (struct xe_val_flags) {.interruptible = true}, err) { 2395 if (!bo->vm) { 2396 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm)); 2397 drm_exec_retry_on_contention(&exec); 2398 } 2399 if (!err) { 2400 err = drm_exec_lock_obj(&exec, &bo->ttm.base); 2401 drm_exec_retry_on_contention(&exec); 2402 } 2403 if (err) 2404 return ERR_PTR(err); 2405 2406 vma = xe_vma_create(vm, bo, op->gem.offset, 2407 op->va.addr, op->va.addr + 2408 op->va.range - 1, attr, flags); 2409 if (IS_ERR(vma)) 2410 return vma; 2411 2412 if (!bo->vm) { 2413 err = add_preempt_fences(vm, bo); 2414 if (err) { 2415 prep_vma_destroy(vm, vma, false); 2416 xe_vma_destroy(vma, NULL); 2417 } 2418 } 2419 } 2420 if (err) 2421 return ERR_PTR(err); 2422 } else { 2423 vma = xe_vma_create(vm, NULL, op->gem.offset, 2424 op->va.addr, op->va.addr + 2425 op->va.range - 1, attr, flags); 2426 if (IS_ERR(vma)) 2427 return vma; 2428 2429 if (xe_vma_is_userptr(vma)) 2430 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2431 } 2432 if (err) { 2433 prep_vma_destroy(vm, vma, false); 2434 xe_vma_destroy_unlocked(vma); 2435 vma = ERR_PTR(err); 2436 } 2437 2438 return vma; 2439 } 2440 2441 static u64 xe_vma_max_pte_size(struct xe_vma *vma) 2442 { 2443 if (vma->gpuva.flags & XE_VMA_PTE_1G) 2444 return SZ_1G; 2445 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT)) 2446 return SZ_2M; 2447 else if (vma->gpuva.flags & XE_VMA_PTE_64K) 2448 return SZ_64K; 2449 else if (vma->gpuva.flags & XE_VMA_PTE_4K) 2450 return SZ_4K; 2451 2452 return SZ_1G; /* Uninitialized, used max size */ 2453 } 2454 2455 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size) 2456 { 2457 switch (size) { 2458 case SZ_1G: 2459 vma->gpuva.flags |= XE_VMA_PTE_1G; 2460 break; 2461 case SZ_2M: 2462 vma->gpuva.flags |= XE_VMA_PTE_2M; 2463 break; 2464 case SZ_64K: 2465 vma->gpuva.flags |= XE_VMA_PTE_64K; 2466 break; 2467 case SZ_4K: 2468 vma->gpuva.flags |= XE_VMA_PTE_4K; 2469 break; 2470 } 2471 } 2472 2473 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) 2474 { 2475 int err = 0; 2476 2477 lockdep_assert_held_write(&vm->lock); 2478 2479 switch (op->base.op) { 2480 case DRM_GPUVA_OP_MAP: 2481 err |= xe_vm_insert_vma(vm, op->map.vma); 2482 if (!err) 2483 op->flags |= XE_VMA_OP_COMMITTED; 2484 break; 2485 case DRM_GPUVA_OP_REMAP: 2486 { 2487 u8 tile_present = 2488 gpuva_to_vma(op->base.remap.unmap->va)->tile_present; 2489 2490 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va), 2491 true); 2492 op->flags |= XE_VMA_OP_COMMITTED; 2493 2494 if (op->remap.prev) { 2495 err |= xe_vm_insert_vma(vm, op->remap.prev); 2496 if (!err) 2497 op->flags |= XE_VMA_OP_PREV_COMMITTED; 2498 if (!err && op->remap.skip_prev) { 2499 op->remap.prev->tile_present = 2500 tile_present; 2501 op->remap.prev = NULL; 2502 } 2503 } 2504 if (op->remap.next) { 2505 err |= xe_vm_insert_vma(vm, op->remap.next); 2506 if (!err) 2507 op->flags |= XE_VMA_OP_NEXT_COMMITTED; 2508 if (!err && op->remap.skip_next) { 2509 op->remap.next->tile_present = 2510 tile_present; 2511 op->remap.next = NULL; 2512 } 2513 } 2514 2515 /* Adjust for partial unbind after removing VMA from VM */ 2516 if (!err) { 2517 op->base.remap.unmap->va->va.addr = op->remap.start; 2518 op->base.remap.unmap->va->va.range = op->remap.range; 2519 } 
2520 break; 2521 } 2522 case DRM_GPUVA_OP_UNMAP: 2523 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true); 2524 op->flags |= XE_VMA_OP_COMMITTED; 2525 break; 2526 case DRM_GPUVA_OP_PREFETCH: 2527 op->flags |= XE_VMA_OP_COMMITTED; 2528 break; 2529 default: 2530 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2531 } 2532 2533 return err; 2534 } 2535 2536 /** 2537 * xe_vma_has_default_mem_attrs - Check if a VMA has default memory attributes 2538 * @vma: Pointer to the xe_vma structure to check 2539 * 2540 * This function determines whether the given VMA (Virtual Memory Area) 2541 * has its memory attributes set to their default values. Specifically, 2542 * it checks the following conditions: 2543 * 2544 * - `atomic_access` is `DRM_XE_ATOMIC_UNDEFINED` 2545 * - `pat_index` is equal to `default_pat_index` 2546 * - `preferred_loc.devmem_fd` is `DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE` 2547 * - `preferred_loc.migration_policy` is `DRM_XE_MIGRATE_ALL_PAGES` 2548 * 2549 * Return: true if all attributes are at their default values, false otherwise. 2550 */ 2551 bool xe_vma_has_default_mem_attrs(struct xe_vma *vma) 2552 { 2553 return (vma->attr.atomic_access == DRM_XE_ATOMIC_UNDEFINED && 2554 vma->attr.pat_index == vma->attr.default_pat_index && 2555 vma->attr.preferred_loc.devmem_fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE && 2556 vma->attr.preferred_loc.migration_policy == DRM_XE_MIGRATE_ALL_PAGES); 2557 } 2558 2559 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, 2560 struct xe_vma_ops *vops) 2561 { 2562 struct xe_device *xe = vm->xe; 2563 struct drm_gpuva_op *__op; 2564 struct xe_tile *tile; 2565 u8 id, tile_mask = 0; 2566 int err = 0; 2567 2568 lockdep_assert_held_write(&vm->lock); 2569 2570 for_each_tile(tile, vm->xe, id) 2571 tile_mask |= 0x1 << id; 2572 2573 drm_gpuva_for_each_op(__op, ops) { 2574 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2575 struct xe_vma *vma; 2576 unsigned int flags = 0; 2577 2578 INIT_LIST_HEAD(&op->link); 2579 list_add_tail(&op->link, &vops->list); 2580 op->tile_mask = tile_mask; 2581 2582 switch (op->base.op) { 2583 case DRM_GPUVA_OP_MAP: 2584 { 2585 struct xe_vma_mem_attr default_attr = { 2586 .preferred_loc = { 2587 .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE, 2588 .migration_policy = DRM_XE_MIGRATE_ALL_PAGES, 2589 }, 2590 .atomic_access = DRM_XE_ATOMIC_UNDEFINED, 2591 .default_pat_index = op->map.pat_index, 2592 .pat_index = op->map.pat_index, 2593 }; 2594 2595 flags |= op->map.read_only ? 2596 VMA_CREATE_FLAG_READ_ONLY : 0; 2597 flags |= op->map.is_null ? 2598 VMA_CREATE_FLAG_IS_NULL : 0; 2599 flags |= op->map.dumpable ? 2600 VMA_CREATE_FLAG_DUMPABLE : 0; 2601 flags |= op->map.is_cpu_addr_mirror ?
2602 VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0; 2603 2604 vma = new_vma(vm, &op->base.map, &default_attr, 2605 flags); 2606 if (IS_ERR(vma)) 2607 return PTR_ERR(vma); 2608 2609 op->map.vma = vma; 2610 if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) && 2611 !op->map.is_cpu_addr_mirror) || 2612 op->map.invalidate_on_bind) 2613 xe_vma_ops_incr_pt_update_ops(vops, 2614 op->tile_mask, 1); 2615 break; 2616 } 2617 case DRM_GPUVA_OP_REMAP: 2618 { 2619 struct xe_vma *old = 2620 gpuva_to_vma(op->base.remap.unmap->va); 2621 bool skip = xe_vma_is_cpu_addr_mirror(old); 2622 u64 start = xe_vma_start(old), end = xe_vma_end(old); 2623 int num_remap_ops = 0; 2624 2625 if (op->base.remap.prev) 2626 start = op->base.remap.prev->va.addr + 2627 op->base.remap.prev->va.range; 2628 if (op->base.remap.next) 2629 end = op->base.remap.next->va.addr; 2630 2631 if (xe_vma_is_cpu_addr_mirror(old) && 2632 xe_svm_has_mapping(vm, start, end)) { 2633 if (vops->flags & XE_VMA_OPS_FLAG_MADVISE) 2634 xe_svm_unmap_address_range(vm, start, end); 2635 else 2636 return -EBUSY; 2637 } 2638 2639 op->remap.start = xe_vma_start(old); 2640 op->remap.range = xe_vma_size(old); 2641 2642 flags |= op->base.remap.unmap->va->flags & 2643 XE_VMA_READ_ONLY ? 2644 VMA_CREATE_FLAG_READ_ONLY : 0; 2645 flags |= op->base.remap.unmap->va->flags & 2646 DRM_GPUVA_SPARSE ? 2647 VMA_CREATE_FLAG_IS_NULL : 0; 2648 flags |= op->base.remap.unmap->va->flags & 2649 XE_VMA_DUMPABLE ? 2650 VMA_CREATE_FLAG_DUMPABLE : 0; 2651 flags |= xe_vma_is_cpu_addr_mirror(old) ? 2652 VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0; 2653 2654 if (op->base.remap.prev) { 2655 vma = new_vma(vm, op->base.remap.prev, 2656 &old->attr, flags); 2657 if (IS_ERR(vma)) 2658 return PTR_ERR(vma); 2659 2660 op->remap.prev = vma; 2661 2662 /* 2663 * Userptr creates a new SG mapping so 2664 * we must also rebind. 2665 */ 2666 op->remap.skip_prev = skip || 2667 (!xe_vma_is_userptr(old) && 2668 IS_ALIGNED(xe_vma_end(vma), 2669 xe_vma_max_pte_size(old))); 2670 if (op->remap.skip_prev) { 2671 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2672 op->remap.range -= 2673 xe_vma_end(vma) - 2674 xe_vma_start(old); 2675 op->remap.start = xe_vma_end(vma); 2676 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx", 2677 (ULL)op->remap.start, 2678 (ULL)op->remap.range); 2679 } else { 2680 num_remap_ops++; 2681 } 2682 } 2683 2684 if (op->base.remap.next) { 2685 vma = new_vma(vm, op->base.remap.next, 2686 &old->attr, flags); 2687 if (IS_ERR(vma)) 2688 return PTR_ERR(vma); 2689 2690 op->remap.next = vma; 2691 2692 /* 2693 * Userptr creates a new SG mapping so 2694 * we must also rebind. 
2695 */ 2696 op->remap.skip_next = skip || 2697 (!xe_vma_is_userptr(old) && 2698 IS_ALIGNED(xe_vma_start(vma), 2699 xe_vma_max_pte_size(old))); 2700 if (op->remap.skip_next) { 2701 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2702 op->remap.range -= 2703 xe_vma_end(old) - 2704 xe_vma_start(vma); 2705 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx", 2706 (ULL)op->remap.start, 2707 (ULL)op->remap.range); 2708 } else { 2709 num_remap_ops++; 2710 } 2711 } 2712 if (!skip) 2713 num_remap_ops++; 2714 2715 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops); 2716 break; 2717 } 2718 case DRM_GPUVA_OP_UNMAP: 2719 vma = gpuva_to_vma(op->base.unmap.va); 2720 2721 if (xe_vma_is_cpu_addr_mirror(vma) && 2722 xe_svm_has_mapping(vm, xe_vma_start(vma), 2723 xe_vma_end(vma))) 2724 return -EBUSY; 2725 2726 if (!xe_vma_is_cpu_addr_mirror(vma)) 2727 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2728 break; 2729 case DRM_GPUVA_OP_PREFETCH: 2730 vma = gpuva_to_vma(op->base.prefetch.va); 2731 2732 if (xe_vma_is_userptr(vma)) { 2733 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2734 if (err) 2735 return err; 2736 } 2737 2738 if (xe_vma_is_cpu_addr_mirror(vma)) 2739 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 2740 op->prefetch_range.ranges_count); 2741 else 2742 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2743 2744 break; 2745 default: 2746 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2747 } 2748 2749 err = xe_vma_op_commit(vm, op); 2750 if (err) 2751 return err; 2752 } 2753 2754 return 0; 2755 } 2756 2757 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, 2758 bool post_commit, bool prev_post_commit, 2759 bool next_post_commit) 2760 { 2761 lockdep_assert_held_write(&vm->lock); 2762 2763 switch (op->base.op) { 2764 case DRM_GPUVA_OP_MAP: 2765 if (op->map.vma) { 2766 prep_vma_destroy(vm, op->map.vma, post_commit); 2767 xe_vma_destroy_unlocked(op->map.vma); 2768 } 2769 break; 2770 case DRM_GPUVA_OP_UNMAP: 2771 { 2772 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va); 2773 2774 if (vma) { 2775 xe_svm_notifier_lock(vm); 2776 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2777 xe_svm_notifier_unlock(vm); 2778 if (post_commit) 2779 xe_vm_insert_vma(vm, vma); 2780 } 2781 break; 2782 } 2783 case DRM_GPUVA_OP_REMAP: 2784 { 2785 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va); 2786 2787 if (op->remap.prev) { 2788 prep_vma_destroy(vm, op->remap.prev, prev_post_commit); 2789 xe_vma_destroy_unlocked(op->remap.prev); 2790 } 2791 if (op->remap.next) { 2792 prep_vma_destroy(vm, op->remap.next, next_post_commit); 2793 xe_vma_destroy_unlocked(op->remap.next); 2794 } 2795 if (vma) { 2796 xe_svm_notifier_lock(vm); 2797 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2798 xe_svm_notifier_unlock(vm); 2799 if (post_commit) 2800 xe_vm_insert_vma(vm, vma); 2801 } 2802 break; 2803 } 2804 case DRM_GPUVA_OP_PREFETCH: 2805 /* Nothing to do */ 2806 break; 2807 default: 2808 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2809 } 2810 } 2811 2812 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, 2813 struct drm_gpuva_ops **ops, 2814 int num_ops_list) 2815 { 2816 int i; 2817 2818 for (i = num_ops_list - 1; i >= 0; --i) { 2819 struct drm_gpuva_ops *__ops = ops[i]; 2820 struct drm_gpuva_op *__op; 2821 2822 if (!__ops) 2823 continue; 2824 2825 drm_gpuva_for_each_op_reverse(__op, __ops) { 2826 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2827 2828 xe_vma_op_unwind(vm, op, 2829 op->flags & XE_VMA_OP_COMMITTED, 2830 op->flags & XE_VMA_OP_PREV_COMMITTED, 2831 op->flags 
& XE_VMA_OP_NEXT_COMMITTED); 2832 } 2833 } 2834 } 2835 2836 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, 2837 bool validate) 2838 { 2839 struct xe_bo *bo = xe_vma_bo(vma); 2840 struct xe_vm *vm = xe_vma_vm(vma); 2841 int err = 0; 2842 2843 if (bo) { 2844 if (!bo->vm) 2845 err = drm_exec_lock_obj(exec, &bo->ttm.base); 2846 if (!err && validate) 2847 err = xe_bo_validate(bo, vm, 2848 !xe_vm_in_preempt_fence_mode(vm), exec); 2849 } 2850 2851 return err; 2852 } 2853 2854 static int check_ufence(struct xe_vma *vma) 2855 { 2856 if (vma->ufence) { 2857 struct xe_user_fence * const f = vma->ufence; 2858 2859 if (!xe_sync_ufence_get_status(f)) 2860 return -EBUSY; 2861 2862 vma->ufence = NULL; 2863 xe_sync_ufence_put(f); 2864 } 2865 2866 return 0; 2867 } 2868 2869 static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) 2870 { 2871 bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); 2872 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2873 struct xe_tile *tile = op->prefetch_range.tile; 2874 int err = 0; 2875 2876 struct xe_svm_range *svm_range; 2877 struct drm_gpusvm_ctx ctx = {}; 2878 unsigned long i; 2879 2880 if (!xe_vma_is_cpu_addr_mirror(vma)) 2881 return 0; 2882 2883 ctx.read_only = xe_vma_read_only(vma); 2884 ctx.devmem_possible = devmem_possible; 2885 ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0; 2886 ctx.device_private_page_owner = xe_svm_devm_owner(vm->xe); 2887 2888 /* TODO: Threading the migration */ 2889 xa_for_each(&op->prefetch_range.range, i, svm_range) { 2890 if (!tile) 2891 xe_svm_range_migrate_to_smem(vm, svm_range); 2892 2893 if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, !!tile)) { 2894 err = xe_svm_alloc_vram(tile, svm_range, &ctx); 2895 if (err) { 2896 drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n", 2897 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 2898 return -ENODATA; 2899 } 2900 xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM"); 2901 } 2902 2903 err = xe_svm_range_get_pages(vm, svm_range, &ctx); 2904 if (err) { 2905 drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n", 2906 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 2907 if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) 2908 err = -ENODATA; 2909 return err; 2910 } 2911 xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE"); 2912 } 2913 2914 return err; 2915 } 2916 2917 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, 2918 struct xe_vma_op *op) 2919 { 2920 int err = 0; 2921 2922 switch (op->base.op) { 2923 case DRM_GPUVA_OP_MAP: 2924 if (!op->map.invalidate_on_bind) 2925 err = vma_lock_and_validate(exec, op->map.vma, 2926 !xe_vm_in_fault_mode(vm) || 2927 op->map.immediate); 2928 break; 2929 case DRM_GPUVA_OP_REMAP: 2930 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va)); 2931 if (err) 2932 break; 2933 2934 err = vma_lock_and_validate(exec, 2935 gpuva_to_vma(op->base.remap.unmap->va), 2936 false); 2937 if (!err && op->remap.prev) 2938 err = vma_lock_and_validate(exec, op->remap.prev, true); 2939 if (!err && op->remap.next) 2940 err = vma_lock_and_validate(exec, op->remap.next, true); 2941 break; 2942 case DRM_GPUVA_OP_UNMAP: 2943 err = check_ufence(gpuva_to_vma(op->base.unmap.va)); 2944 if (err) 2945 break; 2946 2947 err = vma_lock_and_validate(exec, 2948 gpuva_to_vma(op->base.unmap.va), 2949 false); 2950 break; 2951 case DRM_GPUVA_OP_PREFETCH: 2952 { 2953 struct xe_vma *vma = 
gpuva_to_vma(op->base.prefetch.va); 2954 u32 region; 2955 2956 if (!xe_vma_is_cpu_addr_mirror(vma)) { 2957 region = op->prefetch.region; 2958 xe_assert(vm->xe, region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC || 2959 region <= ARRAY_SIZE(region_to_mem_type)); 2960 } 2961 2962 err = vma_lock_and_validate(exec, 2963 gpuva_to_vma(op->base.prefetch.va), 2964 false); 2965 if (!err && !xe_vma_has_no_bo(vma)) 2966 err = xe_bo_migrate(xe_vma_bo(vma), 2967 region_to_mem_type[region], 2968 NULL, 2969 exec); 2970 break; 2971 } 2972 default: 2973 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2974 } 2975 2976 return err; 2977 } 2978 2979 static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops) 2980 { 2981 struct xe_vma_op *op; 2982 int err; 2983 2984 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) 2985 return 0; 2986 2987 list_for_each_entry(op, &vops->list, link) { 2988 if (op->base.op == DRM_GPUVA_OP_PREFETCH) { 2989 err = prefetch_ranges(vm, op); 2990 if (err) 2991 return err; 2992 } 2993 } 2994 2995 return 0; 2996 } 2997 2998 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, 2999 struct xe_vm *vm, 3000 struct xe_vma_ops *vops) 3001 { 3002 struct xe_vma_op *op; 3003 int err; 3004 3005 err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 3006 if (err) 3007 return err; 3008 3009 list_for_each_entry(op, &vops->list, link) { 3010 err = op_lock_and_prep(exec, vm, op); 3011 if (err) 3012 return err; 3013 } 3014 3015 #ifdef TEST_VM_OPS_ERROR 3016 if (vops->inject_error && 3017 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK) 3018 return -ENOSPC; 3019 #endif 3020 3021 return 0; 3022 } 3023 3024 static void op_trace(struct xe_vma_op *op) 3025 { 3026 switch (op->base.op) { 3027 case DRM_GPUVA_OP_MAP: 3028 trace_xe_vma_bind(op->map.vma); 3029 break; 3030 case DRM_GPUVA_OP_REMAP: 3031 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va)); 3032 if (op->remap.prev) 3033 trace_xe_vma_bind(op->remap.prev); 3034 if (op->remap.next) 3035 trace_xe_vma_bind(op->remap.next); 3036 break; 3037 case DRM_GPUVA_OP_UNMAP: 3038 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va)); 3039 break; 3040 case DRM_GPUVA_OP_PREFETCH: 3041 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va)); 3042 break; 3043 case DRM_GPUVA_OP_DRIVER: 3044 break; 3045 default: 3046 XE_WARN_ON("NOT POSSIBLE"); 3047 } 3048 } 3049 3050 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops) 3051 { 3052 struct xe_vma_op *op; 3053 3054 list_for_each_entry(op, &vops->list, link) 3055 op_trace(op); 3056 } 3057 3058 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops) 3059 { 3060 struct xe_exec_queue *q = vops->q; 3061 struct xe_tile *tile; 3062 int number_tiles = 0; 3063 u8 id; 3064 3065 for_each_tile(tile, vm->xe, id) { 3066 if (vops->pt_update_ops[id].num_ops) 3067 ++number_tiles; 3068 3069 if (vops->pt_update_ops[id].q) 3070 continue; 3071 3072 if (q) { 3073 vops->pt_update_ops[id].q = q; 3074 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list)) 3075 q = list_next_entry(q, multi_gt_list); 3076 } else { 3077 vops->pt_update_ops[id].q = vm->q[id]; 3078 } 3079 } 3080 3081 return number_tiles; 3082 } 3083 3084 static struct dma_fence *ops_execute(struct xe_vm *vm, 3085 struct xe_vma_ops *vops) 3086 { 3087 struct xe_tile *tile; 3088 struct dma_fence *fence = NULL; 3089 struct dma_fence **fences = NULL; 3090 struct dma_fence_array *cf = NULL; 3091 int number_tiles = 0, current_fence = 0, err; 3092 u8 id; 3093 3094 number_tiles = vm_ops_setup_tile_args(vm, vops); 3095 if 
(number_tiles == 0) 3096 return ERR_PTR(-ENODATA); 3097 3098 if (number_tiles > 1) { 3099 fences = kmalloc_array(number_tiles, sizeof(*fences), 3100 GFP_KERNEL); 3101 if (!fences) { 3102 fence = ERR_PTR(-ENOMEM); 3103 goto err_trace; 3104 } 3105 } 3106 3107 for_each_tile(tile, vm->xe, id) { 3108 if (!vops->pt_update_ops[id].num_ops) 3109 continue; 3110 3111 err = xe_pt_update_ops_prepare(tile, vops); 3112 if (err) { 3113 fence = ERR_PTR(err); 3114 goto err_out; 3115 } 3116 } 3117 3118 trace_xe_vm_ops_execute(vops); 3119 3120 for_each_tile(tile, vm->xe, id) { 3121 if (!vops->pt_update_ops[id].num_ops) 3122 continue; 3123 3124 fence = xe_pt_update_ops_run(tile, vops); 3125 if (IS_ERR(fence)) 3126 goto err_out; 3127 3128 if (fences) 3129 fences[current_fence++] = fence; 3130 } 3131 3132 if (fences) { 3133 cf = dma_fence_array_create(number_tiles, fences, 3134 vm->composite_fence_ctx, 3135 vm->composite_fence_seqno++, 3136 false); 3137 if (!cf) { 3138 --vm->composite_fence_seqno; 3139 fence = ERR_PTR(-ENOMEM); 3140 goto err_out; 3141 } 3142 fence = &cf->base; 3143 } 3144 3145 for_each_tile(tile, vm->xe, id) { 3146 if (!vops->pt_update_ops[id].num_ops) 3147 continue; 3148 3149 xe_pt_update_ops_fini(tile, vops); 3150 } 3151 3152 return fence; 3153 3154 err_out: 3155 for_each_tile(tile, vm->xe, id) { 3156 if (!vops->pt_update_ops[id].num_ops) 3157 continue; 3158 3159 xe_pt_update_ops_abort(tile, vops); 3160 } 3161 while (current_fence) 3162 dma_fence_put(fences[--current_fence]); 3163 kfree(fences); 3164 kfree(cf); 3165 3166 err_trace: 3167 trace_xe_vm_ops_fail(vm); 3168 return fence; 3169 } 3170 3171 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence) 3172 { 3173 if (vma->ufence) 3174 xe_sync_ufence_put(vma->ufence); 3175 vma->ufence = __xe_sync_ufence_get(ufence); 3176 } 3177 3178 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op, 3179 struct xe_user_fence *ufence) 3180 { 3181 switch (op->base.op) { 3182 case DRM_GPUVA_OP_MAP: 3183 vma_add_ufence(op->map.vma, ufence); 3184 break; 3185 case DRM_GPUVA_OP_REMAP: 3186 if (op->remap.prev) 3187 vma_add_ufence(op->remap.prev, ufence); 3188 if (op->remap.next) 3189 vma_add_ufence(op->remap.next, ufence); 3190 break; 3191 case DRM_GPUVA_OP_UNMAP: 3192 break; 3193 case DRM_GPUVA_OP_PREFETCH: 3194 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence); 3195 break; 3196 default: 3197 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 3198 } 3199 } 3200 3201 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, 3202 struct dma_fence *fence) 3203 { 3204 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q); 3205 struct xe_user_fence *ufence; 3206 struct xe_vma_op *op; 3207 int i; 3208 3209 ufence = find_ufence_get(vops->syncs, vops->num_syncs); 3210 list_for_each_entry(op, &vops->list, link) { 3211 if (ufence) 3212 op_add_ufence(vm, op, ufence); 3213 3214 if (op->base.op == DRM_GPUVA_OP_UNMAP) 3215 xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence); 3216 else if (op->base.op == DRM_GPUVA_OP_REMAP) 3217 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), 3218 fence); 3219 } 3220 if (ufence) 3221 xe_sync_ufence_put(ufence); 3222 if (fence) { 3223 for (i = 0; i < vops->num_syncs; i++) 3224 xe_sync_entry_signal(vops->syncs + i, fence); 3225 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); 3226 } 3227 } 3228 3229 static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm, 3230 struct xe_vma_ops *vops) 3231 { 3232 struct xe_validation_ctx ctx; 3233 struct drm_exec 
exec; 3234 struct dma_fence *fence; 3235 int err = 0; 3236 3237 lockdep_assert_held_write(&vm->lock); 3238 3239 xe_validation_guard(&ctx, &vm->xe->val, &exec, 3240 ((struct xe_val_flags) { 3241 .interruptible = true, 3242 .exec_ignore_duplicates = true, 3243 }), err) { 3244 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops); 3245 drm_exec_retry_on_contention(&exec); 3246 xe_validation_retry_on_oom(&ctx, &err); 3247 if (err) 3248 return ERR_PTR(err); 3249 3250 xe_vm_set_validation_exec(vm, &exec); 3251 fence = ops_execute(vm, vops); 3252 xe_vm_set_validation_exec(vm, NULL); 3253 if (IS_ERR(fence)) { 3254 if (PTR_ERR(fence) == -ENODATA) 3255 vm_bind_ioctl_ops_fini(vm, vops, NULL); 3256 return fence; 3257 } 3258 3259 vm_bind_ioctl_ops_fini(vm, vops, fence); 3260 } 3261 3262 return err ? ERR_PTR(err) : fence; 3263 } 3264 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); 3265 3266 #define SUPPORTED_FLAGS_STUB \ 3267 (DRM_XE_VM_BIND_FLAG_READONLY | \ 3268 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \ 3269 DRM_XE_VM_BIND_FLAG_NULL | \ 3270 DRM_XE_VM_BIND_FLAG_DUMPABLE | \ 3271 DRM_XE_VM_BIND_FLAG_CHECK_PXP | \ 3272 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) 3273 3274 #ifdef TEST_VM_OPS_ERROR 3275 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR) 3276 #else 3277 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB 3278 #endif 3279 3280 #define XE_64K_PAGE_MASK 0xffffull 3281 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) 3282 3283 static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, 3284 struct drm_xe_vm_bind *args, 3285 struct drm_xe_vm_bind_op **bind_ops) 3286 { 3287 int err; 3288 int i; 3289 3290 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) || 3291 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 3292 return -EINVAL; 3293 3294 if (XE_IOCTL_DBG(xe, args->extensions)) 3295 return -EINVAL; 3296 3297 if (args->num_binds > 1) { 3298 u64 __user *bind_user = 3299 u64_to_user_ptr(args->vector_of_binds); 3300 3301 *bind_ops = kvmalloc_array(args->num_binds, 3302 sizeof(struct drm_xe_vm_bind_op), 3303 GFP_KERNEL | __GFP_ACCOUNT | 3304 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3305 if (!*bind_ops) 3306 return args->num_binds > 1 ? 
-ENOBUFS : -ENOMEM; 3307 3308 err = copy_from_user(*bind_ops, bind_user, 3309 sizeof(struct drm_xe_vm_bind_op) * 3310 args->num_binds); 3311 if (XE_IOCTL_DBG(xe, err)) { 3312 err = -EFAULT; 3313 goto free_bind_ops; 3314 } 3315 } else { 3316 *bind_ops = &args->bind; 3317 } 3318 3319 for (i = 0; i < args->num_binds; ++i) { 3320 u64 range = (*bind_ops)[i].range; 3321 u64 addr = (*bind_ops)[i].addr; 3322 u32 op = (*bind_ops)[i].op; 3323 u32 flags = (*bind_ops)[i].flags; 3324 u32 obj = (*bind_ops)[i].obj; 3325 u64 obj_offset = (*bind_ops)[i].obj_offset; 3326 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance; 3327 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 3328 bool is_cpu_addr_mirror = flags & 3329 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR; 3330 u16 pat_index = (*bind_ops)[i].pat_index; 3331 u16 coh_mode; 3332 3333 if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror && 3334 (!xe_vm_in_fault_mode(vm) || 3335 !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) { 3336 err = -EINVAL; 3337 goto free_bind_ops; 3338 } 3339 3340 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) { 3341 err = -EINVAL; 3342 goto free_bind_ops; 3343 } 3344 3345 pat_index = array_index_nospec(pat_index, xe->pat.n_entries); 3346 (*bind_ops)[i].pat_index = pat_index; 3347 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); 3348 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */ 3349 err = -EINVAL; 3350 goto free_bind_ops; 3351 } 3352 3353 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) { 3354 err = -EINVAL; 3355 goto free_bind_ops; 3356 } 3357 3358 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) || 3359 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) || 3360 XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) || 3361 XE_IOCTL_DBG(xe, obj_offset && (is_null || 3362 is_cpu_addr_mirror)) || 3363 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP && 3364 (is_null || is_cpu_addr_mirror)) || 3365 XE_IOCTL_DBG(xe, !obj && 3366 op == DRM_XE_VM_BIND_OP_MAP && 3367 !is_null && !is_cpu_addr_mirror) || 3368 XE_IOCTL_DBG(xe, !obj && 3369 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3370 XE_IOCTL_DBG(xe, addr && 3371 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3372 XE_IOCTL_DBG(xe, range && 3373 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3374 XE_IOCTL_DBG(xe, obj && 3375 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 3376 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 3377 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 3378 XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR && 3379 !IS_ENABLED(CONFIG_DRM_GPUSVM)) || 3380 XE_IOCTL_DBG(xe, obj && 3381 op == DRM_XE_VM_BIND_OP_PREFETCH) || 3382 XE_IOCTL_DBG(xe, prefetch_region && 3383 op != DRM_XE_VM_BIND_OP_PREFETCH) || 3384 XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC && 3385 !(BIT(prefetch_region) & xe->info.mem_region_mask))) || 3386 XE_IOCTL_DBG(xe, obj && 3387 op == DRM_XE_VM_BIND_OP_UNMAP)) { 3388 err = -EINVAL; 3389 goto free_bind_ops; 3390 } 3391 3392 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) || 3393 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) || 3394 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) || 3395 XE_IOCTL_DBG(xe, !range && 3396 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) { 3397 err = -EINVAL; 3398 goto free_bind_ops; 3399 } 3400 } 3401 3402 return 0; 3403 3404 free_bind_ops: 3405 if (args->num_binds > 1) 3406 kvfree(*bind_ops); 3407 *bind_ops = NULL; 3408 return err; 3409 } 3410 3411 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, 3412 struct xe_exec_queue *q, 3413 struct xe_sync_entry *syncs, 3414 int num_syncs) 3415 { 3416 struct dma_fence *fence; 3417 int i, err = 0; 3418 3419 fence = 
xe_sync_in_fence_get(syncs, num_syncs, 3420 to_wait_exec_queue(vm, q), vm); 3421 if (IS_ERR(fence)) 3422 return PTR_ERR(fence); 3423 3424 for (i = 0; i < num_syncs; i++) 3425 xe_sync_entry_signal(&syncs[i], fence); 3426 3427 xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm, 3428 fence); 3429 dma_fence_put(fence); 3430 3431 return err; 3432 } 3433 3434 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 3435 struct xe_exec_queue *q, 3436 struct xe_sync_entry *syncs, u32 num_syncs) 3437 { 3438 memset(vops, 0, sizeof(*vops)); 3439 INIT_LIST_HEAD(&vops->list); 3440 vops->vm = vm; 3441 vops->q = q; 3442 vops->syncs = syncs; 3443 vops->num_syncs = num_syncs; 3444 vops->flags = 0; 3445 } 3446 3447 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, 3448 u64 addr, u64 range, u64 obj_offset, 3449 u16 pat_index, u32 op, u32 bind_flags) 3450 { 3451 u16 coh_mode; 3452 3453 if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) || 3454 XE_IOCTL_DBG(xe, obj_offset > 3455 xe_bo_size(bo) - range)) { 3456 return -EINVAL; 3457 } 3458 3459 /* 3460 * Some platforms require 64k VM_BIND alignment, 3461 * specifically those with XE_VRAM_FLAGS_NEED64K. 3462 * 3463 * Other platforms may have BOs set to 64k physical placement, 3464 * but can be mapped at 4k offsets anyway. This check is only 3465 * there for the former case. 3466 */ 3467 if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) && 3468 (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) { 3469 if (XE_IOCTL_DBG(xe, obj_offset & 3470 XE_64K_PAGE_MASK) || 3471 XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || 3472 XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) { 3473 return -EINVAL; 3474 } 3475 } 3476 3477 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); 3478 if (bo->cpu_caching) { 3479 if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 3480 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) { 3481 return -EINVAL; 3482 } 3483 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) { 3484 /* 3485 * Imported dma-buf from a different device should 3486 * require 1-way or 2-way coherency since we don't know 3487 * how it was mapped on the CPU. Just assume it is 3488 * potentially cached on the CPU side.
3489 */ 3490 return -EINVAL; 3491 } 3492 3493 /* If a BO is protected it can only be mapped if the key is still valid */ 3494 if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) && 3495 op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL) 3496 if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0)) 3497 return -ENOEXEC; 3498 3499 return 0; 3500 } 3501 3502 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3503 { 3504 struct xe_device *xe = to_xe_device(dev); 3505 struct xe_file *xef = to_xe_file(file); 3506 struct drm_xe_vm_bind *args = data; 3507 struct drm_xe_sync __user *syncs_user; 3508 struct xe_bo **bos = NULL; 3509 struct drm_gpuva_ops **ops = NULL; 3510 struct xe_vm *vm; 3511 struct xe_exec_queue *q = NULL; 3512 u32 num_syncs, num_ufence = 0; 3513 struct xe_sync_entry *syncs = NULL; 3514 struct drm_xe_vm_bind_op *bind_ops = NULL; 3515 struct xe_vma_ops vops; 3516 struct dma_fence *fence; 3517 int err; 3518 int i; 3519 3520 vm = xe_vm_lookup(xef, args->vm_id); 3521 if (XE_IOCTL_DBG(xe, !vm)) 3522 return -EINVAL; 3523 3524 err = vm_bind_ioctl_check_args(xe, vm, args, &bind_ops); 3525 if (err) 3526 goto put_vm; 3527 3528 if (args->exec_queue_id) { 3529 q = xe_exec_queue_lookup(xef, args->exec_queue_id); 3530 if (XE_IOCTL_DBG(xe, !q)) { 3531 err = -ENOENT; 3532 goto free_bind_ops; 3533 } 3534 3535 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) { 3536 err = -EINVAL; 3537 goto put_exec_queue; 3538 } 3539 } 3540 3541 /* Ensure all UNMAPs visible */ 3542 xe_svm_flush(vm); 3543 3544 err = down_write_killable(&vm->lock); 3545 if (err) 3546 goto put_exec_queue; 3547 3548 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { 3549 err = -ENOENT; 3550 goto release_vm_lock; 3551 } 3552 3553 for (i = 0; i < args->num_binds; ++i) { 3554 u64 range = bind_ops[i].range; 3555 u64 addr = bind_ops[i].addr; 3556 3557 if (XE_IOCTL_DBG(xe, range > vm->size) || 3558 XE_IOCTL_DBG(xe, addr > vm->size - range)) { 3559 err = -EINVAL; 3560 goto release_vm_lock; 3561 } 3562 } 3563 3564 if (args->num_binds) { 3565 bos = kvcalloc(args->num_binds, sizeof(*bos), 3566 GFP_KERNEL | __GFP_ACCOUNT | 3567 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3568 if (!bos) { 3569 err = -ENOMEM; 3570 goto release_vm_lock; 3571 } 3572 3573 ops = kvcalloc(args->num_binds, sizeof(*ops), 3574 GFP_KERNEL | __GFP_ACCOUNT | 3575 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3576 if (!ops) { 3577 err = -ENOMEM; 3578 goto free_bos; 3579 } 3580 } 3581 3582 for (i = 0; i < args->num_binds; ++i) { 3583 struct drm_gem_object *gem_obj; 3584 u64 range = bind_ops[i].range; 3585 u64 addr = bind_ops[i].addr; 3586 u32 obj = bind_ops[i].obj; 3587 u64 obj_offset = bind_ops[i].obj_offset; 3588 u16 pat_index = bind_ops[i].pat_index; 3589 u32 op = bind_ops[i].op; 3590 u32 bind_flags = bind_ops[i].flags; 3591 3592 if (!obj) 3593 continue; 3594 3595 gem_obj = drm_gem_object_lookup(file, obj); 3596 if (XE_IOCTL_DBG(xe, !gem_obj)) { 3597 err = -ENOENT; 3598 goto put_obj; 3599 } 3600 bos[i] = gem_to_xe_bo(gem_obj); 3601 3602 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range, 3603 obj_offset, pat_index, op, 3604 bind_flags); 3605 if (err) 3606 goto put_obj; 3607 } 3608 3609 if (args->num_syncs) { 3610 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); 3611 if (!syncs) { 3612 err = -ENOMEM; 3613 goto put_obj; 3614 } 3615 } 3616 3617 syncs_user = u64_to_user_ptr(args->syncs); 3618 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { 3619 err = xe_sync_entry_parse(xe, 
xef, &syncs[num_syncs], 3620 &syncs_user[num_syncs], 3621 (xe_vm_in_lr_mode(vm) ? 3622 SYNC_PARSE_FLAG_LR_MODE : 0) | 3623 (!args->num_binds ? 3624 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0)); 3625 if (err) 3626 goto free_syncs; 3627 3628 if (xe_sync_is_ufence(&syncs[num_syncs])) 3629 num_ufence++; 3630 } 3631 3632 if (XE_IOCTL_DBG(xe, num_ufence > 1)) { 3633 err = -EINVAL; 3634 goto free_syncs; 3635 } 3636 3637 if (!args->num_binds) { 3638 err = -ENODATA; 3639 goto free_syncs; 3640 } 3641 3642 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); 3643 for (i = 0; i < args->num_binds; ++i) { 3644 u64 range = bind_ops[i].range; 3645 u64 addr = bind_ops[i].addr; 3646 u32 op = bind_ops[i].op; 3647 u32 flags = bind_ops[i].flags; 3648 u64 obj_offset = bind_ops[i].obj_offset; 3649 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance; 3650 u16 pat_index = bind_ops[i].pat_index; 3651 3652 ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset, 3653 addr, range, op, flags, 3654 prefetch_region, pat_index); 3655 if (IS_ERR(ops[i])) { 3656 err = PTR_ERR(ops[i]); 3657 ops[i] = NULL; 3658 goto unwind_ops; 3659 } 3660 3661 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops); 3662 if (err) 3663 goto unwind_ops; 3664 3665 #ifdef TEST_VM_OPS_ERROR 3666 if (flags & FORCE_OP_ERROR) { 3667 vops.inject_error = true; 3668 vm->xe->vm_inject_error_position = 3669 (vm->xe->vm_inject_error_position + 1) % 3670 FORCE_OP_ERROR_COUNT; 3671 } 3672 #endif 3673 } 3674 3675 /* Nothing to do */ 3676 if (list_empty(&vops.list)) { 3677 err = -ENODATA; 3678 goto unwind_ops; 3679 } 3680 3681 err = xe_vma_ops_alloc(&vops, args->num_binds > 1); 3682 if (err) 3683 goto unwind_ops; 3684 3685 err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops); 3686 if (err) 3687 goto unwind_ops; 3688 3689 fence = vm_bind_ioctl_ops_execute(vm, &vops); 3690 if (IS_ERR(fence)) 3691 err = PTR_ERR(fence); 3692 else 3693 dma_fence_put(fence); 3694 3695 unwind_ops: 3696 if (err && err != -ENODATA) 3697 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); 3698 xe_vma_ops_fini(&vops); 3699 for (i = args->num_binds - 1; i >= 0; --i) 3700 if (ops[i]) 3701 drm_gpuva_ops_free(&vm->gpuvm, ops[i]); 3702 free_syncs: 3703 if (err == -ENODATA) 3704 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs); 3705 while (num_syncs--) 3706 xe_sync_entry_cleanup(&syncs[num_syncs]); 3707 3708 kfree(syncs); 3709 put_obj: 3710 for (i = 0; i < args->num_binds; ++i) 3711 xe_bo_put(bos[i]); 3712 3713 kvfree(ops); 3714 free_bos: 3715 kvfree(bos); 3716 release_vm_lock: 3717 up_write(&vm->lock); 3718 put_exec_queue: 3719 if (q) 3720 xe_exec_queue_put(q); 3721 free_bind_ops: 3722 if (args->num_binds > 1) 3723 kvfree(bind_ops); 3724 put_vm: 3725 xe_vm_put(vm); 3726 return err; 3727 } 3728 3729 /** 3730 * xe_vm_bind_kernel_bo - bind a kernel BO to a VM 3731 * @vm: VM to bind the BO to 3732 * @bo: BO to bind 3733 * @q: exec queue to use for the bind (optional) 3734 * @addr: address at which to bind the BO 3735 * @cache_lvl: PAT cache level to use 3736 * 3737 * Execute a VM bind map operation on a kernel-owned BO to bind it into a 3738 * kernel-owned VM. 3739 * 3740 * Returns a dma_fence to track the binding completion if the job to do so was 3741 * successfully submitted, an error pointer otherwise. 
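 * The caller is responsible for putting the returned fence with dma_fence_put().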
3742 */ 3743 struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo, 3744 struct xe_exec_queue *q, u64 addr, 3745 enum xe_cache_level cache_lvl) 3746 { 3747 struct xe_vma_ops vops; 3748 struct drm_gpuva_ops *ops = NULL; 3749 struct dma_fence *fence; 3750 int err; 3751 3752 xe_bo_get(bo); 3753 xe_vm_get(vm); 3754 if (q) 3755 xe_exec_queue_get(q); 3756 3757 down_write(&vm->lock); 3758 3759 xe_vma_ops_init(&vops, vm, q, NULL, 0); 3760 3761 ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo), 3762 DRM_XE_VM_BIND_OP_MAP, 0, 0, 3763 vm->xe->pat.idx[cache_lvl]); 3764 if (IS_ERR(ops)) { 3765 err = PTR_ERR(ops); 3766 goto release_vm_lock; 3767 } 3768 3769 err = vm_bind_ioctl_ops_parse(vm, ops, &vops); 3770 if (err) 3771 goto release_vm_lock; 3772 3773 xe_assert(vm->xe, !list_empty(&vops.list)); 3774 3775 err = xe_vma_ops_alloc(&vops, false); 3776 if (err) 3777 goto unwind_ops; 3778 3779 fence = vm_bind_ioctl_ops_execute(vm, &vops); 3780 if (IS_ERR(fence)) 3781 err = PTR_ERR(fence); 3782 3783 unwind_ops: 3784 if (err && err != -ENODATA) 3785 vm_bind_ioctl_ops_unwind(vm, &ops, 1); 3786 3787 xe_vma_ops_fini(&vops); 3788 drm_gpuva_ops_free(&vm->gpuvm, ops); 3789 3790 release_vm_lock: 3791 up_write(&vm->lock); 3792 3793 if (q) 3794 xe_exec_queue_put(q); 3795 xe_vm_put(vm); 3796 xe_bo_put(bo); 3797 3798 if (err) 3799 fence = ERR_PTR(err); 3800 3801 return fence; 3802 } 3803 3804 /** 3805 * xe_vm_lock() - Lock the vm's dma_resv object 3806 * @vm: The struct xe_vm whose lock is to be locked 3807 * @intr: Whether to perform any wait interruptible 3808 * 3809 * Return: 0 on success, -EINTR if @intr is true and the wait for a 3810 * contended lock was interrupted. If @intr is false, the function 3811 * always returns 0. 3812 */ 3813 int xe_vm_lock(struct xe_vm *vm, bool intr) 3814 { 3815 int ret; 3816 3817 if (intr) 3818 ret = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); 3819 else 3820 ret = dma_resv_lock(xe_vm_resv(vm), NULL); 3821 3822 return ret; 3823 } 3824 3825 /** 3826 * xe_vm_unlock() - Unlock the vm's dma_resv object 3827 * @vm: The struct xe_vm whose lock is to be released. 3828 * 3829 * Unlock a buffer object lock that was locked by xe_vm_lock(). 3830 */ 3831 void xe_vm_unlock(struct xe_vm *vm) 3832 { 3833 dma_resv_unlock(xe_vm_resv(vm)); 3834 } 3835 3836 /** 3837 * xe_vm_range_tilemask_tlb_inval - Issue a TLB invalidation on this tilemask for an 3838 * address range 3839 * @vm: The VM 3840 * @start: start address 3841 * @end: end address 3842 * @tile_mask: mask for which gt's issue tlb invalidation 3843 * 3844 * Issue a range based TLB invalidation for gt's in tilemask 3845 * 3846 * Returns 0 for success, negative error code otherwise. 
3847 */ 3848 int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start, 3849 u64 end, u8 tile_mask) 3850 { 3851 struct xe_tlb_inval_fence 3852 fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; 3853 struct xe_tile *tile; 3854 u32 fence_id = 0; 3855 u8 id; 3856 int err; 3857 3858 if (!tile_mask) 3859 return 0; 3860 3861 for_each_tile(tile, vm->xe, id) { 3862 if (!(tile_mask & BIT(id))) 3863 continue; 3864 3865 xe_tlb_inval_fence_init(&tile->primary_gt->tlb_inval, 3866 &fence[fence_id], true); 3867 3868 err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval, 3869 &fence[fence_id], start, end, 3870 vm->usm.asid); 3871 if (err) 3872 goto wait; 3873 ++fence_id; 3874 3875 if (!tile->media_gt) 3876 continue; 3877 3878 xe_tlb_inval_fence_init(&tile->media_gt->tlb_inval, 3879 &fence[fence_id], true); 3880 3881 err = xe_tlb_inval_range(&tile->media_gt->tlb_inval, 3882 &fence[fence_id], start, end, 3883 vm->usm.asid); 3884 if (err) 3885 goto wait; 3886 ++fence_id; 3887 } 3888 3889 wait: 3890 for (id = 0; id < fence_id; ++id) 3891 xe_tlb_inval_fence_wait(&fence[id]); 3892 3893 return err; 3894 } 3895 3896 /** 3897 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock 3898 * @vma: VMA to invalidate 3899 * 3900 * Walks the list of page table leaves, zeroing the entries owned by this 3901 * VMA, invalidates the TLBs, and blocks until the TLB invalidation is 3902 * complete. 3903 * 3904 * Returns 0 for success, negative error code otherwise. 3905 */ 3906 int xe_vm_invalidate_vma(struct xe_vma *vma) 3907 { 3908 struct xe_device *xe = xe_vma_vm(vma)->xe; 3909 struct xe_vm *vm = xe_vma_vm(vma); 3910 struct xe_tile *tile; 3911 u8 tile_mask = 0; 3912 int ret = 0; 3913 u8 id; 3914 3915 xe_assert(xe, !xe_vma_is_null(vma)); 3916 xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma)); 3917 trace_xe_vma_invalidate(vma); 3918 3919 vm_dbg(&vm->xe->drm, 3920 "INVALIDATE: addr=0x%016llx, range=0x%016llx", 3921 xe_vma_start(vma), xe_vma_size(vma)); 3922 3923 /* 3924 * Check that we don't race with page-table updates; the tile_invalidated 3925 * update is safe 3926 */ 3927 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { 3928 if (xe_vma_is_userptr(vma)) { 3929 lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) || 3930 (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) && 3931 lockdep_is_held(&xe_vm_resv(vm)->lock.base))); 3932 3933 WARN_ON_ONCE(!mmu_interval_check_retry 3934 (&to_userptr_vma(vma)->userptr.notifier, 3935 to_userptr_vma(vma)->userptr.pages.notifier_seq)); 3936 WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm), 3937 DMA_RESV_USAGE_BOOKKEEP)); 3938 3939 } else { 3940 xe_bo_assert_held(xe_vma_bo(vma)); 3941 } 3942 } 3943 3944 for_each_tile(tile, xe, id) 3945 if (xe_pt_zap_ptes(tile, vma)) 3946 tile_mask |= BIT(id); 3947 3948 xe_device_wmb(xe); 3949 3950 ret = xe_vm_range_tilemask_tlb_inval(xe_vma_vm(vma), xe_vma_start(vma), 3951 xe_vma_end(vma), tile_mask); 3952 3953 /* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */ 3954 WRITE_ONCE(vma->tile_invalidated, vma->tile_mask); 3955 3956 return ret; 3957 } 3958 3959 int xe_vm_validate_protected(struct xe_vm *vm) 3960 { 3961 struct drm_gpuva *gpuva; 3962 int err = 0; 3963 3964 if (!vm) 3965 return -ENODEV; 3966 3967 mutex_lock(&vm->snap_mutex); 3968 3969 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 3970 struct xe_vma *vma = gpuva_to_vma(gpuva); 3971 struct xe_bo *bo = vma->gpuva.gem.obj ?
3972 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 3973 3974 if (!bo) 3975 continue; 3976 3977 if (xe_bo_is_protected(bo)) { 3978 err = xe_pxp_bo_key_check(vm->xe->pxp, bo); 3979 if (err) 3980 break; 3981 } 3982 } 3983 3984 mutex_unlock(&vm->snap_mutex); 3985 return err; 3986 } 3987 3988 struct xe_vm_snapshot { 3989 unsigned long num_snaps; 3990 struct { 3991 u64 ofs, bo_ofs; 3992 unsigned long len; 3993 struct xe_bo *bo; 3994 void *data; 3995 struct mm_struct *mm; 3996 } snap[]; 3997 }; 3998 3999 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm) 4000 { 4001 unsigned long num_snaps = 0, i; 4002 struct xe_vm_snapshot *snap = NULL; 4003 struct drm_gpuva *gpuva; 4004 4005 if (!vm) 4006 return NULL; 4007 4008 mutex_lock(&vm->snap_mutex); 4009 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 4010 if (gpuva->flags & XE_VMA_DUMPABLE) 4011 num_snaps++; 4012 } 4013 4014 if (num_snaps) 4015 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT); 4016 if (!snap) { 4017 snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV); 4018 goto out_unlock; 4019 } 4020 4021 snap->num_snaps = num_snaps; 4022 i = 0; 4023 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 4024 struct xe_vma *vma = gpuva_to_vma(gpuva); 4025 struct xe_bo *bo = vma->gpuva.gem.obj ? 4026 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 4027 4028 if (!(gpuva->flags & XE_VMA_DUMPABLE)) 4029 continue; 4030 4031 snap->snap[i].ofs = xe_vma_start(vma); 4032 snap->snap[i].len = xe_vma_size(vma); 4033 if (bo) { 4034 snap->snap[i].bo = xe_bo_get(bo); 4035 snap->snap[i].bo_ofs = xe_vma_bo_offset(vma); 4036 } else if (xe_vma_is_userptr(vma)) { 4037 struct mm_struct *mm = 4038 to_userptr_vma(vma)->userptr.notifier.mm; 4039 4040 if (mmget_not_zero(mm)) 4041 snap->snap[i].mm = mm; 4042 else 4043 snap->snap[i].data = ERR_PTR(-EFAULT); 4044 4045 snap->snap[i].bo_ofs = xe_vma_userptr(vma); 4046 } else { 4047 snap->snap[i].data = ERR_PTR(-ENOENT); 4048 } 4049 i++; 4050 } 4051 4052 out_unlock: 4053 mutex_unlock(&vm->snap_mutex); 4054 return snap; 4055 } 4056 4057 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) 4058 { 4059 if (IS_ERR_OR_NULL(snap)) 4060 return; 4061 4062 for (int i = 0; i < snap->num_snaps; i++) { 4063 struct xe_bo *bo = snap->snap[i].bo; 4064 int err; 4065 4066 if (IS_ERR(snap->snap[i].data)) 4067 continue; 4068 4069 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER); 4070 if (!snap->snap[i].data) { 4071 snap->snap[i].data = ERR_PTR(-ENOMEM); 4072 goto cleanup_bo; 4073 } 4074 4075 if (bo) { 4076 err = xe_bo_read(bo, snap->snap[i].bo_ofs, 4077 snap->snap[i].data, snap->snap[i].len); 4078 } else { 4079 void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs; 4080 4081 kthread_use_mm(snap->snap[i].mm); 4082 if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len)) 4083 err = 0; 4084 else 4085 err = -EFAULT; 4086 kthread_unuse_mm(snap->snap[i].mm); 4087 4088 mmput(snap->snap[i].mm); 4089 snap->snap[i].mm = NULL; 4090 } 4091 4092 if (err) { 4093 kvfree(snap->snap[i].data); 4094 snap->snap[i].data = ERR_PTR(err); 4095 } 4096 4097 cleanup_bo: 4098 xe_bo_put(bo); 4099 snap->snap[i].bo = NULL; 4100 } 4101 } 4102 4103 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) 4104 { 4105 unsigned long i, j; 4106 4107 if (IS_ERR_OR_NULL(snap)) { 4108 drm_printf(p, "[0].error: %li\n", PTR_ERR(snap)); 4109 return; 4110 } 4111 4112 for (i = 0; i < snap->num_snaps; i++) { 4113 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len); 4114 4115 if 
(IS_ERR(snap->snap[i].data)) { 4116 drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs, 4117 PTR_ERR(snap->snap[i].data)); 4118 continue; 4119 } 4120 4121 drm_printf(p, "[%llx].data: ", snap->snap[i].ofs); 4122 4123 for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) { 4124 u32 *val = snap->snap[i].data + j; 4125 char dumped[ASCII85_BUFSZ]; 4126 4127 drm_puts(p, ascii85_encode(*val, dumped)); 4128 } 4129 4130 drm_puts(p, "\n"); 4131 4132 if (drm_coredump_printer_is_full(p)) 4133 return; 4134 } 4135 } 4136 4137 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap) 4138 { 4139 unsigned long i; 4140 4141 if (IS_ERR_OR_NULL(snap)) 4142 return; 4143 4144 for (i = 0; i < snap->num_snaps; i++) { 4145 if (!IS_ERR(snap->snap[i].data)) 4146 kvfree(snap->snap[i].data); 4147 xe_bo_put(snap->snap[i].bo); 4148 if (snap->snap[i].mm) 4149 mmput(snap->snap[i].mm); 4150 } 4151 kvfree(snap); 4152 } 4153 4154 /** 4155 * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations 4156 * @xe: Pointer to the XE device structure 4157 * @vma: Pointer to the virtual memory area (VMA) structure 4158 * @is_atomic: In pagefault path and atomic operation 4159 * 4160 * This function determines whether the given VMA needs to be migrated to 4161 * VRAM in order to do atomic GPU operation. 4162 * 4163 * Return: 4164 * 1 - Migration to VRAM is required 4165 * 0 - Migration is not required 4166 * -EACCES - Invalid access for atomic memory attr 4167 * 4168 */ 4169 int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic) 4170 { 4171 u32 atomic_access = xe_vma_bo(vma) ? xe_vma_bo(vma)->attr.atomic_access : 4172 vma->attr.atomic_access; 4173 4174 if (!IS_DGFX(xe) || !is_atomic) 4175 return false; 4176 4177 /* 4178 * NOTE: The checks implemented here are platform-specific. For 4179 * instance, on a device supporting CXL atomics, these would ideally 4180 * work universally without additional handling. 
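 *
 * DRM_XE_ATOMIC_DEVICE only requires migration when the device lacks atomics
 * on system memory, while DRM_XE_ATOMIC_CPU can never be satisfied from VRAM
 * and is therefore rejected with -EACCES.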
4181 */ 4182 switch (atomic_access) { 4183 case DRM_XE_ATOMIC_DEVICE: 4184 return !xe->info.has_device_atomics_on_smem; 4185 4186 case DRM_XE_ATOMIC_CPU: 4187 return -EACCES; 4188 4189 case DRM_XE_ATOMIC_UNDEFINED: 4190 case DRM_XE_ATOMIC_GLOBAL: 4191 default: 4192 return 1; 4193 } 4194 } 4195 4196 static int xe_vm_alloc_vma(struct xe_vm *vm, 4197 struct drm_gpuvm_map_req *map_req, 4198 bool is_madvise) 4199 { 4200 struct xe_vma_ops vops; 4201 struct drm_gpuva_ops *ops = NULL; 4202 struct drm_gpuva_op *__op; 4203 bool is_cpu_addr_mirror = false; 4204 bool remap_op = false; 4205 struct xe_vma_mem_attr tmp_attr; 4206 u16 default_pat; 4207 int err; 4208 4209 lockdep_assert_held_write(&vm->lock); 4210 4211 if (is_madvise) 4212 ops = drm_gpuvm_madvise_ops_create(&vm->gpuvm, map_req); 4213 else 4214 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, map_req); 4215 4216 if (IS_ERR(ops)) 4217 return PTR_ERR(ops); 4218 4219 if (list_empty(&ops->list)) { 4220 err = 0; 4221 goto free_ops; 4222 } 4223 4224 drm_gpuva_for_each_op(__op, ops) { 4225 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 4226 struct xe_vma *vma = NULL; 4227 4228 if (!is_madvise) { 4229 if (__op->op == DRM_GPUVA_OP_UNMAP) { 4230 vma = gpuva_to_vma(op->base.unmap.va); 4231 XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma)); 4232 default_pat = vma->attr.default_pat_index; 4233 } 4234 4235 if (__op->op == DRM_GPUVA_OP_REMAP) { 4236 vma = gpuva_to_vma(op->base.remap.unmap->va); 4237 default_pat = vma->attr.default_pat_index; 4238 } 4239 4240 if (__op->op == DRM_GPUVA_OP_MAP) { 4241 op->map.is_cpu_addr_mirror = true; 4242 op->map.pat_index = default_pat; 4243 } 4244 } else { 4245 if (__op->op == DRM_GPUVA_OP_REMAP) { 4246 vma = gpuva_to_vma(op->base.remap.unmap->va); 4247 xe_assert(vm->xe, !remap_op); 4248 xe_assert(vm->xe, xe_vma_has_no_bo(vma)); 4249 remap_op = true; 4250 4251 if (xe_vma_is_cpu_addr_mirror(vma)) 4252 is_cpu_addr_mirror = true; 4253 else 4254 is_cpu_addr_mirror = false; 4255 } 4256 4257 if (__op->op == DRM_GPUVA_OP_MAP) { 4258 xe_assert(vm->xe, remap_op); 4259 remap_op = false; 4260 /* 4261 * For madvise ops DRM_GPUVA_OP_MAP always comes 4262 * after DRM_GPUVA_OP_REMAP, so set 4263 * op->map.is_cpu_addr_mirror to true when the REMAP 4264 * was for an xe_vma_is_cpu_addr_mirror() VMA. 4265 */ 4266 op->map.is_cpu_addr_mirror = is_cpu_addr_mirror; 4267 } 4268 } 4269 print_op(vm->xe, __op); 4270 } 4271 4272 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 4273 4274 if (is_madvise) 4275 vops.flags |= XE_VMA_OPS_FLAG_MADVISE; 4276 4277 err = vm_bind_ioctl_ops_parse(vm, ops, &vops); 4278 if (err) 4279 goto unwind_ops; 4280 4281 xe_vm_lock(vm, false); 4282 4283 drm_gpuva_for_each_op(__op, ops) { 4284 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 4285 struct xe_vma *vma; 4286 4287 if (__op->op == DRM_GPUVA_OP_UNMAP) { 4288 vma = gpuva_to_vma(op->base.unmap.va); 4289 /* There should be no unmap for madvise */ 4290 if (is_madvise) 4291 XE_WARN_ON("UNEXPECTED UNMAP"); 4292 4293 xe_vma_destroy(vma, NULL); 4294 } else if (__op->op == DRM_GPUVA_OP_REMAP) { 4295 vma = gpuva_to_vma(op->base.remap.unmap->va); 4296 /* For madvise ops, store the attributes of the unmapped REMAP 4297 * VMA so they can be assigned to the newly created MAP VMA. 4298 */ 4299 if (is_madvise) 4300 tmp_attr = vma->attr; 4301 4302 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL); 4303 } else if (__op->op == DRM_GPUVA_OP_MAP) { 4304 vma = op->map.vma; 4305 /* For a madvise call, MAP is always preceded by REMAP.
4306 * Therefore tmp_attr always holds sane values, making it safe to 4307 * copy them to the new VMA. 4308 */ 4309 if (is_madvise) 4310 vma->attr = tmp_attr; 4311 } 4312 } 4313 4314 xe_vm_unlock(vm); 4315 drm_gpuva_ops_free(&vm->gpuvm, ops); 4316 return 0; 4317 4318 unwind_ops: 4319 vm_bind_ioctl_ops_unwind(vm, &ops, 1); 4320 free_ops: 4321 drm_gpuva_ops_free(&vm->gpuvm, ops); 4322 return err; 4323 } 4324 4325 /** 4326 * xe_vm_alloc_madvise_vma - Allocate VMAs with madvise ops 4327 * @vm: Pointer to the xe_vm structure 4328 * @start: Starting input address 4329 * @range: Size of the input range 4330 * 4331 * This function splits existing VMAs to create new VMAs for the user-provided input range 4332 * 4333 * Return: 0 on success, negative error code on failure 4334 */ 4335 int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range) 4336 { 4337 struct drm_gpuvm_map_req map_req = { 4338 .map.va.addr = start, 4339 .map.va.range = range, 4340 }; 4341 4342 lockdep_assert_held_write(&vm->lock); 4343 4344 vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range); 4345 4346 return xe_vm_alloc_vma(vm, &map_req, true); 4347 } 4348 4349 /** 4350 * xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU addr mirror vma 4351 * @vm: Pointer to the xe_vm structure 4352 * @start: Starting input address 4353 * @range: Size of the input range 4354 * 4355 * This function splits/merges existing VMAs to create new VMAs for the user-provided input range 4356 * 4357 * Return: 0 on success, negative error code on failure 4358 */ 4359 int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t range) 4360 { 4361 struct drm_gpuvm_map_req map_req = { 4362 .map.va.addr = start, 4363 .map.va.range = range, 4364 }; 4365 4366 lockdep_assert_held_write(&vm->lock); 4367 4368 vm_dbg(&vm->xe->drm, "CPU_ADDR_MIRROR_VMA_OPS_CREATE: addr=0x%016llx, size=0x%016llx", 4369 start, range); 4370 4371 return xe_vm_alloc_vma(vm, &map_req, false); 4372 } 4373
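/*
 * Illustrative sketch (not compiled, guarded by #if 0): how a driver-internal
 * caller might use xe_vm_bind_kernel_bo() to map a kernel BO into a
 * kernel-owned VM and wait for the bind to complete. The helper name and the
 * XE_CACHE_WB cache level are illustrative choices, not requirements taken
 * from this file.
 */
#if 0
static int example_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo, u64 addr)
{
	struct dma_fence *fence;

	/* No explicit exec queue: the VM's default bind queue is used. */
	fence = xe_vm_bind_kernel_bo(vm, bo, NULL, addr, XE_CACHE_WB);
	if (IS_ERR(fence))
		return PTR_ERR(fence);

	/* Wait for the bind job to finish, then drop the fence reference. */
	dma_fence_wait(fence, false);
	dma_fence_put(fence);

	return 0;
}
#endif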