// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_vm.h"

#include <linux/dma-fence-array.h>
#include <linux/nospec.h>

#include <drm/drm_drv.h>
#include <drm/drm_exec.h>
#include <drm/drm_print.h>
#include <drm/ttm/ttm_tt.h>
#include <uapi/drm/xe_drm.h>
#include <linux/ascii85.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/swap.h>

#include <generated/xe_wa_oob.h>

#include "regs/xe_gtt_defs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue.h"
#include "xe_gt_pagefault.h"
#include "xe_migrate.h"
#include "xe_pat.h"
#include "xe_pm.h"
#include "xe_preempt_fence.h"
#include "xe_pt.h"
#include "xe_pxp.h"
#include "xe_res_cursor.h"
#include "xe_svm.h"
#include "xe_sync.h"
#include "xe_tile.h"
#include "xe_tlb_inval.h"
#include "xe_trace_bo.h"
#include "xe_wa.h"

static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
{
	return vm->gpuvm.r_obj;
}

/**
 * xe_vm_drm_exec_lock() - Lock the vm's resv with a drm_exec transaction
 * @vm: The vm whose resv is to be locked.
 * @exec: The drm_exec transaction.
 *
 * Helper to lock the vm's resv as part of a drm_exec transaction.
 *
 * Return: %0 on success. See drm_exec_lock_obj() for error codes.
 */
int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec)
{
	return drm_exec_lock_obj(exec, xe_vm_obj(vm));
}

static bool preempt_fences_waiting(struct xe_vm *vm)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		if (!q->lr.pfence ||
		    test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
			     &q->lr.pfence->flags)) {
			return true;
		}
	}

	return false;
}

static void free_preempt_fences(struct list_head *list)
{
	struct list_head *link, *next;

	list_for_each_safe(link, next, list)
		xe_preempt_fence_free(to_preempt_fence_from_link(link));
}

static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
				unsigned int *count)
{
	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);

	if (*count >= vm->preempt.num_exec_queues)
		return 0;

	for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
		struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();

		if (IS_ERR(pfence))
			return PTR_ERR(pfence);

		list_move_tail(xe_preempt_fence_link(pfence), list);
	}

	return 0;
}

static int wait_for_existing_preempt_fences(struct xe_vm *vm)
{
	struct xe_exec_queue *q;

	xe_vm_assert_held(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		if (q->lr.pfence) {
			long timeout = dma_fence_wait(q->lr.pfence, false);

			/* Only -ETIME on fence indicates VM needs to be killed */
			if (timeout < 0 || q->lr.pfence->error == -ETIME)
				return -ETIME;

			dma_fence_put(q->lr.pfence);
			q->lr.pfence = NULL;
		}
	}

	return 0;
}

static bool xe_vm_is_idle(struct xe_vm *vm)
{
	struct xe_exec_queue *q;

	xe_vm_assert_held(vm);
	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		if (!xe_exec_queue_is_idle(q))
			return false;
	}

	return true;
}
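/*
 * Informal summary (added annotation, not part of the upstream comments):
 * the preempt-fence helpers above are consumed by preempt_rebind_work_func()
 * further below, with vm->lock held and the VM's reservation objects locked,
 * roughly in this order:
 *
 *	wait_for_existing_preempt_fences(vm);	(via xe_preempt_work_begin())
 *	xe_vm_validate_rebind(vm, &exec, ...);	(via xe_preempt_work_begin())
 *	alloc_preempt_fences(vm, &preempt_fences, &fence_count);
 *	xe_vm_rebind(vm, true);
 *	arm_preempt_fences(vm, &preempt_fences);
 *	resume_and_reinstall_preempt_fences(vm, &exec);
 */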
static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
{
	struct list_head *link;
	struct xe_exec_queue *q;

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		struct dma_fence *fence;

		link = list->next;
		xe_assert(vm->xe, link != list);

		fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
					     q, q->lr.context,
					     ++q->lr.seqno);
		dma_fence_put(q->lr.pfence);
		q->lr.pfence = fence;
	}
}

static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
{
	struct xe_exec_queue *q;
	int err;

	xe_bo_assert_held(bo);

	if (!vm->preempt.num_exec_queues)
		return 0;

	err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
	if (err)
		return err;

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
		if (q->lr.pfence) {
			dma_resv_add_fence(bo->ttm.base.resv,
					   q->lr.pfence,
					   DMA_RESV_USAGE_BOOKKEEP);
		}

	return 0;
}

static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
						struct drm_exec *exec)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		q->ops->resume(q);

		drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
					 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
	}
}

int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	struct drm_gpuvm_exec vm_exec = {
		.vm = &vm->gpuvm,
		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
		.num_fences = 1,
	};
	struct drm_exec *exec = &vm_exec.exec;
	struct xe_validation_ctx ctx;
	struct dma_fence *pfence;
	int err;
	bool wait;

	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));

	down_write(&vm->lock);
	err = xe_validation_exec_lock(&ctx, &vm_exec, &vm->xe->val);
	if (err)
		goto out_up_write;

	pfence = xe_preempt_fence_create(q, q->lr.context,
					 ++q->lr.seqno);
	if (IS_ERR(pfence)) {
		err = PTR_ERR(pfence);
		goto out_fini;
	}

	list_add(&q->lr.link, &vm->preempt.exec_queues);
	++vm->preempt.num_exec_queues;
	q->lr.pfence = pfence;

	xe_svm_notifier_lock(vm);

	drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
				 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);

	/*
	 * Check to see if a preemption on VM is in flight or userptr
	 * invalidation, if so trigger this preempt fence to sync state with
	 * other preempt fences on the VM.
	 */
	wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
	if (wait)
		dma_fence_enable_sw_signaling(pfence);

	xe_svm_notifier_unlock(vm);

out_fini:
	xe_validation_ctx_fini(&ctx);
out_up_write:
	up_write(&vm->lock);

	return err;
}
ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO);
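/*
 * Illustrative sketch (added annotation, not taken from the upstream code):
 * a long-running (preempt-fence mode) exec queue is typically attached to
 * its VM once and detached on teardown, e.g.:
 *
 *	err = xe_vm_add_compute_exec_queue(vm, q);
 *	if (err)
 *		goto err_out;
 *	...
 *	xe_vm_remove_compute_exec_queue(vm, q);
 *
 * As noted in its kernel-doc below, xe_vm_remove_compute_exec_queue()
 * tolerates being called more than once on the same queue.
 */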
/**
 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
 * @vm: The VM.
 * @q: The exec_queue
 *
 * Note that this function might be called multiple times on the same queue.
 */
void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	if (!xe_vm_in_preempt_fence_mode(vm))
		return;

	down_write(&vm->lock);
	if (!list_empty(&q->lr.link)) {
		list_del_init(&q->lr.link);
		--vm->preempt.num_exec_queues;
	}
	if (q->lr.pfence) {
		dma_fence_enable_sw_signaling(q->lr.pfence);
		dma_fence_put(q->lr.pfence);
		q->lr.pfence = NULL;
	}
	up_write(&vm->lock);
}

#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000

/**
 * xe_vm_kill() - VM Kill
 * @vm: The VM.
 * @unlocked: Flag indicating the VM's dma-resv is not held
 *
 * Kill the VM by setting the banned flag, indicating the VM is no longer
 * available for use. If in preempt fence mode, also kill all exec queues
 * attached to the VM.
 */
void xe_vm_kill(struct xe_vm *vm, bool unlocked)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);

	if (unlocked)
		xe_vm_lock(vm, false);

	vm->flags |= XE_VM_FLAG_BANNED;
	trace_xe_vm_kill(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
		q->ops->kill(q);

	if (unlocked)
		xe_vm_unlock(vm);

	/* TODO: Inform user the VM is banned */
}

static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
{
	struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
	struct drm_gpuva *gpuva;
	int ret;

	lockdep_assert_held(&vm->lock);
	drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
		list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
			       &vm->rebind_list);

	if (!try_wait_for_completion(&vm->xe->pm_block))
		return -EAGAIN;

	ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false, exec);
	if (ret)
		return ret;

	vm_bo->evicted = false;
	return 0;
}

/**
 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
 * @vm: The vm for which we are rebinding.
 * @exec: The struct drm_exec with the locked GEM objects.
 * @num_fences: The number of fences to reserve for the operation, not
 * including rebinds and validations.
 *
 * Validates all evicted gem objects and rebinds their vmas. Note that
 * rebindings may cause evictions and hence the validation-rebind
 * sequence is rerun until there are no more objects to validate.
 *
 * Return: 0 on success, negative error code on error. In particular,
 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
 * the drm_exec transaction needs to be restarted.
 */
int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
			  unsigned int num_fences)
{
	struct drm_gem_object *obj;
	unsigned long index;
	int ret;

	do {
		ret = drm_gpuvm_validate(&vm->gpuvm, exec);
		if (ret)
			return ret;

		ret = xe_vm_rebind(vm, false);
		if (ret)
			return ret;
	} while (!list_empty(&vm->gpuvm.evict.list));

	drm_exec_for_each_locked_object(exec, index, obj) {
		ret = dma_resv_reserve_fences(obj->resv, num_fences);
		if (ret)
			return ret;
	}

	return 0;
}

static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
				 bool *done)
{
	int err;

	err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	if (xe_vm_is_idle(vm)) {
		vm->preempt.rebind_deactivated = true;
		*done = true;
		return 0;
	}

	if (!preempt_fences_waiting(vm)) {
		*done = true;
		return 0;
	}

	err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	err = wait_for_existing_preempt_fences(vm);
	if (err)
		return err;

	/*
	 * Add validation and rebinding to the locking loop since both can
	 * cause evictions which may require blocking dma_resv locks.
	 * The fence reservation here is intended for the new preempt fences
	 * we attach at the end of the rebind work.
	 */
	return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
}

static bool vm_suspend_rebind_worker(struct xe_vm *vm)
{
	struct xe_device *xe = vm->xe;
	bool ret = false;

	mutex_lock(&xe->rebind_resume_lock);
	if (!try_wait_for_completion(&vm->xe->pm_block)) {
		ret = true;
		list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list);
	}
	mutex_unlock(&xe->rebind_resume_lock);

	return ret;
}

/**
 * xe_vm_resume_rebind_worker() - Resume the rebind worker.
 * @vm: The vm whose preempt worker to resume.
 *
 * Resume a preempt worker that was previously suspended by
 * vm_suspend_rebind_worker().
 */
void xe_vm_resume_rebind_worker(struct xe_vm *vm)
{
	queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work);
}

static void preempt_rebind_work_func(struct work_struct *w)
{
	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	unsigned int fence_count = 0;
	LIST_HEAD(preempt_fences);
	int err = 0;
	long wait;
	int __maybe_unused tries = 0;

	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
	trace_xe_vm_rebind_worker_enter(vm);

	down_write(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm)) {
		up_write(&vm->lock);
		trace_xe_vm_rebind_worker_exit(vm);
		return;
	}

retry:
	if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) {
		up_write(&vm->lock);
		return;
	}

	if (xe_vm_userptr_check_repin(vm)) {
		err = xe_vm_userptr_pin(vm);
		if (err)
			goto out_unlock_outer;
	}

	err = xe_validation_ctx_init(&ctx, &vm->xe->val, &exec,
				     (struct xe_val_flags) {.interruptible = true});
	if (err)
		goto out_unlock_outer;

	drm_exec_until_all_locked(&exec) {
		bool done = false;

		err = xe_preempt_work_begin(&exec, vm, &done);
		drm_exec_retry_on_contention(&exec);
		xe_validation_retry_on_oom(&ctx, &err);
		if (err || done) {
			xe_validation_ctx_fini(&ctx);
			goto out_unlock_outer;
		}
	}

	err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
	if (err)
		goto out_unlock;

	xe_vm_set_validation_exec(vm, &exec);
	err = xe_vm_rebind(vm, true);
	xe_vm_set_validation_exec(vm, NULL);
	if (err)
		goto out_unlock;

	/* Wait on rebinds and munmap style VM unbinds */
	wait = dma_resv_wait_timeout(xe_vm_resv(vm),
				     DMA_RESV_USAGE_KERNEL,
				     false, MAX_SCHEDULE_TIMEOUT);
	if (wait <= 0) {
		err = -ETIME;
		goto out_unlock;
	}

#define retry_required(__tries, __vm) \
	(IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
	(!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
	__xe_vm_userptr_needs_repin(__vm))

	xe_svm_notifier_lock(vm);
	if (retry_required(tries, vm)) {
		xe_svm_notifier_unlock(vm);
		err = -EAGAIN;
		goto out_unlock;
	}

#undef retry_required

	spin_lock(&vm->xe->ttm.lru_lock);
	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
	spin_unlock(&vm->xe->ttm.lru_lock);

	/* Point of no return. */
	arm_preempt_fences(vm, &preempt_fences);
	resume_and_reinstall_preempt_fences(vm, &exec);
	xe_svm_notifier_unlock(vm);

out_unlock:
	xe_validation_ctx_fini(&ctx);
out_unlock_outer:
	if (err == -EAGAIN) {
		trace_xe_vm_rebind_worker_retry(vm);
		goto retry;
	}

	if (err) {
		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
		xe_vm_kill(vm, true);
	}
	up_write(&vm->lock);

	free_preempt_fences(&preempt_fences);

	trace_xe_vm_rebind_worker_exit(vm);
}
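/*
 * Note added for clarity (not from the upstream comments): with
 * CONFIG_DRM_XE_USERPTR_INVAL_INJECT enabled, retry_required() above forces
 * at least one -EAGAIN pass through the retry: label per worker invocation,
 * exercising the rebind retry path even without a real userptr invalidation.
 */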
static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
{
	int i;

	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) {
		if (!vops->pt_update_ops[i].num_ops)
			continue;

		vops->pt_update_ops[i].ops =
			kmalloc_array(vops->pt_update_ops[i].num_ops,
				      sizeof(*vops->pt_update_ops[i].ops),
				      GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
		if (!vops->pt_update_ops[i].ops)
			return array_of_binds ? -ENOBUFS : -ENOMEM;
	}

	return 0;
}
ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO);

static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op)
{
	struct xe_vma *vma;

	vma = gpuva_to_vma(op->base.prefetch.va);

	if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma))
		xa_destroy(&op->prefetch_range.range);
}

static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops)
{
	struct xe_vma_op *op;

	if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
		return;

	list_for_each_entry(op, &vops->list, link)
		xe_vma_svm_prefetch_op_fini(op);
}

static void xe_vma_ops_fini(struct xe_vma_ops *vops)
{
	int i;

	xe_vma_svm_prefetch_ops_fini(vops);

	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
		kfree(vops->pt_update_ops[i].ops);
}

static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val)
{
	int i;

	if (!inc_val)
		return;

	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
		if (BIT(i) & tile_mask)
			vops->pt_update_ops[i].num_ops += inc_val;
}

#define XE_VMA_CREATE_MASK ( \
	XE_VMA_READ_ONLY | \
	XE_VMA_DUMPABLE | \
	XE_VMA_SYSTEM_ALLOCATOR | \
	DRM_GPUVA_SPARSE | \
	XE_VMA_MADV_AUTORESET)

static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
				  u8 tile_mask)
{
	INIT_LIST_HEAD(&op->link);
	op->tile_mask = tile_mask;
	op->base.op = DRM_GPUVA_OP_MAP;
	op->base.map.va.addr = vma->gpuva.va.addr;
	op->base.map.va.range = vma->gpuva.va.range;
	op->base.map.gem.obj = vma->gpuva.gem.obj;
	op->base.map.gem.offset = vma->gpuva.gem.offset;
	op->map.vma = vma;
	op->map.immediate = true;
	op->map.vma_flags = vma->gpuva.flags & XE_VMA_CREATE_MASK;
}

static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
				u8 tile_mask)
{
	struct xe_vma_op *op;

	op = kzalloc(sizeof(*op), GFP_KERNEL);
	if (!op)
		return -ENOMEM;

	xe_vm_populate_rebind(op, vma, tile_mask);
	list_add_tail(&op->link, &vops->list);
	xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);

	return 0;
}

static struct dma_fence *ops_execute(struct xe_vm *vm,
				     struct xe_vma_ops *vops);
static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
			    struct xe_exec_queue *q,
			    struct xe_sync_entry *syncs, u32 num_syncs);

int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
{
	struct dma_fence *fence;
	struct xe_vma *vma, *next;
	struct xe_vma_ops vops;
	struct xe_vma_op *op, *next_op;
	int err, i;

	lockdep_assert_held(&vm->lock);
	if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
	    list_empty(&vm->rebind_list))
		return 0;

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
		vops.pt_update_ops[i].wait_vm_bookkeep = true;

	xe_vm_assert_held(vm);
	list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
		xe_assert(vm->xe, vma->tile_present);

		if (rebind_worker)
			trace_xe_vma_rebind_worker(vma);
		else
			trace_xe_vma_rebind_exec(vma);

		err = xe_vm_ops_add_rebind(&vops, vma,
					   vma->tile_present);
		if (err)
			goto free_ops;
	}

	err = xe_vma_ops_alloc(&vops, false);
	if (err)
		goto free_ops;

	fence = ops_execute(vm, &vops);
	if (IS_ERR(fence)) {
		err = PTR_ERR(fence);
	} else {
		dma_fence_put(fence);
		list_for_each_entry_safe(vma, next, &vm->rebind_list,
					 combined_links.rebind)
			list_del_init(&vma->combined_links.rebind);
	}
free_ops:
	list_for_each_entry_safe(op, next_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return err;
}

struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask)
{
	struct dma_fence *fence = NULL;
	struct xe_vma_ops vops;
	struct xe_vma_op *op, *next_op;
	struct xe_tile *tile;
	u8 id;
	int err;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);
	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	for_each_tile(tile, vm->xe, id) {
		vops.pt_update_ops[id].wait_vm_bookkeep = true;
		vops.pt_update_ops[tile->id].q =
			xe_migrate_exec_queue(tile->migrate);
	}

	err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
	if (err)
		return ERR_PTR(err);

	err = xe_vma_ops_alloc(&vops, false);
	if (err) {
		fence = ERR_PTR(err);
		goto free_ops;
	}

	fence = ops_execute(vm, &vops);

free_ops:
	list_for_each_entry_safe(op, next_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return fence;
}

static void xe_vm_populate_range_rebind(struct xe_vma_op *op,
					struct xe_vma *vma,
					struct xe_svm_range *range,
					u8 tile_mask)
{
	INIT_LIST_HEAD(&op->link);
	op->tile_mask = tile_mask;
	op->base.op = DRM_GPUVA_OP_DRIVER;
	op->subop = XE_VMA_SUBOP_MAP_RANGE;
	op->map_range.vma = vma;
	op->map_range.range = range;
}

static int
xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops,
			   struct xe_vma *vma,
			   struct xe_svm_range *range,
			   u8 tile_mask)
{
	struct xe_vma_op *op;

	op = kzalloc(sizeof(*op), GFP_KERNEL);
	if (!op)
		return -ENOMEM;

	xe_vm_populate_range_rebind(op, vma, range, tile_mask);
	list_add_tail(&op->link, &vops->list);
	xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);

	return 0;
}

/**
 * xe_vm_range_rebind() - VM range (re)bind
 * @vm: The VM which the range belongs to.
 * @vma: The VMA which the range belongs to.
 * @range: SVM range to rebind.
 * @tile_mask: Tile mask to bind the range to.
 *
 * (re)bind SVM range setting up GPU page tables for the range.
 *
 * Return: dma fence for rebind to signal completion on success, ERR_PTR on
 * failure
 */
struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
				     struct xe_vma *vma,
				     struct xe_svm_range *range,
				     u8 tile_mask)
{
	struct dma_fence *fence = NULL;
	struct xe_vma_ops vops;
	struct xe_vma_op *op, *next_op;
	struct xe_tile *tile;
	u8 id;
	int err;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);
	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
	xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	for_each_tile(tile, vm->xe, id) {
		vops.pt_update_ops[id].wait_vm_bookkeep = true;
		vops.pt_update_ops[tile->id].q =
			xe_migrate_exec_queue(tile->migrate);
	}

	err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask);
	if (err)
		return ERR_PTR(err);

	err = xe_vma_ops_alloc(&vops, false);
	if (err) {
		fence = ERR_PTR(err);
		goto free_ops;
	}

	fence = ops_execute(vm, &vops);

free_ops:
	list_for_each_entry_safe(op, next_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return fence;
}

static void xe_vm_populate_range_unbind(struct xe_vma_op *op,
					struct xe_svm_range *range)
{
	INIT_LIST_HEAD(&op->link);
	op->tile_mask = range->tile_present;
	op->base.op = DRM_GPUVA_OP_DRIVER;
	op->subop = XE_VMA_SUBOP_UNMAP_RANGE;
	op->unmap_range.range = range;
}

static int
xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops,
			   struct xe_svm_range *range)
{
	struct xe_vma_op *op;

	op = kzalloc(sizeof(*op), GFP_KERNEL);
	if (!op)
		return -ENOMEM;

	xe_vm_populate_range_unbind(op, range);
	list_add_tail(&op->link, &vops->list);
	xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1);

	return 0;
}

/**
 * xe_vm_range_unbind() - VM range unbind
 * @vm: The VM which the range belongs to.
 * @range: SVM range to unbind.
 *
 * Unbind SVM range removing the GPU page tables for the range.
 *
 * Return: dma fence for unbind to signal completion on success, ERR_PTR on
 * failure
 */
struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
				     struct xe_svm_range *range)
{
	struct dma_fence *fence = NULL;
	struct xe_vma_ops vops;
	struct xe_vma_op *op, *next_op;
	struct xe_tile *tile;
	u8 id;
	int err;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);
	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));

	if (!range->tile_present)
		return dma_fence_get_stub();

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	for_each_tile(tile, vm->xe, id) {
		vops.pt_update_ops[id].wait_vm_bookkeep = true;
		vops.pt_update_ops[tile->id].q =
			xe_migrate_exec_queue(tile->migrate);
	}

	err = xe_vm_ops_add_range_unbind(&vops, range);
	if (err)
		return ERR_PTR(err);

	err = xe_vma_ops_alloc(&vops, false);
	if (err) {
		fence = ERR_PTR(err);
		goto free_ops;
	}

	fence = ops_execute(vm, &vops);

free_ops:
	list_for_each_entry_safe(op, next_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return fence;
}

static void xe_vma_free(struct xe_vma *vma)
{
	if (xe_vma_is_userptr(vma))
		kfree(to_userptr_vma(vma));
	else
		kfree(vma);
}

static struct xe_vma *xe_vma_create(struct xe_vm *vm,
				    struct xe_bo *bo,
				    u64 bo_offset_or_userptr,
				    u64 start, u64 end,
				    struct xe_vma_mem_attr *attr,
				    unsigned int flags)
{
	struct xe_vma *vma;
	struct xe_tile *tile;
	u8 id;
	bool is_null = (flags & DRM_GPUVA_SPARSE);
	bool is_cpu_addr_mirror = (flags & XE_VMA_SYSTEM_ALLOCATOR);

	xe_assert(vm->xe, start < end);
	xe_assert(vm->xe, end < vm->size);

	/*
	 * Allocate and ensure that the xe_vma_is_userptr() return
	 * matches what was allocated.
	 */
	if (!bo && !is_null && !is_cpu_addr_mirror) {
		struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL);

		if (!uvma)
			return ERR_PTR(-ENOMEM);

		vma = &uvma->vma;
	} else {
		vma = kzalloc(sizeof(*vma), GFP_KERNEL);
		if (!vma)
			return ERR_PTR(-ENOMEM);

		if (bo)
			vma->gpuva.gem.obj = &bo->ttm.base;
	}

	INIT_LIST_HEAD(&vma->combined_links.rebind);

	INIT_LIST_HEAD(&vma->gpuva.gem.entry);
	vma->gpuva.vm = &vm->gpuvm;
	vma->gpuva.va.addr = start;
	vma->gpuva.va.range = end - start + 1;
	vma->gpuva.flags = flags;

	for_each_tile(tile, vm->xe, id)
		vma->tile_mask |= 0x1 << id;

	if (vm->xe->info.has_atomic_enable_pte_bit)
		vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;

	vma->attr = *attr;

	if (bo) {
		struct drm_gpuvm_bo *vm_bo;

		xe_bo_assert_held(bo);

		vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
		if (IS_ERR(vm_bo)) {
			xe_vma_free(vma);
			return ERR_CAST(vm_bo);
		}

		drm_gpuvm_bo_extobj_add(vm_bo);
		drm_gem_object_get(&bo->ttm.base);
		vma->gpuva.gem.offset = bo_offset_or_userptr;
		drm_gpuva_link(&vma->gpuva, vm_bo);
		drm_gpuvm_bo_put(vm_bo);
	} else /* userptr or null */ {
		if (!is_null && !is_cpu_addr_mirror) {
			struct xe_userptr_vma *uvma = to_userptr_vma(vma);
			u64 size = end - start + 1;
			int err;

			vma->gpuva.gem.offset = bo_offset_or_userptr;

			err = xe_userptr_setup(uvma, xe_vma_userptr(vma), size);
			if (err) {
				xe_vma_free(vma);
				return ERR_PTR(err);
			}
		}

		xe_vm_get(vm);
	}

	return vma;
}

static void xe_vma_destroy_late(struct xe_vma *vma)
{
	struct xe_vm *vm = xe_vma_vm(vma);

	if (vma->ufence) {
		xe_sync_ufence_put(vma->ufence);
		vma->ufence = NULL;
	}

	if (xe_vma_is_userptr(vma)) {
		struct xe_userptr_vma *uvma = to_userptr_vma(vma);

		xe_userptr_remove(uvma);
		xe_vm_put(vm);
	} else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) {
		xe_vm_put(vm);
	} else {
		xe_bo_put(xe_vma_bo(vma));
	}

	xe_vma_free(vma);
}

static void vma_destroy_work_func(struct work_struct *w)
{
	struct xe_vma *vma =
		container_of(w, struct xe_vma, destroy_work);

	xe_vma_destroy_late(vma);
}

static void vma_destroy_cb(struct dma_fence *fence,
			   struct dma_fence_cb *cb)
{
	struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);

	INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
	queue_work(system_unbound_wq, &vma->destroy_work);
}

static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
{
	struct xe_vm *vm = xe_vma_vm(vma);

	lockdep_assert_held_write(&vm->lock);
	xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));

	if (xe_vma_is_userptr(vma)) {
		xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
		xe_userptr_destroy(to_userptr_vma(vma));
	} else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) {
		xe_bo_assert_held(xe_vma_bo(vma));

		drm_gpuva_unlink(&vma->gpuva);
	}

	xe_vm_assert_held(vm);
	if (fence) {
		int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
						 vma_destroy_cb);

		if (ret) {
			XE_WARN_ON(ret != -ENOENT);
			xe_vma_destroy_late(vma);
		}
	} else {
		xe_vma_destroy_late(vma);
	}
}
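/*
 * Summary added for readability (not from the upstream comments): when
 * xe_vma_destroy() is handed an unsignaled fence, the final teardown is
 * deferred until that fence signals:
 *
 *	xe_vma_destroy(vma, fence)
 *	  -> dma_fence_add_callback() -> vma_destroy_cb()
 *	       -> queue_work(system_unbound_wq) -> vma_destroy_work_func()
 *	            -> xe_vma_destroy_late(vma)
 *
 * With no fence, or an already signaled one (-ENOENT from
 * dma_fence_add_callback()), xe_vma_destroy_late() runs immediately.
 */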
/**
 * xe_vm_lock_vma() - drm_exec utility to lock a vma
 * @exec: The drm_exec object we're currently locking for.
 * @vma: The vma for which we want to lock the vm resv and any attached
 * object's resv.
 *
 * Return: 0 on success, negative error code on error. In particular
 * may return -EDEADLK on WW transaction contention and -EINTR if
 * an interruptible wait is terminated by a signal.
 */
int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
{
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_bo *bo = xe_vma_bo(vma);
	int err;

	XE_WARN_ON(!vm);

	err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
	if (!err && bo && !bo->vm)
		err = drm_exec_lock_obj(exec, &bo->ttm.base);

	return err;
}

static void xe_vma_destroy_unlocked(struct xe_vma *vma)
{
	struct xe_device *xe = xe_vma_vm(vma)->xe;
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	int err = 0;

	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
		err = xe_vm_lock_vma(&exec, vma);
		drm_exec_retry_on_contention(&exec);
		if (XE_WARN_ON(err))
			break;
		xe_vma_destroy(vma, NULL);
	}
	xe_assert(xe, !err);
}

struct xe_vma *
xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
{
	struct drm_gpuva *gpuva;

	lockdep_assert_held(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm))
		return NULL;

	xe_assert(vm->xe, start + range <= vm->size);

	gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);

	return gpuva ? gpuva_to_vma(gpuva) : NULL;
}

static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
{
	int err;

	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
	lockdep_assert_held(&vm->lock);

	mutex_lock(&vm->snap_mutex);
	err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
	mutex_unlock(&vm->snap_mutex);
	XE_WARN_ON(err);	/* Shouldn't be possible */

	return err;
}

static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
{
	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
	lockdep_assert_held(&vm->lock);

	mutex_lock(&vm->snap_mutex);
	drm_gpuva_remove(&vma->gpuva);
	mutex_unlock(&vm->snap_mutex);
	if (vm->usm.last_fault_vma == vma)
		vm->usm.last_fault_vma = NULL;
}

static struct drm_gpuva_op *xe_vm_op_alloc(void)
{
	struct xe_vma_op *op;

	op = kzalloc(sizeof(*op), GFP_KERNEL);

	if (unlikely(!op))
		return NULL;

	return &op->base;
}

static void xe_vm_free(struct drm_gpuvm *gpuvm);

static const struct drm_gpuvm_ops gpuvm_ops = {
	.op_alloc = xe_vm_op_alloc,
	.vm_bo_validate = xe_gpuvm_validate,
	.vm_free = xe_vm_free,
};

static u64 pde_encode_pat_index(u16 pat_index)
{
	u64 pte = 0;

	if (pat_index & BIT(0))
		pte |= XE_PPGTT_PTE_PAT0;

	if (pat_index & BIT(1))
		pte |= XE_PPGTT_PTE_PAT1;

	return pte;
}

static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level)
{
	u64 pte = 0;

	if (pat_index & BIT(0))
		pte |= XE_PPGTT_PTE_PAT0;

	if (pat_index & BIT(1))
		pte |= XE_PPGTT_PTE_PAT1;

	if (pat_index & BIT(2)) {
		if (pt_level)
			pte |= XE_PPGTT_PDE_PDPE_PAT2;
		else
			pte |= XE_PPGTT_PTE_PAT2;
	}

	if (pat_index & BIT(3))
		pte |= XELPG_PPGTT_PTE_PAT3;

	if (pat_index & (BIT(4)))
		pte |= XE2_PPGTT_PTE_PAT4;

	return pte;
}

static u64 pte_encode_ps(u32 pt_level)
{
	XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);

	if (pt_level == 1)
		return XE_PDE_PS_2M;
	else if (pt_level == 2)
		return XE_PDPE_PS_1G;

	return 0;
}

static u16 pde_pat_index(struct xe_bo *bo)
{
	struct xe_device *xe = xe_bo_device(bo);
	u16 pat_index;

	/*
	 * We only have two bits to encode the PAT index in non-leaf nodes, but
	 * these only point to other paging structures so we only need a minimal
	 * selection of options. The user PAT index is only for encoding leaf
	 * nodes, where we have more bits available to do the encoding. The
	 * non-leaf nodes are instead under driver control so the chosen index
	 * here should be distinct from the user PAT index. Also the
	 * corresponding coherency of the PAT index should be tied to the
	 * allocation type of the page table (or at least we should pick
	 * something which is always safe).
	 */
	if (!xe_bo_is_vram(bo) && bo->ttm.ttm->caching == ttm_cached)
		pat_index = xe->pat.idx[XE_CACHE_WB];
	else
		pat_index = xe->pat.idx[XE_CACHE_NONE];

	xe_assert(xe, pat_index <= 3);

	return pat_index;
}

static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset)
{
	u64 pde;

	pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
	pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
	pde |= pde_encode_pat_index(pde_pat_index(bo));

	return pde;
}

static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
			      u16 pat_index, u32 pt_level)
{
	u64 pte;

	pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
	pte |= pte_encode_pat_index(pat_index, pt_level);
	pte |= pte_encode_ps(pt_level);

	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
		pte |= XE_PPGTT_PTE_DM;

	return pte;
}

static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
			       u16 pat_index, u32 pt_level)
{
	pte |= XE_PAGE_PRESENT;

	if (likely(!xe_vma_read_only(vma)))
		pte |= XE_PAGE_RW;

	pte |= pte_encode_pat_index(pat_index, pt_level);
	pte |= pte_encode_ps(pt_level);

	if (unlikely(xe_vma_is_null(vma)))
		pte |= XE_PTE_NULL;

	return pte;
}

static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
				u16 pat_index,
				u32 pt_level, bool devmem, u64 flags)
{
	u64 pte;

	/* Avoid passing random bits directly as flags */
	xe_assert(xe, !(flags & ~XE_PTE_PS64));

	pte = addr;
	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
	pte |= pte_encode_pat_index(pat_index, pt_level);
	pte |= pte_encode_ps(pt_level);

	if (devmem)
		pte |= XE_PPGTT_PTE_DM;

	pte |= flags;

	return pte;
}

static const struct xe_pt_ops xelp_pt_ops = {
	.pte_encode_bo = xelp_pte_encode_bo,
	.pte_encode_vma = xelp_pte_encode_vma,
	.pte_encode_addr = xelp_pte_encode_addr,
	.pde_encode_bo = xelp_pde_encode_bo,
};
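/*
 * Worked example (added annotation, not from the upstream comments): for a
 * leaf PTE with pat_index == 0b00011, pte_encode_pat_index() above sets
 * XE_PPGTT_PTE_PAT0 and XE_PPGTT_PTE_PAT1. Bit 2 selects XE_PPGTT_PTE_PAT2
 * for level-0 entries but XE_PPGTT_PDE_PDPE_PAT2 for higher levels, and bits
 * 3 and 4 select XELPG_PPGTT_PTE_PAT3 and XE2_PPGTT_PTE_PAT4 respectively.
 * Non-leaf entries only ever encode the two low PAT bits, see
 * pde_pat_index().
 */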
static void vm_destroy_work_func(struct work_struct *w);

/**
 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
 * given tile and vm.
 * @xe: xe device.
 * @tile: tile to set up for.
 * @vm: vm to set up for.
 * @exec: The struct drm_exec object used to lock the vm resv.
 *
 * Sets up a pagetable tree with one page-table per level and a single
 * leaf PTE. All pagetable entries point to the single page-table or,
 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE that returns 0 on reads while
 * writes become NOPs.
 *
 * Return: 0 on success, negative error code on error.
 */
static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
				struct xe_vm *vm, struct drm_exec *exec)
{
	u8 id = tile->id;
	int i;

	for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
		vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i, exec);
		if (IS_ERR(vm->scratch_pt[id][i])) {
			int err = PTR_ERR(vm->scratch_pt[id][i]);

			vm->scratch_pt[id][i] = NULL;
			return err;
		}
		xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
	}

	return 0;
}
ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO);

static void xe_vm_free_scratch(struct xe_vm *vm)
{
	struct xe_tile *tile;
	u8 id;

	if (!xe_vm_has_scratch(vm))
		return;

	for_each_tile(tile, vm->xe, id) {
		u32 i;

		if (!vm->pt_root[id])
			continue;

		for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
			if (vm->scratch_pt[id][i])
				xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
	}
}

static void xe_vm_pt_destroy(struct xe_vm *vm)
{
	struct xe_tile *tile;
	u8 id;

	xe_vm_assert_held(vm);

	for_each_tile(tile, vm->xe, id) {
		if (vm->pt_root[id]) {
			xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
			vm->pt_root[id] = NULL;
		}
	}
}

struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
{
	struct drm_gem_object *vm_resv_obj;
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	struct xe_vm *vm;
	int err, number_tiles = 0;
	struct xe_tile *tile;
	u8 id;

	/*
	 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to
	 * ever be in faulting mode.
	 */
	xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE)));

	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
	if (!vm)
		return ERR_PTR(-ENOMEM);

	vm->xe = xe;

	vm->size = 1ull << xe->info.va_bits;
	vm->flags = flags;

	if (xef)
		vm->xef = xe_file_get(xef);
	/**
	 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be
	 * manipulated under the PXP mutex. However, the PXP mutex can be taken
	 * under a user-VM lock when the PXP session is started at exec_queue
	 * creation time. Those are different VMs and therefore there is no risk
	 * of deadlock, but we need to tell lockdep that this is the case or it
	 * will print a warning.
	 */
	if (flags & XE_VM_FLAG_GSC) {
		static struct lock_class_key gsc_vm_key;

		__init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key);
	} else {
		init_rwsem(&vm->lock);
	}
	mutex_init(&vm->snap_mutex);

	INIT_LIST_HEAD(&vm->rebind_list);

	INIT_LIST_HEAD(&vm->userptr.repin_list);
	INIT_LIST_HEAD(&vm->userptr.invalidated);
	spin_lock_init(&vm->userptr.invalidated_lock);

	ttm_lru_bulk_move_init(&vm->lru_bulk_move);

	INIT_WORK(&vm->destroy_work, vm_destroy_work_func);

	INIT_LIST_HEAD(&vm->preempt.exec_queues);
	vm->preempt.min_run_period_ms = 10;	/* FIXME: Wire up to uAPI */

	for_each_tile(tile, xe, id)
		xe_range_fence_tree_init(&vm->rftree[id]);

	vm->pt_ops = &xelp_pt_ops;

	/*
	 * Long-running workloads are not protected by the scheduler references.
	 * By design, run_job for long-running workloads returns NULL and the
	 * scheduler drops all the references of it, hence protecting the VM
	 * for this case is necessary.
	 */
	if (flags & XE_VM_FLAG_LR_MODE) {
		INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
		xe_pm_runtime_get_noresume(xe);
		INIT_LIST_HEAD(&vm->preempt.pm_activate_link);
	}

	err = xe_svm_init(vm);
	if (err)
		goto err_no_resv;

	vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
	if (!vm_resv_obj) {
		err = -ENOMEM;
		goto err_svm_fini;
	}

	drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
		       vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);

	drm_gem_object_put(vm_resv_obj);

	err = 0;
	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
			    err) {
		err = xe_vm_drm_exec_lock(vm, &exec);
		drm_exec_retry_on_contention(&exec);

		if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
			vm->flags |= XE_VM_FLAG_64K;

		for_each_tile(tile, xe, id) {
			if (flags & XE_VM_FLAG_MIGRATION &&
			    tile->id != XE_VM_FLAG_TILE_ID(flags))
				continue;

			vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level,
						       &exec);
			if (IS_ERR(vm->pt_root[id])) {
				err = PTR_ERR(vm->pt_root[id]);
				vm->pt_root[id] = NULL;
				xe_vm_pt_destroy(vm);
				drm_exec_retry_on_contention(&exec);
				xe_validation_retry_on_oom(&ctx, &err);
				break;
			}
		}
		if (err)
			break;

		if (xe_vm_has_scratch(vm)) {
			for_each_tile(tile, xe, id) {
				if (!vm->pt_root[id])
					continue;

				err = xe_vm_create_scratch(xe, tile, vm, &exec);
				if (err) {
					xe_vm_free_scratch(vm);
					xe_vm_pt_destroy(vm);
					drm_exec_retry_on_contention(&exec);
					xe_validation_retry_on_oom(&ctx, &err);
					break;
				}
			}
			if (err)
				break;
			vm->batch_invalidate_tlb = true;
		}

		if (vm->flags & XE_VM_FLAG_LR_MODE) {
			INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
			vm->batch_invalidate_tlb = false;
		}

		/* Fill pt_root after allocating scratch tables */
		for_each_tile(tile, xe, id) {
			if (!vm->pt_root[id])
				continue;

			xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
		}
	}
	if (err)
		goto err_close;

	/* Kernel migration VM shouldn't have a circular loop.. */
	if (!(flags & XE_VM_FLAG_MIGRATION)) {
		for_each_tile(tile, xe, id) {
			struct xe_exec_queue *q;
			u32 create_flags = EXEC_QUEUE_FLAG_VM;

			if (!vm->pt_root[id])
				continue;

			q = xe_exec_queue_create_bind(xe, tile, create_flags, 0);
			if (IS_ERR(q)) {
				err = PTR_ERR(q);
				goto err_close;
			}
			vm->q[id] = q;
			number_tiles++;
		}
	}

	if (number_tiles > 1)
		vm->composite_fence_ctx = dma_fence_context_alloc(1);

	if (xef && xe->info.has_asid) {
		u32 asid;

		down_write(&xe->usm.lock);
		err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
				      XA_LIMIT(1, XE_MAX_ASID - 1),
				      &xe->usm.next_asid, GFP_KERNEL);
		up_write(&xe->usm.lock);
		if (err < 0)
			goto err_close;

		vm->usm.asid = asid;
	}

	trace_xe_vm_create(vm);

	return vm;

err_close:
	xe_vm_close_and_put(vm);
	return ERR_PTR(err);

err_svm_fini:
	if (flags & XE_VM_FLAG_FAULT_MODE) {
		vm->size = 0; /* close the vm */
		xe_svm_fini(vm);
	}
err_no_resv:
	mutex_destroy(&vm->snap_mutex);
	for_each_tile(tile, xe, id)
		xe_range_fence_tree_fini(&vm->rftree[id]);
	ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
	if (vm->xef)
		xe_file_put(vm->xef);
	kfree(vm);
	if (flags & XE_VM_FLAG_LR_MODE)
		xe_pm_runtime_put(xe);
	return ERR_PTR(err);
}

static void xe_vm_close(struct xe_vm *vm)
{
	struct xe_device *xe = vm->xe;
	bool bound;
	int idx;

	bound = drm_dev_enter(&xe->drm, &idx);

	down_write(&vm->lock);
	if (xe_vm_in_fault_mode(vm))
		xe_svm_notifier_lock(vm);

	vm->size = 0;

	if (!((vm->flags & XE_VM_FLAG_MIGRATION))) {
		struct xe_tile *tile;
		struct xe_gt *gt;
		u8 id;

		/* Wait for pending binds */
		dma_resv_wait_timeout(xe_vm_resv(vm),
				      DMA_RESV_USAGE_BOOKKEEP,
				      false, MAX_SCHEDULE_TIMEOUT);

		if (bound) {
			for_each_tile(tile, xe, id)
				if (vm->pt_root[id])
					xe_pt_clear(xe, vm->pt_root[id]);

			for_each_gt(gt, xe, id)
				xe_tlb_inval_vm(&gt->tlb_inval, vm);
		}
	}

	if (xe_vm_in_fault_mode(vm))
		xe_svm_notifier_unlock(vm);
	up_write(&vm->lock);

	if (bound)
		drm_dev_exit(idx);
}

void xe_vm_close_and_put(struct xe_vm *vm)
{
	LIST_HEAD(contested);
	struct xe_device *xe = vm->xe;
	struct xe_tile *tile;
	struct xe_vma *vma, *next_vma;
	struct drm_gpuva *gpuva, *next;
	u8 id;

	xe_assert(xe, !vm->preempt.num_exec_queues);

	xe_vm_close(vm);
	if (xe_vm_in_preempt_fence_mode(vm)) {
		mutex_lock(&xe->rebind_resume_lock);
		list_del_init(&vm->preempt.pm_activate_link);
		mutex_unlock(&xe->rebind_resume_lock);
		flush_work(&vm->preempt.rebind_work);
	}
	if (xe_vm_in_fault_mode(vm))
		xe_svm_close(vm);

	down_write(&vm->lock);
	for_each_tile(tile, xe, id) {
		if (vm->q[id])
			xe_exec_queue_last_fence_put(vm->q[id], vm);
	}
	up_write(&vm->lock);

	for_each_tile(tile, xe, id) {
		if (vm->q[id]) {
			xe_exec_queue_kill(vm->q[id]);
			xe_exec_queue_put(vm->q[id]);
			vm->q[id] = NULL;
		}
	}

	down_write(&vm->lock);
	xe_vm_lock(vm, false);
	drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
		vma = gpuva_to_vma(gpuva);

		if (xe_vma_has_no_bo(vma)) {
			xe_svm_notifier_lock(vm);
			vma->gpuva.flags |= XE_VMA_DESTROYED;
			xe_svm_notifier_unlock(vm);
		}

		xe_vm_remove_vma(vm, vma);

		/* easy case, remove from VMA? */
		if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
			list_del_init(&vma->combined_links.rebind);
			xe_vma_destroy(vma, NULL);
			continue;
		}

		list_move_tail(&vma->combined_links.destroy, &contested);
		vma->gpuva.flags |= XE_VMA_DESTROYED;
	}

	/*
	 * All vm operations will add shared fences to resv.
	 * The only exception is eviction for a shared object,
	 * but even so, the unbind when evicted would still
	 * install a fence to resv. Hence it's safe to
	 * destroy the pagetables immediately.
	 */
	xe_vm_free_scratch(vm);
	xe_vm_pt_destroy(vm);
	xe_vm_unlock(vm);

	/*
	 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
	 * Since we hold a refcount to the bo, we can remove and free
	 * the members safely without locking.
	 */
	list_for_each_entry_safe(vma, next_vma, &contested,
				 combined_links.destroy) {
		list_del_init(&vma->combined_links.destroy);
		xe_vma_destroy_unlocked(vma);
	}

	xe_svm_fini(vm);

	up_write(&vm->lock);

	down_write(&xe->usm.lock);
	if (vm->usm.asid) {
		void *lookup;

		xe_assert(xe, xe->info.has_asid);
		xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));

		lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
		xe_assert(xe, lookup == vm);
	}
	up_write(&xe->usm.lock);

	for_each_tile(tile, xe, id)
		xe_range_fence_tree_fini(&vm->rftree[id]);

	xe_vm_put(vm);
}

static void vm_destroy_work_func(struct work_struct *w)
{
	struct xe_vm *vm =
		container_of(w, struct xe_vm, destroy_work);
	struct xe_device *xe = vm->xe;
	struct xe_tile *tile;
	u8 id;

	/* xe_vm_close_and_put was not called? */
	xe_assert(xe, !vm->size);

	if (xe_vm_in_preempt_fence_mode(vm))
		flush_work(&vm->preempt.rebind_work);

	mutex_destroy(&vm->snap_mutex);

	if (vm->flags & XE_VM_FLAG_LR_MODE)
		xe_pm_runtime_put(xe);

	for_each_tile(tile, xe, id)
		XE_WARN_ON(vm->pt_root[id]);

	trace_xe_vm_free(vm);

	ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);

	if (vm->xef)
		xe_file_put(vm->xef);

	kfree(vm);
}

static void xe_vm_free(struct drm_gpuvm *gpuvm)
{
	struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);

	/* To destroy the VM we need to be able to sleep */
	queue_work(system_unbound_wq, &vm->destroy_work);
}

struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
{
	struct xe_vm *vm;

	mutex_lock(&xef->vm.lock);
	vm = xa_load(&xef->vm.xa, id);
	if (vm)
		xe_vm_get(vm);
	mutex_unlock(&xef->vm.lock);

	return vm;
}
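/*
 * Usage sketch (added annotation, not from the upstream comments):
 * xe_vm_lookup() returns the VM with an extra reference held, so callers
 * pair it with xe_vm_put(), e.g.:
 *
 *	struct xe_vm *vm = xe_vm_lookup(xef, args->vm_id);
 *
 *	if (XE_IOCTL_DBG(xe, !vm))
 *		return -EINVAL;
 *	...
 *	xe_vm_put(vm);
 *
 * xe_vm_query_vmas_attrs_ioctl() below follows exactly this pattern.
 */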
u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
{
	return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0);
}

static struct xe_exec_queue *
to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	return q ? q : vm->q[0];
}

static struct xe_user_fence *
find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
{
	unsigned int i;

	for (i = 0; i < num_syncs; i++) {
		struct xe_sync_entry *e = &syncs[i];

		if (xe_sync_is_ufence(e))
			return xe_sync_ufence_get(e);
	}

	return NULL;
}

#define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
				    DRM_XE_VM_CREATE_FLAG_LR_MODE | \
				    DRM_XE_VM_CREATE_FLAG_FAULT_MODE)

int xe_vm_create_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_vm_create *args = data;
	struct xe_vm *vm;
	u32 id;
	int err;
	u32 flags = 0;

	if (XE_IOCTL_DBG(xe, args->extensions))
		return -EINVAL;

	if (XE_GT_WA(xe_root_mmio_gt(xe), 14016763929))
		args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;

	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
			 !xe->info.has_usm))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
			 !xe->info.needs_scratch))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
		return -EINVAL;

	if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
		flags |= XE_VM_FLAG_SCRATCH_PAGE;
	if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
		flags |= XE_VM_FLAG_LR_MODE;
	if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
		flags |= XE_VM_FLAG_FAULT_MODE;

	vm = xe_vm_create(xe, flags, xef);
	if (IS_ERR(vm))
		return PTR_ERR(vm);

#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
	/* Warning: Security issue - never enable by default */
	args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
#endif

	/* user id alloc must always be last in ioctl to prevent UAF */
	err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
	if (err)
		goto err_close_and_put;

	args->vm_id = id;

	return 0;

err_close_and_put:
	xe_vm_close_and_put(vm);

	return err;
}

int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_vm_destroy *args = data;
	struct xe_vm *vm;
	int err = 0;

	if (XE_IOCTL_DBG(xe, args->pad) ||
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	mutex_lock(&xef->vm.lock);
	vm = xa_load(&xef->vm.xa, args->vm_id);
	if (XE_IOCTL_DBG(xe, !vm))
		err = -ENOENT;
	else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
		err = -EBUSY;
	else
		xa_erase(&xef->vm.xa, args->vm_id);
	mutex_unlock(&xef->vm.lock);

	if (!err)
		xe_vm_close_and_put(vm);

	return err;
}
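/*
 * Illustrative userspace-side sketch (added annotation, not from the upstream
 * comments) of the two ioctls above, assuming an open Xe DRM fd:
 *
 *	struct drm_xe_vm_create create = {
 *		.flags = DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE,
 *	};
 *	struct drm_xe_vm_destroy destroy = {};
 *
 *	ioctl(fd, DRM_IOCTL_XE_VM_CREATE, &create);
 *	destroy.vm_id = create.vm_id;
 *	...
 *	ioctl(fd, DRM_IOCTL_XE_VM_DESTROY, &destroy);
 */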
static int xe_vm_query_vmas(struct xe_vm *vm, u64 start, u64 end)
{
	struct drm_gpuva *gpuva;
	u32 num_vmas = 0;

	lockdep_assert_held(&vm->lock);

	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end)
		num_vmas++;

	return num_vmas;
}

static int get_mem_attrs(struct xe_vm *vm, u32 *num_vmas, u64 start,
			 u64 end, struct drm_xe_mem_range_attr *attrs)
{
	struct drm_gpuva *gpuva;
	int i = 0;

	lockdep_assert_held(&vm->lock);

	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);

		if (i == *num_vmas)
			return -ENOSPC;

		attrs[i].start = xe_vma_start(vma);
		attrs[i].end = xe_vma_end(vma);
		attrs[i].atomic.val = vma->attr.atomic_access;
		attrs[i].pat_index.val = vma->attr.pat_index;
		attrs[i].preferred_mem_loc.devmem_fd = vma->attr.preferred_loc.devmem_fd;
		attrs[i].preferred_mem_loc.migration_policy =
			vma->attr.preferred_loc.migration_policy;

		i++;
	}

	*num_vmas = i;
	return 0;
}

int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_mem_range_attr *mem_attrs;
	struct drm_xe_vm_query_mem_range_attr *args = data;
	u64 __user *attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);
	struct xe_vm *vm;
	int err = 0;

	if (XE_IOCTL_DBG(xe,
			 ((args->num_mem_ranges == 0 &&
			   (attrs_user || args->sizeof_mem_range_attr != 0)) ||
			  (args->num_mem_ranges > 0 &&
			   (!attrs_user ||
			    args->sizeof_mem_range_attr !=
			    sizeof(struct drm_xe_mem_range_attr))))))
		return -EINVAL;

	vm = xe_vm_lookup(xef, args->vm_id);
	if (XE_IOCTL_DBG(xe, !vm))
		return -EINVAL;

	err = down_read_interruptible(&vm->lock);
	if (err)
		goto put_vm;

	attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);

	if (args->num_mem_ranges == 0 && !attrs_user) {
		args->num_mem_ranges = xe_vm_query_vmas(vm, args->start, args->start + args->range);
		args->sizeof_mem_range_attr = sizeof(struct drm_xe_mem_range_attr);
		goto unlock_vm;
	}

	mem_attrs = kvmalloc_array(args->num_mem_ranges, args->sizeof_mem_range_attr,
				   GFP_KERNEL | __GFP_ACCOUNT |
				   __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
	if (!mem_attrs) {
		err = args->num_mem_ranges > 1 ? -ENOBUFS : -ENOMEM;
		goto unlock_vm;
	}

	memset(mem_attrs, 0, args->num_mem_ranges * args->sizeof_mem_range_attr);
	err = get_mem_attrs(vm, &args->num_mem_ranges, args->start,
			    args->start + args->range, mem_attrs);
	if (err)
		goto free_mem_attrs;

	err = copy_to_user(attrs_user, mem_attrs,
			   args->sizeof_mem_range_attr * args->num_mem_ranges);
	if (err)
		err = -EFAULT;

free_mem_attrs:
	kvfree(mem_attrs);
unlock_vm:
	up_read(&vm->lock);
put_vm:
	xe_vm_put(vm);
	return err;
}

static bool vma_matches(struct xe_vma *vma, u64 page_addr)
{
	if (page_addr > xe_vma_end(vma) - 1 ||
	    page_addr + SZ_4K - 1 < xe_vma_start(vma))
		return false;

	return true;
}

/**
 * xe_vm_find_vma_by_addr() - Find a VMA by its address
 *
 * @vm: the xe_vm the vma belongs to
 * @page_addr: address to look up
 *
 * Return: the VMA covering @page_addr, or %NULL if none is found.
 */
struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr)
{
	struct xe_vma *vma = NULL;

	if (vm->usm.last_fault_vma) {	/* Fast lookup */
		if (vma_matches(vm->usm.last_fault_vma, page_addr))
			vma = vm->usm.last_fault_vma;
	}
	if (!vma)
		vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K);

	return vma;
}

static const u32 region_to_mem_type[] = {
	XE_PL_TT,
	XE_PL_VRAM0,
	XE_PL_VRAM1,
};

static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
			     bool post_commit)
{
	xe_svm_notifier_lock(vm);
	vma->gpuva.flags |= XE_VMA_DESTROYED;
	xe_svm_notifier_unlock(vm);
	if (post_commit)
		xe_vm_remove_vma(vm, vma);
}

#undef ULL
#define ULL	unsigned long long

#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
{
	struct xe_vma *vma;

	switch (op->op) {
	case DRM_GPUVA_OP_MAP:
		vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
		       (ULL)op->map.va.addr, (ULL)op->map.va.range);
		break;
	case DRM_GPUVA_OP_REMAP:
		vma = gpuva_to_vma(op->remap.unmap->va);
		vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
		       op->remap.unmap->keep ? 1 : 0);
		if (op->remap.prev)
			vm_dbg(&xe->drm,
			       "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
			       (ULL)op->remap.prev->va.addr,
			       (ULL)op->remap.prev->va.range);
		if (op->remap.next)
			vm_dbg(&xe->drm,
			       "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
			       (ULL)op->remap.next->va.addr,
			       (ULL)op->remap.next->va.range);
		break;
	case DRM_GPUVA_OP_UNMAP:
		vma = gpuva_to_vma(op->unmap.va);
		vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
		       op->unmap.keep ? 1 : 0);
1 : 0); 2151 break; 2152 case DRM_GPUVA_OP_PREFETCH: 2153 vma = gpuva_to_vma(op->prefetch.va); 2154 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx", 2155 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma)); 2156 break; 2157 default: 2158 drm_warn(&xe->drm, "NOT POSSIBLE"); 2159 } 2160 } 2161 #else 2162 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 2163 { 2164 } 2165 #endif 2166 2167 static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags) 2168 { 2169 if (!xe_vm_in_fault_mode(vm)) 2170 return false; 2171 2172 if (!xe_vm_has_scratch(vm)) 2173 return false; 2174 2175 if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE) 2176 return false; 2177 2178 return true; 2179 } 2180 2181 static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops) 2182 { 2183 struct drm_gpuva_op *__op; 2184 2185 drm_gpuva_for_each_op(__op, ops) { 2186 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2187 2188 xe_vma_svm_prefetch_op_fini(op); 2189 } 2190 } 2191 2192 /* 2193 * Create operations list from IOCTL arguments, setup operations fields so parse 2194 * and commit steps are decoupled from IOCTL arguments. This step can fail. 2195 */ 2196 static struct drm_gpuva_ops * 2197 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, 2198 struct xe_bo *bo, u64 bo_offset_or_userptr, 2199 u64 addr, u64 range, 2200 u32 operation, u32 flags, 2201 u32 prefetch_region, u16 pat_index) 2202 { 2203 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL; 2204 struct drm_gpuva_ops *ops; 2205 struct drm_gpuva_op *__op; 2206 struct drm_gpuvm_bo *vm_bo; 2207 u64 range_end = addr + range; 2208 int err; 2209 2210 lockdep_assert_held_write(&vm->lock); 2211 2212 vm_dbg(&vm->xe->drm, 2213 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx", 2214 operation, (ULL)addr, (ULL)range, 2215 (ULL)bo_offset_or_userptr); 2216 2217 switch (operation) { 2218 case DRM_XE_VM_BIND_OP_MAP: 2219 case DRM_XE_VM_BIND_OP_MAP_USERPTR: { 2220 struct drm_gpuvm_map_req map_req = { 2221 .map.va.addr = addr, 2222 .map.va.range = range, 2223 .map.gem.obj = obj, 2224 .map.gem.offset = bo_offset_or_userptr, 2225 }; 2226 2227 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, &map_req); 2228 break; 2229 } 2230 case DRM_XE_VM_BIND_OP_UNMAP: 2231 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range); 2232 break; 2233 case DRM_XE_VM_BIND_OP_PREFETCH: 2234 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range); 2235 break; 2236 case DRM_XE_VM_BIND_OP_UNMAP_ALL: 2237 xe_assert(vm->xe, bo); 2238 2239 err = xe_bo_lock(bo, true); 2240 if (err) 2241 return ERR_PTR(err); 2242 2243 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj); 2244 if (IS_ERR(vm_bo)) { 2245 xe_bo_unlock(bo); 2246 return ERR_CAST(vm_bo); 2247 } 2248 2249 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo); 2250 drm_gpuvm_bo_put(vm_bo); 2251 xe_bo_unlock(bo); 2252 break; 2253 default: 2254 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2255 ops = ERR_PTR(-EINVAL); 2256 } 2257 if (IS_ERR(ops)) 2258 return ops; 2259 2260 drm_gpuva_for_each_op(__op, ops) { 2261 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2262 2263 if (__op->op == DRM_GPUVA_OP_MAP) { 2264 op->map.immediate = 2265 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; 2266 if (flags & DRM_XE_VM_BIND_FLAG_READONLY) 2267 op->map.vma_flags |= XE_VMA_READ_ONLY; 2268 if (flags & DRM_XE_VM_BIND_FLAG_NULL) 2269 op->map.vma_flags |= DRM_GPUVA_SPARSE; 2270 if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) 2271 op->map.vma_flags |= XE_VMA_SYSTEM_ALLOCATOR; 2272 if (flags & 
DRM_XE_VM_BIND_FLAG_DUMPABLE) 2273 op->map.vma_flags |= XE_VMA_DUMPABLE; 2274 if (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) 2275 op->map.vma_flags |= XE_VMA_MADV_AUTORESET; 2276 op->map.pat_index = pat_index; 2277 op->map.invalidate_on_bind = 2278 __xe_vm_needs_clear_scratch_pages(vm, flags); 2279 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { 2280 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2281 struct xe_tile *tile; 2282 struct xe_svm_range *svm_range; 2283 struct drm_gpusvm_ctx ctx = {}; 2284 struct drm_pagemap *dpagemap; 2285 u8 id, tile_mask = 0; 2286 u32 i; 2287 2288 if (!xe_vma_is_cpu_addr_mirror(vma)) { 2289 op->prefetch.region = prefetch_region; 2290 break; 2291 } 2292 2293 ctx.read_only = xe_vma_read_only(vma); 2294 ctx.devmem_possible = IS_DGFX(vm->xe) && 2295 IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); 2296 2297 for_each_tile(tile, vm->xe, id) 2298 tile_mask |= 0x1 << id; 2299 2300 xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC); 2301 op->prefetch_range.ranges_count = 0; 2302 tile = NULL; 2303 2304 if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) { 2305 dpagemap = xe_vma_resolve_pagemap(vma, 2306 xe_device_get_root_tile(vm->xe)); 2307 /* 2308 * TODO: Once multigpu support is enabled will need 2309 * something to dereference tile from dpagemap. 2310 */ 2311 if (dpagemap) 2312 tile = xe_device_get_root_tile(vm->xe); 2313 } else if (prefetch_region) { 2314 tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] - 2315 XE_PL_VRAM0]; 2316 } 2317 2318 op->prefetch_range.tile = tile; 2319 alloc_next_range: 2320 svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx); 2321 2322 if (PTR_ERR(svm_range) == -ENOENT) { 2323 u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma); 2324 2325 addr = ret == ULONG_MAX ? 0 : ret; 2326 if (addr) 2327 goto alloc_next_range; 2328 else 2329 goto print_op_label; 2330 } 2331 2332 if (IS_ERR(svm_range)) { 2333 err = PTR_ERR(svm_range); 2334 goto unwind_prefetch_ops; 2335 } 2336 2337 if (xe_svm_range_validate(vm, svm_range, tile_mask, !!tile)) { 2338 xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID"); 2339 goto check_next_range; 2340 } 2341 2342 err = xa_alloc(&op->prefetch_range.range, 2343 &i, svm_range, xa_limit_32b, 2344 GFP_KERNEL); 2345 2346 if (err) 2347 goto unwind_prefetch_ops; 2348 2349 op->prefetch_range.ranges_count++; 2350 vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH; 2351 xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED"); 2352 check_next_range: 2353 if (range_end > xe_svm_range_end(svm_range) && 2354 xe_svm_range_end(svm_range) < xe_vma_end(vma)) { 2355 addr = xe_svm_range_end(svm_range); 2356 goto alloc_next_range; 2357 } 2358 } 2359 print_op_label: 2360 print_op(vm->xe, __op); 2361 } 2362 2363 return ops; 2364 2365 unwind_prefetch_ops: 2366 xe_svm_prefetch_gpuva_ops_fini(ops); 2367 drm_gpuva_ops_free(&vm->gpuvm, ops); 2368 return ERR_PTR(err); 2369 } 2370 2371 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO); 2372 2373 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, 2374 struct xe_vma_mem_attr *attr, unsigned int flags) 2375 { 2376 struct xe_bo *bo = op->gem.obj ? 
gem_to_xe_bo(op->gem.obj) : NULL; 2377 struct xe_validation_ctx ctx; 2378 struct drm_exec exec; 2379 struct xe_vma *vma; 2380 int err = 0; 2381 2382 lockdep_assert_held_write(&vm->lock); 2383 2384 if (bo) { 2385 err = 0; 2386 xe_validation_guard(&ctx, &vm->xe->val, &exec, 2387 (struct xe_val_flags) {.interruptible = true}, err) { 2388 if (!bo->vm) { 2389 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm)); 2390 drm_exec_retry_on_contention(&exec); 2391 } 2392 if (!err) { 2393 err = drm_exec_lock_obj(&exec, &bo->ttm.base); 2394 drm_exec_retry_on_contention(&exec); 2395 } 2396 if (err) 2397 return ERR_PTR(err); 2398 2399 vma = xe_vma_create(vm, bo, op->gem.offset, 2400 op->va.addr, op->va.addr + 2401 op->va.range - 1, attr, flags); 2402 if (IS_ERR(vma)) 2403 return vma; 2404 2405 if (!bo->vm) { 2406 err = add_preempt_fences(vm, bo); 2407 if (err) { 2408 prep_vma_destroy(vm, vma, false); 2409 xe_vma_destroy(vma, NULL); 2410 } 2411 } 2412 } 2413 if (err) 2414 return ERR_PTR(err); 2415 } else { 2416 vma = xe_vma_create(vm, NULL, op->gem.offset, 2417 op->va.addr, op->va.addr + 2418 op->va.range - 1, attr, flags); 2419 if (IS_ERR(vma)) 2420 return vma; 2421 2422 if (xe_vma_is_userptr(vma)) 2423 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2424 } 2425 if (err) { 2426 prep_vma_destroy(vm, vma, false); 2427 xe_vma_destroy_unlocked(vma); 2428 vma = ERR_PTR(err); 2429 } 2430 2431 return vma; 2432 } 2433 2434 static u64 xe_vma_max_pte_size(struct xe_vma *vma) 2435 { 2436 if (vma->gpuva.flags & XE_VMA_PTE_1G) 2437 return SZ_1G; 2438 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT)) 2439 return SZ_2M; 2440 else if (vma->gpuva.flags & XE_VMA_PTE_64K) 2441 return SZ_64K; 2442 else if (vma->gpuva.flags & XE_VMA_PTE_4K) 2443 return SZ_4K; 2444 2445 return SZ_1G; /* Uninitialized, used max size */ 2446 } 2447 2448 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size) 2449 { 2450 switch (size) { 2451 case SZ_1G: 2452 vma->gpuva.flags |= XE_VMA_PTE_1G; 2453 break; 2454 case SZ_2M: 2455 vma->gpuva.flags |= XE_VMA_PTE_2M; 2456 break; 2457 case SZ_64K: 2458 vma->gpuva.flags |= XE_VMA_PTE_64K; 2459 break; 2460 case SZ_4K: 2461 vma->gpuva.flags |= XE_VMA_PTE_4K; 2462 break; 2463 } 2464 } 2465 2466 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) 2467 { 2468 int err = 0; 2469 2470 lockdep_assert_held_write(&vm->lock); 2471 2472 switch (op->base.op) { 2473 case DRM_GPUVA_OP_MAP: 2474 err |= xe_vm_insert_vma(vm, op->map.vma); 2475 if (!err) 2476 op->flags |= XE_VMA_OP_COMMITTED; 2477 break; 2478 case DRM_GPUVA_OP_REMAP: 2479 { 2480 u8 tile_present = 2481 gpuva_to_vma(op->base.remap.unmap->va)->tile_present; 2482 2483 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va), 2484 true); 2485 op->flags |= XE_VMA_OP_COMMITTED; 2486 2487 if (op->remap.prev) { 2488 err |= xe_vm_insert_vma(vm, op->remap.prev); 2489 if (!err) 2490 op->flags |= XE_VMA_OP_PREV_COMMITTED; 2491 if (!err && op->remap.skip_prev) { 2492 op->remap.prev->tile_present = 2493 tile_present; 2494 op->remap.prev = NULL; 2495 } 2496 } 2497 if (op->remap.next) { 2498 err |= xe_vm_insert_vma(vm, op->remap.next); 2499 if (!err) 2500 op->flags |= XE_VMA_OP_NEXT_COMMITTED; 2501 if (!err && op->remap.skip_next) { 2502 op->remap.next->tile_present = 2503 tile_present; 2504 op->remap.next = NULL; 2505 } 2506 } 2507 2508 /* Adjust for partial unbind after removing VMA from VM */ 2509 if (!err) { 2510 op->base.remap.unmap->va->va.addr = op->remap.start; 2511 op->base.remap.unmap->va->va.range = op->remap.range; 2512 } 
2513 break; 2514 } 2515 case DRM_GPUVA_OP_UNMAP: 2516 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true); 2517 op->flags |= XE_VMA_OP_COMMITTED; 2518 break; 2519 case DRM_GPUVA_OP_PREFETCH: 2520 op->flags |= XE_VMA_OP_COMMITTED; 2521 break; 2522 default: 2523 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2524 } 2525 2526 return err; 2527 } 2528 2529 /** 2530 * xe_vma_has_default_mem_attrs - Check if a VMA has default memory attributes 2531 * @vma: Pointer to the xe_vma structure to check 2532 * 2533 * This function determines whether the given VMA (Virtual Memory Area) 2534 * has its memory attributes set to their default values. Specifically, 2535 * it checks the following conditions: 2536 * 2537 * - `atomic_access` is `DRM_XE_VMA_ATOMIC_UNDEFINED` 2538 * - `pat_index` is equal to `default_pat_index` 2539 * - `preferred_loc.devmem_fd` is `DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE` 2540 * - `preferred_loc.migration_policy` is `DRM_XE_MIGRATE_ALL_PAGES` 2541 * 2542 * Return: true if all attributes are at their default values, false otherwise. 2543 */ 2544 bool xe_vma_has_default_mem_attrs(struct xe_vma *vma) 2545 { 2546 return (vma->attr.atomic_access == DRM_XE_ATOMIC_UNDEFINED && 2547 vma->attr.pat_index == vma->attr.default_pat_index && 2548 vma->attr.preferred_loc.devmem_fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE && 2549 vma->attr.preferred_loc.migration_policy == DRM_XE_MIGRATE_ALL_PAGES); 2550 } 2551 2552 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, 2553 struct xe_vma_ops *vops) 2554 { 2555 struct xe_device *xe = vm->xe; 2556 struct drm_gpuva_op *__op; 2557 struct xe_tile *tile; 2558 u8 id, tile_mask = 0; 2559 int err = 0; 2560 2561 lockdep_assert_held_write(&vm->lock); 2562 2563 for_each_tile(tile, vm->xe, id) 2564 tile_mask |= 0x1 << id; 2565 2566 drm_gpuva_for_each_op(__op, ops) { 2567 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2568 struct xe_vma *vma; 2569 unsigned int flags = 0; 2570 2571 INIT_LIST_HEAD(&op->link); 2572 list_add_tail(&op->link, &vops->list); 2573 op->tile_mask = tile_mask; 2574 2575 switch (op->base.op) { 2576 case DRM_GPUVA_OP_MAP: 2577 { 2578 struct xe_vma_mem_attr default_attr = { 2579 .preferred_loc = { 2580 .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE, 2581 .migration_policy = DRM_XE_MIGRATE_ALL_PAGES, 2582 }, 2583 .atomic_access = DRM_XE_ATOMIC_UNDEFINED, 2584 .default_pat_index = op->map.pat_index, 2585 .pat_index = op->map.pat_index, 2586 }; 2587 2588 flags |= op->map.vma_flags & XE_VMA_CREATE_MASK; 2589 2590 vma = new_vma(vm, &op->base.map, &default_attr, 2591 flags); 2592 if (IS_ERR(vma)) 2593 return PTR_ERR(vma); 2594 2595 op->map.vma = vma; 2596 if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) && 2597 !(op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) || 2598 op->map.invalidate_on_bind) 2599 xe_vma_ops_incr_pt_update_ops(vops, 2600 op->tile_mask, 1); 2601 break; 2602 } 2603 case DRM_GPUVA_OP_REMAP: 2604 { 2605 struct xe_vma *old = 2606 gpuva_to_vma(op->base.remap.unmap->va); 2607 bool skip = xe_vma_is_cpu_addr_mirror(old); 2608 u64 start = xe_vma_start(old), end = xe_vma_end(old); 2609 int num_remap_ops = 0; 2610 2611 if (op->base.remap.prev) 2612 start = op->base.remap.prev->va.addr + 2613 op->base.remap.prev->va.range; 2614 if (op->base.remap.next) 2615 end = op->base.remap.next->va.addr; 2616 2617 if (xe_vma_is_cpu_addr_mirror(old) && 2618 xe_svm_has_mapping(vm, start, end)) { 2619 if (vops->flags & XE_VMA_OPS_FLAG_MADVISE) 2620 xe_svm_unmap_address_range(vm, start, end); 2621 else 2622 return -EBUSY; 2623 } 
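/*
 * Default to unbinding the entire old VMA below; op->remap.start and
 * op->remap.range are trimmed further down if the prev and/or next
 * pieces can be skipped and keep their existing mappings.
 */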
2624 2625 op->remap.start = xe_vma_start(old); 2626 op->remap.range = xe_vma_size(old); 2627 2628 flags |= op->base.remap.unmap->va->flags & XE_VMA_CREATE_MASK; 2629 if (op->base.remap.prev) { 2630 vma = new_vma(vm, op->base.remap.prev, 2631 &old->attr, flags); 2632 if (IS_ERR(vma)) 2633 return PTR_ERR(vma); 2634 2635 op->remap.prev = vma; 2636 2637 /* 2638 * Userptr creates a new SG mapping so 2639 * we must also rebind. 2640 */ 2641 op->remap.skip_prev = skip || 2642 (!xe_vma_is_userptr(old) && 2643 IS_ALIGNED(xe_vma_end(vma), 2644 xe_vma_max_pte_size(old))); 2645 if (op->remap.skip_prev) { 2646 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2647 op->remap.range -= 2648 xe_vma_end(vma) - 2649 xe_vma_start(old); 2650 op->remap.start = xe_vma_end(vma); 2651 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx", 2652 (ULL)op->remap.start, 2653 (ULL)op->remap.range); 2654 } else { 2655 num_remap_ops++; 2656 } 2657 } 2658 2659 if (op->base.remap.next) { 2660 vma = new_vma(vm, op->base.remap.next, 2661 &old->attr, flags); 2662 if (IS_ERR(vma)) 2663 return PTR_ERR(vma); 2664 2665 op->remap.next = vma; 2666 2667 /* 2668 * Userptr creates a new SG mapping so 2669 * we must also rebind. 2670 */ 2671 op->remap.skip_next = skip || 2672 (!xe_vma_is_userptr(old) && 2673 IS_ALIGNED(xe_vma_start(vma), 2674 xe_vma_max_pte_size(old))); 2675 if (op->remap.skip_next) { 2676 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2677 op->remap.range -= 2678 xe_vma_end(old) - 2679 xe_vma_start(vma); 2680 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx", 2681 (ULL)op->remap.start, 2682 (ULL)op->remap.range); 2683 } else { 2684 num_remap_ops++; 2685 } 2686 } 2687 if (!skip) 2688 num_remap_ops++; 2689 2690 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops); 2691 break; 2692 } 2693 case DRM_GPUVA_OP_UNMAP: 2694 vma = gpuva_to_vma(op->base.unmap.va); 2695 2696 if (xe_vma_is_cpu_addr_mirror(vma) && 2697 xe_svm_has_mapping(vm, xe_vma_start(vma), 2698 xe_vma_end(vma))) 2699 return -EBUSY; 2700 2701 if (!xe_vma_is_cpu_addr_mirror(vma)) 2702 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2703 break; 2704 case DRM_GPUVA_OP_PREFETCH: 2705 vma = gpuva_to_vma(op->base.prefetch.va); 2706 2707 if (xe_vma_is_userptr(vma)) { 2708 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2709 if (err) 2710 return err; 2711 } 2712 2713 if (xe_vma_is_cpu_addr_mirror(vma)) 2714 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 2715 op->prefetch_range.ranges_count); 2716 else 2717 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2718 2719 break; 2720 default: 2721 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2722 } 2723 2724 err = xe_vma_op_commit(vm, op); 2725 if (err) 2726 return err; 2727 } 2728 2729 return 0; 2730 } 2731 2732 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, 2733 bool post_commit, bool prev_post_commit, 2734 bool next_post_commit) 2735 { 2736 lockdep_assert_held_write(&vm->lock); 2737 2738 switch (op->base.op) { 2739 case DRM_GPUVA_OP_MAP: 2740 if (op->map.vma) { 2741 prep_vma_destroy(vm, op->map.vma, post_commit); 2742 xe_vma_destroy_unlocked(op->map.vma); 2743 } 2744 break; 2745 case DRM_GPUVA_OP_UNMAP: 2746 { 2747 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va); 2748 2749 if (vma) { 2750 xe_svm_notifier_lock(vm); 2751 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2752 xe_svm_notifier_unlock(vm); 2753 if (post_commit) 2754 xe_vm_insert_vma(vm, vma); 2755 } 2756 break; 2757 } 2758 case DRM_GPUVA_OP_REMAP: 2759 { 2760 struct xe_vma *vma = 
gpuva_to_vma(op->base.remap.unmap->va); 2761 2762 if (op->remap.prev) { 2763 prep_vma_destroy(vm, op->remap.prev, prev_post_commit); 2764 xe_vma_destroy_unlocked(op->remap.prev); 2765 } 2766 if (op->remap.next) { 2767 prep_vma_destroy(vm, op->remap.next, next_post_commit); 2768 xe_vma_destroy_unlocked(op->remap.next); 2769 } 2770 if (vma) { 2771 xe_svm_notifier_lock(vm); 2772 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2773 xe_svm_notifier_unlock(vm); 2774 if (post_commit) 2775 xe_vm_insert_vma(vm, vma); 2776 } 2777 break; 2778 } 2779 case DRM_GPUVA_OP_PREFETCH: 2780 /* Nothing to do */ 2781 break; 2782 default: 2783 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2784 } 2785 } 2786 2787 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, 2788 struct drm_gpuva_ops **ops, 2789 int num_ops_list) 2790 { 2791 int i; 2792 2793 for (i = num_ops_list - 1; i >= 0; --i) { 2794 struct drm_gpuva_ops *__ops = ops[i]; 2795 struct drm_gpuva_op *__op; 2796 2797 if (!__ops) 2798 continue; 2799 2800 drm_gpuva_for_each_op_reverse(__op, __ops) { 2801 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2802 2803 xe_vma_op_unwind(vm, op, 2804 op->flags & XE_VMA_OP_COMMITTED, 2805 op->flags & XE_VMA_OP_PREV_COMMITTED, 2806 op->flags & XE_VMA_OP_NEXT_COMMITTED); 2807 } 2808 } 2809 } 2810 2811 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, 2812 bool res_evict, bool validate) 2813 { 2814 struct xe_bo *bo = xe_vma_bo(vma); 2815 struct xe_vm *vm = xe_vma_vm(vma); 2816 int err = 0; 2817 2818 if (bo) { 2819 if (!bo->vm) 2820 err = drm_exec_lock_obj(exec, &bo->ttm.base); 2821 if (!err && validate) 2822 err = xe_bo_validate(bo, vm, 2823 !xe_vm_in_preempt_fence_mode(vm) && 2824 res_evict, exec); 2825 } 2826 2827 return err; 2828 } 2829 2830 static int check_ufence(struct xe_vma *vma) 2831 { 2832 if (vma->ufence) { 2833 struct xe_user_fence * const f = vma->ufence; 2834 2835 if (!xe_sync_ufence_get_status(f)) 2836 return -EBUSY; 2837 2838 vma->ufence = NULL; 2839 xe_sync_ufence_put(f); 2840 } 2841 2842 return 0; 2843 } 2844 2845 static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) 2846 { 2847 bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); 2848 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2849 struct xe_tile *tile = op->prefetch_range.tile; 2850 int err = 0; 2851 2852 struct xe_svm_range *svm_range; 2853 struct drm_gpusvm_ctx ctx = {}; 2854 unsigned long i; 2855 2856 if (!xe_vma_is_cpu_addr_mirror(vma)) 2857 return 0; 2858 2859 ctx.read_only = xe_vma_read_only(vma); 2860 ctx.devmem_possible = devmem_possible; 2861 ctx.check_pages_threshold = devmem_possible ? 
SZ_64K : 0; 2862 ctx.device_private_page_owner = xe_svm_devm_owner(vm->xe); 2863 2864 /* TODO: Threading the migration */ 2865 xa_for_each(&op->prefetch_range.range, i, svm_range) { 2866 if (!tile) 2867 xe_svm_range_migrate_to_smem(vm, svm_range); 2868 2869 if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, !!tile)) { 2870 err = xe_svm_alloc_vram(tile, svm_range, &ctx); 2871 if (err) { 2872 drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n", 2873 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 2874 return -ENODATA; 2875 } 2876 xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM"); 2877 } 2878 2879 err = xe_svm_range_get_pages(vm, svm_range, &ctx); 2880 if (err) { 2881 drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n", 2882 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 2883 if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) 2884 err = -ENODATA; 2885 return err; 2886 } 2887 xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE"); 2888 } 2889 2890 return err; 2891 } 2892 2893 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, 2894 struct xe_vma_ops *vops, struct xe_vma_op *op) 2895 { 2896 int err = 0; 2897 bool res_evict; 2898 2899 /* 2900 * We only allow evicting a BO within the VM if it is not part of an 2901 * array of binds, as an array of binds can evict another BO within the 2902 * bind. 2903 */ 2904 res_evict = !(vops->flags & XE_VMA_OPS_ARRAY_OF_BINDS); 2905 2906 switch (op->base.op) { 2907 case DRM_GPUVA_OP_MAP: 2908 if (!op->map.invalidate_on_bind) 2909 err = vma_lock_and_validate(exec, op->map.vma, 2910 res_evict, 2911 !xe_vm_in_fault_mode(vm) || 2912 op->map.immediate); 2913 break; 2914 case DRM_GPUVA_OP_REMAP: 2915 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va)); 2916 if (err) 2917 break; 2918 2919 err = vma_lock_and_validate(exec, 2920 gpuva_to_vma(op->base.remap.unmap->va), 2921 res_evict, false); 2922 if (!err && op->remap.prev) 2923 err = vma_lock_and_validate(exec, op->remap.prev, 2924 res_evict, true); 2925 if (!err && op->remap.next) 2926 err = vma_lock_and_validate(exec, op->remap.next, 2927 res_evict, true); 2928 break; 2929 case DRM_GPUVA_OP_UNMAP: 2930 err = check_ufence(gpuva_to_vma(op->base.unmap.va)); 2931 if (err) 2932 break; 2933 2934 err = vma_lock_and_validate(exec, 2935 gpuva_to_vma(op->base.unmap.va), 2936 res_evict, false); 2937 break; 2938 case DRM_GPUVA_OP_PREFETCH: 2939 { 2940 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2941 u32 region; 2942 2943 if (!xe_vma_is_cpu_addr_mirror(vma)) { 2944 region = op->prefetch.region; 2945 xe_assert(vm->xe, region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC || 2946 region <= ARRAY_SIZE(region_to_mem_type)); 2947 } 2948 2949 err = vma_lock_and_validate(exec, 2950 gpuva_to_vma(op->base.prefetch.va), 2951 res_evict, false); 2952 if (!err && !xe_vma_has_no_bo(vma)) 2953 err = xe_bo_migrate(xe_vma_bo(vma), 2954 region_to_mem_type[region], 2955 NULL, 2956 exec); 2957 break; 2958 } 2959 default: 2960 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 2961 } 2962 2963 return err; 2964 } 2965 2966 static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops) 2967 { 2968 struct xe_vma_op *op; 2969 int err; 2970 2971 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) 2972 return 0; 2973 2974 list_for_each_entry(op, &vops->list, link) { 2975 if (op->base.op == DRM_GPUVA_OP_PREFETCH) { 2976 err = prefetch_ranges(vm, op); 2977 if (err) 2978 return err; 2979 } 2980 } 2981 2982 
return 0; 2983 } 2984 2985 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, 2986 struct xe_vm *vm, 2987 struct xe_vma_ops *vops) 2988 { 2989 struct xe_vma_op *op; 2990 int err; 2991 2992 err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 2993 if (err) 2994 return err; 2995 2996 list_for_each_entry(op, &vops->list, link) { 2997 err = op_lock_and_prep(exec, vm, vops, op); 2998 if (err) 2999 return err; 3000 } 3001 3002 #ifdef TEST_VM_OPS_ERROR 3003 if (vops->inject_error && 3004 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK) 3005 return -ENOSPC; 3006 #endif 3007 3008 return 0; 3009 } 3010 3011 static void op_trace(struct xe_vma_op *op) 3012 { 3013 switch (op->base.op) { 3014 case DRM_GPUVA_OP_MAP: 3015 trace_xe_vma_bind(op->map.vma); 3016 break; 3017 case DRM_GPUVA_OP_REMAP: 3018 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va)); 3019 if (op->remap.prev) 3020 trace_xe_vma_bind(op->remap.prev); 3021 if (op->remap.next) 3022 trace_xe_vma_bind(op->remap.next); 3023 break; 3024 case DRM_GPUVA_OP_UNMAP: 3025 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va)); 3026 break; 3027 case DRM_GPUVA_OP_PREFETCH: 3028 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va)); 3029 break; 3030 case DRM_GPUVA_OP_DRIVER: 3031 break; 3032 default: 3033 XE_WARN_ON("NOT POSSIBLE"); 3034 } 3035 } 3036 3037 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops) 3038 { 3039 struct xe_vma_op *op; 3040 3041 list_for_each_entry(op, &vops->list, link) 3042 op_trace(op); 3043 } 3044 3045 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops) 3046 { 3047 struct xe_exec_queue *q = vops->q; 3048 struct xe_tile *tile; 3049 int number_tiles = 0; 3050 u8 id; 3051 3052 for_each_tile(tile, vm->xe, id) { 3053 if (vops->pt_update_ops[id].num_ops) 3054 ++number_tiles; 3055 3056 if (vops->pt_update_ops[id].q) 3057 continue; 3058 3059 if (q) { 3060 vops->pt_update_ops[id].q = q; 3061 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list)) 3062 q = list_next_entry(q, multi_gt_list); 3063 } else { 3064 vops->pt_update_ops[id].q = vm->q[id]; 3065 } 3066 } 3067 3068 return number_tiles; 3069 } 3070 3071 static struct dma_fence *ops_execute(struct xe_vm *vm, 3072 struct xe_vma_ops *vops) 3073 { 3074 struct xe_tile *tile; 3075 struct dma_fence *fence = NULL; 3076 struct dma_fence **fences = NULL; 3077 struct dma_fence_array *cf = NULL; 3078 int number_tiles = 0, current_fence = 0, err; 3079 u8 id; 3080 3081 number_tiles = vm_ops_setup_tile_args(vm, vops); 3082 if (number_tiles == 0) 3083 return ERR_PTR(-ENODATA); 3084 3085 if (number_tiles > 1) { 3086 fences = kmalloc_array(number_tiles, sizeof(*fences), 3087 GFP_KERNEL); 3088 if (!fences) { 3089 fence = ERR_PTR(-ENOMEM); 3090 goto err_trace; 3091 } 3092 } 3093 3094 for_each_tile(tile, vm->xe, id) { 3095 if (!vops->pt_update_ops[id].num_ops) 3096 continue; 3097 3098 err = xe_pt_update_ops_prepare(tile, vops); 3099 if (err) { 3100 fence = ERR_PTR(err); 3101 goto err_out; 3102 } 3103 } 3104 3105 trace_xe_vm_ops_execute(vops); 3106 3107 for_each_tile(tile, vm->xe, id) { 3108 if (!vops->pt_update_ops[id].num_ops) 3109 continue; 3110 3111 fence = xe_pt_update_ops_run(tile, vops); 3112 if (IS_ERR(fence)) 3113 goto err_out; 3114 3115 if (fences) 3116 fences[current_fence++] = fence; 3117 } 3118 3119 if (fences) { 3120 cf = dma_fence_array_create(number_tiles, fences, 3121 vm->composite_fence_ctx, 3122 vm->composite_fence_seqno++, 3123 false); 3124 if (!cf) { 3125 --vm->composite_fence_seqno; 3126 fence = ERR_PTR(-ENOMEM); 3127 goto 
err_out; 3128 } 3129 fence = &cf->base; 3130 } 3131 3132 for_each_tile(tile, vm->xe, id) { 3133 if (!vops->pt_update_ops[id].num_ops) 3134 continue; 3135 3136 xe_pt_update_ops_fini(tile, vops); 3137 } 3138 3139 return fence; 3140 3141 err_out: 3142 for_each_tile(tile, vm->xe, id) { 3143 if (!vops->pt_update_ops[id].num_ops) 3144 continue; 3145 3146 xe_pt_update_ops_abort(tile, vops); 3147 } 3148 while (current_fence) 3149 dma_fence_put(fences[--current_fence]); 3150 kfree(fences); 3151 kfree(cf); 3152 3153 err_trace: 3154 trace_xe_vm_ops_fail(vm); 3155 return fence; 3156 } 3157 3158 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence) 3159 { 3160 if (vma->ufence) 3161 xe_sync_ufence_put(vma->ufence); 3162 vma->ufence = __xe_sync_ufence_get(ufence); 3163 } 3164 3165 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op, 3166 struct xe_user_fence *ufence) 3167 { 3168 switch (op->base.op) { 3169 case DRM_GPUVA_OP_MAP: 3170 vma_add_ufence(op->map.vma, ufence); 3171 break; 3172 case DRM_GPUVA_OP_REMAP: 3173 if (op->remap.prev) 3174 vma_add_ufence(op->remap.prev, ufence); 3175 if (op->remap.next) 3176 vma_add_ufence(op->remap.next, ufence); 3177 break; 3178 case DRM_GPUVA_OP_UNMAP: 3179 break; 3180 case DRM_GPUVA_OP_PREFETCH: 3181 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence); 3182 break; 3183 default: 3184 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 3185 } 3186 } 3187 3188 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, 3189 struct dma_fence *fence) 3190 { 3191 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q); 3192 struct xe_user_fence *ufence; 3193 struct xe_vma_op *op; 3194 int i; 3195 3196 ufence = find_ufence_get(vops->syncs, vops->num_syncs); 3197 list_for_each_entry(op, &vops->list, link) { 3198 if (ufence) 3199 op_add_ufence(vm, op, ufence); 3200 3201 if (op->base.op == DRM_GPUVA_OP_UNMAP) 3202 xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence); 3203 else if (op->base.op == DRM_GPUVA_OP_REMAP) 3204 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), 3205 fence); 3206 } 3207 if (ufence) 3208 xe_sync_ufence_put(ufence); 3209 if (fence) { 3210 for (i = 0; i < vops->num_syncs; i++) 3211 xe_sync_entry_signal(vops->syncs + i, fence); 3212 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); 3213 } 3214 } 3215 3216 static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm, 3217 struct xe_vma_ops *vops) 3218 { 3219 struct xe_validation_ctx ctx; 3220 struct drm_exec exec; 3221 struct dma_fence *fence; 3222 int err = 0; 3223 3224 lockdep_assert_held_write(&vm->lock); 3225 3226 xe_validation_guard(&ctx, &vm->xe->val, &exec, 3227 ((struct xe_val_flags) { 3228 .interruptible = true, 3229 .exec_ignore_duplicates = true, 3230 }), err) { 3231 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops); 3232 drm_exec_retry_on_contention(&exec); 3233 xe_validation_retry_on_oom(&ctx, &err); 3234 if (err) 3235 return ERR_PTR(err); 3236 3237 xe_vm_set_validation_exec(vm, &exec); 3238 fence = ops_execute(vm, vops); 3239 xe_vm_set_validation_exec(vm, NULL); 3240 if (IS_ERR(fence)) { 3241 if (PTR_ERR(fence) == -ENODATA) 3242 vm_bind_ioctl_ops_fini(vm, vops, NULL); 3243 return fence; 3244 } 3245 3246 vm_bind_ioctl_ops_fini(vm, vops, fence); 3247 } 3248 3249 return err ? 
ERR_PTR(err) : fence; 3250 } 3251 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); 3252 3253 #define SUPPORTED_FLAGS_STUB \ 3254 (DRM_XE_VM_BIND_FLAG_READONLY | \ 3255 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \ 3256 DRM_XE_VM_BIND_FLAG_NULL | \ 3257 DRM_XE_VM_BIND_FLAG_DUMPABLE | \ 3258 DRM_XE_VM_BIND_FLAG_CHECK_PXP | \ 3259 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR | \ 3260 DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) 3261 3262 #ifdef TEST_VM_OPS_ERROR 3263 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR) 3264 #else 3265 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB 3266 #endif 3267 3268 #define XE_64K_PAGE_MASK 0xffffull 3269 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) 3270 3271 static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, 3272 struct drm_xe_vm_bind *args, 3273 struct drm_xe_vm_bind_op **bind_ops) 3274 { 3275 int err; 3276 int i; 3277 3278 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) || 3279 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 3280 return -EINVAL; 3281 3282 if (XE_IOCTL_DBG(xe, args->extensions)) 3283 return -EINVAL; 3284 3285 if (args->num_binds > 1) { 3286 u64 __user *bind_user = 3287 u64_to_user_ptr(args->vector_of_binds); 3288 3289 *bind_ops = kvmalloc_array(args->num_binds, 3290 sizeof(struct drm_xe_vm_bind_op), 3291 GFP_KERNEL | __GFP_ACCOUNT | 3292 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3293 if (!*bind_ops) 3294 return args->num_binds > 1 ? -ENOBUFS : -ENOMEM; 3295 3296 err = copy_from_user(*bind_ops, bind_user, 3297 sizeof(struct drm_xe_vm_bind_op) * 3298 args->num_binds); 3299 if (XE_IOCTL_DBG(xe, err)) { 3300 err = -EFAULT; 3301 goto free_bind_ops; 3302 } 3303 } else { 3304 *bind_ops = &args->bind; 3305 } 3306 3307 for (i = 0; i < args->num_binds; ++i) { 3308 u64 range = (*bind_ops)[i].range; 3309 u64 addr = (*bind_ops)[i].addr; 3310 u32 op = (*bind_ops)[i].op; 3311 u32 flags = (*bind_ops)[i].flags; 3312 u32 obj = (*bind_ops)[i].obj; 3313 u64 obj_offset = (*bind_ops)[i].obj_offset; 3314 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance; 3315 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; 3316 bool is_cpu_addr_mirror = flags & 3317 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR; 3318 u16 pat_index = (*bind_ops)[i].pat_index; 3319 u16 coh_mode; 3320 3321 if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror && 3322 (!xe_vm_in_fault_mode(vm) || 3323 !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) { 3324 err = -EINVAL; 3325 goto free_bind_ops; 3326 } 3327 3328 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) { 3329 err = -EINVAL; 3330 goto free_bind_ops; 3331 } 3332 3333 pat_index = array_index_nospec(pat_index, xe->pat.n_entries); 3334 (*bind_ops)[i].pat_index = pat_index; 3335 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); 3336 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */ 3337 err = -EINVAL; 3338 goto free_bind_ops; 3339 } 3340 3341 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) { 3342 err = -EINVAL; 3343 goto free_bind_ops; 3344 } 3345 3346 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) || 3347 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) || 3348 XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) || 3349 XE_IOCTL_DBG(xe, obj_offset && (is_null || 3350 is_cpu_addr_mirror)) || 3351 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP && 3352 (is_null || is_cpu_addr_mirror)) || 3353 XE_IOCTL_DBG(xe, !obj && 3354 op == DRM_XE_VM_BIND_OP_MAP && 3355 !is_null && !is_cpu_addr_mirror) || 3356 XE_IOCTL_DBG(xe, !obj && 3357 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3358 XE_IOCTL_DBG(xe, addr && 3359 op == 
DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3360 XE_IOCTL_DBG(xe, range && 3361 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || 3362 XE_IOCTL_DBG(xe, obj && 3363 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 3364 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 3365 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 3366 XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR && 3367 !IS_ENABLED(CONFIG_DRM_GPUSVM)) || 3368 XE_IOCTL_DBG(xe, obj && 3369 op == DRM_XE_VM_BIND_OP_PREFETCH) || 3370 XE_IOCTL_DBG(xe, prefetch_region && 3371 op != DRM_XE_VM_BIND_OP_PREFETCH) || 3372 XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC && 3373 !(BIT(prefetch_region) & xe->info.mem_region_mask))) || 3374 XE_IOCTL_DBG(xe, obj && 3375 op == DRM_XE_VM_BIND_OP_UNMAP) || 3376 XE_IOCTL_DBG(xe, (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) && 3377 (!is_cpu_addr_mirror || op != DRM_XE_VM_BIND_OP_MAP))) { 3378 err = -EINVAL; 3379 goto free_bind_ops; 3380 } 3381 3382 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) || 3383 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) || 3384 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) || 3385 XE_IOCTL_DBG(xe, !range && 3386 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) { 3387 err = -EINVAL; 3388 goto free_bind_ops; 3389 } 3390 } 3391 3392 return 0; 3393 3394 free_bind_ops: 3395 if (args->num_binds > 1) 3396 kvfree(*bind_ops); 3397 *bind_ops = NULL; 3398 return err; 3399 } 3400 3401 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, 3402 struct xe_exec_queue *q, 3403 struct xe_sync_entry *syncs, 3404 int num_syncs) 3405 { 3406 struct dma_fence *fence; 3407 int i, err = 0; 3408 3409 fence = xe_sync_in_fence_get(syncs, num_syncs, 3410 to_wait_exec_queue(vm, q), vm); 3411 if (IS_ERR(fence)) 3412 return PTR_ERR(fence); 3413 3414 for (i = 0; i < num_syncs; i++) 3415 xe_sync_entry_signal(&syncs[i], fence); 3416 3417 xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm, 3418 fence); 3419 dma_fence_put(fence); 3420 3421 return err; 3422 } 3423 3424 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 3425 struct xe_exec_queue *q, 3426 struct xe_sync_entry *syncs, u32 num_syncs) 3427 { 3428 memset(vops, 0, sizeof(*vops)); 3429 INIT_LIST_HEAD(&vops->list); 3430 vops->vm = vm; 3431 vops->q = q; 3432 vops->syncs = syncs; 3433 vops->num_syncs = num_syncs; 3434 vops->flags = 0; 3435 } 3436 3437 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, 3438 u64 addr, u64 range, u64 obj_offset, 3439 u16 pat_index, u32 op, u32 bind_flags) 3440 { 3441 u16 coh_mode; 3442 3443 if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) || 3444 XE_IOCTL_DBG(xe, obj_offset > 3445 xe_bo_size(bo) - range)) { 3446 return -EINVAL; 3447 } 3448 3449 /* 3450 * Some platforms require 64k VM_BIND alignment, 3451 * specifically those with XE_VRAM_FLAGS_NEED64K. 3452 * 3453 * Other platforms may have BO's set to 64k physical placement, 3454 * but can be mapped at 4k offsets anyway. This check is only 3455 * there for the former case. 
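 *
 * In that case the bind is rejected below unless obj_offset, addr and
 * range are all 64k-aligned.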
3456 */ 3457 if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) && 3458 (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) { 3459 if (XE_IOCTL_DBG(xe, obj_offset & 3460 XE_64K_PAGE_MASK) || 3461 XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || 3462 XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) { 3463 return -EINVAL; 3464 } 3465 } 3466 3467 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); 3468 if (bo->cpu_caching) { 3469 if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 3470 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) { 3471 return -EINVAL; 3472 } 3473 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) { 3474 /* 3475 * Imported dma-buf from a different device should 3476 * require 1way or 2way coherency since we don't know 3477 * how it was mapped on the CPU. Just assume is it 3478 * potentially cached on CPU side. 3479 */ 3480 return -EINVAL; 3481 } 3482 3483 /* If a BO is protected it can only be mapped if the key is still valid */ 3484 if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) && 3485 op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL) 3486 if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0)) 3487 return -ENOEXEC; 3488 3489 return 0; 3490 } 3491 3492 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3493 { 3494 struct xe_device *xe = to_xe_device(dev); 3495 struct xe_file *xef = to_xe_file(file); 3496 struct drm_xe_vm_bind *args = data; 3497 struct drm_xe_sync __user *syncs_user; 3498 struct xe_bo **bos = NULL; 3499 struct drm_gpuva_ops **ops = NULL; 3500 struct xe_vm *vm; 3501 struct xe_exec_queue *q = NULL; 3502 u32 num_syncs, num_ufence = 0; 3503 struct xe_sync_entry *syncs = NULL; 3504 struct drm_xe_vm_bind_op *bind_ops = NULL; 3505 struct xe_vma_ops vops; 3506 struct dma_fence *fence; 3507 int err; 3508 int i; 3509 3510 vm = xe_vm_lookup(xef, args->vm_id); 3511 if (XE_IOCTL_DBG(xe, !vm)) 3512 return -EINVAL; 3513 3514 err = vm_bind_ioctl_check_args(xe, vm, args, &bind_ops); 3515 if (err) 3516 goto put_vm; 3517 3518 if (args->exec_queue_id) { 3519 q = xe_exec_queue_lookup(xef, args->exec_queue_id); 3520 if (XE_IOCTL_DBG(xe, !q)) { 3521 err = -ENOENT; 3522 goto free_bind_ops; 3523 } 3524 3525 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) { 3526 err = -EINVAL; 3527 goto put_exec_queue; 3528 } 3529 } 3530 3531 /* Ensure all UNMAPs visible */ 3532 xe_svm_flush(vm); 3533 3534 err = down_write_killable(&vm->lock); 3535 if (err) 3536 goto put_exec_queue; 3537 3538 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { 3539 err = -ENOENT; 3540 goto release_vm_lock; 3541 } 3542 3543 for (i = 0; i < args->num_binds; ++i) { 3544 u64 range = bind_ops[i].range; 3545 u64 addr = bind_ops[i].addr; 3546 3547 if (XE_IOCTL_DBG(xe, range > vm->size) || 3548 XE_IOCTL_DBG(xe, addr > vm->size - range)) { 3549 err = -EINVAL; 3550 goto release_vm_lock; 3551 } 3552 } 3553 3554 if (args->num_binds) { 3555 bos = kvcalloc(args->num_binds, sizeof(*bos), 3556 GFP_KERNEL | __GFP_ACCOUNT | 3557 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3558 if (!bos) { 3559 err = -ENOMEM; 3560 goto release_vm_lock; 3561 } 3562 3563 ops = kvcalloc(args->num_binds, sizeof(*ops), 3564 GFP_KERNEL | __GFP_ACCOUNT | 3565 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 3566 if (!ops) { 3567 err = -ENOMEM; 3568 goto free_bos; 3569 } 3570 } 3571 3572 for (i = 0; i < args->num_binds; ++i) { 3573 struct drm_gem_object *gem_obj; 3574 u64 range = bind_ops[i].range; 3575 u64 addr = bind_ops[i].addr; 3576 u32 obj = bind_ops[i].obj; 3577 u64 obj_offset = bind_ops[i].obj_offset; 
3578 u16 pat_index = bind_ops[i].pat_index; 3579 u32 op = bind_ops[i].op; 3580 u32 bind_flags = bind_ops[i].flags; 3581 3582 if (!obj) 3583 continue; 3584 3585 gem_obj = drm_gem_object_lookup(file, obj); 3586 if (XE_IOCTL_DBG(xe, !gem_obj)) { 3587 err = -ENOENT; 3588 goto put_obj; 3589 } 3590 bos[i] = gem_to_xe_bo(gem_obj); 3591 3592 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range, 3593 obj_offset, pat_index, op, 3594 bind_flags); 3595 if (err) 3596 goto put_obj; 3597 } 3598 3599 if (args->num_syncs) { 3600 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); 3601 if (!syncs) { 3602 err = -ENOMEM; 3603 goto put_obj; 3604 } 3605 } 3606 3607 syncs_user = u64_to_user_ptr(args->syncs); 3608 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { 3609 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], 3610 &syncs_user[num_syncs], 3611 (xe_vm_in_lr_mode(vm) ? 3612 SYNC_PARSE_FLAG_LR_MODE : 0) | 3613 (!args->num_binds ? 3614 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0)); 3615 if (err) 3616 goto free_syncs; 3617 3618 if (xe_sync_is_ufence(&syncs[num_syncs])) 3619 num_ufence++; 3620 } 3621 3622 if (XE_IOCTL_DBG(xe, num_ufence > 1)) { 3623 err = -EINVAL; 3624 goto free_syncs; 3625 } 3626 3627 if (!args->num_binds) { 3628 err = -ENODATA; 3629 goto free_syncs; 3630 } 3631 3632 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); 3633 if (args->num_binds > 1) 3634 vops.flags |= XE_VMA_OPS_ARRAY_OF_BINDS; 3635 for (i = 0; i < args->num_binds; ++i) { 3636 u64 range = bind_ops[i].range; 3637 u64 addr = bind_ops[i].addr; 3638 u32 op = bind_ops[i].op; 3639 u32 flags = bind_ops[i].flags; 3640 u64 obj_offset = bind_ops[i].obj_offset; 3641 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance; 3642 u16 pat_index = bind_ops[i].pat_index; 3643 3644 ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset, 3645 addr, range, op, flags, 3646 prefetch_region, pat_index); 3647 if (IS_ERR(ops[i])) { 3648 err = PTR_ERR(ops[i]); 3649 ops[i] = NULL; 3650 goto unwind_ops; 3651 } 3652 3653 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops); 3654 if (err) 3655 goto unwind_ops; 3656 3657 #ifdef TEST_VM_OPS_ERROR 3658 if (flags & FORCE_OP_ERROR) { 3659 vops.inject_error = true; 3660 vm->xe->vm_inject_error_position = 3661 (vm->xe->vm_inject_error_position + 1) % 3662 FORCE_OP_ERROR_COUNT; 3663 } 3664 #endif 3665 } 3666 3667 /* Nothing to do */ 3668 if (list_empty(&vops.list)) { 3669 err = -ENODATA; 3670 goto unwind_ops; 3671 } 3672 3673 err = xe_vma_ops_alloc(&vops, args->num_binds > 1); 3674 if (err) 3675 goto unwind_ops; 3676 3677 err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops); 3678 if (err) 3679 goto unwind_ops; 3680 3681 fence = vm_bind_ioctl_ops_execute(vm, &vops); 3682 if (IS_ERR(fence)) 3683 err = PTR_ERR(fence); 3684 else 3685 dma_fence_put(fence); 3686 3687 unwind_ops: 3688 if (err && err != -ENODATA) 3689 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); 3690 xe_vma_ops_fini(&vops); 3691 for (i = args->num_binds - 1; i >= 0; --i) 3692 if (ops[i]) 3693 drm_gpuva_ops_free(&vm->gpuvm, ops[i]); 3694 free_syncs: 3695 if (err == -ENODATA) 3696 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs); 3697 while (num_syncs--) 3698 xe_sync_entry_cleanup(&syncs[num_syncs]); 3699 3700 kfree(syncs); 3701 put_obj: 3702 for (i = 0; i < args->num_binds; ++i) 3703 xe_bo_put(bos[i]); 3704 3705 kvfree(ops); 3706 free_bos: 3707 kvfree(bos); 3708 release_vm_lock: 3709 up_write(&vm->lock); 3710 put_exec_queue: 3711 if (q) 3712 xe_exec_queue_put(q); 3713 free_bind_ops: 3714 if 
(args->num_binds > 1) 3715 kvfree(bind_ops); 3716 put_vm: 3717 xe_vm_put(vm); 3718 return err; 3719 } 3720 3721 /** 3722 * xe_vm_bind_kernel_bo - bind a kernel BO to a VM 3723 * @vm: VM to bind the BO to 3724 * @bo: BO to bind 3725 * @q: exec queue to use for the bind (optional) 3726 * @addr: address at which to bind the BO 3727 * @cache_lvl: PAT cache level to use 3728 * 3729 * Execute a VM bind map operation on a kernel-owned BO to bind it into a 3730 * kernel-owned VM. 3731 * 3732 * Returns a dma_fence to track the binding completion if the job to do so was 3733 * successfully submitted, an error pointer otherwise. 3734 */ 3735 struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo, 3736 struct xe_exec_queue *q, u64 addr, 3737 enum xe_cache_level cache_lvl) 3738 { 3739 struct xe_vma_ops vops; 3740 struct drm_gpuva_ops *ops = NULL; 3741 struct dma_fence *fence; 3742 int err; 3743 3744 xe_bo_get(bo); 3745 xe_vm_get(vm); 3746 if (q) 3747 xe_exec_queue_get(q); 3748 3749 down_write(&vm->lock); 3750 3751 xe_vma_ops_init(&vops, vm, q, NULL, 0); 3752 3753 ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo), 3754 DRM_XE_VM_BIND_OP_MAP, 0, 0, 3755 vm->xe->pat.idx[cache_lvl]); 3756 if (IS_ERR(ops)) { 3757 err = PTR_ERR(ops); 3758 goto release_vm_lock; 3759 } 3760 3761 err = vm_bind_ioctl_ops_parse(vm, ops, &vops); 3762 if (err) 3763 goto release_vm_lock; 3764 3765 xe_assert(vm->xe, !list_empty(&vops.list)); 3766 3767 err = xe_vma_ops_alloc(&vops, false); 3768 if (err) 3769 goto unwind_ops; 3770 3771 fence = vm_bind_ioctl_ops_execute(vm, &vops); 3772 if (IS_ERR(fence)) 3773 err = PTR_ERR(fence); 3774 3775 unwind_ops: 3776 if (err && err != -ENODATA) 3777 vm_bind_ioctl_ops_unwind(vm, &ops, 1); 3778 3779 xe_vma_ops_fini(&vops); 3780 drm_gpuva_ops_free(&vm->gpuvm, ops); 3781 3782 release_vm_lock: 3783 up_write(&vm->lock); 3784 3785 if (q) 3786 xe_exec_queue_put(q); 3787 xe_vm_put(vm); 3788 xe_bo_put(bo); 3789 3790 if (err) 3791 fence = ERR_PTR(err); 3792 3793 return fence; 3794 } 3795 3796 /** 3797 * xe_vm_lock() - Lock the vm's dma_resv object 3798 * @vm: The struct xe_vm whose lock is to be locked 3799 * @intr: Whether to perform any wait interruptible 3800 * 3801 * Return: 0 on success, -EINTR if @intr is true and the wait for a 3802 * contended lock was interrupted. If @intr is false, the function 3803 * always returns 0. 3804 */ 3805 int xe_vm_lock(struct xe_vm *vm, bool intr) 3806 { 3807 int ret; 3808 3809 if (intr) 3810 ret = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); 3811 else 3812 ret = dma_resv_lock(xe_vm_resv(vm), NULL); 3813 3814 return ret; 3815 } 3816 3817 /** 3818 * xe_vm_unlock() - Unlock the vm's dma_resv object 3819 * @vm: The struct xe_vm whose lock is to be released. 3820 * 3821 * Unlock a buffer object lock that was locked by xe_vm_lock(). 3822 */ 3823 void xe_vm_unlock(struct xe_vm *vm) 3824 { 3825 dma_resv_unlock(xe_vm_resv(vm)); 3826 } 3827 3828 /** 3829 * xe_vm_range_tilemask_tlb_inval - Issue a TLB invalidation on this tilemask for an 3830 * address range 3831 * @vm: The VM 3832 * @start: start address 3833 * @end: end address 3834 * @tile_mask: mask for which gt's issue tlb invalidation 3835 * 3836 * Issue a range based TLB invalidation for gt's in tilemask 3837 * 3838 * Returns 0 for success, negative error code otherwise. 
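 *
 * An invalidation is issued on the primary GT and, when present, the
 * media GT of every tile set in @tile_mask, and the function waits for
 * all issued invalidation fences before returning.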
3839 */
3840 int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start,
3841 u64 end, u8 tile_mask)
3842 {
3843 struct xe_tlb_inval_fence
3844 fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
3845 struct xe_tile *tile;
3846 u32 fence_id = 0;
3847 u8 id;
3848 int err;
3849
3850 if (!tile_mask)
3851 return 0;
3852
3853 for_each_tile(tile, vm->xe, id) {
3854 if (!(tile_mask & BIT(id)))
3855 continue;
3856
3857 xe_tlb_inval_fence_init(&tile->primary_gt->tlb_inval,
3858 &fence[fence_id], true);
3859
3860 err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval,
3861 &fence[fence_id], start, end,
3862 vm->usm.asid);
3863 if (err)
3864 goto wait;
3865 ++fence_id;
3866
3867 if (!tile->media_gt)
3868 continue;
3869
3870 xe_tlb_inval_fence_init(&tile->media_gt->tlb_inval,
3871 &fence[fence_id], true);
3872
3873 err = xe_tlb_inval_range(&tile->media_gt->tlb_inval,
3874 &fence[fence_id], start, end,
3875 vm->usm.asid);
3876 if (err)
3877 goto wait;
3878 ++fence_id;
3879 }
3880
3881 wait:
3882 for (id = 0; id < fence_id; ++id)
3883 xe_tlb_inval_fence_wait(&fence[id]);
3884
3885 return err;
3886 }
3887
3888 /**
3889 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
3890 * @vma: VMA to invalidate
3891 *
3892 * Walks the page-table leaves, memsets the entries owned by this VMA to
3893 * zero, invalidates the TLBs, and blocks until the TLB invalidation is
3894 * complete.
3895 *
3896 * Returns 0 for success, negative error code otherwise.
3897 */
3898 int xe_vm_invalidate_vma(struct xe_vma *vma)
3899 {
3900 struct xe_device *xe = xe_vma_vm(vma)->xe;
3901 struct xe_vm *vm = xe_vma_vm(vma);
3902 struct xe_tile *tile;
3903 u8 tile_mask = 0;
3904 int ret = 0;
3905 u8 id;
3906
3907 xe_assert(xe, !xe_vma_is_null(vma));
3908 xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma));
3909 trace_xe_vma_invalidate(vma);
3910
3911 vm_dbg(&vm->xe->drm,
3912 "INVALIDATE: addr=0x%016llx, range=0x%016llx",
3913 xe_vma_start(vma), xe_vma_size(vma));
3914
3915 /*
3916 * Check that we don't race with page-table updates, tile_invalidated
3917 * update is safe
3918 */
3919 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
3920 if (xe_vma_is_userptr(vma)) {
3921 lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) ||
3922 (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) &&
3923 lockdep_is_held(&xe_vm_resv(vm)->lock.base)));
3924
3925 WARN_ON_ONCE(!mmu_interval_check_retry
3926 (&to_userptr_vma(vma)->userptr.notifier,
3927 to_userptr_vma(vma)->userptr.pages.notifier_seq));
3928 WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm),
3929 DMA_RESV_USAGE_BOOKKEEP));
3930
3931 } else {
3932 xe_bo_assert_held(xe_vma_bo(vma));
3933 }
3934 }
3935
3936 for_each_tile(tile, xe, id)
3937 if (xe_pt_zap_ptes(tile, vma))
3938 tile_mask |= BIT(id);
3939
3940 xe_device_wmb(xe);
3941
3942 ret = xe_vm_range_tilemask_tlb_inval(xe_vma_vm(vma), xe_vma_start(vma),
3943 xe_vma_end(vma), tile_mask);
3944
3945 /* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
3946 WRITE_ONCE(vma->tile_invalidated, vma->tile_mask);
3947
3948 return ret;
3949 }
3950
3951 int xe_vm_validate_protected(struct xe_vm *vm)
3952 {
3953 struct drm_gpuva *gpuva;
3954 int err = 0;
3955
3956 if (!vm)
3957 return -ENODEV;
3958
3959 mutex_lock(&vm->snap_mutex);
3960
3961 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3962 struct xe_vma *vma = gpuva_to_vma(gpuva);
3963 struct xe_bo *bo = vma->gpuva.gem.obj ?
3964 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 3965 3966 if (!bo) 3967 continue; 3968 3969 if (xe_bo_is_protected(bo)) { 3970 err = xe_pxp_bo_key_check(vm->xe->pxp, bo); 3971 if (err) 3972 break; 3973 } 3974 } 3975 3976 mutex_unlock(&vm->snap_mutex); 3977 return err; 3978 } 3979 3980 struct xe_vm_snapshot { 3981 unsigned long num_snaps; 3982 struct { 3983 u64 ofs, bo_ofs; 3984 unsigned long len; 3985 struct xe_bo *bo; 3986 void *data; 3987 struct mm_struct *mm; 3988 } snap[]; 3989 }; 3990 3991 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm) 3992 { 3993 unsigned long num_snaps = 0, i; 3994 struct xe_vm_snapshot *snap = NULL; 3995 struct drm_gpuva *gpuva; 3996 3997 if (!vm) 3998 return NULL; 3999 4000 mutex_lock(&vm->snap_mutex); 4001 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 4002 if (gpuva->flags & XE_VMA_DUMPABLE) 4003 num_snaps++; 4004 } 4005 4006 if (num_snaps) 4007 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT); 4008 if (!snap) { 4009 snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV); 4010 goto out_unlock; 4011 } 4012 4013 snap->num_snaps = num_snaps; 4014 i = 0; 4015 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { 4016 struct xe_vma *vma = gpuva_to_vma(gpuva); 4017 struct xe_bo *bo = vma->gpuva.gem.obj ? 4018 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; 4019 4020 if (!(gpuva->flags & XE_VMA_DUMPABLE)) 4021 continue; 4022 4023 snap->snap[i].ofs = xe_vma_start(vma); 4024 snap->snap[i].len = xe_vma_size(vma); 4025 if (bo) { 4026 snap->snap[i].bo = xe_bo_get(bo); 4027 snap->snap[i].bo_ofs = xe_vma_bo_offset(vma); 4028 } else if (xe_vma_is_userptr(vma)) { 4029 struct mm_struct *mm = 4030 to_userptr_vma(vma)->userptr.notifier.mm; 4031 4032 if (mmget_not_zero(mm)) 4033 snap->snap[i].mm = mm; 4034 else 4035 snap->snap[i].data = ERR_PTR(-EFAULT); 4036 4037 snap->snap[i].bo_ofs = xe_vma_userptr(vma); 4038 } else { 4039 snap->snap[i].data = ERR_PTR(-ENOENT); 4040 } 4041 i++; 4042 } 4043 4044 out_unlock: 4045 mutex_unlock(&vm->snap_mutex); 4046 return snap; 4047 } 4048 4049 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) 4050 { 4051 if (IS_ERR_OR_NULL(snap)) 4052 return; 4053 4054 for (int i = 0; i < snap->num_snaps; i++) { 4055 struct xe_bo *bo = snap->snap[i].bo; 4056 int err; 4057 4058 if (IS_ERR(snap->snap[i].data)) 4059 continue; 4060 4061 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER); 4062 if (!snap->snap[i].data) { 4063 snap->snap[i].data = ERR_PTR(-ENOMEM); 4064 goto cleanup_bo; 4065 } 4066 4067 if (bo) { 4068 err = xe_bo_read(bo, snap->snap[i].bo_ofs, 4069 snap->snap[i].data, snap->snap[i].len); 4070 } else { 4071 void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs; 4072 4073 kthread_use_mm(snap->snap[i].mm); 4074 if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len)) 4075 err = 0; 4076 else 4077 err = -EFAULT; 4078 kthread_unuse_mm(snap->snap[i].mm); 4079 4080 mmput(snap->snap[i].mm); 4081 snap->snap[i].mm = NULL; 4082 } 4083 4084 if (err) { 4085 kvfree(snap->snap[i].data); 4086 snap->snap[i].data = ERR_PTR(err); 4087 } 4088 4089 cleanup_bo: 4090 xe_bo_put(bo); 4091 snap->snap[i].bo = NULL; 4092 } 4093 } 4094 4095 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) 4096 { 4097 unsigned long i, j; 4098 4099 if (IS_ERR_OR_NULL(snap)) { 4100 drm_printf(p, "[0].error: %li\n", PTR_ERR(snap)); 4101 return; 4102 } 4103 4104 for (i = 0; i < snap->num_snaps; i++) { 4105 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len); 4106 4107 if 
(IS_ERR(snap->snap[i].data)) { 4108 drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs, 4109 PTR_ERR(snap->snap[i].data)); 4110 continue; 4111 } 4112 4113 drm_printf(p, "[%llx].data: ", snap->snap[i].ofs); 4114 4115 for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) { 4116 u32 *val = snap->snap[i].data + j; 4117 char dumped[ASCII85_BUFSZ]; 4118 4119 drm_puts(p, ascii85_encode(*val, dumped)); 4120 } 4121 4122 drm_puts(p, "\n"); 4123 4124 if (drm_coredump_printer_is_full(p)) 4125 return; 4126 } 4127 } 4128 4129 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap) 4130 { 4131 unsigned long i; 4132 4133 if (IS_ERR_OR_NULL(snap)) 4134 return; 4135 4136 for (i = 0; i < snap->num_snaps; i++) { 4137 if (!IS_ERR(snap->snap[i].data)) 4138 kvfree(snap->snap[i].data); 4139 xe_bo_put(snap->snap[i].bo); 4140 if (snap->snap[i].mm) 4141 mmput(snap->snap[i].mm); 4142 } 4143 kvfree(snap); 4144 } 4145 4146 /** 4147 * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations 4148 * @xe: Pointer to the XE device structure 4149 * @vma: Pointer to the virtual memory area (VMA) structure 4150 * @is_atomic: In pagefault path and atomic operation 4151 * 4152 * This function determines whether the given VMA needs to be migrated to 4153 * VRAM in order to do atomic GPU operation. 4154 * 4155 * Return: 4156 * 1 - Migration to VRAM is required 4157 * 0 - Migration is not required 4158 * -EACCES - Invalid access for atomic memory attr 4159 * 4160 */ 4161 int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic) 4162 { 4163 u32 atomic_access = xe_vma_bo(vma) ? xe_vma_bo(vma)->attr.atomic_access : 4164 vma->attr.atomic_access; 4165 4166 if (!IS_DGFX(xe) || !is_atomic) 4167 return false; 4168 4169 /* 4170 * NOTE: The checks implemented here are platform-specific. For 4171 * instance, on a device supporting CXL atomics, these would ideally 4172 * work universally without additional handling. 
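 *
 * With the checks below: DRM_XE_ATOMIC_DEVICE only requires migration
 * when the device lacks device atomics on system memory,
 * DRM_XE_ATOMIC_CPU is treated as an invalid access (-EACCES), and
 * GLOBAL/UNDEFINED (or any unknown value) default to requiring VRAM.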
4173 */
4174 switch (atomic_access) {
4175 case DRM_XE_ATOMIC_DEVICE:
4176 return !xe->info.has_device_atomics_on_smem;
4177
4178 case DRM_XE_ATOMIC_CPU:
4179 return -EACCES;
4180
4181 case DRM_XE_ATOMIC_UNDEFINED:
4182 case DRM_XE_ATOMIC_GLOBAL:
4183 default:
4184 return 1;
4185 }
4186 }
4187
4188 static int xe_vm_alloc_vma(struct xe_vm *vm,
4189 struct drm_gpuvm_map_req *map_req,
4190 bool is_madvise)
4191 {
4192 struct xe_vma_ops vops;
4193 struct drm_gpuva_ops *ops = NULL;
4194 struct drm_gpuva_op *__op;
4195 unsigned int vma_flags = 0;
4196 bool remap_op = false;
4197 struct xe_vma_mem_attr tmp_attr;
4198 u16 default_pat;
4199 int err;
4200
4201 lockdep_assert_held_write(&vm->lock);
4202
4203 if (is_madvise)
4204 ops = drm_gpuvm_madvise_ops_create(&vm->gpuvm, map_req);
4205 else
4206 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, map_req);
4207
4208 if (IS_ERR(ops))
4209 return PTR_ERR(ops);
4210
4211 if (list_empty(&ops->list)) {
4212 err = 0;
4213 goto free_ops;
4214 }
4215
4216 drm_gpuva_for_each_op(__op, ops) {
4217 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
4218 struct xe_vma *vma = NULL;
4219
4220 if (!is_madvise) {
4221 if (__op->op == DRM_GPUVA_OP_UNMAP) {
4222 vma = gpuva_to_vma(op->base.unmap.va);
4223 XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma));
4224 default_pat = vma->attr.default_pat_index;
4225 vma_flags = vma->gpuva.flags;
4226 }
4227
4228 if (__op->op == DRM_GPUVA_OP_REMAP) {
4229 vma = gpuva_to_vma(op->base.remap.unmap->va);
4230 default_pat = vma->attr.default_pat_index;
4231 vma_flags = vma->gpuva.flags;
4232 }
4233
4234 if (__op->op == DRM_GPUVA_OP_MAP) {
4235 op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK;
4236 op->map.pat_index = default_pat;
4237 }
4238 } else {
4239 if (__op->op == DRM_GPUVA_OP_REMAP) {
4240 vma = gpuva_to_vma(op->base.remap.unmap->va);
4241 xe_assert(vm->xe, !remap_op);
4242 xe_assert(vm->xe, xe_vma_has_no_bo(vma));
4243 remap_op = true;
4244 vma_flags = vma->gpuva.flags;
4245 }
4246
4247 if (__op->op == DRM_GPUVA_OP_MAP) {
4248 xe_assert(vm->xe, remap_op);
4249 remap_op = false;
4250 /*
4251 * For madvise ops DRM_GPUVA_OP_MAP always
4252 * comes after DRM_GPUVA_OP_REMAP, so
4253 * propagate the flags from the vma we're
4254 * unmapping.
4255 */
4256 op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK;
4257 }
4258 }
4259 print_op(vm->xe, __op);
4260 }
4261
4262 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
4263
4264 if (is_madvise)
4265 vops.flags |= XE_VMA_OPS_FLAG_MADVISE;
4266
4267 err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
4268 if (err)
4269 goto unwind_ops;
4270
4271 xe_vm_lock(vm, false);
4272
4273 drm_gpuva_for_each_op(__op, ops) {
4274 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
4275 struct xe_vma *vma;
4276
4277 if (__op->op == DRM_GPUVA_OP_UNMAP) {
4278 vma = gpuva_to_vma(op->base.unmap.va);
4279 /* There should be no unmap for madvise */
4280 if (is_madvise)
4281 XE_WARN_ON("UNEXPECTED UNMAP");
4282
4283 xe_vma_destroy(vma, NULL);
4284 } else if (__op->op == DRM_GPUVA_OP_REMAP) {
4285 vma = gpuva_to_vma(op->base.remap.unmap->va);
4286 /* For madvise ops, store the attributes of the VMA being
4287 * remap-unmapped so they can be assigned to the newly created MAP vma.
4288 */
4289 if (is_madvise)
4290 tmp_attr = vma->attr;
4291
4292 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL);
4293 } else if (__op->op == DRM_GPUVA_OP_MAP) {
4294 vma = op->map.vma;
4295 /* For a madvise call, MAP is always preceded by REMAP,
4296 * so tmp_attr has already been populated above and it is
4297 * safe to copy it to the new vma.
4298 */
4299 if (is_madvise)
4300 vma->attr = tmp_attr;
4301 }
4302 }
4303
4304 xe_vm_unlock(vm);
4305 drm_gpuva_ops_free(&vm->gpuvm, ops);
4306 return 0;
4307
4308 unwind_ops:
4309 vm_bind_ioctl_ops_unwind(vm, &ops, 1);
4310 free_ops:
4311 drm_gpuva_ops_free(&vm->gpuvm, ops);
4312 return err;
4313 }
4314
4315 /**
4316 * xe_vm_alloc_madvise_vma - Allocate VMAs with madvise ops
4317 * @vm: Pointer to the xe_vm structure
4318 * @start: Starting input address
4319 * @range: Size of the input range
4320 *
4321 * This function splits an existing VMA to create new VMAs for the user-provided input range
4322 *
4323 * Return: 0 on success, negative error code on failure
4324 */
4325 int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
4326 {
4327 struct drm_gpuvm_map_req map_req = {
4328 .map.va.addr = start,
4329 .map.va.range = range,
4330 };
4331
4332 lockdep_assert_held_write(&vm->lock);
4333
4334 vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range);
4335
4336 return xe_vm_alloc_vma(vm, &map_req, true);
4337 }
4338
4339 /**
4340 * xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU addr mirror vma
4341 * @vm: Pointer to the xe_vm structure
4342 * @start: Starting input address
4343 * @range: Size of the input range
4344 *
4345 * This function splits/merges existing VMAs to create a new VMA for the user-provided input range
4346 *
4347 * Return: 0 on success, negative error code on failure
4348 */
4349 int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
4350 {
4351 struct drm_gpuvm_map_req map_req = {
4352 .map.va.addr = start,
4353 .map.va.range = range,
4354 };
4355
4356 lockdep_assert_held_write(&vm->lock);
4357
4358 vm_dbg(&vm->xe->drm, "CPU_ADDR_MIRROR_VMA_OPS_CREATE: addr=0x%016llx, size=0x%016llx",
4359 start, range);
4360
4361 return xe_vm_alloc_vma(vm, &map_req, false);
4362 }
4363