1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2021 Intel Corporation 4 */ 5 6 #include "xe_vm.h" 7 8 #include <linux/dma-fence-array.h> 9 #include <linux/nospec.h> 10 11 #include <drm/drm_drv.h> 12 #include <drm/drm_exec.h> 13 #include <drm/drm_print.h> 14 #include <drm/ttm/ttm_tt.h> 15 #include <uapi/drm/xe_drm.h> 16 #include <linux/ascii85.h> 17 #include <linux/delay.h> 18 #include <linux/kthread.h> 19 #include <linux/mm.h> 20 #include <linux/swap.h> 21 22 #include <generated/xe_wa_oob.h> 23 24 #include "regs/xe_gtt_defs.h" 25 #include "xe_assert.h" 26 #include "xe_bo.h" 27 #include "xe_device.h" 28 #include "xe_drm_client.h" 29 #include "xe_exec_queue.h" 30 #include "xe_gt.h" 31 #include "xe_migrate.h" 32 #include "xe_pat.h" 33 #include "xe_pm.h" 34 #include "xe_preempt_fence.h" 35 #include "xe_pt.h" 36 #include "xe_pxp.h" 37 #include "xe_sriov_vf.h" 38 #include "xe_svm.h" 39 #include "xe_sync.h" 40 #include "xe_tile.h" 41 #include "xe_tlb_inval.h" 42 #include "xe_trace_bo.h" 43 #include "xe_vm_madvise.h" 44 #include "xe_wa.h" 45 46 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) 47 { 48 return vm->gpuvm.r_obj; 49 } 50 51 /** 52 * xe_vm_drm_exec_lock() - Lock the vm's resv with a drm_exec transaction 53 * @vm: The vm whose resv is to be locked. 54 * @exec: The drm_exec transaction. 55 * 56 * Helper to lock the vm's resv as part of a drm_exec transaction. 57 * 58 * Return: %0 on success. See drm_exec_lock_obj() for error codes. 
59 */ 60 int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec) 61 { 62 return drm_exec_lock_obj(exec, xe_vm_obj(vm)); 63 } 64 65 static bool preempt_fences_waiting(struct xe_vm *vm) 66 { 67 struct xe_exec_queue *q; 68 69 lockdep_assert_held(&vm->lock); 70 xe_vm_assert_held(vm); 71 72 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 73 if (!q->lr.pfence || 74 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, 75 &q->lr.pfence->flags)) { 76 return true; 77 } 78 } 79 80 return false; 81 } 82 83 static void free_preempt_fences(struct list_head *list) 84 { 85 struct list_head *link, *next; 86 87 list_for_each_safe(link, next, list) 88 xe_preempt_fence_free(to_preempt_fence_from_link(link)); 89 } 90 91 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list, 92 unsigned int *count) 93 { 94 lockdep_assert_held(&vm->lock); 95 xe_vm_assert_held(vm); 96 97 if (*count >= vm->preempt.num_exec_queues) 98 return 0; 99 100 for (; *count < vm->preempt.num_exec_queues; ++(*count)) { 101 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc(); 102 103 if (IS_ERR(pfence)) 104 return PTR_ERR(pfence); 105 106 list_move_tail(xe_preempt_fence_link(pfence), list); 107 } 108 109 return 0; 110 } 111 112 static int wait_for_existing_preempt_fences(struct xe_vm *vm) 113 { 114 struct xe_exec_queue *q; 115 bool vf_migration = IS_SRIOV_VF(vm->xe) && 116 xe_sriov_vf_migration_supported(vm->xe); 117 signed long wait_time = vf_migration ? 
HZ / 5 : MAX_SCHEDULE_TIMEOUT; 118 119 xe_vm_assert_held(vm); 120 121 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 122 if (q->lr.pfence) { 123 long timeout; 124 125 timeout = dma_fence_wait_timeout(q->lr.pfence, false, 126 wait_time); 127 if (!timeout) { 128 xe_assert(vm->xe, vf_migration); 129 return -EAGAIN; 130 } 131 132 /* Only -ETIME on fence indicates VM needs to be killed */ 133 if (timeout < 0 || q->lr.pfence->error == -ETIME) 134 return -ETIME; 135 136 dma_fence_put(q->lr.pfence); 137 q->lr.pfence = NULL; 138 } 139 } 140 141 return 0; 142 } 143 144 static bool xe_vm_is_idle(struct xe_vm *vm) 145 { 146 struct xe_exec_queue *q; 147 148 xe_vm_assert_held(vm); 149 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 150 if (!xe_exec_queue_is_idle(q)) 151 return false; 152 } 153 154 return true; 155 } 156 157 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list) 158 { 159 struct list_head *link; 160 struct xe_exec_queue *q; 161 162 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 163 struct dma_fence *fence; 164 165 link = list->next; 166 xe_assert(vm->xe, link != list); 167 168 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link), 169 q, q->lr.context, 170 ++q->lr.seqno); 171 dma_fence_put(q->lr.pfence); 172 q->lr.pfence = fence; 173 } 174 } 175 176 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) 177 { 178 struct xe_exec_queue *q; 179 int err; 180 181 xe_bo_assert_held(bo); 182 183 if (!vm->preempt.num_exec_queues) 184 return 0; 185 186 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues); 187 if (err) 188 return err; 189 190 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) 191 if (q->lr.pfence) { 192 dma_resv_add_fence(bo->ttm.base.resv, 193 q->lr.pfence, 194 DMA_RESV_USAGE_BOOKKEEP); 195 } 196 197 return 0; 198 } 199 200 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm, 201 struct drm_exec *exec) 202 { 203 struct xe_exec_queue *q; 
204 205 lockdep_assert_held(&vm->lock); 206 xe_vm_assert_held(vm); 207 208 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 209 q->ops->resume(q); 210 211 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence, 212 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); 213 } 214 } 215 216 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 217 { 218 struct drm_gpuvm_exec vm_exec = { 219 .vm = &vm->gpuvm, 220 .flags = DRM_EXEC_INTERRUPTIBLE_WAIT, 221 .num_fences = 1, 222 }; 223 struct drm_exec *exec = &vm_exec.exec; 224 struct xe_validation_ctx ctx; 225 struct dma_fence *pfence; 226 int err; 227 bool wait; 228 229 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); 230 231 down_write(&vm->lock); 232 err = xe_validation_exec_lock(&ctx, &vm_exec, &vm->xe->val); 233 if (err) 234 goto out_up_write; 235 236 pfence = xe_preempt_fence_create(q, q->lr.context, 237 ++q->lr.seqno); 238 if (IS_ERR(pfence)) { 239 err = PTR_ERR(pfence); 240 goto out_fini; 241 } 242 243 list_add(&q->lr.link, &vm->preempt.exec_queues); 244 ++vm->preempt.num_exec_queues; 245 q->lr.pfence = pfence; 246 247 xe_svm_notifier_lock(vm); 248 249 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence, 250 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); 251 252 /* 253 * Check to see if a preemption on VM is in flight or userptr 254 * invalidation, if so trigger this preempt fence to sync state with 255 * other preempt fences on the VM. 256 */ 257 wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm); 258 if (wait) 259 dma_fence_enable_sw_signaling(pfence); 260 261 xe_svm_notifier_unlock(vm); 262 263 out_fini: 264 xe_validation_ctx_fini(&ctx); 265 out_up_write: 266 up_write(&vm->lock); 267 268 return err; 269 } 270 ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO); 271 272 /** 273 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM 274 * @vm: The VM. 
275 * @q: The exec_queue 276 * 277 * Note that this function might be called multiple times on the same queue. 278 */ 279 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 280 { 281 if (!xe_vm_in_preempt_fence_mode(vm)) 282 return; 283 284 down_write(&vm->lock); 285 if (!list_empty(&q->lr.link)) { 286 list_del_init(&q->lr.link); 287 --vm->preempt.num_exec_queues; 288 } 289 if (q->lr.pfence) { 290 dma_fence_enable_sw_signaling(q->lr.pfence); 291 dma_fence_put(q->lr.pfence); 292 q->lr.pfence = NULL; 293 } 294 up_write(&vm->lock); 295 } 296 297 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000 298 299 /** 300 * xe_vm_kill() - VM Kill 301 * @vm: The VM. 302 * @unlocked: Flag indicates the VM's dma-resv is not held 303 * 304 * Kill the VM by setting banned flag indicated VM is no longer available for 305 * use. If in preempt fence mode, also kill all exec queue attached to the VM. 306 */ 307 void xe_vm_kill(struct xe_vm *vm, bool unlocked) 308 { 309 struct xe_exec_queue *q; 310 311 lockdep_assert_held(&vm->lock); 312 313 if (unlocked) 314 xe_vm_lock(vm, false); 315 316 vm->flags |= XE_VM_FLAG_BANNED; 317 trace_xe_vm_kill(vm); 318 319 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) 320 q->ops->kill(q); 321 322 if (unlocked) 323 xe_vm_unlock(vm); 324 325 /* TODO: Inform user the VM is banned */ 326 } 327 328 static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) 329 { 330 struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm); 331 struct xe_bo *bo = gem_to_xe_bo(vm_bo->obj); 332 struct drm_gpuva *gpuva; 333 int ret; 334 335 lockdep_assert_held(&vm->lock); 336 drm_gpuvm_bo_for_each_va(gpuva, vm_bo) 337 list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind, 338 &vm->rebind_list); 339 340 /* Skip re-populating purged BOs, rebind maps scratch pages. 
*/ 341 if (xe_bo_is_purged(bo)) { 342 vm_bo->evicted = false; 343 return 0; 344 } 345 346 if (!try_wait_for_completion(&vm->xe->pm_block)) 347 return -EAGAIN; 348 349 ret = xe_bo_validate(bo, vm, false, exec); 350 if (ret) 351 return ret; 352 353 vm_bo->evicted = false; 354 return 0; 355 } 356 357 /** 358 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas 359 * @vm: The vm for which we are rebinding. 360 * @exec: The struct drm_exec with the locked GEM objects. 361 * @num_fences: The number of fences to reserve for the operation, not 362 * including rebinds and validations. 363 * 364 * Validates all evicted gem objects and rebinds their vmas. Note that 365 * rebindings may cause evictions and hence the validation-rebind 366 * sequence is rerun until there are no more objects to validate. 367 * 368 * Return: 0 on success, negative error code on error. In particular, 369 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if 370 * the drm_exec transaction needs to be restarted. 
371 */ 372 int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec, 373 unsigned int num_fences) 374 { 375 struct drm_gem_object *obj; 376 unsigned long index; 377 int ret; 378 379 do { 380 ret = drm_gpuvm_validate(&vm->gpuvm, exec); 381 if (ret) 382 return ret; 383 384 ret = xe_vm_rebind(vm, false); 385 if (ret) 386 return ret; 387 } while (!list_empty(&vm->gpuvm.evict.list)); 388 389 drm_exec_for_each_locked_object(exec, index, obj) { 390 ret = dma_resv_reserve_fences(obj->resv, num_fences); 391 if (ret) 392 return ret; 393 } 394 395 return 0; 396 } 397 398 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, 399 bool *done) 400 { 401 int err; 402 403 err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0); 404 if (err) 405 return err; 406 407 if (xe_vm_is_idle(vm)) { 408 vm->preempt.rebind_deactivated = true; 409 *done = true; 410 return 0; 411 } 412 413 if (!preempt_fences_waiting(vm)) { 414 *done = true; 415 return 0; 416 } 417 418 err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0); 419 if (err) 420 return err; 421 422 err = wait_for_existing_preempt_fences(vm); 423 if (err) 424 return err; 425 426 /* 427 * Add validation and rebinding to the locking loop since both can 428 * cause evictions which may require blocing dma_resv locks. 429 * The fence reservation here is intended for the new preempt fences 430 * we attach at the end of the rebind work. 431 */ 432 return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues); 433 } 434 435 static bool vm_suspend_rebind_worker(struct xe_vm *vm) 436 { 437 struct xe_device *xe = vm->xe; 438 bool ret = false; 439 440 mutex_lock(&xe->rebind_resume_lock); 441 if (!try_wait_for_completion(&vm->xe->pm_block)) { 442 ret = true; 443 list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list); 444 } 445 mutex_unlock(&xe->rebind_resume_lock); 446 447 return ret; 448 } 449 450 /** 451 * xe_vm_resume_rebind_worker() - Resume the rebind worker. 
452 * @vm: The vm whose preempt worker to resume. 453 * 454 * Resume a preempt worker that was previously suspended by 455 * vm_suspend_rebind_worker(). 456 */ 457 void xe_vm_resume_rebind_worker(struct xe_vm *vm) 458 { 459 queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work); 460 } 461 462 static void preempt_rebind_work_func(struct work_struct *w) 463 { 464 struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work); 465 struct xe_validation_ctx ctx; 466 struct drm_exec exec; 467 unsigned int fence_count = 0; 468 LIST_HEAD(preempt_fences); 469 int err = 0; 470 long wait; 471 int __maybe_unused tries = 0; 472 473 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); 474 trace_xe_vm_rebind_worker_enter(vm); 475 476 down_write(&vm->lock); 477 478 if (xe_vm_is_closed_or_banned(vm)) { 479 up_write(&vm->lock); 480 trace_xe_vm_rebind_worker_exit(vm); 481 return; 482 } 483 484 retry: 485 if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) { 486 up_write(&vm->lock); 487 /* We don't actually block but don't make progress. 
*/ 488 xe_pm_might_block_on_suspend(); 489 return; 490 } 491 492 if (xe_vm_userptr_check_repin(vm)) { 493 err = xe_vm_userptr_pin(vm); 494 if (err) 495 goto out_unlock_outer; 496 } 497 498 err = xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, 499 (struct xe_val_flags) {.interruptible = true}); 500 if (err) 501 goto out_unlock_outer; 502 503 drm_exec_until_all_locked(&exec) { 504 bool done = false; 505 506 err = xe_preempt_work_begin(&exec, vm, &done); 507 drm_exec_retry_on_contention(&exec); 508 xe_validation_retry_on_oom(&ctx, &err); 509 if (err || done) { 510 xe_validation_ctx_fini(&ctx); 511 goto out_unlock_outer; 512 } 513 } 514 515 err = alloc_preempt_fences(vm, &preempt_fences, &fence_count); 516 if (err) 517 goto out_unlock; 518 519 xe_vm_set_validation_exec(vm, &exec); 520 err = xe_vm_rebind(vm, true); 521 xe_vm_set_validation_exec(vm, NULL); 522 if (err) 523 goto out_unlock; 524 525 /* Wait on rebinds and munmap style VM unbinds */ 526 wait = dma_resv_wait_timeout(xe_vm_resv(vm), 527 DMA_RESV_USAGE_KERNEL, 528 false, MAX_SCHEDULE_TIMEOUT); 529 if (wait <= 0) { 530 err = -ETIME; 531 goto out_unlock; 532 } 533 534 #define retry_required(__tries, __vm) \ 535 (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \ 536 (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \ 537 __xe_vm_userptr_needs_repin(__vm)) 538 539 xe_svm_notifier_lock(vm); 540 if (retry_required(tries, vm)) { 541 xe_svm_notifier_unlock(vm); 542 err = -EAGAIN; 543 goto out_unlock; 544 } 545 546 #undef retry_required 547 548 spin_lock(&vm->xe->ttm.lru_lock); 549 ttm_lru_bulk_move_tail(&vm->lru_bulk_move); 550 spin_unlock(&vm->xe->ttm.lru_lock); 551 552 /* Point of no return. 
*/ 553 arm_preempt_fences(vm, &preempt_fences); 554 resume_and_reinstall_preempt_fences(vm, &exec); 555 xe_svm_notifier_unlock(vm); 556 557 out_unlock: 558 xe_validation_ctx_fini(&ctx); 559 out_unlock_outer: 560 if (err == -EAGAIN) { 561 trace_xe_vm_rebind_worker_retry(vm); 562 563 /* 564 * We can't block in workers on a VF which supports migration 565 * given this can block the VF post-migration workers from 566 * getting scheduled. 567 */ 568 if (IS_SRIOV_VF(vm->xe) && 569 xe_sriov_vf_migration_supported(vm->xe)) { 570 up_write(&vm->lock); 571 xe_vm_queue_rebind_worker(vm); 572 return; 573 } 574 575 goto retry; 576 } 577 578 if (err) { 579 drm_warn(&vm->xe->drm, "VM worker error: %d\n", err); 580 xe_vm_kill(vm, true); 581 } 582 up_write(&vm->lock); 583 584 free_preempt_fences(&preempt_fences); 585 586 trace_xe_vm_rebind_worker_exit(vm); 587 } 588 589 /** 590 * xe_vm_add_fault_entry_pf() - Add pagefault to vm fault list 591 * @vm: The VM. 592 * @pf: The pagefault. 593 * 594 * This function takes the data from the pagefault @pf and saves it to @vm->faults.list. 595 * 596 * The function exits silently if the list is full, and reports a warning if the pagefault 597 * could not be saved to the list. 598 */ 599 void xe_vm_add_fault_entry_pf(struct xe_vm *vm, struct xe_pagefault *pf) 600 { 601 struct xe_vm_fault_entry *e; 602 struct xe_hw_engine *hwe; 603 604 /* Do not report faults on reserved engines */ 605 hwe = xe_gt_hw_engine(pf->gt, pf->consumer.engine_class, 606 pf->consumer.engine_instance, false); 607 if (!hwe || xe_hw_engine_is_reserved(hwe)) 608 return; 609 610 e = kzalloc_obj(*e); 611 if (!e) { 612 drm_warn(&vm->xe->drm, 613 "Could not allocate memory for fault!\n"); 614 return; 615 } 616 617 guard(spinlock)(&vm->faults.lock); 618 619 /* 620 * Limit the number of faults in the fault list to prevent 621 * memory overuse. 
622 */ 623 if (vm->faults.len >= MAX_FAULTS_SAVED_PER_VM) { 624 kfree(e); 625 return; 626 } 627 628 e->address = pf->consumer.page_addr; 629 /* 630 * TODO: 631 * Address precision is currently always SZ_4K, but this may change 632 * in the future. 633 */ 634 e->address_precision = SZ_4K; 635 e->access_type = pf->consumer.access_type; 636 e->fault_type = FIELD_GET(XE_PAGEFAULT_TYPE_MASK, 637 pf->consumer.fault_type_level), 638 e->fault_level = FIELD_GET(XE_PAGEFAULT_LEVEL_MASK, 639 pf->consumer.fault_type_level), 640 641 list_add_tail(&e->list, &vm->faults.list); 642 vm->faults.len++; 643 } 644 645 static void xe_vm_clear_fault_entries(struct xe_vm *vm) 646 { 647 struct xe_vm_fault_entry *e, *tmp; 648 649 guard(spinlock)(&vm->faults.lock); 650 list_for_each_entry_safe(e, tmp, &vm->faults.list, list) { 651 list_del(&e->list); 652 kfree(e); 653 } 654 vm->faults.len = 0; 655 } 656 657 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) 658 { 659 int i; 660 661 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) { 662 if (!vops->pt_update_ops[i].num_ops) 663 continue; 664 665 vops->pt_update_ops[i].ops = 666 kmalloc_objs(*vops->pt_update_ops[i].ops, 667 vops->pt_update_ops[i].num_ops, 668 GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 669 if (!vops->pt_update_ops[i].ops) 670 return array_of_binds ? 
-ENOBUFS : -ENOMEM; 671 } 672 673 return 0; 674 } 675 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO); 676 677 static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op) 678 { 679 struct xe_vma *vma; 680 681 vma = gpuva_to_vma(op->base.prefetch.va); 682 683 if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma)) 684 xa_destroy(&op->prefetch_range.range); 685 } 686 687 static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops) 688 { 689 struct xe_vma_op *op; 690 691 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) 692 return; 693 694 list_for_each_entry(op, &vops->list, link) 695 xe_vma_svm_prefetch_op_fini(op); 696 } 697 698 static void xe_vma_ops_fini(struct xe_vma_ops *vops) 699 { 700 int i; 701 702 xe_vma_svm_prefetch_ops_fini(vops); 703 704 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 705 kfree(vops->pt_update_ops[i].ops); 706 } 707 708 static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val) 709 { 710 int i; 711 712 if (!inc_val) 713 return; 714 715 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 716 if (BIT(i) & tile_mask) 717 vops->pt_update_ops[i].num_ops += inc_val; 718 } 719 720 #define XE_VMA_CREATE_MASK ( \ 721 XE_VMA_READ_ONLY | \ 722 XE_VMA_DUMPABLE | \ 723 XE_VMA_SYSTEM_ALLOCATOR | \ 724 DRM_GPUVA_SPARSE | \ 725 XE_VMA_MADV_AUTORESET) 726 727 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, 728 u8 tile_mask) 729 { 730 INIT_LIST_HEAD(&op->link); 731 op->tile_mask = tile_mask; 732 op->base.op = DRM_GPUVA_OP_MAP; 733 op->base.map.va.addr = vma->gpuva.va.addr; 734 op->base.map.va.range = vma->gpuva.va.range; 735 op->base.map.gem.obj = vma->gpuva.gem.obj; 736 op->base.map.gem.offset = vma->gpuva.gem.offset; 737 op->map.vma = vma; 738 op->map.immediate = true; 739 op->map.vma_flags = vma->gpuva.flags & XE_VMA_CREATE_MASK; 740 } 741 742 static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, 743 u8 tile_mask) 744 { 745 struct xe_vma_op *op; 
746 747 op = kzalloc_obj(*op); 748 if (!op) 749 return -ENOMEM; 750 751 xe_vm_populate_rebind(op, vma, tile_mask); 752 list_add_tail(&op->link, &vops->list); 753 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); 754 755 return 0; 756 } 757 758 static struct dma_fence *ops_execute(struct xe_vm *vm, 759 struct xe_vma_ops *vops); 760 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 761 struct xe_exec_queue *q, 762 struct xe_sync_entry *syncs, u32 num_syncs); 763 764 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) 765 { 766 struct dma_fence *fence; 767 struct xe_vma *vma, *next; 768 struct xe_vma_ops vops; 769 struct xe_vma_op *op, *next_op; 770 int err, i; 771 772 lockdep_assert_held(&vm->lock); 773 if ((xe_vm_in_lr_mode(vm) && !rebind_worker) || 774 list_empty(&vm->rebind_list)) 775 return 0; 776 777 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 778 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 779 vops.pt_update_ops[i].wait_vm_bookkeep = true; 780 781 xe_vm_assert_held(vm); 782 list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { 783 xe_assert(vm->xe, vma->tile_present); 784 785 if (rebind_worker) 786 trace_xe_vma_rebind_worker(vma); 787 else 788 trace_xe_vma_rebind_exec(vma); 789 790 err = xe_vm_ops_add_rebind(&vops, vma, 791 vma->tile_present); 792 if (err) 793 goto free_ops; 794 } 795 796 err = xe_vma_ops_alloc(&vops, false); 797 if (err) 798 goto free_ops; 799 800 fence = ops_execute(vm, &vops); 801 if (IS_ERR(fence)) { 802 err = PTR_ERR(fence); 803 } else { 804 dma_fence_put(fence); 805 list_for_each_entry_safe(vma, next, &vm->rebind_list, 806 combined_links.rebind) 807 list_del_init(&vma->combined_links.rebind); 808 } 809 free_ops: 810 list_for_each_entry_safe(op, next_op, &vops.list, link) { 811 list_del(&op->link); 812 kfree(op); 813 } 814 xe_vma_ops_fini(&vops); 815 816 return err; 817 } 818 819 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask) 820 { 821 struct dma_fence *fence = 
NULL; 822 struct xe_vma_ops vops; 823 struct xe_vma_op *op, *next_op; 824 struct xe_tile *tile; 825 u8 id; 826 int err; 827 828 lockdep_assert_held(&vm->lock); 829 xe_vm_assert_held(vm); 830 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 831 832 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 833 vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT; 834 for_each_tile(tile, vm->xe, id) { 835 vops.pt_update_ops[id].wait_vm_bookkeep = true; 836 vops.pt_update_ops[tile->id].q = 837 xe_migrate_exec_queue(tile->migrate); 838 } 839 840 err = xe_vm_ops_add_rebind(&vops, vma, tile_mask); 841 if (err) 842 return ERR_PTR(err); 843 844 err = xe_vma_ops_alloc(&vops, false); 845 if (err) { 846 fence = ERR_PTR(err); 847 goto free_ops; 848 } 849 850 fence = ops_execute(vm, &vops); 851 852 free_ops: 853 list_for_each_entry_safe(op, next_op, &vops.list, link) { 854 list_del(&op->link); 855 kfree(op); 856 } 857 xe_vma_ops_fini(&vops); 858 859 return fence; 860 } 861 862 static void xe_vm_populate_range_rebind(struct xe_vma_op *op, 863 struct xe_vma *vma, 864 struct xe_svm_range *range, 865 u8 tile_mask) 866 { 867 INIT_LIST_HEAD(&op->link); 868 op->tile_mask = tile_mask; 869 op->base.op = DRM_GPUVA_OP_DRIVER; 870 op->subop = XE_VMA_SUBOP_MAP_RANGE; 871 op->map_range.vma = vma; 872 op->map_range.range = range; 873 } 874 875 static int 876 xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops, 877 struct xe_vma *vma, 878 struct xe_svm_range *range, 879 u8 tile_mask) 880 { 881 struct xe_vma_op *op; 882 883 op = kzalloc_obj(*op); 884 if (!op) 885 return -ENOMEM; 886 887 xe_vm_populate_range_rebind(op, vma, range, tile_mask); 888 list_add_tail(&op->link, &vops->list); 889 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); 890 891 return 0; 892 } 893 894 /** 895 * xe_vm_range_rebind() - VM range (re)bind 896 * @vm: The VM which the range belongs to. 897 * @vma: The VMA which the range belongs to. 898 * @range: SVM range to rebind. 899 * @tile_mask: Tile mask to bind the range to. 
900 * 901 * (re)bind SVM range setting up GPU page tables for the range. 902 * 903 * Return: dma fence for rebind to signal completion on success, ERR_PTR on 904 * failure 905 */ 906 struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm, 907 struct xe_vma *vma, 908 struct xe_svm_range *range, 909 u8 tile_mask) 910 { 911 struct dma_fence *fence = NULL; 912 struct xe_vma_ops vops; 913 struct xe_vma_op *op, *next_op; 914 struct xe_tile *tile; 915 u8 id; 916 int err; 917 918 lockdep_assert_held(&vm->lock); 919 xe_vm_assert_held(vm); 920 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 921 xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma)); 922 923 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 924 vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT; 925 for_each_tile(tile, vm->xe, id) { 926 vops.pt_update_ops[id].wait_vm_bookkeep = true; 927 vops.pt_update_ops[tile->id].q = 928 xe_migrate_exec_queue(tile->migrate); 929 } 930 931 err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask); 932 if (err) 933 return ERR_PTR(err); 934 935 err = xe_vma_ops_alloc(&vops, false); 936 if (err) { 937 fence = ERR_PTR(err); 938 goto free_ops; 939 } 940 941 fence = ops_execute(vm, &vops); 942 943 free_ops: 944 list_for_each_entry_safe(op, next_op, &vops.list, link) { 945 list_del(&op->link); 946 kfree(op); 947 } 948 xe_vma_ops_fini(&vops); 949 950 return fence; 951 } 952 953 static void xe_vm_populate_range_unbind(struct xe_vma_op *op, 954 struct xe_svm_range *range) 955 { 956 INIT_LIST_HEAD(&op->link); 957 op->tile_mask = range->tile_present; 958 op->base.op = DRM_GPUVA_OP_DRIVER; 959 op->subop = XE_VMA_SUBOP_UNMAP_RANGE; 960 op->unmap_range.range = range; 961 } 962 963 static int 964 xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops, 965 struct xe_svm_range *range) 966 { 967 struct xe_vma_op *op; 968 969 op = kzalloc_obj(*op); 970 if (!op) 971 return -ENOMEM; 972 973 xe_vm_populate_range_unbind(op, range); 974 list_add_tail(&op->link, &vops->list); 975 xe_vma_ops_incr_pt_update_ops(vops, 
range->tile_present, 1); 976 977 return 0; 978 } 979 980 /** 981 * xe_vm_range_unbind() - VM range unbind 982 * @vm: The VM which the range belongs to. 983 * @range: SVM range to rebind. 984 * 985 * Unbind SVM range removing the GPU page tables for the range. 986 * 987 * Return: dma fence for unbind to signal completion on success, ERR_PTR on 988 * failure 989 */ 990 struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm, 991 struct xe_svm_range *range) 992 { 993 struct dma_fence *fence = NULL; 994 struct xe_vma_ops vops; 995 struct xe_vma_op *op, *next_op; 996 struct xe_tile *tile; 997 u8 id; 998 int err; 999 1000 lockdep_assert_held(&vm->lock); 1001 xe_vm_assert_held(vm); 1002 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 1003 1004 if (!range->tile_present) 1005 return dma_fence_get_stub(); 1006 1007 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 1008 for_each_tile(tile, vm->xe, id) { 1009 vops.pt_update_ops[id].wait_vm_bookkeep = true; 1010 vops.pt_update_ops[tile->id].q = 1011 xe_migrate_exec_queue(tile->migrate); 1012 } 1013 1014 err = xe_vm_ops_add_range_unbind(&vops, range); 1015 if (err) 1016 return ERR_PTR(err); 1017 1018 err = xe_vma_ops_alloc(&vops, false); 1019 if (err) { 1020 fence = ERR_PTR(err); 1021 goto free_ops; 1022 } 1023 1024 fence = ops_execute(vm, &vops); 1025 1026 free_ops: 1027 list_for_each_entry_safe(op, next_op, &vops.list, link) { 1028 list_del(&op->link); 1029 kfree(op); 1030 } 1031 xe_vma_ops_fini(&vops); 1032 1033 return fence; 1034 } 1035 1036 static void xe_vma_mem_attr_fini(struct xe_vma_mem_attr *attr) 1037 { 1038 drm_pagemap_put(attr->preferred_loc.dpagemap); 1039 } 1040 1041 static void xe_vma_free(struct xe_vma *vma) 1042 { 1043 xe_vma_mem_attr_fini(&vma->attr); 1044 1045 if (xe_vma_is_userptr(vma)) 1046 kfree(to_userptr_vma(vma)); 1047 else 1048 kfree(vma); 1049 } 1050 1051 /** 1052 * xe_vma_mem_attr_copy() - copy an xe_vma_mem_attr structure. 1053 * @to: Destination. 1054 * @from: Source. 
1055 * 1056 * Copies an xe_vma_mem_attr structure taking care to get reference 1057 * counting of individual members right. 1058 */ 1059 void xe_vma_mem_attr_copy(struct xe_vma_mem_attr *to, struct xe_vma_mem_attr *from) 1060 { 1061 xe_vma_mem_attr_fini(to); 1062 *to = *from; 1063 if (to->preferred_loc.dpagemap) 1064 drm_pagemap_get(to->preferred_loc.dpagemap); 1065 } 1066 1067 static struct xe_vma *xe_vma_create(struct xe_vm *vm, 1068 struct xe_bo *bo, 1069 u64 bo_offset_or_userptr, 1070 u64 start, u64 end, 1071 struct xe_vma_mem_attr *attr, 1072 unsigned int flags) 1073 { 1074 struct xe_vma *vma; 1075 struct xe_tile *tile; 1076 u8 id; 1077 bool is_null = (flags & DRM_GPUVA_SPARSE); 1078 bool is_cpu_addr_mirror = (flags & XE_VMA_SYSTEM_ALLOCATOR); 1079 1080 xe_assert(vm->xe, start < end); 1081 xe_assert(vm->xe, end < vm->size); 1082 1083 /* 1084 * Allocate and ensure that the xe_vma_is_userptr() return 1085 * matches what was allocated. 1086 */ 1087 if (!bo && !is_null && !is_cpu_addr_mirror) { 1088 struct xe_userptr_vma *uvma = kzalloc_obj(*uvma); 1089 1090 if (!uvma) 1091 return ERR_PTR(-ENOMEM); 1092 1093 vma = &uvma->vma; 1094 } else { 1095 vma = kzalloc_obj(*vma); 1096 if (!vma) 1097 return ERR_PTR(-ENOMEM); 1098 1099 if (bo) 1100 vma->gpuva.gem.obj = &bo->ttm.base; 1101 } 1102 1103 INIT_LIST_HEAD(&vma->combined_links.rebind); 1104 1105 INIT_LIST_HEAD(&vma->gpuva.gem.entry); 1106 vma->gpuva.vm = &vm->gpuvm; 1107 vma->gpuva.va.addr = start; 1108 vma->gpuva.va.range = end - start + 1; 1109 vma->gpuva.flags = flags; 1110 1111 for_each_tile(tile, vm->xe, id) 1112 vma->tile_mask |= 0x1 << id; 1113 1114 if (vm->xe->info.has_atomic_enable_pte_bit) 1115 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; 1116 1117 xe_vma_mem_attr_copy(&vma->attr, attr); 1118 if (bo) { 1119 struct drm_gpuvm_bo *vm_bo; 1120 1121 xe_bo_assert_held(bo); 1122 1123 /* 1124 * Reject only WILLNEED mappings on DONTNEED/PURGED BOs. 
This 1125 * gates new vm_bind ioctls (user supplies WILLNEED) while 1126 * still allowing partial-unbind / remap splits whose new VMAs 1127 * inherit the parent's DONTNEED attr. It must also run before 1128 * xe_bo_willneed_get_locked() below so a 0->1 holder bump 1129 * cannot silently promote DONTNEED back to WILLNEED. 1130 */ 1131 if (vma->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED) { 1132 if (xe_bo_madv_is_dontneed(bo)) { 1133 xe_vma_free(vma); 1134 return ERR_PTR(-EBUSY); 1135 } 1136 if (xe_bo_is_purged(bo)) { 1137 xe_vma_free(vma); 1138 return ERR_PTR(-EINVAL); 1139 } 1140 } 1141 1142 vm_bo = drm_gpuvm_bo_obtain_locked(vma->gpuva.vm, &bo->ttm.base); 1143 if (IS_ERR(vm_bo)) { 1144 xe_vma_free(vma); 1145 return ERR_CAST(vm_bo); 1146 } 1147 1148 drm_gpuvm_bo_extobj_add(vm_bo); 1149 drm_gem_object_get(&bo->ttm.base); 1150 vma->gpuva.gem.offset = bo_offset_or_userptr; 1151 drm_gpuva_link(&vma->gpuva, vm_bo); 1152 drm_gpuvm_bo_put(vm_bo); 1153 1154 xe_bo_vma_count_inc_locked(bo); 1155 if (vma->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED) 1156 xe_bo_willneed_get_locked(bo); 1157 } else /* userptr or null */ { 1158 if (!is_null && !is_cpu_addr_mirror) { 1159 struct xe_userptr_vma *uvma = to_userptr_vma(vma); 1160 u64 size = end - start + 1; 1161 int err; 1162 1163 vma->gpuva.gem.offset = bo_offset_or_userptr; 1164 1165 err = xe_userptr_setup(uvma, xe_vma_userptr(vma), size); 1166 if (err) { 1167 xe_vma_free(vma); 1168 return ERR_PTR(err); 1169 } 1170 } 1171 1172 xe_vm_get(vm); 1173 } 1174 1175 return vma; 1176 } 1177 1178 static void xe_vma_destroy_late(struct xe_vma *vma) 1179 { 1180 struct xe_vm *vm = xe_vma_vm(vma); 1181 struct xe_bo *bo = xe_vma_bo(vma); 1182 1183 if (vma->ufence) { 1184 xe_sync_ufence_put(vma->ufence); 1185 vma->ufence = NULL; 1186 } 1187 1188 if (xe_vma_is_userptr(vma)) { 1189 struct xe_userptr_vma *uvma = to_userptr_vma(vma); 1190 1191 xe_userptr_remove(uvma); 1192 xe_vm_put(vm); 1193 } else if (xe_vma_is_null(vma) || 
xe_vma_is_cpu_addr_mirror(vma)) { 1194 xe_vm_put(vm); 1195 } else { 1196 xe_bo_put(bo); 1197 } 1198 1199 xe_vma_free(vma); 1200 } 1201 1202 static void vma_destroy_work_func(struct work_struct *w) 1203 { 1204 struct xe_vma *vma = 1205 container_of(w, struct xe_vma, destroy_work); 1206 1207 xe_vma_destroy_late(vma); 1208 } 1209 1210 static void vma_destroy_cb(struct dma_fence *fence, 1211 struct dma_fence_cb *cb) 1212 { 1213 struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb); 1214 1215 INIT_WORK(&vma->destroy_work, vma_destroy_work_func); 1216 queue_work(system_dfl_wq, &vma->destroy_work); 1217 } 1218 1219 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) 1220 { 1221 struct xe_vm *vm = xe_vma_vm(vma); 1222 struct xe_bo *bo = xe_vma_bo(vma); 1223 1224 lockdep_assert_held_write(&vm->lock); 1225 xe_assert(vm->xe, list_empty(&vma->combined_links.destroy)); 1226 1227 if (xe_vma_is_userptr(vma)) { 1228 xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED); 1229 xe_userptr_destroy(to_userptr_vma(vma)); 1230 } else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) { 1231 xe_bo_assert_held(bo); 1232 1233 drm_gpuva_unlink(&vma->gpuva); 1234 1235 xe_bo_vma_count_dec_locked(bo); 1236 if (vma->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED) 1237 xe_bo_willneed_put_locked(bo); 1238 } 1239 1240 xe_vm_assert_held(vm); 1241 if (fence) { 1242 int ret = dma_fence_add_callback(fence, &vma->destroy_cb, 1243 vma_destroy_cb); 1244 1245 if (ret) { 1246 XE_WARN_ON(ret != -ENOENT); 1247 xe_vma_destroy_late(vma); 1248 } 1249 } else { 1250 xe_vma_destroy_late(vma); 1251 } 1252 } 1253 1254 /** 1255 * xe_vm_lock_vma() - drm_exec utility to lock a vma 1256 * @exec: The drm_exec object we're currently locking for. 1257 * @vma: The vma for witch we want to lock the vm resv and any attached 1258 * object's resv. 1259 * 1260 * Return: 0 on success, negative error code on error. 
In particular 1261 * may return -EDEADLK on WW transaction contention and -EINTR if 1262 * an interruptible wait is terminated by a signal. 1263 */ 1264 int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma) 1265 { 1266 struct xe_vm *vm = xe_vma_vm(vma); 1267 struct xe_bo *bo = xe_vma_bo(vma); 1268 int err; 1269 1270 XE_WARN_ON(!vm); 1271 1272 err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 1273 if (!err && bo && !bo->vm) 1274 err = drm_exec_lock_obj(exec, &bo->ttm.base); 1275 1276 return err; 1277 } 1278 1279 static void xe_vma_destroy_unlocked(struct xe_vma *vma) 1280 { 1281 struct xe_device *xe = xe_vma_vm(vma)->xe; 1282 struct xe_validation_ctx ctx; 1283 struct drm_exec exec; 1284 int err = 0; 1285 1286 xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) { 1287 err = xe_vm_lock_vma(&exec, vma); 1288 drm_exec_retry_on_contention(&exec); 1289 if (XE_WARN_ON(err)) 1290 break; 1291 xe_vma_destroy(vma, NULL); 1292 } 1293 xe_assert(xe, !err); 1294 } 1295 1296 struct xe_vma * 1297 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range) 1298 { 1299 struct drm_gpuva *gpuva; 1300 1301 lockdep_assert_held(&vm->lock); 1302 1303 if (xe_vm_is_closed_or_banned(vm)) 1304 return NULL; 1305 1306 xe_assert(vm->xe, start + range <= vm->size); 1307 1308 gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range); 1309 1310 return gpuva ? 
gpuva_to_vma(gpuva) : NULL; 1311 } 1312 1313 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma) 1314 { 1315 int err; 1316 1317 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1318 lockdep_assert_held(&vm->lock); 1319 1320 mutex_lock(&vm->snap_mutex); 1321 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva); 1322 mutex_unlock(&vm->snap_mutex); 1323 XE_WARN_ON(err); /* Shouldn't be possible */ 1324 1325 return err; 1326 } 1327 1328 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma) 1329 { 1330 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1331 lockdep_assert_held(&vm->lock); 1332 1333 mutex_lock(&vm->snap_mutex); 1334 drm_gpuva_remove(&vma->gpuva); 1335 mutex_unlock(&vm->snap_mutex); 1336 if (vm->usm.last_fault_vma == vma) 1337 vm->usm.last_fault_vma = NULL; 1338 } 1339 1340 static struct drm_gpuva_op *xe_vm_op_alloc(void) 1341 { 1342 struct xe_vma_op *op; 1343 1344 op = kzalloc_obj(*op); 1345 1346 if (unlikely(!op)) 1347 return NULL; 1348 1349 return &op->base; 1350 } 1351 1352 static void xe_vm_free(struct drm_gpuvm *gpuvm); 1353 1354 static const struct drm_gpuvm_ops gpuvm_ops = { 1355 .op_alloc = xe_vm_op_alloc, 1356 .vm_bo_validate = xe_gpuvm_validate, 1357 .vm_free = xe_vm_free, 1358 }; 1359 1360 static u64 pde_encode_pat_index(u16 pat_index) 1361 { 1362 u64 pte = 0; 1363 1364 if (pat_index & BIT(0)) 1365 pte |= XE_PPGTT_PTE_PAT0; 1366 1367 if (pat_index & BIT(1)) 1368 pte |= XE_PPGTT_PTE_PAT1; 1369 1370 return pte; 1371 } 1372 1373 static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level) 1374 { 1375 u64 pte = 0; 1376 1377 if (pat_index & BIT(0)) 1378 pte |= XE_PPGTT_PTE_PAT0; 1379 1380 if (pat_index & BIT(1)) 1381 pte |= XE_PPGTT_PTE_PAT1; 1382 1383 if (pat_index & BIT(2)) { 1384 if (pt_level) 1385 pte |= XE_PPGTT_PDE_PDPE_PAT2; 1386 else 1387 pte |= XE_PPGTT_PTE_PAT2; 1388 } 1389 1390 if (pat_index & BIT(3)) 1391 pte |= XELPG_PPGTT_PTE_PAT3; 1392 1393 if (pat_index & (BIT(4))) 1394 pte |= XE2_PPGTT_PTE_PAT4; 1395 1396 return pte; 1397 } 
1398 1399 static u64 pte_encode_ps(u32 pt_level) 1400 { 1401 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL); 1402 1403 if (pt_level == 1) 1404 return XE_PDE_PS_2M; 1405 else if (pt_level == 2) 1406 return XE_PDPE_PS_1G; 1407 1408 return 0; 1409 } 1410 1411 static u16 pde_pat_index(struct xe_bo *bo) 1412 { 1413 struct xe_device *xe = xe_bo_device(bo); 1414 u16 pat_index; 1415 1416 /* 1417 * We only have two bits to encode the PAT index in non-leaf nodes, but 1418 * these only point to other paging structures so we only need a minimal 1419 * selection of options. The user PAT index is only for encoding leaf 1420 * nodes, where we have use of more bits to do the encoding. The 1421 * non-leaf nodes are instead under driver control so the chosen index 1422 * here should be distinct from the user PAT index. Also the 1423 * corresponding coherency of the PAT index should be tied to the 1424 * allocation type of the page table (or at least we should pick 1425 * something which is always safe). 1426 */ 1427 if (!xe_bo_is_vram(bo) && bo->ttm.ttm->caching == ttm_cached) 1428 pat_index = xe->pat.idx[XE_CACHE_WB]; 1429 else 1430 pat_index = xe->pat.idx[XE_CACHE_NONE]; 1431 1432 xe_assert(xe, pat_index <= 3); 1433 1434 return pat_index; 1435 } 1436 1437 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset) 1438 { 1439 u64 pde; 1440 1441 pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1442 pde |= XE_PAGE_PRESENT | XE_PAGE_RW; 1443 pde |= pde_encode_pat_index(pde_pat_index(bo)); 1444 1445 return pde; 1446 } 1447 1448 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, 1449 u16 pat_index, u32 pt_level) 1450 { 1451 u64 pte; 1452 1453 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1454 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1455 pte |= pte_encode_pat_index(pat_index, pt_level); 1456 pte |= pte_encode_ps(pt_level); 1457 1458 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) 1459 pte |= XE_PPGTT_PTE_DM; 1460 1461 return pte; 1462 } 1463 1464 static u64 
xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, 1465 u16 pat_index, u32 pt_level) 1466 { 1467 struct xe_bo *bo = xe_vma_bo(vma); 1468 struct xe_vm *vm = xe_vma_vm(vma); 1469 1470 pte |= XE_PAGE_PRESENT; 1471 1472 if (likely(!xe_vma_read_only(vma))) 1473 pte |= XE_PAGE_RW; 1474 1475 pte |= pte_encode_pat_index(pat_index, pt_level); 1476 pte |= pte_encode_ps(pt_level); 1477 1478 /* 1479 * NULL PTEs redirect to scratch page (return zeros on read). 1480 * Set for: 1) explicit null VMAs, 2) purged BOs on scratch VMs. 1481 * Never set NULL flag without scratch page - causes undefined behavior. 1482 */ 1483 if (unlikely(xe_vma_is_null(vma) || 1484 (bo && xe_bo_is_purged(bo) && xe_vm_has_scratch(vm)))) 1485 pte |= XE_PTE_NULL; 1486 1487 return pte; 1488 } 1489 1490 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, 1491 u16 pat_index, 1492 u32 pt_level, bool devmem, u64 flags) 1493 { 1494 u64 pte; 1495 1496 /* Avoid passing random bits directly as flags */ 1497 xe_assert(xe, !(flags & ~XE_PTE_PS64)); 1498 1499 pte = addr; 1500 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1501 pte |= pte_encode_pat_index(pat_index, pt_level); 1502 pte |= pte_encode_ps(pt_level); 1503 1504 if (devmem) 1505 pte |= XE_PPGTT_PTE_DM; 1506 1507 pte |= flags; 1508 1509 return pte; 1510 } 1511 1512 static const struct xe_pt_ops xelp_pt_ops = { 1513 .pte_encode_bo = xelp_pte_encode_bo, 1514 .pte_encode_vma = xelp_pte_encode_vma, 1515 .pte_encode_addr = xelp_pte_encode_addr, 1516 .pde_encode_bo = xelp_pde_encode_bo, 1517 }; 1518 1519 static void vm_destroy_work_func(struct work_struct *w); 1520 1521 /** 1522 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the 1523 * given tile and vm. 1524 * @xe: xe device. 1525 * @tile: tile to set up for. 1526 * @vm: vm to set up for. 1527 * @exec: The struct drm_exec object used to lock the vm resv. 1528 * 1529 * Sets up a pagetable tree with one page-table per level and a single 1530 * leaf PTE. 
All pagetable entries point to the single page-table or, 1531 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and 1532 * writes become NOPs. 1533 * 1534 * Return: 0 on success, negative error code on error. 1535 */ 1536 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, 1537 struct xe_vm *vm, struct drm_exec *exec) 1538 { 1539 u8 id = tile->id; 1540 int i; 1541 1542 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) { 1543 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i, exec); 1544 if (IS_ERR(vm->scratch_pt[id][i])) { 1545 int err = PTR_ERR(vm->scratch_pt[id][i]); 1546 1547 vm->scratch_pt[id][i] = NULL; 1548 return err; 1549 } 1550 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); 1551 } 1552 1553 return 0; 1554 } 1555 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO); 1556 1557 static void xe_vm_free_scratch(struct xe_vm *vm) 1558 { 1559 struct xe_tile *tile; 1560 u8 id; 1561 1562 if (!xe_vm_has_scratch(vm)) 1563 return; 1564 1565 for_each_tile(tile, vm->xe, id) { 1566 u32 i; 1567 1568 if (!vm->pt_root[id]) 1569 continue; 1570 1571 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i) 1572 if (vm->scratch_pt[id][i]) 1573 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL); 1574 } 1575 } 1576 1577 static void xe_vm_pt_destroy(struct xe_vm *vm) 1578 { 1579 struct xe_tile *tile; 1580 u8 id; 1581 1582 xe_vm_assert_held(vm); 1583 1584 for_each_tile(tile, vm->xe, id) { 1585 if (vm->pt_root[id]) { 1586 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); 1587 vm->pt_root[id] = NULL; 1588 } 1589 } 1590 } 1591 1592 static void xe_vm_init_prove_locking(struct xe_device *xe, struct xe_vm *vm) 1593 { 1594 if (!IS_ENABLED(CONFIG_PROVE_LOCKING)) 1595 return; 1596 1597 fs_reclaim_acquire(GFP_KERNEL); 1598 might_lock(&vm->exec_queues.lock); 1599 fs_reclaim_release(GFP_KERNEL); 1600 1601 down_read(&vm->exec_queues.lock); 1602 might_lock(&xe_root_mmio_gt(xe)->uc.guc.ct.lock); 1603 up_read(&vm->exec_queues.lock); 1604 } 
1605 1606 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) 1607 { 1608 struct drm_gem_object *vm_resv_obj; 1609 struct xe_validation_ctx ctx; 1610 struct drm_exec exec; 1611 struct xe_vm *vm; 1612 int err; 1613 struct xe_tile *tile; 1614 u8 id; 1615 1616 /* 1617 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to 1618 * ever be in faulting mode. 1619 */ 1620 xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE))); 1621 1622 vm = kzalloc(sizeof(*vm), GFP_KERNEL); 1623 if (!vm) 1624 return ERR_PTR(-ENOMEM); 1625 1626 vm->xe = xe; 1627 1628 vm->size = 1ull << xe->info.va_bits; 1629 vm->flags = flags; 1630 1631 if (xef) 1632 vm->xef = xe_file_get(xef); 1633 /** 1634 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be 1635 * manipulated under the PXP mutex. However, the PXP mutex can be taken 1636 * under a user-VM lock when the PXP session is started at exec_queue 1637 * creation time. Those are different VMs and therefore there is no risk 1638 * of deadlock, but we need to tell lockdep that this is the case or it 1639 * will print a warning. 
1640 */ 1641 if (flags & XE_VM_FLAG_GSC) { 1642 static struct lock_class_key gsc_vm_key; 1643 1644 __init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key); 1645 } else { 1646 init_rwsem(&vm->lock); 1647 } 1648 mutex_init(&vm->snap_mutex); 1649 1650 INIT_LIST_HEAD(&vm->rebind_list); 1651 1652 INIT_LIST_HEAD(&vm->userptr.repin_list); 1653 INIT_LIST_HEAD(&vm->userptr.invalidated); 1654 spin_lock_init(&vm->userptr.invalidated_lock); 1655 1656 INIT_LIST_HEAD(&vm->faults.list); 1657 spin_lock_init(&vm->faults.lock); 1658 1659 ttm_lru_bulk_move_init(&vm->lru_bulk_move); 1660 1661 INIT_WORK(&vm->destroy_work, vm_destroy_work_func); 1662 1663 INIT_LIST_HEAD(&vm->preempt.exec_queues); 1664 for (id = 0; id < XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE; ++id) 1665 INIT_LIST_HEAD(&vm->exec_queues.list[id]); 1666 if (flags & XE_VM_FLAG_FAULT_MODE) 1667 vm->preempt.min_run_period_ms = xe->min_run_period_pf_ms; 1668 else 1669 vm->preempt.min_run_period_ms = xe->min_run_period_lr_ms; 1670 1671 init_rwsem(&vm->exec_queues.lock); 1672 xe_vm_init_prove_locking(xe, vm); 1673 1674 for_each_tile(tile, xe, id) 1675 xe_range_fence_tree_init(&vm->rftree[id]); 1676 1677 vm->pt_ops = &xelp_pt_ops; 1678 1679 /* 1680 * Long-running workloads are not protected by the scheduler references. 1681 * By design, run_job for long-running workloads returns NULL and the 1682 * scheduler drops all the references of it, hence protecting the VM 1683 * for this case is necessary. 
1684 */ 1685 if (flags & XE_VM_FLAG_LR_MODE) { 1686 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); 1687 xe_pm_runtime_get_noresume(xe); 1688 INIT_LIST_HEAD(&vm->preempt.pm_activate_link); 1689 } 1690 1691 err = xe_svm_init(vm); 1692 if (err) 1693 goto err_no_resv; 1694 1695 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); 1696 if (!vm_resv_obj) { 1697 err = -ENOMEM; 1698 goto err_svm_fini; 1699 } 1700 1701 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm, 1702 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops); 1703 1704 drm_gem_object_put(vm_resv_obj); 1705 1706 err = 0; 1707 xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true}, 1708 err) { 1709 err = xe_vm_drm_exec_lock(vm, &exec); 1710 drm_exec_retry_on_contention(&exec); 1711 1712 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) 1713 vm->flags |= XE_VM_FLAG_64K; 1714 1715 for_each_tile(tile, xe, id) { 1716 if (flags & XE_VM_FLAG_MIGRATION && 1717 tile->id != XE_VM_FLAG_TILE_ID(flags)) 1718 continue; 1719 1720 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level, 1721 &exec); 1722 if (IS_ERR(vm->pt_root[id])) { 1723 err = PTR_ERR(vm->pt_root[id]); 1724 vm->pt_root[id] = NULL; 1725 xe_vm_pt_destroy(vm); 1726 drm_exec_retry_on_contention(&exec); 1727 xe_validation_retry_on_oom(&ctx, &err); 1728 break; 1729 } 1730 } 1731 if (err) 1732 break; 1733 1734 if (xe_vm_has_scratch(vm)) { 1735 for_each_tile(tile, xe, id) { 1736 if (!vm->pt_root[id]) 1737 continue; 1738 1739 err = xe_vm_create_scratch(xe, tile, vm, &exec); 1740 if (err) { 1741 xe_vm_free_scratch(vm); 1742 xe_vm_pt_destroy(vm); 1743 drm_exec_retry_on_contention(&exec); 1744 xe_validation_retry_on_oom(&ctx, &err); 1745 break; 1746 } 1747 } 1748 if (err) 1749 break; 1750 vm->batch_invalidate_tlb = true; 1751 } 1752 1753 if (vm->flags & XE_VM_FLAG_LR_MODE) { 1754 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); 1755 vm->batch_invalidate_tlb = false; 1756 
} 1757 1758 /* Fill pt_root after allocating scratch tables */ 1759 for_each_tile(tile, xe, id) { 1760 if (!vm->pt_root[id]) 1761 continue; 1762 1763 xe_pt_populate_empty(tile, vm, vm->pt_root[id]); 1764 } 1765 } 1766 if (err) 1767 goto err_close; 1768 1769 /* Kernel migration VM shouldn't have a circular loop.. */ 1770 if (!(flags & XE_VM_FLAG_MIGRATION)) { 1771 for_each_tile(tile, xe, id) { 1772 struct xe_exec_queue *q; 1773 u32 create_flags = EXEC_QUEUE_FLAG_VM; 1774 1775 if (!vm->pt_root[id]) 1776 continue; 1777 1778 if (!xef) /* Not from userspace */ 1779 create_flags |= EXEC_QUEUE_FLAG_KERNEL; 1780 1781 q = xe_exec_queue_create_bind(xe, tile, vm, create_flags, 0); 1782 if (IS_ERR(q)) { 1783 err = PTR_ERR(q); 1784 goto err_close; 1785 } 1786 vm->q[id] = q; 1787 } 1788 } 1789 1790 if (xef && xe->info.has_asid) { 1791 u32 asid; 1792 1793 down_write(&xe->usm.lock); 1794 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, 1795 XA_LIMIT(1, XE_MAX_ASID - 1), 1796 &xe->usm.next_asid, GFP_NOWAIT); 1797 up_write(&xe->usm.lock); 1798 if (err < 0) 1799 goto err_close; 1800 1801 vm->usm.asid = asid; 1802 } 1803 1804 trace_xe_vm_create(vm); 1805 1806 return vm; 1807 1808 err_close: 1809 xe_vm_close_and_put(vm); 1810 return ERR_PTR(err); 1811 1812 err_svm_fini: 1813 if (flags & XE_VM_FLAG_FAULT_MODE) { 1814 vm->size = 0; /* close the vm */ 1815 xe_svm_fini(vm); 1816 } 1817 err_no_resv: 1818 mutex_destroy(&vm->snap_mutex); 1819 for_each_tile(tile, xe, id) 1820 xe_range_fence_tree_fini(&vm->rftree[id]); 1821 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 1822 if (vm->xef) 1823 xe_file_put(vm->xef); 1824 kfree(vm); 1825 if (flags & XE_VM_FLAG_LR_MODE) 1826 xe_pm_runtime_put(xe); 1827 return ERR_PTR(err); 1828 } 1829 1830 static void xe_vm_close(struct xe_vm *vm) 1831 { 1832 struct xe_device *xe = vm->xe; 1833 bool bound; 1834 int idx; 1835 1836 bound = drm_dev_enter(&xe->drm, &idx); 1837 1838 down_write(&vm->lock); 1839 if (xe_vm_in_fault_mode(vm)) 1840 
xe_svm_notifier_lock(vm); 1841 1842 vm->size = 0; 1843 1844 if (!((vm->flags & XE_VM_FLAG_MIGRATION))) { 1845 struct xe_tile *tile; 1846 struct xe_gt *gt; 1847 u8 id; 1848 1849 /* Wait for pending binds */ 1850 dma_resv_wait_timeout(xe_vm_resv(vm), 1851 DMA_RESV_USAGE_BOOKKEEP, 1852 false, MAX_SCHEDULE_TIMEOUT); 1853 1854 if (bound) { 1855 for_each_tile(tile, xe, id) 1856 if (vm->pt_root[id]) 1857 xe_pt_clear(xe, vm->pt_root[id]); 1858 1859 for_each_gt(gt, xe, id) 1860 xe_tlb_inval_vm(>->tlb_inval, vm); 1861 } 1862 } 1863 1864 if (xe_vm_in_fault_mode(vm)) 1865 xe_svm_notifier_unlock(vm); 1866 up_write(&vm->lock); 1867 1868 if (bound) 1869 drm_dev_exit(idx); 1870 } 1871 1872 void xe_vm_close_and_put(struct xe_vm *vm) 1873 { 1874 LIST_HEAD(contested); 1875 struct xe_device *xe = vm->xe; 1876 struct xe_tile *tile; 1877 struct xe_vma *vma, *next_vma; 1878 struct drm_gpuva *gpuva, *next; 1879 u8 id; 1880 1881 xe_assert(xe, !vm->preempt.num_exec_queues); 1882 1883 xe_vm_close(vm); 1884 if (xe_vm_in_preempt_fence_mode(vm)) { 1885 mutex_lock(&xe->rebind_resume_lock); 1886 list_del_init(&vm->preempt.pm_activate_link); 1887 mutex_unlock(&xe->rebind_resume_lock); 1888 flush_work(&vm->preempt.rebind_work); 1889 } 1890 if (xe_vm_in_fault_mode(vm)) 1891 xe_svm_close(vm); 1892 1893 down_write(&vm->lock); 1894 for_each_tile(tile, xe, id) { 1895 if (vm->q[id]) { 1896 int i; 1897 1898 xe_exec_queue_last_fence_put(vm->q[id], vm); 1899 for_each_tlb_inval(i) 1900 xe_exec_queue_tlb_inval_last_fence_put(vm->q[id], vm, i); 1901 } 1902 } 1903 up_write(&vm->lock); 1904 1905 for_each_tile(tile, xe, id) { 1906 if (vm->q[id]) { 1907 xe_exec_queue_kill(vm->q[id]); 1908 xe_exec_queue_put(vm->q[id]); 1909 vm->q[id] = NULL; 1910 } 1911 } 1912 1913 down_write(&vm->lock); 1914 xe_vm_lock(vm, false); 1915 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) { 1916 vma = gpuva_to_vma(gpuva); 1917 1918 if (xe_vma_has_no_bo(vma)) { 1919 xe_svm_notifier_lock(vm); 1920 vma->gpuva.flags |= XE_VMA_DESTROYED; 
1921 xe_svm_notifier_unlock(vm); 1922 } 1923 1924 xe_vm_remove_vma(vm, vma); 1925 1926 /* easy case, remove from VMA? */ 1927 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) { 1928 list_del_init(&vma->combined_links.rebind); 1929 xe_vma_destroy(vma, NULL); 1930 continue; 1931 } 1932 1933 list_move_tail(&vma->combined_links.destroy, &contested); 1934 vma->gpuva.flags |= XE_VMA_DESTROYED; 1935 } 1936 1937 /* 1938 * All vm operations will add shared fences to resv. 1939 * The only exception is eviction for a shared object, 1940 * but even so, the unbind when evicted would still 1941 * install a fence to resv. Hence it's safe to 1942 * destroy the pagetables immediately. 1943 */ 1944 xe_vm_free_scratch(vm); 1945 xe_vm_pt_destroy(vm); 1946 xe_vm_unlock(vm); 1947 1948 /* 1949 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL 1950 * Since we hold a refcount to the bo, we can remove and free 1951 * the members safely without locking. 1952 */ 1953 list_for_each_entry_safe(vma, next_vma, &contested, 1954 combined_links.destroy) { 1955 list_del_init(&vma->combined_links.destroy); 1956 xe_vma_destroy_unlocked(vma); 1957 } 1958 1959 xe_svm_fini(vm); 1960 1961 up_write(&vm->lock); 1962 1963 down_write(&xe->usm.lock); 1964 if (vm->usm.asid) { 1965 void *lookup; 1966 1967 xe_assert(xe, xe->info.has_asid); 1968 xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION)); 1969 1970 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); 1971 xe_assert(xe, lookup == vm); 1972 } 1973 up_write(&xe->usm.lock); 1974 1975 xe_vm_clear_fault_entries(vm); 1976 1977 for_each_tile(tile, xe, id) 1978 xe_range_fence_tree_fini(&vm->rftree[id]); 1979 1980 xe_vm_put(vm); 1981 } 1982 1983 static void vm_destroy_work_func(struct work_struct *w) 1984 { 1985 struct xe_vm *vm = 1986 container_of(w, struct xe_vm, destroy_work); 1987 struct xe_device *xe = vm->xe; 1988 struct xe_tile *tile; 1989 u8 id; 1990 1991 /* xe_vm_close_and_put was not called? 
*/ 1992 xe_assert(xe, !vm->size); 1993 1994 if (xe_vm_in_preempt_fence_mode(vm)) 1995 flush_work(&vm->preempt.rebind_work); 1996 1997 mutex_destroy(&vm->snap_mutex); 1998 1999 if (vm->flags & XE_VM_FLAG_LR_MODE) 2000 xe_pm_runtime_put(xe); 2001 2002 for_each_tile(tile, xe, id) 2003 XE_WARN_ON(vm->pt_root[id]); 2004 2005 trace_xe_vm_free(vm); 2006 2007 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 2008 2009 if (vm->xef) 2010 xe_file_put(vm->xef); 2011 2012 kfree(vm); 2013 } 2014 2015 static void xe_vm_free(struct drm_gpuvm *gpuvm) 2016 { 2017 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); 2018 2019 /* To destroy the VM we need to be able to sleep */ 2020 queue_work(system_dfl_wq, &vm->destroy_work); 2021 } 2022 2023 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) 2024 { 2025 struct xe_vm *vm; 2026 2027 mutex_lock(&xef->vm.lock); 2028 vm = xa_load(&xef->vm.xa, id); 2029 if (vm) 2030 xe_vm_get(vm); 2031 mutex_unlock(&xef->vm.lock); 2032 2033 return vm; 2034 } 2035 2036 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile) 2037 { 2038 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0); 2039 } 2040 2041 static struct xe_exec_queue * 2042 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 2043 { 2044 return q ? 
q : vm->q[0]; 2045 } 2046 2047 static struct xe_user_fence * 2048 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) 2049 { 2050 unsigned int i; 2051 2052 for (i = 0; i < num_syncs; i++) { 2053 struct xe_sync_entry *e = &syncs[i]; 2054 2055 if (xe_sync_is_ufence(e)) 2056 return xe_sync_ufence_get(e); 2057 } 2058 2059 return NULL; 2060 } 2061 2062 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ 2063 DRM_XE_VM_CREATE_FLAG_LR_MODE | \ 2064 DRM_XE_VM_CREATE_FLAG_FAULT_MODE | \ 2065 DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT) 2066 2067 int xe_vm_create_ioctl(struct drm_device *dev, void *data, 2068 struct drm_file *file) 2069 { 2070 struct xe_device *xe = to_xe_device(dev); 2071 struct xe_file *xef = to_xe_file(file); 2072 struct drm_xe_vm_create *args = data; 2073 struct xe_gt *wa_gt = xe_root_mmio_gt(xe); 2074 struct xe_vm *vm; 2075 u32 id; 2076 int err; 2077 u32 flags = 0; 2078 2079 if (XE_IOCTL_DBG(xe, args->extensions)) 2080 return -EINVAL; 2081 2082 if (wa_gt && XE_GT_WA(wa_gt, 22014953428)) 2083 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE; 2084 2085 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && 2086 !xe->info.has_usm)) 2087 return -EINVAL; 2088 2089 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 2090 return -EINVAL; 2091 2092 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS)) 2093 return -EINVAL; 2094 2095 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE && 2096 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && 2097 !xe->info.needs_scratch)) 2098 return -EINVAL; 2099 2100 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) && 2101 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) 2102 return -EINVAL; 2103 2104 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) && 2105 args->flags & DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT)) 2106 return -EINVAL; 2107 2108 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE) 2109 flags 
|= XE_VM_FLAG_SCRATCH_PAGE; 2110 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) 2111 flags |= XE_VM_FLAG_LR_MODE; 2112 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 2113 flags |= XE_VM_FLAG_FAULT_MODE; 2114 if (args->flags & DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT) 2115 flags |= XE_VM_FLAG_NO_VM_OVERCOMMIT; 2116 2117 vm = xe_vm_create(xe, flags, xef); 2118 if (IS_ERR(vm)) 2119 return PTR_ERR(vm); 2120 2121 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) 2122 /* Warning: Security issue - never enable by default */ 2123 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); 2124 #endif 2125 2126 /* user id alloc must always be last in ioctl to prevent UAF */ 2127 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); 2128 if (err) 2129 goto err_close_and_put; 2130 2131 args->vm_id = id; 2132 2133 return 0; 2134 2135 err_close_and_put: 2136 xe_vm_close_and_put(vm); 2137 2138 return err; 2139 } 2140 2141 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, 2142 struct drm_file *file) 2143 { 2144 struct xe_device *xe = to_xe_device(dev); 2145 struct xe_file *xef = to_xe_file(file); 2146 struct drm_xe_vm_destroy *args = data; 2147 struct xe_vm *vm; 2148 int err = 0; 2149 2150 if (XE_IOCTL_DBG(xe, args->pad) || 2151 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 2152 return -EINVAL; 2153 2154 mutex_lock(&xef->vm.lock); 2155 vm = xa_load(&xef->vm.xa, args->vm_id); 2156 if (XE_IOCTL_DBG(xe, !vm)) 2157 err = -ENOENT; 2158 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues)) 2159 err = -EBUSY; 2160 else 2161 xa_erase(&xef->vm.xa, args->vm_id); 2162 mutex_unlock(&xef->vm.lock); 2163 2164 if (!err) 2165 xe_vm_close_and_put(vm); 2166 2167 return err; 2168 } 2169 2170 static int xe_vm_query_vmas(struct xe_vm *vm, u64 start, u64 end) 2171 { 2172 struct drm_gpuva *gpuva; 2173 u32 num_vmas = 0; 2174 2175 lockdep_assert_held(&vm->lock); 2176 drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) 2177 num_vmas++; 2178 2179 return 
num_vmas; 2180 } 2181 2182 static int get_mem_attrs(struct xe_vm *vm, u32 *num_vmas, u64 start, 2183 u64 end, struct drm_xe_mem_range_attr *attrs) 2184 { 2185 struct drm_gpuva *gpuva; 2186 int i = 0; 2187 2188 lockdep_assert_held(&vm->lock); 2189 2190 drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) { 2191 struct xe_vma *vma = gpuva_to_vma(gpuva); 2192 2193 if (i == *num_vmas) 2194 return -ENOSPC; 2195 2196 attrs[i].start = xe_vma_start(vma); 2197 attrs[i].end = xe_vma_end(vma); 2198 attrs[i].atomic.val = vma->attr.atomic_access; 2199 attrs[i].pat_index.val = vma->attr.pat_index; 2200 attrs[i].preferred_mem_loc.devmem_fd = vma->attr.preferred_loc.devmem_fd; 2201 attrs[i].preferred_mem_loc.migration_policy = 2202 vma->attr.preferred_loc.migration_policy; 2203 2204 i++; 2205 } 2206 2207 *num_vmas = i; 2208 return 0; 2209 } 2210 2211 int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 2212 { 2213 struct xe_device *xe = to_xe_device(dev); 2214 struct xe_file *xef = to_xe_file(file); 2215 struct drm_xe_mem_range_attr *mem_attrs; 2216 struct drm_xe_vm_query_mem_range_attr *args = data; 2217 u64 __user *attrs_user = u64_to_user_ptr(args->vector_of_mem_attr); 2218 struct xe_vm *vm; 2219 int err = 0; 2220 2221 if (XE_IOCTL_DBG(xe, 2222 ((args->num_mem_ranges == 0 && 2223 (attrs_user || args->sizeof_mem_range_attr != 0)) || 2224 (args->num_mem_ranges > 0 && 2225 (!attrs_user || 2226 args->sizeof_mem_range_attr != 2227 sizeof(struct drm_xe_mem_range_attr)))))) 2228 return -EINVAL; 2229 2230 vm = xe_vm_lookup(xef, args->vm_id); 2231 if (XE_IOCTL_DBG(xe, !vm)) 2232 return -EINVAL; 2233 2234 err = down_read_interruptible(&vm->lock); 2235 if (err) 2236 goto put_vm; 2237 2238 attrs_user = u64_to_user_ptr(args->vector_of_mem_attr); 2239 2240 if (args->num_mem_ranges == 0 && !attrs_user) { 2241 args->num_mem_ranges = xe_vm_query_vmas(vm, args->start, args->start + args->range); 2242 args->sizeof_mem_range_attr = sizeof(struct 
drm_xe_mem_range_attr); 2243 goto unlock_vm; 2244 } 2245 2246 mem_attrs = kvmalloc_array(args->num_mem_ranges, args->sizeof_mem_range_attr, 2247 GFP_KERNEL | __GFP_ACCOUNT | 2248 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 2249 if (!mem_attrs) { 2250 err = args->num_mem_ranges > 1 ? -ENOBUFS : -ENOMEM; 2251 goto unlock_vm; 2252 } 2253 2254 memset(mem_attrs, 0, args->num_mem_ranges * args->sizeof_mem_range_attr); 2255 err = get_mem_attrs(vm, &args->num_mem_ranges, args->start, 2256 args->start + args->range, mem_attrs); 2257 if (err) 2258 goto free_mem_attrs; 2259 2260 err = copy_to_user(attrs_user, mem_attrs, 2261 args->sizeof_mem_range_attr * args->num_mem_ranges); 2262 if (err) 2263 err = -EFAULT; 2264 2265 free_mem_attrs: 2266 kvfree(mem_attrs); 2267 unlock_vm: 2268 up_read(&vm->lock); 2269 put_vm: 2270 xe_vm_put(vm); 2271 return err; 2272 } 2273 2274 static bool vma_matches(struct xe_vma *vma, u64 page_addr) 2275 { 2276 if (page_addr > xe_vma_end(vma) - 1 || 2277 page_addr + SZ_4K - 1 < xe_vma_start(vma)) 2278 return false; 2279 2280 return true; 2281 } 2282 2283 /** 2284 * xe_vm_find_vma_by_addr() - Find a VMA by its address 2285 * 2286 * @vm: the xe_vm the vma belongs to 2287 * @page_addr: address to look up 2288 */ 2289 struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr) 2290 { 2291 struct xe_vma *vma = NULL; 2292 2293 if (vm->usm.last_fault_vma) { /* Fast lookup */ 2294 if (vma_matches(vm->usm.last_fault_vma, page_addr)) 2295 vma = vm->usm.last_fault_vma; 2296 } 2297 if (!vma) 2298 vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K); 2299 2300 return vma; 2301 } 2302 2303 static const u32 region_to_mem_type[] = { 2304 XE_PL_TT, 2305 XE_PL_VRAM0, 2306 XE_PL_VRAM1, 2307 }; 2308 2309 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma, 2310 bool post_commit) 2311 { 2312 xe_svm_notifier_lock(vm); 2313 vma->gpuva.flags |= XE_VMA_DESTROYED; 2314 xe_svm_notifier_unlock(vm); 2315 if (post_commit) 2316 xe_vm_remove_vma(vm, vma); 2317 } 
2318 2319 #undef ULL 2320 #define ULL unsigned long long 2321 2322 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) 2323 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 2324 { 2325 struct xe_vma *vma; 2326 2327 switch (op->op) { 2328 case DRM_GPUVA_OP_MAP: 2329 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx", 2330 (ULL)op->map.va.addr, (ULL)op->map.va.range); 2331 break; 2332 case DRM_GPUVA_OP_REMAP: 2333 vma = gpuva_to_vma(op->remap.unmap->va); 2334 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 2335 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 2336 op->remap.unmap->keep ? 1 : 0); 2337 if (op->remap.prev) 2338 vm_dbg(&xe->drm, 2339 "REMAP:PREV: addr=0x%016llx, range=0x%016llx", 2340 (ULL)op->remap.prev->va.addr, 2341 (ULL)op->remap.prev->va.range); 2342 if (op->remap.next) 2343 vm_dbg(&xe->drm, 2344 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx", 2345 (ULL)op->remap.next->va.addr, 2346 (ULL)op->remap.next->va.range); 2347 break; 2348 case DRM_GPUVA_OP_UNMAP: 2349 vma = gpuva_to_vma(op->unmap.va); 2350 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 2351 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 2352 op->unmap.keep ? 
1 : 0); 2353 break; 2354 case DRM_GPUVA_OP_PREFETCH: 2355 vma = gpuva_to_vma(op->prefetch.va); 2356 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx", 2357 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma)); 2358 break; 2359 default: 2360 drm_warn(&xe->drm, "NOT POSSIBLE\n"); 2361 } 2362 } 2363 #else 2364 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 2365 { 2366 } 2367 #endif 2368 2369 static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags) 2370 { 2371 if (!xe_vm_in_fault_mode(vm)) 2372 return false; 2373 2374 if (!xe_vm_has_scratch(vm)) 2375 return false; 2376 2377 if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE) 2378 return false; 2379 2380 return true; 2381 } 2382 2383 static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops) 2384 { 2385 struct drm_gpuva_op *__op; 2386 2387 drm_gpuva_for_each_op(__op, ops) { 2388 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2389 2390 xe_vma_svm_prefetch_op_fini(op); 2391 } 2392 } 2393 2394 /* 2395 * Create operations list from IOCTL arguments, setup operations fields so parse 2396 * and commit steps are decoupled from IOCTL arguments. This step can fail. 2397 */ 2398 static struct drm_gpuva_ops * 2399 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, 2400 struct xe_bo *bo, u64 bo_offset_or_userptr, 2401 u64 addr, u64 range, 2402 u32 operation, u32 flags, 2403 u32 prefetch_region, u16 pat_index) 2404 { 2405 struct drm_gem_object *obj = bo ? 
&bo->ttm.base : NULL; 2406 struct drm_gpuva_ops *ops; 2407 struct drm_gpuva_op *__op; 2408 struct drm_gpuvm_bo *vm_bo; 2409 u64 range_start = addr; 2410 u64 range_end = addr + range; 2411 int err; 2412 2413 lockdep_assert_held_write(&vm->lock); 2414 2415 vm_dbg(&vm->xe->drm, 2416 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx", 2417 operation, (ULL)addr, (ULL)range, 2418 (ULL)bo_offset_or_userptr); 2419 2420 switch (operation) { 2421 case DRM_XE_VM_BIND_OP_MAP: 2422 if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) { 2423 xe_vm_find_cpu_addr_mirror_vma_range(vm, &range_start, &range_end); 2424 vops->flags |= XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP; 2425 } 2426 2427 fallthrough; 2428 case DRM_XE_VM_BIND_OP_MAP_USERPTR: { 2429 struct drm_gpuvm_map_req map_req = { 2430 .map.va.addr = range_start, 2431 .map.va.range = range_end - range_start, 2432 .map.gem.obj = obj, 2433 .map.gem.offset = bo_offset_or_userptr, 2434 }; 2435 2436 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, &map_req); 2437 break; 2438 } 2439 case DRM_XE_VM_BIND_OP_UNMAP: 2440 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range); 2441 break; 2442 case DRM_XE_VM_BIND_OP_PREFETCH: 2443 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range); 2444 break; 2445 case DRM_XE_VM_BIND_OP_UNMAP_ALL: 2446 xe_assert(vm->xe, bo); 2447 2448 err = xe_bo_lock(bo, true); 2449 if (err) 2450 return ERR_PTR(err); 2451 2452 vm_bo = drm_gpuvm_bo_obtain_locked(&vm->gpuvm, obj); 2453 if (IS_ERR(vm_bo)) { 2454 xe_bo_unlock(bo); 2455 return ERR_CAST(vm_bo); 2456 } 2457 2458 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo); 2459 drm_gpuvm_bo_put(vm_bo); 2460 xe_bo_unlock(bo); 2461 break; 2462 default: 2463 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n"); 2464 ops = ERR_PTR(-EINVAL); 2465 } 2466 if (IS_ERR(ops)) 2467 return ops; 2468 2469 drm_gpuva_for_each_op(__op, ops) { 2470 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2471 2472 if (__op->op == DRM_GPUVA_OP_MAP) { 2473 op->map.immediate = 2474 flags & 
DRM_XE_VM_BIND_FLAG_IMMEDIATE; 2475 if (flags & DRM_XE_VM_BIND_FLAG_READONLY) 2476 op->map.vma_flags |= XE_VMA_READ_ONLY; 2477 if (flags & DRM_XE_VM_BIND_FLAG_NULL) 2478 op->map.vma_flags |= DRM_GPUVA_SPARSE; 2479 if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) 2480 op->map.vma_flags |= XE_VMA_SYSTEM_ALLOCATOR; 2481 if (flags & DRM_XE_VM_BIND_FLAG_DUMPABLE) 2482 op->map.vma_flags |= XE_VMA_DUMPABLE; 2483 if (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) 2484 op->map.vma_flags |= XE_VMA_MADV_AUTORESET; 2485 op->map.request_decompress = flags & DRM_XE_VM_BIND_FLAG_DECOMPRESS; 2486 op->map.pat_index = pat_index; 2487 op->map.invalidate_on_bind = 2488 __xe_vm_needs_clear_scratch_pages(vm, flags); 2489 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { 2490 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2491 struct xe_tile *tile; 2492 struct xe_svm_range *svm_range; 2493 struct drm_gpusvm_ctx ctx = {}; 2494 struct drm_pagemap *dpagemap = NULL; 2495 u8 id, tile_mask = 0; 2496 u32 i; 2497 2498 if (!xe_vma_is_cpu_addr_mirror(vma)) { 2499 op->prefetch.region = prefetch_region; 2500 break; 2501 } 2502 2503 ctx.read_only = xe_vma_read_only(vma); 2504 ctx.devmem_possible = IS_DGFX(vm->xe) && 2505 IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); 2506 2507 for_each_tile(tile, vm->xe, id) 2508 tile_mask |= 0x1 << id; 2509 2510 xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC); 2511 op->prefetch_range.ranges_count = 0; 2512 2513 if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) { 2514 dpagemap = xe_vma_resolve_pagemap(vma, 2515 xe_device_get_root_tile(vm->xe)); 2516 } else if (prefetch_region) { 2517 tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] - 2518 XE_PL_VRAM0]; 2519 dpagemap = xe_tile_local_pagemap(tile); 2520 } 2521 2522 op->prefetch_range.dpagemap = dpagemap; 2523 alloc_next_range: 2524 svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx); 2525 2526 if (PTR_ERR(svm_range) == -ENOENT) { 2527 u64 ret = xe_svm_find_vma_start(vm, addr, range_end, 
vma); 2528 2529 addr = ret == ULONG_MAX ? 0 : ret; 2530 if (addr) 2531 goto alloc_next_range; 2532 else 2533 goto print_op_label; 2534 } 2535 2536 if (IS_ERR(svm_range)) { 2537 err = PTR_ERR(svm_range); 2538 goto unwind_prefetch_ops; 2539 } 2540 2541 if (xe_svm_range_validate(vm, svm_range, tile_mask, dpagemap)) { 2542 xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID"); 2543 goto check_next_range; 2544 } 2545 2546 err = xa_alloc(&op->prefetch_range.range, 2547 &i, svm_range, xa_limit_32b, 2548 GFP_KERNEL); 2549 2550 if (err) 2551 goto unwind_prefetch_ops; 2552 2553 op->prefetch_range.ranges_count++; 2554 vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH; 2555 xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED"); 2556 check_next_range: 2557 if (range_end > xe_svm_range_end(svm_range) && 2558 xe_svm_range_end(svm_range) < xe_vma_end(vma)) { 2559 addr = xe_svm_range_end(svm_range); 2560 goto alloc_next_range; 2561 } 2562 } 2563 print_op_label: 2564 print_op(vm->xe, __op); 2565 } 2566 2567 return ops; 2568 2569 unwind_prefetch_ops: 2570 xe_svm_prefetch_gpuva_ops_fini(ops); 2571 drm_gpuva_ops_free(&vm->gpuvm, ops); 2572 return ERR_PTR(err); 2573 } 2574 2575 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO); 2576 2577 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, 2578 struct xe_vma_mem_attr *attr, unsigned int flags) 2579 { 2580 struct xe_bo *bo = op->gem.obj ? 
gem_to_xe_bo(op->gem.obj) : NULL; 2581 struct xe_validation_ctx ctx; 2582 struct drm_exec exec; 2583 struct xe_vma *vma; 2584 int err = 0; 2585 2586 lockdep_assert_held_write(&vm->lock); 2587 2588 if (bo) { 2589 err = 0; 2590 xe_validation_guard(&ctx, &vm->xe->val, &exec, 2591 (struct xe_val_flags) {.interruptible = true}, err) { 2592 if (!bo->vm) { 2593 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm)); 2594 drm_exec_retry_on_contention(&exec); 2595 } 2596 if (!err) { 2597 err = drm_exec_lock_obj(&exec, &bo->ttm.base); 2598 drm_exec_retry_on_contention(&exec); 2599 } 2600 if (err) 2601 return ERR_PTR(err); 2602 2603 vma = xe_vma_create(vm, bo, op->gem.offset, 2604 op->va.addr, op->va.addr + 2605 op->va.range - 1, attr, flags); 2606 if (IS_ERR(vma)) 2607 return vma; 2608 2609 if (!bo->vm) { 2610 err = add_preempt_fences(vm, bo); 2611 if (err) { 2612 prep_vma_destroy(vm, vma, false); 2613 xe_vma_destroy(vma, NULL); 2614 } 2615 } 2616 } 2617 if (err) 2618 return ERR_PTR(err); 2619 } else { 2620 vma = xe_vma_create(vm, NULL, op->gem.offset, 2621 op->va.addr, op->va.addr + 2622 op->va.range - 1, attr, flags); 2623 if (IS_ERR(vma)) 2624 return vma; 2625 2626 if (xe_vma_is_userptr(vma)) { 2627 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2628 /* 2629 * -EBUSY has dedicated meaning that a user fence 2630 * attached to the VMA is busy, in practice 2631 * xe_vma_userptr_pin_pages can only fail with -EBUSY if 2632 * we are low on memory so convert this to -ENOMEM. 
2633 */ 2634 if (err == -EBUSY) 2635 err = -ENOMEM; 2636 } 2637 } 2638 if (err) { 2639 prep_vma_destroy(vm, vma, false); 2640 xe_vma_destroy_unlocked(vma); 2641 vma = ERR_PTR(err); 2642 } 2643 2644 return vma; 2645 } 2646 2647 static u64 xe_vma_max_pte_size(struct xe_vma *vma) 2648 { 2649 if (vma->gpuva.flags & XE_VMA_PTE_1G) 2650 return SZ_1G; 2651 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT)) 2652 return SZ_2M; 2653 else if (vma->gpuva.flags & XE_VMA_PTE_64K) 2654 return SZ_64K; 2655 else if (vma->gpuva.flags & XE_VMA_PTE_4K) 2656 return SZ_4K; 2657 2658 return SZ_1G; /* Uninitialized, used max size */ 2659 } 2660 2661 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size) 2662 { 2663 switch (size) { 2664 case SZ_1G: 2665 vma->gpuva.flags |= XE_VMA_PTE_1G; 2666 break; 2667 case SZ_2M: 2668 vma->gpuva.flags |= XE_VMA_PTE_2M; 2669 break; 2670 case SZ_64K: 2671 vma->gpuva.flags |= XE_VMA_PTE_64K; 2672 break; 2673 case SZ_4K: 2674 vma->gpuva.flags |= XE_VMA_PTE_4K; 2675 break; 2676 } 2677 } 2678 2679 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) 2680 { 2681 int err = 0; 2682 2683 lockdep_assert_held_write(&vm->lock); 2684 2685 switch (op->base.op) { 2686 case DRM_GPUVA_OP_MAP: 2687 err |= xe_vm_insert_vma(vm, op->map.vma); 2688 if (!err) 2689 op->flags |= XE_VMA_OP_COMMITTED; 2690 break; 2691 case DRM_GPUVA_OP_REMAP: 2692 { 2693 u8 tile_present = 2694 gpuva_to_vma(op->base.remap.unmap->va)->tile_present; 2695 2696 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va), 2697 true); 2698 op->flags |= XE_VMA_OP_COMMITTED; 2699 2700 if (op->remap.prev) { 2701 err |= xe_vm_insert_vma(vm, op->remap.prev); 2702 if (!err) 2703 op->flags |= XE_VMA_OP_PREV_COMMITTED; 2704 if (!err && op->remap.skip_prev) { 2705 op->remap.prev->tile_present = 2706 tile_present; 2707 } 2708 } 2709 if (op->remap.next) { 2710 err |= xe_vm_insert_vma(vm, op->remap.next); 2711 if (!err) 2712 op->flags |= XE_VMA_OP_NEXT_COMMITTED; 2713 if (!err && 
op->remap.skip_next) { 2714 op->remap.next->tile_present = 2715 tile_present; 2716 } 2717 } 2718 2719 /* 2720 * Adjust for partial unbind after removing VMA from VM. In case 2721 * of unwind we might need to undo this later. 2722 */ 2723 if (!err) { 2724 op->base.remap.unmap->va->va.addr = op->remap.start; 2725 op->base.remap.unmap->va->va.range = op->remap.range; 2726 } 2727 break; 2728 } 2729 case DRM_GPUVA_OP_UNMAP: 2730 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true); 2731 op->flags |= XE_VMA_OP_COMMITTED; 2732 break; 2733 case DRM_GPUVA_OP_PREFETCH: 2734 op->flags |= XE_VMA_OP_COMMITTED; 2735 break; 2736 default: 2737 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n"); 2738 } 2739 2740 return err; 2741 } 2742 2743 /** 2744 * xe_vma_has_default_mem_attrs - Check if a VMA has default memory attributes 2745 * @vma: Pointer to the xe_vma structure to check 2746 * 2747 * This function determines whether the given VMA (Virtual Memory Area) 2748 * has its memory attributes set to their default values. Specifically, 2749 * it checks the following conditions: 2750 * 2751 * - `atomic_access` is `DRM_XE_VMA_ATOMIC_UNDEFINED` 2752 * - `pat_index` is equal to `default_pat_index` 2753 * - `preferred_loc.devmem_fd` is `DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE` 2754 * - `preferred_loc.migration_policy` is `DRM_XE_MIGRATE_ALL_PAGES` 2755 * 2756 * Return: true if all attributes are at their default values, false otherwise. 
2757 */ 2758 bool xe_vma_has_default_mem_attrs(struct xe_vma *vma) 2759 { 2760 return (vma->attr.atomic_access == DRM_XE_ATOMIC_UNDEFINED && 2761 vma->attr.pat_index == vma->attr.default_pat_index && 2762 vma->attr.preferred_loc.devmem_fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE && 2763 vma->attr.preferred_loc.migration_policy == DRM_XE_MIGRATE_ALL_PAGES); 2764 } 2765 2766 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, 2767 struct xe_vma_ops *vops) 2768 { 2769 struct xe_device *xe = vm->xe; 2770 struct drm_gpuva_op *__op; 2771 struct xe_tile *tile; 2772 u8 id, tile_mask = 0; 2773 int err = 0; 2774 2775 lockdep_assert_held_write(&vm->lock); 2776 2777 for_each_tile(tile, vm->xe, id) 2778 tile_mask |= 0x1 << id; 2779 2780 drm_gpuva_for_each_op(__op, ops) { 2781 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2782 struct xe_vma *vma; 2783 unsigned int flags = 0; 2784 2785 INIT_LIST_HEAD(&op->link); 2786 list_add_tail(&op->link, &vops->list); 2787 op->tile_mask = tile_mask; 2788 2789 switch (op->base.op) { 2790 case DRM_GPUVA_OP_MAP: 2791 { 2792 struct xe_vma_mem_attr default_attr = { 2793 .preferred_loc = { 2794 .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE, 2795 .migration_policy = DRM_XE_MIGRATE_ALL_PAGES, 2796 }, 2797 .atomic_access = DRM_XE_ATOMIC_UNDEFINED, 2798 .default_pat_index = op->map.pat_index, 2799 .pat_index = op->map.pat_index, 2800 .purgeable_state = XE_MADV_PURGEABLE_WILLNEED, 2801 }; 2802 2803 flags |= op->map.vma_flags & XE_VMA_CREATE_MASK; 2804 2805 vma = new_vma(vm, &op->base.map, &default_attr, 2806 flags); 2807 if (IS_ERR(vma)) 2808 return PTR_ERR(vma); 2809 2810 op->map.vma = vma; 2811 if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) && 2812 !(op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) || 2813 op->map.invalidate_on_bind) 2814 xe_vma_ops_incr_pt_update_ops(vops, 2815 op->tile_mask, 1); 2816 break; 2817 } 2818 case DRM_GPUVA_OP_REMAP: 2819 { 2820 struct xe_vma *old = 2821 
gpuva_to_vma(op->base.remap.unmap->va); 2822 bool skip = xe_vma_is_cpu_addr_mirror(old); 2823 u64 start = xe_vma_start(old), end = xe_vma_end(old); 2824 int num_remap_ops = 0; 2825 2826 if (op->base.remap.prev) 2827 start = op->base.remap.prev->va.addr + 2828 op->base.remap.prev->va.range; 2829 if (op->base.remap.next) 2830 end = op->base.remap.next->va.addr; 2831 2832 if (xe_vma_is_cpu_addr_mirror(old) && 2833 xe_svm_has_mapping(vm, start, end)) { 2834 if (vops->flags & XE_VMA_OPS_FLAG_MADVISE) 2835 xe_svm_unmap_address_range(vm, start, end); 2836 else 2837 return -EBUSY; 2838 } 2839 2840 op->remap.start = xe_vma_start(old); 2841 op->remap.range = xe_vma_size(old); 2842 op->remap.old_start = op->remap.start; 2843 op->remap.old_range = op->remap.range; 2844 2845 flags |= op->base.remap.unmap->va->flags & XE_VMA_CREATE_MASK; 2846 if (op->base.remap.prev) { 2847 vma = new_vma(vm, op->base.remap.prev, 2848 &old->attr, flags); 2849 if (IS_ERR(vma)) 2850 return PTR_ERR(vma); 2851 2852 op->remap.prev = vma; 2853 2854 /* 2855 * Userptr creates a new SG mapping so 2856 * we must also rebind. 2857 */ 2858 op->remap.skip_prev = skip || 2859 (!xe_vma_is_userptr(old) && 2860 IS_ALIGNED(xe_vma_end(vma), 2861 xe_vma_max_pte_size(old))); 2862 if (op->remap.skip_prev) { 2863 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2864 op->remap.range -= 2865 xe_vma_end(vma) - 2866 xe_vma_start(old); 2867 op->remap.start = xe_vma_end(vma); 2868 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx", 2869 (ULL)op->remap.start, 2870 (ULL)op->remap.range); 2871 } else { 2872 num_remap_ops++; 2873 } 2874 } 2875 2876 if (op->base.remap.next) { 2877 vma = new_vma(vm, op->base.remap.next, 2878 &old->attr, flags); 2879 if (IS_ERR(vma)) 2880 return PTR_ERR(vma); 2881 2882 op->remap.next = vma; 2883 2884 /* 2885 * Userptr creates a new SG mapping so 2886 * we must also rebind. 
2887 */ 2888 op->remap.skip_next = skip || 2889 (!xe_vma_is_userptr(old) && 2890 IS_ALIGNED(xe_vma_start(vma), 2891 xe_vma_max_pte_size(old))); 2892 if (op->remap.skip_next) { 2893 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2894 op->remap.range -= 2895 xe_vma_end(old) - 2896 xe_vma_start(vma); 2897 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx", 2898 (ULL)op->remap.start, 2899 (ULL)op->remap.range); 2900 } else { 2901 num_remap_ops++; 2902 } 2903 } 2904 if (!skip) 2905 num_remap_ops++; 2906 2907 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops); 2908 break; 2909 } 2910 case DRM_GPUVA_OP_UNMAP: 2911 vma = gpuva_to_vma(op->base.unmap.va); 2912 2913 if (xe_vma_is_cpu_addr_mirror(vma) && 2914 xe_svm_has_mapping(vm, xe_vma_start(vma), 2915 xe_vma_end(vma)) && 2916 !(vops->flags & XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP)) 2917 return -EBUSY; 2918 2919 if (!xe_vma_is_cpu_addr_mirror(vma)) 2920 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2921 break; 2922 case DRM_GPUVA_OP_PREFETCH: 2923 vma = gpuva_to_vma(op->base.prefetch.va); 2924 2925 if (xe_vma_is_userptr(vma)) { 2926 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2927 if (err) 2928 return err; 2929 } 2930 2931 if (xe_vma_is_cpu_addr_mirror(vma)) 2932 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 2933 op->prefetch_range.ranges_count); 2934 else 2935 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2936 2937 break; 2938 default: 2939 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n"); 2940 } 2941 2942 err = xe_vma_op_commit(vm, op); 2943 if (err) 2944 return err; 2945 } 2946 2947 return 0; 2948 } 2949 2950 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, 2951 bool post_commit, bool prev_post_commit, 2952 bool next_post_commit) 2953 { 2954 lockdep_assert_held_write(&vm->lock); 2955 2956 switch (op->base.op) { 2957 case DRM_GPUVA_OP_MAP: 2958 if (op->map.vma) { 2959 prep_vma_destroy(vm, op->map.vma, post_commit); 2960 
xe_vma_destroy_unlocked(op->map.vma); 2961 } 2962 break; 2963 case DRM_GPUVA_OP_UNMAP: 2964 { 2965 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va); 2966 2967 if (vma) { 2968 xe_svm_notifier_lock(vm); 2969 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2970 xe_svm_notifier_unlock(vm); 2971 if (post_commit) 2972 xe_vm_insert_vma(vm, vma); 2973 } 2974 break; 2975 } 2976 case DRM_GPUVA_OP_REMAP: 2977 { 2978 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va); 2979 2980 if (op->remap.prev) { 2981 prep_vma_destroy(vm, op->remap.prev, prev_post_commit); 2982 xe_vma_destroy_unlocked(op->remap.prev); 2983 } 2984 if (op->remap.next) { 2985 prep_vma_destroy(vm, op->remap.next, next_post_commit); 2986 xe_vma_destroy_unlocked(op->remap.next); 2987 } 2988 if (vma) { 2989 xe_svm_notifier_lock(vm); 2990 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2991 xe_svm_notifier_unlock(vm); 2992 if (post_commit) { 2993 /* 2994 * Restore the old va range, in case of the 2995 * prev/next skip optimisation. Otherwise what 2996 * we re-insert here could be smaller than the 2997 * original range. 
2998 */ 2999 op->base.remap.unmap->va->va.addr = 3000 op->remap.old_start; 3001 op->base.remap.unmap->va->va.range = 3002 op->remap.old_range; 3003 xe_vm_insert_vma(vm, vma); 3004 } 3005 } 3006 break; 3007 } 3008 case DRM_GPUVA_OP_PREFETCH: 3009 /* Nothing to do */ 3010 break; 3011 default: 3012 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n"); 3013 } 3014 } 3015 3016 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, 3017 struct drm_gpuva_ops **ops, 3018 int num_ops_list) 3019 { 3020 int i; 3021 3022 for (i = num_ops_list - 1; i >= 0; --i) { 3023 struct drm_gpuva_ops *__ops = ops[i]; 3024 struct drm_gpuva_op *__op; 3025 3026 if (!__ops) 3027 continue; 3028 3029 drm_gpuva_for_each_op_reverse(__op, __ops) { 3030 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 3031 3032 xe_vma_op_unwind(vm, op, 3033 op->flags & XE_VMA_OP_COMMITTED, 3034 op->flags & XE_VMA_OP_PREV_COMMITTED, 3035 op->flags & XE_VMA_OP_NEXT_COMMITTED); 3036 } 3037 } 3038 } 3039 3040 /** 3041 * struct xe_vma_lock_and_validate_flags - Flags for vma_lock_and_validate() 3042 * @res_evict: Allow evicting resources during validation 3043 * @validate: Perform BO validation 3044 * @request_decompress: Request BO decompression 3045 * @check_purged: Reject operation if BO is DONTNEED or PURGED 3046 */ 3047 struct xe_vma_lock_and_validate_flags { 3048 u32 res_evict : 1; 3049 u32 validate : 1; 3050 u32 request_decompress : 1; 3051 u32 check_purged : 1; 3052 }; 3053 3054 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, 3055 struct xe_vma_lock_and_validate_flags flags) 3056 { 3057 struct xe_bo *bo = xe_vma_bo(vma); 3058 struct xe_vm *vm = xe_vma_vm(vma); 3059 bool validate_bo = flags.validate; 3060 int err = 0; 3061 3062 if (bo) { 3063 if (!bo->vm) 3064 err = drm_exec_lock_obj(exec, &bo->ttm.base); 3065 3066 /* Reject new mappings to DONTNEED/purged BOs; allow cleanup operations */ 3067 if (!err && flags.check_purged) { 3068 if (xe_bo_madv_is_dontneed(bo)) 3069 err = -EBUSY; /* BO marked 
purgeable */ 3070 else if (xe_bo_is_purged(bo)) 3071 err = -EINVAL; /* BO already purged */ 3072 } 3073 3074 /* Don't validate the BO for DONTNEED/PURGED remap remnants. */ 3075 if (vma->attr.purgeable_state != XE_MADV_PURGEABLE_WILLNEED) 3076 validate_bo = false; 3077 3078 if (!err && validate_bo) 3079 err = xe_bo_validate(bo, vm, 3080 xe_vm_allow_vm_eviction(vm) && 3081 flags.res_evict, exec); 3082 3083 if (err) 3084 return err; 3085 3086 if (flags.request_decompress) 3087 err = xe_bo_decompress(bo); 3088 } 3089 3090 return err; 3091 } 3092 3093 static int check_ufence(struct xe_vma *vma) 3094 { 3095 if (vma->ufence) { 3096 struct xe_user_fence * const f = vma->ufence; 3097 3098 if (!xe_sync_ufence_get_status(f)) 3099 return -EBUSY; 3100 3101 vma->ufence = NULL; 3102 xe_sync_ufence_put(f); 3103 } 3104 3105 return 0; 3106 } 3107 3108 static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) 3109 { 3110 bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); 3111 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 3112 struct drm_pagemap *dpagemap = op->prefetch_range.dpagemap; 3113 int err = 0; 3114 3115 struct xe_svm_range *svm_range; 3116 struct drm_gpusvm_ctx ctx = {}; 3117 unsigned long i; 3118 3119 if (!xe_vma_is_cpu_addr_mirror(vma)) 3120 return 0; 3121 3122 ctx.read_only = xe_vma_read_only(vma); 3123 ctx.devmem_possible = devmem_possible; 3124 ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0; 3125 ctx.device_private_page_owner = xe_svm_private_page_owner(vm, !dpagemap); 3126 3127 /* TODO: Threading the migration */ 3128 xa_for_each(&op->prefetch_range.range, i, svm_range) { 3129 if (!dpagemap) 3130 xe_svm_range_migrate_to_smem(vm, svm_range); 3131 3132 if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) { 3133 drm_dbg(&vm->xe->drm, 3134 "Prefetch pagemap is %s start 0x%016lx end 0x%016lx\n", 3135 dpagemap ? 
dpagemap->drm->unique : "system", 3136 xe_svm_range_start(svm_range), xe_svm_range_end(svm_range)); 3137 } 3138 3139 if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, dpagemap)) { 3140 err = xe_svm_alloc_vram(svm_range, &ctx, dpagemap); 3141 if (err) { 3142 drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n", 3143 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 3144 return -ENODATA; 3145 } 3146 xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM"); 3147 } 3148 3149 err = xe_svm_range_get_pages(vm, svm_range, &ctx); 3150 if (err) { 3151 drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n", 3152 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 3153 if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) 3154 err = -ENODATA; 3155 return err; 3156 } 3157 xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE"); 3158 } 3159 3160 return err; 3161 } 3162 3163 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, 3164 struct xe_vma_ops *vops, struct xe_vma_op *op) 3165 { 3166 int err = 0; 3167 bool res_evict; 3168 3169 /* 3170 * We only allow evicting a BO within the VM if it is not part of an 3171 * array of binds, as an array of binds can evict another BO within the 3172 * bind. 
3173 */ 3174 res_evict = !(vops->flags & XE_VMA_OPS_ARRAY_OF_BINDS); 3175 3176 switch (op->base.op) { 3177 case DRM_GPUVA_OP_MAP: 3178 if (!op->map.invalidate_on_bind) 3179 err = vma_lock_and_validate(exec, op->map.vma, 3180 (struct xe_vma_lock_and_validate_flags) { 3181 .res_evict = res_evict, 3182 .validate = !xe_vm_in_fault_mode(vm) || 3183 op->map.immediate, 3184 .request_decompress = 3185 op->map.request_decompress, 3186 .check_purged = false, 3187 }); 3188 break; 3189 case DRM_GPUVA_OP_REMAP: 3190 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va)); 3191 if (err) 3192 break; 3193 3194 err = vma_lock_and_validate(exec, 3195 gpuva_to_vma(op->base.remap.unmap->va), 3196 (struct xe_vma_lock_and_validate_flags) { 3197 .res_evict = res_evict, 3198 .validate = false, 3199 .request_decompress = false, 3200 .check_purged = false, 3201 }); 3202 if (!err && op->remap.prev) 3203 err = vma_lock_and_validate(exec, op->remap.prev, 3204 (struct xe_vma_lock_and_validate_flags) { 3205 .res_evict = res_evict, 3206 .validate = true, 3207 .request_decompress = false, 3208 .check_purged = false, 3209 }); 3210 if (!err && op->remap.next) 3211 err = vma_lock_and_validate(exec, op->remap.next, 3212 (struct xe_vma_lock_and_validate_flags) { 3213 .res_evict = res_evict, 3214 .validate = true, 3215 .request_decompress = false, 3216 .check_purged = false, 3217 }); 3218 break; 3219 case DRM_GPUVA_OP_UNMAP: 3220 err = check_ufence(gpuva_to_vma(op->base.unmap.va)); 3221 if (err) 3222 break; 3223 3224 err = vma_lock_and_validate(exec, 3225 gpuva_to_vma(op->base.unmap.va), 3226 (struct xe_vma_lock_and_validate_flags) { 3227 .res_evict = res_evict, 3228 .validate = false, 3229 .request_decompress = false, 3230 .check_purged = false, 3231 }); 3232 break; 3233 case DRM_GPUVA_OP_PREFETCH: 3234 { 3235 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 3236 u32 region; 3237 3238 if (!xe_vma_is_cpu_addr_mirror(vma)) { 3239 region = op->prefetch.region; 3240 xe_assert(vm->xe, region == 
DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC || 3241 region <= ARRAY_SIZE(region_to_mem_type)); 3242 } 3243 3244 /* 3245 * PREFETCH is the only op that still gates on BO purge state. 3246 * MAP/REMAP handle this inside xe_vma_create() so partial 3247 * unbind on a DONTNEED BO still works. PREFETCH skips 3248 * xe_vma_create() and would migrate a BO with no backing 3249 * store, so reject DONTNEED/PURGED here. 3250 */ 3251 err = vma_lock_and_validate(exec, 3252 gpuva_to_vma(op->base.prefetch.va), 3253 (struct xe_vma_lock_and_validate_flags) { 3254 .res_evict = res_evict, 3255 .validate = false, 3256 .request_decompress = false, 3257 .check_purged = true, 3258 }); 3259 if (!err && !xe_vma_has_no_bo(vma)) 3260 err = xe_bo_migrate(xe_vma_bo(vma), 3261 region_to_mem_type[region], 3262 NULL, 3263 exec); 3264 break; 3265 } 3266 default: 3267 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n"); 3268 } 3269 3270 return err; 3271 } 3272 3273 static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops) 3274 { 3275 struct xe_vma_op *op; 3276 int err; 3277 3278 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) 3279 return 0; 3280 3281 list_for_each_entry(op, &vops->list, link) { 3282 if (op->base.op == DRM_GPUVA_OP_PREFETCH) { 3283 err = prefetch_ranges(vm, op); 3284 if (err) 3285 return err; 3286 } 3287 } 3288 3289 return 0; 3290 } 3291 3292 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, 3293 struct xe_vm *vm, 3294 struct xe_vma_ops *vops) 3295 { 3296 struct xe_vma_op *op; 3297 int err; 3298 3299 err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 3300 if (err) 3301 return err; 3302 3303 list_for_each_entry(op, &vops->list, link) { 3304 err = op_lock_and_prep(exec, vm, vops, op); 3305 if (err) 3306 return err; 3307 } 3308 3309 #ifdef TEST_VM_OPS_ERROR 3310 if (vops->inject_error && 3311 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK) 3312 return -ENOSPC; 3313 #endif 3314 3315 return 0; 3316 } 3317 3318 static void op_trace(struct xe_vma_op *op) 
3319 { 3320 switch (op->base.op) { 3321 case DRM_GPUVA_OP_MAP: 3322 trace_xe_vma_bind(op->map.vma); 3323 break; 3324 case DRM_GPUVA_OP_REMAP: 3325 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va)); 3326 if (op->remap.prev) 3327 trace_xe_vma_bind(op->remap.prev); 3328 if (op->remap.next) 3329 trace_xe_vma_bind(op->remap.next); 3330 break; 3331 case DRM_GPUVA_OP_UNMAP: 3332 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va)); 3333 break; 3334 case DRM_GPUVA_OP_PREFETCH: 3335 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va)); 3336 break; 3337 case DRM_GPUVA_OP_DRIVER: 3338 break; 3339 default: 3340 XE_WARN_ON("NOT POSSIBLE"); 3341 } 3342 } 3343 3344 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops) 3345 { 3346 struct xe_vma_op *op; 3347 3348 list_for_each_entry(op, &vops->list, link) 3349 op_trace(op); 3350 } 3351 3352 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops) 3353 { 3354 struct xe_exec_queue *q = vops->q; 3355 struct xe_tile *tile; 3356 int number_tiles = 0; 3357 u8 id; 3358 3359 for_each_tile(tile, vm->xe, id) { 3360 if (vops->pt_update_ops[id].num_ops) 3361 ++number_tiles; 3362 3363 if (vops->pt_update_ops[id].q) 3364 continue; 3365 3366 if (q) { 3367 vops->pt_update_ops[id].q = q; 3368 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list)) 3369 q = list_next_entry(q, multi_gt_list); 3370 } else { 3371 vops->pt_update_ops[id].q = vm->q[id]; 3372 } 3373 } 3374 3375 return number_tiles; 3376 } 3377 3378 static struct dma_fence *ops_execute(struct xe_vm *vm, 3379 struct xe_vma_ops *vops) 3380 { 3381 struct xe_tile *tile; 3382 struct dma_fence *fence = NULL; 3383 struct dma_fence **fences = NULL; 3384 struct dma_fence_array *cf = NULL; 3385 int number_tiles = 0, current_fence = 0, n_fence = 0, err, i; 3386 u8 id; 3387 3388 number_tiles = vm_ops_setup_tile_args(vm, vops); 3389 if (number_tiles == 0) 3390 return ERR_PTR(-ENODATA); 3391 3392 for_each_tile(tile, vm->xe, id) { 3393 ++n_fence; 3394 3395 if 
(!(vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT)) 3396 for_each_tlb_inval(i) 3397 ++n_fence; 3398 } 3399 3400 fences = kmalloc_objs(*fences, n_fence); 3401 if (!fences) { 3402 fence = ERR_PTR(-ENOMEM); 3403 goto err_trace; 3404 } 3405 3406 cf = dma_fence_array_alloc(n_fence); 3407 if (!cf) { 3408 fence = ERR_PTR(-ENOMEM); 3409 goto err_out; 3410 } 3411 3412 for_each_tile(tile, vm->xe, id) { 3413 if (!vops->pt_update_ops[id].num_ops) 3414 continue; 3415 3416 err = xe_pt_update_ops_prepare(tile, vops); 3417 if (err) { 3418 fence = ERR_PTR(err); 3419 goto err_out; 3420 } 3421 } 3422 3423 trace_xe_vm_ops_execute(vops); 3424 3425 for_each_tile(tile, vm->xe, id) { 3426 struct xe_exec_queue *q = vops->pt_update_ops[tile->id].q; 3427 3428 fence = NULL; 3429 if (!vops->pt_update_ops[id].num_ops) 3430 goto collect_fences; 3431 3432 fence = xe_pt_update_ops_run(tile, vops); 3433 if (IS_ERR(fence)) 3434 goto err_out; 3435 3436 collect_fences: 3437 fences[current_fence++] = fence ?: dma_fence_get_stub(); 3438 if (vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT) 3439 continue; 3440 3441 xe_migrate_job_lock(tile->migrate, q); 3442 for_each_tlb_inval(i) 3443 fences[current_fence++] = 3444 xe_exec_queue_tlb_inval_last_fence_get(q, vm, i); 3445 xe_migrate_job_unlock(tile->migrate, q); 3446 } 3447 3448 xe_assert(vm->xe, current_fence == n_fence); 3449 dma_fence_array_init(cf, n_fence, fences, dma_fence_context_alloc(1), 3450 1, false); 3451 fence = &cf->base; 3452 3453 for_each_tile(tile, vm->xe, id) { 3454 if (!vops->pt_update_ops[id].num_ops) 3455 continue; 3456 3457 xe_pt_update_ops_fini(tile, vops); 3458 } 3459 3460 return fence; 3461 3462 err_out: 3463 for_each_tile(tile, vm->xe, id) { 3464 if (!vops->pt_update_ops[id].num_ops) 3465 continue; 3466 3467 xe_pt_update_ops_abort(tile, vops); 3468 } 3469 while (current_fence) 3470 dma_fence_put(fences[--current_fence]); 3471 kfree(fences); 3472 kfree(cf); 3473 3474 err_trace: 3475 trace_xe_vm_ops_fail(vm); 3476 return fence; 3477 } 3478 
/*
 * Attach @ufence to @vma. Any user fence already stored in the VMA has its
 * reference dropped first; the VMA then stores the reference returned by
 * __xe_sync_ufence_get().
 */
static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence)
{
	if (vma->ufence)
		xe_sync_ufence_put(vma->ufence);
	vma->ufence = __xe_sync_ufence_get(ufence);
}

/*
 * Attach @ufence to the VMA(s) associated with @op:
 *  - MAP: the new VMA, unless it is a CPU address mirror VMA
 *  - REMAP: the prev and/or next VMA, whichever exist
 *  - UNMAP: nothing
 *  - PREFETCH: the VMA being prefetched
 * Any other op type only triggers a warning.
 */
static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
			  struct xe_user_fence *ufence)
{
	switch (op->base.op) {
	case DRM_GPUVA_OP_MAP:
		if (!xe_vma_is_cpu_addr_mirror(op->map.vma))
			vma_add_ufence(op->map.vma, ufence);
		break;
	case DRM_GPUVA_OP_REMAP:
		if (op->remap.prev)
			vma_add_ufence(op->remap.prev, ufence);
		if (op->remap.next)
			vma_add_ufence(op->remap.next, ufence);
		break;
	case DRM_GPUVA_OP_UNMAP:
		break;
	case DRM_GPUVA_OP_PREFETCH:
		vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence);
		break;
	default:
		drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
	}
}

/*
 * Finish a set of executed bind operations.
 *
 * Looks up the user fence (if any) among the syncs of @vops and attaches it
 * to the VMAs touched by each op, destroys the VMAs that were unmapped by
 * UNMAP/REMAP ops (passing @fence to xe_vma_destroy()), and, when @fence is
 * non-NULL, signals every sync entry of @vops with it.
 */
static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
				   struct dma_fence *fence)
{
	struct xe_user_fence *ufence;
	struct xe_vma_op *op;
	int i;

	ufence = find_ufence_get(vops->syncs, vops->num_syncs);
	list_for_each_entry(op, &vops->list, link) {
		if (ufence)
			op_add_ufence(vm, op, ufence);

		if (op->base.op == DRM_GPUVA_OP_UNMAP)
			xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence);
		else if (op->base.op == DRM_GPUVA_OP_REMAP)
			xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va),
				       fence);
	}
	/* Drop the lookup reference taken by find_ufence_get() */
	if (ufence)
		xe_sync_ufence_put(ufence);
	if (fence) {
		for (i = 0; i < vops->num_syncs; i++)
			xe_sync_entry_signal(vops->syncs + i, fence);
	}
}

/*
 * Lock, prepare and execute the operations in @vops.
 *
 * Must be called with vm->lock held for write. Runs inside an
 * xe_validation_guard() section; the lock-and-prep step is followed by the
 * drm_exec contention and validation OOM retry helpers.
 *
 * Returns the fence produced by ops_execute() on success, or an ERR_PTR. When
 * ops_execute() fails with -ENODATA the ops are still finalized (with a NULL
 * fence) before the error pointer is returned.
 */
static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm,
						   struct xe_vma_ops *vops)
{
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	struct dma_fence *fence;
	int err = 0;

	lockdep_assert_held_write(&vm->lock);

	xe_validation_guard(&ctx, &vm->xe->val, &exec,
			    ((struct xe_val_flags) {
				    .interruptible = true,
				    .exec_ignore_duplicates = true,
			    }), err) {
		err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
		drm_exec_retry_on_contention(&exec);
		xe_validation_retry_on_oom(&ctx, &err);
		if (err)
			return ERR_PTR(err);

		/* Expose the exec context to the VM only while executing */
		xe_vm_set_validation_exec(vm, &exec);
		fence = ops_execute(vm, vops);
		xe_vm_set_validation_exec(vm, NULL);
		if (IS_ERR(fence)) {
			if (PTR_ERR(fence) == -ENODATA)
				vm_bind_ioctl_ops_fini(vm, vops, NULL);
			return fence;
		}

		vm_bind_ioctl_ops_fini(vm, vops, fence);
	}

	return err ? ERR_PTR(err) : fence;
}
ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);

/* Bind flags accepted from userspace by vm_bind_ioctl_check_args() */
#define SUPPORTED_FLAGS_STUB  \
	(DRM_XE_VM_BIND_FLAG_READONLY | \
	 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
	 DRM_XE_VM_BIND_FLAG_NULL | \
	 DRM_XE_VM_BIND_FLAG_DUMPABLE | \
	 DRM_XE_VM_BIND_FLAG_CHECK_PXP | \
	 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR | \
	 DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET | \
	 DRM_XE_VM_BIND_FLAG_DECOMPRESS)

/* With TEST_VM_OPS_ERROR the error-injection flag is accepted as well */
#ifdef TEST_VM_OPS_ERROR
#define SUPPORTED_FLAGS	(SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR)
#else
#define SUPPORTED_FLAGS	SUPPORTED_FLAGS_STUB
#endif

/* Low 16 bits: non-zero means the value is not 64 KiB aligned */
#define XE_64K_PAGE_MASK 0xffffull
#define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)

/*
 * Validate the user-supplied VM bind arguments and fetch the bind ops.
 *
 * For args->num_binds > 1 the array of bind ops is copied from userspace
 * into a kvmalloc'ed buffer returned through @bind_ops; otherwise @bind_ops
 * is pointed at the op embedded in @args. Each op is then checked for a valid
 * PAT index, a consistent op/flag/object combination, page alignment and
 * decompression support.
 *
 * On a failure detected after the ops were fetched, the copied buffer (if
 * any) is freed and *bind_ops is set to NULL. On success with
 * args->num_binds > 1 the caller owns the buffer and must kvfree() it.
 *
 * Returns 0 on success, -EINVAL for invalid arguments, -ENOBUFS if the op
 * array could not be allocated, -EFAULT if it could not be copied from
 * userspace and -EOPNOTSUPP if decompression is requested but not supported.
 */
static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
				    struct drm_xe_vm_bind *args,
				    struct drm_xe_vm_bind_op **bind_ops)
{
	int err;
	int i;

	if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->extensions))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->num_syncs > DRM_XE_MAX_SYNCS))
		return -EINVAL;

	if (args->num_binds > 1) {
		u64 __user *bind_user =
			u64_to_user_ptr(args->vector_of_binds);

		*bind_ops = kvmalloc_objs(struct drm_xe_vm_bind_op,
					  args->num_binds,
					  GFP_KERNEL | __GFP_ACCOUNT | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
		/* num_binds > 1 always holds here, so this yields -ENOBUFS */
		if (!*bind_ops)
			return args->num_binds > 1 ? -ENOBUFS : -ENOMEM;

		err = copy_from_user(*bind_ops, bind_user,
				     sizeof(struct drm_xe_vm_bind_op) *
				     args->num_binds);
		if (XE_IOCTL_DBG(xe, err)) {
			err = -EFAULT;
			goto free_bind_ops;
		}
	} else {
		*bind_ops = &args->bind;
	}

	for (i = 0; i < args->num_binds; ++i) {
		u64 range = (*bind_ops)[i].range;
		u64 addr = (*bind_ops)[i].addr;
		u32 op = (*bind_ops)[i].op;
		u32 flags = (*bind_ops)[i].flags;
		u32 obj = (*bind_ops)[i].obj;
		u64 obj_offset = (*bind_ops)[i].obj_offset;
		u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
		bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
		bool is_cpu_addr_mirror = flags &
			DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR;
		bool is_decompress = flags & DRM_XE_VM_BIND_FLAG_DECOMPRESS;
		u16 pat_index = (*bind_ops)[i].pat_index;
		u16 coh_mode;
		bool comp_en;

		/* CPU address mirror needs a fault-mode VM and GPUSVM support */
		if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror &&
				 (!xe_vm_in_fault_mode(vm) ||
				 !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) {
			err = -EINVAL;
			goto free_bind_ops;
		}

		if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
			err = -EINVAL;
			goto free_bind_ops;
		}

		/*
		 * Clamp the index under speculation and store the sanitized
		 * value back into the bind op.
		 */
		pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
		(*bind_ops)[i].pat_index = pat_index;
		coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
		comp_en = xe_pat_index_get_comp_en(xe, pat_index);
		if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
			err = -EINVAL;
			goto free_bind_ops;
		}

		if (XE_WARN_ON(coh_mode > XE_COH_2WAY)) {
			err = -EINVAL;
			goto free_bind_ops;
		}

		/*
		 * Reject inconsistent op/flag/object combinations.
		 * NOTE(review): pat_index 19 below is a bare literal whose
		 * meaning is not visible here - confirm and consider naming it.
		 */
		if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
		    XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
		    XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) ||
		    XE_IOCTL_DBG(xe, obj_offset && (is_null ||
						      is_cpu_addr_mirror)) ||
		    XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
				 (is_decompress || is_null || is_cpu_addr_mirror)) ||
		    XE_IOCTL_DBG(xe, is_decompress &&
				 xe_pat_index_get_comp_en(xe, pat_index)) ||
		    XE_IOCTL_DBG(xe, !obj &&
				 op == DRM_XE_VM_BIND_OP_MAP &&
				 !is_null && !is_cpu_addr_mirror) ||
		    XE_IOCTL_DBG(xe, !obj &&
				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
		    XE_IOCTL_DBG(xe, addr &&
				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
		    XE_IOCTL_DBG(xe, range &&
				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
		    XE_IOCTL_DBG(xe, obj &&
				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
		    XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
		    XE_IOCTL_DBG(xe, !IS_DGFX(xe) && coh_mode == XE_COH_NONE &&
				 is_cpu_addr_mirror) ||
		    XE_IOCTL_DBG(xe, xe_device_is_l2_flush_optimized(xe) &&
				 (op == DRM_XE_VM_BIND_OP_MAP_USERPTR ||
				  is_cpu_addr_mirror) &&
				 (pat_index != 19 && coh_mode != XE_COH_2WAY)) ||
		    XE_IOCTL_DBG(xe, comp_en &&
				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
		    XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR &&
				 !IS_ENABLED(CONFIG_DRM_GPUSVM)) ||
		    XE_IOCTL_DBG(xe, obj &&
				 op == DRM_XE_VM_BIND_OP_PREFETCH) ||
		    XE_IOCTL_DBG(xe, prefetch_region &&
				 op != DRM_XE_VM_BIND_OP_PREFETCH) ||
		    XE_IOCTL_DBG(xe,  (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC &&
				      /* Guard against undefined shift in BIT(prefetch_region) */
				      (prefetch_region >= (sizeof(xe->info.mem_region_mask) * 8) ||
				      !(BIT(prefetch_region) & xe->info.mem_region_mask)))) ||
		    XE_IOCTL_DBG(xe, obj &&
				 op == DRM_XE_VM_BIND_OP_UNMAP) ||
		    XE_IOCTL_DBG(xe, (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) &&
				 (!is_cpu_addr_mirror || op != DRM_XE_VM_BIND_OP_MAP))) {
			err = -EINVAL;
			goto free_bind_ops;
		}

		/* Everything must be page aligned; only UNMAP_ALL may have no range */
		if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, !range &&
				 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
			err = -EINVAL;
			goto free_bind_ops;
		}

		if (is_decompress && (XE_IOCTL_DBG(xe, !xe_device_has_flat_ccs(xe)) ||
				      XE_IOCTL_DBG(xe, GRAPHICS_VER(xe) < 20) ||
				      XE_IOCTL_DBG(xe, !IS_DGFX(xe)))) {
			err = -EOPNOTSUPP;
			goto free_bind_ops;
		}
	}

	return 0;

free_bind_ops:
	/* Only the copied array is heap allocated; &args->bind is not */
	if (args->num_binds > 1)
		kvfree(*bind_ops);
	*bind_ops = NULL;
	return err;
}

/*
 * Signal the sync entries of a bind call that has no operations to execute.
 *
 * When @num_syncs is non-zero, a fence is obtained from
 * xe_sync_in_fence_get() and every entry in @syncs is signalled with it.
 *
 * Returns 0 on success or the error from xe_sync_in_fence_get().
 */
static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
				       struct xe_exec_queue *q,
				       struct xe_sync_entry *syncs,
				       int num_syncs)
{
	struct dma_fence *fence = NULL;
	int i, err = 0;

	if (num_syncs) {
		fence = xe_sync_in_fence_get(syncs, num_syncs,
					     to_wait_exec_queue(vm, q), vm);
		if (IS_ERR(fence))
			return PTR_ERR(fence);

		for (i = 0; i < num_syncs; i++)
			xe_sync_entry_signal(&syncs[i], fence);
	}

	dma_fence_put(fence);

	return err;
}

/*
 * Initialize @vops: zero the structure, set up its (empty) op list and record
 * the VM, exec queue and sync entries the operations will use.
 */
static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
			    struct xe_exec_queue *q,
			    struct xe_sync_entry *syncs, u32 num_syncs)
{
	memset(vops, 0, sizeof(*vops));
	INIT_LIST_HEAD(&vops->list);
	vops->vm = vm;
	vops->q = q;
	vops->syncs = syncs;
	vops->num_syncs = num_syncs;
	vops->flags = 0;
}

/*
 * Validate a bind op against the buffer object it targets.
 *
 * Checks compression restrictions, that [obj_offset, obj_offset + range)
 * lies within the BO, 64k alignment where required, coherency mode against
 * the BO's CPU caching, restrictions for imported dma-bufs and, when
 * DRM_XE_VM_BIND_FLAG_CHECK_PXP is set for a non-unmap op on a protected BO,
 * the validity of its PXP key.
 *
 * Returns 0 on success, -EINVAL on an invalid combination, or -ENOEXEC if the
 * PXP key check fails.
 */
static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
					u64 addr, u64 range, u64 obj_offset,
					u16 pat_index, u32 op, u32 bind_flags)
{
	u16 coh_mode;
	bool comp_en;

	if (XE_IOCTL_DBG(xe, (bo->flags & XE_BO_FLAG_NO_COMPRESSION) &&
			 xe_pat_index_get_comp_en(xe, pat_index)))
		return -EINVAL;

	/* Written as a subtraction so obj_offset + range cannot overflow */
	if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) ||
	    XE_IOCTL_DBG(xe, obj_offset >
			 xe_bo_size(bo) - range)) {
		return -EINVAL;
	}

	/*
	 * Some platforms require 64k VM_BIND alignment,
	 * specifically those with XE_VRAM_FLAGS_NEED64K.
	 *
	 * Other platforms may have BO's set to 64k physical placement,
	 * but can be mapped at 4k offsets anyway. This check is only
	 * there for the former case.
	 */
	if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) &&
	    (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) {
		if (XE_IOCTL_DBG(xe, obj_offset &
				 XE_64K_PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
			return -EINVAL;
		}
	}

	coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
	if (bo->cpu_caching) {
		if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
				 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
			return -EINVAL;
		}
	} else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
		/*
		 * Imported dma-buf from a different device should
		 * require 1way or 2way coherency since we don't know
		 * how it was mapped on the CPU. Just assume it is
		 * potentially cached on CPU side.
		 */
		return -EINVAL;
	}

	/*
	 * Ensures that imported buffer objects (dma-bufs) are not mapped
	 * with a PAT index that enables compression.
	 */
	comp_en = xe_pat_index_get_comp_en(xe, pat_index);
	if (XE_IOCTL_DBG(xe, bo->ttm.base.import_attach && comp_en))
		return -EINVAL;

	/* NOTE(review): pat_index 19 is a bare literal - confirm its meaning */
	if (XE_IOCTL_DBG(xe, bo->ttm.base.import_attach && xe_device_is_l2_flush_optimized(xe) &&
			 (pat_index != 19 && coh_mode != XE_COH_2WAY)))
		return -EINVAL;

	/* If a BO is protected it can only be mapped if the key is still valid */
	if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) &&
	    op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL)
		if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0))
			return -ENOEXEC;

	return 0;
}

/**
 * xe_vm_bind_ioctl() - VM bind ioctl handler
 * @dev: DRM device
 * @data: ioctl arguments, a struct drm_xe_vm_bind
 * @file: DRM file the ioctl was issued on
 *
 * Looks up the VM (and optional exec queue), validates the arguments, takes
 * the VM lock for write, resolves and validates the buffer objects, parses
 * the sync entries, then creates, parses and executes the bind operations.
 * Resources are released in reverse order of acquisition through the label
 * ladder at the end of the function.
 *
 * A call with no binds, or whose binds produce no operations, uses -ENODATA
 * internally; in that case the sync entries are signalled through
 * vm_bind_ioctl_signal_fences() and its result is returned instead.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_vm_bind *args = data;
	struct drm_xe_sync __user *syncs_user;
	struct xe_bo **bos = NULL;
	struct drm_gpuva_ops **ops = NULL;
	struct xe_vm *vm;
	struct xe_exec_queue *q = NULL;
	u32 num_syncs, num_ufence = 0;
	struct xe_sync_entry *syncs = NULL;
	struct drm_xe_vm_bind_op *bind_ops = NULL;
	struct xe_vma_ops vops;
	struct dma_fence *fence;
	int err;
	int i;

	vm = xe_vm_lookup(xef, args->vm_id);
	if (XE_IOCTL_DBG(xe, !vm))
		return -EINVAL;

	err = vm_bind_ioctl_check_args(xe, vm, args, &bind_ops);
	if (err)
		goto put_vm;

	if (args->exec_queue_id) {
		q = xe_exec_queue_lookup(xef, args->exec_queue_id);
		if (XE_IOCTL_DBG(xe, !q)) {
			err = -ENOENT;
			goto free_bind_ops;
		}

		if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
			err = -EINVAL;
			goto put_exec_queue;
		}
	}

	/* A user-supplied queue must belong to the VM being bound */
	if (XE_IOCTL_DBG(xe, q && vm != q->user_vm)) {
		err = -EINVAL;
		goto put_exec_queue;
	}

	/* Ensure all UNMAPs visible */
	xe_svm_flush(vm);

	err = down_write_killable(&vm->lock);
	if (err)
		goto put_exec_queue;

	if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
		err = -ENOENT;
		goto release_vm_lock;
	}

	/* Every bind must fit in the VM; subtraction avoids addr + range overflow */
	for (i = 0; i < args->num_binds; ++i) {
		u64 range = bind_ops[i].range;
		u64 addr = bind_ops[i].addr;

		if (XE_IOCTL_DBG(xe, range > vm->size) ||
		    XE_IOCTL_DBG(xe, addr > vm->size - range)) {
			err = -EINVAL;
			goto release_vm_lock;
		}
	}

	if (args->num_binds) {
		bos = kvzalloc_objs(*bos, args->num_binds,
				    GFP_KERNEL | __GFP_ACCOUNT | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
		if (!bos) {
			err = -ENOMEM;
			goto release_vm_lock;
		}

		ops = kvzalloc_objs(*ops, args->num_binds,
				    GFP_KERNEL | __GFP_ACCOUNT | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
		if (!ops) {
			err = -ENOMEM;
			goto free_bos;
		}
	}

	/* Look up and validate the BO of every bind that references one */
	for (i = 0; i < args->num_binds; ++i) {
		struct drm_gem_object *gem_obj;
		u64 range = bind_ops[i].range;
		u64 addr = bind_ops[i].addr;
		u32 obj = bind_ops[i].obj;
		u64 obj_offset = bind_ops[i].obj_offset;
		u16 pat_index = bind_ops[i].pat_index;
		u32 op = bind_ops[i].op;
		u32 bind_flags = bind_ops[i].flags;

		if (!obj)
			continue;

		gem_obj = drm_gem_object_lookup(file, obj);
		if (XE_IOCTL_DBG(xe, !gem_obj)) {
			err = -ENOENT;
			goto put_obj;
		}
		bos[i] = gem_to_xe_bo(gem_obj);

		err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range,
						   obj_offset, pat_index, op,
						   bind_flags);
		if (err)
			goto put_obj;
	}

	if (args->num_syncs) {
		syncs = kzalloc_objs(*syncs, args->num_syncs);
		if (!syncs) {
			err = -ENOMEM;
			goto put_obj;
		}
	}

	/*
	 * num_syncs counts the successfully parsed entries, so the cleanup
	 * loop at free_syncs only touches entries that were set up.
	 */
	syncs_user = u64_to_user_ptr(args->syncs);
	for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
		struct xe_exec_queue *__q = q ?: vm->q[0];

		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
					  &syncs_user[num_syncs],
					  __q->ufence_syncobj,
					  ++__q->ufence_timeline_value,
					  (xe_vm_in_lr_mode(vm) ?
					   SYNC_PARSE_FLAG_LR_MODE : 0) |
					  (!args->num_binds ?
					   SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
		if (err)
			goto free_syncs;

		if (xe_sync_is_ufence(&syncs[num_syncs]))
			num_ufence++;
	}

	if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
		err = -EINVAL;
		goto free_syncs;
	}

	/* No binds: only the syncs need signalling (handled at free_syncs) */
	if (!args->num_binds) {
		err = -ENODATA;
		goto free_syncs;
	}

	xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
	if (args->num_binds > 1)
		vops.flags |= XE_VMA_OPS_ARRAY_OF_BINDS;
	for (i = 0; i < args->num_binds; ++i) {
		u64 range = bind_ops[i].range;
		u64 addr = bind_ops[i].addr;
		u32 op = bind_ops[i].op;
		u32 flags = bind_ops[i].flags;
		u64 obj_offset = bind_ops[i].obj_offset;
		u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
		u16 pat_index = bind_ops[i].pat_index;

		ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset,
						  addr, range, op, flags,
						  prefetch_region, pat_index);
		if (IS_ERR(ops[i])) {
			err = PTR_ERR(ops[i]);
			/* Keep the array free of error pointers for the unwind */
			ops[i] = NULL;
			goto unwind_ops;
		}

		err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops);
		if (err)
			goto unwind_ops;

#ifdef TEST_VM_OPS_ERROR
		if (flags & FORCE_OP_ERROR) {
			vops.inject_error = true;
			vm->xe->vm_inject_error_position =
				(vm->xe->vm_inject_error_position + 1) %
				FORCE_OP_ERROR_COUNT;
		}
#endif
	}

	/* Nothing to do */
	if (list_empty(&vops.list)) {
		err = -ENODATA;
		goto unwind_ops;
	}

	err = xe_vma_ops_alloc(&vops, args->num_binds > 1);
	if (err)
		goto unwind_ops;

	err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops);
	if (err)
		goto unwind_ops;

	fence = vm_bind_ioctl_ops_execute(vm, &vops);
	if (IS_ERR(fence))
		err = PTR_ERR(fence);
	else
		dma_fence_put(fence);

unwind_ops:
	/* -ENODATA is not a real failure, so there is nothing to unwind */
	if (err && err != -ENODATA)
		vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
	xe_vma_ops_fini(&vops);
	for (i = args->num_binds - 1; i >= 0; --i)
		if (ops[i])
			drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
free_syncs:
	if (err == -ENODATA)
		err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
	while (num_syncs--)
		xe_sync_entry_cleanup(&syncs[num_syncs]);

	kfree(syncs);
put_obj:
	for (i = 0; i < args->num_binds; ++i)
		xe_bo_put(bos[i]);

	kvfree(ops);
free_bos:
	kvfree(bos);
release_vm_lock:
	up_write(&vm->lock);
put_exec_queue:
	if (q)
		xe_exec_queue_put(q);
free_bind_ops:
	/* bind_ops points into args unless the array was copied */
	if (args->num_binds > 1)
		kvfree(bind_ops);
put_vm:
	xe_vm_put(vm);
	return err;
}

/*
 * Map access type, fault type, and fault level from current bspec
 * specification to user spec abstraction. The current mapping is
 * approximately 1-to-1, with access type being the only notable
 * exception as it carries additional data with respect to prefetch
 * status that needs to be masked out.
4096 */ 4097 static u8 xe_to_user_access_type(u8 access_type) 4098 { 4099 return access_type & XE_PAGEFAULT_ACCESS_TYPE_MASK; 4100 } 4101 4102 static u8 xe_to_user_fault_type(u8 fault_type) 4103 { 4104 return fault_type; 4105 } 4106 4107 static u8 xe_to_user_fault_level(u8 fault_level) 4108 { 4109 return fault_level; 4110 } 4111 4112 static int fill_faults(struct xe_vm *vm, 4113 struct drm_xe_vm_get_property *args) 4114 { 4115 struct xe_vm_fault __user *usr_ptr = u64_to_user_ptr(args->data); 4116 struct xe_vm_fault *fault_list, fault_entry = { 0 }; 4117 struct xe_vm_fault_entry *entry; 4118 int ret = 0, i = 0, count, entry_size; 4119 4120 entry_size = sizeof(struct xe_vm_fault); 4121 count = args->size / entry_size; 4122 4123 fault_list = kcalloc(count, sizeof(struct xe_vm_fault), GFP_KERNEL); 4124 if (!fault_list) 4125 return -ENOMEM; 4126 4127 spin_lock(&vm->faults.lock); 4128 list_for_each_entry(entry, &vm->faults.list, list) { 4129 if (i == count) 4130 break; 4131 4132 fault_entry.address = xe_device_canonicalize_addr(vm->xe, entry->address); 4133 fault_entry.address_precision = entry->address_precision; 4134 4135 fault_entry.access_type = xe_to_user_access_type(entry->access_type); 4136 fault_entry.fault_type = xe_to_user_fault_type(entry->fault_type); 4137 fault_entry.fault_level = xe_to_user_fault_level(entry->fault_level); 4138 4139 memcpy(&fault_list[i], &fault_entry, entry_size); 4140 4141 i++; 4142 } 4143 spin_unlock(&vm->faults.lock); 4144 4145 ret = copy_to_user(usr_ptr, fault_list, args->size); 4146 4147 kfree(fault_list); 4148 return ret ? 
-EFAULT : 0; 4149 } 4150 4151 static int xe_vm_get_property_helper(struct xe_vm *vm, 4152 struct drm_xe_vm_get_property *args) 4153 { 4154 size_t size; 4155 4156 switch (args->property) { 4157 case DRM_XE_VM_GET_PROPERTY_FAULTS: 4158 spin_lock(&vm->faults.lock); 4159 size = size_mul(sizeof(struct xe_vm_fault), vm->faults.len); 4160 spin_unlock(&vm->faults.lock); 4161 4162 if (!args->size) { 4163 args->size = size; 4164 return 0; 4165 } 4166 4167 /* 4168 * Number of faults may increase between calls to 4169 * xe_vm_get_property_ioctl, so just report the number of 4170 * faults the user requests if it's less than or equal to 4171 * the number of faults in the VM fault array. 4172 * 4173 * We should also at least assert that the args->size value 4174 * is a multiple of the xe_vm_fault struct size. 4175 */ 4176 if (args->size > size || args->size % sizeof(struct xe_vm_fault)) 4177 return -EINVAL; 4178 4179 return fill_faults(vm, args); 4180 } 4181 return -EINVAL; 4182 } 4183 4184 int xe_vm_get_property_ioctl(struct drm_device *drm, void *data, 4185 struct drm_file *file) 4186 { 4187 struct xe_device *xe = to_xe_device(drm); 4188 struct xe_file *xef = to_xe_file(file); 4189 struct drm_xe_vm_get_property *args = data; 4190 struct xe_vm *vm; 4191 int ret = 0; 4192 4193 if (XE_IOCTL_DBG(xe, (args->reserved[0] || args->reserved[1] || 4194 args->reserved[2] || args->extensions || 4195 args->pad))) 4196 return -EINVAL; 4197 4198 vm = xe_vm_lookup(xef, args->vm_id); 4199 if (XE_IOCTL_DBG(xe, !vm)) 4200 return -ENOENT; 4201 4202 ret = xe_vm_get_property_helper(vm, args); 4203 4204 xe_vm_put(vm); 4205 return ret; 4206 } 4207 4208 /** 4209 * xe_vm_bind_kernel_bo - bind a kernel BO to a VM 4210 * @vm: VM to bind the BO to 4211 * @bo: BO to bind 4212 * @q: exec queue to use for the bind (optional) 4213 * @addr: address at which to bind the BO 4214 * @cache_lvl: PAT cache level to use 4215 * 4216 * Execute a VM bind map operation on a kernel-owned BO to bind it into a 4217 * 
kernel-owned VM. 4218 * 4219 * Returns a dma_fence to track the binding completion if the job to do so was 4220 * successfully submitted, an error pointer otherwise. 4221 */ 4222 struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo, 4223 struct xe_exec_queue *q, u64 addr, 4224 enum xe_cache_level cache_lvl) 4225 { 4226 struct xe_vma_ops vops; 4227 struct drm_gpuva_ops *ops = NULL; 4228 struct dma_fence *fence; 4229 int err; 4230 4231 xe_bo_get(bo); 4232 xe_vm_get(vm); 4233 if (q) 4234 xe_exec_queue_get(q); 4235 4236 down_write(&vm->lock); 4237 4238 xe_vma_ops_init(&vops, vm, q, NULL, 0); 4239 4240 ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo), 4241 DRM_XE_VM_BIND_OP_MAP, 0, 0, 4242 vm->xe->pat.idx[cache_lvl]); 4243 if (IS_ERR(ops)) { 4244 err = PTR_ERR(ops); 4245 goto release_vm_lock; 4246 } 4247 4248 err = vm_bind_ioctl_ops_parse(vm, ops, &vops); 4249 if (err) 4250 goto release_vm_lock; 4251 4252 xe_assert(vm->xe, !list_empty(&vops.list)); 4253 4254 err = xe_vma_ops_alloc(&vops, false); 4255 if (err) 4256 goto unwind_ops; 4257 4258 fence = vm_bind_ioctl_ops_execute(vm, &vops); 4259 if (IS_ERR(fence)) 4260 err = PTR_ERR(fence); 4261 4262 unwind_ops: 4263 if (err && err != -ENODATA) 4264 vm_bind_ioctl_ops_unwind(vm, &ops, 1); 4265 4266 xe_vma_ops_fini(&vops); 4267 drm_gpuva_ops_free(&vm->gpuvm, ops); 4268 4269 release_vm_lock: 4270 up_write(&vm->lock); 4271 4272 if (q) 4273 xe_exec_queue_put(q); 4274 xe_vm_put(vm); 4275 xe_bo_put(bo); 4276 4277 if (err) 4278 fence = ERR_PTR(err); 4279 4280 return fence; 4281 } 4282 4283 /** 4284 * xe_vm_lock() - Lock the vm's dma_resv object 4285 * @vm: The struct xe_vm whose lock is to be locked 4286 * @intr: Whether to perform any wait interruptible 4287 * 4288 * Return: 0 on success, -EINTR if @intr is true and the wait for a 4289 * contended lock was interrupted. If @intr is false, the function 4290 * always returns 0. 
4291 */ 4292 int xe_vm_lock(struct xe_vm *vm, bool intr) 4293 { 4294 int ret; 4295 4296 if (intr) 4297 ret = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); 4298 else 4299 ret = dma_resv_lock(xe_vm_resv(vm), NULL); 4300 4301 return ret; 4302 } 4303 4304 /** 4305 * xe_vm_unlock() - Unlock the vm's dma_resv object 4306 * @vm: The struct xe_vm whose lock is to be released. 4307 * 4308 * Unlock a buffer object lock that was locked by xe_vm_lock(). 4309 */ 4310 void xe_vm_unlock(struct xe_vm *vm) 4311 { 4312 dma_resv_unlock(xe_vm_resv(vm)); 4313 } 4314 4315 /** 4316 * xe_vm_invalidate_vma_submit - Submit a job to invalidate GPU mappings for 4317 * VMA. 4318 * @vma: VMA to invalidate 4319 * @batch: TLB invalidation batch to populate; caller must later call 4320 * xe_tlb_inval_batch_wait() on it to wait for completion 4321 * 4322 * Walks a list of page tables leaves which it memset the entries owned by this 4323 * VMA to zero, invalidates the TLBs, but doesn't block waiting for TLB flush 4324 * to complete, but instead populates @batch which can be waited on using 4325 * xe_tlb_inval_batch_wait(). 4326 * 4327 * Returns 0 for success, negative error code otherwise. 
4328 */ 4329 int xe_vm_invalidate_vma_submit(struct xe_vma *vma, struct xe_tlb_inval_batch *batch) 4330 { 4331 struct xe_device *xe = xe_vma_vm(vma)->xe; 4332 struct xe_vm *vm = xe_vma_vm(vma); 4333 struct xe_tile *tile; 4334 u8 tile_mask = 0; 4335 int ret = 0; 4336 u8 id; 4337 4338 xe_assert(xe, !xe_vma_is_null(vma)); 4339 xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma)); 4340 trace_xe_vma_invalidate(vma); 4341 4342 vm_dbg(&vm->xe->drm, 4343 "INVALIDATE: addr=0x%016llx, range=0x%016llx", 4344 xe_vma_start(vma), xe_vma_size(vma)); 4345 4346 /* 4347 * Check that we don't race with page-table updates, tile_invalidated 4348 * update is safe 4349 */ 4350 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { 4351 if (xe_vma_is_userptr(vma)) { 4352 lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) || 4353 (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) && 4354 lockdep_is_held(&xe_vm_resv(vm)->lock.base))); 4355 4356 WARN_ON_ONCE(!mmu_interval_check_retry 4357 (&to_userptr_vma(vma)->userptr.notifier, 4358 to_userptr_vma(vma)->userptr.pages.notifier_seq)); 4359 WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm), 4360 DMA_RESV_USAGE_BOOKKEEP)); 4361 4362 } else { 4363 xe_bo_assert_held(xe_vma_bo(vma)); 4364 } 4365 } 4366 4367 for_each_tile(tile, xe, id) 4368 if (xe_pt_zap_ptes(tile, vma)) 4369 tile_mask |= BIT(id); 4370 4371 xe_device_wmb(xe); 4372 4373 ret = xe_tlb_inval_range_tilemask_submit(xe, xe_vma_vm(vma)->usm.asid, 4374 xe_vma_start(vma), xe_vma_end(vma), 4375 tile_mask, batch); 4376 4377 /* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */ 4378 WRITE_ONCE(vma->tile_invalidated, vma->tile_mask); 4379 return ret; 4380 } 4381 4382 /** 4383 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock 4384 * @vma: VMA to invalidate 4385 * 4386 * Walks a list of page tables leaves which it memset the entries owned by this 4387 * VMA to zero, invalidates the TLBs, and block until TLBs invalidation is 4388 * complete. 
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_vm_invalidate_vma(struct xe_vma *vma)
{
	struct xe_tlb_inval_batch batch;
	int ret;

	ret = xe_vm_invalidate_vma_submit(vma, &batch);
	if (ret)
		return ret;

	/* Submission succeeded: block until the invalidations complete */
	xe_tlb_inval_batch_wait(&batch);
	return ret;
}

/**
 * xe_vm_validate_protected() - Check the PXP keys of all protected BOs in a VM
 * @vm: The VM to check
 *
 * Walks every VMA of @vm under vm->snap_mutex and, for each one backed by a
 * protected BO, checks the BO's key with xe_pxp_bo_key_check(). The walk
 * stops at the first failing check.
 *
 * Return: -ENODEV if @vm is NULL, otherwise 0 if all checks pass or the
 * first non-zero result of xe_pxp_bo_key_check().
 */
int xe_vm_validate_protected(struct xe_vm *vm)
{
	struct drm_gpuva *gpuva;
	int err = 0;

	if (!vm)
		return -ENODEV;

	mutex_lock(&vm->snap_mutex);

	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);
		struct xe_bo *bo = vma->gpuva.gem.obj ?
			gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;

		/* VMAs without a backing BO have nothing to check */
		if (!bo)
			continue;

		if (xe_bo_is_protected(bo)) {
			err = xe_pxp_bo_key_check(vm->xe->pxp, bo);
			if (err)
				break;
		}
	}

	mutex_unlock(&vm->snap_mutex);
	return err;
}

/*
 * Snapshot of the dumpable VMAs of a VM.
 *
 * @uapi_flags holds the DRM_XE_VM_CREATE_FLAG_* values derived from the VM
 * flags and @num_snaps is the number of entries in @snap. Each entry records
 * one VMA: its start (@ofs) and size (@len), the offset into the backing BO
 * or the userptr address (@bo_ofs), XE_VM_SNAP_FLAG_* bits (@flags), the
 * memory region (@uapi_mem_region, -1 when there is none), the PAT index and
 * CPU caching mode, a reference to the backing BO (@bo) or the mm of a
 * userptr (@mm), and the captured contents or an ERR_PTR (@data).
 */
struct xe_vm_snapshot {
	int uapi_flags;
	unsigned long num_snaps;
	struct {
		u64 ofs, bo_ofs;
		unsigned long len;
#define XE_VM_SNAP_FLAG_USERPTR		BIT(0)
#define XE_VM_SNAP_FLAG_READ_ONLY	BIT(1)
#define XE_VM_SNAP_FLAG_IS_NULL		BIT(2)
		unsigned long flags;
		int uapi_mem_region;
		int pat_index;
		int cpu_caching;
		struct xe_bo *bo;
		void *data;
		struct mm_struct *mm;
	} snap[];
};

/**
 * xe_vm_snapshot_capture() - Record the dumpable VMAs of a VM
 * @vm: The VM to snapshot, may be NULL
 *
 * Under vm->snap_mutex, counts the VMAs flagged XE_VMA_DUMPABLE, allocates a
 * snapshot with GFP_NOWAIT and records the metadata of each such VMA, taking
 * a reference on its BO or on the mm of a userptr. The contents themselves
 * are not read here; see xe_vm_snapshot_capture_delayed().
 *
 * Return: NULL if @vm is NULL, ERR_PTR(-ENODEV) if there are no dumpable
 * VMAs, ERR_PTR(-ENOMEM) if the allocation fails, the snapshot otherwise.
 */
struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
{
	unsigned long num_snaps = 0, i;
	struct xe_vm_snapshot *snap = NULL;
	struct drm_gpuva *gpuva;

	if (!vm)
		return NULL;

	mutex_lock(&vm->snap_mutex);
	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		if (gpuva->flags & XE_VMA_DUMPABLE)
			num_snaps++;
	}

	if (num_snaps)
		snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
	if (!snap) {
		/* Distinguish allocation failure from "nothing to dump" */
		snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV);
		goto out_unlock;
	}

	if (vm->flags & XE_VM_FLAG_FAULT_MODE)
		snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_FAULT_MODE;
	if (vm->flags & XE_VM_FLAG_LR_MODE)
		snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_LR_MODE;
	if (vm->flags & XE_VM_FLAG_SCRATCH_PAGE)
		snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;

	snap->num_snaps = num_snaps;
	i = 0;
	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);
		struct xe_bo *bo = vma->gpuva.gem.obj ?
			gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;

		if (!(gpuva->flags & XE_VMA_DUMPABLE))
			continue;

		snap->snap[i].ofs = xe_vma_start(vma);
		snap->snap[i].len = xe_vma_size(vma);
		snap->snap[i].flags = xe_vma_read_only(vma) ?
			XE_VM_SNAP_FLAG_READ_ONLY : 0;
		snap->snap[i].pat_index = vma->attr.pat_index;
		if (bo) {
			snap->snap[i].cpu_caching = bo->cpu_caching;
			snap->snap[i].bo = xe_bo_get(bo);
			snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
			/*
			 * Other placements leave uapi_mem_region at the 0 it
			 * got from the zeroed allocation.
			 */
			switch (bo->ttm.resource->mem_type) {
			case XE_PL_SYSTEM:
			case XE_PL_TT:
				snap->snap[i].uapi_mem_region = 0;
				break;
			case XE_PL_VRAM0:
				snap->snap[i].uapi_mem_region = 1;
				break;
			case XE_PL_VRAM1:
				snap->snap[i].uapi_mem_region = 2;
				break;
			}
		} else if (xe_vma_is_userptr(vma)) {
			struct mm_struct *mm =
				to_userptr_vma(vma)->userptr.notifier.mm;

			/* Pin the mm so the contents can be read later */
			if (mmget_not_zero(mm))
				snap->snap[i].mm = mm;
			else
				snap->snap[i].data = ERR_PTR(-EFAULT);

			snap->snap[i].bo_ofs = xe_vma_userptr(vma);
			snap->snap[i].flags |= XE_VM_SNAP_FLAG_USERPTR;
			snap->snap[i].uapi_mem_region = 0;
		} else if (xe_vma_is_null(vma)) {
			snap->snap[i].flags |= XE_VM_SNAP_FLAG_IS_NULL;
			snap->snap[i].uapi_mem_region = -1;
		} else {
			snap->snap[i].data = ERR_PTR(-ENOENT);
			snap->snap[i].uapi_mem_region = -1;
		}
		i++;
	}

out_unlock:
	mutex_unlock(&vm->snap_mutex);
	return snap;
}

/**
 * xe_vm_snapshot_capture_delayed() - Read the contents of a VM snapshot
 * @snap: Snapshot from xe_vm_snapshot_capture(), may be NULL or an ERR_PTR
 *
 * For every entry that is neither already in error nor a NULL mapping,
 * allocates a buffer and fills it from the backing BO or, for userptr
 * entries, from the recorded mm via copy_from_user(). On failure the entry's
 * data is replaced by an ERR_PTR. The BO reference of each processed entry
 * is dropped, as is the mm reference of each userptr entry that was read.
 */
void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
{
	if (IS_ERR_OR_NULL(snap))
		return;

	for (int i = 0; i < snap->num_snaps; i++) {
		struct xe_bo *bo = snap->snap[i].bo;
		int err;

		if (IS_ERR(snap->snap[i].data) ||
		    snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL)
			continue;

		snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
		if (!snap->snap[i].data) {
			snap->snap[i].data = ERR_PTR(-ENOMEM);
			goto cleanup_bo;
		}

		if (bo) {
			err = xe_bo_read(bo, snap->snap[i].bo_ofs,
					 snap->snap[i].data, snap->snap[i].len);
		} else {
			void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;

			/* Temporarily adopt the recorded mm to read user memory */
			kthread_use_mm(snap->snap[i].mm);
			if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
				err = 0;
			else
				err = -EFAULT;
			kthread_unuse_mm(snap->snap[i].mm);

			mmput(snap->snap[i].mm);
			snap->snap[i].mm = NULL;
		}

		if (err) {
			kvfree(snap->snap[i].data);
			snap->snap[i].data = ERR_PTR(err);
		}

cleanup_bo:
		xe_bo_put(bo);
		snap->snap[i].bo = NULL;
	}
}

/**
 * xe_vm_snapshot_print() - Print a VM snapshot
 * @snap: Snapshot to print, may be NULL or an ERR_PTR
 * @p: The drm_printer to print to
 *
 * Prints the VM flags followed by, for every entry, its length, properties
 * and either the error stored in place of its data or the data itself,
 * ascii85 encoded one u32 at a time. NULL mappings have no data line.
 * Printing stops early once the coredump printer reports it is full.
 */
void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
{
	unsigned long i, j;

	if (IS_ERR_OR_NULL(snap)) {
		drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
		return;
	}

	drm_printf(p, "VM.uapi_flags: 0x%x\n", snap->uapi_flags);
	for (i = 0; i < snap->num_snaps; i++) {
		drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);

		/* mem_region is printed as a bitmask, 0 when there is no region */
		drm_printf(p, "[%llx].properties: %s|%s|mem_region=0x%lx|pat_index=%d|cpu_caching=%d\n",
			   snap->snap[i].ofs,
			   snap->snap[i].flags & XE_VM_SNAP_FLAG_READ_ONLY ?
			   "read_only" : "read_write",
			   snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL ?
			   "null_sparse" :
			   snap->snap[i].flags & XE_VM_SNAP_FLAG_USERPTR ?
			   "userptr" : "bo",
			   snap->snap[i].uapi_mem_region == -1 ? 0 :
			   BIT(snap->snap[i].uapi_mem_region),
			   snap->snap[i].pat_index,
			   snap->snap[i].cpu_caching);

		if (IS_ERR(snap->snap[i].data)) {
			drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs,
				   PTR_ERR(snap->snap[i].data));
			continue;
		}

		if (snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL)
			continue;

		drm_printf(p, "[%llx].data: ", snap->snap[i].ofs);

		for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
			u32 *val = snap->snap[i].data + j;
			char dumped[ASCII85_BUFSZ];

			drm_puts(p, ascii85_encode(*val, dumped));
		}

		drm_puts(p, "\n");

		if (drm_coredump_printer_is_full(p))
			return;
	}
}

/**
 * xe_vm_snapshot_free() - Free a VM snapshot
 * @snap: Snapshot to free, may be NULL or an ERR_PTR
 *
 * Frees the captured data of every entry, drops any BO and mm references
 * still held, and frees the snapshot itself.
 */
void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
{
	unsigned long i;

	if (IS_ERR_OR_NULL(snap))
		return;

	for (i = 0; i < snap->num_snaps; i++) {
		/* data may hold an ERR_PTR, which must not be freed */
		if (!IS_ERR(snap->snap[i].data))
			kvfree(snap->snap[i].data);
		xe_bo_put(snap->snap[i].bo);
		if (snap->snap[i].mm)
			mmput(snap->snap[i].mm);
	}
	kvfree(snap);
}

/**
 * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations
 * @xe: Pointer to the Xe device structure
 * @vma: Pointer to the virtual memory area (VMA) structure
 * @is_atomic: In pagefault path and atomic operation
 *
 * This function determines whether the given VMA needs to be migrated to
 * VRAM in order to do atomic GPU operation. The atomic access attribute is
 * taken from the VMA's BO when it has one, from the VMA itself otherwise.
 *
 * Return:
 *   1        - Migration to VRAM is required
 *   0        - Migration is not required
 *   -EACCES  - Invalid access for atomic memory attr
 *
 */
int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic)
{
	u32 atomic_access = xe_vma_bo(vma) ? xe_vma_bo(vma)->attr.atomic_access :
					     vma->attr.atomic_access;

	/* Not a discrete device or not an atomic access: nothing to migrate */
	if (!IS_DGFX(xe) || !is_atomic)
		return false;

	/*
	 * NOTE: The checks implemented here are platform-specific. For
	 * instance, on a device supporting CXL atomics, these would ideally
	 * work universally without additional handling.
	 */
	switch (atomic_access) {
	case DRM_XE_ATOMIC_DEVICE:
		return !xe->info.has_device_atomics_on_smem;

	case DRM_XE_ATOMIC_CPU:
		return -EACCES;

	case DRM_XE_ATOMIC_UNDEFINED:
	case DRM_XE_ATOMIC_GLOBAL:
	default:
		return 1;
	}
}

/*
 * Split/recreate the VMAs covering the range described by @map_req.
 *
 * Must be called with vm->lock held for write. The GPUVA ops are created
 * with drm_gpuvm_madvise_ops_create() when @is_madvise is set and with
 * drm_gpuvm_sm_map_ops_create() otherwise. A first pass propagates the flags
 * (and, for the non-madvise case, the default PAT index) of the VMA being
 * unmapped to the following MAP op. After the ops are parsed, a second pass
 * under the VM's dma_resv lock destroys the old VMAs and, for madvise,
 * copies the memory attributes of the remapped VMA to the newly mapped one.
 *
 * Returns 0 on success (including when there are no ops), negative error
 * code otherwise.
 */
static int xe_vm_alloc_vma(struct xe_vm *vm,
			   struct drm_gpuvm_map_req *map_req,
			   bool is_madvise)
{
	struct xe_vma_ops vops;
	struct drm_gpuva_ops *ops = NULL;
	struct drm_gpuva_op *__op;
	unsigned int vma_flags = 0;
	bool remap_op = false;
	struct xe_vma_mem_attr tmp_attr = {};
	u16 default_pat;
	int err;

	lockdep_assert_held_write(&vm->lock);

	if (is_madvise)
		ops = drm_gpuvm_madvise_ops_create(&vm->gpuvm, map_req);
	else
		ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, map_req);

	if (IS_ERR(ops))
		return PTR_ERR(ops);

	if (list_empty(&ops->list)) {
		err = 0;
		goto free_ops;
	}

	/*
	 * NOTE(review): default_pat is only assigned by an UNMAP/REMAP op; a
	 * MAP op seen before either would read it uninitialized - confirm the
	 * op ordering guarantees this cannot happen.
	 */
	drm_gpuva_for_each_op(__op, ops) {
		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
		struct xe_vma *vma = NULL;

		if (!is_madvise) {
			if (__op->op == DRM_GPUVA_OP_UNMAP) {
				vma = gpuva_to_vma(op->base.unmap.va);
				XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma));
				default_pat = vma->attr.default_pat_index;
				vma_flags = vma->gpuva.flags;
			}

			if (__op->op == DRM_GPUVA_OP_REMAP) {
				vma = gpuva_to_vma(op->base.remap.unmap->va);
				default_pat = vma->attr.default_pat_index;
				vma_flags = vma->gpuva.flags;
			}

			if (__op->op == DRM_GPUVA_OP_MAP) {
				op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK;
				op->map.pat_index = default_pat;
			}
		} else {
			if (__op->op == DRM_GPUVA_OP_REMAP) {
				vma = gpuva_to_vma(op->base.remap.unmap->va);
				xe_assert(vm->xe, !remap_op);
				xe_assert(vm->xe, xe_vma_has_no_bo(vma));
				remap_op = true;
				vma_flags = vma->gpuva.flags;
			}

			if (__op->op == DRM_GPUVA_OP_MAP) {
				xe_assert(vm->xe, remap_op);
				remap_op = false;
				/*
				 * In case of madvise ops DRM_GPUVA_OP_MAP is
				 * always after DRM_GPUVA_OP_REMAP, so ensure
				 * to propagate the flags from the vma we're
				 * unmapping.
				 */
				op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK;
			}
		}
		print_op(vm->xe, __op);
	}

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);

	if (is_madvise)
		vops.flags |= XE_VMA_OPS_FLAG_MADVISE;
	else
		vops.flags |= XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP;

	err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
	if (err)
		goto unwind_ops;

	xe_vm_lock(vm, false);

	drm_gpuva_for_each_op(__op, ops) {
		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
		struct xe_vma *vma;

		if (__op->op == DRM_GPUVA_OP_UNMAP) {
			vma = gpuva_to_vma(op->base.unmap.va);
			/* There should be no unmap for madvise */
			if (is_madvise)
				XE_WARN_ON("UNEXPECTED UNMAP");

			xe_vma_destroy(vma, NULL);
		} else if (__op->op == DRM_GPUVA_OP_REMAP) {
			vma = gpuva_to_vma(op->base.remap.unmap->va);
			/* In case of madvise ops, store the attributes of the
			 * REMAP-unmapped VMA so they can be assigned to the
			 * VMA created by the following MAP.
			 */
			if (is_madvise)
				xe_vma_mem_attr_copy(&tmp_attr, &vma->attr);

			xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL);
		} else if (__op->op == DRM_GPUVA_OP_MAP) {
			vma = op->map.vma;
			/* In case of a madvise call, MAP always follows REMAP.
			 * Therefore tmp_attr will always have sane values, making it safe to
			 * copy them to new vma.
			 */
			if (is_madvise)
				xe_vma_mem_attr_copy(&vma->attr, &tmp_attr);
		}
	}

	xe_vm_unlock(vm);
	drm_gpuva_ops_free(&vm->gpuvm, ops);
	xe_vma_mem_attr_fini(&tmp_attr);
	return 0;

unwind_ops:
	vm_bind_ioctl_ops_unwind(vm, &ops, 1);
free_ops:
	drm_gpuva_ops_free(&vm->gpuvm, ops);
	return err;
}

/**
 * xe_vm_alloc_madvise_vma - Allocate VMA's with madvise ops
 * @vm: Pointer to the xe_vm structure
 * @start: Starting input address
 * @range: Size of the input range
 *
 * This function splits existing vma to create new vma for user provided input range
 *
 * Return: 0 if success
 */
int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
{
	struct drm_gpuvm_map_req map_req = {
		.map.va.addr = start,
		.map.va.range = range,
	};

	lockdep_assert_held_write(&vm->lock);

	vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range);

	return xe_vm_alloc_vma(vm, &map_req, true);
}

/*
 * True if @vma is non-NULL, is a CPU address mirror VMA and still has its
 * default memory attributes.
 */
static bool is_cpu_addr_vma_with_default_attr(struct xe_vma *vma)
{
	return vma && xe_vma_is_cpu_addr_mirror(vma) &&
	       xe_vma_has_default_mem_attrs(vma);
}

/**
 * xe_vm_find_cpu_addr_mirror_vma_range - Extend a VMA range to include adjacent CPU-mirrored VMAs
 * @vm: VM to search within
 * @start: Input/output pointer to the starting address of the range
 * @end: Input/output pointer to the end address of the range
 *
 * Given a range defined by @start and @end, this function checks the VMAs
 * immediately before and after the range. If those neighboring VMAs are
 * CPU-address-mirrored and have default memory attributes, the function
 * updates @start and @end to include them.
This extended range can then 4867 * be used for merging or other operations that require a unified VMA. 4868 * 4869 * The function does not perform the merge itself; it only computes the 4870 * mergeable boundaries. 4871 */ 4872 void xe_vm_find_cpu_addr_mirror_vma_range(struct xe_vm *vm, u64 *start, u64 *end) 4873 { 4874 struct xe_vma *prev, *next; 4875 4876 lockdep_assert_held(&vm->lock); 4877 4878 if (*start >= SZ_4K) { 4879 prev = xe_vm_find_vma_by_addr(vm, *start - SZ_4K); 4880 if (is_cpu_addr_vma_with_default_attr(prev)) 4881 *start = xe_vma_start(prev); 4882 } 4883 4884 if (*end < vm->size) { 4885 next = xe_vm_find_vma_by_addr(vm, *end + 1); 4886 if (is_cpu_addr_vma_with_default_attr(next)) 4887 *end = xe_vma_end(next); 4888 } 4889 } 4890 4891 /** 4892 * xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU addr mirror vma 4893 * @vm: Pointer to the xe_vm structure 4894 * @start: Starting input address 4895 * @range: Size of the input range 4896 * 4897 * This function splits/merges existing vma to create new vma for user provided input range 4898 * 4899 * Return: 0 if success 4900 */ 4901 int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t range) 4902 { 4903 struct drm_gpuvm_map_req map_req = { 4904 .map.va.addr = start, 4905 .map.va.range = range, 4906 }; 4907 4908 lockdep_assert_held_write(&vm->lock); 4909 4910 vm_dbg(&vm->xe->drm, "CPU_ADDR_MIRROR_VMA_OPS_CREATE: addr=0x%016llx, size=0x%016llx", 4911 start, range); 4912 4913 return xe_vm_alloc_vma(vm, &map_req, false); 4914 } 4915 4916 /** 4917 * xe_vm_add_exec_queue() - Add exec queue to VM 4918 * @vm: The VM. 4919 * @q: The exec_queue 4920 * 4921 * Add exec queue to VM, skipped if the device does not have context based TLB 4922 * invalidations. 
4923 */ 4924 void xe_vm_add_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 4925 { 4926 struct xe_device *xe = vm->xe; 4927 4928 /* User VMs and queues only */ 4929 xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_KERNEL)); 4930 xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); 4931 xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM)); 4932 xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_MIGRATE)); 4933 xe_assert(xe, vm->xef); 4934 xe_assert(xe, vm == q->vm); 4935 4936 if (!xe->info.has_ctx_tlb_inval) 4937 return; 4938 4939 down_write(&vm->exec_queues.lock); 4940 list_add(&q->vm_exec_queue_link, &vm->exec_queues.list[q->gt->info.id]); 4941 ++vm->exec_queues.count[q->gt->info.id]; 4942 up_write(&vm->exec_queues.lock); 4943 } 4944 4945 /** 4946 * xe_vm_remove_exec_queue() - Remove exec queue from VM 4947 * @vm: The VM. 4948 * @q: The exec_queue 4949 * 4950 * Remove exec queue from VM, skipped if the device does not have context based 4951 * TLB invalidations. 4952 */ 4953 void xe_vm_remove_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 4954 { 4955 if (!vm->xe->info.has_ctx_tlb_inval) 4956 return; 4957 4958 down_write(&vm->exec_queues.lock); 4959 if (!list_empty(&q->vm_exec_queue_link)) { 4960 list_del(&q->vm_exec_queue_link); 4961 --vm->exec_queues.count[q->gt->info.id]; 4962 } 4963 up_write(&vm->exec_queues.lock); 4964 } 4965