1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2021 Intel Corporation 4 */ 5 6 #include "xe_vm.h" 7 8 #include <linux/dma-fence-array.h> 9 #include <linux/nospec.h> 10 11 #include <drm/drm_drv.h> 12 #include <drm/drm_exec.h> 13 #include <drm/drm_print.h> 14 #include <drm/ttm/ttm_tt.h> 15 #include <uapi/drm/xe_drm.h> 16 #include <linux/ascii85.h> 17 #include <linux/delay.h> 18 #include <linux/kthread.h> 19 #include <linux/mm.h> 20 #include <linux/swap.h> 21 22 #include <generated/xe_wa_oob.h> 23 24 #include "regs/xe_gtt_defs.h" 25 #include "xe_assert.h" 26 #include "xe_bo.h" 27 #include "xe_device.h" 28 #include "xe_drm_client.h" 29 #include "xe_exec_queue.h" 30 #include "xe_gt.h" 31 #include "xe_migrate.h" 32 #include "xe_pat.h" 33 #include "xe_pm.h" 34 #include "xe_preempt_fence.h" 35 #include "xe_pt.h" 36 #include "xe_pxp.h" 37 #include "xe_sriov_vf.h" 38 #include "xe_svm.h" 39 #include "xe_sync.h" 40 #include "xe_tile.h" 41 #include "xe_tlb_inval.h" 42 #include "xe_trace_bo.h" 43 #include "xe_vm_madvise.h" 44 #include "xe_wa.h" 45 46 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) 47 { 48 return vm->gpuvm.r_obj; 49 } 50 51 /** 52 * xe_vm_drm_exec_lock() - Lock the vm's resv with a drm_exec transaction 53 * @vm: The vm whose resv is to be locked. 54 * @exec: The drm_exec transaction. 55 * 56 * Helper to lock the vm's resv as part of a drm_exec transaction. 57 * 58 * Return: %0 on success. See drm_exec_lock_obj() for error codes. 
59 */ 60 int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec) 61 { 62 return drm_exec_lock_obj(exec, xe_vm_obj(vm)); 63 } 64 65 static bool preempt_fences_waiting(struct xe_vm *vm) 66 { 67 struct xe_exec_queue *q; 68 69 lockdep_assert_held(&vm->lock); 70 xe_vm_assert_held(vm); 71 72 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 73 if (!q->lr.pfence || 74 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, 75 &q->lr.pfence->flags)) { 76 return true; 77 } 78 } 79 80 return false; 81 } 82 83 static void free_preempt_fences(struct list_head *list) 84 { 85 struct list_head *link, *next; 86 87 list_for_each_safe(link, next, list) 88 xe_preempt_fence_free(to_preempt_fence_from_link(link)); 89 } 90 91 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list, 92 unsigned int *count) 93 { 94 lockdep_assert_held(&vm->lock); 95 xe_vm_assert_held(vm); 96 97 if (*count >= vm->preempt.num_exec_queues) 98 return 0; 99 100 for (; *count < vm->preempt.num_exec_queues; ++(*count)) { 101 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc(); 102 103 if (IS_ERR(pfence)) 104 return PTR_ERR(pfence); 105 106 list_move_tail(xe_preempt_fence_link(pfence), list); 107 } 108 109 return 0; 110 } 111 112 static int wait_for_existing_preempt_fences(struct xe_vm *vm) 113 { 114 struct xe_exec_queue *q; 115 bool vf_migration = IS_SRIOV_VF(vm->xe) && 116 xe_sriov_vf_migration_supported(vm->xe); 117 signed long wait_time = vf_migration ? 
HZ / 5 : MAX_SCHEDULE_TIMEOUT; 118 119 xe_vm_assert_held(vm); 120 121 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 122 if (q->lr.pfence) { 123 long timeout; 124 125 timeout = dma_fence_wait_timeout(q->lr.pfence, false, 126 wait_time); 127 if (!timeout) { 128 xe_assert(vm->xe, vf_migration); 129 return -EAGAIN; 130 } 131 132 /* Only -ETIME on fence indicates VM needs to be killed */ 133 if (timeout < 0 || q->lr.pfence->error == -ETIME) 134 return -ETIME; 135 136 dma_fence_put(q->lr.pfence); 137 q->lr.pfence = NULL; 138 } 139 } 140 141 return 0; 142 } 143 144 static bool xe_vm_is_idle(struct xe_vm *vm) 145 { 146 struct xe_exec_queue *q; 147 148 xe_vm_assert_held(vm); 149 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 150 if (!xe_exec_queue_is_idle(q)) 151 return false; 152 } 153 154 return true; 155 } 156 157 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list) 158 { 159 struct list_head *link; 160 struct xe_exec_queue *q; 161 162 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 163 struct dma_fence *fence; 164 165 link = list->next; 166 xe_assert(vm->xe, link != list); 167 168 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link), 169 q, q->lr.context, 170 ++q->lr.seqno); 171 dma_fence_put(q->lr.pfence); 172 q->lr.pfence = fence; 173 } 174 } 175 176 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) 177 { 178 struct xe_exec_queue *q; 179 int err; 180 181 xe_bo_assert_held(bo); 182 183 if (!vm->preempt.num_exec_queues) 184 return 0; 185 186 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues); 187 if (err) 188 return err; 189 190 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) 191 if (q->lr.pfence) { 192 dma_resv_add_fence(bo->ttm.base.resv, 193 q->lr.pfence, 194 DMA_RESV_USAGE_BOOKKEEP); 195 } 196 197 return 0; 198 } 199 200 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm, 201 struct drm_exec *exec) 202 { 203 struct xe_exec_queue *q; 
204 205 lockdep_assert_held(&vm->lock); 206 xe_vm_assert_held(vm); 207 208 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 209 q->ops->resume(q); 210 211 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence, 212 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); 213 } 214 } 215 216 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 217 { 218 struct drm_gpuvm_exec vm_exec = { 219 .vm = &vm->gpuvm, 220 .flags = DRM_EXEC_INTERRUPTIBLE_WAIT, 221 .num_fences = 1, 222 }; 223 struct drm_exec *exec = &vm_exec.exec; 224 struct xe_validation_ctx ctx; 225 struct dma_fence *pfence; 226 int err; 227 bool wait; 228 229 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); 230 231 down_write(&vm->lock); 232 err = xe_validation_exec_lock(&ctx, &vm_exec, &vm->xe->val); 233 if (err) 234 goto out_up_write; 235 236 pfence = xe_preempt_fence_create(q, q->lr.context, 237 ++q->lr.seqno); 238 if (IS_ERR(pfence)) { 239 err = PTR_ERR(pfence); 240 goto out_fini; 241 } 242 243 list_add(&q->lr.link, &vm->preempt.exec_queues); 244 ++vm->preempt.num_exec_queues; 245 q->lr.pfence = pfence; 246 247 xe_svm_notifier_lock(vm); 248 249 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence, 250 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); 251 252 /* 253 * Check to see if a preemption on VM is in flight or userptr 254 * invalidation, if so trigger this preempt fence to sync state with 255 * other preempt fences on the VM. 256 */ 257 wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm); 258 if (wait) 259 dma_fence_enable_sw_signaling(pfence); 260 261 xe_svm_notifier_unlock(vm); 262 263 out_fini: 264 xe_validation_ctx_fini(&ctx); 265 out_up_write: 266 up_write(&vm->lock); 267 268 return err; 269 } 270 ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO); 271 272 /** 273 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM 274 * @vm: The VM. 
275 * @q: The exec_queue 276 * 277 * Note that this function might be called multiple times on the same queue. 278 */ 279 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 280 { 281 if (!xe_vm_in_preempt_fence_mode(vm)) 282 return; 283 284 down_write(&vm->lock); 285 if (!list_empty(&q->lr.link)) { 286 list_del_init(&q->lr.link); 287 --vm->preempt.num_exec_queues; 288 } 289 if (q->lr.pfence) { 290 dma_fence_enable_sw_signaling(q->lr.pfence); 291 dma_fence_put(q->lr.pfence); 292 q->lr.pfence = NULL; 293 } 294 up_write(&vm->lock); 295 } 296 297 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000 298 299 /** 300 * xe_vm_kill() - VM Kill 301 * @vm: The VM. 302 * @unlocked: Flag indicates the VM's dma-resv is not held 303 * 304 * Kill the VM by setting banned flag indicated VM is no longer available for 305 * use. If in preempt fence mode, also kill all exec queue attached to the VM. 306 */ 307 void xe_vm_kill(struct xe_vm *vm, bool unlocked) 308 { 309 struct xe_exec_queue *q; 310 311 lockdep_assert_held(&vm->lock); 312 313 if (unlocked) 314 xe_vm_lock(vm, false); 315 316 vm->flags |= XE_VM_FLAG_BANNED; 317 trace_xe_vm_kill(vm); 318 319 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) 320 q->ops->kill(q); 321 322 if (unlocked) 323 xe_vm_unlock(vm); 324 325 /* TODO: Inform user the VM is banned */ 326 } 327 328 static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) 329 { 330 struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm); 331 struct xe_bo *bo = gem_to_xe_bo(vm_bo->obj); 332 struct drm_gpuva *gpuva; 333 int ret; 334 335 lockdep_assert_held(&vm->lock); 336 drm_gpuvm_bo_for_each_va(gpuva, vm_bo) 337 list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind, 338 &vm->rebind_list); 339 340 /* Skip re-populating purged BOs, rebind maps scratch pages. 
*/ 341 if (xe_bo_is_purged(bo)) { 342 vm_bo->evicted = false; 343 return 0; 344 } 345 346 if (!try_wait_for_completion(&vm->xe->pm_block)) 347 return -EAGAIN; 348 349 ret = xe_bo_validate(bo, vm, false, exec); 350 if (ret) 351 return ret; 352 353 vm_bo->evicted = false; 354 return 0; 355 } 356 357 /** 358 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas 359 * @vm: The vm for which we are rebinding. 360 * @exec: The struct drm_exec with the locked GEM objects. 361 * @num_fences: The number of fences to reserve for the operation, not 362 * including rebinds and validations. 363 * 364 * Validates all evicted gem objects and rebinds their vmas. Note that 365 * rebindings may cause evictions and hence the validation-rebind 366 * sequence is rerun until there are no more objects to validate. 367 * 368 * Return: 0 on success, negative error code on error. In particular, 369 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if 370 * the drm_exec transaction needs to be restarted. 
371 */ 372 int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec, 373 unsigned int num_fences) 374 { 375 struct drm_gem_object *obj; 376 int ret; 377 378 do { 379 ret = drm_gpuvm_validate(&vm->gpuvm, exec); 380 if (ret) 381 return ret; 382 383 ret = xe_vm_rebind(vm, false); 384 if (ret) 385 return ret; 386 } while (!list_empty(&vm->gpuvm.evict.list)); 387 388 drm_exec_for_each_locked_object(exec, obj) { 389 ret = dma_resv_reserve_fences(obj->resv, num_fences); 390 if (ret) 391 return ret; 392 } 393 394 return 0; 395 } 396 397 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, 398 bool *done) 399 { 400 int err; 401 402 err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0); 403 if (err) 404 return err; 405 406 if (xe_vm_is_idle(vm)) { 407 vm->preempt.rebind_deactivated = true; 408 *done = true; 409 return 0; 410 } 411 412 if (!preempt_fences_waiting(vm)) { 413 *done = true; 414 return 0; 415 } 416 417 err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0); 418 if (err) 419 return err; 420 421 err = wait_for_existing_preempt_fences(vm); 422 if (err) 423 return err; 424 425 /* 426 * Add validation and rebinding to the locking loop since both can 427 * cause evictions which may require blocing dma_resv locks. 428 * The fence reservation here is intended for the new preempt fences 429 * we attach at the end of the rebind work. 430 */ 431 return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues); 432 } 433 434 static bool vm_suspend_rebind_worker(struct xe_vm *vm) 435 { 436 struct xe_device *xe = vm->xe; 437 bool ret = false; 438 439 mutex_lock(&xe->rebind_resume_lock); 440 if (!try_wait_for_completion(&vm->xe->pm_block)) { 441 ret = true; 442 list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list); 443 } 444 mutex_unlock(&xe->rebind_resume_lock); 445 446 return ret; 447 } 448 449 /** 450 * xe_vm_resume_rebind_worker() - Resume the rebind worker. 451 * @vm: The vm whose preempt worker to resume. 
452 * 453 * Resume a preempt worker that was previously suspended by 454 * vm_suspend_rebind_worker(). 455 */ 456 void xe_vm_resume_rebind_worker(struct xe_vm *vm) 457 { 458 queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work); 459 } 460 461 static void preempt_rebind_work_func(struct work_struct *w) 462 { 463 struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work); 464 struct xe_validation_ctx ctx; 465 struct drm_exec exec; 466 unsigned int fence_count = 0; 467 LIST_HEAD(preempt_fences); 468 int err = 0; 469 long wait; 470 int __maybe_unused tries = 0; 471 472 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); 473 trace_xe_vm_rebind_worker_enter(vm); 474 475 down_write(&vm->lock); 476 477 if (xe_vm_is_closed_or_banned(vm)) { 478 up_write(&vm->lock); 479 trace_xe_vm_rebind_worker_exit(vm); 480 return; 481 } 482 483 retry: 484 if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) { 485 up_write(&vm->lock); 486 /* We don't actually block but don't make progress. 
*/ 487 xe_pm_might_block_on_suspend(); 488 return; 489 } 490 491 if (xe_vm_userptr_check_repin(vm)) { 492 err = xe_vm_userptr_pin(vm); 493 if (err) 494 goto out_unlock_outer; 495 } 496 497 err = xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, 498 (struct xe_val_flags) {.interruptible = true}); 499 if (err) 500 goto out_unlock_outer; 501 502 drm_exec_until_all_locked(&exec) { 503 bool done = false; 504 505 err = xe_preempt_work_begin(&exec, vm, &done); 506 drm_exec_retry_on_contention(&exec); 507 xe_validation_retry_on_oom(&ctx, &err); 508 if (err || done) { 509 xe_validation_ctx_fini(&ctx); 510 goto out_unlock_outer; 511 } 512 } 513 514 err = alloc_preempt_fences(vm, &preempt_fences, &fence_count); 515 if (err) 516 goto out_unlock; 517 518 xe_vm_set_validation_exec(vm, &exec); 519 err = xe_vm_rebind(vm, true); 520 xe_vm_set_validation_exec(vm, NULL); 521 if (err) 522 goto out_unlock; 523 524 /* Wait on rebinds and munmap style VM unbinds */ 525 wait = dma_resv_wait_timeout(xe_vm_resv(vm), 526 DMA_RESV_USAGE_KERNEL, 527 false, MAX_SCHEDULE_TIMEOUT); 528 if (wait <= 0) { 529 err = -ETIME; 530 goto out_unlock; 531 } 532 533 #define retry_required(__tries, __vm) \ 534 (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \ 535 (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \ 536 __xe_vm_userptr_needs_repin(__vm)) 537 538 xe_svm_notifier_lock(vm); 539 if (retry_required(tries, vm)) { 540 xe_svm_notifier_unlock(vm); 541 err = -EAGAIN; 542 goto out_unlock; 543 } 544 545 #undef retry_required 546 547 spin_lock(&vm->xe->ttm.lru_lock); 548 ttm_lru_bulk_move_tail(&vm->lru_bulk_move); 549 spin_unlock(&vm->xe->ttm.lru_lock); 550 551 /* Point of no return. 
*/ 552 arm_preempt_fences(vm, &preempt_fences); 553 resume_and_reinstall_preempt_fences(vm, &exec); 554 xe_svm_notifier_unlock(vm); 555 556 out_unlock: 557 xe_validation_ctx_fini(&ctx); 558 out_unlock_outer: 559 if (err == -EAGAIN) { 560 trace_xe_vm_rebind_worker_retry(vm); 561 562 /* 563 * We can't block in workers on a VF which supports migration 564 * given this can block the VF post-migration workers from 565 * getting scheduled. 566 */ 567 if (IS_SRIOV_VF(vm->xe) && 568 xe_sriov_vf_migration_supported(vm->xe)) { 569 up_write(&vm->lock); 570 xe_vm_queue_rebind_worker(vm); 571 return; 572 } 573 574 goto retry; 575 } 576 577 if (err) { 578 drm_warn(&vm->xe->drm, "VM worker error: %d\n", err); 579 xe_vm_kill(vm, true); 580 } 581 up_write(&vm->lock); 582 583 free_preempt_fences(&preempt_fences); 584 585 trace_xe_vm_rebind_worker_exit(vm); 586 } 587 588 /** 589 * xe_vm_add_fault_entry_pf() - Add pagefault to vm fault list 590 * @vm: The VM. 591 * @pf: The pagefault. 592 * 593 * This function takes the data from the pagefault @pf and saves it to @vm->faults.list. 594 * 595 * The function exits silently if the list is full, and reports a warning if the pagefault 596 * could not be saved to the list. 597 */ 598 void xe_vm_add_fault_entry_pf(struct xe_vm *vm, struct xe_pagefault *pf) 599 { 600 struct xe_vm_fault_entry *e; 601 struct xe_hw_engine *hwe; 602 603 /* Do not report faults on reserved engines */ 604 hwe = xe_gt_hw_engine(pf->gt, pf->consumer.engine_class, 605 pf->consumer.engine_instance, false); 606 if (!hwe || xe_hw_engine_is_reserved(hwe)) 607 return; 608 609 e = kzalloc_obj(*e); 610 if (!e) { 611 drm_warn(&vm->xe->drm, 612 "Could not allocate memory for fault!\n"); 613 return; 614 } 615 616 guard(spinlock)(&vm->faults.lock); 617 618 /* 619 * Limit the number of faults in the fault list to prevent 620 * memory overuse. 
621 */ 622 if (vm->faults.len >= MAX_FAULTS_SAVED_PER_VM) { 623 kfree(e); 624 return; 625 } 626 627 e->address = pf->consumer.page_addr; 628 /* 629 * TODO: 630 * Address precision is currently always SZ_4K, but this may change 631 * in the future. 632 */ 633 e->address_precision = SZ_4K; 634 e->access_type = pf->consumer.access_type; 635 e->fault_type = FIELD_GET(XE_PAGEFAULT_TYPE_MASK, 636 pf->consumer.fault_type_level); 637 e->fault_level = FIELD_GET(XE_PAGEFAULT_LEVEL_MASK, 638 pf->consumer.fault_type_level); 639 640 list_add_tail(&e->list, &vm->faults.list); 641 vm->faults.len++; 642 } 643 644 static void xe_vm_clear_fault_entries(struct xe_vm *vm) 645 { 646 struct xe_vm_fault_entry *e, *tmp; 647 648 guard(spinlock)(&vm->faults.lock); 649 list_for_each_entry_safe(e, tmp, &vm->faults.list, list) { 650 list_del(&e->list); 651 kfree(e); 652 } 653 vm->faults.len = 0; 654 } 655 656 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) 657 { 658 int i; 659 660 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) { 661 if (!vops->pt_update_ops[i].num_ops) 662 continue; 663 664 vops->pt_update_ops[i].ops = 665 kmalloc_objs(*vops->pt_update_ops[i].ops, 666 vops->pt_update_ops[i].num_ops, 667 GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 668 if (!vops->pt_update_ops[i].ops) 669 return array_of_binds ? 
-ENOBUFS : -ENOMEM; 670 } 671 672 return 0; 673 } 674 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO); 675 676 static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op) 677 { 678 struct xe_vma *vma; 679 680 vma = gpuva_to_vma(op->base.prefetch.va); 681 682 if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma)) 683 xa_destroy(&op->prefetch_range.range); 684 } 685 686 static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops) 687 { 688 struct xe_vma_op *op; 689 690 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) 691 return; 692 693 list_for_each_entry(op, &vops->list, link) 694 xe_vma_svm_prefetch_op_fini(op); 695 } 696 697 static void xe_vma_ops_fini(struct xe_vma_ops *vops) 698 { 699 int i; 700 701 xe_vma_svm_prefetch_ops_fini(vops); 702 703 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 704 kfree(vops->pt_update_ops[i].ops); 705 } 706 707 static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val) 708 { 709 int i; 710 711 if (!inc_val) 712 return; 713 714 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 715 if (BIT(i) & tile_mask) 716 vops->pt_update_ops[i].num_ops += inc_val; 717 } 718 719 #define XE_VMA_CREATE_MASK ( \ 720 XE_VMA_READ_ONLY | \ 721 XE_VMA_DUMPABLE | \ 722 XE_VMA_SYSTEM_ALLOCATOR | \ 723 DRM_GPUVA_SPARSE | \ 724 XE_VMA_MADV_AUTORESET) 725 726 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, 727 u8 tile_mask) 728 { 729 INIT_LIST_HEAD(&op->link); 730 op->tile_mask = tile_mask; 731 op->base.op = DRM_GPUVA_OP_MAP; 732 op->base.map.va.addr = vma->gpuva.va.addr; 733 op->base.map.va.range = vma->gpuva.va.range; 734 op->base.map.gem.obj = vma->gpuva.gem.obj; 735 op->base.map.gem.offset = vma->gpuva.gem.offset; 736 op->map.vma = vma; 737 op->map.immediate = true; 738 op->map.vma_flags = vma->gpuva.flags & XE_VMA_CREATE_MASK; 739 } 740 741 static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, 742 u8 tile_mask) 743 { 744 struct xe_vma_op *op; 
745 746 op = kzalloc_obj(*op); 747 if (!op) 748 return -ENOMEM; 749 750 xe_vm_populate_rebind(op, vma, tile_mask); 751 list_add_tail(&op->link, &vops->list); 752 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); 753 754 return 0; 755 } 756 757 static struct dma_fence *ops_execute(struct xe_vm *vm, 758 struct xe_vma_ops *vops); 759 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 760 struct xe_exec_queue *q, 761 struct xe_sync_entry *syncs, u32 num_syncs); 762 763 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) 764 { 765 struct dma_fence *fence; 766 struct xe_vma *vma, *next; 767 struct xe_vma_ops vops; 768 struct xe_vma_op *op, *next_op; 769 int err, i; 770 771 lockdep_assert_held(&vm->lock); 772 if ((xe_vm_in_lr_mode(vm) && !rebind_worker) || 773 list_empty(&vm->rebind_list)) 774 return 0; 775 776 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 777 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 778 vops.pt_update_ops[i].wait_vm_bookkeep = true; 779 780 xe_vm_assert_held(vm); 781 list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { 782 xe_assert(vm->xe, vma->tile_present); 783 784 if (rebind_worker) 785 trace_xe_vma_rebind_worker(vma); 786 else 787 trace_xe_vma_rebind_exec(vma); 788 789 err = xe_vm_ops_add_rebind(&vops, vma, 790 vma->tile_present); 791 if (err) 792 goto free_ops; 793 } 794 795 err = xe_vma_ops_alloc(&vops, false); 796 if (err) 797 goto free_ops; 798 799 fence = ops_execute(vm, &vops); 800 if (IS_ERR(fence)) { 801 err = PTR_ERR(fence); 802 } else { 803 dma_fence_put(fence); 804 list_for_each_entry_safe(vma, next, &vm->rebind_list, 805 combined_links.rebind) 806 list_del_init(&vma->combined_links.rebind); 807 } 808 free_ops: 809 list_for_each_entry_safe(op, next_op, &vops.list, link) { 810 list_del(&op->link); 811 kfree(op); 812 } 813 xe_vma_ops_fini(&vops); 814 815 return err; 816 } 817 818 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask) 819 { 820 struct dma_fence *fence = 
NULL; 821 struct xe_vma_ops vops; 822 struct xe_vma_op *op, *next_op; 823 struct xe_tile *tile; 824 u8 id; 825 int err; 826 827 lockdep_assert_held(&vm->lock); 828 xe_vm_assert_held(vm); 829 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 830 831 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 832 vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT; 833 for_each_tile(tile, vm->xe, id) { 834 vops.pt_update_ops[id].wait_vm_bookkeep = true; 835 vops.pt_update_ops[tile->id].q = 836 xe_migrate_exec_queue(tile->migrate); 837 } 838 839 err = xe_vm_ops_add_rebind(&vops, vma, tile_mask); 840 if (err) 841 return ERR_PTR(err); 842 843 err = xe_vma_ops_alloc(&vops, false); 844 if (err) { 845 fence = ERR_PTR(err); 846 goto free_ops; 847 } 848 849 fence = ops_execute(vm, &vops); 850 851 free_ops: 852 list_for_each_entry_safe(op, next_op, &vops.list, link) { 853 list_del(&op->link); 854 kfree(op); 855 } 856 xe_vma_ops_fini(&vops); 857 858 return fence; 859 } 860 861 static void xe_vm_populate_range_rebind(struct xe_vma_op *op, 862 struct xe_vma *vma, 863 struct xe_svm_range *range, 864 u8 tile_mask) 865 { 866 INIT_LIST_HEAD(&op->link); 867 op->tile_mask = tile_mask; 868 op->base.op = DRM_GPUVA_OP_DRIVER; 869 op->subop = XE_VMA_SUBOP_MAP_RANGE; 870 op->map_range.vma = vma; 871 op->map_range.range = range; 872 } 873 874 static int 875 xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops, 876 struct xe_vma *vma, 877 struct xe_svm_range *range, 878 u8 tile_mask) 879 { 880 struct xe_vma_op *op; 881 882 op = kzalloc_obj(*op); 883 if (!op) 884 return -ENOMEM; 885 886 xe_vm_populate_range_rebind(op, vma, range, tile_mask); 887 list_add_tail(&op->link, &vops->list); 888 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); 889 890 return 0; 891 } 892 893 /** 894 * xe_vm_range_rebind() - VM range (re)bind 895 * @vm: The VM which the range belongs to. 896 * @vma: The VMA which the range belongs to. 897 * @range: SVM range to rebind. 898 * @tile_mask: Tile mask to bind the range to. 
899 * 900 * (re)bind SVM range setting up GPU page tables for the range. 901 * 902 * Return: dma fence for rebind to signal completion on success, ERR_PTR on 903 * failure 904 */ 905 struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm, 906 struct xe_vma *vma, 907 struct xe_svm_range *range, 908 u8 tile_mask) 909 { 910 struct dma_fence *fence = NULL; 911 struct xe_vma_ops vops; 912 struct xe_vma_op *op, *next_op; 913 struct xe_tile *tile; 914 u8 id; 915 int err; 916 917 lockdep_assert_held(&vm->lock); 918 xe_vm_assert_held(vm); 919 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 920 xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma)); 921 922 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 923 vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT; 924 for_each_tile(tile, vm->xe, id) { 925 vops.pt_update_ops[id].wait_vm_bookkeep = true; 926 vops.pt_update_ops[tile->id].q = 927 xe_migrate_exec_queue(tile->migrate); 928 } 929 930 err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask); 931 if (err) 932 return ERR_PTR(err); 933 934 err = xe_vma_ops_alloc(&vops, false); 935 if (err) { 936 fence = ERR_PTR(err); 937 goto free_ops; 938 } 939 940 fence = ops_execute(vm, &vops); 941 942 free_ops: 943 list_for_each_entry_safe(op, next_op, &vops.list, link) { 944 list_del(&op->link); 945 kfree(op); 946 } 947 xe_vma_ops_fini(&vops); 948 949 return fence; 950 } 951 952 static void xe_vm_populate_range_unbind(struct xe_vma_op *op, 953 struct xe_svm_range *range) 954 { 955 INIT_LIST_HEAD(&op->link); 956 op->tile_mask = range->tile_present; 957 op->base.op = DRM_GPUVA_OP_DRIVER; 958 op->subop = XE_VMA_SUBOP_UNMAP_RANGE; 959 op->unmap_range.range = range; 960 } 961 962 static int 963 xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops, 964 struct xe_svm_range *range) 965 { 966 struct xe_vma_op *op; 967 968 op = kzalloc_obj(*op); 969 if (!op) 970 return -ENOMEM; 971 972 xe_vm_populate_range_unbind(op, range); 973 list_add_tail(&op->link, &vops->list); 974 xe_vma_ops_incr_pt_update_ops(vops, 
range->tile_present, 1); 975 976 return 0; 977 } 978 979 /** 980 * xe_vm_range_unbind() - VM range unbind 981 * @vm: The VM which the range belongs to. 982 * @range: SVM range to rebind. 983 * 984 * Unbind SVM range removing the GPU page tables for the range. 985 * 986 * Return: dma fence for unbind to signal completion on success, ERR_PTR on 987 * failure 988 */ 989 struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm, 990 struct xe_svm_range *range) 991 { 992 struct dma_fence *fence = NULL; 993 struct xe_vma_ops vops; 994 struct xe_vma_op *op, *next_op; 995 struct xe_tile *tile; 996 u8 id; 997 int err; 998 999 lockdep_assert_held(&vm->lock); 1000 xe_vm_assert_held(vm); 1001 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 1002 1003 if (!range->tile_present) 1004 return dma_fence_get_stub(); 1005 1006 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 1007 for_each_tile(tile, vm->xe, id) { 1008 vops.pt_update_ops[id].wait_vm_bookkeep = true; 1009 vops.pt_update_ops[tile->id].q = 1010 xe_migrate_exec_queue(tile->migrate); 1011 } 1012 1013 err = xe_vm_ops_add_range_unbind(&vops, range); 1014 if (err) 1015 return ERR_PTR(err); 1016 1017 err = xe_vma_ops_alloc(&vops, false); 1018 if (err) { 1019 fence = ERR_PTR(err); 1020 goto free_ops; 1021 } 1022 1023 fence = ops_execute(vm, &vops); 1024 1025 free_ops: 1026 list_for_each_entry_safe(op, next_op, &vops.list, link) { 1027 list_del(&op->link); 1028 kfree(op); 1029 } 1030 xe_vma_ops_fini(&vops); 1031 1032 return fence; 1033 } 1034 1035 static void xe_vma_mem_attr_fini(struct xe_vma_mem_attr *attr) 1036 { 1037 drm_pagemap_put(attr->preferred_loc.dpagemap); 1038 } 1039 1040 static void xe_vma_free(struct xe_vma *vma) 1041 { 1042 xe_vma_mem_attr_fini(&vma->attr); 1043 1044 if (xe_vma_is_userptr(vma)) 1045 kfree(to_userptr_vma(vma)); 1046 else 1047 kfree(vma); 1048 } 1049 1050 /** 1051 * xe_vma_mem_attr_copy() - copy an xe_vma_mem_attr structure. 1052 * @to: Destination. 1053 * @from: Source. 
1054 * 1055 * Copies an xe_vma_mem_attr structure taking care to get reference 1056 * counting of individual members right. 1057 */ 1058 void xe_vma_mem_attr_copy(struct xe_vma_mem_attr *to, struct xe_vma_mem_attr *from) 1059 { 1060 xe_vma_mem_attr_fini(to); 1061 *to = *from; 1062 if (to->preferred_loc.dpagemap) 1063 drm_pagemap_get(to->preferred_loc.dpagemap); 1064 } 1065 1066 static struct xe_vma *xe_vma_create(struct xe_vm *vm, 1067 struct xe_bo *bo, 1068 u64 bo_offset_or_userptr, 1069 u64 start, u64 end, 1070 struct xe_vma_mem_attr *attr, 1071 unsigned int flags) 1072 { 1073 struct xe_vma *vma; 1074 struct xe_tile *tile; 1075 u8 id; 1076 bool is_null = (flags & DRM_GPUVA_SPARSE); 1077 bool is_cpu_addr_mirror = (flags & XE_VMA_SYSTEM_ALLOCATOR); 1078 1079 xe_assert(vm->xe, start < end); 1080 xe_assert(vm->xe, end < vm->size); 1081 1082 /* 1083 * Allocate and ensure that the xe_vma_is_userptr() return 1084 * matches what was allocated. 1085 */ 1086 if (!bo && !is_null && !is_cpu_addr_mirror) { 1087 struct xe_userptr_vma *uvma = kzalloc_obj(*uvma); 1088 1089 if (!uvma) 1090 return ERR_PTR(-ENOMEM); 1091 1092 vma = &uvma->vma; 1093 } else { 1094 vma = kzalloc_obj(*vma); 1095 if (!vma) 1096 return ERR_PTR(-ENOMEM); 1097 1098 if (bo) 1099 vma->gpuva.gem.obj = &bo->ttm.base; 1100 } 1101 1102 INIT_LIST_HEAD(&vma->combined_links.rebind); 1103 1104 INIT_LIST_HEAD(&vma->gpuva.gem.entry); 1105 vma->gpuva.vm = &vm->gpuvm; 1106 vma->gpuva.va.addr = start; 1107 vma->gpuva.va.range = end - start + 1; 1108 vma->gpuva.flags = flags; 1109 1110 for_each_tile(tile, vm->xe, id) 1111 vma->tile_mask |= 0x1 << id; 1112 1113 if (vm->xe->info.has_atomic_enable_pte_bit) 1114 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; 1115 1116 xe_vma_mem_attr_copy(&vma->attr, attr); 1117 if (bo) { 1118 struct drm_gpuvm_bo *vm_bo; 1119 1120 xe_bo_assert_held(bo); 1121 1122 /* 1123 * Reject only WILLNEED mappings on DONTNEED/PURGED BOs. 
This 1124 * gates new vm_bind ioctls (user supplies WILLNEED) while 1125 * still allowing partial-unbind / remap splits whose new VMAs 1126 * inherit the parent's DONTNEED attr. It must also run before 1127 * xe_bo_willneed_get_locked() below so a 0->1 holder bump 1128 * cannot silently promote DONTNEED back to WILLNEED. 1129 */ 1130 if (vma->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED) { 1131 if (xe_bo_madv_is_dontneed(bo)) { 1132 xe_vma_free(vma); 1133 return ERR_PTR(-EBUSY); 1134 } 1135 if (xe_bo_is_purged(bo)) { 1136 xe_vma_free(vma); 1137 return ERR_PTR(-EINVAL); 1138 } 1139 } 1140 1141 vm_bo = drm_gpuvm_bo_obtain_locked(vma->gpuva.vm, &bo->ttm.base); 1142 if (IS_ERR(vm_bo)) { 1143 xe_vma_free(vma); 1144 return ERR_CAST(vm_bo); 1145 } 1146 1147 drm_gpuvm_bo_extobj_add(vm_bo); 1148 drm_gem_object_get(&bo->ttm.base); 1149 vma->gpuva.gem.offset = bo_offset_or_userptr; 1150 drm_gpuva_link(&vma->gpuva, vm_bo); 1151 drm_gpuvm_bo_put(vm_bo); 1152 1153 xe_bo_vma_count_inc_locked(bo); 1154 if (vma->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED) 1155 xe_bo_willneed_get_locked(bo); 1156 } else /* userptr or null */ { 1157 if (!is_null && !is_cpu_addr_mirror) { 1158 struct xe_userptr_vma *uvma = to_userptr_vma(vma); 1159 u64 size = end - start + 1; 1160 int err; 1161 1162 vma->gpuva.gem.offset = bo_offset_or_userptr; 1163 1164 err = xe_userptr_setup(uvma, xe_vma_userptr(vma), size); 1165 if (err) { 1166 xe_vma_free(vma); 1167 return ERR_PTR(err); 1168 } 1169 } 1170 1171 xe_vm_get(vm); 1172 } 1173 1174 return vma; 1175 } 1176 1177 static void xe_vma_destroy_late(struct xe_vma *vma) 1178 { 1179 struct xe_vm *vm = xe_vma_vm(vma); 1180 struct xe_bo *bo = xe_vma_bo(vma); 1181 1182 if (vma->ufence) { 1183 xe_sync_ufence_put(vma->ufence); 1184 vma->ufence = NULL; 1185 } 1186 1187 if (xe_vma_is_userptr(vma)) { 1188 struct xe_userptr_vma *uvma = to_userptr_vma(vma); 1189 1190 xe_userptr_remove(uvma); 1191 xe_vm_put(vm); 1192 } else if (xe_vma_is_null(vma) || 
xe_vma_is_cpu_addr_mirror(vma)) { 1193 xe_vm_put(vm); 1194 } else { 1195 xe_bo_put(bo); 1196 } 1197 1198 xe_vma_free(vma); 1199 } 1200 1201 static void vma_destroy_work_func(struct work_struct *w) 1202 { 1203 struct xe_vma *vma = 1204 container_of(w, struct xe_vma, destroy_work); 1205 1206 xe_vma_destroy_late(vma); 1207 } 1208 1209 static void vma_destroy_cb(struct dma_fence *fence, 1210 struct dma_fence_cb *cb) 1211 { 1212 struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb); 1213 1214 INIT_WORK(&vma->destroy_work, vma_destroy_work_func); 1215 queue_work(system_dfl_wq, &vma->destroy_work); 1216 } 1217 1218 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) 1219 { 1220 struct xe_vm *vm = xe_vma_vm(vma); 1221 struct xe_bo *bo = xe_vma_bo(vma); 1222 1223 lockdep_assert_held_write(&vm->lock); 1224 xe_assert(vm->xe, list_empty(&vma->combined_links.destroy)); 1225 1226 if (xe_vma_is_userptr(vma)) { 1227 xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED); 1228 xe_userptr_destroy(to_userptr_vma(vma)); 1229 } else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) { 1230 xe_bo_assert_held(bo); 1231 1232 drm_gpuva_unlink(&vma->gpuva); 1233 1234 xe_bo_vma_count_dec_locked(bo); 1235 if (vma->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED) 1236 xe_bo_willneed_put_locked(bo); 1237 } 1238 1239 xe_vm_assert_held(vm); 1240 if (fence) { 1241 int ret = dma_fence_add_callback(fence, &vma->destroy_cb, 1242 vma_destroy_cb); 1243 1244 if (ret) { 1245 XE_WARN_ON(ret != -ENOENT); 1246 xe_vma_destroy_late(vma); 1247 } 1248 } else { 1249 xe_vma_destroy_late(vma); 1250 } 1251 } 1252 1253 /** 1254 * xe_vm_lock_vma() - drm_exec utility to lock a vma 1255 * @exec: The drm_exec object we're currently locking for. 1256 * @vma: The vma for witch we want to lock the vm resv and any attached 1257 * object's resv. 1258 * 1259 * Return: 0 on success, negative error code on error. 
In particular 1260 * may return -EDEADLK on WW transaction contention and -EINTR if 1261 * an interruptible wait is terminated by a signal. 1262 */ 1263 int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma) 1264 { 1265 struct xe_vm *vm = xe_vma_vm(vma); 1266 struct xe_bo *bo = xe_vma_bo(vma); 1267 int err; 1268 1269 XE_WARN_ON(!vm); 1270 1271 err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 1272 if (!err && bo && !bo->vm) 1273 err = drm_exec_lock_obj(exec, &bo->ttm.base); 1274 1275 return err; 1276 } 1277 1278 static void xe_vma_destroy_unlocked(struct xe_vma *vma) 1279 { 1280 struct xe_device *xe = xe_vma_vm(vma)->xe; 1281 struct xe_validation_ctx ctx; 1282 struct drm_exec exec; 1283 int err = 0; 1284 1285 xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) { 1286 err = xe_vm_lock_vma(&exec, vma); 1287 drm_exec_retry_on_contention(&exec); 1288 if (XE_WARN_ON(err)) 1289 break; 1290 xe_vma_destroy(vma, NULL); 1291 } 1292 xe_assert(xe, !err); 1293 } 1294 1295 struct xe_vma * 1296 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range) 1297 { 1298 struct drm_gpuva *gpuva; 1299 1300 lockdep_assert_held(&vm->lock); 1301 1302 if (xe_vm_is_closed_or_banned(vm)) 1303 return NULL; 1304 1305 xe_assert(vm->xe, start + range <= vm->size); 1306 1307 gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range); 1308 1309 return gpuva ? 
gpuva_to_vma(gpuva) : NULL; 1310 } 1311 1312 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma) 1313 { 1314 int err; 1315 1316 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1317 lockdep_assert_held(&vm->lock); 1318 1319 mutex_lock(&vm->snap_mutex); 1320 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva); 1321 mutex_unlock(&vm->snap_mutex); 1322 XE_WARN_ON(err); /* Shouldn't be possible */ 1323 1324 return err; 1325 } 1326 1327 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma) 1328 { 1329 xe_assert(vm->xe, xe_vma_vm(vma) == vm); 1330 lockdep_assert_held(&vm->lock); 1331 1332 mutex_lock(&vm->snap_mutex); 1333 drm_gpuva_remove(&vma->gpuva); 1334 mutex_unlock(&vm->snap_mutex); 1335 if (vm->usm.last_fault_vma == vma) 1336 vm->usm.last_fault_vma = NULL; 1337 } 1338 1339 static struct drm_gpuva_op *xe_vm_op_alloc(void) 1340 { 1341 struct xe_vma_op *op; 1342 1343 op = kzalloc_obj(*op); 1344 1345 if (unlikely(!op)) 1346 return NULL; 1347 1348 return &op->base; 1349 } 1350 1351 static void xe_vm_free(struct drm_gpuvm *gpuvm); 1352 1353 static const struct drm_gpuvm_ops gpuvm_ops = { 1354 .op_alloc = xe_vm_op_alloc, 1355 .vm_bo_validate = xe_gpuvm_validate, 1356 .vm_free = xe_vm_free, 1357 }; 1358 1359 static u64 pde_encode_pat_index(u16 pat_index) 1360 { 1361 u64 pte = 0; 1362 1363 if (pat_index & BIT(0)) 1364 pte |= XE_PPGTT_PTE_PAT0; 1365 1366 if (pat_index & BIT(1)) 1367 pte |= XE_PPGTT_PTE_PAT1; 1368 1369 return pte; 1370 } 1371 1372 static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level) 1373 { 1374 u64 pte = 0; 1375 1376 if (pat_index & BIT(0)) 1377 pte |= XE_PPGTT_PTE_PAT0; 1378 1379 if (pat_index & BIT(1)) 1380 pte |= XE_PPGTT_PTE_PAT1; 1381 1382 if (pat_index & BIT(2)) { 1383 if (pt_level) 1384 pte |= XE_PPGTT_PDE_PDPE_PAT2; 1385 else 1386 pte |= XE_PPGTT_PTE_PAT2; 1387 } 1388 1389 if (pat_index & BIT(3)) 1390 pte |= XELPG_PPGTT_PTE_PAT3; 1391 1392 if (pat_index & (BIT(4))) 1393 pte |= XE2_PPGTT_PTE_PAT4; 1394 1395 return pte; 1396 } 
1397 1398 static u64 pte_encode_ps(u32 pt_level) 1399 { 1400 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL); 1401 1402 if (pt_level == 1) 1403 return XE_PDE_PS_2M; 1404 else if (pt_level == 2) 1405 return XE_PDPE_PS_1G; 1406 1407 return 0; 1408 } 1409 1410 static u16 pde_pat_index(struct xe_bo *bo) 1411 { 1412 struct xe_device *xe = xe_bo_device(bo); 1413 u16 pat_index; 1414 1415 /* 1416 * We only have two bits to encode the PAT index in non-leaf nodes, but 1417 * these only point to other paging structures so we only need a minimal 1418 * selection of options. The user PAT index is only for encoding leaf 1419 * nodes, where we have use of more bits to do the encoding. The 1420 * non-leaf nodes are instead under driver control so the chosen index 1421 * here should be distinct from the user PAT index. Also the 1422 * corresponding coherency of the PAT index should be tied to the 1423 * allocation type of the page table (or at least we should pick 1424 * something which is always safe). 1425 */ 1426 if (!xe_bo_is_vram(bo) && bo->ttm.ttm->caching == ttm_cached) 1427 pat_index = xe_cache_pat_idx(xe, XE_CACHE_WB); 1428 else 1429 pat_index = xe_cache_pat_idx(xe, XE_CACHE_NONE); 1430 1431 xe_assert(xe, pat_index <= 3); 1432 1433 return pat_index; 1434 } 1435 1436 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset) 1437 { 1438 u64 pde; 1439 1440 pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1441 pde |= XE_PAGE_PRESENT | XE_PAGE_RW; 1442 pde |= pde_encode_pat_index(pde_pat_index(bo)); 1443 1444 return pde; 1445 } 1446 1447 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, 1448 u16 pat_index, u32 pt_level) 1449 { 1450 u64 pte; 1451 1452 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1453 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1454 pte |= pte_encode_pat_index(pat_index, pt_level); 1455 pte |= pte_encode_ps(pt_level); 1456 1457 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) 1458 pte |= XE_PPGTT_PTE_DM; 1459 1460 return pte; 1461 } 1462 1463 static u64 
xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, 1464 u16 pat_index, u32 pt_level) 1465 { 1466 struct xe_bo *bo = xe_vma_bo(vma); 1467 struct xe_vm *vm = xe_vma_vm(vma); 1468 1469 pte |= XE_PAGE_PRESENT; 1470 1471 if (likely(!xe_vma_read_only(vma))) 1472 pte |= XE_PAGE_RW; 1473 1474 pte |= pte_encode_pat_index(pat_index, pt_level); 1475 pte |= pte_encode_ps(pt_level); 1476 1477 /* 1478 * NULL PTEs redirect to scratch page (return zeros on read). 1479 * Set for: 1) explicit null VMAs, 2) purged BOs on scratch VMs. 1480 * Never set NULL flag without scratch page - causes undefined behavior. 1481 */ 1482 if (unlikely(xe_vma_is_null(vma) || 1483 (bo && xe_bo_is_purged(bo) && xe_vm_has_scratch(vm)))) 1484 pte |= XE_PTE_NULL; 1485 1486 return pte; 1487 } 1488 1489 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, 1490 u16 pat_index, 1491 u32 pt_level, bool devmem, u64 flags) 1492 { 1493 u64 pte; 1494 1495 /* Avoid passing random bits directly as flags */ 1496 xe_assert(xe, !(flags & ~XE_PTE_PS64)); 1497 1498 pte = addr; 1499 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1500 pte |= pte_encode_pat_index(pat_index, pt_level); 1501 pte |= pte_encode_ps(pt_level); 1502 1503 if (devmem) 1504 pte |= XE_PPGTT_PTE_DM; 1505 1506 pte |= flags; 1507 1508 return pte; 1509 } 1510 1511 static const struct xe_pt_ops xelp_pt_ops = { 1512 .pte_encode_bo = xelp_pte_encode_bo, 1513 .pte_encode_vma = xelp_pte_encode_vma, 1514 .pte_encode_addr = xelp_pte_encode_addr, 1515 .pde_encode_bo = xelp_pde_encode_bo, 1516 }; 1517 1518 static void vm_destroy_work_func(struct work_struct *w); 1519 1520 /** 1521 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the 1522 * given tile and vm. 1523 * @xe: xe device. 1524 * @tile: tile to set up for. 1525 * @vm: vm to set up for. 1526 * @exec: The struct drm_exec object used to lock the vm resv. 1527 * 1528 * Sets up a pagetable tree with one page-table per level and a single 1529 * leaf PTE. 
All pagetable entries point to the single page-table or, 1530 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and 1531 * writes become NOPs. 1532 * 1533 * Return: 0 on success, negative error code on error. 1534 */ 1535 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, 1536 struct xe_vm *vm, struct drm_exec *exec) 1537 { 1538 u8 id = tile->id; 1539 int i; 1540 1541 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) { 1542 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i, exec); 1543 if (IS_ERR(vm->scratch_pt[id][i])) { 1544 int err = PTR_ERR(vm->scratch_pt[id][i]); 1545 1546 vm->scratch_pt[id][i] = NULL; 1547 return err; 1548 } 1549 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); 1550 } 1551 1552 return 0; 1553 } 1554 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO); 1555 1556 static void xe_vm_free_scratch(struct xe_vm *vm) 1557 { 1558 struct xe_tile *tile; 1559 u8 id; 1560 1561 if (!xe_vm_has_scratch(vm)) 1562 return; 1563 1564 for_each_tile(tile, vm->xe, id) { 1565 u32 i; 1566 1567 if (!vm->pt_root[id]) 1568 continue; 1569 1570 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i) 1571 if (vm->scratch_pt[id][i]) 1572 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL); 1573 } 1574 } 1575 1576 static void xe_vm_pt_destroy(struct xe_vm *vm) 1577 { 1578 struct xe_tile *tile; 1579 u8 id; 1580 1581 xe_vm_assert_held(vm); 1582 1583 for_each_tile(tile, vm->xe, id) { 1584 if (vm->pt_root[id]) { 1585 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); 1586 vm->pt_root[id] = NULL; 1587 } 1588 } 1589 } 1590 1591 static void xe_vm_init_prove_locking(struct xe_device *xe, struct xe_vm *vm) 1592 { 1593 if (!IS_ENABLED(CONFIG_PROVE_LOCKING)) 1594 return; 1595 1596 fs_reclaim_acquire(GFP_KERNEL); 1597 might_lock(&vm->exec_queues.lock); 1598 fs_reclaim_release(GFP_KERNEL); 1599 1600 down_read(&vm->exec_queues.lock); 1601 might_lock(&xe_root_mmio_gt(xe)->uc.guc.ct.lock); 1602 up_read(&vm->exec_queues.lock); 1603 } 
1604 1605 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) 1606 { 1607 struct drm_gem_object *vm_resv_obj; 1608 struct xe_validation_ctx ctx; 1609 struct drm_exec exec; 1610 struct xe_vm *vm; 1611 int err; 1612 struct xe_tile *tile; 1613 u8 id; 1614 1615 /* 1616 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to 1617 * ever be in faulting mode. 1618 */ 1619 xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE))); 1620 1621 vm = kzalloc(sizeof(*vm), GFP_KERNEL); 1622 if (!vm) 1623 return ERR_PTR(-ENOMEM); 1624 1625 vm->xe = xe; 1626 1627 vm->size = 1ull << xe->info.va_bits; 1628 vm->flags = flags; 1629 1630 if (xef) 1631 vm->xef = xe_file_get(xef); 1632 /** 1633 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be 1634 * manipulated under the PXP mutex. However, the PXP mutex can be taken 1635 * under a user-VM lock when the PXP session is started at exec_queue 1636 * creation time. Those are different VMs and therefore there is no risk 1637 * of deadlock, but we need to tell lockdep that this is the case or it 1638 * will print a warning. 
1639 */ 1640 if (flags & XE_VM_FLAG_GSC) { 1641 static struct lock_class_key gsc_vm_key; 1642 1643 __init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key); 1644 } else { 1645 init_rwsem(&vm->lock); 1646 } 1647 mutex_init(&vm->snap_mutex); 1648 1649 INIT_LIST_HEAD(&vm->rebind_list); 1650 1651 INIT_LIST_HEAD(&vm->userptr.repin_list); 1652 INIT_LIST_HEAD(&vm->userptr.invalidated); 1653 spin_lock_init(&vm->userptr.invalidated_lock); 1654 1655 INIT_LIST_HEAD(&vm->faults.list); 1656 spin_lock_init(&vm->faults.lock); 1657 1658 ttm_lru_bulk_move_init(&vm->lru_bulk_move); 1659 1660 INIT_WORK(&vm->destroy_work, vm_destroy_work_func); 1661 1662 INIT_LIST_HEAD(&vm->preempt.exec_queues); 1663 for (id = 0; id < XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE; ++id) 1664 INIT_LIST_HEAD(&vm->exec_queues.list[id]); 1665 if (flags & XE_VM_FLAG_FAULT_MODE) 1666 vm->preempt.min_run_period_ms = xe->min_run_period_pf_ms; 1667 else 1668 vm->preempt.min_run_period_ms = xe->min_run_period_lr_ms; 1669 1670 init_rwsem(&vm->exec_queues.lock); 1671 xe_vm_init_prove_locking(xe, vm); 1672 1673 for_each_tile(tile, xe, id) 1674 xe_range_fence_tree_init(&vm->rftree[id]); 1675 1676 vm->pt_ops = &xelp_pt_ops; 1677 1678 /* 1679 * Long-running workloads are not protected by the scheduler references. 1680 * By design, run_job for long-running workloads returns NULL and the 1681 * scheduler drops all the references of it, hence protecting the VM 1682 * for this case is necessary. 
1683 */ 1684 if (flags & XE_VM_FLAG_LR_MODE) { 1685 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); 1686 xe_pm_runtime_get_noresume(xe); 1687 INIT_LIST_HEAD(&vm->preempt.pm_activate_link); 1688 } 1689 1690 err = xe_svm_init(vm); 1691 if (err) 1692 goto err_no_resv; 1693 1694 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); 1695 if (!vm_resv_obj) { 1696 err = -ENOMEM; 1697 goto err_svm_fini; 1698 } 1699 1700 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm, 1701 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops); 1702 1703 drm_gem_object_put(vm_resv_obj); 1704 1705 err = 0; 1706 xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true}, 1707 err) { 1708 err = xe_vm_drm_exec_lock(vm, &exec); 1709 drm_exec_retry_on_contention(&exec); 1710 1711 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) 1712 vm->flags |= XE_VM_FLAG_64K; 1713 1714 for_each_tile(tile, xe, id) { 1715 if (flags & XE_VM_FLAG_MIGRATION && 1716 tile->id != XE_VM_FLAG_TILE_ID(flags)) 1717 continue; 1718 1719 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level, 1720 &exec); 1721 if (IS_ERR(vm->pt_root[id])) { 1722 err = PTR_ERR(vm->pt_root[id]); 1723 vm->pt_root[id] = NULL; 1724 xe_vm_pt_destroy(vm); 1725 drm_exec_retry_on_contention(&exec); 1726 xe_validation_retry_on_oom(&ctx, &err); 1727 break; 1728 } 1729 } 1730 if (err) 1731 break; 1732 1733 if (xe_vm_has_scratch(vm)) { 1734 for_each_tile(tile, xe, id) { 1735 if (!vm->pt_root[id]) 1736 continue; 1737 1738 err = xe_vm_create_scratch(xe, tile, vm, &exec); 1739 if (err) { 1740 xe_vm_free_scratch(vm); 1741 xe_vm_pt_destroy(vm); 1742 drm_exec_retry_on_contention(&exec); 1743 xe_validation_retry_on_oom(&ctx, &err); 1744 break; 1745 } 1746 } 1747 if (err) 1748 break; 1749 vm->batch_invalidate_tlb = true; 1750 } 1751 1752 if (vm->flags & XE_VM_FLAG_LR_MODE) { 1753 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); 1754 vm->batch_invalidate_tlb = false; 1755 
} 1756 1757 /* Fill pt_root after allocating scratch tables */ 1758 for_each_tile(tile, xe, id) { 1759 if (!vm->pt_root[id]) 1760 continue; 1761 1762 xe_pt_populate_empty(tile, vm, vm->pt_root[id]); 1763 } 1764 } 1765 if (err) 1766 goto err_close; 1767 1768 /* Kernel migration VM shouldn't have a circular loop.. */ 1769 if (!(flags & XE_VM_FLAG_MIGRATION)) { 1770 for_each_tile(tile, xe, id) { 1771 struct xe_exec_queue *q; 1772 u32 create_flags = EXEC_QUEUE_FLAG_VM; 1773 1774 if (!vm->pt_root[id]) 1775 continue; 1776 1777 if (!xef) /* Not from userspace */ 1778 create_flags |= EXEC_QUEUE_FLAG_KERNEL; 1779 1780 q = xe_exec_queue_create_bind(xe, tile, vm, create_flags, 0); 1781 if (IS_ERR(q)) { 1782 err = PTR_ERR(q); 1783 goto err_close; 1784 } 1785 vm->q[id] = q; 1786 } 1787 } 1788 1789 if (xef && xe->info.has_asid) { 1790 u32 asid; 1791 1792 down_write(&xe->usm.lock); 1793 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, 1794 XA_LIMIT(1, XE_MAX_ASID - 1), 1795 &xe->usm.next_asid, GFP_NOWAIT); 1796 up_write(&xe->usm.lock); 1797 if (err < 0) 1798 goto err_close; 1799 1800 vm->usm.asid = asid; 1801 } 1802 1803 trace_xe_vm_create(vm); 1804 1805 return vm; 1806 1807 err_close: 1808 xe_vm_close_and_put(vm); 1809 return ERR_PTR(err); 1810 1811 err_svm_fini: 1812 if (flags & XE_VM_FLAG_FAULT_MODE) { 1813 vm->size = 0; /* close the vm */ 1814 xe_svm_fini(vm); 1815 } 1816 err_no_resv: 1817 mutex_destroy(&vm->snap_mutex); 1818 for_each_tile(tile, xe, id) 1819 xe_range_fence_tree_fini(&vm->rftree[id]); 1820 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 1821 if (vm->xef) 1822 xe_file_put(vm->xef); 1823 kfree(vm); 1824 if (flags & XE_VM_FLAG_LR_MODE) 1825 xe_pm_runtime_put(xe); 1826 return ERR_PTR(err); 1827 } 1828 1829 static void xe_vm_close(struct xe_vm *vm) 1830 { 1831 struct xe_device *xe = vm->xe; 1832 bool bound; 1833 int idx; 1834 1835 bound = drm_dev_enter(&xe->drm, &idx); 1836 1837 down_write(&vm->lock); 1838 if (xe_vm_in_fault_mode(vm)) 1839 
xe_svm_notifier_lock(vm); 1840 1841 vm->size = 0; 1842 1843 if (!((vm->flags & XE_VM_FLAG_MIGRATION))) { 1844 struct xe_tile *tile; 1845 struct xe_gt *gt; 1846 u8 id; 1847 1848 /* Wait for pending binds */ 1849 dma_resv_wait_timeout(xe_vm_resv(vm), 1850 DMA_RESV_USAGE_BOOKKEEP, 1851 false, MAX_SCHEDULE_TIMEOUT); 1852 1853 if (bound) { 1854 for_each_tile(tile, xe, id) 1855 if (vm->pt_root[id]) 1856 xe_pt_clear(xe, vm->pt_root[id]); 1857 1858 for_each_gt(gt, xe, id) 1859 xe_tlb_inval_vm(>->tlb_inval, vm); 1860 } 1861 } 1862 1863 if (xe_vm_in_fault_mode(vm)) 1864 xe_svm_notifier_unlock(vm); 1865 up_write(&vm->lock); 1866 1867 if (bound) 1868 drm_dev_exit(idx); 1869 } 1870 1871 void xe_vm_close_and_put(struct xe_vm *vm) 1872 { 1873 LIST_HEAD(contested); 1874 struct xe_device *xe = vm->xe; 1875 struct xe_tile *tile; 1876 struct xe_vma *vma, *next_vma; 1877 struct drm_gpuva *gpuva, *next; 1878 u8 id; 1879 1880 xe_assert(xe, !vm->preempt.num_exec_queues); 1881 1882 xe_vm_close(vm); 1883 if (xe_vm_in_preempt_fence_mode(vm)) { 1884 mutex_lock(&xe->rebind_resume_lock); 1885 list_del_init(&vm->preempt.pm_activate_link); 1886 mutex_unlock(&xe->rebind_resume_lock); 1887 flush_work(&vm->preempt.rebind_work); 1888 } 1889 if (xe_vm_in_fault_mode(vm)) 1890 xe_svm_close(vm); 1891 1892 down_write(&vm->lock); 1893 for_each_tile(tile, xe, id) { 1894 if (vm->q[id]) { 1895 int i; 1896 1897 xe_exec_queue_last_fence_put(vm->q[id], vm); 1898 for_each_tlb_inval(i) 1899 xe_exec_queue_tlb_inval_last_fence_put(vm->q[id], vm, i); 1900 } 1901 } 1902 up_write(&vm->lock); 1903 1904 for_each_tile(tile, xe, id) { 1905 if (vm->q[id]) { 1906 xe_exec_queue_kill(vm->q[id]); 1907 xe_exec_queue_put(vm->q[id]); 1908 vm->q[id] = NULL; 1909 } 1910 } 1911 1912 down_write(&vm->lock); 1913 xe_vm_lock(vm, false); 1914 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) { 1915 vma = gpuva_to_vma(gpuva); 1916 1917 if (xe_vma_has_no_bo(vma)) { 1918 xe_svm_notifier_lock(vm); 1919 vma->gpuva.flags |= XE_VMA_DESTROYED; 
1920 xe_svm_notifier_unlock(vm); 1921 } 1922 1923 xe_vm_remove_vma(vm, vma); 1924 1925 /* easy case, remove from VMA? */ 1926 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) { 1927 list_del_init(&vma->combined_links.rebind); 1928 xe_vma_destroy(vma, NULL); 1929 continue; 1930 } 1931 1932 list_move_tail(&vma->combined_links.destroy, &contested); 1933 vma->gpuva.flags |= XE_VMA_DESTROYED; 1934 } 1935 1936 /* 1937 * All vm operations will add shared fences to resv. 1938 * The only exception is eviction for a shared object, 1939 * but even so, the unbind when evicted would still 1940 * install a fence to resv. Hence it's safe to 1941 * destroy the pagetables immediately. 1942 */ 1943 xe_vm_free_scratch(vm); 1944 xe_vm_pt_destroy(vm); 1945 xe_vm_unlock(vm); 1946 1947 /* 1948 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL 1949 * Since we hold a refcount to the bo, we can remove and free 1950 * the members safely without locking. 1951 */ 1952 list_for_each_entry_safe(vma, next_vma, &contested, 1953 combined_links.destroy) { 1954 list_del_init(&vma->combined_links.destroy); 1955 xe_vma_destroy_unlocked(vma); 1956 } 1957 1958 xe_svm_fini(vm); 1959 1960 up_write(&vm->lock); 1961 1962 down_write(&xe->usm.lock); 1963 if (vm->usm.asid) { 1964 void *lookup; 1965 1966 xe_assert(xe, xe->info.has_asid); 1967 xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION)); 1968 1969 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); 1970 xe_assert(xe, lookup == vm); 1971 } 1972 up_write(&xe->usm.lock); 1973 1974 xe_vm_clear_fault_entries(vm); 1975 1976 for_each_tile(tile, xe, id) 1977 xe_range_fence_tree_fini(&vm->rftree[id]); 1978 1979 xe_vm_put(vm); 1980 } 1981 1982 static void vm_destroy_work_func(struct work_struct *w) 1983 { 1984 struct xe_vm *vm = 1985 container_of(w, struct xe_vm, destroy_work); 1986 struct xe_device *xe = vm->xe; 1987 struct xe_tile *tile; 1988 u8 id; 1989 1990 /* xe_vm_close_and_put was not called? 
*/ 1991 xe_assert(xe, !vm->size); 1992 1993 if (xe_vm_in_preempt_fence_mode(vm)) 1994 flush_work(&vm->preempt.rebind_work); 1995 1996 mutex_destroy(&vm->snap_mutex); 1997 1998 if (vm->flags & XE_VM_FLAG_LR_MODE) 1999 xe_pm_runtime_put(xe); 2000 2001 for_each_tile(tile, xe, id) 2002 XE_WARN_ON(vm->pt_root[id]); 2003 2004 trace_xe_vm_free(vm); 2005 2006 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); 2007 2008 if (vm->xef) 2009 xe_file_put(vm->xef); 2010 2011 kfree(vm); 2012 } 2013 2014 static void xe_vm_free(struct drm_gpuvm *gpuvm) 2015 { 2016 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); 2017 2018 /* To destroy the VM we need to be able to sleep */ 2019 queue_work(system_dfl_wq, &vm->destroy_work); 2020 } 2021 2022 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) 2023 { 2024 struct xe_vm *vm; 2025 2026 mutex_lock(&xef->vm.lock); 2027 vm = xa_load(&xef->vm.xa, id); 2028 if (vm) 2029 xe_vm_get(vm); 2030 mutex_unlock(&xef->vm.lock); 2031 2032 return vm; 2033 } 2034 2035 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile) 2036 { 2037 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0); 2038 } 2039 2040 static struct xe_exec_queue * 2041 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 2042 { 2043 return q ? 
q : vm->q[0]; 2044 } 2045 2046 static struct xe_user_fence * 2047 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) 2048 { 2049 unsigned int i; 2050 2051 for (i = 0; i < num_syncs; i++) { 2052 struct xe_sync_entry *e = &syncs[i]; 2053 2054 if (xe_sync_is_ufence(e)) 2055 return xe_sync_ufence_get(e); 2056 } 2057 2058 return NULL; 2059 } 2060 2061 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ 2062 DRM_XE_VM_CREATE_FLAG_LR_MODE | \ 2063 DRM_XE_VM_CREATE_FLAG_FAULT_MODE | \ 2064 DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT) 2065 2066 int xe_vm_create_ioctl(struct drm_device *dev, void *data, 2067 struct drm_file *file) 2068 { 2069 struct xe_device *xe = to_xe_device(dev); 2070 struct xe_file *xef = to_xe_file(file); 2071 struct drm_xe_vm_create *args = data; 2072 struct xe_gt *wa_gt = xe_root_mmio_gt(xe); 2073 struct xe_vm *vm; 2074 u32 id; 2075 int err; 2076 u32 flags = 0; 2077 2078 if (XE_IOCTL_DBG(xe, args->extensions)) 2079 return -EINVAL; 2080 2081 if (wa_gt && XE_GT_WA(wa_gt, 22014953428)) 2082 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE; 2083 2084 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && 2085 !xe->info.has_usm)) 2086 return -EINVAL; 2087 2088 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 2089 return -EINVAL; 2090 2091 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS)) 2092 return -EINVAL; 2093 2094 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE && 2095 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && 2096 !xe->info.needs_scratch)) 2097 return -EINVAL; 2098 2099 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) && 2100 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) 2101 return -EINVAL; 2102 2103 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) && 2104 args->flags & DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT)) 2105 return -EINVAL; 2106 2107 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE) 2108 flags 
|= XE_VM_FLAG_SCRATCH_PAGE; 2109 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) 2110 flags |= XE_VM_FLAG_LR_MODE; 2111 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) 2112 flags |= XE_VM_FLAG_FAULT_MODE; 2113 if (args->flags & DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT) 2114 flags |= XE_VM_FLAG_NO_VM_OVERCOMMIT; 2115 2116 vm = xe_vm_create(xe, flags, xef); 2117 if (IS_ERR(vm)) 2118 return PTR_ERR(vm); 2119 2120 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) 2121 /* Warning: Security issue - never enable by default */ 2122 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); 2123 #endif 2124 2125 /* user id alloc must always be last in ioctl to prevent UAF */ 2126 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); 2127 if (err) 2128 goto err_close_and_put; 2129 2130 args->vm_id = id; 2131 2132 return 0; 2133 2134 err_close_and_put: 2135 xe_vm_close_and_put(vm); 2136 2137 return err; 2138 } 2139 2140 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, 2141 struct drm_file *file) 2142 { 2143 struct xe_device *xe = to_xe_device(dev); 2144 struct xe_file *xef = to_xe_file(file); 2145 struct drm_xe_vm_destroy *args = data; 2146 struct xe_vm *vm; 2147 int err = 0; 2148 2149 if (XE_IOCTL_DBG(xe, args->pad) || 2150 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 2151 return -EINVAL; 2152 2153 mutex_lock(&xef->vm.lock); 2154 vm = xa_load(&xef->vm.xa, args->vm_id); 2155 if (XE_IOCTL_DBG(xe, !vm)) 2156 err = -ENOENT; 2157 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues)) 2158 err = -EBUSY; 2159 else 2160 xa_erase(&xef->vm.xa, args->vm_id); 2161 mutex_unlock(&xef->vm.lock); 2162 2163 if (!err) 2164 xe_vm_close_and_put(vm); 2165 2166 return err; 2167 } 2168 2169 static int xe_vm_query_vmas(struct xe_vm *vm, u64 start, u64 end) 2170 { 2171 struct drm_gpuva *gpuva; 2172 u32 num_vmas = 0; 2173 2174 lockdep_assert_held(&vm->lock); 2175 drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) 2176 num_vmas++; 2177 2178 return 
num_vmas; 2179 } 2180 2181 static int get_mem_attrs(struct xe_vm *vm, u32 *num_vmas, u64 start, 2182 u64 end, struct drm_xe_mem_range_attr *attrs) 2183 { 2184 struct drm_gpuva *gpuva; 2185 int i = 0; 2186 2187 lockdep_assert_held(&vm->lock); 2188 2189 drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) { 2190 struct xe_vma *vma = gpuva_to_vma(gpuva); 2191 2192 if (i == *num_vmas) 2193 return -ENOSPC; 2194 2195 attrs[i].start = xe_vma_start(vma); 2196 attrs[i].end = xe_vma_end(vma); 2197 attrs[i].atomic.val = vma->attr.atomic_access; 2198 attrs[i].pat_index.val = vma->attr.pat_index; 2199 attrs[i].preferred_mem_loc.devmem_fd = vma->attr.preferred_loc.devmem_fd; 2200 attrs[i].preferred_mem_loc.migration_policy = 2201 vma->attr.preferred_loc.migration_policy; 2202 2203 i++; 2204 } 2205 2206 *num_vmas = i; 2207 return 0; 2208 } 2209 2210 int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 2211 { 2212 struct xe_device *xe = to_xe_device(dev); 2213 struct xe_file *xef = to_xe_file(file); 2214 struct drm_xe_mem_range_attr *mem_attrs; 2215 struct drm_xe_vm_query_mem_range_attr *args = data; 2216 u64 __user *attrs_user = u64_to_user_ptr(args->vector_of_mem_attr); 2217 struct xe_vm *vm; 2218 int err = 0; 2219 2220 if (XE_IOCTL_DBG(xe, 2221 ((args->num_mem_ranges == 0 && 2222 (attrs_user || args->sizeof_mem_range_attr != 0)) || 2223 (args->num_mem_ranges > 0 && 2224 (!attrs_user || 2225 args->sizeof_mem_range_attr != 2226 sizeof(struct drm_xe_mem_range_attr)))))) 2227 return -EINVAL; 2228 2229 vm = xe_vm_lookup(xef, args->vm_id); 2230 if (XE_IOCTL_DBG(xe, !vm)) 2231 return -EINVAL; 2232 2233 err = down_read_interruptible(&vm->lock); 2234 if (err) 2235 goto put_vm; 2236 2237 attrs_user = u64_to_user_ptr(args->vector_of_mem_attr); 2238 2239 if (args->num_mem_ranges == 0 && !attrs_user) { 2240 args->num_mem_ranges = xe_vm_query_vmas(vm, args->start, args->start + args->range); 2241 args->sizeof_mem_range_attr = sizeof(struct 
drm_xe_mem_range_attr); 2242 goto unlock_vm; 2243 } 2244 2245 mem_attrs = kvmalloc_array(args->num_mem_ranges, args->sizeof_mem_range_attr, 2246 GFP_KERNEL | __GFP_ACCOUNT | 2247 __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 2248 if (!mem_attrs) { 2249 err = args->num_mem_ranges > 1 ? -ENOBUFS : -ENOMEM; 2250 goto unlock_vm; 2251 } 2252 2253 memset(mem_attrs, 0, args->num_mem_ranges * args->sizeof_mem_range_attr); 2254 err = get_mem_attrs(vm, &args->num_mem_ranges, args->start, 2255 args->start + args->range, mem_attrs); 2256 if (err) 2257 goto free_mem_attrs; 2258 2259 err = copy_to_user(attrs_user, mem_attrs, 2260 args->sizeof_mem_range_attr * args->num_mem_ranges); 2261 if (err) 2262 err = -EFAULT; 2263 2264 free_mem_attrs: 2265 kvfree(mem_attrs); 2266 unlock_vm: 2267 up_read(&vm->lock); 2268 put_vm: 2269 xe_vm_put(vm); 2270 return err; 2271 } 2272 2273 static bool vma_matches(struct xe_vma *vma, u64 page_addr) 2274 { 2275 if (page_addr > xe_vma_end(vma) - 1 || 2276 page_addr + SZ_4K - 1 < xe_vma_start(vma)) 2277 return false; 2278 2279 return true; 2280 } 2281 2282 /** 2283 * xe_vm_find_vma_by_addr() - Find a VMA by its address 2284 * 2285 * @vm: the xe_vm the vma belongs to 2286 * @page_addr: address to look up 2287 */ 2288 struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr) 2289 { 2290 struct xe_vma *vma = NULL; 2291 2292 if (vm->usm.last_fault_vma) { /* Fast lookup */ 2293 if (vma_matches(vm->usm.last_fault_vma, page_addr)) 2294 vma = vm->usm.last_fault_vma; 2295 } 2296 if (!vma) 2297 vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K); 2298 2299 return vma; 2300 } 2301 2302 static const u32 region_to_mem_type[] = { 2303 XE_PL_TT, 2304 XE_PL_VRAM0, 2305 XE_PL_VRAM1, 2306 }; 2307 2308 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma, 2309 bool post_commit) 2310 { 2311 xe_svm_notifier_lock(vm); 2312 vma->gpuva.flags |= XE_VMA_DESTROYED; 2313 xe_svm_notifier_unlock(vm); 2314 if (post_commit) 2315 xe_vm_remove_vma(vm, vma); 2316 } 
2317 2318 #undef ULL 2319 #define ULL unsigned long long 2320 2321 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) 2322 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 2323 { 2324 struct xe_vma *vma; 2325 2326 switch (op->op) { 2327 case DRM_GPUVA_OP_MAP: 2328 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx", 2329 (ULL)op->map.va.addr, (ULL)op->map.va.range); 2330 break; 2331 case DRM_GPUVA_OP_REMAP: 2332 vma = gpuva_to_vma(op->remap.unmap->va); 2333 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 2334 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 2335 op->remap.unmap->keep ? 1 : 0); 2336 if (op->remap.prev) 2337 vm_dbg(&xe->drm, 2338 "REMAP:PREV: addr=0x%016llx, range=0x%016llx", 2339 (ULL)op->remap.prev->va.addr, 2340 (ULL)op->remap.prev->va.range); 2341 if (op->remap.next) 2342 vm_dbg(&xe->drm, 2343 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx", 2344 (ULL)op->remap.next->va.addr, 2345 (ULL)op->remap.next->va.range); 2346 break; 2347 case DRM_GPUVA_OP_UNMAP: 2348 vma = gpuva_to_vma(op->unmap.va); 2349 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", 2350 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), 2351 op->unmap.keep ? 
1 : 0); 2352 break; 2353 case DRM_GPUVA_OP_PREFETCH: 2354 vma = gpuva_to_vma(op->prefetch.va); 2355 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx", 2356 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma)); 2357 break; 2358 default: 2359 drm_warn(&xe->drm, "NOT POSSIBLE\n"); 2360 } 2361 } 2362 #else 2363 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) 2364 { 2365 } 2366 #endif 2367 2368 static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags) 2369 { 2370 if (!xe_vm_in_fault_mode(vm)) 2371 return false; 2372 2373 if (!xe_vm_has_scratch(vm)) 2374 return false; 2375 2376 if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE) 2377 return false; 2378 2379 return true; 2380 } 2381 2382 static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops) 2383 { 2384 struct drm_gpuva_op *__op; 2385 2386 drm_gpuva_for_each_op(__op, ops) { 2387 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2388 2389 xe_vma_svm_prefetch_op_fini(op); 2390 } 2391 } 2392 2393 /* 2394 * Create operations list from IOCTL arguments, setup operations fields so parse 2395 * and commit steps are decoupled from IOCTL arguments. This step can fail. 2396 */ 2397 static struct drm_gpuva_ops * 2398 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, 2399 struct xe_bo *bo, u64 bo_offset_or_userptr, 2400 u64 addr, u64 range, 2401 u32 operation, u32 flags, 2402 u32 prefetch_region, u16 pat_index) 2403 { 2404 struct drm_gem_object *obj = bo ? 
&bo->ttm.base : NULL; 2405 struct drm_gpuva_ops *ops; 2406 struct drm_gpuva_op *__op; 2407 struct drm_gpuvm_bo *vm_bo; 2408 u64 range_start = addr; 2409 u64 range_end = addr + range; 2410 int err; 2411 2412 lockdep_assert_held_write(&vm->lock); 2413 2414 vm_dbg(&vm->xe->drm, 2415 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx", 2416 operation, (ULL)addr, (ULL)range, 2417 (ULL)bo_offset_or_userptr); 2418 2419 switch (operation) { 2420 case DRM_XE_VM_BIND_OP_MAP: 2421 if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) { 2422 xe_vm_find_cpu_addr_mirror_vma_range(vm, &range_start, &range_end); 2423 vops->flags |= XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP; 2424 } 2425 2426 fallthrough; 2427 case DRM_XE_VM_BIND_OP_MAP_USERPTR: { 2428 struct drm_gpuvm_map_req map_req = { 2429 .map.va.addr = range_start, 2430 .map.va.range = range_end - range_start, 2431 .map.gem.obj = obj, 2432 .map.gem.offset = bo_offset_or_userptr, 2433 }; 2434 2435 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, &map_req); 2436 break; 2437 } 2438 case DRM_XE_VM_BIND_OP_UNMAP: 2439 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range); 2440 break; 2441 case DRM_XE_VM_BIND_OP_PREFETCH: 2442 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range); 2443 break; 2444 case DRM_XE_VM_BIND_OP_UNMAP_ALL: 2445 xe_assert(vm->xe, bo); 2446 2447 err = xe_bo_lock(bo, true); 2448 if (err) 2449 return ERR_PTR(err); 2450 2451 vm_bo = drm_gpuvm_bo_obtain_locked(&vm->gpuvm, obj); 2452 if (IS_ERR(vm_bo)) { 2453 xe_bo_unlock(bo); 2454 return ERR_CAST(vm_bo); 2455 } 2456 2457 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo); 2458 drm_gpuvm_bo_put(vm_bo); 2459 xe_bo_unlock(bo); 2460 break; 2461 default: 2462 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n"); 2463 ops = ERR_PTR(-EINVAL); 2464 } 2465 if (IS_ERR(ops)) 2466 return ops; 2467 2468 drm_gpuva_for_each_op(__op, ops) { 2469 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2470 2471 if (__op->op == DRM_GPUVA_OP_MAP) { 2472 op->map.immediate = 2473 flags & 
DRM_XE_VM_BIND_FLAG_IMMEDIATE; 2474 if (flags & DRM_XE_VM_BIND_FLAG_READONLY) 2475 op->map.vma_flags |= XE_VMA_READ_ONLY; 2476 if (flags & DRM_XE_VM_BIND_FLAG_NULL) 2477 op->map.vma_flags |= DRM_GPUVA_SPARSE; 2478 if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) 2479 op->map.vma_flags |= XE_VMA_SYSTEM_ALLOCATOR; 2480 if (flags & DRM_XE_VM_BIND_FLAG_DUMPABLE) 2481 op->map.vma_flags |= XE_VMA_DUMPABLE; 2482 if (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) 2483 op->map.vma_flags |= XE_VMA_MADV_AUTORESET; 2484 op->map.request_decompress = flags & DRM_XE_VM_BIND_FLAG_DECOMPRESS; 2485 op->map.pat_index = pat_index; 2486 op->map.invalidate_on_bind = 2487 __xe_vm_needs_clear_scratch_pages(vm, flags); 2488 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { 2489 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2490 struct xe_tile *tile; 2491 struct xe_svm_range *svm_range; 2492 struct drm_gpusvm_ctx ctx = {}; 2493 struct drm_pagemap *dpagemap = NULL; 2494 u8 id, tile_mask = 0; 2495 u32 i; 2496 2497 if (!xe_vma_is_cpu_addr_mirror(vma)) { 2498 op->prefetch.region = prefetch_region; 2499 break; 2500 } 2501 2502 ctx.read_only = xe_vma_read_only(vma); 2503 ctx.devmem_possible = IS_DGFX(vm->xe) && 2504 IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); 2505 2506 for_each_tile(tile, vm->xe, id) 2507 tile_mask |= 0x1 << id; 2508 2509 xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC); 2510 op->prefetch_range.ranges_count = 0; 2511 2512 if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) { 2513 dpagemap = xe_vma_resolve_pagemap(vma, 2514 xe_device_get_root_tile(vm->xe)); 2515 } else if (prefetch_region) { 2516 tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] - 2517 XE_PL_VRAM0]; 2518 dpagemap = xe_tile_local_pagemap(tile); 2519 } 2520 2521 op->prefetch_range.dpagemap = dpagemap; 2522 alloc_next_range: 2523 svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx); 2524 2525 if (PTR_ERR(svm_range) == -ENOENT) { 2526 u64 ret = xe_svm_find_vma_start(vm, addr, range_end, 
vma); 2527 2528 addr = ret == ULONG_MAX ? 0 : ret; 2529 if (addr) 2530 goto alloc_next_range; 2531 else 2532 goto print_op_label; 2533 } 2534 2535 if (IS_ERR(svm_range)) { 2536 err = PTR_ERR(svm_range); 2537 goto unwind_prefetch_ops; 2538 } 2539 2540 if (xe_svm_range_validate(vm, svm_range, tile_mask, dpagemap)) { 2541 xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID"); 2542 goto check_next_range; 2543 } 2544 2545 err = xa_alloc(&op->prefetch_range.range, 2546 &i, svm_range, xa_limit_32b, 2547 GFP_KERNEL); 2548 2549 if (err) 2550 goto unwind_prefetch_ops; 2551 2552 op->prefetch_range.ranges_count++; 2553 vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH; 2554 xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED"); 2555 check_next_range: 2556 if (range_end > xe_svm_range_end(svm_range) && 2557 xe_svm_range_end(svm_range) < xe_vma_end(vma)) { 2558 addr = xe_svm_range_end(svm_range); 2559 goto alloc_next_range; 2560 } 2561 } 2562 print_op_label: 2563 print_op(vm->xe, __op); 2564 } 2565 2566 return ops; 2567 2568 unwind_prefetch_ops: 2569 xe_svm_prefetch_gpuva_ops_fini(ops); 2570 drm_gpuva_ops_free(&vm->gpuvm, ops); 2571 return ERR_PTR(err); 2572 } 2573 2574 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO); 2575 2576 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, 2577 struct xe_vma_mem_attr *attr, unsigned int flags) 2578 { 2579 struct xe_bo *bo = op->gem.obj ? 
gem_to_xe_bo(op->gem.obj) : NULL; 2580 struct xe_validation_ctx ctx; 2581 struct drm_exec exec; 2582 struct xe_vma *vma; 2583 int err = 0; 2584 2585 lockdep_assert_held_write(&vm->lock); 2586 2587 if (bo) { 2588 err = 0; 2589 xe_validation_guard(&ctx, &vm->xe->val, &exec, 2590 (struct xe_val_flags) {.interruptible = true}, err) { 2591 if (!bo->vm) { 2592 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm)); 2593 drm_exec_retry_on_contention(&exec); 2594 } 2595 if (!err) { 2596 err = drm_exec_lock_obj(&exec, &bo->ttm.base); 2597 drm_exec_retry_on_contention(&exec); 2598 } 2599 if (err) 2600 return ERR_PTR(err); 2601 2602 vma = xe_vma_create(vm, bo, op->gem.offset, 2603 op->va.addr, op->va.addr + 2604 op->va.range - 1, attr, flags); 2605 if (IS_ERR(vma)) 2606 return vma; 2607 2608 if (!bo->vm) { 2609 err = add_preempt_fences(vm, bo); 2610 if (err) { 2611 prep_vma_destroy(vm, vma, false); 2612 xe_vma_destroy(vma, NULL); 2613 } 2614 } 2615 } 2616 if (err) 2617 return ERR_PTR(err); 2618 } else { 2619 vma = xe_vma_create(vm, NULL, op->gem.offset, 2620 op->va.addr, op->va.addr + 2621 op->va.range - 1, attr, flags); 2622 if (IS_ERR(vma)) 2623 return vma; 2624 2625 if (xe_vma_is_userptr(vma)) { 2626 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2627 /* 2628 * -EBUSY has dedicated meaning that a user fence 2629 * attached to the VMA is busy, in practice 2630 * xe_vma_userptr_pin_pages can only fail with -EBUSY if 2631 * we are low on memory so convert this to -ENOMEM. 
2632 */ 2633 if (err == -EBUSY) 2634 err = -ENOMEM; 2635 } 2636 } 2637 if (err) { 2638 prep_vma_destroy(vm, vma, false); 2639 xe_vma_destroy_unlocked(vma); 2640 vma = ERR_PTR(err); 2641 } 2642 2643 return vma; 2644 } 2645 2646 static u64 xe_vma_max_pte_size(struct xe_vma *vma) 2647 { 2648 if (vma->gpuva.flags & XE_VMA_PTE_1G) 2649 return SZ_1G; 2650 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT)) 2651 return SZ_2M; 2652 else if (vma->gpuva.flags & XE_VMA_PTE_64K) 2653 return SZ_64K; 2654 else if (vma->gpuva.flags & XE_VMA_PTE_4K) 2655 return SZ_4K; 2656 2657 return SZ_1G; /* Uninitialized, used max size */ 2658 } 2659 2660 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size) 2661 { 2662 switch (size) { 2663 case SZ_1G: 2664 vma->gpuva.flags |= XE_VMA_PTE_1G; 2665 break; 2666 case SZ_2M: 2667 vma->gpuva.flags |= XE_VMA_PTE_2M; 2668 break; 2669 case SZ_64K: 2670 vma->gpuva.flags |= XE_VMA_PTE_64K; 2671 break; 2672 case SZ_4K: 2673 vma->gpuva.flags |= XE_VMA_PTE_4K; 2674 break; 2675 } 2676 } 2677 2678 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) 2679 { 2680 int err = 0; 2681 2682 lockdep_assert_held_write(&vm->lock); 2683 2684 switch (op->base.op) { 2685 case DRM_GPUVA_OP_MAP: 2686 err |= xe_vm_insert_vma(vm, op->map.vma); 2687 if (!err) 2688 op->flags |= XE_VMA_OP_COMMITTED; 2689 break; 2690 case DRM_GPUVA_OP_REMAP: 2691 { 2692 u8 tile_present = 2693 gpuva_to_vma(op->base.remap.unmap->va)->tile_present; 2694 2695 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va), 2696 true); 2697 op->flags |= XE_VMA_OP_COMMITTED; 2698 2699 if (op->remap.prev) { 2700 err |= xe_vm_insert_vma(vm, op->remap.prev); 2701 if (!err) 2702 op->flags |= XE_VMA_OP_PREV_COMMITTED; 2703 if (!err && op->remap.skip_prev) { 2704 op->remap.prev->tile_present = 2705 tile_present; 2706 } 2707 } 2708 if (op->remap.next) { 2709 err |= xe_vm_insert_vma(vm, op->remap.next); 2710 if (!err) 2711 op->flags |= XE_VMA_OP_NEXT_COMMITTED; 2712 if (!err && 
op->remap.skip_next) { 2713 op->remap.next->tile_present = 2714 tile_present; 2715 } 2716 } 2717 2718 /* 2719 * Adjust for partial unbind after removing VMA from VM. In case 2720 * of unwind we might need to undo this later. 2721 */ 2722 if (!err) { 2723 op->base.remap.unmap->va->va.addr = op->remap.start; 2724 op->base.remap.unmap->va->va.range = op->remap.range; 2725 } 2726 break; 2727 } 2728 case DRM_GPUVA_OP_UNMAP: 2729 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true); 2730 op->flags |= XE_VMA_OP_COMMITTED; 2731 break; 2732 case DRM_GPUVA_OP_PREFETCH: 2733 op->flags |= XE_VMA_OP_COMMITTED; 2734 break; 2735 default: 2736 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n"); 2737 } 2738 2739 return err; 2740 } 2741 2742 /** 2743 * xe_vma_has_default_mem_attrs - Check if a VMA has default memory attributes 2744 * @vma: Pointer to the xe_vma structure to check 2745 * 2746 * This function determines whether the given VMA (Virtual Memory Area) 2747 * has its memory attributes set to their default values. Specifically, 2748 * it checks the following conditions: 2749 * 2750 * - `atomic_access` is `DRM_XE_VMA_ATOMIC_UNDEFINED` 2751 * - `pat_index` is equal to `default_pat_index` 2752 * - `preferred_loc.devmem_fd` is `DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE` 2753 * - `preferred_loc.migration_policy` is `DRM_XE_MIGRATE_ALL_PAGES` 2754 * 2755 * Return: true if all attributes are at their default values, false otherwise. 
2756 */ 2757 bool xe_vma_has_default_mem_attrs(struct xe_vma *vma) 2758 { 2759 return (vma->attr.atomic_access == DRM_XE_ATOMIC_UNDEFINED && 2760 vma->attr.pat_index == vma->attr.default_pat_index && 2761 vma->attr.preferred_loc.devmem_fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE && 2762 vma->attr.preferred_loc.migration_policy == DRM_XE_MIGRATE_ALL_PAGES); 2763 } 2764 2765 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, 2766 struct xe_vma_ops *vops) 2767 { 2768 struct xe_device *xe = vm->xe; 2769 struct drm_gpuva_op *__op; 2770 struct xe_tile *tile; 2771 u8 id, tile_mask = 0; 2772 int err = 0; 2773 2774 lockdep_assert_held_write(&vm->lock); 2775 2776 for_each_tile(tile, vm->xe, id) 2777 tile_mask |= 0x1 << id; 2778 2779 drm_gpuva_for_each_op(__op, ops) { 2780 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2781 struct xe_vma *vma; 2782 unsigned int flags = 0; 2783 2784 INIT_LIST_HEAD(&op->link); 2785 list_add_tail(&op->link, &vops->list); 2786 op->tile_mask = tile_mask; 2787 2788 switch (op->base.op) { 2789 case DRM_GPUVA_OP_MAP: 2790 { 2791 struct xe_vma_mem_attr default_attr = { 2792 .preferred_loc = { 2793 .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE, 2794 .migration_policy = DRM_XE_MIGRATE_ALL_PAGES, 2795 }, 2796 .atomic_access = DRM_XE_ATOMIC_UNDEFINED, 2797 .default_pat_index = op->map.pat_index, 2798 .pat_index = op->map.pat_index, 2799 .purgeable_state = XE_MADV_PURGEABLE_WILLNEED, 2800 }; 2801 2802 flags |= op->map.vma_flags & XE_VMA_CREATE_MASK; 2803 2804 vma = new_vma(vm, &op->base.map, &default_attr, 2805 flags); 2806 if (IS_ERR(vma)) 2807 return PTR_ERR(vma); 2808 2809 op->map.vma = vma; 2810 if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) && 2811 !(op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) || 2812 op->map.invalidate_on_bind) 2813 xe_vma_ops_incr_pt_update_ops(vops, 2814 op->tile_mask, 1); 2815 break; 2816 } 2817 case DRM_GPUVA_OP_REMAP: 2818 { 2819 struct xe_vma *old = 2820 
gpuva_to_vma(op->base.remap.unmap->va); 2821 bool skip = xe_vma_is_cpu_addr_mirror(old); 2822 u64 start = xe_vma_start(old), end = xe_vma_end(old); 2823 int num_remap_ops = 0; 2824 2825 if (op->base.remap.prev) 2826 start = op->base.remap.prev->va.addr + 2827 op->base.remap.prev->va.range; 2828 if (op->base.remap.next) 2829 end = op->base.remap.next->va.addr; 2830 2831 if (xe_vma_is_cpu_addr_mirror(old) && 2832 xe_svm_has_mapping(vm, start, end)) { 2833 if (vops->flags & XE_VMA_OPS_FLAG_MADVISE) 2834 xe_svm_unmap_address_range(vm, start, end); 2835 else 2836 return -EBUSY; 2837 } 2838 2839 op->remap.start = xe_vma_start(old); 2840 op->remap.range = xe_vma_size(old); 2841 op->remap.old_start = op->remap.start; 2842 op->remap.old_range = op->remap.range; 2843 2844 flags |= op->base.remap.unmap->va->flags & XE_VMA_CREATE_MASK; 2845 if (op->base.remap.prev) { 2846 vma = new_vma(vm, op->base.remap.prev, 2847 &old->attr, flags); 2848 if (IS_ERR(vma)) 2849 return PTR_ERR(vma); 2850 2851 op->remap.prev = vma; 2852 2853 /* 2854 * Userptr creates a new SG mapping so 2855 * we must also rebind. 2856 */ 2857 op->remap.skip_prev = skip || 2858 (!xe_vma_is_userptr(old) && 2859 IS_ALIGNED(xe_vma_end(vma), 2860 xe_vma_max_pte_size(old))); 2861 if (op->remap.skip_prev) { 2862 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2863 op->remap.range -= 2864 xe_vma_end(vma) - 2865 xe_vma_start(old); 2866 op->remap.start = xe_vma_end(vma); 2867 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx", 2868 (ULL)op->remap.start, 2869 (ULL)op->remap.range); 2870 } else { 2871 num_remap_ops++; 2872 } 2873 } 2874 2875 if (op->base.remap.next) { 2876 vma = new_vma(vm, op->base.remap.next, 2877 &old->attr, flags); 2878 if (IS_ERR(vma)) 2879 return PTR_ERR(vma); 2880 2881 op->remap.next = vma; 2882 2883 /* 2884 * Userptr creates a new SG mapping so 2885 * we must also rebind. 
2886 */ 2887 op->remap.skip_next = skip || 2888 (!xe_vma_is_userptr(old) && 2889 IS_ALIGNED(xe_vma_start(vma), 2890 xe_vma_max_pte_size(old))); 2891 if (op->remap.skip_next) { 2892 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2893 op->remap.range -= 2894 xe_vma_end(old) - 2895 xe_vma_start(vma); 2896 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx", 2897 (ULL)op->remap.start, 2898 (ULL)op->remap.range); 2899 } else { 2900 num_remap_ops++; 2901 } 2902 } 2903 if (!skip) 2904 num_remap_ops++; 2905 2906 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops); 2907 break; 2908 } 2909 case DRM_GPUVA_OP_UNMAP: 2910 vma = gpuva_to_vma(op->base.unmap.va); 2911 2912 if (xe_vma_is_cpu_addr_mirror(vma) && 2913 xe_svm_has_mapping(vm, xe_vma_start(vma), 2914 xe_vma_end(vma)) && 2915 !(vops->flags & XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP)) 2916 return -EBUSY; 2917 2918 if (!xe_vma_is_cpu_addr_mirror(vma)) 2919 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2920 break; 2921 case DRM_GPUVA_OP_PREFETCH: 2922 vma = gpuva_to_vma(op->base.prefetch.va); 2923 2924 if (xe_vma_is_userptr(vma)) { 2925 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2926 if (err) 2927 return err; 2928 } 2929 2930 if (xe_vma_is_cpu_addr_mirror(vma)) 2931 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 2932 op->prefetch_range.ranges_count); 2933 else 2934 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2935 2936 break; 2937 default: 2938 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n"); 2939 } 2940 2941 err = xe_vma_op_commit(vm, op); 2942 if (err) 2943 return err; 2944 } 2945 2946 return 0; 2947 } 2948 2949 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, 2950 bool post_commit, bool prev_post_commit, 2951 bool next_post_commit) 2952 { 2953 lockdep_assert_held_write(&vm->lock); 2954 2955 switch (op->base.op) { 2956 case DRM_GPUVA_OP_MAP: 2957 if (op->map.vma) { 2958 prep_vma_destroy(vm, op->map.vma, post_commit); 2959 
xe_vma_destroy_unlocked(op->map.vma); 2960 } 2961 break; 2962 case DRM_GPUVA_OP_UNMAP: 2963 { 2964 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va); 2965 2966 if (vma) { 2967 xe_svm_notifier_lock(vm); 2968 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2969 xe_svm_notifier_unlock(vm); 2970 if (post_commit) 2971 xe_vm_insert_vma(vm, vma); 2972 } 2973 break; 2974 } 2975 case DRM_GPUVA_OP_REMAP: 2976 { 2977 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va); 2978 2979 if (op->remap.prev) { 2980 prep_vma_destroy(vm, op->remap.prev, prev_post_commit); 2981 xe_vma_destroy_unlocked(op->remap.prev); 2982 } 2983 if (op->remap.next) { 2984 prep_vma_destroy(vm, op->remap.next, next_post_commit); 2985 xe_vma_destroy_unlocked(op->remap.next); 2986 } 2987 if (vma) { 2988 xe_svm_notifier_lock(vm); 2989 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2990 xe_svm_notifier_unlock(vm); 2991 if (post_commit) { 2992 /* 2993 * Restore the old va range, in case of the 2994 * prev/next skip optimisation. Otherwise what 2995 * we re-insert here could be smaller than the 2996 * original range. 
2997 */ 2998 op->base.remap.unmap->va->va.addr = 2999 op->remap.old_start; 3000 op->base.remap.unmap->va->va.range = 3001 op->remap.old_range; 3002 xe_vm_insert_vma(vm, vma); 3003 } 3004 } 3005 break; 3006 } 3007 case DRM_GPUVA_OP_PREFETCH: 3008 /* Nothing to do */ 3009 break; 3010 default: 3011 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n"); 3012 } 3013 } 3014 3015 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, 3016 struct drm_gpuva_ops **ops, 3017 int num_ops_list) 3018 { 3019 int i; 3020 3021 for (i = num_ops_list - 1; i >= 0; --i) { 3022 struct drm_gpuva_ops *__ops = ops[i]; 3023 struct drm_gpuva_op *__op; 3024 3025 if (!__ops) 3026 continue; 3027 3028 drm_gpuva_for_each_op_reverse(__op, __ops) { 3029 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 3030 3031 xe_vma_op_unwind(vm, op, 3032 op->flags & XE_VMA_OP_COMMITTED, 3033 op->flags & XE_VMA_OP_PREV_COMMITTED, 3034 op->flags & XE_VMA_OP_NEXT_COMMITTED); 3035 } 3036 } 3037 } 3038 3039 /** 3040 * struct xe_vma_lock_and_validate_flags - Flags for vma_lock_and_validate() 3041 * @res_evict: Allow evicting resources during validation 3042 * @validate: Perform BO validation 3043 * @request_decompress: Request BO decompression 3044 * @check_purged: Reject operation if BO is DONTNEED or PURGED 3045 */ 3046 struct xe_vma_lock_and_validate_flags { 3047 u32 res_evict : 1; 3048 u32 validate : 1; 3049 u32 request_decompress : 1; 3050 u32 check_purged : 1; 3051 }; 3052 3053 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, 3054 struct xe_vma_lock_and_validate_flags flags) 3055 { 3056 struct xe_bo *bo = xe_vma_bo(vma); 3057 struct xe_vm *vm = xe_vma_vm(vma); 3058 bool validate_bo = flags.validate; 3059 int err = 0; 3060 3061 if (bo) { 3062 if (!bo->vm) 3063 err = drm_exec_lock_obj(exec, &bo->ttm.base); 3064 3065 /* Reject new mappings to DONTNEED/purged BOs; allow cleanup operations */ 3066 if (!err && flags.check_purged) { 3067 if (xe_bo_madv_is_dontneed(bo)) 3068 err = -EBUSY; /* BO marked 
purgeable */ 3069 else if (xe_bo_is_purged(bo)) 3070 err = -EINVAL; /* BO already purged */ 3071 } 3072 3073 /* Don't validate the BO for DONTNEED/PURGED remap remnants. */ 3074 if (vma->attr.purgeable_state != XE_MADV_PURGEABLE_WILLNEED) 3075 validate_bo = false; 3076 3077 if (!err && validate_bo) 3078 err = xe_bo_validate(bo, vm, 3079 xe_vm_allow_vm_eviction(vm) && 3080 flags.res_evict, exec); 3081 3082 if (err) 3083 return err; 3084 3085 if (flags.request_decompress) 3086 err = xe_bo_decompress(bo); 3087 } 3088 3089 return err; 3090 } 3091 3092 static int check_ufence(struct xe_vma *vma) 3093 { 3094 if (vma->ufence) { 3095 struct xe_user_fence * const f = vma->ufence; 3096 3097 if (!xe_sync_ufence_get_status(f)) 3098 return -EBUSY; 3099 3100 vma->ufence = NULL; 3101 xe_sync_ufence_put(f); 3102 } 3103 3104 return 0; 3105 } 3106 3107 static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) 3108 { 3109 bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); 3110 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 3111 struct drm_pagemap *dpagemap = op->prefetch_range.dpagemap; 3112 int err = 0; 3113 3114 struct xe_svm_range *svm_range; 3115 struct drm_gpusvm_ctx ctx = {}; 3116 unsigned long i; 3117 3118 if (!xe_vma_is_cpu_addr_mirror(vma)) 3119 return 0; 3120 3121 ctx.read_only = xe_vma_read_only(vma); 3122 ctx.devmem_possible = devmem_possible; 3123 ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0; 3124 ctx.device_private_page_owner = xe_svm_private_page_owner(vm, !dpagemap); 3125 3126 /* TODO: Threading the migration */ 3127 xa_for_each(&op->prefetch_range.range, i, svm_range) { 3128 if (!dpagemap) 3129 xe_svm_range_migrate_to_smem(vm, svm_range); 3130 3131 if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) { 3132 drm_dbg(&vm->xe->drm, 3133 "Prefetch pagemap is %s start 0x%016lx end 0x%016lx\n", 3134 dpagemap ? 
dpagemap->drm->unique : "system", 3135 xe_svm_range_start(svm_range), xe_svm_range_end(svm_range)); 3136 } 3137 3138 if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, dpagemap)) { 3139 err = xe_svm_alloc_vram(svm_range, &ctx, dpagemap); 3140 if (err) { 3141 drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n", 3142 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 3143 return -ENODATA; 3144 } 3145 xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM"); 3146 } 3147 3148 err = xe_svm_range_get_pages(vm, svm_range, &ctx); 3149 if (err) { 3150 drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n", 3151 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 3152 if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) 3153 err = -ENODATA; 3154 return err; 3155 } 3156 xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE"); 3157 } 3158 3159 return err; 3160 } 3161 3162 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, 3163 struct xe_vma_ops *vops, struct xe_vma_op *op) 3164 { 3165 int err = 0; 3166 bool res_evict; 3167 3168 /* 3169 * We only allow evicting a BO within the VM if it is not part of an 3170 * array of binds, as an array of binds can evict another BO within the 3171 * bind. 
3172 */ 3173 res_evict = !(vops->flags & XE_VMA_OPS_ARRAY_OF_BINDS); 3174 3175 switch (op->base.op) { 3176 case DRM_GPUVA_OP_MAP: 3177 if (!op->map.invalidate_on_bind) 3178 err = vma_lock_and_validate(exec, op->map.vma, 3179 (struct xe_vma_lock_and_validate_flags) { 3180 .res_evict = res_evict, 3181 .validate = !xe_vm_in_fault_mode(vm) || 3182 op->map.immediate, 3183 .request_decompress = 3184 op->map.request_decompress, 3185 .check_purged = false, 3186 }); 3187 break; 3188 case DRM_GPUVA_OP_REMAP: 3189 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va)); 3190 if (err) 3191 break; 3192 3193 err = vma_lock_and_validate(exec, 3194 gpuva_to_vma(op->base.remap.unmap->va), 3195 (struct xe_vma_lock_and_validate_flags) { 3196 .res_evict = res_evict, 3197 .validate = false, 3198 .request_decompress = false, 3199 .check_purged = false, 3200 }); 3201 if (!err && op->remap.prev) 3202 err = vma_lock_and_validate(exec, op->remap.prev, 3203 (struct xe_vma_lock_and_validate_flags) { 3204 .res_evict = res_evict, 3205 .validate = true, 3206 .request_decompress = false, 3207 .check_purged = false, 3208 }); 3209 if (!err && op->remap.next) 3210 err = vma_lock_and_validate(exec, op->remap.next, 3211 (struct xe_vma_lock_and_validate_flags) { 3212 .res_evict = res_evict, 3213 .validate = true, 3214 .request_decompress = false, 3215 .check_purged = false, 3216 }); 3217 break; 3218 case DRM_GPUVA_OP_UNMAP: 3219 err = check_ufence(gpuva_to_vma(op->base.unmap.va)); 3220 if (err) 3221 break; 3222 3223 err = vma_lock_and_validate(exec, 3224 gpuva_to_vma(op->base.unmap.va), 3225 (struct xe_vma_lock_and_validate_flags) { 3226 .res_evict = res_evict, 3227 .validate = false, 3228 .request_decompress = false, 3229 .check_purged = false, 3230 }); 3231 break; 3232 case DRM_GPUVA_OP_PREFETCH: 3233 { 3234 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 3235 u32 region; 3236 3237 if (!xe_vma_is_cpu_addr_mirror(vma)) { 3238 region = op->prefetch.region; 3239 xe_assert(vm->xe, region == 
DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC || 3240 region <= ARRAY_SIZE(region_to_mem_type)); 3241 } 3242 3243 /* 3244 * PREFETCH is the only op that still gates on BO purge state. 3245 * MAP/REMAP handle this inside xe_vma_create() so partial 3246 * unbind on a DONTNEED BO still works. PREFETCH skips 3247 * xe_vma_create() and would migrate a BO with no backing 3248 * store, so reject DONTNEED/PURGED here. 3249 */ 3250 err = vma_lock_and_validate(exec, 3251 gpuva_to_vma(op->base.prefetch.va), 3252 (struct xe_vma_lock_and_validate_flags) { 3253 .res_evict = res_evict, 3254 .validate = false, 3255 .request_decompress = false, 3256 .check_purged = true, 3257 }); 3258 if (!err && !xe_vma_has_no_bo(vma)) 3259 err = xe_bo_migrate(xe_vma_bo(vma), 3260 region_to_mem_type[region], 3261 NULL, 3262 exec); 3263 break; 3264 } 3265 default: 3266 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n"); 3267 } 3268 3269 return err; 3270 } 3271 3272 static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops) 3273 { 3274 struct xe_vma_op *op; 3275 int err; 3276 3277 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) 3278 return 0; 3279 3280 list_for_each_entry(op, &vops->list, link) { 3281 if (op->base.op == DRM_GPUVA_OP_PREFETCH) { 3282 err = prefetch_ranges(vm, op); 3283 if (err) 3284 return err; 3285 } 3286 } 3287 3288 return 0; 3289 } 3290 3291 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, 3292 struct xe_vm *vm, 3293 struct xe_vma_ops *vops) 3294 { 3295 struct xe_vma_op *op; 3296 int err; 3297 3298 err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 3299 if (err) 3300 return err; 3301 3302 list_for_each_entry(op, &vops->list, link) { 3303 err = op_lock_and_prep(exec, vm, vops, op); 3304 if (err) 3305 return err; 3306 } 3307 3308 #ifdef TEST_VM_OPS_ERROR 3309 if (vops->inject_error && 3310 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK) 3311 return -ENOSPC; 3312 #endif 3313 3314 return 0; 3315 } 3316 3317 static void op_trace(struct xe_vma_op *op) 
3318 { 3319 switch (op->base.op) { 3320 case DRM_GPUVA_OP_MAP: 3321 trace_xe_vma_bind(op->map.vma); 3322 break; 3323 case DRM_GPUVA_OP_REMAP: 3324 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va)); 3325 if (op->remap.prev) 3326 trace_xe_vma_bind(op->remap.prev); 3327 if (op->remap.next) 3328 trace_xe_vma_bind(op->remap.next); 3329 break; 3330 case DRM_GPUVA_OP_UNMAP: 3331 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va)); 3332 break; 3333 case DRM_GPUVA_OP_PREFETCH: 3334 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va)); 3335 break; 3336 case DRM_GPUVA_OP_DRIVER: 3337 break; 3338 default: 3339 XE_WARN_ON("NOT POSSIBLE"); 3340 } 3341 } 3342 3343 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops) 3344 { 3345 struct xe_vma_op *op; 3346 3347 list_for_each_entry(op, &vops->list, link) 3348 op_trace(op); 3349 } 3350 3351 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops) 3352 { 3353 struct xe_exec_queue *q = vops->q; 3354 struct xe_tile *tile; 3355 int number_tiles = 0; 3356 u8 id; 3357 3358 for_each_tile(tile, vm->xe, id) { 3359 if (vops->pt_update_ops[id].num_ops) 3360 ++number_tiles; 3361 3362 if (vops->pt_update_ops[id].q) 3363 continue; 3364 3365 if (q) { 3366 vops->pt_update_ops[id].q = q; 3367 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list)) 3368 q = list_next_entry(q, multi_gt_list); 3369 } else { 3370 vops->pt_update_ops[id].q = vm->q[id]; 3371 } 3372 } 3373 3374 return number_tiles; 3375 } 3376 3377 static struct dma_fence *ops_execute(struct xe_vm *vm, 3378 struct xe_vma_ops *vops) 3379 { 3380 struct xe_tile *tile; 3381 struct dma_fence *fence = NULL; 3382 struct dma_fence **fences = NULL; 3383 struct dma_fence_array *cf = NULL; 3384 int number_tiles = 0, current_fence = 0, n_fence = 0, err, i; 3385 u8 id; 3386 3387 number_tiles = vm_ops_setup_tile_args(vm, vops); 3388 if (number_tiles == 0) 3389 return ERR_PTR(-ENODATA); 3390 3391 for_each_tile(tile, vm->xe, id) { 3392 ++n_fence; 3393 3394 if 
(!(vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT)) 3395 for_each_tlb_inval(i) 3396 ++n_fence; 3397 } 3398 3399 fences = kmalloc_objs(*fences, n_fence); 3400 if (!fences) { 3401 fence = ERR_PTR(-ENOMEM); 3402 goto err_trace; 3403 } 3404 3405 cf = dma_fence_array_alloc(n_fence); 3406 if (!cf) { 3407 fence = ERR_PTR(-ENOMEM); 3408 goto err_out; 3409 } 3410 3411 for_each_tile(tile, vm->xe, id) { 3412 if (!vops->pt_update_ops[id].num_ops) 3413 continue; 3414 3415 err = xe_pt_update_ops_prepare(tile, vops); 3416 if (err) { 3417 fence = ERR_PTR(err); 3418 goto err_out; 3419 } 3420 } 3421 3422 trace_xe_vm_ops_execute(vops); 3423 3424 for_each_tile(tile, vm->xe, id) { 3425 struct xe_exec_queue *q = vops->pt_update_ops[tile->id].q; 3426 3427 fence = NULL; 3428 if (!vops->pt_update_ops[id].num_ops) 3429 goto collect_fences; 3430 3431 fence = xe_pt_update_ops_run(tile, vops); 3432 if (IS_ERR(fence)) 3433 goto err_out; 3434 3435 collect_fences: 3436 fences[current_fence++] = fence ?: dma_fence_get_stub(); 3437 if (vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT) 3438 continue; 3439 3440 xe_migrate_job_lock(tile->migrate, q); 3441 for_each_tlb_inval(i) 3442 fences[current_fence++] = 3443 xe_exec_queue_tlb_inval_last_fence_get(q, vm, i); 3444 xe_migrate_job_unlock(tile->migrate, q); 3445 } 3446 3447 xe_assert(vm->xe, current_fence == n_fence); 3448 dma_fence_array_init(cf, n_fence, fences, dma_fence_context_alloc(1), 3449 1); 3450 fence = &cf->base; 3451 3452 for_each_tile(tile, vm->xe, id) { 3453 if (!vops->pt_update_ops[id].num_ops) 3454 continue; 3455 3456 xe_pt_update_ops_fini(tile, vops); 3457 } 3458 3459 return fence; 3460 3461 err_out: 3462 for_each_tile(tile, vm->xe, id) { 3463 if (!vops->pt_update_ops[id].num_ops) 3464 continue; 3465 3466 xe_pt_update_ops_abort(tile, vops); 3467 } 3468 while (current_fence) 3469 dma_fence_put(fences[--current_fence]); 3470 kfree(fences); 3471 kfree(cf); 3472 3473 err_trace: 3474 trace_xe_vm_ops_fail(vm); 3475 return fence; 3476 } 3477 3478 
/* Replace the user fence tracked by @vma with a new reference to @ufence. */
static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence)
{
	if (vma->ufence)
		xe_sync_ufence_put(vma->ufence);
	vma->ufence = __xe_sync_ufence_get(ufence);
}

/*
 * Attach @ufence to the VMA(s) that @op creates or touches. MAP ops on
 * CPU-address-mirror VMAs and UNMAP ops are left alone.
 */
static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
			  struct xe_user_fence *ufence)
{
	switch (op->base.op) {
	case DRM_GPUVA_OP_MAP:
		if (!xe_vma_is_cpu_addr_mirror(op->map.vma))
			vma_add_ufence(op->map.vma, ufence);
		break;
	case DRM_GPUVA_OP_REMAP:
		if (op->remap.prev)
			vma_add_ufence(op->remap.prev, ufence);
		if (op->remap.next)
			vma_add_ufence(op->remap.next, ufence);
		break;
	case DRM_GPUVA_OP_UNMAP:
		break;
	case DRM_GPUVA_OP_PREFETCH:
		vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence);
		break;
	default:
		drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
	}
}

/*
 * Finish a set of executed bind operations: propagate the user fence (if any
 * sync entry carries one) to the affected VMAs, destroy the VMAs removed by
 * UNMAP/REMAP ops, and, when @fence is non-NULL, signal every sync entry
 * with it.
 */
static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
				   struct dma_fence *fence)
{
	struct xe_user_fence *ufence;
	struct xe_vma_op *op;
	int i;

	ufence = find_ufence_get(vops->syncs, vops->num_syncs);
	list_for_each_entry(op, &vops->list, link) {
		if (ufence)
			op_add_ufence(vm, op, ufence);

		if (op->base.op == DRM_GPUVA_OP_UNMAP)
			xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence);
		else if (op->base.op == DRM_GPUVA_OP_REMAP)
			xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va),
				       fence);
	}
	/* Drop the lookup reference taken by find_ufence_get(). */
	if (ufence)
		xe_sync_ufence_put(ufence);
	if (fence) {
		for (i = 0; i < vops->num_syncs; i++)
			xe_sync_entry_signal(vops->syncs + i, fence);
	}
}

/*
 * Lock and prepare everything @vops needs inside a validation/drm_exec
 * transaction, execute the operations and finalize them.
 *
 * Must be called with vm->lock held for writing.
 *
 * Return: the fence produced by ops_execute() on success, or an ERR_PTR.
 * For ERR_PTR(-ENODATA) (nothing to execute) the operations are still
 * finalized, with a NULL fence, before the error pointer is returned.
 */
static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm,
						   struct xe_vma_ops *vops)
{
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	struct dma_fence *fence;
	int err = 0;

	lockdep_assert_held_write(&vm->lock);

	xe_validation_guard(&ctx, &vm->xe->val, &exec,
			    ((struct xe_val_flags) {
				    .interruptible = true,
				    .exec_ignore_duplicates = true,
			    }), err) {
		err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
		drm_exec_retry_on_contention(&exec);
		xe_validation_retry_on_oom(&ctx, &err);
		if (err)
			return ERR_PTR(err);

		/* Expose the exec context to the VM only while executing. */
		xe_vm_set_validation_exec(vm, &exec);
		fence = ops_execute(vm, vops);
		xe_vm_set_validation_exec(vm, NULL);
		if (IS_ERR(fence)) {
			if (PTR_ERR(fence) == -ENODATA)
				vm_bind_ioctl_ops_fini(vm, vops, NULL);
			return fence;
		}

		vm_bind_ioctl_ops_fini(vm, vops, fence);
	}

	return err ? ERR_PTR(err) : fence;
}
ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);

/* Bind flags accepted by the VM_BIND ioctl on regular builds. */
#define SUPPORTED_FLAGS_STUB  \
	(DRM_XE_VM_BIND_FLAG_READONLY | \
	 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
	 DRM_XE_VM_BIND_FLAG_NULL | \
	 DRM_XE_VM_BIND_FLAG_DUMPABLE | \
	 DRM_XE_VM_BIND_FLAG_CHECK_PXP | \
	 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR | \
	 DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET | \
	 DRM_XE_VM_BIND_FLAG_DECOMPRESS)

/* Error-injection builds additionally accept FORCE_OP_ERROR. */
#ifdef TEST_VM_OPS_ERROR
#define SUPPORTED_FLAGS	(SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR)
#else
#define SUPPORTED_FLAGS	SUPPORTED_FLAGS_STUB
#endif

#define XE_64K_PAGE_MASK 0xffffull
#define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)

/*
 * Validate the VM_BIND ioctl arguments and hand back the array of bind
 * operations through @bind_ops.
 *
 * With more than one bind the operations are copied from user space into a
 * freshly allocated array that the caller must kvfree(); otherwise @bind_ops
 * points at the operation embedded in @args. Each operation's pat_index is
 * sanitized with array_index_nospec() and written back.
 *
 * Return: 0 on success; -EINVAL for malformed arguments, -ENOBUFS when the
 * array cannot be allocated, -EFAULT when the user copy fails, -EOPNOTSUPP
 * when decompression is requested on a device that fails the decompress
 * checks. On any failure after the array was set up, *@bind_ops is reset to
 * NULL (and freed when it was allocated here).
 */
static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
				    struct drm_xe_vm_bind *args,
				    struct drm_xe_vm_bind_op **bind_ops)
{
	int err;
	int i;

	if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->extensions))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->num_syncs > DRM_XE_MAX_SYNCS))
		return -EINVAL;

	if (args->num_binds > 1) {
		u64 __user *bind_user =
			u64_to_user_ptr(args->vector_of_binds);

		*bind_ops = kvmalloc_objs(struct drm_xe_vm_bind_op,
					  args->num_binds,
					  GFP_KERNEL | __GFP_ACCOUNT | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
		if (!*bind_ops)
			return args->num_binds > 1 ? -ENOBUFS : -ENOMEM;

		err = copy_from_user(*bind_ops, bind_user,
				     sizeof(struct drm_xe_vm_bind_op) *
				     args->num_binds);
		if (XE_IOCTL_DBG(xe, err)) {
			err = -EFAULT;
			goto free_bind_ops;
		}
	} else {
		*bind_ops = &args->bind;
	}

	for (i = 0; i < args->num_binds; ++i) {
		u64 range = (*bind_ops)[i].range;
		u64 addr = (*bind_ops)[i].addr;
		u32 op = (*bind_ops)[i].op;
		u32 flags = (*bind_ops)[i].flags;
		u32 obj = (*bind_ops)[i].obj;
		u64 obj_offset = (*bind_ops)[i].obj_offset;
		u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
		bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
		bool is_cpu_addr_mirror = flags &
			DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR;
		bool is_decompress = flags & DRM_XE_VM_BIND_FLAG_DECOMPRESS;
		u16 pat_index = (*bind_ops)[i].pat_index;
		u16 coh_mode;
		bool comp_en;

		if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror &&
				 (!xe_vm_in_fault_mode(vm) ||
				 !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) {
			err = -EINVAL;
			goto free_bind_ops;
		}

		if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
			err = -EINVAL;
			goto free_bind_ops;
		}

		/* Clamp under speculation and store the sanitized index. */
		pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
		(*bind_ops)[i].pat_index = pat_index;
		coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
		comp_en = xe_pat_index_get_comp_en(xe, pat_index);
		if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
			err = -EINVAL;
			goto free_bind_ops;
		}

		if (XE_WARN_ON(coh_mode > XE_COH_2WAY)) {
			err = -EINVAL;
			goto free_bind_ops;
		}

		if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
		    XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
		    XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) ||
		    XE_IOCTL_DBG(xe, obj_offset && (is_null ||
						    is_cpu_addr_mirror)) ||
		    XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
				 (is_decompress || is_null || is_cpu_addr_mirror)) ||
		    XE_IOCTL_DBG(xe, is_decompress &&
				 xe_pat_index_get_comp_en(xe, pat_index)) ||
		    XE_IOCTL_DBG(xe, !obj &&
				 op == DRM_XE_VM_BIND_OP_MAP &&
				 !is_null && !is_cpu_addr_mirror) ||
		    XE_IOCTL_DBG(xe, !obj &&
				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
		    XE_IOCTL_DBG(xe, addr &&
				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
		    XE_IOCTL_DBG(xe, range &&
				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
		    XE_IOCTL_DBG(xe, obj &&
				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
		    XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
		    XE_IOCTL_DBG(xe, !IS_DGFX(xe) && coh_mode == XE_COH_NONE &&
				 is_cpu_addr_mirror) ||
		    XE_IOCTL_DBG(xe, xe_device_is_l2_flush_optimized(xe) &&
				 (op == DRM_XE_VM_BIND_OP_MAP_USERPTR ||
				  is_cpu_addr_mirror) &&
				 (pat_index != 19 && coh_mode != XE_COH_2WAY)) ||
		    XE_IOCTL_DBG(xe, comp_en &&
				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
		    XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR &&
				 !IS_ENABLED(CONFIG_DRM_GPUSVM)) ||
		    XE_IOCTL_DBG(xe, obj &&
				 op == DRM_XE_VM_BIND_OP_PREFETCH) ||
		    XE_IOCTL_DBG(xe, prefetch_region &&
				 op != DRM_XE_VM_BIND_OP_PREFETCH) ||
		    XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC &&
				      /* Guard against undefined shift in BIT(prefetch_region) */
				      (prefetch_region >= (sizeof(xe->info.mem_region_mask) * 8) ||
				      !(BIT(prefetch_region) & xe->info.mem_region_mask)))) ||
		    XE_IOCTL_DBG(xe, obj &&
				 op == DRM_XE_VM_BIND_OP_UNMAP) ||
		    XE_IOCTL_DBG(xe, (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) &&
				 (!is_cpu_addr_mirror || op != DRM_XE_VM_BIND_OP_MAP))) {
			err = -EINVAL;
			goto free_bind_ops;
		}

		/* Offsets, addresses and ranges must be page aligned. */
		if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, !range &&
				 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
			err = -EINVAL;
			goto free_bind_ops;
		}

		if (is_decompress && (XE_IOCTL_DBG(xe, !xe_device_has_flat_ccs(xe)) ||
				      XE_IOCTL_DBG(xe, GRAPHICS_VER(xe) < 20) ||
				      XE_IOCTL_DBG(xe, !IS_DGFX(xe)))) {
			err = -EOPNOTSUPP;
			goto free_bind_ops;
		}
	}

	return 0;

free_bind_ops:
	/* Only the multi-bind array was allocated here. */
	if (args->num_binds > 1)
		kvfree(*bind_ops);
	*bind_ops = NULL;
	return err;
}

/*
 * Signal the sync entries of a bind call that has no operation to execute:
 * obtain a fence via xe_sync_in_fence_get() and signal every entry with it.
 *
 * Return: 0 on success or the error from xe_sync_in_fence_get().
 */
static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
				       struct xe_exec_queue *q,
				       struct xe_sync_entry *syncs,
				       int num_syncs)
{
	struct dma_fence *fence = NULL;
	int i, err = 0;

	if (num_syncs) {
		fence = xe_sync_in_fence_get(syncs, num_syncs,
					     to_wait_exec_queue(vm, q), vm);
		if (IS_ERR(fence))
			return PTR_ERR(fence);

		for (i = 0; i < num_syncs; i++)
			xe_sync_entry_signal(&syncs[i], fence);
	}

	dma_fence_put(fence);

	return err;
}

/* Zero @vops and fill in the VM, exec queue and sync entries it operates on. */
static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
			    struct xe_exec_queue *q,
			    struct xe_sync_entry *syncs, u32 num_syncs)
{
	memset(vops, 0, sizeof(*vops));
	INIT_LIST_HEAD(&vops->list);
	vops->vm = vm;
	vops->q = q;
	vops->syncs = syncs;
	vops->num_syncs = num_syncs;
	vops->flags = 0;
}

/*
 * Check that a bind operation is compatible with the buffer object it
 * targets: compression policy, range/offset within the BO, 64K alignment
 * where required, coherency vs. CPU caching, dma-buf import restrictions
 * and (optionally) PXP key validity.
 *
 * Return: 0 when the bind is acceptable, -EINVAL for an incompatible
 * request, -ENOEXEC when the PXP key check fails.
 */
static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
					u64 addr, u64 range, u64 obj_offset,
					u16 pat_index, u32 op, u32 bind_flags)
{
	u16 coh_mode;
	bool comp_en;

	if (XE_IOCTL_DBG(xe, (bo->flags & XE_BO_FLAG_NO_COMPRESSION) &&
			 xe_pat_index_get_comp_en(xe, pat_index)))
		return -EINVAL;

	/* Written as "offset > size - range" so the sum cannot overflow. */
	if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) ||
	    XE_IOCTL_DBG(xe, obj_offset >
			 xe_bo_size(bo) - range)) {
		return -EINVAL;
	}

	/*
	 * Some platforms require 64k VM_BIND alignment,
	 * specifically those with XE_VRAM_FLAGS_NEED64K.
	 *
	 * Other platforms may have BO's set to 64k physical placement,
	 * but can be mapped at 4k offsets anyway. This check is only
	 * there for the former case.
	 */
	if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) &&
	    (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) {
		if (XE_IOCTL_DBG(xe, obj_offset &
				 XE_64K_PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
			return -EINVAL;
		}
	}

	coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
	if (bo->cpu_caching) {
		if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
				 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
			return -EINVAL;
		}
	} else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
		/*
		 * Imported dma-buf from a different device should
		 * require 1way or 2way coherency since we don't know
		 * how it was mapped on the CPU. Just assume it is
		 * potentially cached on CPU side.
		 */
		return -EINVAL;
	}

	/*
	 * Ensures that imported buffer objects (dma-bufs) are not mapped
	 * with a PAT index that enables compression.
	 */
	comp_en = xe_pat_index_get_comp_en(xe, pat_index);
	if (XE_IOCTL_DBG(xe, bo->ttm.base.import_attach && comp_en))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, bo->ttm.base.import_attach && xe_device_is_l2_flush_optimized(xe) &&
			 (pat_index != 19 && coh_mode != XE_COH_2WAY)))
		return -EINVAL;

	/* If a BO is protected it can only be mapped if the key is still valid */
	if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) &&
	    op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL)
		if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0))
			return -ENOEXEC;

	return 0;
}

/*
 * VM_BIND ioctl entry point.
 *
 * Looks up the VM (and optional bind exec queue), validates the arguments,
 * resolves the GEM objects and sync entries, builds and parses the GPUVA
 * operations for every bind, then executes them under vm->lock held for
 * writing. A call that ends up with nothing to execute (-ENODATA) is turned
 * into signalling the supplied syncs instead.
 *
 * The error labels unwind in the reverse order of acquisition; each label
 * releases everything that was acquired before the jump.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_vm_bind *args = data;
	struct drm_xe_sync __user *syncs_user;
	struct xe_bo **bos = NULL;
	struct drm_gpuva_ops **ops = NULL;
	struct xe_vm *vm;
	struct xe_exec_queue *q = NULL;
	u32 num_syncs, num_ufence = 0;
	struct xe_sync_entry *syncs = NULL;
	struct drm_xe_vm_bind_op *bind_ops = NULL;
	struct xe_vma_ops vops;
	struct dma_fence *fence;
	int err;
	int i;

	vm = xe_vm_lookup(xef, args->vm_id);
	if (XE_IOCTL_DBG(xe, !vm))
		return -EINVAL;

	err = vm_bind_ioctl_check_args(xe, vm, args, &bind_ops);
	if (err)
		goto put_vm;

	if (args->exec_queue_id) {
		q = xe_exec_queue_lookup(xef, args->exec_queue_id);
		if (XE_IOCTL_DBG(xe, !q)) {
			err = -ENOENT;
			goto free_bind_ops;
		}

		if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
			err = -EINVAL;
			goto put_exec_queue;
		}
	}

	if (XE_IOCTL_DBG(xe, q && vm != q->user_vm)) {
		err = -EINVAL;
		goto put_exec_queue;
	}

	/* Ensure all UNMAPs visible */
	xe_svm_flush(vm);

	err = down_write_killable(&vm->lock);
	if (err)
		goto put_exec_queue;

	if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
		err = -ENOENT;
		goto release_vm_lock;
	}

	/* Every bind must fit inside the VM's address space. */
	for (i = 0; i < args->num_binds; ++i) {
		u64 range = bind_ops[i].range;
		u64 addr = bind_ops[i].addr;

		if (XE_IOCTL_DBG(xe, range > vm->size) ||
		    XE_IOCTL_DBG(xe, addr > vm->size - range)) {
			err = -EINVAL;
			goto release_vm_lock;
		}
	}

	if (args->num_binds) {
		bos = kvzalloc_objs(*bos, args->num_binds,
				    GFP_KERNEL | __GFP_ACCOUNT | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
		if (!bos) {
			err = -ENOMEM;
			goto release_vm_lock;
		}

		ops = kvzalloc_objs(*ops, args->num_binds,
				    GFP_KERNEL | __GFP_ACCOUNT | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
		if (!ops) {
			err = -ENOMEM;
			goto free_bos;
		}
	}

	/* Resolve and validate the GEM object of every bind that names one. */
	for (i = 0; i < args->num_binds; ++i) {
		struct drm_gem_object *gem_obj;
		u64 range = bind_ops[i].range;
		u64 addr = bind_ops[i].addr;
		u32 obj = bind_ops[i].obj;
		u64 obj_offset = bind_ops[i].obj_offset;
		u16 pat_index = bind_ops[i].pat_index;
		u32 op = bind_ops[i].op;
		u32 bind_flags = bind_ops[i].flags;

		if (!obj)
			continue;

		gem_obj = drm_gem_object_lookup(file, obj);
		if (XE_IOCTL_DBG(xe, !gem_obj)) {
			err = -ENOENT;
			goto put_obj;
		}
		bos[i] = gem_to_xe_bo(gem_obj);

		err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range,
						   obj_offset, pat_index, op,
						   bind_flags);
		if (err)
			goto put_obj;
	}

	if (args->num_syncs) {
		syncs = kzalloc_objs(*syncs, args->num_syncs);
		if (!syncs) {
			err = -ENOMEM;
			goto put_obj;
		}
	}

	/*
	 * num_syncs counts the successfully parsed entries, so the cleanup
	 * at free_syncs only touches initialized ones.
	 */
	syncs_user = u64_to_user_ptr(args->syncs);
	for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
		struct xe_exec_queue *__q = q ?: vm->q[0];

		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
					  &syncs_user[num_syncs],
					  __q->ufence_syncobj,
					  ++__q->ufence_timeline_value,
					  (xe_vm_in_lr_mode(vm) ?
					   SYNC_PARSE_FLAG_LR_MODE : 0) |
					  (!args->num_binds ?
					   SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
		if (err)
			goto free_syncs;

		if (xe_sync_is_ufence(&syncs[num_syncs]))
			num_ufence++;
	}

	if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
		err = -EINVAL;
		goto free_syncs;
	}

	/* No binds: -ENODATA makes free_syncs signal the syncs directly. */
	if (!args->num_binds) {
		err = -ENODATA;
		goto free_syncs;
	}

	xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
	if (args->num_binds > 1)
		vops.flags |= XE_VMA_OPS_ARRAY_OF_BINDS;
	for (i = 0; i < args->num_binds; ++i) {
		u64 range = bind_ops[i].range;
		u64 addr = bind_ops[i].addr;
		u32 op = bind_ops[i].op;
		u32 flags = bind_ops[i].flags;
		u64 obj_offset = bind_ops[i].obj_offset;
		u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
		u16 pat_index = bind_ops[i].pat_index;

		ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset,
						  addr, range, op, flags,
						  prefetch_region, pat_index);
		if (IS_ERR(ops[i])) {
			err = PTR_ERR(ops[i]);
			/* Keep the unwind loops from touching an ERR_PTR. */
			ops[i] = NULL;
			goto unwind_ops;
		}

		err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops);
		if (err)
			goto unwind_ops;

#ifdef TEST_VM_OPS_ERROR
		if (flags & FORCE_OP_ERROR) {
			vops.inject_error = true;
			vm->xe->vm_inject_error_position =
				(vm->xe->vm_inject_error_position + 1) %
				FORCE_OP_ERROR_COUNT;
		}
#endif
	}

	/* Nothing to do */
	if (list_empty(&vops.list)) {
		err = -ENODATA;
		goto unwind_ops;
	}

	err = xe_vma_ops_alloc(&vops, args->num_binds > 1);
	if (err)
		goto unwind_ops;

	err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops);
	if (err)
		goto unwind_ops;

	fence = vm_bind_ioctl_ops_execute(vm, &vops);
	if (IS_ERR(fence))
		err = PTR_ERR(fence);
	else
		dma_fence_put(fence);

unwind_ops:
	/* -ENODATA means nothing was committed, so nothing to unwind. */
	if (err && err != -ENODATA)
		vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
	xe_vma_ops_fini(&vops);
	for (i = args->num_binds - 1; i >= 0; --i)
		if (ops[i])
			drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
free_syncs:
	if (err == -ENODATA)
		err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
	while (num_syncs--)
		xe_sync_entry_cleanup(&syncs[num_syncs]);

	kfree(syncs);
put_obj:
	for (i = 0; i < args->num_binds; ++i)
		xe_bo_put(bos[i]);

	kvfree(ops);
free_bos:
	kvfree(bos);
release_vm_lock:
	up_write(&vm->lock);
put_exec_queue:
	if (q)
		xe_exec_queue_put(q);
free_bind_ops:
	/* A single bind lives inside @args and was never allocated. */
	if (args->num_binds > 1)
		kvfree(bind_ops);
put_vm:
	xe_vm_put(vm);
	return err;
}

/*
 * Map access type, fault type, and fault level from current bspec
 * specification to user spec abstraction.  The current mapping is
 * approximately 1-to-1, with access type being the only notable
 * exception as it carries additional data with respect to prefetch
 * status that needs to be masked out.
4095 */ 4096 static u8 xe_to_user_access_type(u8 access_type) 4097 { 4098 return access_type & XE_PAGEFAULT_ACCESS_TYPE_MASK; 4099 } 4100 4101 static u8 xe_to_user_fault_type(u8 fault_type) 4102 { 4103 return fault_type; 4104 } 4105 4106 static u8 xe_to_user_fault_level(u8 fault_level) 4107 { 4108 return fault_level; 4109 } 4110 4111 static int fill_faults(struct xe_vm *vm, 4112 struct drm_xe_vm_get_property *args) 4113 { 4114 struct xe_vm_fault __user *usr_ptr = u64_to_user_ptr(args->data); 4115 struct xe_vm_fault *fault_list, fault_entry = { 0 }; 4116 struct xe_vm_fault_entry *entry; 4117 int ret = 0, i = 0, count, entry_size; 4118 4119 entry_size = sizeof(struct xe_vm_fault); 4120 count = args->size / entry_size; 4121 4122 fault_list = kcalloc(count, sizeof(struct xe_vm_fault), GFP_KERNEL); 4123 if (!fault_list) 4124 return -ENOMEM; 4125 4126 spin_lock(&vm->faults.lock); 4127 list_for_each_entry(entry, &vm->faults.list, list) { 4128 if (i == count) 4129 break; 4130 4131 fault_entry.address = xe_device_canonicalize_addr(vm->xe, entry->address); 4132 fault_entry.address_precision = entry->address_precision; 4133 4134 fault_entry.access_type = xe_to_user_access_type(entry->access_type); 4135 fault_entry.fault_type = xe_to_user_fault_type(entry->fault_type); 4136 fault_entry.fault_level = xe_to_user_fault_level(entry->fault_level); 4137 4138 memcpy(&fault_list[i], &fault_entry, entry_size); 4139 4140 i++; 4141 } 4142 spin_unlock(&vm->faults.lock); 4143 4144 ret = copy_to_user(usr_ptr, fault_list, args->size); 4145 4146 kfree(fault_list); 4147 return ret ? 
-EFAULT : 0; 4148 } 4149 4150 static int xe_vm_get_property_helper(struct xe_vm *vm, 4151 struct drm_xe_vm_get_property *args) 4152 { 4153 size_t size; 4154 4155 switch (args->property) { 4156 case DRM_XE_VM_GET_PROPERTY_FAULTS: 4157 spin_lock(&vm->faults.lock); 4158 size = size_mul(sizeof(struct xe_vm_fault), vm->faults.len); 4159 spin_unlock(&vm->faults.lock); 4160 4161 if (!args->size) { 4162 args->size = size; 4163 return 0; 4164 } 4165 4166 /* 4167 * Number of faults may increase between calls to 4168 * xe_vm_get_property_ioctl, so just report the number of 4169 * faults the user requests if it's less than or equal to 4170 * the number of faults in the VM fault array. 4171 * 4172 * We should also at least assert that the args->size value 4173 * is a multiple of the xe_vm_fault struct size. 4174 */ 4175 if (args->size > size || args->size % sizeof(struct xe_vm_fault)) 4176 return -EINVAL; 4177 4178 return fill_faults(vm, args); 4179 } 4180 return -EINVAL; 4181 } 4182 4183 int xe_vm_get_property_ioctl(struct drm_device *drm, void *data, 4184 struct drm_file *file) 4185 { 4186 struct xe_device *xe = to_xe_device(drm); 4187 struct xe_file *xef = to_xe_file(file); 4188 struct drm_xe_vm_get_property *args = data; 4189 struct xe_vm *vm; 4190 int ret = 0; 4191 4192 if (XE_IOCTL_DBG(xe, (args->reserved[0] || args->reserved[1] || 4193 args->reserved[2] || args->extensions || 4194 args->pad))) 4195 return -EINVAL; 4196 4197 vm = xe_vm_lookup(xef, args->vm_id); 4198 if (XE_IOCTL_DBG(xe, !vm)) 4199 return -ENOENT; 4200 4201 ret = xe_vm_get_property_helper(vm, args); 4202 4203 xe_vm_put(vm); 4204 return ret; 4205 } 4206 4207 /** 4208 * xe_vm_bind_kernel_bo - bind a kernel BO to a VM 4209 * @vm: VM to bind the BO to 4210 * @bo: BO to bind 4211 * @q: exec queue to use for the bind (optional) 4212 * @addr: address at which to bind the BO 4213 * @cache_lvl: PAT cache level to use 4214 * 4215 * Execute a VM bind map operation on a kernel-owned BO to bind it into a 4216 * 
kernel-owned VM.
 *
 * The BO, the VM and (when given) the exec queue are referenced for the
 * duration of the call, and vm->lock is held for writing while the operation
 * is built and executed.
 *
 * Returns a dma_fence to track the binding completion if the job to do so was
 * successfully submitted, an error pointer otherwise.
 */
struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo,
				       struct xe_exec_queue *q, u64 addr,
				       enum xe_cache_level cache_lvl)
{
	struct xe_vma_ops vops;
	struct drm_gpuva_ops *ops = NULL;
	struct dma_fence *fence;
	int err;

	xe_bo_get(bo);
	xe_vm_get(vm);
	if (q)
		xe_exec_queue_get(q);

	down_write(&vm->lock);

	xe_vma_ops_init(&vops, vm, q, NULL, 0);

	ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo),
				       DRM_XE_VM_BIND_OP_MAP, 0, 0,
				       xe_cache_pat_idx(vm->xe, cache_lvl));
	if (IS_ERR(ops)) {
		/* Nothing was created, so there is nothing to unwind or free. */
		err = PTR_ERR(ops);
		goto release_vm_lock;
	}

	/*
	 * From here on @ops is owned by this function. A parse failure must
	 * go through unwind_ops so that any partially committed state is
	 * rolled back and @ops is released, exactly as xe_vm_bind_ioctl()
	 * does for the same failure.
	 */
	err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
	if (err)
		goto unwind_ops;

	xe_assert(vm->xe, !list_empty(&vops.list));

	err = xe_vma_ops_alloc(&vops, false);
	if (err)
		goto unwind_ops;

	fence = vm_bind_ioctl_ops_execute(vm, &vops);
	if (IS_ERR(fence))
		err = PTR_ERR(fence);

unwind_ops:
	/* -ENODATA means nothing was executed, so there is nothing to undo. */
	if (err && err != -ENODATA)
		vm_bind_ioctl_ops_unwind(vm, &ops, 1);

	xe_vma_ops_fini(&vops);
	drm_gpuva_ops_free(&vm->gpuvm, ops);

release_vm_lock:
	up_write(&vm->lock);

	if (q)
		xe_exec_queue_put(q);
	xe_vm_put(vm);
	xe_bo_put(bo);

	/* @fence is only meaningful on success; report errors as ERR_PTR. */
	if (err)
		fence = ERR_PTR(err);

	return fence;
}

/**
 * xe_vm_lock() - Lock the vm's dma_resv object
 * @vm: The struct xe_vm whose lock is to be locked
 * @intr: Whether to perform any wait interruptible
 *
 * Return: 0 on success, -EINTR if @intr is true and the wait for a
 * contended lock was interrupted. If @intr is false, the function
 * always returns 0.
4290 */ 4291 int xe_vm_lock(struct xe_vm *vm, bool intr) 4292 { 4293 int ret; 4294 4295 if (intr) 4296 ret = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); 4297 else 4298 ret = dma_resv_lock(xe_vm_resv(vm), NULL); 4299 4300 return ret; 4301 } 4302 4303 /** 4304 * xe_vm_unlock() - Unlock the vm's dma_resv object 4305 * @vm: The struct xe_vm whose lock is to be released. 4306 * 4307 * Unlock a buffer object lock that was locked by xe_vm_lock(). 4308 */ 4309 void xe_vm_unlock(struct xe_vm *vm) 4310 { 4311 dma_resv_unlock(xe_vm_resv(vm)); 4312 } 4313 4314 /** 4315 * xe_vm_invalidate_vma_submit - Submit a job to invalidate GPU mappings for 4316 * VMA. 4317 * @vma: VMA to invalidate 4318 * @batch: TLB invalidation batch to populate; caller must later call 4319 * xe_tlb_inval_batch_wait() on it to wait for completion 4320 * 4321 * Walks a list of page tables leaves which it memset the entries owned by this 4322 * VMA to zero, invalidates the TLBs, but doesn't block waiting for TLB flush 4323 * to complete, but instead populates @batch which can be waited on using 4324 * xe_tlb_inval_batch_wait(). 4325 * 4326 * Returns 0 for success, negative error code otherwise. 
*/
int xe_vm_invalidate_vma_submit(struct xe_vma *vma, struct xe_tlb_inval_batch *batch)
{
	struct xe_device *xe = xe_vma_vm(vma)->xe;
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_tile *tile;
	u8 tile_mask = 0;
	int ret = 0;
	u8 id;

	/* NULL and CPU-address-mirror VMAs must not be passed in here. */
	xe_assert(xe, !xe_vma_is_null(vma));
	xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma));
	trace_xe_vma_invalidate(vma);

	vm_dbg(&vm->xe->drm,
	       "INVALIDATE: addr=0x%016llx, range=0x%016llx",
	       xe_vma_start(vma), xe_vma_size(vma));

	/*
	 * Check that we don't race with page-table updates, tile_invalidated
	 * update is safe
	 */
	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
		if (xe_vma_is_userptr(vma)) {
			/*
			 * Userptr: the notifier lock must be held either in
			 * mode 0, or in mode 1 together with the VM's resv.
			 */
			lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) ||
				       (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) &&
					lockdep_is_held(&xe_vm_resv(vm)->lock.base)));

			WARN_ON_ONCE(!mmu_interval_check_retry
				     (&to_userptr_vma(vma)->userptr.notifier,
				      to_userptr_vma(vma)->userptr.pages.notifier_seq));
			WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm),
							     DMA_RESV_USAGE_BOOKKEEP));

		} else {
			xe_bo_assert_held(xe_vma_bo(vma));
		}
	}

	/* Zap the PTEs and remember which tiles actually had entries. */
	for_each_tile(tile, xe, id)
		if (xe_pt_zap_ptes(tile, vma))
			tile_mask |= BIT(id);

	/* Issue a device write barrier before the TLB invalidation is submitted. */
	xe_device_wmb(xe);

	ret = xe_tlb_inval_range_tilemask_submit(xe, xe_vma_vm(vma)->usm.asid,
						 xe_vma_start(vma), xe_vma_end(vma),
						 tile_mask, batch);

	/* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
	WRITE_ONCE(vma->tile_invalidated, vma->tile_mask);
	return ret;
}

/**
 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
 * @vma: VMA to invalidate
 *
 * Walks the page-table leaves and zeroes the entries owned by this VMA,
 * invalidates the TLBs, and blocks until the TLB invalidation is
 * complete.
*
 * Returns 0 for success, negative error code otherwise.
 */
int xe_vm_invalidate_vma(struct xe_vma *vma)
{
	struct xe_tlb_inval_batch batch;
	int ret;

	ret = xe_vm_invalidate_vma_submit(vma, &batch);
	if (ret)
		return ret;

	xe_tlb_inval_batch_wait(&batch);
	return ret;
}

/*
 * Check the PXP key of every protected BO mapped in @vm.
 *
 * Walks all GPU VAs of the VM under vm->snap_mutex and stops at the first
 * protected BO whose key check fails.
 *
 * Return: 0 if all protected BOs pass, -ENODEV if @vm is NULL, otherwise
 * the error returned by xe_pxp_bo_key_check().
 */
int xe_vm_validate_protected(struct xe_vm *vm)
{
	struct drm_gpuva *gpuva;
	int err = 0;

	if (!vm)
		return -ENODEV;

	mutex_lock(&vm->snap_mutex);

	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);
		struct xe_bo *bo = vma->gpuva.gem.obj ?
			gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;

		/* VMAs without a backing BO have nothing to check. */
		if (!bo)
			continue;

		if (xe_bo_is_protected(bo)) {
			err = xe_pxp_bo_key_check(vm->xe->pxp, bo);
			if (err)
				break;
		}
	}

	mutex_unlock(&vm->snap_mutex);
	return err;
}

/*
 * Snapshot of the dumpable mappings of a VM.
 *
 * uapi_flags holds the DRM_XE_VM_CREATE_FLAG_* bits derived from the VM's
 * flags, num_snaps is the number of entries in snap[]. Each entry records
 * one VMA: its GPU address (ofs) and length, the offset into the backing
 * BO or the userptr address (bo_ofs), XE_VM_SNAP_FLAG_* flags, the memory
 * region / PAT index / CPU caching values printed by
 * xe_vm_snapshot_print(), and the source of the contents (bo or mm). data
 * holds either the captured contents or an ERR_PTR describing why they are
 * not available.
 */
struct xe_vm_snapshot {
	int uapi_flags;
	unsigned long num_snaps;
	struct {
		u64 ofs, bo_ofs;
		unsigned long len;
#define XE_VM_SNAP_FLAG_USERPTR		BIT(0)
#define XE_VM_SNAP_FLAG_READ_ONLY	BIT(1)
#define XE_VM_SNAP_FLAG_IS_NULL		BIT(2)
		unsigned long flags;
		int uapi_mem_region;
		u16 pat_index;
		int cpu_caching;
		struct xe_bo *bo;
		void *data;
		struct mm_struct *mm;
	} snap[];
};

/*
 * Record which VMAs of @vm are marked XE_VMA_DUMPABLE, taking references
 * on their backing BOs / mm so that the contents can be read later by
 * xe_vm_snapshot_capture_delayed(). The allocation uses GFP_NOWAIT.
 *
 * Return: the snapshot, NULL if @vm is NULL, ERR_PTR(-ENODEV) if the VM has
 * no dumpable VMA, or ERR_PTR(-ENOMEM) if the allocation fails.
 */
struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
{
	unsigned long num_snaps = 0, i;
	struct xe_vm_snapshot *snap = NULL;
	struct drm_gpuva *gpuva;

	if (!vm)
		return NULL;

	mutex_lock(&vm->snap_mutex);
	/* First pass: count the dumpable VMAs to size the snapshot. */
	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		if (gpuva->flags & XE_VMA_DUMPABLE)
			num_snaps++;
	}

	if (num_snaps)
		snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
	if (!snap) {
		snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV);
		goto out_unlock;
	}

	if (vm->flags & XE_VM_FLAG_FAULT_MODE)
		snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_FAULT_MODE;
	if (vm->flags & XE_VM_FLAG_LR_MODE)
		snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_LR_MODE;
	if (vm->flags & XE_VM_FLAG_SCRATCH_PAGE)
		snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;

	snap->num_snaps = num_snaps;
	i = 0;
	/* Second pass: fill one entry per dumpable VMA. */
	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);
		struct xe_bo *bo = vma->gpuva.gem.obj ?
			gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;

		if (!(gpuva->flags & XE_VMA_DUMPABLE))
			continue;

		snap->snap[i].ofs = xe_vma_start(vma);
		snap->snap[i].len = xe_vma_size(vma);
		snap->snap[i].flags = xe_vma_read_only(vma) ?
			XE_VM_SNAP_FLAG_READ_ONLY : 0;
		snap->snap[i].pat_index = vma->attr.pat_index;
		if (bo) {
			snap->snap[i].cpu_caching = bo->cpu_caching;
			snap->snap[i].bo = xe_bo_get(bo);
			snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
			switch (bo->ttm.resource->mem_type) {
			case XE_PL_SYSTEM:
			case XE_PL_TT:
				snap->snap[i].uapi_mem_region = 0;
				break;
			case XE_PL_VRAM0:
				snap->snap[i].uapi_mem_region = 1;
				break;
			case XE_PL_VRAM1:
				snap->snap[i].uapi_mem_region = 2;
				break;
			}
		} else if (xe_vma_is_userptr(vma)) {
			struct mm_struct *mm =
				to_userptr_vma(vma)->userptr.notifier.mm;

			/* Pin the mm if it is still alive, else flag an error. */
			if (mmget_not_zero(mm))
				snap->snap[i].mm = mm;
			else
				snap->snap[i].data = ERR_PTR(-EFAULT);

			snap->snap[i].bo_ofs = xe_vma_userptr(vma);
			snap->snap[i].flags |= XE_VM_SNAP_FLAG_USERPTR;
			snap->snap[i].uapi_mem_region = 0;
		} else if (xe_vma_is_null(vma)) {
			snap->snap[i].flags |= XE_VM_SNAP_FLAG_IS_NULL;
			snap->snap[i].uapi_mem_region = -1;
		} else {
			snap->snap[i].data = ERR_PTR(-ENOENT);
			snap->snap[i].uapi_mem_region = -1;
		}
		i++;
	}

out_unlock:
	mutex_unlock(&vm->snap_mutex);
	return snap;
}

/*
 * Read the contents of every snapshot entry into a freshly allocated
 * buffer. Entries already carrying an error and NULL mappings are skipped.
 * BO contents are read with xe_bo_read(); userptr contents are copied from
 * the recorded mm, which is released afterwards. On failure the entry's
 * data becomes an ERR_PTR. The BO reference of each processed entry is
 * dropped.
 */
void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
{
	if (IS_ERR_OR_NULL(snap))
		return;

	for (int i = 0; i < snap->num_snaps; i++) {
		struct xe_bo *bo = snap->snap[i].bo;
		int err;

		if (IS_ERR(snap->snap[i].data) ||
		    snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL)
			continue;

		snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
		if (!snap->snap[i].data) {
			snap->snap[i].data = ERR_PTR(-ENOMEM);
			goto cleanup_bo;
		}

		if (bo) {
			err = xe_bo_read(bo, snap->snap[i].bo_ofs,
					 snap->snap[i].data, snap->snap[i].len);
		} else {
			void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;

			/* Borrow the recorded mm to read the user mapping. */
			kthread_use_mm(snap->snap[i].mm);
			if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
				err = 0;
			else
				err = -EFAULT;
			kthread_unuse_mm(snap->snap[i].mm);

			mmput(snap->snap[i].mm);
			snap->snap[i].mm = NULL;
		}

		if (err) {
			kvfree(snap->snap[i].data);
			snap->snap[i].data = ERR_PTR(err);
		}

cleanup_bo:
		xe_bo_put(bo);
		snap->snap[i].bo = NULL;
	}
}

/*
 * Print a VM snapshot to @p: the VM flags, then for each entry its length,
 * properties and either the error or the ascii85-encoded contents. Printing
 * stops early once the coredump printer reports it is full.
 */
void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
{
	unsigned long i, j;

	if (IS_ERR_OR_NULL(snap)) {
		drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
		return;
	}

	drm_printf(p, "VM.uapi_flags: 0x%x\n", snap->uapi_flags);
	for (i = 0; i < snap->num_snaps; i++) {
		drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);

		drm_printf(p, "[%llx].properties: %s|%s|mem_region=0x%lx|pat_index=%d|cpu_caching=%d\n",
			   snap->snap[i].ofs,
			   snap->snap[i].flags & XE_VM_SNAP_FLAG_READ_ONLY ?
			   "read_only" : "read_write",
			   snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL ?
			   "null_sparse" :
			   snap->snap[i].flags & XE_VM_SNAP_FLAG_USERPTR ?
			   "userptr" : "bo",
			   snap->snap[i].uapi_mem_region == -1 ? 0 :
			   BIT(snap->snap[i].uapi_mem_region),
			   snap->snap[i].pat_index,
			   snap->snap[i].cpu_caching);

		if (IS_ERR(snap->snap[i].data)) {
			drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs,
				   PTR_ERR(snap->snap[i].data));
			continue;
		}

		/* NULL mappings have no contents to dump. */
		if (snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL)
			continue;

		drm_printf(p, "[%llx].data: ", snap->snap[i].ofs);

		/* Contents are emitted one u32 at a time, ascii85-encoded. */
		for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
			u32 *val = snap->snap[i].data + j;
			char dumped[ASCII85_BUFSZ];

			drm_puts(p, ascii85_encode(*val, dumped));
		}

		drm_puts(p, "\n");

		if (drm_coredump_printer_is_full(p))
			return;
	}
}

/*
 * Release a VM snapshot: free captured data buffers and drop any BO and mm
 * references still held by the entries, then free the snapshot itself.
 * NULL and ERR_PTR snapshots are ignored.
 */
void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
{
	unsigned long i;

	if (IS_ERR_OR_NULL(snap))
		return;

	for (i = 0; i < snap->num_snaps; i++) {
		/* data may be an ERR_PTR marker rather than an allocation. */
		if (!IS_ERR(snap->snap[i].data))
			kvfree(snap->snap[i].data);
		xe_bo_put(snap->snap[i].bo);
		if (snap->snap[i].mm)
			mmput(snap->snap[i].mm);
	}
	kvfree(snap);
}

/**
 * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations
 * @xe: Pointer to the Xe device structure
 * @vma: Pointer to the virtual memory area (VMA) structure
 * @is_atomic: In pagefault path and atomic operation
 *
 * This function determines whether the given VMA needs to be migrated to
 * VRAM in order to do atomic GPU operation. The atomic access attribute is
 * taken from the VMA's BO when it has one, otherwise from the VMA itself.
 *
 * Return:
 *   1        - Migration to VRAM is required
 *   0        - Migration is not required
 *   -EACCES  - Invalid access for atomic memory attr
 *
 */
int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic)
{
	u32 atomic_access = xe_vma_bo(vma) ? xe_vma_bo(vma)->attr.atomic_access :
					     vma->attr.atomic_access;

	/* Non-discrete devices and non-atomic accesses never need migration. */
	if (!IS_DGFX(xe) || !is_atomic)
		return false;

	/*
	 * NOTE: The checks implemented here are platform-specific. For
	 * instance, on a device supporting CXL atomics, these would ideally
	 * work universally without additional handling.
	 */
	switch (atomic_access) {
	case DRM_XE_ATOMIC_DEVICE:
		return !xe->info.has_device_atomics_on_smem;

	case DRM_XE_ATOMIC_CPU:
		return -EACCES;

	case DRM_XE_ATOMIC_UNDEFINED:
	case DRM_XE_ATOMIC_GLOBAL:
	default:
		return 1;
	}
}

static int xe_vm_alloc_vma(struct xe_vm *vm,
			   struct drm_gpuvm_map_req *map_req,
			   bool is_madvise)
{
	struct xe_vma_ops vops;
	struct drm_gpuva_ops *ops = NULL;
	struct drm_gpuva_op *__op;
	unsigned int vma_flags = 0;
	bool remap_op = false;
	struct xe_vma_mem_attr tmp_attr = {};
	u16 default_pat;
	int err;

	lockdep_assert_held_write(&vm->lock);

	if (is_madvise)
		ops = drm_gpuvm_madvise_ops_create(&vm->gpuvm, map_req);
	else
		ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, map_req);

	if (IS_ERR(ops))
		return PTR_ERR(ops);

	if (list_empty(&ops->list)) {
		err = 0;
		goto free_ops;
	}

	drm_gpuva_for_each_op(__op, ops) {
		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
		struct xe_vma *vma = NULL;

		if (!is_madvise) {
			if (__op->op == DRM_GPUVA_OP_UNMAP) {
				vma = gpuva_to_vma(op->base.unmap.va);
				XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma));
				default_pat = vma->attr.default_pat_index;
				vma_flags = vma->gpuva.flags;
			}

			if (__op->op == DRM_GPUVA_OP_REMAP) {
				vma = gpuva_to_vma(op->base.remap.unmap->va);
				default_pat = vma->attr.default_pat_index;
				vma_flags = vma->gpuva.flags;
			}

			if (__op->op == DRM_GPUVA_OP_MAP) {
				op->map.vma_flags |= vma_flags &
XE_VMA_CREATE_MASK; 4744 op->map.pat_index = default_pat; 4745 } 4746 } else { 4747 if (__op->op == DRM_GPUVA_OP_REMAP) { 4748 vma = gpuva_to_vma(op->base.remap.unmap->va); 4749 xe_assert(vm->xe, !remap_op); 4750 xe_assert(vm->xe, xe_vma_has_no_bo(vma)); 4751 remap_op = true; 4752 vma_flags = vma->gpuva.flags; 4753 } 4754 4755 if (__op->op == DRM_GPUVA_OP_MAP) { 4756 xe_assert(vm->xe, remap_op); 4757 remap_op = false; 4758 /* 4759 * In case of madvise ops DRM_GPUVA_OP_MAP is 4760 * always after DRM_GPUVA_OP_REMAP, so ensure 4761 * to propagate the flags from the vma we're 4762 * unmapping. 4763 */ 4764 op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK; 4765 } 4766 } 4767 print_op(vm->xe, __op); 4768 } 4769 4770 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 4771 4772 if (is_madvise) 4773 vops.flags |= XE_VMA_OPS_FLAG_MADVISE; 4774 else 4775 vops.flags |= XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP; 4776 4777 err = vm_bind_ioctl_ops_parse(vm, ops, &vops); 4778 if (err) 4779 goto unwind_ops; 4780 4781 xe_vm_lock(vm, false); 4782 4783 drm_gpuva_for_each_op(__op, ops) { 4784 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 4785 struct xe_vma *vma; 4786 4787 if (__op->op == DRM_GPUVA_OP_UNMAP) { 4788 vma = gpuva_to_vma(op->base.unmap.va); 4789 /* There should be no unmap for madvise */ 4790 if (is_madvise) 4791 XE_WARN_ON("UNEXPECTED UNMAP"); 4792 4793 xe_vma_destroy(vma, NULL); 4794 } else if (__op->op == DRM_GPUVA_OP_REMAP) { 4795 vma = gpuva_to_vma(op->base.remap.unmap->va); 4796 /* In case of madvise ops Store attributes for REMAP UNMAPPED 4797 * VMA, so they can be assigned to newly MAP created vma. 4798 */ 4799 if (is_madvise) 4800 xe_vma_mem_attr_copy(&tmp_attr, &vma->attr); 4801 4802 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL); 4803 } else if (__op->op == DRM_GPUVA_OP_MAP) { 4804 vma = op->map.vma; 4805 /* In case of madvise call, MAP will always be followed by REMAP. 
4806 * Therefore temp_attr will always have sane values, making it safe to 4807 * copy them to new vma. 4808 */ 4809 if (is_madvise) 4810 xe_vma_mem_attr_copy(&vma->attr, &tmp_attr); 4811 } 4812 } 4813 4814 xe_vm_unlock(vm); 4815 drm_gpuva_ops_free(&vm->gpuvm, ops); 4816 xe_vma_mem_attr_fini(&tmp_attr); 4817 return 0; 4818 4819 unwind_ops: 4820 vm_bind_ioctl_ops_unwind(vm, &ops, 1); 4821 free_ops: 4822 drm_gpuva_ops_free(&vm->gpuvm, ops); 4823 return err; 4824 } 4825 4826 /** 4827 * xe_vm_alloc_madvise_vma - Allocate VMA's with madvise ops 4828 * @vm: Pointer to the xe_vm structure 4829 * @start: Starting input address 4830 * @range: Size of the input range 4831 * 4832 * This function splits existing vma to create new vma for user provided input range 4833 * 4834 * Return: 0 if success 4835 */ 4836 int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range) 4837 { 4838 struct drm_gpuvm_map_req map_req = { 4839 .map.va.addr = start, 4840 .map.va.range = range, 4841 }; 4842 4843 lockdep_assert_held_write(&vm->lock); 4844 4845 vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range); 4846 4847 return xe_vm_alloc_vma(vm, &map_req, true); 4848 } 4849 4850 static bool is_cpu_addr_vma_with_default_attr(struct xe_vma *vma) 4851 { 4852 return vma && xe_vma_is_cpu_addr_mirror(vma) && 4853 xe_vma_has_default_mem_attrs(vma); 4854 } 4855 4856 /** 4857 * xe_vm_find_cpu_addr_mirror_vma_range - Extend a VMA range to include adjacent CPU-mirrored VMAs 4858 * @vm: VM to search within 4859 * @start: Input/output pointer to the starting address of the range 4860 * @end: Input/output pointer to the end address of the range 4861 * 4862 * Given a range defined by @start and @range, this function checks the VMAs 4863 * immediately before and after the range. If those neighboring VMAs are 4864 * CPU-address-mirrored and have default memory attributes, the function 4865 * updates @start and @range to include them. 
This extended range can then 4866 * be used for merging or other operations that require a unified VMA. 4867 * 4868 * The function does not perform the merge itself; it only computes the 4869 * mergeable boundaries. 4870 */ 4871 void xe_vm_find_cpu_addr_mirror_vma_range(struct xe_vm *vm, u64 *start, u64 *end) 4872 { 4873 struct xe_vma *prev, *next; 4874 4875 lockdep_assert_held(&vm->lock); 4876 4877 if (*start >= SZ_4K) { 4878 prev = xe_vm_find_vma_by_addr(vm, *start - SZ_4K); 4879 if (is_cpu_addr_vma_with_default_attr(prev)) 4880 *start = xe_vma_start(prev); 4881 } 4882 4883 if (*end < vm->size) { 4884 next = xe_vm_find_vma_by_addr(vm, *end + 1); 4885 if (is_cpu_addr_vma_with_default_attr(next)) 4886 *end = xe_vma_end(next); 4887 } 4888 } 4889 4890 /** 4891 * xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU addr mirror vma 4892 * @vm: Pointer to the xe_vm structure 4893 * @start: Starting input address 4894 * @range: Size of the input range 4895 * 4896 * This function splits/merges existing vma to create new vma for user provided input range 4897 * 4898 * Return: 0 if success 4899 */ 4900 int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t range) 4901 { 4902 struct drm_gpuvm_map_req map_req = { 4903 .map.va.addr = start, 4904 .map.va.range = range, 4905 }; 4906 4907 lockdep_assert_held_write(&vm->lock); 4908 4909 vm_dbg(&vm->xe->drm, "CPU_ADDR_MIRROR_VMA_OPS_CREATE: addr=0x%016llx, size=0x%016llx", 4910 start, range); 4911 4912 return xe_vm_alloc_vma(vm, &map_req, false); 4913 } 4914 4915 /** 4916 * xe_vm_add_exec_queue() - Add exec queue to VM 4917 * @vm: The VM. 4918 * @q: The exec_queue 4919 * 4920 * Add exec queue to VM, skipped if the device does not have context based TLB 4921 * invalidations. 
4922 */ 4923 void xe_vm_add_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 4924 { 4925 struct xe_device *xe = vm->xe; 4926 4927 /* User VMs and queues only */ 4928 xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_KERNEL)); 4929 xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); 4930 xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM)); 4931 xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_MIGRATE)); 4932 xe_assert(xe, vm->xef); 4933 xe_assert(xe, vm == q->vm); 4934 4935 if (!xe->info.has_ctx_tlb_inval) 4936 return; 4937 4938 down_write(&vm->exec_queues.lock); 4939 list_add(&q->vm_exec_queue_link, &vm->exec_queues.list[q->gt->info.id]); 4940 ++vm->exec_queues.count[q->gt->info.id]; 4941 up_write(&vm->exec_queues.lock); 4942 } 4943 4944 /** 4945 * xe_vm_remove_exec_queue() - Remove exec queue from VM 4946 * @vm: The VM. 4947 * @q: The exec_queue 4948 * 4949 * Remove exec queue from VM, skipped if the device does not have context based 4950 * TLB invalidations. 4951 */ 4952 void xe_vm_remove_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 4953 { 4954 if (!vm->xe->info.has_ctx_tlb_inval) 4955 return; 4956 4957 down_write(&vm->exec_queues.lock); 4958 if (!list_empty(&q->vm_exec_queue_link)) { 4959 list_del(&q->vm_exec_queue_link); 4960 --vm->exec_queues.count[q->gt->info.id]; 4961 } 4962 up_write(&vm->exec_queues.lock); 4963 } 4964