1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2021 Intel Corporation 4 */ 5 6 #include "xe_vm.h" 7 8 #include <linux/dma-fence-array.h> 9 #include <linux/nospec.h> 10 11 #include <drm/drm_drv.h> 12 #include <drm/drm_exec.h> 13 #include <drm/drm_print.h> 14 #include <drm/ttm/ttm_tt.h> 15 #include <uapi/drm/xe_drm.h> 16 #include <linux/ascii85.h> 17 #include <linux/delay.h> 18 #include <linux/kthread.h> 19 #include <linux/mm.h> 20 #include <linux/swap.h> 21 22 #include <generated/xe_wa_oob.h> 23 24 #include "regs/xe_gtt_defs.h" 25 #include "xe_assert.h" 26 #include "xe_bo.h" 27 #include "xe_device.h" 28 #include "xe_drm_client.h" 29 #include "xe_exec_queue.h" 30 #include "xe_gt.h" 31 #include "xe_migrate.h" 32 #include "xe_pat.h" 33 #include "xe_pm.h" 34 #include "xe_preempt_fence.h" 35 #include "xe_pt.h" 36 #include "xe_pxp.h" 37 #include "xe_sriov_vf.h" 38 #include "xe_svm.h" 39 #include "xe_sync.h" 40 #include "xe_tile.h" 41 #include "xe_tlb_inval.h" 42 #include "xe_trace_bo.h" 43 #include "xe_vm_madvise.h" 44 #include "xe_wa.h" 45 46 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) 47 { 48 return vm->gpuvm.r_obj; 49 } 50 51 /** 52 * xe_vm_drm_exec_lock() - Lock the vm's resv with a drm_exec transaction 53 * @vm: The vm whose resv is to be locked. 54 * @exec: The drm_exec transaction. 55 * 56 * Helper to lock the vm's resv as part of a drm_exec transaction. 57 * 58 * Return: %0 on success. See drm_exec_lock_obj() for error codes. 
59 */ 60 int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec) 61 { 62 return drm_exec_lock_obj(exec, xe_vm_obj(vm)); 63 } 64 65 static bool preempt_fences_waiting(struct xe_vm *vm) 66 { 67 struct xe_exec_queue *q; 68 69 lockdep_assert_held(&vm->lock); 70 xe_vm_assert_held(vm); 71 72 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 73 if (!q->lr.pfence || 74 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, 75 &q->lr.pfence->flags)) { 76 return true; 77 } 78 } 79 80 return false; 81 } 82 83 static void free_preempt_fences(struct list_head *list) 84 { 85 struct list_head *link, *next; 86 87 list_for_each_safe(link, next, list) 88 xe_preempt_fence_free(to_preempt_fence_from_link(link)); 89 } 90 91 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list, 92 unsigned int *count) 93 { 94 lockdep_assert_held(&vm->lock); 95 xe_vm_assert_held(vm); 96 97 if (*count >= vm->preempt.num_exec_queues) 98 return 0; 99 100 for (; *count < vm->preempt.num_exec_queues; ++(*count)) { 101 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc(); 102 103 if (IS_ERR(pfence)) 104 return PTR_ERR(pfence); 105 106 list_move_tail(xe_preempt_fence_link(pfence), list); 107 } 108 109 return 0; 110 } 111 112 static int wait_for_existing_preempt_fences(struct xe_vm *vm) 113 { 114 struct xe_exec_queue *q; 115 bool vf_migration = IS_SRIOV_VF(vm->xe) && 116 xe_sriov_vf_migration_supported(vm->xe); 117 signed long wait_time = vf_migration ? 
HZ / 5 : MAX_SCHEDULE_TIMEOUT; 118 119 xe_vm_assert_held(vm); 120 121 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 122 if (q->lr.pfence) { 123 long timeout; 124 125 timeout = dma_fence_wait_timeout(q->lr.pfence, false, 126 wait_time); 127 if (!timeout) { 128 xe_assert(vm->xe, vf_migration); 129 return -EAGAIN; 130 } 131 132 /* Only -ETIME on fence indicates VM needs to be killed */ 133 if (timeout < 0 || q->lr.pfence->error == -ETIME) 134 return -ETIME; 135 136 dma_fence_put(q->lr.pfence); 137 q->lr.pfence = NULL; 138 } 139 } 140 141 return 0; 142 } 143 144 static bool xe_vm_is_idle(struct xe_vm *vm) 145 { 146 struct xe_exec_queue *q; 147 148 xe_vm_assert_held(vm); 149 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 150 if (!xe_exec_queue_is_idle(q)) 151 return false; 152 } 153 154 return true; 155 } 156 157 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list) 158 { 159 struct list_head *link; 160 struct xe_exec_queue *q; 161 162 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 163 struct dma_fence *fence; 164 165 link = list->next; 166 xe_assert(vm->xe, link != list); 167 168 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link), 169 q, q->lr.context, 170 ++q->lr.seqno); 171 dma_fence_put(q->lr.pfence); 172 q->lr.pfence = fence; 173 } 174 } 175 176 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) 177 { 178 struct xe_exec_queue *q; 179 int err; 180 181 xe_bo_assert_held(bo); 182 183 if (!vm->preempt.num_exec_queues) 184 return 0; 185 186 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues); 187 if (err) 188 return err; 189 190 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) 191 if (q->lr.pfence) { 192 dma_resv_add_fence(bo->ttm.base.resv, 193 q->lr.pfence, 194 DMA_RESV_USAGE_BOOKKEEP); 195 } 196 197 return 0; 198 } 199 200 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm, 201 struct drm_exec *exec) 202 { 203 struct xe_exec_queue *q; 
204 205 lockdep_assert_held(&vm->lock); 206 xe_vm_assert_held(vm); 207 208 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 209 q->ops->resume(q); 210 211 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence, 212 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); 213 } 214 } 215 216 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 217 { 218 struct drm_gpuvm_exec vm_exec = { 219 .vm = &vm->gpuvm, 220 .flags = DRM_EXEC_INTERRUPTIBLE_WAIT, 221 .num_fences = 1, 222 }; 223 struct drm_exec *exec = &vm_exec.exec; 224 struct xe_validation_ctx ctx; 225 struct dma_fence *pfence; 226 int err; 227 bool wait; 228 229 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); 230 231 down_write(&vm->lock); 232 err = xe_validation_exec_lock(&ctx, &vm_exec, &vm->xe->val); 233 if (err) 234 goto out_up_write; 235 236 pfence = xe_preempt_fence_create(q, q->lr.context, 237 ++q->lr.seqno); 238 if (IS_ERR(pfence)) { 239 err = PTR_ERR(pfence); 240 goto out_fini; 241 } 242 243 list_add(&q->lr.link, &vm->preempt.exec_queues); 244 ++vm->preempt.num_exec_queues; 245 q->lr.pfence = pfence; 246 247 xe_svm_notifier_lock(vm); 248 249 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence, 250 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); 251 252 /* 253 * Check to see if a preemption on VM is in flight or userptr 254 * invalidation, if so trigger this preempt fence to sync state with 255 * other preempt fences on the VM. 256 */ 257 wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm); 258 if (wait) 259 dma_fence_enable_sw_signaling(pfence); 260 261 xe_svm_notifier_unlock(vm); 262 263 out_fini: 264 xe_validation_ctx_fini(&ctx); 265 out_up_write: 266 up_write(&vm->lock); 267 268 return err; 269 } 270 ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO); 271 272 /** 273 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM 274 * @vm: The VM. 
275 * @q: The exec_queue 276 * 277 * Note that this function might be called multiple times on the same queue. 278 */ 279 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 280 { 281 if (!xe_vm_in_preempt_fence_mode(vm)) 282 return; 283 284 down_write(&vm->lock); 285 if (!list_empty(&q->lr.link)) { 286 list_del_init(&q->lr.link); 287 --vm->preempt.num_exec_queues; 288 } 289 if (q->lr.pfence) { 290 dma_fence_enable_sw_signaling(q->lr.pfence); 291 dma_fence_put(q->lr.pfence); 292 q->lr.pfence = NULL; 293 } 294 up_write(&vm->lock); 295 } 296 297 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000 298 299 /** 300 * xe_vm_kill() - VM Kill 301 * @vm: The VM. 302 * @unlocked: Flag indicates the VM's dma-resv is not held 303 * 304 * Kill the VM by setting banned flag indicated VM is no longer available for 305 * use. If in preempt fence mode, also kill all exec queue attached to the VM. 306 */ 307 void xe_vm_kill(struct xe_vm *vm, bool unlocked) 308 { 309 struct xe_exec_queue *q; 310 311 lockdep_assert_held(&vm->lock); 312 313 if (unlocked) 314 xe_vm_lock(vm, false); 315 316 vm->flags |= XE_VM_FLAG_BANNED; 317 trace_xe_vm_kill(vm); 318 319 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) 320 q->ops->kill(q); 321 322 if (unlocked) 323 xe_vm_unlock(vm); 324 325 /* TODO: Inform user the VM is banned */ 326 } 327 328 static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) 329 { 330 struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm); 331 struct xe_bo *bo = gem_to_xe_bo(vm_bo->obj); 332 struct drm_gpuva *gpuva; 333 int ret; 334 335 lockdep_assert_held(&vm->lock); 336 drm_gpuvm_bo_for_each_va(gpuva, vm_bo) 337 list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind, 338 &vm->rebind_list); 339 340 /* Skip re-populating purged BOs, rebind maps scratch pages. 
*/ 341 if (xe_bo_is_purged(bo)) { 342 vm_bo->evicted = false; 343 return 0; 344 } 345 346 if (!try_wait_for_completion(&vm->xe->pm_block)) 347 return -EAGAIN; 348 349 ret = xe_bo_validate(bo, vm, false, exec); 350 if (ret) 351 return ret; 352 353 vm_bo->evicted = false; 354 return 0; 355 } 356 357 /** 358 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas 359 * @vm: The vm for which we are rebinding. 360 * @exec: The struct drm_exec with the locked GEM objects. 361 * @num_fences: The number of fences to reserve for the operation, not 362 * including rebinds and validations. 363 * 364 * Validates all evicted gem objects and rebinds their vmas. Note that 365 * rebindings may cause evictions and hence the validation-rebind 366 * sequence is rerun until there are no more objects to validate. 367 * 368 * Return: 0 on success, negative error code on error. In particular, 369 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if 370 * the drm_exec transaction needs to be restarted. 
371 */ 372 int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec, 373 unsigned int num_fences) 374 { 375 struct drm_gem_object *obj; 376 unsigned long index; 377 int ret; 378 379 do { 380 ret = drm_gpuvm_validate(&vm->gpuvm, exec); 381 if (ret) 382 return ret; 383 384 ret = xe_vm_rebind(vm, false); 385 if (ret) 386 return ret; 387 } while (!list_empty(&vm->gpuvm.evict.list)); 388 389 drm_exec_for_each_locked_object(exec, index, obj) { 390 ret = dma_resv_reserve_fences(obj->resv, num_fences); 391 if (ret) 392 return ret; 393 } 394 395 return 0; 396 } 397 398 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, 399 bool *done) 400 { 401 int err; 402 403 err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0); 404 if (err) 405 return err; 406 407 if (xe_vm_is_idle(vm)) { 408 vm->preempt.rebind_deactivated = true; 409 *done = true; 410 return 0; 411 } 412 413 if (!preempt_fences_waiting(vm)) { 414 *done = true; 415 return 0; 416 } 417 418 err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0); 419 if (err) 420 return err; 421 422 err = wait_for_existing_preempt_fences(vm); 423 if (err) 424 return err; 425 426 /* 427 * Add validation and rebinding to the locking loop since both can 428 * cause evictions which may require blocing dma_resv locks. 429 * The fence reservation here is intended for the new preempt fences 430 * we attach at the end of the rebind work. 431 */ 432 return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues); 433 } 434 435 static bool vm_suspend_rebind_worker(struct xe_vm *vm) 436 { 437 struct xe_device *xe = vm->xe; 438 bool ret = false; 439 440 mutex_lock(&xe->rebind_resume_lock); 441 if (!try_wait_for_completion(&vm->xe->pm_block)) { 442 ret = true; 443 list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list); 444 } 445 mutex_unlock(&xe->rebind_resume_lock); 446 447 return ret; 448 } 449 450 /** 451 * xe_vm_resume_rebind_worker() - Resume the rebind worker. 
452 * @vm: The vm whose preempt worker to resume. 453 * 454 * Resume a preempt worker that was previously suspended by 455 * vm_suspend_rebind_worker(). 456 */ 457 void xe_vm_resume_rebind_worker(struct xe_vm *vm) 458 { 459 queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work); 460 } 461 462 static void preempt_rebind_work_func(struct work_struct *w) 463 { 464 struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work); 465 struct xe_validation_ctx ctx; 466 struct drm_exec exec; 467 unsigned int fence_count = 0; 468 LIST_HEAD(preempt_fences); 469 int err = 0; 470 long wait; 471 int __maybe_unused tries = 0; 472 473 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); 474 trace_xe_vm_rebind_worker_enter(vm); 475 476 down_write(&vm->lock); 477 478 if (xe_vm_is_closed_or_banned(vm)) { 479 up_write(&vm->lock); 480 trace_xe_vm_rebind_worker_exit(vm); 481 return; 482 } 483 484 retry: 485 if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) { 486 up_write(&vm->lock); 487 /* We don't actually block but don't make progress. 
*/ 488 xe_pm_might_block_on_suspend(); 489 return; 490 } 491 492 if (xe_vm_userptr_check_repin(vm)) { 493 err = xe_vm_userptr_pin(vm); 494 if (err) 495 goto out_unlock_outer; 496 } 497 498 err = xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, 499 (struct xe_val_flags) {.interruptible = true}); 500 if (err) 501 goto out_unlock_outer; 502 503 drm_exec_until_all_locked(&exec) { 504 bool done = false; 505 506 err = xe_preempt_work_begin(&exec, vm, &done); 507 drm_exec_retry_on_contention(&exec); 508 xe_validation_retry_on_oom(&ctx, &err); 509 if (err || done) { 510 xe_validation_ctx_fini(&ctx); 511 goto out_unlock_outer; 512 } 513 } 514 515 err = alloc_preempt_fences(vm, &preempt_fences, &fence_count); 516 if (err) 517 goto out_unlock; 518 519 xe_vm_set_validation_exec(vm, &exec); 520 err = xe_vm_rebind(vm, true); 521 xe_vm_set_validation_exec(vm, NULL); 522 if (err) 523 goto out_unlock; 524 525 /* Wait on rebinds and munmap style VM unbinds */ 526 wait = dma_resv_wait_timeout(xe_vm_resv(vm), 527 DMA_RESV_USAGE_KERNEL, 528 false, MAX_SCHEDULE_TIMEOUT); 529 if (wait <= 0) { 530 err = -ETIME; 531 goto out_unlock; 532 } 533 534 #define retry_required(__tries, __vm) \ 535 (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \ 536 (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \ 537 __xe_vm_userptr_needs_repin(__vm)) 538 539 xe_svm_notifier_lock(vm); 540 if (retry_required(tries, vm)) { 541 xe_svm_notifier_unlock(vm); 542 err = -EAGAIN; 543 goto out_unlock; 544 } 545 546 #undef retry_required 547 548 spin_lock(&vm->xe->ttm.lru_lock); 549 ttm_lru_bulk_move_tail(&vm->lru_bulk_move); 550 spin_unlock(&vm->xe->ttm.lru_lock); 551 552 /* Point of no return. 
*/ 553 arm_preempt_fences(vm, &preempt_fences); 554 resume_and_reinstall_preempt_fences(vm, &exec); 555 xe_svm_notifier_unlock(vm); 556 557 out_unlock: 558 xe_validation_ctx_fini(&ctx); 559 out_unlock_outer: 560 if (err == -EAGAIN) { 561 trace_xe_vm_rebind_worker_retry(vm); 562 563 /* 564 * We can't block in workers on a VF which supports migration 565 * given this can block the VF post-migration workers from 566 * getting scheduled. 567 */ 568 if (IS_SRIOV_VF(vm->xe) && 569 xe_sriov_vf_migration_supported(vm->xe)) { 570 up_write(&vm->lock); 571 xe_vm_queue_rebind_worker(vm); 572 return; 573 } 574 575 goto retry; 576 } 577 578 if (err) { 579 drm_warn(&vm->xe->drm, "VM worker error: %d\n", err); 580 xe_vm_kill(vm, true); 581 } 582 up_write(&vm->lock); 583 584 free_preempt_fences(&preempt_fences); 585 586 trace_xe_vm_rebind_worker_exit(vm); 587 } 588 589 /** 590 * xe_vm_add_fault_entry_pf() - Add pagefault to vm fault list 591 * @vm: The VM. 592 * @pf: The pagefault. 593 * 594 * This function takes the data from the pagefault @pf and saves it to @vm->faults.list. 595 * 596 * The function exits silently if the list is full, and reports a warning if the pagefault 597 * could not be saved to the list. 598 */ 599 void xe_vm_add_fault_entry_pf(struct xe_vm *vm, struct xe_pagefault *pf) 600 { 601 struct xe_vm_fault_entry *e; 602 struct xe_hw_engine *hwe; 603 604 /* Do not report faults on reserved engines */ 605 hwe = xe_gt_hw_engine(pf->gt, pf->consumer.engine_class, 606 pf->consumer.engine_instance, false); 607 if (!hwe || xe_hw_engine_is_reserved(hwe)) 608 return; 609 610 e = kzalloc_obj(*e); 611 if (!e) { 612 drm_warn(&vm->xe->drm, 613 "Could not allocate memory for fault!\n"); 614 return; 615 } 616 617 guard(spinlock)(&vm->faults.lock); 618 619 /* 620 * Limit the number of faults in the fault list to prevent 621 * memory overuse. 
622 */ 623 if (vm->faults.len >= MAX_FAULTS_SAVED_PER_VM) { 624 kfree(e); 625 return; 626 } 627 628 e->address = pf->consumer.page_addr; 629 /* 630 * TODO: 631 * Address precision is currently always SZ_4K, but this may change 632 * in the future. 633 */ 634 e->address_precision = SZ_4K; 635 e->access_type = pf->consumer.access_type; 636 e->fault_type = FIELD_GET(XE_PAGEFAULT_TYPE_MASK, 637 pf->consumer.fault_type_level), 638 e->fault_level = FIELD_GET(XE_PAGEFAULT_LEVEL_MASK, 639 pf->consumer.fault_type_level), 640 641 list_add_tail(&e->list, &vm->faults.list); 642 vm->faults.len++; 643 } 644 645 static void xe_vm_clear_fault_entries(struct xe_vm *vm) 646 { 647 struct xe_vm_fault_entry *e, *tmp; 648 649 guard(spinlock)(&vm->faults.lock); 650 list_for_each_entry_safe(e, tmp, &vm->faults.list, list) { 651 list_del(&e->list); 652 kfree(e); 653 } 654 vm->faults.len = 0; 655 } 656 657 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) 658 { 659 int i; 660 661 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) { 662 if (!vops->pt_update_ops[i].num_ops) 663 continue; 664 665 vops->pt_update_ops[i].ops = 666 kmalloc_objs(*vops->pt_update_ops[i].ops, 667 vops->pt_update_ops[i].num_ops, 668 GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 669 if (!vops->pt_update_ops[i].ops) 670 return array_of_binds ? 
-ENOBUFS : -ENOMEM; 671 } 672 673 return 0; 674 } 675 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO); 676 677 static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op) 678 { 679 struct xe_vma *vma; 680 681 vma = gpuva_to_vma(op->base.prefetch.va); 682 683 if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma)) 684 xa_destroy(&op->prefetch_range.range); 685 } 686 687 static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops) 688 { 689 struct xe_vma_op *op; 690 691 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) 692 return; 693 694 list_for_each_entry(op, &vops->list, link) 695 xe_vma_svm_prefetch_op_fini(op); 696 } 697 698 static void xe_vma_ops_fini(struct xe_vma_ops *vops) 699 { 700 int i; 701 702 xe_vma_svm_prefetch_ops_fini(vops); 703 704 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 705 kfree(vops->pt_update_ops[i].ops); 706 } 707 708 static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val) 709 { 710 int i; 711 712 if (!inc_val) 713 return; 714 715 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 716 if (BIT(i) & tile_mask) 717 vops->pt_update_ops[i].num_ops += inc_val; 718 } 719 720 #define XE_VMA_CREATE_MASK ( \ 721 XE_VMA_READ_ONLY | \ 722 XE_VMA_DUMPABLE | \ 723 XE_VMA_SYSTEM_ALLOCATOR | \ 724 DRM_GPUVA_SPARSE | \ 725 XE_VMA_MADV_AUTORESET) 726 727 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, 728 u8 tile_mask) 729 { 730 INIT_LIST_HEAD(&op->link); 731 op->tile_mask = tile_mask; 732 op->base.op = DRM_GPUVA_OP_MAP; 733 op->base.map.va.addr = vma->gpuva.va.addr; 734 op->base.map.va.range = vma->gpuva.va.range; 735 op->base.map.gem.obj = vma->gpuva.gem.obj; 736 op->base.map.gem.offset = vma->gpuva.gem.offset; 737 op->map.vma = vma; 738 op->map.immediate = true; 739 op->map.vma_flags = vma->gpuva.flags & XE_VMA_CREATE_MASK; 740 } 741 742 static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, 743 u8 tile_mask) 744 { 745 struct xe_vma_op *op; 
746 747 op = kzalloc_obj(*op); 748 if (!op) 749 return -ENOMEM; 750 751 xe_vm_populate_rebind(op, vma, tile_mask); 752 list_add_tail(&op->link, &vops->list); 753 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); 754 755 return 0; 756 } 757 758 static struct dma_fence *ops_execute(struct xe_vm *vm, 759 struct xe_vma_ops *vops); 760 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, 761 struct xe_exec_queue *q, 762 struct xe_sync_entry *syncs, u32 num_syncs); 763 764 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) 765 { 766 struct dma_fence *fence; 767 struct xe_vma *vma, *next; 768 struct xe_vma_ops vops; 769 struct xe_vma_op *op, *next_op; 770 int err, i; 771 772 lockdep_assert_held(&vm->lock); 773 if ((xe_vm_in_lr_mode(vm) && !rebind_worker) || 774 list_empty(&vm->rebind_list)) 775 return 0; 776 777 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 778 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 779 vops.pt_update_ops[i].wait_vm_bookkeep = true; 780 781 xe_vm_assert_held(vm); 782 list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { 783 xe_assert(vm->xe, vma->tile_present); 784 785 if (rebind_worker) 786 trace_xe_vma_rebind_worker(vma); 787 else 788 trace_xe_vma_rebind_exec(vma); 789 790 err = xe_vm_ops_add_rebind(&vops, vma, 791 vma->tile_present); 792 if (err) 793 goto free_ops; 794 } 795 796 err = xe_vma_ops_alloc(&vops, false); 797 if (err) 798 goto free_ops; 799 800 fence = ops_execute(vm, &vops); 801 if (IS_ERR(fence)) { 802 err = PTR_ERR(fence); 803 } else { 804 dma_fence_put(fence); 805 list_for_each_entry_safe(vma, next, &vm->rebind_list, 806 combined_links.rebind) 807 list_del_init(&vma->combined_links.rebind); 808 } 809 free_ops: 810 list_for_each_entry_safe(op, next_op, &vops.list, link) { 811 list_del(&op->link); 812 kfree(op); 813 } 814 xe_vma_ops_fini(&vops); 815 816 return err; 817 } 818 819 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask) 820 { 821 struct dma_fence *fence = 
NULL; 822 struct xe_vma_ops vops; 823 struct xe_vma_op *op, *next_op; 824 struct xe_tile *tile; 825 u8 id; 826 int err; 827 828 lockdep_assert_held(&vm->lock); 829 xe_vm_assert_held(vm); 830 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 831 832 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 833 vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT; 834 for_each_tile(tile, vm->xe, id) { 835 vops.pt_update_ops[id].wait_vm_bookkeep = true; 836 vops.pt_update_ops[tile->id].q = 837 xe_migrate_exec_queue(tile->migrate); 838 } 839 840 err = xe_vm_ops_add_rebind(&vops, vma, tile_mask); 841 if (err) 842 return ERR_PTR(err); 843 844 err = xe_vma_ops_alloc(&vops, false); 845 if (err) { 846 fence = ERR_PTR(err); 847 goto free_ops; 848 } 849 850 fence = ops_execute(vm, &vops); 851 852 free_ops: 853 list_for_each_entry_safe(op, next_op, &vops.list, link) { 854 list_del(&op->link); 855 kfree(op); 856 } 857 xe_vma_ops_fini(&vops); 858 859 return fence; 860 } 861 862 static void xe_vm_populate_range_rebind(struct xe_vma_op *op, 863 struct xe_vma *vma, 864 struct xe_svm_range *range, 865 u8 tile_mask) 866 { 867 INIT_LIST_HEAD(&op->link); 868 op->tile_mask = tile_mask; 869 op->base.op = DRM_GPUVA_OP_DRIVER; 870 op->subop = XE_VMA_SUBOP_MAP_RANGE; 871 op->map_range.vma = vma; 872 op->map_range.range = range; 873 } 874 875 static int 876 xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops, 877 struct xe_vma *vma, 878 struct xe_svm_range *range, 879 u8 tile_mask) 880 { 881 struct xe_vma_op *op; 882 883 op = kzalloc_obj(*op); 884 if (!op) 885 return -ENOMEM; 886 887 xe_vm_populate_range_rebind(op, vma, range, tile_mask); 888 list_add_tail(&op->link, &vops->list); 889 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); 890 891 return 0; 892 } 893 894 /** 895 * xe_vm_range_rebind() - VM range (re)bind 896 * @vm: The VM which the range belongs to. 897 * @vma: The VMA which the range belongs to. 898 * @range: SVM range to rebind. 899 * @tile_mask: Tile mask to bind the range to. 
900 * 901 * (re)bind SVM range setting up GPU page tables for the range. 902 * 903 * Return: dma fence for rebind to signal completion on success, ERR_PTR on 904 * failure 905 */ 906 struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm, 907 struct xe_vma *vma, 908 struct xe_svm_range *range, 909 u8 tile_mask) 910 { 911 struct dma_fence *fence = NULL; 912 struct xe_vma_ops vops; 913 struct xe_vma_op *op, *next_op; 914 struct xe_tile *tile; 915 u8 id; 916 int err; 917 918 lockdep_assert_held(&vm->lock); 919 xe_vm_assert_held(vm); 920 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 921 xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma)); 922 923 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 924 vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT; 925 for_each_tile(tile, vm->xe, id) { 926 vops.pt_update_ops[id].wait_vm_bookkeep = true; 927 vops.pt_update_ops[tile->id].q = 928 xe_migrate_exec_queue(tile->migrate); 929 } 930 931 err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask); 932 if (err) 933 return ERR_PTR(err); 934 935 err = xe_vma_ops_alloc(&vops, false); 936 if (err) { 937 fence = ERR_PTR(err); 938 goto free_ops; 939 } 940 941 fence = ops_execute(vm, &vops); 942 943 free_ops: 944 list_for_each_entry_safe(op, next_op, &vops.list, link) { 945 list_del(&op->link); 946 kfree(op); 947 } 948 xe_vma_ops_fini(&vops); 949 950 return fence; 951 } 952 953 static void xe_vm_populate_range_unbind(struct xe_vma_op *op, 954 struct xe_svm_range *range) 955 { 956 INIT_LIST_HEAD(&op->link); 957 op->tile_mask = range->tile_present; 958 op->base.op = DRM_GPUVA_OP_DRIVER; 959 op->subop = XE_VMA_SUBOP_UNMAP_RANGE; 960 op->unmap_range.range = range; 961 } 962 963 static int 964 xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops, 965 struct xe_svm_range *range) 966 { 967 struct xe_vma_op *op; 968 969 op = kzalloc_obj(*op); 970 if (!op) 971 return -ENOMEM; 972 973 xe_vm_populate_range_unbind(op, range); 974 list_add_tail(&op->link, &vops->list); 975 xe_vma_ops_incr_pt_update_ops(vops, 
range->tile_present, 1); 976 977 return 0; 978 } 979 980 /** 981 * xe_vm_range_unbind() - VM range unbind 982 * @vm: The VM which the range belongs to. 983 * @range: SVM range to rebind. 984 * 985 * Unbind SVM range removing the GPU page tables for the range. 986 * 987 * Return: dma fence for unbind to signal completion on success, ERR_PTR on 988 * failure 989 */ 990 struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm, 991 struct xe_svm_range *range) 992 { 993 struct dma_fence *fence = NULL; 994 struct xe_vma_ops vops; 995 struct xe_vma_op *op, *next_op; 996 struct xe_tile *tile; 997 u8 id; 998 int err; 999 1000 lockdep_assert_held(&vm->lock); 1001 xe_vm_assert_held(vm); 1002 xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); 1003 1004 if (!range->tile_present) 1005 return dma_fence_get_stub(); 1006 1007 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 1008 for_each_tile(tile, vm->xe, id) { 1009 vops.pt_update_ops[id].wait_vm_bookkeep = true; 1010 vops.pt_update_ops[tile->id].q = 1011 xe_migrate_exec_queue(tile->migrate); 1012 } 1013 1014 err = xe_vm_ops_add_range_unbind(&vops, range); 1015 if (err) 1016 return ERR_PTR(err); 1017 1018 err = xe_vma_ops_alloc(&vops, false); 1019 if (err) { 1020 fence = ERR_PTR(err); 1021 goto free_ops; 1022 } 1023 1024 fence = ops_execute(vm, &vops); 1025 1026 free_ops: 1027 list_for_each_entry_safe(op, next_op, &vops.list, link) { 1028 list_del(&op->link); 1029 kfree(op); 1030 } 1031 xe_vma_ops_fini(&vops); 1032 1033 return fence; 1034 } 1035 1036 static void xe_vma_mem_attr_fini(struct xe_vma_mem_attr *attr) 1037 { 1038 drm_pagemap_put(attr->preferred_loc.dpagemap); 1039 } 1040 1041 static void xe_vma_free(struct xe_vma *vma) 1042 { 1043 xe_vma_mem_attr_fini(&vma->attr); 1044 1045 if (xe_vma_is_userptr(vma)) 1046 kfree(to_userptr_vma(vma)); 1047 else 1048 kfree(vma); 1049 } 1050 1051 /** 1052 * xe_vma_mem_attr_copy() - copy an xe_vma_mem_attr structure. 1053 * @to: Destination. 1054 * @from: Source. 
1055 * 1056 * Copies an xe_vma_mem_attr structure taking care to get reference 1057 * counting of individual members right. 1058 */ 1059 void xe_vma_mem_attr_copy(struct xe_vma_mem_attr *to, struct xe_vma_mem_attr *from) 1060 { 1061 xe_vma_mem_attr_fini(to); 1062 *to = *from; 1063 if (to->preferred_loc.dpagemap) 1064 drm_pagemap_get(to->preferred_loc.dpagemap); 1065 } 1066 1067 static struct xe_vma *xe_vma_create(struct xe_vm *vm, 1068 struct xe_bo *bo, 1069 u64 bo_offset_or_userptr, 1070 u64 start, u64 end, 1071 struct xe_vma_mem_attr *attr, 1072 unsigned int flags) 1073 { 1074 struct xe_vma *vma; 1075 struct xe_tile *tile; 1076 u8 id; 1077 bool is_null = (flags & DRM_GPUVA_SPARSE); 1078 bool is_cpu_addr_mirror = (flags & XE_VMA_SYSTEM_ALLOCATOR); 1079 1080 xe_assert(vm->xe, start < end); 1081 xe_assert(vm->xe, end < vm->size); 1082 1083 /* 1084 * Allocate and ensure that the xe_vma_is_userptr() return 1085 * matches what was allocated. 1086 */ 1087 if (!bo && !is_null && !is_cpu_addr_mirror) { 1088 struct xe_userptr_vma *uvma = kzalloc_obj(*uvma); 1089 1090 if (!uvma) 1091 return ERR_PTR(-ENOMEM); 1092 1093 vma = &uvma->vma; 1094 } else { 1095 vma = kzalloc_obj(*vma); 1096 if (!vma) 1097 return ERR_PTR(-ENOMEM); 1098 1099 if (bo) 1100 vma->gpuva.gem.obj = &bo->ttm.base; 1101 } 1102 1103 INIT_LIST_HEAD(&vma->combined_links.rebind); 1104 1105 INIT_LIST_HEAD(&vma->gpuva.gem.entry); 1106 vma->gpuva.vm = &vm->gpuvm; 1107 vma->gpuva.va.addr = start; 1108 vma->gpuva.va.range = end - start + 1; 1109 vma->gpuva.flags = flags; 1110 1111 for_each_tile(tile, vm->xe, id) 1112 vma->tile_mask |= 0x1 << id; 1113 1114 if (vm->xe->info.has_atomic_enable_pte_bit) 1115 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; 1116 1117 xe_vma_mem_attr_copy(&vma->attr, attr); 1118 if (bo) { 1119 struct drm_gpuvm_bo *vm_bo; 1120 1121 xe_bo_assert_held(bo); 1122 1123 vm_bo = drm_gpuvm_bo_obtain_locked(vma->gpuva.vm, &bo->ttm.base); 1124 if (IS_ERR(vm_bo)) { 1125 xe_vma_free(vma); 1126 return 
ERR_CAST(vm_bo); 1127 } 1128 1129 drm_gpuvm_bo_extobj_add(vm_bo); 1130 drm_gem_object_get(&bo->ttm.base); 1131 vma->gpuva.gem.offset = bo_offset_or_userptr; 1132 drm_gpuva_link(&vma->gpuva, vm_bo); 1133 drm_gpuvm_bo_put(vm_bo); 1134 } else /* userptr or null */ { 1135 if (!is_null && !is_cpu_addr_mirror) { 1136 struct xe_userptr_vma *uvma = to_userptr_vma(vma); 1137 u64 size = end - start + 1; 1138 int err; 1139 1140 vma->gpuva.gem.offset = bo_offset_or_userptr; 1141 1142 err = xe_userptr_setup(uvma, xe_vma_userptr(vma), size); 1143 if (err) { 1144 xe_vma_free(vma); 1145 return ERR_PTR(err); 1146 } 1147 } 1148 1149 xe_vm_get(vm); 1150 } 1151 1152 return vma; 1153 } 1154 1155 static void xe_vma_destroy_late(struct xe_vma *vma) 1156 { 1157 struct xe_vm *vm = xe_vma_vm(vma); 1158 struct xe_bo *bo = xe_vma_bo(vma); 1159 1160 if (vma->ufence) { 1161 xe_sync_ufence_put(vma->ufence); 1162 vma->ufence = NULL; 1163 } 1164 1165 if (xe_vma_is_userptr(vma)) { 1166 struct xe_userptr_vma *uvma = to_userptr_vma(vma); 1167 1168 xe_userptr_remove(uvma); 1169 xe_vm_put(vm); 1170 } else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) { 1171 xe_vm_put(vm); 1172 } else { 1173 xe_bo_put(bo); 1174 } 1175 1176 xe_vma_free(vma); 1177 } 1178 1179 static void vma_destroy_work_func(struct work_struct *w) 1180 { 1181 struct xe_vma *vma = 1182 container_of(w, struct xe_vma, destroy_work); 1183 1184 xe_vma_destroy_late(vma); 1185 } 1186 1187 static void vma_destroy_cb(struct dma_fence *fence, 1188 struct dma_fence_cb *cb) 1189 { 1190 struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb); 1191 1192 INIT_WORK(&vma->destroy_work, vma_destroy_work_func); 1193 queue_work(system_dfl_wq, &vma->destroy_work); 1194 } 1195 1196 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) 1197 { 1198 struct xe_vm *vm = xe_vma_vm(vma); 1199 struct xe_bo *bo = xe_vma_bo(vma); 1200 1201 lockdep_assert_held_write(&vm->lock); 1202 xe_assert(vm->xe, 
list_empty(&vma->combined_links.destroy));

	if (xe_vma_is_userptr(vma)) {
		xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
		xe_userptr_destroy(to_userptr_vma(vma));
	} else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) {
		/* BO-backed VMA: unlink from the BO under its resv. */
		xe_bo_assert_held(bo);

		drm_gpuva_unlink(&vma->gpuva);
		xe_bo_recompute_purgeable_state(bo);
	}

	xe_vm_assert_held(vm);
	if (fence) {
		int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
						 vma_destroy_cb);

		if (ret) {
			/* -ENOENT means the fence already signaled: destroy now. */
			XE_WARN_ON(ret != -ENOENT);
			xe_vma_destroy_late(vma);
		}
	} else {
		xe_vma_destroy_late(vma);
	}
}

/**
 * xe_vm_lock_vma() - drm_exec utility to lock a vma
 * @exec: The drm_exec object we're currently locking for.
 * @vma: The vma for which we want to lock the vm resv and any attached
 * object's resv.
 *
 * Return: 0 on success, negative error code on error. In particular
 * may return -EDEADLK on WW transaction contention and -EINTR if
 * an interruptible wait is terminated by a signal.
 */
int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
{
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_bo *bo = xe_vma_bo(vma);
	int err;

	XE_WARN_ON(!vm);

	err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
	/* A VM-private BO shares the VM's resv, so only lock external BOs. */
	if (!err && bo && !bo->vm)
		err = drm_exec_lock_obj(exec, &bo->ttm.base);

	return err;
}

/*
 * Lock the VM (and any attached external BO) via a validation/exec
 * transaction, then destroy @vma synchronously.
 */
static void xe_vma_destroy_unlocked(struct xe_vma *vma)
{
	struct xe_device *xe = xe_vma_vm(vma)->xe;
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	int err = 0;

	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
		err = xe_vm_lock_vma(&exec, vma);
		drm_exec_retry_on_contention(&exec);
		if (XE_WARN_ON(err))
			break;
		xe_vma_destroy(vma, NULL);
	}
	xe_assert(xe, !err);
}

struct xe_vma *
xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
{
	struct drm_gpuva *gpuva;

	lockdep_assert_held(&vm->lock);

	/* A closed or banned VM has no valid mappings to report. */
	if (xe_vm_is_closed_or_banned(vm))
		return NULL;

	xe_assert(vm->xe, start + range <= vm->size);

	gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);

	return gpuva ?
gpuva_to_vma(gpuva) : NULL;
}

/*
 * Insert @vma into the VM's GPUVA tree. The tree is mutated under
 * snap_mutex; pairs with xe_vm_remove_vma().
 */
static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
{
	int err;

	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
	lockdep_assert_held(&vm->lock);

	mutex_lock(&vm->snap_mutex);
	err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
	mutex_unlock(&vm->snap_mutex);
	XE_WARN_ON(err);	/* Shouldn't be possible */

	return err;
}

/* Remove @vma from the GPUVA tree and drop the cached last-fault VMA. */
static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
{
	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
	lockdep_assert_held(&vm->lock);

	mutex_lock(&vm->snap_mutex);
	drm_gpuva_remove(&vma->gpuva);
	mutex_unlock(&vm->snap_mutex);
	if (vm->usm.last_fault_vma == vma)
		vm->usm.last_fault_vma = NULL;
}

/* drm_gpuvm op allocator: embed the drm_gpuva_op inside an xe_vma_op. */
static struct drm_gpuva_op *xe_vm_op_alloc(void)
{
	struct xe_vma_op *op;

	op = kzalloc_obj(*op);

	if (unlikely(!op))
		return NULL;

	return &op->base;
}

static void xe_vm_free(struct drm_gpuvm *gpuvm);

static const struct drm_gpuvm_ops gpuvm_ops = {
	.op_alloc = xe_vm_op_alloc,
	.vm_bo_validate = xe_gpuvm_validate,
	.vm_free = xe_vm_free,
};

/* Encode a PAT index into PDE bits; only PAT bits [1:0] exist in a PDE. */
static u64 pde_encode_pat_index(u16 pat_index)
{
	u64 pte = 0;

	if (pat_index & BIT(0))
		pte |= XE_PPGTT_PTE_PAT0;

	if (pat_index & BIT(1))
		pte |= XE_PPGTT_PTE_PAT1;

	return pte;
}

/*
 * Encode a PAT index into PTE bits. PAT bit 2 lands in a different bit
 * position for non-leaf (pt_level != 0) entries.
 */
static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level)
{
	u64 pte = 0;

	if (pat_index & BIT(0))
		pte |= XE_PPGTT_PTE_PAT0;

	if (pat_index & BIT(1))
		pte |= XE_PPGTT_PTE_PAT1;

	if (pat_index & BIT(2)) {
		if (pt_level)
			pte |= XE_PPGTT_PDE_PDPE_PAT2;
		else
			pte |= XE_PPGTT_PTE_PAT2;
	}

	if (pat_index & BIT(3))
		pte |= XELPG_PPGTT_PTE_PAT3;

	if (pat_index & (BIT(4)))
		pte |= XE2_PPGTT_PTE_PAT4;

	return pte;
}
/* Huge-page size bit for a leaf at @pt_level: 2M at level 1, 1G at level 2. */
static u64 pte_encode_ps(u32 pt_level)
{
	XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);

	if (pt_level == 1)
		return XE_PDE_PS_2M;
	else if (pt_level == 2)
		return XE_PDPE_PS_1G;

	return 0;
}

static u16 pde_pat_index(struct xe_bo *bo)
{
	struct xe_device *xe = xe_bo_device(bo);
	u16 pat_index;

	/*
	 * We only have two bits to encode the PAT index in non-leaf nodes, but
	 * these only point to other paging structures so we only need a minimal
	 * selection of options. The user PAT index is only for encoding leaf
	 * nodes, where we have use of more bits to do the encoding. The
	 * non-leaf nodes are instead under driver control so the chosen index
	 * here should be distinct from the user PAT index. Also the
	 * corresponding coherency of the PAT index should be tied to the
	 * allocation type of the page table (or at least we should pick
	 * something which is always safe).
	 */
	if (!xe_bo_is_vram(bo) && bo->ttm.ttm->caching == ttm_cached)
		pat_index = xe->pat.idx[XE_CACHE_WB];
	else
		pat_index = xe->pat.idx[XE_CACHE_NONE];

	/* Must fit the two PDE PAT bits (see pde_encode_pat_index()). */
	xe_assert(xe, pat_index <= 3);

	return pat_index;
}

/* Encode a page-directory entry pointing at @bo + @bo_offset. */
static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset)
{
	u64 pde;

	pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
	pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
	pde |= pde_encode_pat_index(pde_pat_index(bo));

	return pde;
}

/* Encode a leaf PTE pointing at @bo + @bo_offset with the given PAT index. */
static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
			      u16 pat_index, u32 pt_level)
{
	u64 pte;

	pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
	pte |= pte_encode_pat_index(pat_index, pt_level);
	pte |= pte_encode_ps(pt_level);

	/* Device-memory backed pages need the DM bit set. */
	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
		pte |= XE_PPGTT_PTE_DM;

	return pte;
}

/* Add per-VMA flags (RW, NULL redirection, PAT, page size) onto @pte. */
static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
			       u16 pat_index, u32 pt_level)
{
	struct xe_bo *bo = xe_vma_bo(vma);
	struct xe_vm *vm = xe_vma_vm(vma);

	pte |= XE_PAGE_PRESENT;

	if (likely(!xe_vma_read_only(vma)))
		pte |= XE_PAGE_RW;

	pte |= pte_encode_pat_index(pat_index, pt_level);
	pte |= pte_encode_ps(pt_level);

	/*
	 * NULL PTEs redirect to scratch page (return zeros on read).
	 * Set for: 1) explicit null VMAs, 2) purged BOs on scratch VMs.
	 * Never set NULL flag without scratch page - causes undefined behavior.
	 */
	if (unlikely(xe_vma_is_null(vma) ||
		     (bo && xe_bo_is_purged(bo) && xe_vm_has_scratch(vm))))
		pte |= XE_PTE_NULL;

	return pte;
}

/* Encode a leaf PTE for a raw address (no BO); @flags may only be PS64. */
static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
				u16 pat_index,
				u32 pt_level, bool devmem, u64 flags)
{
	u64 pte;

	/* Avoid passing random bits directly as flags */
	xe_assert(xe, !(flags & ~XE_PTE_PS64));

	pte = addr;
	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
	pte |= pte_encode_pat_index(pat_index, pt_level);
	pte |= pte_encode_ps(pt_level);

	if (devmem)
		pte |= XE_PPGTT_PTE_DM;

	pte |= flags;

	return pte;
}

static const struct xe_pt_ops xelp_pt_ops = {
	.pte_encode_bo = xelp_pte_encode_bo,
	.pte_encode_vma = xelp_pte_encode_vma,
	.pte_encode_addr = xelp_pte_encode_addr,
	.pde_encode_bo = xelp_pde_encode_bo,
};

static void vm_destroy_work_func(struct work_struct *w);

/**
 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
 * given tile and vm.
 * @xe: xe device.
 * @tile: tile to set up for.
 * @vm: vm to set up for.
 * @exec: The struct drm_exec object used to lock the vm resv.
 *
 * Sets up a pagetable tree with one page-table per level and a single
 * leaf PTE. All pagetable entries point to the single page-table or,
 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and
 * writes become NOPs.
 *
 * Return: 0 on success, negative error code on error.
 */
static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
				struct xe_vm *vm, struct drm_exec *exec)
{
	u8 id = tile->id;
	int i;

	for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
		vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i, exec);
		if (IS_ERR(vm->scratch_pt[id][i])) {
			int err = PTR_ERR(vm->scratch_pt[id][i]);

			vm->scratch_pt[id][i] = NULL;
			return err;
		}
		xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
	}

	return 0;
}
ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO);

/* Destroy all per-tile scratch page tables created by xe_vm_create_scratch(). */
static void xe_vm_free_scratch(struct xe_vm *vm)
{
	struct xe_tile *tile;
	u8 id;

	if (!xe_vm_has_scratch(vm))
		return;

	for_each_tile(tile, vm->xe, id) {
		u32 i;

		if (!vm->pt_root[id])
			continue;

		for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
			if (vm->scratch_pt[id][i])
				xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
	}
}

/* Destroy the per-tile page-table roots. */
static void xe_vm_pt_destroy(struct xe_vm *vm)
{
	struct xe_tile *tile;
	u8 id;

	xe_vm_assert_held(vm);

	for_each_tile(tile, vm->xe, id) {
		if (vm->pt_root[id]) {
			xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
			vm->pt_root[id] = NULL;
		}
	}
}

/*
 * Teach lockdep the expected ordering of the exec_queues lock vs reclaim
 * and the GuC CT lock up front, so violations are reported early.
 */
static void xe_vm_init_prove_locking(struct xe_device *xe, struct xe_vm *vm)
{
	if (!IS_ENABLED(CONFIG_PROVE_LOCKING))
		return;

	fs_reclaim_acquire(GFP_KERNEL);
	might_lock(&vm->exec_queues.lock);
	fs_reclaim_release(GFP_KERNEL);

	down_read(&vm->exec_queues.lock);
	might_lock(&xe_root_mmio_gt(xe)->uc.guc.ct.lock);
	up_read(&vm->exec_queues.lock);
}
/**
 * xe_vm_create() - Create a VM
 * @xe: xe device.
 * @flags: XE_VM_FLAG_* flags selecting the VM mode.
 * @xef: xe file owning the VM, or NULL for kernel-internal VMs.
 *
 * Allocates and initializes a VM: locks, lists, gpuvm state, per-tile
 * page-table roots (and scratch tables when requested), bind exec queues
 * and, for user VMs on ASID-capable hardware, an ASID.
 *
 * Return: pointer to the new VM on success, ERR_PTR() on failure.
 */
struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
{
	struct drm_gem_object *vm_resv_obj;
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	struct xe_vm *vm;
	int err;
	struct xe_tile *tile;
	u8 id;

	/*
	 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to
	 * ever be in faulting mode.
	 */
	xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE)));

	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
	if (!vm)
		return ERR_PTR(-ENOMEM);

	vm->xe = xe;

	vm->size = 1ull << xe->info.va_bits;
	vm->flags = flags;

	if (xef)
		vm->xef = xe_file_get(xef);
	/*
	 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be
	 * manipulated under the PXP mutex. However, the PXP mutex can be taken
	 * under a user-VM lock when the PXP session is started at exec_queue
	 * creation time. Those are different VMs and therefore there is no risk
	 * of deadlock, but we need to tell lockdep that this is the case or it
	 * will print a warning.
	 */
	if (flags & XE_VM_FLAG_GSC) {
		static struct lock_class_key gsc_vm_key;

		__init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key);
	} else {
		init_rwsem(&vm->lock);
	}
	mutex_init(&vm->snap_mutex);

	INIT_LIST_HEAD(&vm->rebind_list);

	INIT_LIST_HEAD(&vm->userptr.repin_list);
	INIT_LIST_HEAD(&vm->userptr.invalidated);
	spin_lock_init(&vm->userptr.invalidated_lock);

	INIT_LIST_HEAD(&vm->faults.list);
	spin_lock_init(&vm->faults.lock);

	ttm_lru_bulk_move_init(&vm->lru_bulk_move);

	INIT_WORK(&vm->destroy_work, vm_destroy_work_func);

	INIT_LIST_HEAD(&vm->preempt.exec_queues);
	for (id = 0; id < XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE; ++id)
		INIT_LIST_HEAD(&vm->exec_queues.list[id]);
	/* Fault-mode and long-running VMs use different preempt run periods. */
	if (flags & XE_VM_FLAG_FAULT_MODE)
		vm->preempt.min_run_period_ms = xe->min_run_period_pf_ms;
	else
		vm->preempt.min_run_period_ms = xe->min_run_period_lr_ms;

	init_rwsem(&vm->exec_queues.lock);
	xe_vm_init_prove_locking(xe, vm);

	for_each_tile(tile, xe, id)
		xe_range_fence_tree_init(&vm->rftree[id]);

	vm->pt_ops = &xelp_pt_ops;

	/*
	 * Long-running workloads are not protected by the scheduler references.
	 * By design, run_job for long-running workloads returns NULL and the
	 * scheduler drops all the references of it, hence protecting the VM
	 * for this case is necessary.
	 */
	if (flags & XE_VM_FLAG_LR_MODE) {
		INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
		xe_pm_runtime_get_noresume(xe);
		INIT_LIST_HEAD(&vm->preempt.pm_activate_link);
	}

	err = xe_svm_init(vm);
	if (err)
		goto err_no_resv;

	vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
	if (!vm_resv_obj) {
		err = -ENOMEM;
		goto err_svm_fini;
	}

	drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
		       vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);

	/* gpuvm now holds its own reference on the resv object. */
	drm_gem_object_put(vm_resv_obj);

	err = 0;
	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
			    err) {
		err = xe_vm_drm_exec_lock(vm, &exec);
		drm_exec_retry_on_contention(&exec);

		if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
			vm->flags |= XE_VM_FLAG_64K;

		for_each_tile(tile, xe, id) {
			/* Migration VMs only populate their designated tile. */
			if (flags & XE_VM_FLAG_MIGRATION &&
			    tile->id != XE_VM_FLAG_TILE_ID(flags))
				continue;

			vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level,
						       &exec);
			if (IS_ERR(vm->pt_root[id])) {
				err = PTR_ERR(vm->pt_root[id]);
				vm->pt_root[id] = NULL;
				xe_vm_pt_destroy(vm);
				drm_exec_retry_on_contention(&exec);
				xe_validation_retry_on_oom(&ctx, &err);
				break;
			}
		}
		if (err)
			break;

		if (xe_vm_has_scratch(vm)) {
			for_each_tile(tile, xe, id) {
				if (!vm->pt_root[id])
					continue;

				err = xe_vm_create_scratch(xe, tile, vm, &exec);
				if (err) {
					xe_vm_free_scratch(vm);
					xe_vm_pt_destroy(vm);
					drm_exec_retry_on_contention(&exec);
					xe_validation_retry_on_oom(&ctx, &err);
					break;
				}
			}
			if (err)
				break;
			vm->batch_invalidate_tlb = true;
		}

		if (vm->flags & XE_VM_FLAG_LR_MODE) {
			INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
			vm->batch_invalidate_tlb = false;
		}

		/* Fill pt_root after allocating scratch tables */
		for_each_tile(tile, xe, id) {
			if (!vm->pt_root[id])
				continue;

			xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
		}
	}
	if (err)
		goto err_close;

	/* Kernel migration VM shouldn't have a circular loop.. */
	if (!(flags & XE_VM_FLAG_MIGRATION)) {
		for_each_tile(tile, xe, id) {
			struct xe_exec_queue *q;
			u32 create_flags = EXEC_QUEUE_FLAG_VM;

			if (!vm->pt_root[id])
				continue;

			if (!xef) /* Not from userspace */
				create_flags |= EXEC_QUEUE_FLAG_KERNEL;

			q = xe_exec_queue_create_bind(xe, tile, vm, create_flags, 0);
			if (IS_ERR(q)) {
				err = PTR_ERR(q);
				goto err_close;
			}
			vm->q[id] = q;
		}
	}

	if (xef && xe->info.has_asid) {
		u32 asid;

		down_write(&xe->usm.lock);
		/* ASID 0 is reserved; allocate cyclically from [1, XE_MAX_ASID). */
		err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
				      XA_LIMIT(1, XE_MAX_ASID - 1),
				      &xe->usm.next_asid, GFP_NOWAIT);
		up_write(&xe->usm.lock);
		if (err < 0)
			goto err_close;

		vm->usm.asid = asid;
	}

	trace_xe_vm_create(vm);

	return vm;

err_close:
	xe_vm_close_and_put(vm);
	return ERR_PTR(err);

err_svm_fini:
	if (flags & XE_VM_FLAG_FAULT_MODE) {
		vm->size = 0; /* close the vm */
		xe_svm_fini(vm);
	}
err_no_resv:
	mutex_destroy(&vm->snap_mutex);
	for_each_tile(tile, xe, id)
		xe_range_fence_tree_fini(&vm->rftree[id]);
	ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
	if (vm->xef)
		xe_file_put(vm->xef);
	kfree(vm);
	if (flags & XE_VM_FLAG_LR_MODE)
		xe_pm_runtime_put(xe);
	return ERR_PTR(err);
}

/*
 * Mark the VM closed (vm->size = 0), wait for pending binds, and clear
 * the page tables / invalidate TLBs while the device is still accessible.
 */
static void xe_vm_close(struct xe_vm *vm)
{
	struct xe_device *xe = vm->xe;
	bool bound;
	int idx;

	bound = drm_dev_enter(&xe->drm, &idx);

	down_write(&vm->lock);
	if (xe_vm_in_fault_mode(vm))
xe_svm_notifier_lock(vm);

	/* Setting size to 0 is what marks the VM as closed. */
	vm->size = 0;

	if (!((vm->flags & XE_VM_FLAG_MIGRATION))) {
		struct xe_tile *tile;
		struct xe_gt *gt;
		u8 id;

		/* Wait for pending binds */
		dma_resv_wait_timeout(xe_vm_resv(vm),
				      DMA_RESV_USAGE_BOOKKEEP,
				      false, MAX_SCHEDULE_TIMEOUT);

		/* Only touch hardware while the device is still present. */
		if (bound) {
			for_each_tile(tile, xe, id)
				if (vm->pt_root[id])
					xe_pt_clear(xe, vm->pt_root[id]);

			for_each_gt(gt, xe, id)
				xe_tlb_inval_vm(&gt->tlb_inval, vm);
		}
	}

	if (xe_vm_in_fault_mode(vm))
		xe_svm_notifier_unlock(vm);
	up_write(&vm->lock);

	if (bound)
		drm_dev_exit(idx);
}

/**
 * xe_vm_close_and_put() - Tear down a VM and drop the creation reference
 * @vm: The VM.
 *
 * Closes the VM, kills and releases its bind exec queues, destroys all
 * VMAs, scratch tables and page-table roots, releases the ASID and
 * finally drops the creation-time reference.
 */
void xe_vm_close_and_put(struct xe_vm *vm)
{
	LIST_HEAD(contested);
	struct xe_device *xe = vm->xe;
	struct xe_tile *tile;
	struct xe_vma *vma, *next_vma;
	struct drm_gpuva *gpuva, *next;
	u8 id;

	xe_assert(xe, !vm->preempt.num_exec_queues);

	xe_vm_close(vm);
	if (xe_vm_in_preempt_fence_mode(vm)) {
		mutex_lock(&xe->rebind_resume_lock);
		list_del_init(&vm->preempt.pm_activate_link);
		mutex_unlock(&xe->rebind_resume_lock);
		flush_work(&vm->preempt.rebind_work);
	}
	if (xe_vm_in_fault_mode(vm))
		xe_svm_close(vm);

	/* Drop the last fences held on behalf of the bind queues. */
	down_write(&vm->lock);
	for_each_tile(tile, xe, id) {
		if (vm->q[id]) {
			int i;

			xe_exec_queue_last_fence_put(vm->q[id], vm);
			for_each_tlb_inval(i)
				xe_exec_queue_tlb_inval_last_fence_put(vm->q[id], vm, i);
		}
	}
	up_write(&vm->lock);

	for_each_tile(tile, xe, id) {
		if (vm->q[id]) {
			xe_exec_queue_kill(vm->q[id]);
			xe_exec_queue_put(vm->q[id]);
			vm->q[id] = NULL;
		}
	}

	down_write(&vm->lock);
	xe_vm_lock(vm, false);
	drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
		vma = gpuva_to_vma(gpuva);

		if (xe_vma_has_no_bo(vma)) {
			xe_svm_notifier_lock(vm);
			vma->gpuva.flags |= XE_VMA_DESTROYED;
			xe_svm_notifier_unlock(vm);
		}

		xe_vm_remove_vma(vm, vma);

		/* easy case: no BO, or a VM-private BO whose resv we already hold */
		if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
			list_del_init(&vma->combined_links.rebind);
			xe_vma_destroy(vma, NULL);
			continue;
		}

		/* External BO: defer destruction until its resv can be taken. */
		list_move_tail(&vma->combined_links.destroy, &contested);
		vma->gpuva.flags |= XE_VMA_DESTROYED;
	}

	/*
	 * All vm operations will add shared fences to resv.
	 * The only exception is eviction for a shared object,
	 * but even so, the unbind when evicted would still
	 * install a fence to resv. Hence it's safe to
	 * destroy the pagetables immediately.
	 */
	xe_vm_free_scratch(vm);
	xe_vm_pt_destroy(vm);
	xe_vm_unlock(vm);

	/*
	 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
	 * Since we hold a refcount to the bo, we can remove and free
	 * the members safely without locking.
	 */
	list_for_each_entry_safe(vma, next_vma, &contested,
				 combined_links.destroy) {
		list_del_init(&vma->combined_links.destroy);
		xe_vma_destroy_unlocked(vma);
	}

	xe_svm_fini(vm);

	up_write(&vm->lock);

	down_write(&xe->usm.lock);
	if (vm->usm.asid) {
		void *lookup;

		xe_assert(xe, xe->info.has_asid);
		xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));

		lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
		xe_assert(xe, lookup == vm);
	}
	up_write(&xe->usm.lock);

	xe_vm_clear_fault_entries(vm);

	for_each_tile(tile, xe, id)
		xe_range_fence_tree_fini(&vm->rftree[id]);

	xe_vm_put(vm);
}

/*
 * Final VM free, deferred to a workqueue by xe_vm_free() so it can sleep.
 */
static void vm_destroy_work_func(struct work_struct *w)
{
	struct xe_vm *vm =
		container_of(w, struct xe_vm, destroy_work);
	struct xe_device *xe = vm->xe;
	struct xe_tile *tile;
	u8 id;

	/* xe_vm_close_and_put was not called? */
	xe_assert(xe, !vm->size);

	if (xe_vm_in_preempt_fence_mode(vm))
		flush_work(&vm->preempt.rebind_work);

	mutex_destroy(&vm->snap_mutex);

	if (vm->flags & XE_VM_FLAG_LR_MODE)
		xe_pm_runtime_put(xe);

	for_each_tile(tile, xe, id)
		XE_WARN_ON(vm->pt_root[id]);

	trace_xe_vm_free(vm);

	ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);

	if (vm->xef)
		xe_file_put(vm->xef);

	kfree(vm);
}

/* gpuvm_ops.vm_free callback: last reference dropped. */
static void xe_vm_free(struct drm_gpuvm *gpuvm)
{
	struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);

	/* To destroy the VM we need to be able to sleep */
	queue_work(system_dfl_wq, &vm->destroy_work);
}

/**
 * xe_vm_lookup() - Look up a VM by user-visible id and take a reference
 * @xef: xe file the id belongs to.
 * @id: VM id previously returned by the VM create ioctl.
 *
 * Return: referenced VM, or NULL if no VM with that id exists.
 */
struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
{
	struct xe_vm *vm;

	mutex_lock(&xef->vm.lock);
	vm = xa_load(&xef->vm.xa, id);
	if (vm)
		xe_vm_get(vm);
	mutex_unlock(&xef->vm.lock);

	return vm;
}

/* Encode the PDP4 descriptor for @tile's page-table root. */
u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
{
	return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0);
}

/* Return @q if given, otherwise fall back to the VM's first bind queue. */
static struct xe_exec_queue *
to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	return q ?
q : vm->q[0];
}

/*
 * Return a reference to the first user-fence sync entry in @syncs, or
 * NULL if none is present.
 */
static struct xe_user_fence *
find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
{
	unsigned int i;

	for (i = 0; i < num_syncs; i++) {
		struct xe_sync_entry *e = &syncs[i];

		if (xe_sync_is_ufence(e))
			return xe_sync_ufence_get(e);
	}

	return NULL;
}

#define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
				    DRM_XE_VM_CREATE_FLAG_LR_MODE | \
				    DRM_XE_VM_CREATE_FLAG_FAULT_MODE | \
				    DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT)

int xe_vm_create_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_vm_create *args = data;
	struct xe_gt *wa_gt = xe_root_mmio_gt(xe);
	struct xe_vm *vm;
	u32 id;
	int err;
	u32 flags = 0;

	if (XE_IOCTL_DBG(xe, args->extensions))
		return -EINVAL;

	/* Workaround 22014953428 forces scratch pages on. */
	if (wa_gt && XE_GT_WA(wa_gt, 22014953428))
		args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;

	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
			 !xe->info.has_usm))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
		return -EINVAL;

	/* Scratch + fault mode is only allowed where hardware needs scratch. */
	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
			 !xe->info.needs_scratch))
		return -EINVAL;

	/* Fault mode implies long-running mode. */
	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
		return -EINVAL;

	/* No-overcommit only makes sense in fault mode. */
	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) &&
			 args->flags & DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT))
		return -EINVAL;

	/* Translate uAPI flags to internal XE_VM_FLAG_* flags. */
	if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
		flags |= XE_VM_FLAG_SCRATCH_PAGE;
	if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
		flags |= XE_VM_FLAG_LR_MODE;
	if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
		flags |= XE_VM_FLAG_FAULT_MODE;
	if (args->flags & DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT)
		flags |= XE_VM_FLAG_NO_VM_OVERCOMMIT;

	vm = xe_vm_create(xe, flags, xef);
	if (IS_ERR(vm))
		return PTR_ERR(vm);

#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
	/* Warning: Security issue - never enable by default */
	args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
#endif

	/* user id alloc must always be last in ioctl to prevent UAF */
	err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
	if (err)
		goto err_close_and_put;

	args->vm_id = id;

	return 0;

err_close_and_put:
	xe_vm_close_and_put(vm);

	return err;
}

int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_vm_destroy *args = data;
	struct xe_vm *vm;
	int err = 0;

	if (XE_IOCTL_DBG(xe, args->pad) ||
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	mutex_lock(&xef->vm.lock);
	vm = xa_load(&xef->vm.xa, args->vm_id);
	if (XE_IOCTL_DBG(xe, !vm))
		err = -ENOENT;
	else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
		err = -EBUSY;
	else
		xa_erase(&xef->vm.xa, args->vm_id);
	mutex_unlock(&xef->vm.lock);

	if (!err)
		xe_vm_close_and_put(vm);

	return err;
}

/* Count the VMAs overlapping the range [start, end) in @vm. */
static int xe_vm_query_vmas(struct xe_vm *vm, u64 start, u64 end)
{
	struct drm_gpuva *gpuva;
	u32 num_vmas = 0;

	lockdep_assert_held(&vm->lock);
	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end)
		num_vmas++;

	return num_vmas;
}

/*
 * Fill @attrs with the memory attributes of the VMAs overlapping
 * [start, end). On input *num_vmas is the capacity of @attrs; on output
 * it is the number of entries written. Returns -ENOSPC if the range
 * contains more VMAs than fit.
 */
static int get_mem_attrs(struct xe_vm *vm, u32 *num_vmas, u64 start,
			 u64 end, struct drm_xe_mem_range_attr *attrs)
{
	struct drm_gpuva *gpuva;
	int i = 0;

	lockdep_assert_held(&vm->lock);

	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);

		if (i == *num_vmas)
			return -ENOSPC;

		attrs[i].start = xe_vma_start(vma);
		attrs[i].end = xe_vma_end(vma);
		attrs[i].atomic.val = vma->attr.atomic_access;
		attrs[i].pat_index.val = vma->attr.pat_index;
		attrs[i].preferred_mem_loc.devmem_fd = vma->attr.preferred_loc.devmem_fd;
		attrs[i].preferred_mem_loc.migration_policy =
			vma->attr.preferred_loc.migration_policy;

		i++;
	}

	*num_vmas = i;
	return 0;
}

int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_mem_range_attr *mem_attrs;
	struct drm_xe_vm_query_mem_range_attr *args = data;
	u64 __user *attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);
	struct xe_vm *vm;
	int err = 0;

	/*
	 * Either a pure size query (no ranges, no buffer) or a fill request
	 * with a buffer and a matching per-entry size.
	 */
	if (XE_IOCTL_DBG(xe,
			 ((args->num_mem_ranges == 0 &&
			   (attrs_user || args->sizeof_mem_range_attr != 0)) ||
			  (args->num_mem_ranges > 0 &&
			   (!attrs_user ||
			    args->sizeof_mem_range_attr !=
			    sizeof(struct drm_xe_mem_range_attr))))))
		return -EINVAL;

	vm = xe_vm_lookup(xef, args->vm_id);
	if (XE_IOCTL_DBG(xe, !vm))
		return -EINVAL;

	err = down_read_interruptible(&vm->lock);
	if (err)
		goto put_vm;

	attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);

	/* Size query: report the VMA count and per-entry size only. */
	if (args->num_mem_ranges == 0 && !attrs_user) {
		args->num_mem_ranges = xe_vm_query_vmas(vm, args->start, args->start + args->range);
		args->sizeof_mem_range_attr = sizeof(struct drm_xe_mem_range_attr);
		goto unlock_vm;
	}

	mem_attrs = kvmalloc_array(args->num_mem_ranges, args->sizeof_mem_range_attr,
				   GFP_KERNEL | __GFP_ACCOUNT |
				   __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
	if (!mem_attrs) {
		err = args->num_mem_ranges > 1 ? -ENOBUFS : -ENOMEM;
		goto unlock_vm;
	}

	memset(mem_attrs, 0, args->num_mem_ranges * args->sizeof_mem_range_attr);
	err = get_mem_attrs(vm, &args->num_mem_ranges, args->start,
			    args->start + args->range, mem_attrs);
	if (err)
		goto free_mem_attrs;

	err = copy_to_user(attrs_user, mem_attrs,
			   args->sizeof_mem_range_attr * args->num_mem_ranges);
	if (err)
		err = -EFAULT;

free_mem_attrs:
	kvfree(mem_attrs);
unlock_vm:
	up_read(&vm->lock);
put_vm:
	xe_vm_put(vm);
	return err;
}

/* True if the 4K page at @page_addr overlaps @vma. */
static bool vma_matches(struct xe_vma *vma, u64 page_addr)
{
	if (page_addr > xe_vma_end(vma) - 1 ||
	    page_addr + SZ_4K - 1 < xe_vma_start(vma))
		return false;

	return true;
}

/**
 * xe_vm_find_vma_by_addr() - Find a VMA by its address
 *
 * @vm: the xe_vm the vma belongs to
 * @page_addr: address to look up
 *
 * Checks the cached last-fault VMA first, then falls back to a tree walk.
 *
 * Return: the VMA covering @page_addr, or NULL if none.
 */
struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr)
{
	struct xe_vma *vma = NULL;

	if (vm->usm.last_fault_vma) {	/* Fast lookup */
		if (vma_matches(vm->usm.last_fault_vma, page_addr))
			vma = vm->usm.last_fault_vma;
	}
	if (!vma)
		vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K);

	return vma;
}

/* Maps uAPI prefetch region index (1-based beyond TT) to TTM placements. */
static const u32 region_to_mem_type[] = {
	XE_PL_TT,
	XE_PL_VRAM0,
	XE_PL_VRAM1,
};

/*
 * Flag @vma as destroyed (under the SVM notifier lock) and, if it was
 * already committed, remove it from the GPUVA tree.
 */
static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
			     bool post_commit)
{
	xe_svm_notifier_lock(vm);
	vma->gpuva.flags |= XE_VMA_DESTROYED;
	xe_svm_notifier_unlock(vm);
	if (post_commit)
		xe_vm_remove_vma(vm, vma);
}
#undef ULL
#define ULL	unsigned long long

#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
/* Debug-only dump of a single gpuva operation. */
static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
{
	struct xe_vma *vma;

	switch (op->op) {
	case DRM_GPUVA_OP_MAP:
		vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
		       (ULL)op->map.va.addr, (ULL)op->map.va.range);
		break;
	case DRM_GPUVA_OP_REMAP:
		vma = gpuva_to_vma(op->remap.unmap->va);
		vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
		       op->remap.unmap->keep ? 1 : 0);
		if (op->remap.prev)
			vm_dbg(&xe->drm,
			       "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
			       (ULL)op->remap.prev->va.addr,
			       (ULL)op->remap.prev->va.range);
		if (op->remap.next)
			vm_dbg(&xe->drm,
			       "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
			       (ULL)op->remap.next->va.addr,
			       (ULL)op->remap.next->va.range);
		break;
	case DRM_GPUVA_OP_UNMAP:
		vma = gpuva_to_vma(op->unmap.va);
		vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
		       op->unmap.keep ? 1 : 0);
		break;
	case DRM_GPUVA_OP_PREFETCH:
		vma = gpuva_to_vma(op->prefetch.va);
		vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
		break;
	default:
		drm_warn(&xe->drm, "NOT POSSIBLE\n");
	}
}
#else
static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
{
}
#endif

/*
 * Binds without IMMEDIATE on a fault-mode VM with scratch pages must
 * clear the scratch mappings so subsequent accesses fault in real pages.
 */
static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags)
{
	if (!xe_vm_in_fault_mode(vm))
		return false;

	if (!xe_vm_has_scratch(vm))
		return false;

	if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE)
		return false;

	return true;
}

/* Release per-op SVM prefetch state for every op in @ops. */
static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops)
{
	struct drm_gpuva_op *__op;

	drm_gpuva_for_each_op(__op, ops) {
		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);

		xe_vma_svm_prefetch_op_fini(op);
	}
}

/*
 * Create operations list from IOCTL arguments, setup operations fields so parse
 * and commit steps are decoupled from IOCTL arguments. This step can fail.
 */
static struct drm_gpuva_ops *
vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
			 struct xe_bo *bo, u64 bo_offset_or_userptr,
			 u64 addr, u64 range,
			 u32 operation, u32 flags,
			 u32 prefetch_region, u16 pat_index)
{
	struct drm_gem_object *obj = bo ?
&bo->ttm.base : NULL; 2380 struct drm_gpuva_ops *ops; 2381 struct drm_gpuva_op *__op; 2382 struct drm_gpuvm_bo *vm_bo; 2383 u64 range_start = addr; 2384 u64 range_end = addr + range; 2385 int err; 2386 2387 lockdep_assert_held_write(&vm->lock); 2388 2389 vm_dbg(&vm->xe->drm, 2390 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx", 2391 operation, (ULL)addr, (ULL)range, 2392 (ULL)bo_offset_or_userptr); 2393 2394 switch (operation) { 2395 case DRM_XE_VM_BIND_OP_MAP: 2396 if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) { 2397 xe_vm_find_cpu_addr_mirror_vma_range(vm, &range_start, &range_end); 2398 vops->flags |= XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP; 2399 } 2400 2401 fallthrough; 2402 case DRM_XE_VM_BIND_OP_MAP_USERPTR: { 2403 struct drm_gpuvm_map_req map_req = { 2404 .map.va.addr = range_start, 2405 .map.va.range = range_end - range_start, 2406 .map.gem.obj = obj, 2407 .map.gem.offset = bo_offset_or_userptr, 2408 }; 2409 2410 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, &map_req); 2411 break; 2412 } 2413 case DRM_XE_VM_BIND_OP_UNMAP: 2414 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range); 2415 break; 2416 case DRM_XE_VM_BIND_OP_PREFETCH: 2417 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range); 2418 break; 2419 case DRM_XE_VM_BIND_OP_UNMAP_ALL: 2420 xe_assert(vm->xe, bo); 2421 2422 err = xe_bo_lock(bo, true); 2423 if (err) 2424 return ERR_PTR(err); 2425 2426 vm_bo = drm_gpuvm_bo_obtain_locked(&vm->gpuvm, obj); 2427 if (IS_ERR(vm_bo)) { 2428 xe_bo_unlock(bo); 2429 return ERR_CAST(vm_bo); 2430 } 2431 2432 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo); 2433 drm_gpuvm_bo_put(vm_bo); 2434 xe_bo_unlock(bo); 2435 break; 2436 default: 2437 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n"); 2438 ops = ERR_PTR(-EINVAL); 2439 } 2440 if (IS_ERR(ops)) 2441 return ops; 2442 2443 drm_gpuva_for_each_op(__op, ops) { 2444 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2445 2446 if (__op->op == DRM_GPUVA_OP_MAP) { 2447 op->map.immediate = 2448 flags & 
DRM_XE_VM_BIND_FLAG_IMMEDIATE; 2449 if (flags & DRM_XE_VM_BIND_FLAG_READONLY) 2450 op->map.vma_flags |= XE_VMA_READ_ONLY; 2451 if (flags & DRM_XE_VM_BIND_FLAG_NULL) 2452 op->map.vma_flags |= DRM_GPUVA_SPARSE; 2453 if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) 2454 op->map.vma_flags |= XE_VMA_SYSTEM_ALLOCATOR; 2455 if (flags & DRM_XE_VM_BIND_FLAG_DUMPABLE) 2456 op->map.vma_flags |= XE_VMA_DUMPABLE; 2457 if (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) 2458 op->map.vma_flags |= XE_VMA_MADV_AUTORESET; 2459 op->map.request_decompress = flags & DRM_XE_VM_BIND_FLAG_DECOMPRESS; 2460 op->map.pat_index = pat_index; 2461 op->map.invalidate_on_bind = 2462 __xe_vm_needs_clear_scratch_pages(vm, flags); 2463 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { 2464 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2465 struct xe_tile *tile; 2466 struct xe_svm_range *svm_range; 2467 struct drm_gpusvm_ctx ctx = {}; 2468 struct drm_pagemap *dpagemap = NULL; 2469 u8 id, tile_mask = 0; 2470 u32 i; 2471 2472 if (!xe_vma_is_cpu_addr_mirror(vma)) { 2473 op->prefetch.region = prefetch_region; 2474 break; 2475 } 2476 2477 ctx.read_only = xe_vma_read_only(vma); 2478 ctx.devmem_possible = IS_DGFX(vm->xe) && 2479 IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); 2480 2481 for_each_tile(tile, vm->xe, id) 2482 tile_mask |= 0x1 << id; 2483 2484 xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC); 2485 op->prefetch_range.ranges_count = 0; 2486 2487 if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) { 2488 dpagemap = xe_vma_resolve_pagemap(vma, 2489 xe_device_get_root_tile(vm->xe)); 2490 } else if (prefetch_region) { 2491 tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] - 2492 XE_PL_VRAM0]; 2493 dpagemap = xe_tile_local_pagemap(tile); 2494 } 2495 2496 op->prefetch_range.dpagemap = dpagemap; 2497 alloc_next_range: 2498 svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx); 2499 2500 if (PTR_ERR(svm_range) == -ENOENT) { 2501 u64 ret = xe_svm_find_vma_start(vm, addr, range_end, 
vma); 2502 2503 addr = ret == ULONG_MAX ? 0 : ret; 2504 if (addr) 2505 goto alloc_next_range; 2506 else 2507 goto print_op_label; 2508 } 2509 2510 if (IS_ERR(svm_range)) { 2511 err = PTR_ERR(svm_range); 2512 goto unwind_prefetch_ops; 2513 } 2514 2515 if (xe_svm_range_validate(vm, svm_range, tile_mask, dpagemap)) { 2516 xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID"); 2517 goto check_next_range; 2518 } 2519 2520 err = xa_alloc(&op->prefetch_range.range, 2521 &i, svm_range, xa_limit_32b, 2522 GFP_KERNEL); 2523 2524 if (err) 2525 goto unwind_prefetch_ops; 2526 2527 op->prefetch_range.ranges_count++; 2528 vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH; 2529 xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED"); 2530 check_next_range: 2531 if (range_end > xe_svm_range_end(svm_range) && 2532 xe_svm_range_end(svm_range) < xe_vma_end(vma)) { 2533 addr = xe_svm_range_end(svm_range); 2534 goto alloc_next_range; 2535 } 2536 } 2537 print_op_label: 2538 print_op(vm->xe, __op); 2539 } 2540 2541 return ops; 2542 2543 unwind_prefetch_ops: 2544 xe_svm_prefetch_gpuva_ops_fini(ops); 2545 drm_gpuva_ops_free(&vm->gpuvm, ops); 2546 return ERR_PTR(err); 2547 } 2548 2549 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO); 2550 2551 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, 2552 struct xe_vma_mem_attr *attr, unsigned int flags) 2553 { 2554 struct xe_bo *bo = op->gem.obj ? 
gem_to_xe_bo(op->gem.obj) : NULL; 2555 struct xe_validation_ctx ctx; 2556 struct drm_exec exec; 2557 struct xe_vma *vma; 2558 int err = 0; 2559 2560 lockdep_assert_held_write(&vm->lock); 2561 2562 if (bo) { 2563 err = 0; 2564 xe_validation_guard(&ctx, &vm->xe->val, &exec, 2565 (struct xe_val_flags) {.interruptible = true}, err) { 2566 if (!bo->vm) { 2567 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm)); 2568 drm_exec_retry_on_contention(&exec); 2569 } 2570 if (!err) { 2571 err = drm_exec_lock_obj(&exec, &bo->ttm.base); 2572 drm_exec_retry_on_contention(&exec); 2573 } 2574 if (err) 2575 return ERR_PTR(err); 2576 2577 vma = xe_vma_create(vm, bo, op->gem.offset, 2578 op->va.addr, op->va.addr + 2579 op->va.range - 1, attr, flags); 2580 if (IS_ERR(vma)) 2581 return vma; 2582 2583 if (!bo->vm) { 2584 err = add_preempt_fences(vm, bo); 2585 if (err) { 2586 prep_vma_destroy(vm, vma, false); 2587 xe_vma_destroy(vma, NULL); 2588 } 2589 } 2590 } 2591 if (err) 2592 return ERR_PTR(err); 2593 } else { 2594 vma = xe_vma_create(vm, NULL, op->gem.offset, 2595 op->va.addr, op->va.addr + 2596 op->va.range - 1, attr, flags); 2597 if (IS_ERR(vma)) 2598 return vma; 2599 2600 if (xe_vma_is_userptr(vma)) { 2601 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2602 /* 2603 * -EBUSY has dedicated meaning that a user fence 2604 * attached to the VMA is busy, in practice 2605 * xe_vma_userptr_pin_pages can only fail with -EBUSY if 2606 * we are low on memory so convert this to -ENOMEM. 
2607 */ 2608 if (err == -EBUSY) 2609 err = -ENOMEM; 2610 } 2611 } 2612 if (err) { 2613 prep_vma_destroy(vm, vma, false); 2614 xe_vma_destroy_unlocked(vma); 2615 vma = ERR_PTR(err); 2616 } 2617 2618 return vma; 2619 } 2620 2621 static u64 xe_vma_max_pte_size(struct xe_vma *vma) 2622 { 2623 if (vma->gpuva.flags & XE_VMA_PTE_1G) 2624 return SZ_1G; 2625 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT)) 2626 return SZ_2M; 2627 else if (vma->gpuva.flags & XE_VMA_PTE_64K) 2628 return SZ_64K; 2629 else if (vma->gpuva.flags & XE_VMA_PTE_4K) 2630 return SZ_4K; 2631 2632 return SZ_1G; /* Uninitialized, used max size */ 2633 } 2634 2635 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size) 2636 { 2637 switch (size) { 2638 case SZ_1G: 2639 vma->gpuva.flags |= XE_VMA_PTE_1G; 2640 break; 2641 case SZ_2M: 2642 vma->gpuva.flags |= XE_VMA_PTE_2M; 2643 break; 2644 case SZ_64K: 2645 vma->gpuva.flags |= XE_VMA_PTE_64K; 2646 break; 2647 case SZ_4K: 2648 vma->gpuva.flags |= XE_VMA_PTE_4K; 2649 break; 2650 } 2651 } 2652 2653 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) 2654 { 2655 int err = 0; 2656 2657 lockdep_assert_held_write(&vm->lock); 2658 2659 switch (op->base.op) { 2660 case DRM_GPUVA_OP_MAP: 2661 err |= xe_vm_insert_vma(vm, op->map.vma); 2662 if (!err) 2663 op->flags |= XE_VMA_OP_COMMITTED; 2664 break; 2665 case DRM_GPUVA_OP_REMAP: 2666 { 2667 u8 tile_present = 2668 gpuva_to_vma(op->base.remap.unmap->va)->tile_present; 2669 2670 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va), 2671 true); 2672 op->flags |= XE_VMA_OP_COMMITTED; 2673 2674 if (op->remap.prev) { 2675 err |= xe_vm_insert_vma(vm, op->remap.prev); 2676 if (!err) 2677 op->flags |= XE_VMA_OP_PREV_COMMITTED; 2678 if (!err && op->remap.skip_prev) { 2679 op->remap.prev->tile_present = 2680 tile_present; 2681 } 2682 } 2683 if (op->remap.next) { 2684 err |= xe_vm_insert_vma(vm, op->remap.next); 2685 if (!err) 2686 op->flags |= XE_VMA_OP_NEXT_COMMITTED; 2687 if (!err && 
op->remap.skip_next) { 2688 op->remap.next->tile_present = 2689 tile_present; 2690 } 2691 } 2692 2693 /* 2694 * Adjust for partial unbind after removing VMA from VM. In case 2695 * of unwind we might need to undo this later. 2696 */ 2697 if (!err) { 2698 op->base.remap.unmap->va->va.addr = op->remap.start; 2699 op->base.remap.unmap->va->va.range = op->remap.range; 2700 } 2701 break; 2702 } 2703 case DRM_GPUVA_OP_UNMAP: 2704 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true); 2705 op->flags |= XE_VMA_OP_COMMITTED; 2706 break; 2707 case DRM_GPUVA_OP_PREFETCH: 2708 op->flags |= XE_VMA_OP_COMMITTED; 2709 break; 2710 default: 2711 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n"); 2712 } 2713 2714 return err; 2715 } 2716 2717 /** 2718 * xe_vma_has_default_mem_attrs - Check if a VMA has default memory attributes 2719 * @vma: Pointer to the xe_vma structure to check 2720 * 2721 * This function determines whether the given VMA (Virtual Memory Area) 2722 * has its memory attributes set to their default values. Specifically, 2723 * it checks the following conditions: 2724 * 2725 * - `atomic_access` is `DRM_XE_VMA_ATOMIC_UNDEFINED` 2726 * - `pat_index` is equal to `default_pat_index` 2727 * - `preferred_loc.devmem_fd` is `DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE` 2728 * - `preferred_loc.migration_policy` is `DRM_XE_MIGRATE_ALL_PAGES` 2729 * 2730 * Return: true if all attributes are at their default values, false otherwise. 
2731 */ 2732 bool xe_vma_has_default_mem_attrs(struct xe_vma *vma) 2733 { 2734 return (vma->attr.atomic_access == DRM_XE_ATOMIC_UNDEFINED && 2735 vma->attr.pat_index == vma->attr.default_pat_index && 2736 vma->attr.preferred_loc.devmem_fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE && 2737 vma->attr.preferred_loc.migration_policy == DRM_XE_MIGRATE_ALL_PAGES); 2738 } 2739 2740 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, 2741 struct xe_vma_ops *vops) 2742 { 2743 struct xe_device *xe = vm->xe; 2744 struct drm_gpuva_op *__op; 2745 struct xe_tile *tile; 2746 u8 id, tile_mask = 0; 2747 int err = 0; 2748 2749 lockdep_assert_held_write(&vm->lock); 2750 2751 for_each_tile(tile, vm->xe, id) 2752 tile_mask |= 0x1 << id; 2753 2754 drm_gpuva_for_each_op(__op, ops) { 2755 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2756 struct xe_vma *vma; 2757 unsigned int flags = 0; 2758 2759 INIT_LIST_HEAD(&op->link); 2760 list_add_tail(&op->link, &vops->list); 2761 op->tile_mask = tile_mask; 2762 2763 switch (op->base.op) { 2764 case DRM_GPUVA_OP_MAP: 2765 { 2766 struct xe_vma_mem_attr default_attr = { 2767 .preferred_loc = { 2768 .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE, 2769 .migration_policy = DRM_XE_MIGRATE_ALL_PAGES, 2770 }, 2771 .atomic_access = DRM_XE_ATOMIC_UNDEFINED, 2772 .default_pat_index = op->map.pat_index, 2773 .pat_index = op->map.pat_index, 2774 .purgeable_state = XE_MADV_PURGEABLE_WILLNEED, 2775 }; 2776 2777 flags |= op->map.vma_flags & XE_VMA_CREATE_MASK; 2778 2779 vma = new_vma(vm, &op->base.map, &default_attr, 2780 flags); 2781 if (IS_ERR(vma)) 2782 return PTR_ERR(vma); 2783 2784 op->map.vma = vma; 2785 if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) && 2786 !(op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) || 2787 op->map.invalidate_on_bind) 2788 xe_vma_ops_incr_pt_update_ops(vops, 2789 op->tile_mask, 1); 2790 break; 2791 } 2792 case DRM_GPUVA_OP_REMAP: 2793 { 2794 struct xe_vma *old = 2795 
gpuva_to_vma(op->base.remap.unmap->va); 2796 bool skip = xe_vma_is_cpu_addr_mirror(old); 2797 u64 start = xe_vma_start(old), end = xe_vma_end(old); 2798 int num_remap_ops = 0; 2799 2800 if (op->base.remap.prev) 2801 start = op->base.remap.prev->va.addr + 2802 op->base.remap.prev->va.range; 2803 if (op->base.remap.next) 2804 end = op->base.remap.next->va.addr; 2805 2806 if (xe_vma_is_cpu_addr_mirror(old) && 2807 xe_svm_has_mapping(vm, start, end)) { 2808 if (vops->flags & XE_VMA_OPS_FLAG_MADVISE) 2809 xe_svm_unmap_address_range(vm, start, end); 2810 else 2811 return -EBUSY; 2812 } 2813 2814 op->remap.start = xe_vma_start(old); 2815 op->remap.range = xe_vma_size(old); 2816 op->remap.old_start = op->remap.start; 2817 op->remap.old_range = op->remap.range; 2818 2819 flags |= op->base.remap.unmap->va->flags & XE_VMA_CREATE_MASK; 2820 if (op->base.remap.prev) { 2821 vma = new_vma(vm, op->base.remap.prev, 2822 &old->attr, flags); 2823 if (IS_ERR(vma)) 2824 return PTR_ERR(vma); 2825 2826 op->remap.prev = vma; 2827 2828 /* 2829 * Userptr creates a new SG mapping so 2830 * we must also rebind. 2831 */ 2832 op->remap.skip_prev = skip || 2833 (!xe_vma_is_userptr(old) && 2834 IS_ALIGNED(xe_vma_end(vma), 2835 xe_vma_max_pte_size(old))); 2836 if (op->remap.skip_prev) { 2837 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2838 op->remap.range -= 2839 xe_vma_end(vma) - 2840 xe_vma_start(old); 2841 op->remap.start = xe_vma_end(vma); 2842 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx", 2843 (ULL)op->remap.start, 2844 (ULL)op->remap.range); 2845 } else { 2846 num_remap_ops++; 2847 } 2848 } 2849 2850 if (op->base.remap.next) { 2851 vma = new_vma(vm, op->base.remap.next, 2852 &old->attr, flags); 2853 if (IS_ERR(vma)) 2854 return PTR_ERR(vma); 2855 2856 op->remap.next = vma; 2857 2858 /* 2859 * Userptr creates a new SG mapping so 2860 * we must also rebind. 
2861 */ 2862 op->remap.skip_next = skip || 2863 (!xe_vma_is_userptr(old) && 2864 IS_ALIGNED(xe_vma_start(vma), 2865 xe_vma_max_pte_size(old))); 2866 if (op->remap.skip_next) { 2867 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); 2868 op->remap.range -= 2869 xe_vma_end(old) - 2870 xe_vma_start(vma); 2871 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx", 2872 (ULL)op->remap.start, 2873 (ULL)op->remap.range); 2874 } else { 2875 num_remap_ops++; 2876 } 2877 } 2878 if (!skip) 2879 num_remap_ops++; 2880 2881 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops); 2882 break; 2883 } 2884 case DRM_GPUVA_OP_UNMAP: 2885 vma = gpuva_to_vma(op->base.unmap.va); 2886 2887 if (xe_vma_is_cpu_addr_mirror(vma) && 2888 xe_svm_has_mapping(vm, xe_vma_start(vma), 2889 xe_vma_end(vma)) && 2890 !(vops->flags & XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP)) 2891 return -EBUSY; 2892 2893 if (!xe_vma_is_cpu_addr_mirror(vma)) 2894 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2895 break; 2896 case DRM_GPUVA_OP_PREFETCH: 2897 vma = gpuva_to_vma(op->base.prefetch.va); 2898 2899 if (xe_vma_is_userptr(vma)) { 2900 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2901 if (err) 2902 return err; 2903 } 2904 2905 if (xe_vma_is_cpu_addr_mirror(vma)) 2906 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 2907 op->prefetch_range.ranges_count); 2908 else 2909 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2910 2911 break; 2912 default: 2913 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n"); 2914 } 2915 2916 err = xe_vma_op_commit(vm, op); 2917 if (err) 2918 return err; 2919 } 2920 2921 return 0; 2922 } 2923 2924 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, 2925 bool post_commit, bool prev_post_commit, 2926 bool next_post_commit) 2927 { 2928 lockdep_assert_held_write(&vm->lock); 2929 2930 switch (op->base.op) { 2931 case DRM_GPUVA_OP_MAP: 2932 if (op->map.vma) { 2933 prep_vma_destroy(vm, op->map.vma, post_commit); 2934 
xe_vma_destroy_unlocked(op->map.vma); 2935 } 2936 break; 2937 case DRM_GPUVA_OP_UNMAP: 2938 { 2939 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va); 2940 2941 if (vma) { 2942 xe_svm_notifier_lock(vm); 2943 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2944 xe_svm_notifier_unlock(vm); 2945 if (post_commit) 2946 xe_vm_insert_vma(vm, vma); 2947 } 2948 break; 2949 } 2950 case DRM_GPUVA_OP_REMAP: 2951 { 2952 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va); 2953 2954 if (op->remap.prev) { 2955 prep_vma_destroy(vm, op->remap.prev, prev_post_commit); 2956 xe_vma_destroy_unlocked(op->remap.prev); 2957 } 2958 if (op->remap.next) { 2959 prep_vma_destroy(vm, op->remap.next, next_post_commit); 2960 xe_vma_destroy_unlocked(op->remap.next); 2961 } 2962 if (vma) { 2963 xe_svm_notifier_lock(vm); 2964 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2965 xe_svm_notifier_unlock(vm); 2966 if (post_commit) { 2967 /* 2968 * Restore the old va range, in case of the 2969 * prev/next skip optimisation. Otherwise what 2970 * we re-insert here could be smaller than the 2971 * original range. 
2972 */ 2973 op->base.remap.unmap->va->va.addr = 2974 op->remap.old_start; 2975 op->base.remap.unmap->va->va.range = 2976 op->remap.old_range; 2977 xe_vm_insert_vma(vm, vma); 2978 } 2979 } 2980 break; 2981 } 2982 case DRM_GPUVA_OP_PREFETCH: 2983 /* Nothing to do */ 2984 break; 2985 default: 2986 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n"); 2987 } 2988 } 2989 2990 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, 2991 struct drm_gpuva_ops **ops, 2992 int num_ops_list) 2993 { 2994 int i; 2995 2996 for (i = num_ops_list - 1; i >= 0; --i) { 2997 struct drm_gpuva_ops *__ops = ops[i]; 2998 struct drm_gpuva_op *__op; 2999 3000 if (!__ops) 3001 continue; 3002 3003 drm_gpuva_for_each_op_reverse(__op, __ops) { 3004 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 3005 3006 xe_vma_op_unwind(vm, op, 3007 op->flags & XE_VMA_OP_COMMITTED, 3008 op->flags & XE_VMA_OP_PREV_COMMITTED, 3009 op->flags & XE_VMA_OP_NEXT_COMMITTED); 3010 } 3011 } 3012 } 3013 3014 /** 3015 * struct xe_vma_lock_and_validate_flags - Flags for vma_lock_and_validate() 3016 * @res_evict: Allow evicting resources during validation 3017 * @validate: Perform BO validation 3018 * @request_decompress: Request BO decompression 3019 * @check_purged: Reject operation if BO is purged 3020 */ 3021 struct xe_vma_lock_and_validate_flags { 3022 u32 res_evict : 1; 3023 u32 validate : 1; 3024 u32 request_decompress : 1; 3025 u32 check_purged : 1; 3026 }; 3027 3028 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, 3029 struct xe_vma_lock_and_validate_flags flags) 3030 { 3031 struct xe_bo *bo = xe_vma_bo(vma); 3032 struct xe_vm *vm = xe_vma_vm(vma); 3033 int err = 0; 3034 3035 if (bo) { 3036 if (!bo->vm) 3037 err = drm_exec_lock_obj(exec, &bo->ttm.base); 3038 3039 /* Reject new mappings to DONTNEED/purged BOs; allow cleanup operations */ 3040 if (!err && flags.check_purged) { 3041 if (xe_bo_madv_is_dontneed(bo)) 3042 err = -EBUSY; /* BO marked purgeable */ 3043 else if (xe_bo_is_purged(bo)) 3044 err 
= -EINVAL; /* BO already purged */ 3045 } 3046 3047 if (!err && flags.validate) 3048 err = xe_bo_validate(bo, vm, 3049 xe_vm_allow_vm_eviction(vm) && 3050 flags.res_evict, exec); 3051 3052 if (err) 3053 return err; 3054 3055 if (flags.request_decompress) 3056 err = xe_bo_decompress(bo); 3057 } 3058 3059 return err; 3060 } 3061 3062 static int check_ufence(struct xe_vma *vma) 3063 { 3064 if (vma->ufence) { 3065 struct xe_user_fence * const f = vma->ufence; 3066 3067 if (!xe_sync_ufence_get_status(f)) 3068 return -EBUSY; 3069 3070 vma->ufence = NULL; 3071 xe_sync_ufence_put(f); 3072 } 3073 3074 return 0; 3075 } 3076 3077 static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) 3078 { 3079 bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); 3080 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 3081 struct drm_pagemap *dpagemap = op->prefetch_range.dpagemap; 3082 int err = 0; 3083 3084 struct xe_svm_range *svm_range; 3085 struct drm_gpusvm_ctx ctx = {}; 3086 unsigned long i; 3087 3088 if (!xe_vma_is_cpu_addr_mirror(vma)) 3089 return 0; 3090 3091 ctx.read_only = xe_vma_read_only(vma); 3092 ctx.devmem_possible = devmem_possible; 3093 ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0; 3094 ctx.device_private_page_owner = xe_svm_private_page_owner(vm, !dpagemap); 3095 3096 /* TODO: Threading the migration */ 3097 xa_for_each(&op->prefetch_range.range, i, svm_range) { 3098 if (!dpagemap) 3099 xe_svm_range_migrate_to_smem(vm, svm_range); 3100 3101 if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) { 3102 drm_dbg(&vm->xe->drm, 3103 "Prefetch pagemap is %s start 0x%016lx end 0x%016lx\n", 3104 dpagemap ? 
dpagemap->drm->unique : "system", 3105 xe_svm_range_start(svm_range), xe_svm_range_end(svm_range)); 3106 } 3107 3108 if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, dpagemap)) { 3109 err = xe_svm_alloc_vram(svm_range, &ctx, dpagemap); 3110 if (err) { 3111 drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n", 3112 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 3113 return -ENODATA; 3114 } 3115 xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM"); 3116 } 3117 3118 err = xe_svm_range_get_pages(vm, svm_range, &ctx); 3119 if (err) { 3120 drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n", 3121 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 3122 if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) 3123 err = -ENODATA; 3124 return err; 3125 } 3126 xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE"); 3127 } 3128 3129 return err; 3130 } 3131 3132 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, 3133 struct xe_vma_ops *vops, struct xe_vma_op *op) 3134 { 3135 int err = 0; 3136 bool res_evict; 3137 3138 /* 3139 * We only allow evicting a BO within the VM if it is not part of an 3140 * array of binds, as an array of binds can evict another BO within the 3141 * bind. 
3142 */ 3143 res_evict = !(vops->flags & XE_VMA_OPS_ARRAY_OF_BINDS); 3144 3145 switch (op->base.op) { 3146 case DRM_GPUVA_OP_MAP: 3147 if (!op->map.invalidate_on_bind) 3148 err = vma_lock_and_validate(exec, op->map.vma, 3149 (struct xe_vma_lock_and_validate_flags) { 3150 .res_evict = res_evict, 3151 .validate = !xe_vm_in_fault_mode(vm) || 3152 op->map.immediate, 3153 .request_decompress = 3154 op->map.request_decompress, 3155 .check_purged = true, 3156 }); 3157 break; 3158 case DRM_GPUVA_OP_REMAP: 3159 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va)); 3160 if (err) 3161 break; 3162 3163 err = vma_lock_and_validate(exec, 3164 gpuva_to_vma(op->base.remap.unmap->va), 3165 (struct xe_vma_lock_and_validate_flags) { 3166 .res_evict = res_evict, 3167 .validate = false, 3168 .request_decompress = false, 3169 .check_purged = false, 3170 }); 3171 if (!err && op->remap.prev) 3172 err = vma_lock_and_validate(exec, op->remap.prev, 3173 (struct xe_vma_lock_and_validate_flags) { 3174 .res_evict = res_evict, 3175 .validate = true, 3176 .request_decompress = false, 3177 .check_purged = true, 3178 }); 3179 if (!err && op->remap.next) 3180 err = vma_lock_and_validate(exec, op->remap.next, 3181 (struct xe_vma_lock_and_validate_flags) { 3182 .res_evict = res_evict, 3183 .validate = true, 3184 .request_decompress = false, 3185 .check_purged = true, 3186 }); 3187 break; 3188 case DRM_GPUVA_OP_UNMAP: 3189 err = check_ufence(gpuva_to_vma(op->base.unmap.va)); 3190 if (err) 3191 break; 3192 3193 err = vma_lock_and_validate(exec, 3194 gpuva_to_vma(op->base.unmap.va), 3195 (struct xe_vma_lock_and_validate_flags) { 3196 .res_evict = res_evict, 3197 .validate = false, 3198 .request_decompress = false, 3199 .check_purged = false, 3200 }); 3201 break; 3202 case DRM_GPUVA_OP_PREFETCH: 3203 { 3204 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 3205 u32 region; 3206 3207 if (!xe_vma_is_cpu_addr_mirror(vma)) { 3208 region = op->prefetch.region; 3209 xe_assert(vm->xe, region == 
DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC || 3210 region <= ARRAY_SIZE(region_to_mem_type)); 3211 } 3212 3213 /* 3214 * Prefetch attempts to migrate BO's backing store without 3215 * repopulating it first. Purged BOs have no backing store 3216 * to migrate, so reject the operation. 3217 */ 3218 err = vma_lock_and_validate(exec, 3219 gpuva_to_vma(op->base.prefetch.va), 3220 (struct xe_vma_lock_and_validate_flags) { 3221 .res_evict = res_evict, 3222 .validate = false, 3223 .request_decompress = false, 3224 .check_purged = true, 3225 }); 3226 if (!err && !xe_vma_has_no_bo(vma)) 3227 err = xe_bo_migrate(xe_vma_bo(vma), 3228 region_to_mem_type[region], 3229 NULL, 3230 exec); 3231 break; 3232 } 3233 default: 3234 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n"); 3235 } 3236 3237 return err; 3238 } 3239 3240 static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops) 3241 { 3242 struct xe_vma_op *op; 3243 int err; 3244 3245 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) 3246 return 0; 3247 3248 list_for_each_entry(op, &vops->list, link) { 3249 if (op->base.op == DRM_GPUVA_OP_PREFETCH) { 3250 err = prefetch_ranges(vm, op); 3251 if (err) 3252 return err; 3253 } 3254 } 3255 3256 return 0; 3257 } 3258 3259 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, 3260 struct xe_vm *vm, 3261 struct xe_vma_ops *vops) 3262 { 3263 struct xe_vma_op *op; 3264 int err; 3265 3266 err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 3267 if (err) 3268 return err; 3269 3270 list_for_each_entry(op, &vops->list, link) { 3271 err = op_lock_and_prep(exec, vm, vops, op); 3272 if (err) 3273 return err; 3274 } 3275 3276 #ifdef TEST_VM_OPS_ERROR 3277 if (vops->inject_error && 3278 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK) 3279 return -ENOSPC; 3280 #endif 3281 3282 return 0; 3283 } 3284 3285 static void op_trace(struct xe_vma_op *op) 3286 { 3287 switch (op->base.op) { 3288 case DRM_GPUVA_OP_MAP: 3289 trace_xe_vma_bind(op->map.vma); 3290 break; 3291 case 
DRM_GPUVA_OP_REMAP: 3292 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va)); 3293 if (op->remap.prev) 3294 trace_xe_vma_bind(op->remap.prev); 3295 if (op->remap.next) 3296 trace_xe_vma_bind(op->remap.next); 3297 break; 3298 case DRM_GPUVA_OP_UNMAP: 3299 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va)); 3300 break; 3301 case DRM_GPUVA_OP_PREFETCH: 3302 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va)); 3303 break; 3304 case DRM_GPUVA_OP_DRIVER: 3305 break; 3306 default: 3307 XE_WARN_ON("NOT POSSIBLE"); 3308 } 3309 } 3310 3311 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops) 3312 { 3313 struct xe_vma_op *op; 3314 3315 list_for_each_entry(op, &vops->list, link) 3316 op_trace(op); 3317 } 3318 3319 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops) 3320 { 3321 struct xe_exec_queue *q = vops->q; 3322 struct xe_tile *tile; 3323 int number_tiles = 0; 3324 u8 id; 3325 3326 for_each_tile(tile, vm->xe, id) { 3327 if (vops->pt_update_ops[id].num_ops) 3328 ++number_tiles; 3329 3330 if (vops->pt_update_ops[id].q) 3331 continue; 3332 3333 if (q) { 3334 vops->pt_update_ops[id].q = q; 3335 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list)) 3336 q = list_next_entry(q, multi_gt_list); 3337 } else { 3338 vops->pt_update_ops[id].q = vm->q[id]; 3339 } 3340 } 3341 3342 return number_tiles; 3343 } 3344 3345 static struct dma_fence *ops_execute(struct xe_vm *vm, 3346 struct xe_vma_ops *vops) 3347 { 3348 struct xe_tile *tile; 3349 struct dma_fence *fence = NULL; 3350 struct dma_fence **fences = NULL; 3351 struct dma_fence_array *cf = NULL; 3352 int number_tiles = 0, current_fence = 0, n_fence = 0, err, i; 3353 u8 id; 3354 3355 number_tiles = vm_ops_setup_tile_args(vm, vops); 3356 if (number_tiles == 0) 3357 return ERR_PTR(-ENODATA); 3358 3359 for_each_tile(tile, vm->xe, id) { 3360 ++n_fence; 3361 3362 if (!(vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT)) 3363 for_each_tlb_inval(i) 3364 ++n_fence; 3365 } 3366 3367 fences = 
kmalloc_objs(*fences, n_fence); 3368 if (!fences) { 3369 fence = ERR_PTR(-ENOMEM); 3370 goto err_trace; 3371 } 3372 3373 cf = dma_fence_array_alloc(n_fence); 3374 if (!cf) { 3375 fence = ERR_PTR(-ENOMEM); 3376 goto err_out; 3377 } 3378 3379 for_each_tile(tile, vm->xe, id) { 3380 if (!vops->pt_update_ops[id].num_ops) 3381 continue; 3382 3383 err = xe_pt_update_ops_prepare(tile, vops); 3384 if (err) { 3385 fence = ERR_PTR(err); 3386 goto err_out; 3387 } 3388 } 3389 3390 trace_xe_vm_ops_execute(vops); 3391 3392 for_each_tile(tile, vm->xe, id) { 3393 struct xe_exec_queue *q = vops->pt_update_ops[tile->id].q; 3394 3395 fence = NULL; 3396 if (!vops->pt_update_ops[id].num_ops) 3397 goto collect_fences; 3398 3399 fence = xe_pt_update_ops_run(tile, vops); 3400 if (IS_ERR(fence)) 3401 goto err_out; 3402 3403 collect_fences: 3404 fences[current_fence++] = fence ?: dma_fence_get_stub(); 3405 if (vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT) 3406 continue; 3407 3408 xe_migrate_job_lock(tile->migrate, q); 3409 for_each_tlb_inval(i) 3410 fences[current_fence++] = 3411 xe_exec_queue_tlb_inval_last_fence_get(q, vm, i); 3412 xe_migrate_job_unlock(tile->migrate, q); 3413 } 3414 3415 xe_assert(vm->xe, current_fence == n_fence); 3416 dma_fence_array_init(cf, n_fence, fences, dma_fence_context_alloc(1), 3417 1, false); 3418 fence = &cf->base; 3419 3420 for_each_tile(tile, vm->xe, id) { 3421 if (!vops->pt_update_ops[id].num_ops) 3422 continue; 3423 3424 xe_pt_update_ops_fini(tile, vops); 3425 } 3426 3427 return fence; 3428 3429 err_out: 3430 for_each_tile(tile, vm->xe, id) { 3431 if (!vops->pt_update_ops[id].num_ops) 3432 continue; 3433 3434 xe_pt_update_ops_abort(tile, vops); 3435 } 3436 while (current_fence) 3437 dma_fence_put(fences[--current_fence]); 3438 kfree(fences); 3439 kfree(cf); 3440 3441 err_trace: 3442 trace_xe_vm_ops_fail(vm); 3443 return fence; 3444 } 3445 3446 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence) 3447 { 3448 if (vma->ufence) 3449 
		xe_sync_ufence_put(vma->ufence);
	vma->ufence = __xe_sync_ufence_get(ufence);
}

/*
 * Attach @ufence to every VMA created or kept alive by @op. Unmap ops carry
 * no new mapping, so nothing is attached for them; CPU-address-mirror maps
 * are skipped as well.
 */
static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
			  struct xe_user_fence *ufence)
{
	switch (op->base.op) {
	case DRM_GPUVA_OP_MAP:
		if (!xe_vma_is_cpu_addr_mirror(op->map.vma))
			vma_add_ufence(op->map.vma, ufence);
		break;
	case DRM_GPUVA_OP_REMAP:
		if (op->remap.prev)
			vma_add_ufence(op->remap.prev, ufence);
		if (op->remap.next)
			vma_add_ufence(op->remap.next, ufence);
		break;
	case DRM_GPUVA_OP_UNMAP:
		break;
	case DRM_GPUVA_OP_PREFETCH:
		vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence);
		break;
	default:
		drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
	}
}

/*
 * Finalize a list of bind ops: attach the (single) user fence from the sync
 * array to the affected VMAs, destroy VMAs removed by UNMAP/REMAP ops using
 * @fence to defer teardown, and finally signal all sync entries with @fence.
 * A NULL @fence skips the signaling (used on the -ENODATA path).
 */
static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
				   struct dma_fence *fence)
{
	struct xe_user_fence *ufence;
	struct xe_vma_op *op;
	int i;

	ufence = find_ufence_get(vops->syncs, vops->num_syncs);
	list_for_each_entry(op, &vops->list, link) {
		if (ufence)
			op_add_ufence(vm, op, ufence);

		if (op->base.op == DRM_GPUVA_OP_UNMAP)
			xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence);
		else if (op->base.op == DRM_GPUVA_OP_REMAP)
			xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va),
				       fence);
	}
	if (ufence)
		xe_sync_ufence_put(ufence);
	if (fence) {
		for (i = 0; i < vops->num_syncs; i++)
			xe_sync_entry_signal(vops->syncs + i, fence);
	}
}

/*
 * Lock/validate all objects for @vops inside a drm_exec/validation
 * transaction, execute the ops and finalize them. Returns the fence of the
 * last op on success, an ERR_PTR otherwise. Called with vm->lock held in
 * write mode.
 */
static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm,
						   struct xe_vma_ops *vops)
{
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	struct dma_fence *fence;
	int err = 0;

	lockdep_assert_held_write(&vm->lock);

	xe_validation_guard(&ctx, &vm->xe->val, &exec,
			    ((struct xe_val_flags) {
				    .interruptible = true,
				    .exec_ignore_duplicates = true,
			    }), err) {
		err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
		drm_exec_retry_on_contention(&exec);
		xe_validation_retry_on_oom(&ctx, &err);
		if (err)
			return ERR_PTR(err);

		xe_vm_set_validation_exec(vm, &exec);
		fence = ops_execute(vm, vops);
		xe_vm_set_validation_exec(vm, NULL);
		if (IS_ERR(fence)) {
			/* -ENODATA means nothing executed; still finalize. */
			if (PTR_ERR(fence) == -ENODATA)
				vm_bind_ioctl_ops_fini(vm, vops, NULL);
			return fence;
		}

		vm_bind_ioctl_ops_fini(vm, vops, fence);
	}

	return err ? ERR_PTR(err) : fence;
}
ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);

#define SUPPORTED_FLAGS_STUB \
	(DRM_XE_VM_BIND_FLAG_READONLY | \
	 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
	 DRM_XE_VM_BIND_FLAG_NULL | \
	 DRM_XE_VM_BIND_FLAG_DUMPABLE | \
	 DRM_XE_VM_BIND_FLAG_CHECK_PXP | \
	 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR | \
	 DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET | \
	 DRM_XE_VM_BIND_FLAG_DECOMPRESS)

#ifdef TEST_VM_OPS_ERROR
#define SUPPORTED_FLAGS	(SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR)
#else
#define SUPPORTED_FLAGS	SUPPORTED_FLAGS_STUB
#endif

#define XE_64K_PAGE_MASK 0xffffull
#define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)

/*
 * Validate the userspace-provided bind arguments. On success with
 * args->num_binds > 1, *bind_ops points at a kvmalloc'ed copy of the bind-op
 * array that the caller must kvfree(); otherwise *bind_ops aliases
 * &args->bind and must not be freed.
 */
static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
				    struct drm_xe_vm_bind *args,
				    struct drm_xe_vm_bind_op **bind_ops)
{
	int err;
	int i;

	if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->extensions))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->num_syncs > DRM_XE_MAX_SYNCS))
		return -EINVAL;

	if (args->num_binds > 1) {
		u64 __user *bind_user =
			u64_to_user_ptr(args->vector_of_binds);

		*bind_ops = kvmalloc_objs(struct drm_xe_vm_bind_op,
					  args->num_binds,
					  GFP_KERNEL | __GFP_ACCOUNT |
					  __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
		if (!*bind_ops)
			/* num_binds > 1 always holds here, so this is -ENOBUFS */
			return args->num_binds > 1 ? -ENOBUFS : -ENOMEM;

		err = copy_from_user(*bind_ops, bind_user,
				     sizeof(struct drm_xe_vm_bind_op) *
				     args->num_binds);
		if (XE_IOCTL_DBG(xe, err)) {
			err = -EFAULT;
			goto free_bind_ops;
		}
	} else {
		*bind_ops = &args->bind;
	}

	for (i = 0; i < args->num_binds; ++i) {
		u64 range = (*bind_ops)[i].range;
		u64 addr = (*bind_ops)[i].addr;
		u32 op = (*bind_ops)[i].op;
		u32 flags = (*bind_ops)[i].flags;
		u32 obj = (*bind_ops)[i].obj;
		u64 obj_offset = (*bind_ops)[i].obj_offset;
		u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
		bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
		bool is_cpu_addr_mirror = flags &
			DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR;
		bool is_decompress = flags & DRM_XE_VM_BIND_FLAG_DECOMPRESS;
		u16 pat_index = (*bind_ops)[i].pat_index;
		u16 coh_mode;
		bool comp_en;

		/* CPU-address-mirror binds require a fault-mode VM with GPUSVM */
		if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror &&
				 (!xe_vm_in_fault_mode(vm) ||
				  !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) {
			err = -EINVAL;
			goto free_bind_ops;
		}

		if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
			err = -EINVAL;
			goto free_bind_ops;
		}

		/* Sanitize the index before using it to look up PAT properties */
		pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
		(*bind_ops)[i].pat_index = pat_index;
		coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
		comp_en = xe_pat_index_get_comp_en(xe, pat_index);
		if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
			err = -EINVAL;
			goto free_bind_ops;
		}

		if (XE_WARN_ON(coh_mode > XE_COH_2WAY)) {
			err = -EINVAL;
			goto free_bind_ops;
		}

		if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
		    XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
		    XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) ||
		    XE_IOCTL_DBG(xe, obj_offset && (is_null ||
						    is_cpu_addr_mirror)) ||
		    XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
				 (is_decompress || is_null || is_cpu_addr_mirror)) ||
		    XE_IOCTL_DBG(xe, is_decompress &&
				 xe_pat_index_get_comp_en(xe, pat_index)) ||
		    XE_IOCTL_DBG(xe, !obj &&
				 op == DRM_XE_VM_BIND_OP_MAP &&
				 !is_null && !is_cpu_addr_mirror) ||
		    XE_IOCTL_DBG(xe, !obj &&
				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
		    XE_IOCTL_DBG(xe, addr &&
				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
		    XE_IOCTL_DBG(xe, range &&
				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
		    XE_IOCTL_DBG(xe, obj &&
				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
		    XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
		    XE_IOCTL_DBG(xe, xe_device_is_l2_flush_optimized(xe) &&
				 (op == DRM_XE_VM_BIND_OP_MAP_USERPTR ||
				  is_cpu_addr_mirror) &&
				 (pat_index != 19 && coh_mode != XE_COH_2WAY)) ||
		    XE_IOCTL_DBG(xe, comp_en &&
				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
		    XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR &&
				 !IS_ENABLED(CONFIG_DRM_GPUSVM)) ||
		    XE_IOCTL_DBG(xe, obj &&
				 op == DRM_XE_VM_BIND_OP_PREFETCH) ||
		    XE_IOCTL_DBG(xe, prefetch_region &&
				 op != DRM_XE_VM_BIND_OP_PREFETCH) ||
		    XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC &&
				      /* Guard against undefined shift in BIT(prefetch_region) */
				      (prefetch_region >= (sizeof(xe->info.mem_region_mask) * 8) ||
				       !(BIT(prefetch_region) & xe->info.mem_region_mask)))) ||
		    XE_IOCTL_DBG(xe, obj &&
				 op == DRM_XE_VM_BIND_OP_UNMAP) ||
		    XE_IOCTL_DBG(xe, (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) &&
				 (!is_cpu_addr_mirror || op != DRM_XE_VM_BIND_OP_MAP))) {
			err = -EINVAL;
			goto free_bind_ops;
		}

		/* Page-aligned offsets/addresses/ranges only */
		if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, !range &&
				 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
			err = -EINVAL;
			goto free_bind_ops;
		}

		if (is_decompress && (XE_IOCTL_DBG(xe, !xe_device_has_flat_ccs(xe)) ||
				      XE_IOCTL_DBG(xe, GRAPHICS_VER(xe) < 20) ||
				      XE_IOCTL_DBG(xe, !IS_DGFX(xe)))) {
			err = -EOPNOTSUPP;
			goto free_bind_ops;
		}
	}

	return 0;

free_bind_ops:
	if (args->num_binds > 1)
		kvfree(*bind_ops);
	*bind_ops = NULL;
	return err;
}

/*
 * When a bind ends up doing no work (-ENODATA), still honor the syncs:
 * wait for in-fences via a composite fence and signal the out-fences with it.
 */
static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
				       struct xe_exec_queue *q,
				       struct xe_sync_entry *syncs,
				       int num_syncs)
{
	struct dma_fence *fence = NULL;
	int i, err = 0;

	if (num_syncs) {
		fence = xe_sync_in_fence_get(syncs, num_syncs,
					     to_wait_exec_queue(vm, q), vm);
		if (IS_ERR(fence))
			return PTR_ERR(fence);

		for (i = 0; i < num_syncs; i++)
			xe_sync_entry_signal(&syncs[i], fence);
	}

	dma_fence_put(fence);

	return err;
}

/* Zero-initialize @vops and record the VM/queue/sync context for the binds. */
static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
			    struct xe_exec_queue *q,
			    struct xe_sync_entry *syncs, u32 num_syncs)
{
	memset(vops, 0, sizeof(*vops));
	INIT_LIST_HEAD(&vops->list);
	vops->vm = vm;
	vops->q = q;
	vops->syncs = syncs;
	vops->num_syncs = num_syncs;
	vops->flags = 0;
}

/*
 * Per-BO validation for a bind op: size/offset bounds, compression vs BO
 * flags, 64k alignment on platforms that require it, coherency vs CPU
 * caching, dma-buf restrictions and PXP key validity.
 */
static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
					u64 addr, u64 range, u64 obj_offset,
					u16 pat_index, u32 op, u32 bind_flags)
{
	u16 coh_mode;
	bool comp_en;

	if (XE_IOCTL_DBG(xe, (bo->flags & XE_BO_FLAG_NO_COMPRESSION) &&
			 xe_pat_index_get_comp_en(xe, pat_index)))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) ||
	    XE_IOCTL_DBG(xe, obj_offset >
			 xe_bo_size(bo) - range)) {
		return -EINVAL;
	}

	/*
	 * Some platforms require 64k VM_BIND alignment,
	 * specifically those with XE_VRAM_FLAGS_NEED64K.
	 *
	 * Other platforms may have BO's set to 64k physical placement,
	 * but can be mapped at 4k offsets anyway. This check is only
	 * there for the former case.
	 */
	if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) &&
	    (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) {
		if (XE_IOCTL_DBG(xe, obj_offset &
				 XE_64K_PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
			return -EINVAL;
		}
	}

	coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
	if (bo->cpu_caching) {
		if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
				 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
			return -EINVAL;
		}
	} else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
		/*
		 * Imported dma-buf from a different device should
		 * require 1way or 2way coherency since we don't know
		 * how it was mapped on the CPU. Just assume is it
		 * potentially cached on CPU side.
		 */
		return -EINVAL;
	}

	/*
	 * Ensures that imported buffer objects (dma-bufs) are not mapped
	 * with a PAT index that enables compression.
	 */
	comp_en = xe_pat_index_get_comp_en(xe, pat_index);
	if (XE_IOCTL_DBG(xe, bo->ttm.base.import_attach && comp_en))
		return -EINVAL;

	/* NOTE(review): pat_index 19 is a magic value here and above — confirm intent */
	if (XE_IOCTL_DBG(xe, bo->ttm.base.import_attach && xe_device_is_l2_flush_optimized(xe) &&
			 (pat_index != 19 && coh_mode != XE_COH_2WAY)))
		return -EINVAL;

	/* If a BO is protected it can only be mapped if the key is still valid */
	if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) &&
	    op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL)
		if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0))
			return -ENOEXEC;

	return 0;
}

/*
 * DRM_IOCTL_XE_VM_BIND handler: validate the arguments, look up and validate
 * the BOs, parse the syncs, build GPUVA ops for every bind entry, and execute
 * them as one transaction. Errors unwind in strict reverse order of
 * acquisition via the goto ladder at the bottom.
 */
int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_vm_bind *args = data;
	struct drm_xe_sync __user *syncs_user;
	struct xe_bo **bos = NULL;
	struct drm_gpuva_ops **ops = NULL;
	struct xe_vm *vm;
	struct xe_exec_queue *q = NULL;
	u32 num_syncs, num_ufence = 0;
	struct xe_sync_entry *syncs = NULL;
	struct drm_xe_vm_bind_op *bind_ops = NULL;
	struct xe_vma_ops vops;
	struct dma_fence *fence;
	int err;
	int i;

	vm = xe_vm_lookup(xef, args->vm_id);
	if (XE_IOCTL_DBG(xe, !vm))
		return -EINVAL;

	err = vm_bind_ioctl_check_args(xe, vm, args, &bind_ops);
	if (err)
		goto put_vm;

	if (args->exec_queue_id) {
		q = xe_exec_queue_lookup(xef, args->exec_queue_id);
		if (XE_IOCTL_DBG(xe, !q)) {
			err = -ENOENT;
			goto free_bind_ops;
		}

		/* Only VM-flavored exec queues may service binds */
		if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
			err = -EINVAL;
			goto put_exec_queue;
		}
	}

	if (XE_IOCTL_DBG(xe, q && vm != q->user_vm)) {
		err = -EINVAL;
		goto put_exec_queue;
	}

	/* Ensure all UNMAPs visible */
	xe_svm_flush(vm);

	err = down_write_killable(&vm->lock);
	if (err)
		goto put_exec_queue;

	if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
		err = -ENOENT;
		goto release_vm_lock;
	}

	/* Range-check every bind against the VM's address space size */
	for (i = 0; i < args->num_binds; ++i) {
		u64 range = bind_ops[i].range;
		u64 addr = bind_ops[i].addr;

		if (XE_IOCTL_DBG(xe, range > vm->size) ||
		    XE_IOCTL_DBG(xe, addr > vm->size - range)) {
			err = -EINVAL;
			goto release_vm_lock;
		}
	}

	if (args->num_binds) {
		bos = kvzalloc_objs(*bos, args->num_binds,
				    GFP_KERNEL | __GFP_ACCOUNT | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
		if (!bos) {
			err = -ENOMEM;
			goto release_vm_lock;
		}

		ops = kvzalloc_objs(*ops, args->num_binds,
				    GFP_KERNEL | __GFP_ACCOUNT | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
		if (!ops) {
			err = -ENOMEM;
			goto free_bos;
		}
	}

	/* Resolve and validate each referenced GEM object */
	for (i = 0; i < args->num_binds; ++i) {
		struct drm_gem_object *gem_obj;
		u64 range = bind_ops[i].range;
		u64 addr = bind_ops[i].addr;
		u32 obj = bind_ops[i].obj;
		u64 obj_offset = bind_ops[i].obj_offset;
		u16 pat_index = bind_ops[i].pat_index;
		u32 op = bind_ops[i].op;
		u32 bind_flags = bind_ops[i].flags;

		if (!obj)
			continue;

		gem_obj = drm_gem_object_lookup(file, obj);
		if (XE_IOCTL_DBG(xe, !gem_obj)) {
			err = -ENOENT;
			goto put_obj;
		}
		bos[i] = gem_to_xe_bo(gem_obj);

		err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range,
						   obj_offset, pat_index, op,
						   bind_flags);
		if (err)
			goto put_obj;
	}

	if (args->num_syncs) {
		syncs = kzalloc_objs(*syncs, args->num_syncs);
		if (!syncs) {
			err = -ENOMEM;
			goto put_obj;
		}
	}

	syncs_user = u64_to_user_ptr(args->syncs);
	for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
		struct xe_exec_queue *__q = q ?: vm->q[0];

		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
					  &syncs_user[num_syncs],
					  __q->ufence_syncobj,
					  ++__q->ufence_timeline_value,
					  (xe_vm_in_lr_mode(vm) ?
					   SYNC_PARSE_FLAG_LR_MODE : 0) |
					  (!args->num_binds ?
					   SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
		if (err)
			goto free_syncs;

		if (xe_sync_is_ufence(&syncs[num_syncs]))
			num_ufence++;
	}

	/* At most one user fence per bind submission */
	if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
		err = -EINVAL;
		goto free_syncs;
	}

	if (!args->num_binds) {
		err = -ENODATA;
		goto free_syncs;
	}

	xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
	if (args->num_binds > 1)
		vops.flags |= XE_VMA_OPS_ARRAY_OF_BINDS;
	for (i = 0; i < args->num_binds; ++i) {
		u64 range = bind_ops[i].range;
		u64 addr = bind_ops[i].addr;
		u32 op = bind_ops[i].op;
		u32 flags = bind_ops[i].flags;
		u64 obj_offset = bind_ops[i].obj_offset;
		u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
		u16 pat_index = bind_ops[i].pat_index;

		ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset,
						  addr, range, op, flags,
						  prefetch_region, pat_index);
		if (IS_ERR(ops[i])) {
			err = PTR_ERR(ops[i]);
			/* NULL so the unwind loop below skips this slot */
			ops[i] = NULL;
			goto unwind_ops;
		}

		err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops);
		if (err)
			goto unwind_ops;

#ifdef TEST_VM_OPS_ERROR
		if (flags & FORCE_OP_ERROR) {
			vops.inject_error = true;
			vm->xe->vm_inject_error_position =
				(vm->xe->vm_inject_error_position + 1) %
				FORCE_OP_ERROR_COUNT;
		}
#endif
	}

	/* Nothing to do */
	if (list_empty(&vops.list)) {
		err = -ENODATA;
		goto unwind_ops;
	}

	err = xe_vma_ops_alloc(&vops, args->num_binds > 1);
	if (err)
		goto unwind_ops;

	err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops);
	if (err)
		goto unwind_ops;

	fence = vm_bind_ioctl_ops_execute(vm, &vops);
	if (IS_ERR(fence))
		err = PTR_ERR(fence);
	else
		dma_fence_put(fence);

unwind_ops:
	if (err && err != -ENODATA)
		vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
	xe_vma_ops_fini(&vops);
	for (i = args->num_binds - 1; i >= 0; --i)
		if (ops[i])
			drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
free_syncs:
	/* -ENODATA: nothing executed, but syncs still need signaling */
	if (err == -ENODATA)
		err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
	while (num_syncs--)
		xe_sync_entry_cleanup(&syncs[num_syncs]);

	kfree(syncs);
put_obj:
	for (i = 0; i < args->num_binds; ++i)
		xe_bo_put(bos[i]);

	kvfree(ops);
free_bos:
	kvfree(bos);
release_vm_lock:
	up_write(&vm->lock);
put_exec_queue:
	if (q)
		xe_exec_queue_put(q);
free_bind_ops:
	/* Only the multi-bind path allocates; single-bind aliases args->bind */
	if (args->num_binds > 1)
		kvfree(bind_ops);
put_vm:
	xe_vm_put(vm);
	return err;
}

/*
 * Map access type, fault type, and fault level from current bspec
 * specification to user spec abstraction. The current mapping is
 * approximately 1-to-1, with access type being the only notable
 * exception as it carries additional data with respect to prefetch
 * status that needs to be masked out.
 */
static u8 xe_to_user_access_type(u8 access_type)
{
	/* Strip the prefetch-status bits; only the access type is uAPI */
	return access_type & XE_PAGEFAULT_ACCESS_TYPE_MASK;
}

static u8 xe_to_user_fault_type(u8 fault_type)
{
	/* 1:1 mapping today; kept as a hook for future divergence */
	return fault_type;
}

static u8 xe_to_user_fault_level(u8 fault_level)
{
	/* 1:1 mapping today; kept as a hook for future divergence */
	return fault_level;
}

/*
 * Copy up to args->size bytes of fault records to userspace. The VM fault
 * list is snapshotted into a kernel buffer under vm->faults.lock so the
 * copy_to_user() happens outside the spinlock. Returns 0 or -EFAULT/-ENOMEM.
 */
static int fill_faults(struct xe_vm *vm,
		       struct drm_xe_vm_get_property *args)
{
	struct xe_vm_fault __user *usr_ptr = u64_to_user_ptr(args->data);
	struct xe_vm_fault *fault_list, fault_entry = { 0 };
	struct xe_vm_fault_entry *entry;
	int ret = 0, i = 0, count, entry_size;

	entry_size = sizeof(struct xe_vm_fault);
	count = args->size / entry_size;

	fault_list = kcalloc(count, sizeof(struct xe_vm_fault), GFP_KERNEL);
	if (!fault_list)
		return -ENOMEM;

	spin_lock(&vm->faults.lock);
	list_for_each_entry(entry, &vm->faults.list, list) {
		if (i == count)
			break;

		fault_entry.address = xe_device_canonicalize_addr(vm->xe, entry->address);
		fault_entry.address_precision = entry->address_precision;

		fault_entry.access_type = xe_to_user_access_type(entry->access_type);
		fault_entry.fault_type = xe_to_user_fault_type(entry->fault_type);
		fault_entry.fault_level = xe_to_user_fault_level(entry->fault_level);

		memcpy(&fault_list[i], &fault_entry, entry_size);

		i++;
	}
	spin_unlock(&vm->faults.lock);

	ret = copy_to_user(usr_ptr, fault_list, args->size);

	kfree(fault_list);
	return ret ? -EFAULT : 0;
}

/*
 * Dispatch a DRM_XE_VM_GET_PROPERTY query. For FAULTS, a zero args->size is
 * a size query; otherwise args->size must be a whole multiple of the fault
 * record size and no larger than the current fault list.
 */
static int xe_vm_get_property_helper(struct xe_vm *vm,
				     struct drm_xe_vm_get_property *args)
{
	size_t size;

	switch (args->property) {
	case DRM_XE_VM_GET_PROPERTY_FAULTS:
		spin_lock(&vm->faults.lock);
		size = size_mul(sizeof(struct xe_vm_fault), vm->faults.len);
		spin_unlock(&vm->faults.lock);

		if (!args->size) {
			args->size = size;
			return 0;
		}

		/*
		 * Number of faults may increase between calls to
		 * xe_vm_get_property_ioctl, so just report the number of
		 * faults the user requests if it's less than or equal to
		 * the number of faults in the VM fault array.
		 *
		 * We should also at least assert that the args->size value
		 * is a multiple of the xe_vm_fault struct size.
		 */
		if (args->size > size || args->size % sizeof(struct xe_vm_fault))
			return -EINVAL;

		return fill_faults(vm, args);
	}
	return -EINVAL;
}

/* DRM_IOCTL_XE_VM_GET_PROPERTY handler: validate, look up the VM, dispatch. */
int xe_vm_get_property_ioctl(struct drm_device *drm, void *data,
			     struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(drm);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_vm_get_property *args = data;
	struct xe_vm *vm;
	int ret = 0;

	if (XE_IOCTL_DBG(xe, (args->reserved[0] || args->reserved[1] ||
			      args->reserved[2])))
		return -EINVAL;

	vm = xe_vm_lookup(xef, args->vm_id);
	if (XE_IOCTL_DBG(xe, !vm))
		return -ENOENT;

	ret = xe_vm_get_property_helper(vm, args);

	xe_vm_put(vm);
	return ret;
}

/**
 * xe_vm_bind_kernel_bo - bind a kernel BO to a VM
 * @vm: VM to bind the BO to
 * @bo: BO to bind
 * @q: exec queue to use for the bind (optional)
 * @addr: address at which to bind the BO
 * @cache_lvl: PAT cache level to use
 *
 * Execute a VM bind map operation on a kernel-owned BO to bind it into a
 * kernel-owned VM.
4182 * 4183 * Returns a dma_fence to track the binding completion if the job to do so was 4184 * successfully submitted, an error pointer otherwise. 4185 */ 4186 struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo, 4187 struct xe_exec_queue *q, u64 addr, 4188 enum xe_cache_level cache_lvl) 4189 { 4190 struct xe_vma_ops vops; 4191 struct drm_gpuva_ops *ops = NULL; 4192 struct dma_fence *fence; 4193 int err; 4194 4195 xe_bo_get(bo); 4196 xe_vm_get(vm); 4197 if (q) 4198 xe_exec_queue_get(q); 4199 4200 down_write(&vm->lock); 4201 4202 xe_vma_ops_init(&vops, vm, q, NULL, 0); 4203 4204 ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo), 4205 DRM_XE_VM_BIND_OP_MAP, 0, 0, 4206 vm->xe->pat.idx[cache_lvl]); 4207 if (IS_ERR(ops)) { 4208 err = PTR_ERR(ops); 4209 goto release_vm_lock; 4210 } 4211 4212 err = vm_bind_ioctl_ops_parse(vm, ops, &vops); 4213 if (err) 4214 goto release_vm_lock; 4215 4216 xe_assert(vm->xe, !list_empty(&vops.list)); 4217 4218 err = xe_vma_ops_alloc(&vops, false); 4219 if (err) 4220 goto unwind_ops; 4221 4222 fence = vm_bind_ioctl_ops_execute(vm, &vops); 4223 if (IS_ERR(fence)) 4224 err = PTR_ERR(fence); 4225 4226 unwind_ops: 4227 if (err && err != -ENODATA) 4228 vm_bind_ioctl_ops_unwind(vm, &ops, 1); 4229 4230 xe_vma_ops_fini(&vops); 4231 drm_gpuva_ops_free(&vm->gpuvm, ops); 4232 4233 release_vm_lock: 4234 up_write(&vm->lock); 4235 4236 if (q) 4237 xe_exec_queue_put(q); 4238 xe_vm_put(vm); 4239 xe_bo_put(bo); 4240 4241 if (err) 4242 fence = ERR_PTR(err); 4243 4244 return fence; 4245 } 4246 4247 /** 4248 * xe_vm_lock() - Lock the vm's dma_resv object 4249 * @vm: The struct xe_vm whose lock is to be locked 4250 * @intr: Whether to perform any wait interruptible 4251 * 4252 * Return: 0 on success, -EINTR if @intr is true and the wait for a 4253 * contended lock was interrupted. If @intr is false, the function 4254 * always returns 0. 
4255 */ 4256 int xe_vm_lock(struct xe_vm *vm, bool intr) 4257 { 4258 int ret; 4259 4260 if (intr) 4261 ret = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); 4262 else 4263 ret = dma_resv_lock(xe_vm_resv(vm), NULL); 4264 4265 return ret; 4266 } 4267 4268 /** 4269 * xe_vm_unlock() - Unlock the vm's dma_resv object 4270 * @vm: The struct xe_vm whose lock is to be released. 4271 * 4272 * Unlock a buffer object lock that was locked by xe_vm_lock(). 4273 */ 4274 void xe_vm_unlock(struct xe_vm *vm) 4275 { 4276 dma_resv_unlock(xe_vm_resv(vm)); 4277 } 4278 4279 /** 4280 * xe_vm_invalidate_vma_submit - Submit a job to invalidate GPU mappings for 4281 * VMA. 4282 * @vma: VMA to invalidate 4283 * @batch: TLB invalidation batch to populate; caller must later call 4284 * xe_tlb_inval_batch_wait() on it to wait for completion 4285 * 4286 * Walks a list of page tables leaves which it memset the entries owned by this 4287 * VMA to zero, invalidates the TLBs, but doesn't block waiting for TLB flush 4288 * to complete, but instead populates @batch which can be waited on using 4289 * xe_tlb_inval_batch_wait(). 4290 * 4291 * Returns 0 for success, negative error code otherwise. 
 */
int xe_vm_invalidate_vma_submit(struct xe_vma *vma, struct xe_tlb_inval_batch *batch)
{
	struct xe_device *xe = xe_vma_vm(vma)->xe;
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_tile *tile;
	u8 tile_mask = 0;
	int ret = 0;
	u8 id;

	/* NULL and CPU-address-mirror VMAs have no PTEs of their own to zap */
	xe_assert(xe, !xe_vma_is_null(vma));
	xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma));
	trace_xe_vma_invalidate(vma);

	vm_dbg(&vm->xe->drm,
	       "INVALIDATE: addr=0x%016llx, range=0x%016llx",
	       xe_vma_start(vma), xe_vma_size(vma));

	/*
	 * Check that we don't race with page-table updates, tile_invalidated
	 * update is safe
	 */
	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
		if (xe_vma_is_userptr(vma)) {
			lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) ||
				       (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) &&
					lockdep_is_held(&xe_vm_resv(vm)->lock.base)));

			WARN_ON_ONCE(!mmu_interval_check_retry
				     (&to_userptr_vma(vma)->userptr.notifier,
				      to_userptr_vma(vma)->userptr.pages.notifier_seq));
			WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm),
							     DMA_RESV_USAGE_BOOKKEEP));

		} else {
			xe_bo_assert_held(xe_vma_bo(vma));
		}
	}

	/* Zap PTEs per tile; only tiles that actually had entries need inval */
	for_each_tile(tile, xe, id)
		if (xe_pt_zap_ptes(tile, vma))
			tile_mask |= BIT(id);

	/* Ensure the PTE zeroing is visible before the TLB invalidation */
	xe_device_wmb(xe);

	ret = xe_tlb_inval_range_tilemask_submit(xe, xe_vma_vm(vma)->usm.asid,
						 xe_vma_start(vma), xe_vma_end(vma),
						 tile_mask, batch);

	/* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
	WRITE_ONCE(vma->tile_invalidated, vma->tile_mask);
	return ret;
}

/**
 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
 * @vma: VMA to invalidate
 *
 * Walks a list of page tables leaves which it memset the entries owned by this
 * VMA to zero, invalidates the TLBs, and block until TLBs invalidation is
 * complete.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_vm_invalidate_vma(struct xe_vma *vma)
{
	struct xe_tlb_inval_batch batch;
	int ret;

	/* Submit the invalidation, then block until the batch completes */
	ret = xe_vm_invalidate_vma_submit(vma, &batch);
	if (ret)
		return ret;

	xe_tlb_inval_batch_wait(&batch);
	return ret;
}

/*
 * Verify that every PXP-protected BO mapped in @vm still has a valid key.
 * Returns 0 if all keys check out, the first key-check error otherwise.
 */
int xe_vm_validate_protected(struct xe_vm *vm)
{
	struct drm_gpuva *gpuva;
	int err = 0;

	if (!vm)
		return -ENODEV;

	mutex_lock(&vm->snap_mutex);

	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);
		struct xe_bo *bo = vma->gpuva.gem.obj ?
			gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;

		if (!bo)
			continue;

		if (xe_bo_is_protected(bo)) {
			err = xe_pxp_bo_key_check(vm->xe->pxp, bo);
			if (err)
				break;
		}
	}

	mutex_unlock(&vm->snap_mutex);
	return err;
}

/* Coredump snapshot of a VM: one entry per DUMPABLE VMA. */
struct xe_vm_snapshot {
	int uapi_flags;
	unsigned long num_snaps;
	struct {
		u64 ofs, bo_ofs;
		unsigned long len;
#define XE_VM_SNAP_FLAG_USERPTR BIT(0)
#define XE_VM_SNAP_FLAG_READ_ONLY BIT(1)
#define XE_VM_SNAP_FLAG_IS_NULL BIT(2)
		unsigned long flags;
		int uapi_mem_region;
		int pat_index;
		int cpu_caching;
		struct xe_bo *bo;
		/* Content buffer, or ERR_PTR recording a capture failure */
		void *data;
		struct mm_struct *mm;
	} snap[];
};

/*
 * First (atomic-context-safe) snapshot stage: record metadata and take
 * BO/mm references for every DUMPABLE VMA. Content is copied later by
 * xe_vm_snapshot_capture_delayed(). GFP_NOWAIT because this may run from
 * a coredump/reset path.
 */
struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
{
	unsigned long num_snaps = 0, i;
	struct xe_vm_snapshot *snap = NULL;
	struct drm_gpuva *gpuva;

	if (!vm)
		return NULL;

	mutex_lock(&vm->snap_mutex);
	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		if (gpuva->flags & XE_VMA_DUMPABLE)
			num_snaps++;
	}

	if (num_snaps)
		snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
	if (!snap) {
		snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV);
		goto out_unlock;
	}

	if (vm->flags & XE_VM_FLAG_FAULT_MODE)
		snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_FAULT_MODE;
	if (vm->flags & XE_VM_FLAG_LR_MODE)
		snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_LR_MODE;
	if (vm->flags & XE_VM_FLAG_SCRATCH_PAGE)
		snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;

	snap->num_snaps = num_snaps;
	i = 0;
	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);
		struct xe_bo *bo = vma->gpuva.gem.obj ?
			gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;

		if (!(gpuva->flags & XE_VMA_DUMPABLE))
			continue;

		snap->snap[i].ofs = xe_vma_start(vma);
		snap->snap[i].len = xe_vma_size(vma);
		snap->snap[i].flags = xe_vma_read_only(vma) ?
			XE_VM_SNAP_FLAG_READ_ONLY : 0;
		snap->snap[i].pat_index = vma->attr.pat_index;
		if (bo) {
			snap->snap[i].cpu_caching = bo->cpu_caching;
			snap->snap[i].bo = xe_bo_get(bo);
			snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
			switch (bo->ttm.resource->mem_type) {
			case XE_PL_SYSTEM:
			case XE_PL_TT:
				snap->snap[i].uapi_mem_region = 0;
				break;
			case XE_PL_VRAM0:
				snap->snap[i].uapi_mem_region = 1;
				break;
			case XE_PL_VRAM1:
				snap->snap[i].uapi_mem_region = 2;
				break;
			}
		} else if (xe_vma_is_userptr(vma)) {
			struct mm_struct *mm =
				to_userptr_vma(vma)->userptr.notifier.mm;

			/* Pin the mm so the delayed stage can copy from it */
			if (mmget_not_zero(mm))
				snap->snap[i].mm = mm;
			else
				snap->snap[i].data = ERR_PTR(-EFAULT);

			snap->snap[i].bo_ofs = xe_vma_userptr(vma);
			snap->snap[i].flags |= XE_VM_SNAP_FLAG_USERPTR;
			snap->snap[i].uapi_mem_region = 0;
		} else if (xe_vma_is_null(vma)) {
			snap->snap[i].flags |= XE_VM_SNAP_FLAG_IS_NULL;
			snap->snap[i].uapi_mem_region = -1;
		} else {
			snap->snap[i].data = ERR_PTR(-ENOENT);
			snap->snap[i].uapi_mem_region = -1;
		}
		i++;
	}

out_unlock:
	mutex_unlock(&vm->snap_mutex);
	return snap;
}

/*
 * Second (sleepable) snapshot stage: copy the actual VMA contents from the
 * BOs / userptr mms referenced by xe_vm_snapshot_capture(), dropping those
 * references as it goes. Per-entry failures are recorded as ERR_PTR in
 * snap->snap[i].data rather than aborting the whole capture.
 */
void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
{
	if (IS_ERR_OR_NULL(snap))
		return;

	for (int i = 0; i < snap->num_snaps; i++) {
		struct xe_bo *bo = snap->snap[i].bo;
		int err;

		if (IS_ERR(snap->snap[i].data) ||
		    snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL)
			continue;

		snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
		if (!snap->snap[i].data) {
			snap->snap[i].data = ERR_PTR(-ENOMEM);
			goto cleanup_bo;
		}

		if (bo) {
			err = xe_bo_read(bo, snap->snap[i].bo_ofs,
					 snap->snap[i].data, snap->snap[i].len);
		} else {
			void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;

			/* Borrow the captured mm to resolve the user pointer */
			kthread_use_mm(snap->snap[i].mm);
			if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
				err = 0;
			else
				err = -EFAULT;
			kthread_unuse_mm(snap->snap[i].mm);

			mmput(snap->snap[i].mm);
			snap->snap[i].mm = NULL;
		}

		if (err) {
			kvfree(snap->snap[i].data);
			snap->snap[i].data = ERR_PTR(err);
		}

cleanup_bo:
		xe_bo_put(bo);
		snap->snap[i].bo = NULL;
	}
}

/* Emit the captured snapshot in the devcoredump text format. */
void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
{
	unsigned long i, j;

	if (IS_ERR_OR_NULL(snap)) {
		drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
		return;
	}

	drm_printf(p, "VM.uapi_flags: 0x%x\n", snap->uapi_flags);
	for (i = 0; i < snap->num_snaps; i++) {
		drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);

		drm_printf(p, "[%llx].properties: %s|%s|mem_region=0x%lx|pat_index=%d|cpu_caching=%d\n",
			   snap->snap[i].ofs,
			   snap->snap[i].flags & XE_VM_SNAP_FLAG_READ_ONLY ?
			   "read_only" : "read_write",
			   snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL ?
			   "null_sparse" :
			   snap->snap[i].flags & XE_VM_SNAP_FLAG_USERPTR ?
			   "userptr" : "bo",
			   snap->snap[i].uapi_mem_region == -1 ? 0 :
			   BIT(snap->snap[i].uapi_mem_region),
			   snap->snap[i].pat_index,
			   snap->snap[i].cpu_caching);

		if (IS_ERR(snap->snap[i].data)) {
			drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs,
				   PTR_ERR(snap->snap[i].data));
			continue;
		}

		if (snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL)
			continue;

		drm_printf(p, "[%llx].data: ", snap->snap[i].ofs);

		/* ascii85-encode the contents one u32 at a time */
		for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
			u32 *val = snap->snap[i].data + j;
			char dumped[ASCII85_BUFSZ];

			drm_puts(p, ascii85_encode(*val, dumped));
		}

		drm_puts(p, "\n");

		if (drm_coredump_printer_is_full(p))
			return;
	}
}

/* Release all buffers, BO references and mm references held by @snap. */
void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
{
	unsigned long i;

	if (IS_ERR_OR_NULL(snap))
		return;

	for (i = 0; i < snap->num_snaps; i++) {
		if (!IS_ERR(snap->snap[i].data))
			kvfree(snap->snap[i].data);
		xe_bo_put(snap->snap[i].bo);
		if (snap->snap[i].mm)
			mmput(snap->snap[i].mm);
	}
	kvfree(snap);
}

/**
 * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations
 * @xe: Pointer to the Xe device structure
 * @vma: Pointer to the virtual memory area (VMA) structure
 * @is_atomic: In pagefault path and atomic operation
 *
 * This function determines whether the given VMA needs to be migrated to
 * VRAM in order to do atomic GPU operation.
 *
 * Return:
 * 1 - Migration to VRAM is required
 * 0 - Migration is not required
 * -EACCES - Invalid access for atomic memory attr
 *
 */
int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic)
{
	/* BO-backed VMAs take the atomic attribute from the BO, not the VMA */
	u32 atomic_access = xe_vma_bo(vma) ? xe_vma_bo(vma)->attr.atomic_access :
		vma->attr.atomic_access;

	if (!IS_DGFX(xe) || !is_atomic)
		return false;

	/*
	 * NOTE: The checks implemented here are platform-specific. For
	 * instance, on a device supporting CXL atomics, these would ideally
	 * work universally without additional handling.
	 */
	switch (atomic_access) {
	case DRM_XE_ATOMIC_DEVICE:
		return !xe->info.has_device_atomics_on_smem;

	case DRM_XE_ATOMIC_CPU:
		return -EACCES;

	case DRM_XE_ATOMIC_UNDEFINED:
	case DRM_XE_ATOMIC_GLOBAL:
	default:
		return 1;
	}
}

/*
 * Split/create VMAs for @map_req. In madvise mode the ops may only
 * REMAP-split existing no-BO VMAs (attributes are carried over to the new
 * mappings); otherwise a normal sm-map op list is built. New VMAs inherit
 * creation flags (and, in the non-madvise case, the default PAT index) from
 * the VMA they replace. Caller holds vm->lock in write mode.
 */
static int xe_vm_alloc_vma(struct xe_vm *vm,
			   struct drm_gpuvm_map_req *map_req,
			   bool is_madvise)
{
	struct xe_vma_ops vops;
	struct drm_gpuva_ops *ops = NULL;
	struct drm_gpuva_op *__op;
	unsigned int vma_flags = 0;
	bool remap_op = false;
	struct xe_vma_mem_attr tmp_attr = {};
	u16 default_pat;
	int err;

	lockdep_assert_held_write(&vm->lock);

	if (is_madvise)
		ops = drm_gpuvm_madvise_ops_create(&vm->gpuvm, map_req);
	else
		ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, map_req);

	if (IS_ERR(ops))
		return PTR_ERR(ops);

	if (list_empty(&ops->list)) {
		err = 0;
		goto free_ops;
	}

	drm_gpuva_for_each_op(__op, ops) {
		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
		struct xe_vma *vma = NULL;

		if (!is_madvise) {
			if (__op->op == DRM_GPUVA_OP_UNMAP) {
				vma = gpuva_to_vma(op->base.unmap.va);
				XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma));
				default_pat = vma->attr.default_pat_index;
				vma_flags = vma->gpuva.flags;
			}

			if (__op->op == DRM_GPUVA_OP_REMAP) {
				vma = gpuva_to_vma(op->base.remap.unmap->va);
				default_pat = vma->attr.default_pat_index;
				vma_flags = vma->gpuva.flags;
			}

			if (__op->op == DRM_GPUVA_OP_MAP) {
				op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK;
				op->map.pat_index = default_pat;
			}
		} else {
			if (__op->op == DRM_GPUVA_OP_REMAP) {
				vma = gpuva_to_vma(op->base.remap.unmap->va);
				xe_assert(vm->xe, !remap_op);
				xe_assert(vm->xe, xe_vma_has_no_bo(vma));
				remap_op = true;
				vma_flags = vma->gpuva.flags;
			}

			if (__op->op == DRM_GPUVA_OP_MAP) {
				xe_assert(vm->xe, remap_op);
				remap_op = false;
				/*
				 * In case of madvise ops DRM_GPUVA_OP_MAP is
				 * always after DRM_GPUVA_OP_REMAP, so ensure
				 * to propagate the flags from the vma we're
				 * unmapping.
				 */
				op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK;
			}
		}
		print_op(vm->xe, __op);
	}

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);

	if (is_madvise)
		vops.flags |= XE_VMA_OPS_FLAG_MADVISE;
	else
		vops.flags |= XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP;

	err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
	if (err)
		goto unwind_ops;

	xe_vm_lock(vm, false);

	drm_gpuva_for_each_op(__op, ops) {
		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
		struct xe_vma *vma;

		if (__op->op == DRM_GPUVA_OP_UNMAP) {
			vma = gpuva_to_vma(op->base.unmap.va);
			/* There should be no unmap for madvise */
			if (is_madvise)
				XE_WARN_ON("UNEXPECTED UNMAP");

			xe_vma_destroy(vma, NULL);
		} else if (__op->op == DRM_GPUVA_OP_REMAP) {
			vma = gpuva_to_vma(op->base.remap.unmap->va);
			/* In case of madvise ops Store attributes for REMAP UNMAPPED
			 * VMA, so they can be assigned to newly MAP created vma.
			 */
			if (is_madvise)
				xe_vma_mem_attr_copy(&tmp_attr, &vma->attr);

			xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL);
		} else if (__op->op == DRM_GPUVA_OP_MAP) {
			vma = op->map.vma;
			/* In case of madvise call, MAP will always be followed by REMAP.
			 * Therefore temp_attr will always have sane values, making it safe to
			 * copy them to new vma.
			 */
			if (is_madvise)
				xe_vma_mem_attr_copy(&vma->attr, &tmp_attr);
		}
	}

	xe_vm_unlock(vm);
	drm_gpuva_ops_free(&vm->gpuvm, ops);
	xe_vma_mem_attr_fini(&tmp_attr);
	return 0;

unwind_ops:
	vm_bind_ioctl_ops_unwind(vm, &ops, 1);
free_ops:
	drm_gpuva_ops_free(&vm->gpuvm, ops);
	return err;
}

/**
 * xe_vm_alloc_madvise_vma - Allocate VMA's with madvise ops
 * @vm: Pointer to the xe_vm structure
 * @start: Starting input address
 * @range: Size of the input range
 *
 * This function splits existing vma to create new vma for user provided input range
 *
 * Return: 0 if success
 */
int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
{
	struct drm_gpuvm_map_req map_req = {
		.map.va.addr = start,
		.map.va.range = range,
	};

	lockdep_assert_held_write(&vm->lock);

	vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range);

	return xe_vm_alloc_vma(vm, &map_req, true);
}

/* True iff @vma exists, is CPU-address-mirrored and has default mem attrs. */
static bool is_cpu_addr_vma_with_default_attr(struct xe_vma *vma)
{
	return vma && xe_vma_is_cpu_addr_mirror(vma) &&
	       xe_vma_has_default_mem_attrs(vma);
}

/**
 * xe_vm_find_cpu_addr_mirror_vma_range - Extend a VMA range to include adjacent CPU-mirrored VMAs
 * @vm: VM to search within
 * @start: Input/output pointer to the starting address of the range
 * @end: Input/output pointer to the end address of the range
 *
 * Given a range defined by @start and @range, this function checks the VMAs
 * immediately before and after the range. If those neighboring VMAs are
 * CPU-address-mirrored and have default memory attributes, the function
 * updates @start and @range to include them.
This extended range can then 4831 * be used for merging or other operations that require a unified VMA. 4832 * 4833 * The function does not perform the merge itself; it only computes the 4834 * mergeable boundaries. 4835 */ 4836 void xe_vm_find_cpu_addr_mirror_vma_range(struct xe_vm *vm, u64 *start, u64 *end) 4837 { 4838 struct xe_vma *prev, *next; 4839 4840 lockdep_assert_held(&vm->lock); 4841 4842 if (*start >= SZ_4K) { 4843 prev = xe_vm_find_vma_by_addr(vm, *start - SZ_4K); 4844 if (is_cpu_addr_vma_with_default_attr(prev)) 4845 *start = xe_vma_start(prev); 4846 } 4847 4848 if (*end < vm->size) { 4849 next = xe_vm_find_vma_by_addr(vm, *end + 1); 4850 if (is_cpu_addr_vma_with_default_attr(next)) 4851 *end = xe_vma_end(next); 4852 } 4853 } 4854 4855 /** 4856 * xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU addr mirror vma 4857 * @vm: Pointer to the xe_vm structure 4858 * @start: Starting input address 4859 * @range: Size of the input range 4860 * 4861 * This function splits/merges existing vma to create new vma for user provided input range 4862 * 4863 * Return: 0 if success 4864 */ 4865 int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t range) 4866 { 4867 struct drm_gpuvm_map_req map_req = { 4868 .map.va.addr = start, 4869 .map.va.range = range, 4870 }; 4871 4872 lockdep_assert_held_write(&vm->lock); 4873 4874 vm_dbg(&vm->xe->drm, "CPU_ADDR_MIRROR_VMA_OPS_CREATE: addr=0x%016llx, size=0x%016llx", 4875 start, range); 4876 4877 return xe_vm_alloc_vma(vm, &map_req, false); 4878 } 4879 4880 /** 4881 * xe_vm_add_exec_queue() - Add exec queue to VM 4882 * @vm: The VM. 4883 * @q: The exec_queue 4884 * 4885 * Add exec queue to VM, skipped if the device does not have context based TLB 4886 * invalidations. 
4887 */ 4888 void xe_vm_add_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 4889 { 4890 struct xe_device *xe = vm->xe; 4891 4892 /* User VMs and queues only */ 4893 xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_KERNEL)); 4894 xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); 4895 xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM)); 4896 xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_MIGRATE)); 4897 xe_assert(xe, vm->xef); 4898 xe_assert(xe, vm == q->vm); 4899 4900 if (!xe->info.has_ctx_tlb_inval) 4901 return; 4902 4903 down_write(&vm->exec_queues.lock); 4904 list_add(&q->vm_exec_queue_link, &vm->exec_queues.list[q->gt->info.id]); 4905 ++vm->exec_queues.count[q->gt->info.id]; 4906 up_write(&vm->exec_queues.lock); 4907 } 4908 4909 /** 4910 * xe_vm_remove_exec_queue() - Remove exec queue from VM 4911 * @vm: The VM. 4912 * @q: The exec_queue 4913 * 4914 * Remove exec queue from VM, skipped if the device does not have context based 4915 * TLB invalidations. 4916 */ 4917 void xe_vm_remove_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 4918 { 4919 if (!vm->xe->info.has_ctx_tlb_inval) 4920 return; 4921 4922 down_write(&vm->exec_queues.lock); 4923 if (!list_empty(&q->vm_exec_queue_link)) { 4924 list_del(&q->vm_exec_queue_link); 4925 --vm->exec_queues.count[q->gt->info.id]; 4926 } 4927 up_write(&vm->exec_queues.lock); 4928 } 4929