1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2025 Intel Corporation 4 */ 5 6 #include "xe_svm.h" 7 #include "xe_userptr.h" 8 9 #include <linux/mm.h> 10 11 #include "xe_tlb_inval.h" 12 #include "xe_trace_bo.h" 13 14 static void xe_userptr_assert_in_notifier(struct xe_vm *vm) 15 { 16 lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) || 17 (lockdep_is_held(&vm->lock) && 18 lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) && 19 dma_resv_held(xe_vm_resv(vm)))); 20 } 21 22 /** 23 * xe_vma_userptr_check_repin() - Advisory check for repin needed 24 * @uvma: The userptr vma 25 * 26 * Check if the userptr vma has been invalidated since last successful 27 * repin. The check is advisory only and can the function can be called 28 * without the vm->svm.gpusvm.notifier_lock held. There is no guarantee that the 29 * vma userptr will remain valid after a lockless check, so typically 30 * the call needs to be followed by a proper check under the notifier_lock. 31 * 32 * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended. 33 */ 34 int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma) 35 { 36 return mmu_interval_check_retry(&uvma->userptr.notifier, 37 uvma->userptr.pages.notifier_seq) ? 38 -EAGAIN : 0; 39 } 40 41 /** 42 * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs 43 * that need repinning. 44 * @vm: The VM. 45 * 46 * This function checks for whether the VM has userptrs that need repinning, 47 * and provides a release-type barrier on the svm.gpusvm.notifier_lock after 48 * checking. 49 * 50 * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are. 51 */ 52 int __xe_vm_userptr_needs_repin(struct xe_vm *vm) 53 { 54 lockdep_assert_held_read(&vm->svm.gpusvm.notifier_lock); 55 56 return (list_empty(&vm->userptr.repin_list) && 57 list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN; 58 } 59 60 int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) 61 { 62 struct xe_vma *vma = &uvma->vma; 63 struct xe_vm *vm = xe_vma_vm(vma); 64 struct xe_device *xe = vm->xe; 65 struct drm_gpusvm_ctx ctx = { 66 .read_only = xe_vma_read_only(vma), 67 .device_private_page_owner = xe_svm_private_page_owner(vm, false), 68 .allow_mixed = true, 69 }; 70 71 lockdep_assert_held(&vm->lock); 72 xe_assert(xe, xe_vma_is_userptr(vma)); 73 74 if (vma->gpuva.flags & XE_VMA_DESTROYED) 75 return 0; 76 77 return drm_gpusvm_get_pages(&vm->svm.gpusvm, &uvma->userptr.pages, 78 uvma->userptr.notifier.mm, 79 &uvma->userptr.notifier, 80 xe_vma_userptr(vma), 81 xe_vma_userptr(vma) + xe_vma_size(vma), 82 &ctx); 83 } 84 85 static struct mmu_interval_notifier_finish * 86 xe_vma_userptr_do_inval(struct xe_vm *vm, struct xe_userptr_vma *uvma, bool is_deferred) 87 { 88 struct xe_userptr *userptr = &uvma->userptr; 89 struct xe_vma *vma = &uvma->vma; 90 struct drm_gpusvm_ctx ctx = { 91 .in_notifier = true, 92 .read_only = xe_vma_read_only(vma), 93 }; 94 long err; 95 96 xe_userptr_assert_in_notifier(vm); 97 if (is_deferred) 98 xe_assert(vm->xe, userptr->finish_inuse && !userptr->tlb_inval_submitted); 99 100 err = dma_resv_wait_timeout(xe_vm_resv(vm), 101 DMA_RESV_USAGE_BOOKKEEP, 102 false, MAX_SCHEDULE_TIMEOUT); 103 XE_WARN_ON(err <= 0); 104 105 if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) { 106 if (!userptr->finish_inuse) { 107 /* 108 * Defer the TLB wait to an extra pass so the caller 109 * can pipeline TLB flushes across GPUs before waiting 110 * on any of them. 111 */ 112 xe_assert(vm->xe, !userptr->tlb_inval_submitted); 113 userptr->finish_inuse = true; 114 userptr->tlb_inval_submitted = true; 115 err = xe_vm_invalidate_vma_submit(vma, &userptr->inval_batch); 116 XE_WARN_ON(err); 117 return &userptr->finish; 118 } 119 err = xe_vm_invalidate_vma(vma); 120 XE_WARN_ON(err); 121 } 122 123 if (is_deferred) 124 userptr->finish_inuse = false; 125 drm_gpusvm_unmap_pages(&vm->svm.gpusvm, &uvma->userptr.pages, 126 xe_vma_size(vma) >> PAGE_SHIFT, &ctx); 127 return NULL; 128 } 129 130 static void 131 xe_vma_userptr_complete_tlb_inval(struct xe_vm *vm, struct xe_userptr_vma *uvma) 132 { 133 struct xe_userptr *userptr = &uvma->userptr; 134 struct xe_vma *vma = &uvma->vma; 135 struct drm_gpusvm_ctx ctx = { 136 .in_notifier = true, 137 .read_only = xe_vma_read_only(vma), 138 }; 139 140 xe_userptr_assert_in_notifier(vm); 141 xe_assert(vm->xe, userptr->finish_inuse); 142 xe_assert(vm->xe, userptr->tlb_inval_submitted); 143 144 xe_tlb_inval_batch_wait(&userptr->inval_batch); 145 userptr->tlb_inval_submitted = false; 146 userptr->finish_inuse = false; 147 drm_gpusvm_unmap_pages(&vm->svm.gpusvm, &uvma->userptr.pages, 148 xe_vma_size(vma) >> PAGE_SHIFT, &ctx); 149 } 150 151 static struct mmu_interval_notifier_finish * 152 xe_vma_userptr_invalidate_pass1(struct xe_vm *vm, struct xe_userptr_vma *uvma) 153 { 154 struct xe_userptr *userptr = &uvma->userptr; 155 struct xe_vma *vma = &uvma->vma; 156 struct dma_resv_iter cursor; 157 struct dma_fence *fence; 158 bool signaled = true; 159 160 xe_userptr_assert_in_notifier(vm); 161 162 /* 163 * Tell exec and rebind worker they need to repin and rebind this 164 * userptr. 165 */ 166 if (!xe_vm_in_fault_mode(vm) && 167 !(vma->gpuva.flags & XE_VMA_DESTROYED)) { 168 spin_lock(&vm->userptr.invalidated_lock); 169 list_move_tail(&userptr->invalidate_link, 170 &vm->userptr.invalidated); 171 spin_unlock(&vm->userptr.invalidated_lock); 172 } 173 174 /* 175 * Preempt fences turn into schedule disables, pipeline these. 176 * Note that even in fault mode, we need to wait for binds and 177 * unbinds to complete, and those are attached as BOOKMARK fences 178 * to the vm. 179 */ 180 dma_resv_iter_begin(&cursor, xe_vm_resv(vm), 181 DMA_RESV_USAGE_BOOKKEEP); 182 dma_resv_for_each_fence_unlocked(&cursor, fence) { 183 dma_fence_enable_sw_signaling(fence); 184 if (signaled && !dma_fence_is_signaled(fence)) 185 signaled = false; 186 } 187 dma_resv_iter_end(&cursor); 188 189 /* 190 * Only one caller at a time can use the multi-pass state. 191 * If it's already in use, or all fences are already signaled, 192 * proceed directly to invalidation without deferring. 193 */ 194 if (signaled || userptr->finish_inuse) 195 return xe_vma_userptr_do_inval(vm, uvma, false); 196 197 /* Defer: the notifier core will call invalidate_finish once done. */ 198 userptr->finish_inuse = true; 199 200 return &userptr->finish; 201 } 202 203 static bool xe_vma_userptr_invalidate_start(struct mmu_interval_notifier *mni, 204 const struct mmu_notifier_range *range, 205 unsigned long cur_seq, 206 struct mmu_interval_notifier_finish **p_finish) 207 { 208 struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier); 209 struct xe_vma *vma = &uvma->vma; 210 struct xe_vm *vm = xe_vma_vm(vma); 211 212 xe_assert(vm->xe, xe_vma_is_userptr(vma)); 213 trace_xe_vma_userptr_invalidate(vma); 214 215 if (!mmu_notifier_range_blockable(range)) 216 return false; 217 218 vm_dbg(&xe_vma_vm(vma)->xe->drm, 219 "NOTIFIER PASS1: addr=0x%016llx, range=0x%016llx", 220 xe_vma_start(vma), xe_vma_size(vma)); 221 222 down_write(&vm->svm.gpusvm.notifier_lock); 223 mmu_interval_set_seq(mni, cur_seq); 224 225 *p_finish = xe_vma_userptr_invalidate_pass1(vm, uvma); 226 227 up_write(&vm->svm.gpusvm.notifier_lock); 228 if (!*p_finish) 229 trace_xe_vma_userptr_invalidate_complete(vma); 230 231 return true; 232 } 233 234 static void xe_vma_userptr_invalidate_finish(struct mmu_interval_notifier_finish *finish) 235 { 236 struct xe_userptr_vma *uvma = container_of(finish, typeof(*uvma), userptr.finish); 237 struct xe_vma *vma = &uvma->vma; 238 struct xe_vm *vm = xe_vma_vm(vma); 239 240 vm_dbg(&xe_vma_vm(vma)->xe->drm, 241 "NOTIFIER PASS2: addr=0x%016llx, range=0x%016llx", 242 xe_vma_start(vma), xe_vma_size(vma)); 243 244 down_write(&vm->svm.gpusvm.notifier_lock); 245 /* 246 * If a TLB invalidation was previously submitted (deferred from the 247 * synchronous pass1 fallback), wait for it and unmap pages. 248 * Otherwise, fences have now completed: invalidate the TLB and unmap. 249 */ 250 if (uvma->userptr.tlb_inval_submitted) 251 xe_vma_userptr_complete_tlb_inval(vm, uvma); 252 else 253 xe_vma_userptr_do_inval(vm, uvma, true); 254 up_write(&vm->svm.gpusvm.notifier_lock); 255 trace_xe_vma_userptr_invalidate_complete(vma); 256 } 257 258 static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { 259 .invalidate_start = xe_vma_userptr_invalidate_start, 260 .invalidate_finish = xe_vma_userptr_invalidate_finish, 261 }; 262 263 #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) 264 /** 265 * xe_vma_userptr_force_invalidate() - force invalidate a userptr 266 * @uvma: The userptr vma to invalidate 267 * 268 * Perform a forced userptr invalidation for testing purposes. 269 */ 270 void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) 271 { 272 static struct mmu_interval_notifier_finish *finish; 273 struct xe_vm *vm = xe_vma_vm(&uvma->vma); 274 275 /* Protect against concurrent userptr pinning */ 276 lockdep_assert_held(&vm->lock); 277 /* Protect against concurrent notifiers */ 278 lockdep_assert_held(&vm->svm.gpusvm.notifier_lock); 279 /* 280 * Protect against concurrent instances of this function and 281 * the critical exec sections 282 */ 283 xe_vm_assert_held(vm); 284 285 if (!mmu_interval_read_retry(&uvma->userptr.notifier, 286 uvma->userptr.pages.notifier_seq)) 287 uvma->userptr.pages.notifier_seq -= 2; 288 289 finish = xe_vma_userptr_invalidate_pass1(vm, uvma); 290 if (finish) 291 finish = xe_vma_userptr_do_inval(vm, uvma, true); 292 if (finish) 293 xe_vma_userptr_complete_tlb_inval(vm, uvma); 294 } 295 #endif 296 297 int xe_vm_userptr_pin(struct xe_vm *vm) 298 { 299 struct xe_userptr_vma *uvma, *next; 300 int err = 0; 301 302 xe_assert(vm->xe, !xe_vm_in_fault_mode(vm)); 303 lockdep_assert_held_write(&vm->lock); 304 305 /* Collect invalidated userptrs */ 306 spin_lock(&vm->userptr.invalidated_lock); 307 xe_assert(vm->xe, list_empty(&vm->userptr.repin_list)); 308 list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated, 309 userptr.invalidate_link) { 310 list_del_init(&uvma->userptr.invalidate_link); 311 list_add_tail(&uvma->userptr.repin_link, 312 &vm->userptr.repin_list); 313 } 314 spin_unlock(&vm->userptr.invalidated_lock); 315 316 /* Pin and move to bind list */ 317 list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, 318 userptr.repin_link) { 319 err = xe_vma_userptr_pin_pages(uvma); 320 if (err == -EFAULT) { 321 list_del_init(&uvma->userptr.repin_link); 322 /* 323 * We might have already done the pin once already, but 324 * then had to retry before the re-bind happened, due 325 * some other condition in the caller, but in the 326 * meantime the userptr got dinged by the notifier such 327 * that we need to revalidate here, but this time we hit 328 * the EFAULT. In such a case make sure we remove 329 * ourselves from the rebind list to avoid going down in 330 * flames. 331 */ 332 if (!list_empty(&uvma->vma.combined_links.rebind)) 333 list_del_init(&uvma->vma.combined_links.rebind); 334 335 /* Wait for pending binds */ 336 xe_vm_lock(vm, false); 337 dma_resv_wait_timeout(xe_vm_resv(vm), 338 DMA_RESV_USAGE_BOOKKEEP, 339 false, MAX_SCHEDULE_TIMEOUT); 340 341 down_read(&vm->svm.gpusvm.notifier_lock); 342 err = xe_vm_invalidate_vma(&uvma->vma); 343 up_read(&vm->svm.gpusvm.notifier_lock); 344 xe_vm_unlock(vm); 345 if (err) 346 break; 347 } else { 348 if (err) 349 break; 350 351 list_del_init(&uvma->userptr.repin_link); 352 list_move_tail(&uvma->vma.combined_links.rebind, 353 &vm->rebind_list); 354 } 355 } 356 357 if (err) { 358 down_write(&vm->svm.gpusvm.notifier_lock); 359 spin_lock(&vm->userptr.invalidated_lock); 360 list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, 361 userptr.repin_link) { 362 list_del_init(&uvma->userptr.repin_link); 363 list_move_tail(&uvma->userptr.invalidate_link, 364 &vm->userptr.invalidated); 365 } 366 spin_unlock(&vm->userptr.invalidated_lock); 367 up_write(&vm->svm.gpusvm.notifier_lock); 368 } 369 return err; 370 } 371 372 /** 373 * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs 374 * that need repinning. 375 * @vm: The VM. 376 * 377 * This function does an advisory check for whether the VM has userptrs that 378 * need repinning. 379 * 380 * Return: 0 if there are no indications of userptrs needing repinning, 381 * -EAGAIN if there are. 382 */ 383 int xe_vm_userptr_check_repin(struct xe_vm *vm) 384 { 385 return (list_empty_careful(&vm->userptr.repin_list) && 386 list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN; 387 } 388 389 int xe_userptr_setup(struct xe_userptr_vma *uvma, unsigned long start, 390 unsigned long range) 391 { 392 struct xe_userptr *userptr = &uvma->userptr; 393 int err; 394 395 INIT_LIST_HEAD(&userptr->invalidate_link); 396 INIT_LIST_HEAD(&userptr->repin_link); 397 398 err = mmu_interval_notifier_insert(&userptr->notifier, current->mm, 399 start, range, 400 &vma_userptr_notifier_ops); 401 if (err) 402 return err; 403 404 userptr->pages.notifier_seq = LONG_MAX; 405 406 return 0; 407 } 408 409 void xe_userptr_remove(struct xe_userptr_vma *uvma) 410 { 411 struct xe_vm *vm = xe_vma_vm(&uvma->vma); 412 struct xe_userptr *userptr = &uvma->userptr; 413 414 drm_gpusvm_free_pages(&vm->svm.gpusvm, &uvma->userptr.pages, 415 xe_vma_size(&uvma->vma) >> PAGE_SHIFT); 416 417 /* 418 * Since userptr pages are not pinned, we can't remove 419 * the notifier until we're sure the GPU is not accessing 420 * them anymore 421 */ 422 mmu_interval_notifier_remove(&userptr->notifier); 423 } 424 425 void xe_userptr_destroy(struct xe_userptr_vma *uvma) 426 { 427 struct xe_vm *vm = xe_vma_vm(&uvma->vma); 428 429 spin_lock(&vm->userptr.invalidated_lock); 430 xe_assert(vm->xe, list_empty(&uvma->userptr.repin_link)); 431 list_del(&uvma->userptr.invalidate_link); 432 spin_unlock(&vm->userptr.invalidated_lock); 433 } 434