// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
 * Copyright © 2024 Intel Corporation
 *
 * Authors:
 *     Matthew Brost <matthew.brost@intel.com>
 */

#include <linux/dma-mapping.h>
#include <linux/export.h>
#include <linux/hmm.h>
#include <linux/hugetlb_inline.h>
#include <linux/memremap.h>
#include <linux/mm_types.h>
#include <linux/slab.h>

#include <drm/drm_device.h>
#include <drm/drm_gpusvm.h>
#include <drm/drm_pagemap.h>
#include <drm/drm_print.h>

/**
 * DOC: Overview
 *
 * GPU Shared Virtual Memory (GPU SVM) layer for the Direct Rendering Manager (DRM)
 * is a component of the DRM framework designed to manage shared virtual memory
 * between the CPU and GPU. It enables efficient data exchange and processing
 * for GPU-accelerated applications by allowing memory sharing and
 * synchronization between the CPU's and GPU's virtual address spaces.
 *
 * Key GPU SVM Components:
 *
 * - Notifiers:
 *	Used for tracking memory intervals and notifying the GPU of changes,
 *	notifiers are sized based on a GPU SVM initialization parameter, with a
 *	recommendation of 512M or larger. They maintain a Red-Black tree and a
 *	list of ranges that fall within the notifier interval. Notifiers are
 *	tracked within a GPU SVM Red-Black tree and list and are dynamically
 *	inserted or removed as ranges within the interval are created or
 *	destroyed.
 *
 * - Ranges:
 *	Represent memory ranges mapped in a DRM device and managed by GPU SVM.
 *	They are sized based on an array of chunk sizes, which is a GPU SVM
 *	initialization parameter, and the CPU address space. Upon GPU fault,
 *	the largest aligned chunk that fits within the faulting CPU address
 *	space is chosen for the range size. Ranges are expected to be
 *	dynamically allocated on GPU fault and removed on an MMU notifier UNMAP
 *	event. As mentioned above, ranges are tracked in a notifier's Red-Black
 *	tree.
 *
 * - Operations:
 *	Define the interface for driver-specific GPU SVM operations such as
 *	range allocation, notifier allocation, and invalidations.
 *
 * - Device Memory Allocations:
 *	Embedded structure containing enough information for GPU SVM to migrate
 *	to / from device memory.
 *
 * - Device Memory Operations:
 *	Define the interface for driver-specific device memory operations, such
 *	as releasing memory, populating pfns, and copying to / from device
 *	memory.
 *
 * This layer provides interfaces for allocating, mapping, migrating, and
 * releasing memory ranges between the CPU and GPU. It handles all core memory
 * management interactions (DMA mapping, HMM, and migration) and provides
 * driver-specific virtual functions (vfuncs). This infrastructure is sufficient
 * to build the expected driver components for an SVM implementation as detailed
 * below.
 *
 * Expected Driver Components:
 *
 * - GPU page fault handler:
 *	Used to create ranges and notifiers based on the fault address,
 *	optionally migrate the range to device memory, and create GPU bindings.
 *
 * - Garbage collector:
 *	Used to unmap and destroy GPU bindings for ranges. Ranges are expected
 *	to be added to the garbage collector upon an MMU_NOTIFY_UNMAP event in
 *	the notifier callback.
 *
 * - Notifier callback:
 *	Used to invalidate and DMA unmap GPU bindings for ranges.
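 *
 * A minimal sketch of how a driver might wire these components into
 * struct drm_gpusvm_ops is shown below; only the invalidate hook is
 * mandatory, the allocation hooks are optional, and the driver_* names are
 * placeholders rather than real functions:
 *
 * .. code-block:: c
 *
 *	static const struct drm_gpusvm_ops driver_gpusvm_ops = {
 *		.invalidate = driver_invalidation,	// mandatory, see DOC: Examples, 3) Notifier callback
 *		.range_alloc = driver_range_alloc,	// optional range allocation hook
 *		.range_free = driver_range_free,	// optional range free hook
 *	};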
 */

/**
 * DOC: Locking
 *
 * GPU SVM handles locking for core MM interactions, i.e., it locks/unlocks the
 * mmap lock as needed.
 *
 * GPU SVM introduces a global notifier lock, which safeguards the notifier's
 * range RB tree and list, as well as the range's DMA mappings and sequence
 * number. GPU SVM manages all necessary locking and unlocking operations,
 * except for rechecking that a range's pages are still valid
 * (drm_gpusvm_range_pages_valid()) when the driver is committing GPU bindings.
 * This lock corresponds to the ``driver->update`` lock mentioned in
 * Documentation/mm/hmm.rst. Future revisions may transition from a GPU SVM
 * global lock to a per-notifier lock if finer-grained locking is deemed
 * necessary.
 *
 * In addition to the locking mentioned above, the driver should implement a
 * lock to safeguard core GPU SVM function calls that modify state, such as
 * drm_gpusvm_range_find_or_insert() and drm_gpusvm_range_remove(). This lock is
 * denoted as 'driver_svm_lock' in code examples. Finer-grained driver-side
 * locking should also be possible for concurrent GPU fault processing within a
 * single GPU SVM. The 'driver_svm_lock' can be set via drm_gpusvm_driver_set_lock()
 * to add annotations to GPU SVM.
 */

/**
 * DOC: Partial Unmapping of Ranges
 *
 * Partial unmapping of ranges (e.g., 1M out of 2M is unmapped by the CPU, resulting
 * in an MMU_NOTIFY_UNMAP event) presents several challenges, with the main one
 * being that a subset of the range still has CPU and GPU mappings. If the
 * backing store for the range is in device memory, a subset of the backing
 * store has references. One option would be to split the range and device
 * memory backing store, but the implementation for this would be quite
 * complicated. Given that partial unmappings are rare and driver-defined range
 * sizes are relatively small, GPU SVM does not support splitting of ranges.
 *
 * With no support for range splitting, upon partial unmapping of a range, the
 * driver is expected to invalidate and destroy the entire range. If the range
 * has device memory as its backing, the driver is also expected to migrate any
 * remaining pages back to RAM.
 */

/**
 * DOC: Examples
 *
 * This section provides three examples of how to build the expected driver
 * components: the GPU page fault handler, the garbage collector, and the
 * notifier callback.
 *
 * The generic code provided does not include logic for complex migration
 * policies, optimized invalidations, fine-grained driver locking, or other
 * potentially required driver locking (e.g., DMA-resv locks).
 *
 * 1) GPU page fault handler
 *
 * .. code-block:: c
 *
 *	int driver_bind_range(struct drm_gpusvm *gpusvm, struct drm_gpusvm_range *range)
 *	{
 *		int err = 0;
 *
 *		driver_alloc_and_setup_memory_for_bind(gpusvm, range);
 *
 *		drm_gpusvm_notifier_lock(gpusvm);
 *		if (drm_gpusvm_range_pages_valid(range))
 *			driver_commit_bind(gpusvm, range);
 *		else
 *			err = -EAGAIN;
 *		drm_gpusvm_notifier_unlock(gpusvm);
 *
 *		return err;
 *	}
 *
 *	int driver_gpu_fault(struct drm_gpusvm *gpusvm, unsigned long fault_addr,
 *			     unsigned long gpuva_start, unsigned long gpuva_end)
 *	{
 *		struct drm_gpusvm_ctx ctx = {};
 *		struct drm_gpusvm_range *range;
 *		int err;
 *
 *		driver_svm_lock();
 *	retry:
 *		// Always process UNMAPs first so view of GPU SVM ranges is current
 *		driver_garbage_collector(gpusvm);
 *
 *		range = drm_gpusvm_range_find_or_insert(gpusvm, fault_addr,
 *							gpuva_start, gpuva_end,
 *							&ctx);
 *		if (IS_ERR(range)) {
 *			err = PTR_ERR(range);
 *			goto unlock;
 *		}
 *
 *		if (driver_migration_policy(range)) {
 *			err = drm_pagemap_populate_mm(driver_choose_drm_pagemap(),
 *						      gpuva_start, gpuva_end, gpusvm->mm,
 *						      ctx.timeslice_ms);
 *			if (err)	// CPU mappings may have changed
 *				goto retry;
 *		}
 *
 *		err = drm_gpusvm_range_get_pages(gpusvm, range, &ctx);
 *		if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {	// CPU mappings changed
 *			if (err == -EOPNOTSUPP)
 *				drm_gpusvm_range_evict(gpusvm, range);
 *			goto retry;
 *		} else if (err) {
 *			goto unlock;
 *		}
 *
 *		err = driver_bind_range(gpusvm, range);
 *		if (err == -EAGAIN)	// CPU mappings changed
 *			goto retry;
 *
 *	unlock:
 *		driver_svm_unlock();
 *		return err;
 *	}
 *
 * 2) Garbage Collector
 *
 * .. code-block:: c
 *
 *	void __driver_garbage_collector(struct drm_gpusvm *gpusvm,
 *					struct drm_gpusvm_range *range)
 *	{
 *		assert_driver_svm_locked(gpusvm);
 *
 *		// Partial unmap, migrate any remaining device memory pages back to RAM
 *		if (range->flags.partial_unmap)
 *			drm_gpusvm_range_evict(gpusvm, range);
 *
 *		driver_unbind_range(range);
 *		drm_gpusvm_range_remove(gpusvm, range);
 *	}
 *
 *	void driver_garbage_collector(struct drm_gpusvm *gpusvm)
 *	{
 *		assert_driver_svm_locked(gpusvm);
 *
 *		for_each_range_in_garbage_collector(gpusvm, range)
 *			__driver_garbage_collector(gpusvm, range);
 *	}
 *
 * 3) Notifier callback
 *
 * .. code-block:: c
 *
 *	void driver_invalidation(struct drm_gpusvm *gpusvm,
 *				 struct drm_gpusvm_notifier *notifier,
 *				 const struct mmu_notifier_range *mmu_range)
 *	{
 *		struct drm_gpusvm_ctx ctx = { .in_notifier = true, };
 *		struct drm_gpusvm_range *range = NULL;
 *
 *		driver_invalidate_device_pages(gpusvm, mmu_range->start, mmu_range->end);
 *
 *		drm_gpusvm_for_each_range(range, notifier, mmu_range->start,
 *					  mmu_range->end) {
 *			drm_gpusvm_range_unmap_pages(gpusvm, range, &ctx);
 *
 *			if (mmu_range->event != MMU_NOTIFY_UNMAP)
 *				continue;
 *
 *			drm_gpusvm_range_set_unmapped(range, mmu_range);
 *			driver_garbage_collector_add(gpusvm, range);
 *		}
 *	}
 */

/**
 * npages_in_range() - Calculate the number of pages in a given range
 * @start: The start address of the range
 * @end: The end address of the range
 *
 * This function calculates the number of pages in a given memory range,
 * specified by the start and end addresses. It divides the difference
 * between the end and start addresses by the page size (PAGE_SIZE) to
 * determine the number of pages in the range.
 *
 * Return: The number of pages in the specified range.
 */
static unsigned long
npages_in_range(unsigned long start, unsigned long end)
{
	return (end - start) >> PAGE_SHIFT;
}

/**
 * drm_gpusvm_range_find() - Find GPU SVM range from GPU SVM notifier
 * @notifier: Pointer to the GPU SVM notifier structure.
 * @start: Start address of the range
 * @end: End address of the range
 *
 * Return: A pointer to the drm_gpusvm_range if found or NULL
 */
struct drm_gpusvm_range *
drm_gpusvm_range_find(struct drm_gpusvm_notifier *notifier, unsigned long start,
		      unsigned long end)
{
	struct interval_tree_node *itree;

	itree = interval_tree_iter_first(&notifier->root, start, end - 1);

	if (itree)
		return container_of(itree, struct drm_gpusvm_range, itree);
	else
		return NULL;
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_find);

/**
 * drm_gpusvm_for_each_range_safe() - Safely iterate over GPU SVM ranges in a notifier
 * @range__: Iterator variable for the ranges
 * @next__: Iterator variable for the ranges temporary storage
 * @notifier__: Pointer to the GPU SVM notifier
 * @start__: Start address of the range
 * @end__: End address of the range
 *
 * This macro is used to iterate over GPU SVM ranges in a notifier while
 * removing ranges from it.
 */
#define drm_gpusvm_for_each_range_safe(range__, next__, notifier__, start__, end__)	\
	for ((range__) = drm_gpusvm_range_find((notifier__), (start__), (end__)),	\
	     (next__) = __drm_gpusvm_range_next(range__);				\
	     (range__) && (drm_gpusvm_range_start(range__) < (end__));			\
	     (range__) = (next__), (next__) = __drm_gpusvm_range_next(range__))

/**
 * __drm_gpusvm_notifier_next() - get the next drm_gpusvm_notifier in the list
 * @notifier: a pointer to the current drm_gpusvm_notifier
 *
 * Return: A pointer to the next drm_gpusvm_notifier if available, or NULL if
 *         the current notifier is the last one or if the input notifier is
 *         NULL.
 */
static struct drm_gpusvm_notifier *
__drm_gpusvm_notifier_next(struct drm_gpusvm_notifier *notifier)
{
	if (notifier && !list_is_last(&notifier->entry,
				      &notifier->gpusvm->notifier_list))
		return list_next_entry(notifier, entry);

	return NULL;
}

static struct drm_gpusvm_notifier *
notifier_iter_first(struct rb_root_cached *root, unsigned long start,
		    unsigned long last)
{
	struct interval_tree_node *itree;

	itree = interval_tree_iter_first(root, start, last);

	if (itree)
		return container_of(itree, struct drm_gpusvm_notifier, itree);
	else
		return NULL;
}

/**
 * drm_gpusvm_for_each_notifier() - Iterate over GPU SVM notifiers in a gpusvm
 * @notifier__: Iterator variable for the notifiers
 * @gpusvm__: Pointer to the GPU SVM structure
 * @start__: Start address of the notifier
 * @end__: End address of the notifier
 *
 * This macro is used to iterate over GPU SVM notifiers in a gpusvm.
 */
#define drm_gpusvm_for_each_notifier(notifier__, gpusvm__, start__, end__)		\
	for ((notifier__) = notifier_iter_first(&(gpusvm__)->root, (start__), (end__) - 1);	\
	     (notifier__) && (drm_gpusvm_notifier_start(notifier__) < (end__));		\
	     (notifier__) = __drm_gpusvm_notifier_next(notifier__))

/**
 * drm_gpusvm_for_each_notifier_safe() - Safely iterate over GPU SVM notifiers in a gpusvm
 * @notifier__: Iterator variable for the notifiers
 * @next__: Iterator variable for the notifiers temporary storage
 * @gpusvm__: Pointer to the GPU SVM structure
 * @start__: Start address of the notifier
 * @end__: End address of the notifier
 *
 * This macro is used to iterate over GPU SVM notifiers in a gpusvm while
 * removing notifiers from it.
 */
#define drm_gpusvm_for_each_notifier_safe(notifier__, next__, gpusvm__, start__, end__)	\
	for ((notifier__) = notifier_iter_first(&(gpusvm__)->root, (start__), (end__) - 1),	\
	     (next__) = __drm_gpusvm_notifier_next(notifier__);				\
	     (notifier__) && (drm_gpusvm_notifier_start(notifier__) < (end__));		\
	     (notifier__) = (next__), (next__) = __drm_gpusvm_notifier_next(notifier__))

/**
 * drm_gpusvm_notifier_invalidate() - Invalidate a GPU SVM notifier.
 * @mni: Pointer to the mmu_interval_notifier structure.
 * @mmu_range: Pointer to the mmu_notifier_range structure.
 * @cur_seq: Current sequence number.
 *
 * This function serves as a generic MMU notifier for GPU SVM. It sets the MMU
 * notifier sequence number and calls the driver invalidate vfunc under
 * gpusvm->notifier_lock.
 *
 * Return: true if the operation succeeds, false otherwise.
 */
static bool
drm_gpusvm_notifier_invalidate(struct mmu_interval_notifier *mni,
			       const struct mmu_notifier_range *mmu_range,
			       unsigned long cur_seq)
{
	struct drm_gpusvm_notifier *notifier =
		container_of(mni, typeof(*notifier), notifier);
	struct drm_gpusvm *gpusvm = notifier->gpusvm;

	if (!mmu_notifier_range_blockable(mmu_range))
		return false;

	down_write(&gpusvm->notifier_lock);
	mmu_interval_set_seq(mni, cur_seq);
	gpusvm->ops->invalidate(gpusvm, notifier, mmu_range);
	up_write(&gpusvm->notifier_lock);

	return true;
}

/*
 * drm_gpusvm_notifier_ops - MMU interval notifier operations for GPU SVM
 */
static const struct mmu_interval_notifier_ops drm_gpusvm_notifier_ops = {
	.invalidate = drm_gpusvm_notifier_invalidate,
};

/**
 * drm_gpusvm_init() - Initialize the GPU SVM.
 * @gpusvm: Pointer to the GPU SVM structure.
 * @name: Name of the GPU SVM.
 * @drm: Pointer to the DRM device structure.
 * @mm: Pointer to the mm_struct for the address space.
 * @device_private_page_owner: Device private pages owner.
 * @mm_start: Start address of GPU SVM.
 * @mm_range: Range of the GPU SVM.
 * @notifier_size: Size of individual notifiers.
 * @ops: Pointer to the operations structure for GPU SVM.
 * @chunk_sizes: Pointer to the array of chunk sizes used in range allocation.
 *               Entries should be powers of 2 in descending order with last
 *               entry being SZ_4K.
 * @num_chunks: Number of chunks.
 *
 * This function initializes the GPU SVM.
 *
 * Return: 0 on success, a negative error code on failure.
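 *
 * A minimal initialization sketch is shown below; the driver structure, the
 * driver-side lock, the ops table, and the chosen sizes are illustrative
 * assumptions rather than requirements:
 *
 * .. code-block:: c
 *
 *	static const unsigned long driver_chunk_sizes[] = { SZ_2M, SZ_64K, SZ_4K };
 *
 *	err = drm_gpusvm_init(&driver->gpusvm, "driver-svm", &driver->drm,
 *			      current->mm, NULL, 0, TASK_SIZE, SZ_512M,
 *			      &driver_gpusvm_ops, driver_chunk_sizes,
 *			      ARRAY_SIZE(driver_chunk_sizes));
 *	if (err)
 *		return err;
 *
 *	// Optionally annotate the driver-side lock (see DOC: Locking)
 *	drm_gpusvm_driver_set_lock(&driver->gpusvm, &driver->svm_lock);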
 */
int drm_gpusvm_init(struct drm_gpusvm *gpusvm,
		    const char *name, struct drm_device *drm,
		    struct mm_struct *mm, void *device_private_page_owner,
		    unsigned long mm_start, unsigned long mm_range,
		    unsigned long notifier_size,
		    const struct drm_gpusvm_ops *ops,
		    const unsigned long *chunk_sizes, int num_chunks)
{
	if (!ops->invalidate || !num_chunks)
		return -EINVAL;

	gpusvm->name = name;
	gpusvm->drm = drm;
	gpusvm->mm = mm;
	gpusvm->device_private_page_owner = device_private_page_owner;
	gpusvm->mm_start = mm_start;
	gpusvm->mm_range = mm_range;
	gpusvm->notifier_size = notifier_size;
	gpusvm->ops = ops;
	gpusvm->chunk_sizes = chunk_sizes;
	gpusvm->num_chunks = num_chunks;

	mmgrab(mm);
	gpusvm->root = RB_ROOT_CACHED;
	INIT_LIST_HEAD(&gpusvm->notifier_list);

	init_rwsem(&gpusvm->notifier_lock);

	fs_reclaim_acquire(GFP_KERNEL);
	might_lock(&gpusvm->notifier_lock);
	fs_reclaim_release(GFP_KERNEL);

#ifdef CONFIG_LOCKDEP
	gpusvm->lock_dep_map = NULL;
#endif

	return 0;
}
EXPORT_SYMBOL_GPL(drm_gpusvm_init);

/**
 * drm_gpusvm_notifier_find() - Find GPU SVM notifier
 * @gpusvm: Pointer to the GPU SVM structure
 * @fault_addr: Fault address
 *
 * This function finds the GPU SVM notifier associated with the fault address.
 *
 * Return: Pointer to the GPU SVM notifier on success, NULL otherwise.
 */
static struct drm_gpusvm_notifier *
drm_gpusvm_notifier_find(struct drm_gpusvm *gpusvm,
			 unsigned long fault_addr)
{
	return notifier_iter_first(&gpusvm->root, fault_addr, fault_addr + 1);
}

/**
 * to_drm_gpusvm_notifier() - retrieve the container struct for a given rbtree node
 * @node: a pointer to the rbtree node embedded within a drm_gpusvm_notifier struct
 *
 * Return: A pointer to the containing drm_gpusvm_notifier structure.
 */
static struct drm_gpusvm_notifier *to_drm_gpusvm_notifier(struct rb_node *node)
{
	return container_of(node, struct drm_gpusvm_notifier, itree.rb);
}

/**
 * drm_gpusvm_notifier_insert() - Insert GPU SVM notifier
 * @gpusvm: Pointer to the GPU SVM structure
 * @notifier: Pointer to the GPU SVM notifier structure
 *
 * This function inserts the GPU SVM notifier into the GPU SVM RB tree and list.
 */
static void drm_gpusvm_notifier_insert(struct drm_gpusvm *gpusvm,
				       struct drm_gpusvm_notifier *notifier)
{
	struct rb_node *node;
	struct list_head *head;

	interval_tree_insert(&notifier->itree, &gpusvm->root);

	node = rb_prev(&notifier->itree.rb);
	if (node)
		head = &(to_drm_gpusvm_notifier(node))->entry;
	else
		head = &gpusvm->notifier_list;

	list_add(&notifier->entry, head);
}

/**
 * drm_gpusvm_notifier_remove() - Remove GPU SVM notifier
 * @gpusvm: Pointer to the GPU SVM structure
 * @notifier: Pointer to the GPU SVM notifier structure
 *
 * This function removes the GPU SVM notifier from the GPU SVM RB tree and list.
 */
static void drm_gpusvm_notifier_remove(struct drm_gpusvm *gpusvm,
				       struct drm_gpusvm_notifier *notifier)
{
	interval_tree_remove(&notifier->itree, &gpusvm->root);
	list_del(&notifier->entry);
}

/**
 * drm_gpusvm_fini() - Finalize the GPU SVM.
 * @gpusvm: Pointer to the GPU SVM structure.
 *
 * This function finalizes the GPU SVM by cleaning up any remaining ranges and
 * notifiers, and dropping a reference to the struct mm_struct.
 */
void drm_gpusvm_fini(struct drm_gpusvm *gpusvm)
{
	struct drm_gpusvm_notifier *notifier, *next;

	drm_gpusvm_for_each_notifier_safe(notifier, next, gpusvm, 0, LONG_MAX) {
		struct drm_gpusvm_range *range, *__next;

		/*
		 * Remove notifier first to avoid racing with any invalidation
		 */
		mmu_interval_notifier_remove(&notifier->notifier);
		notifier->flags.removed = true;

		drm_gpusvm_for_each_range_safe(range, __next, notifier, 0,
					       LONG_MAX)
			drm_gpusvm_range_remove(gpusvm, range);
	}

	mmdrop(gpusvm->mm);
	WARN_ON(!RB_EMPTY_ROOT(&gpusvm->root.rb_root));
}
EXPORT_SYMBOL_GPL(drm_gpusvm_fini);

/**
 * drm_gpusvm_notifier_alloc() - Allocate GPU SVM notifier
 * @gpusvm: Pointer to the GPU SVM structure
 * @fault_addr: Fault address
 *
 * This function allocates and initializes the GPU SVM notifier structure.
 *
 * Return: Pointer to the allocated GPU SVM notifier on success, ERR_PTR() on failure.
 */
static struct drm_gpusvm_notifier *
drm_gpusvm_notifier_alloc(struct drm_gpusvm *gpusvm, unsigned long fault_addr)
{
	struct drm_gpusvm_notifier *notifier;

	if (gpusvm->ops->notifier_alloc)
		notifier = gpusvm->ops->notifier_alloc();
	else
		notifier = kzalloc(sizeof(*notifier), GFP_KERNEL);

	if (!notifier)
		return ERR_PTR(-ENOMEM);

	notifier->gpusvm = gpusvm;
	notifier->itree.start = ALIGN_DOWN(fault_addr, gpusvm->notifier_size);
	notifier->itree.last = ALIGN(fault_addr + 1, gpusvm->notifier_size) - 1;
	INIT_LIST_HEAD(&notifier->entry);
	notifier->root = RB_ROOT_CACHED;
	INIT_LIST_HEAD(&notifier->range_list);

	return notifier;
}

/**
 * drm_gpusvm_notifier_free() - Free GPU SVM notifier
 * @gpusvm: Pointer to the GPU SVM structure
 * @notifier: Pointer to the GPU SVM notifier structure
 *
 * This function frees the GPU SVM notifier structure.
 */
static void drm_gpusvm_notifier_free(struct drm_gpusvm *gpusvm,
				     struct drm_gpusvm_notifier *notifier)
{
	WARN_ON(!RB_EMPTY_ROOT(&notifier->root.rb_root));

	if (gpusvm->ops->notifier_free)
		gpusvm->ops->notifier_free(notifier);
	else
		kfree(notifier);
}

/**
 * to_drm_gpusvm_range() - retrieve the container struct for a given rbtree node
 * @node: a pointer to the rbtree node embedded within a drm_gpusvm_range struct
 *
 * Return: A pointer to the containing drm_gpusvm_range structure.
 */
static struct drm_gpusvm_range *to_drm_gpusvm_range(struct rb_node *node)
{
	return container_of(node, struct drm_gpusvm_range, itree.rb);
}

/**
 * drm_gpusvm_range_insert() - Insert GPU SVM range
 * @notifier: Pointer to the GPU SVM notifier structure
 * @range: Pointer to the GPU SVM range structure
 *
 * This function inserts the GPU SVM range into the notifier RB tree and list.
 */
static void drm_gpusvm_range_insert(struct drm_gpusvm_notifier *notifier,
				    struct drm_gpusvm_range *range)
{
	struct rb_node *node;
	struct list_head *head;

	drm_gpusvm_notifier_lock(notifier->gpusvm);
	interval_tree_insert(&range->itree, &notifier->root);

	node = rb_prev(&range->itree.rb);
	if (node)
		head = &(to_drm_gpusvm_range(node))->entry;
	else
		head = &notifier->range_list;

	list_add(&range->entry, head);
	drm_gpusvm_notifier_unlock(notifier->gpusvm);
}

/**
 * __drm_gpusvm_range_remove() - Remove GPU SVM range
 * @notifier: Pointer to the GPU SVM notifier structure
 * @range: Pointer to the GPU SVM range structure
 *
 * This function removes the GPU SVM range from the notifier RB tree and list.
 */
static void __drm_gpusvm_range_remove(struct drm_gpusvm_notifier *notifier,
				      struct drm_gpusvm_range *range)
{
	interval_tree_remove(&range->itree, &notifier->root);
	list_del(&range->entry);
}

/**
 * drm_gpusvm_range_alloc() - Allocate GPU SVM range
 * @gpusvm: Pointer to the GPU SVM structure
 * @notifier: Pointer to the GPU SVM notifier structure
 * @fault_addr: Fault address
 * @chunk_size: Chunk size
 * @migrate_devmem: Flag indicating whether to migrate device memory
 *
 * This function allocates and initializes the GPU SVM range structure.
 *
 * Return: Pointer to the allocated GPU SVM range on success, ERR_PTR() on failure.
 */
static struct drm_gpusvm_range *
drm_gpusvm_range_alloc(struct drm_gpusvm *gpusvm,
		       struct drm_gpusvm_notifier *notifier,
		       unsigned long fault_addr, unsigned long chunk_size,
		       bool migrate_devmem)
{
	struct drm_gpusvm_range *range;

	if (gpusvm->ops->range_alloc)
		range = gpusvm->ops->range_alloc(gpusvm);
	else
		range = kzalloc(sizeof(*range), GFP_KERNEL);

	if (!range)
		return ERR_PTR(-ENOMEM);

	kref_init(&range->refcount);
	range->gpusvm = gpusvm;
	range->notifier = notifier;
	range->itree.start = ALIGN_DOWN(fault_addr, chunk_size);
	range->itree.last = ALIGN(fault_addr + 1, chunk_size) - 1;
	INIT_LIST_HEAD(&range->entry);
	range->notifier_seq = LONG_MAX;
	range->flags.migrate_devmem = migrate_devmem ? 1 : 0;

	return range;
}

/**
 * drm_gpusvm_check_pages() - Check pages
 * @gpusvm: Pointer to the GPU SVM structure
 * @notifier: Pointer to the GPU SVM notifier structure
 * @start: Start address
 * @end: End address
 *
 * Check if pages between start and end have been faulted in on the CPU. Used to
 * prevent migration of pages without CPU backing store.
 *
 * Return: True if pages have been faulted into CPU, False otherwise
 */
static bool drm_gpusvm_check_pages(struct drm_gpusvm *gpusvm,
				   struct drm_gpusvm_notifier *notifier,
				   unsigned long start, unsigned long end)
{
	struct hmm_range hmm_range = {
		.default_flags = 0,
		.notifier = &notifier->notifier,
		.start = start,
		.end = end,
		.dev_private_owner = gpusvm->device_private_page_owner,
	};
	unsigned long timeout =
		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
	unsigned long *pfns;
	unsigned long npages = npages_in_range(start, end);
	int err, i;

	mmap_assert_locked(gpusvm->mm);

	pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
	if (!pfns)
		return false;

	hmm_range.notifier_seq = mmu_interval_read_begin(&notifier->notifier);
	hmm_range.hmm_pfns = pfns;

	while (true) {
		err = hmm_range_fault(&hmm_range);
		if (err == -EBUSY) {
			if (time_after(jiffies, timeout))
				break;

			hmm_range.notifier_seq =
				mmu_interval_read_begin(&notifier->notifier);
			continue;
		}
		break;
	}
	if (err)
		goto err_free;

	for (i = 0; i < npages;) {
		if (!(pfns[i] & HMM_PFN_VALID)) {
			err = -EFAULT;
			goto err_free;
		}
		i += 0x1 << hmm_pfn_to_map_order(pfns[i]);
	}

err_free:
	kvfree(pfns);
	return err ? false : true;
}

/**
 * drm_gpusvm_range_chunk_size() - Determine chunk size for GPU SVM range
 * @gpusvm: Pointer to the GPU SVM structure
 * @notifier: Pointer to the GPU SVM notifier structure
 * @vas: Pointer to the virtual memory area structure
 * @fault_addr: Fault address
 * @gpuva_start: Start address of GPUVA which mirrors CPU
 * @gpuva_end: End address of GPUVA which mirrors CPU
 * @check_pages_threshold: Check CPU pages for present threshold
 *
 * This function determines the chunk size for the GPU SVM range based on the
 * fault address, GPU SVM chunk sizes, existing GPU SVM ranges, and the virtual
 * memory area boundaries.
 *
 * Return: Chunk size on success, LONG_MAX on failure.
 */
static unsigned long
drm_gpusvm_range_chunk_size(struct drm_gpusvm *gpusvm,
			    struct drm_gpusvm_notifier *notifier,
			    struct vm_area_struct *vas,
			    unsigned long fault_addr,
			    unsigned long gpuva_start,
			    unsigned long gpuva_end,
			    unsigned long check_pages_threshold)
{
	unsigned long start, end;
	int i = 0;

retry:
	for (; i < gpusvm->num_chunks; ++i) {
		start = ALIGN_DOWN(fault_addr, gpusvm->chunk_sizes[i]);
		end = ALIGN(fault_addr + 1, gpusvm->chunk_sizes[i]);

		if (start >= vas->vm_start && end <= vas->vm_end &&
		    start >= drm_gpusvm_notifier_start(notifier) &&
		    end <= drm_gpusvm_notifier_end(notifier) &&
		    start >= gpuva_start && end <= gpuva_end)
			break;
	}

	if (i == gpusvm->num_chunks)
		return LONG_MAX;

	/*
	 * If allocating more than a page, ensure not to overlap with existing
	 * ranges.
	 */
	if (end - start != SZ_4K) {
		struct drm_gpusvm_range *range;

		range = drm_gpusvm_range_find(notifier, start, end);
		if (range) {
			++i;
			goto retry;
		}

		/*
		 * XXX: Only create range on pages CPU has faulted in. Without
		 * this check, or prefault, on BMG 'xe_exec_system_allocator --r
		 * process-many-malloc' fails. In the failure case, each process
		 * mallocs 16k but the CPU VMA is ~128k which results in 64k SVM
		 * ranges. When migrating the SVM ranges, some processes fail in
		 * drm_pagemap_migrate_to_devmem with 'migrate.cpages != npages'
		 * and then upon drm_gpusvm_range_get_pages device pages from
		 * other processes are collected + faulted in which creates all
		 * sorts of problems. Unsure exactly how this is happening; the
		 * problem also goes away if 'xe_exec_system_allocator --r
		 * process-many-malloc' mallocs at least 64k at a time.
		 */
		if (end - start <= check_pages_threshold &&
		    !drm_gpusvm_check_pages(gpusvm, notifier, start, end)) {
			++i;
			goto retry;
		}
	}

	return end - start;
}

#ifdef CONFIG_LOCKDEP
/**
 * drm_gpusvm_driver_lock_held() - Assert GPU SVM driver lock is held
 * @gpusvm: Pointer to the GPU SVM structure.
 *
 * Ensure driver lock is held.
 */
static void drm_gpusvm_driver_lock_held(struct drm_gpusvm *gpusvm)
{
	if ((gpusvm)->lock_dep_map)
		lockdep_assert(lock_is_held_type((gpusvm)->lock_dep_map, 0));
}
#else
static void drm_gpusvm_driver_lock_held(struct drm_gpusvm *gpusvm)
{
}
#endif

/**
 * drm_gpusvm_find_vma_start() - Find start address for first VMA in range
 * @gpusvm: Pointer to the GPU SVM structure
 * @start: The inclusive start user address.
 * @end: The exclusive end user address.
 *
 * Return: The start address of the first VMA within the provided range,
 *         ULONG_MAX otherwise. Assumes start < end.
 */
unsigned long
drm_gpusvm_find_vma_start(struct drm_gpusvm *gpusvm,
			  unsigned long start,
			  unsigned long end)
{
	struct mm_struct *mm = gpusvm->mm;
	struct vm_area_struct *vma;
	unsigned long addr = ULONG_MAX;

	if (!mmget_not_zero(mm))
		return addr;

	mmap_read_lock(mm);

	vma = find_vma_intersection(mm, start, end);
	if (vma)
		addr = vma->vm_start;

	mmap_read_unlock(mm);
	mmput(mm);

	return addr;
}
EXPORT_SYMBOL_GPL(drm_gpusvm_find_vma_start);

/**
 * drm_gpusvm_range_find_or_insert() - Find or insert GPU SVM range
 * @gpusvm: Pointer to the GPU SVM structure
 * @fault_addr: Fault address
 * @gpuva_start: Start address of GPUVA which mirrors CPU
 * @gpuva_end: End address of GPUVA which mirrors CPU
 * @ctx: GPU SVM context
 *
 * This function finds or inserts a newly allocated GPU SVM range based on the
 * fault address. Caller must hold a lock to protect range lookup and insertion.
 *
 * Return: Pointer to the GPU SVM range on success, ERR_PTR() on failure.
 */
struct drm_gpusvm_range *
drm_gpusvm_range_find_or_insert(struct drm_gpusvm *gpusvm,
				unsigned long fault_addr,
				unsigned long gpuva_start,
				unsigned long gpuva_end,
				const struct drm_gpusvm_ctx *ctx)
{
	struct drm_gpusvm_notifier *notifier;
	struct drm_gpusvm_range *range;
	struct mm_struct *mm = gpusvm->mm;
	struct vm_area_struct *vas;
	bool notifier_alloc = false;
	unsigned long chunk_size;
	int err;
	bool migrate_devmem;

	drm_gpusvm_driver_lock_held(gpusvm);

	if (fault_addr < gpusvm->mm_start ||
	    fault_addr > gpusvm->mm_start + gpusvm->mm_range)
		return ERR_PTR(-EINVAL);

	if (!mmget_not_zero(mm))
		return ERR_PTR(-EFAULT);

	notifier = drm_gpusvm_notifier_find(gpusvm, fault_addr);
	if (!notifier) {
		notifier = drm_gpusvm_notifier_alloc(gpusvm, fault_addr);
		if (IS_ERR(notifier)) {
			err = PTR_ERR(notifier);
			goto err_mmunlock;
		}
		notifier_alloc = true;
		err = mmu_interval_notifier_insert(&notifier->notifier,
						   mm,
						   drm_gpusvm_notifier_start(notifier),
						   drm_gpusvm_notifier_size(notifier),
						   &drm_gpusvm_notifier_ops);
		if (err)
			goto err_notifier;
	}

	mmap_read_lock(mm);

	vas = vma_lookup(mm, fault_addr);
	if (!vas) {
		err = -ENOENT;
		goto err_notifier_remove;
	}

	if (!ctx->read_only && !(vas->vm_flags & VM_WRITE)) {
		err = -EPERM;
		goto err_notifier_remove;
	}

	range = drm_gpusvm_range_find(notifier, fault_addr, fault_addr + 1);
	if (range)
		goto out_mmunlock;
	/*
	 * XXX: Short-circuiting migration based on migrate_vma_* current
	 * limitations. If/when migrate_vma_* add more support, this logic will
	 * have to change.
	 */
	migrate_devmem = ctx->devmem_possible &&
		vma_is_anonymous(vas) && !is_vm_hugetlb_page(vas);

	chunk_size = drm_gpusvm_range_chunk_size(gpusvm, notifier, vas,
						 fault_addr, gpuva_start,
						 gpuva_end,
						 ctx->check_pages_threshold);
	if (chunk_size == LONG_MAX) {
		err = -EINVAL;
		goto err_notifier_remove;
	}

	range = drm_gpusvm_range_alloc(gpusvm, notifier, fault_addr, chunk_size,
				       migrate_devmem);
	if (IS_ERR(range)) {
		err = PTR_ERR(range);
		goto err_notifier_remove;
	}

	drm_gpusvm_range_insert(notifier, range);
	if (notifier_alloc)
		drm_gpusvm_notifier_insert(gpusvm, notifier);

out_mmunlock:
	mmap_read_unlock(mm);
	mmput(mm);

	return range;

err_notifier_remove:
	mmap_read_unlock(mm);
	if (notifier_alloc)
		mmu_interval_notifier_remove(&notifier->notifier);
err_notifier:
	if (notifier_alloc)
		drm_gpusvm_notifier_free(gpusvm, notifier);
err_mmunlock:
	mmput(mm);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_find_or_insert);

/**
 * __drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range (internal)
 * @gpusvm: Pointer to the GPU SVM structure
 * @range: Pointer to the GPU SVM range structure
 * @npages: Number of pages to unmap
 *
 * This function unmaps pages associated with a GPU SVM range. Assumes and
 * asserts correct locking is in place when called.
 */
static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm,
					   struct drm_gpusvm_range *range,
					   unsigned long npages)
{
	unsigned long i, j;
	struct drm_pagemap *dpagemap = range->dpagemap;
	struct device *dev = gpusvm->drm->dev;

	lockdep_assert_held(&gpusvm->notifier_lock);

	if (range->flags.has_dma_mapping) {
		struct drm_gpusvm_range_flags flags = {
			.__flags = range->flags.__flags,
		};

		for (i = 0, j = 0; i < npages; j++) {
			struct drm_pagemap_device_addr *addr = &range->dma_addr[j];

			if (addr->proto == DRM_INTERCONNECT_SYSTEM)
				dma_unmap_page(dev,
					       addr->addr,
					       PAGE_SIZE << addr->order,
					       addr->dir);
			else if (dpagemap && dpagemap->ops->device_unmap)
				dpagemap->ops->device_unmap(dpagemap,
							    dev, *addr);
			i += 1 << addr->order;
		}

		/* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */
		flags.has_devmem_pages = false;
		flags.has_dma_mapping = false;
		WRITE_ONCE(range->flags.__flags, flags.__flags);

		range->dpagemap = NULL;
	}
}

/**
 * drm_gpusvm_range_free_pages() - Free pages associated with a GPU SVM range
 * @gpusvm: Pointer to the GPU SVM structure
 * @range: Pointer to the GPU SVM range structure
 *
 * This function frees the dma address array associated with a GPU SVM range.
 */
static void drm_gpusvm_range_free_pages(struct drm_gpusvm *gpusvm,
					struct drm_gpusvm_range *range)
{
	lockdep_assert_held(&gpusvm->notifier_lock);

	if (range->dma_addr) {
		kvfree(range->dma_addr);
		range->dma_addr = NULL;
	}
}

/**
 * drm_gpusvm_range_remove() - Remove GPU SVM range
 * @gpusvm: Pointer to the GPU SVM structure
 * @range: Pointer to the GPU SVM range to be removed
 *
 * This function removes the specified GPU SVM range and also removes the parent
 * GPU SVM notifier if no more ranges remain in the notifier. The caller must
 * hold a lock to protect range and notifier removal.
 */
void drm_gpusvm_range_remove(struct drm_gpusvm *gpusvm,
			     struct drm_gpusvm_range *range)
{
	unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
					       drm_gpusvm_range_end(range));
	struct drm_gpusvm_notifier *notifier;

	drm_gpusvm_driver_lock_held(gpusvm);

	notifier = drm_gpusvm_notifier_find(gpusvm,
					    drm_gpusvm_range_start(range));
	if (WARN_ON_ONCE(!notifier))
		return;

	drm_gpusvm_notifier_lock(gpusvm);
	__drm_gpusvm_range_unmap_pages(gpusvm, range, npages);
	drm_gpusvm_range_free_pages(gpusvm, range);
	__drm_gpusvm_range_remove(notifier, range);
	drm_gpusvm_notifier_unlock(gpusvm);

	drm_gpusvm_range_put(range);

	if (RB_EMPTY_ROOT(&notifier->root.rb_root)) {
		if (!notifier->flags.removed)
			mmu_interval_notifier_remove(&notifier->notifier);
		drm_gpusvm_notifier_remove(gpusvm, notifier);
		drm_gpusvm_notifier_free(gpusvm, notifier);
	}
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_remove);

/**
 * drm_gpusvm_range_get() - Get a reference to GPU SVM range
 * @range: Pointer to the GPU SVM range
 *
 * This function increments the reference count of the specified GPU SVM range.
 *
 * Return: Pointer to the GPU SVM range.
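 *
 * A short driver-side sketch is shown below; the work item, its fields, and
 * the workqueue are hypothetical:
 *
 * .. code-block:: c
 *
 *	// Keep the range alive while it is handed off outside driver_svm_lock
 *	work->range = drm_gpusvm_range_get(range);
 *	queue_work(driver_wq, &work->work);
 *
 *	// ...later, in the worker, drop the reference once done with the range
 *	drm_gpusvm_range_put(work->range);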
 */
struct drm_gpusvm_range *
drm_gpusvm_range_get(struct drm_gpusvm_range *range)
{
	kref_get(&range->refcount);

	return range;
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_get);

/**
 * drm_gpusvm_range_destroy() - Destroy GPU SVM range
 * @refcount: Pointer to the reference counter embedded in the GPU SVM range
 *
 * This function destroys the specified GPU SVM range when its reference count
 * reaches zero. If a custom range-free function is provided, it is invoked to
 * free the range; otherwise, the range is deallocated using kfree().
 */
static void drm_gpusvm_range_destroy(struct kref *refcount)
{
	struct drm_gpusvm_range *range =
		container_of(refcount, struct drm_gpusvm_range, refcount);
	struct drm_gpusvm *gpusvm = range->gpusvm;

	if (gpusvm->ops->range_free)
		gpusvm->ops->range_free(range);
	else
		kfree(range);
}

/**
 * drm_gpusvm_range_put() - Put a reference to GPU SVM range
 * @range: Pointer to the GPU SVM range
 *
 * This function decrements the reference count of the specified GPU SVM range
 * and frees it when the count reaches zero.
 */
void drm_gpusvm_range_put(struct drm_gpusvm_range *range)
{
	kref_put(&range->refcount, drm_gpusvm_range_destroy);
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_put);

/**
 * drm_gpusvm_range_pages_valid() - GPU SVM range pages valid
 * @gpusvm: Pointer to the GPU SVM structure
 * @range: Pointer to the GPU SVM range structure
 *
 * This function determines if a GPU SVM range's pages are valid. It is expected
 * to be called while holding gpusvm->notifier_lock and as the last step before
 * committing a GPU binding. This is akin to a notifier seqno check in the HMM
 * documentation but due to wider notifiers (i.e., notifiers which span multiple
 * ranges) this function is required for finer grained checking (i.e., per range)
 * if pages are valid.
 *
 * Return: True if GPU SVM range has valid pages, False otherwise
 */
bool drm_gpusvm_range_pages_valid(struct drm_gpusvm *gpusvm,
				  struct drm_gpusvm_range *range)
{
	lockdep_assert_held(&gpusvm->notifier_lock);

	return range->flags.has_devmem_pages || range->flags.has_dma_mapping;
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_pages_valid);

/**
 * drm_gpusvm_range_pages_valid_unlocked() - GPU SVM range pages valid unlocked
 * @gpusvm: Pointer to the GPU SVM structure
 * @range: Pointer to the GPU SVM range structure
 *
 * This function determines if a GPU SVM range's pages are valid. It is expected
 * to be called without holding gpusvm->notifier_lock.
 *
 * Return: True if GPU SVM range has valid pages, False otherwise
 */
static bool
drm_gpusvm_range_pages_valid_unlocked(struct drm_gpusvm *gpusvm,
				      struct drm_gpusvm_range *range)
{
	bool pages_valid;

	if (!range->dma_addr)
		return false;

	drm_gpusvm_notifier_lock(gpusvm);
	pages_valid = drm_gpusvm_range_pages_valid(gpusvm, range);
	if (!pages_valid)
		drm_gpusvm_range_free_pages(gpusvm, range);
	drm_gpusvm_notifier_unlock(gpusvm);

	return pages_valid;
}

/**
 * drm_gpusvm_range_get_pages() - Get pages for a GPU SVM range
 * @gpusvm: Pointer to the GPU SVM structure
 * @range: Pointer to the GPU SVM range structure
 * @ctx: GPU SVM context
 *
 * This function gets pages for a GPU SVM range and ensures they are mapped for
 * DMA access.
 *
 * Return: 0 on success, negative error code on failure.
 */
int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm,
			       struct drm_gpusvm_range *range,
			       const struct drm_gpusvm_ctx *ctx)
{
	struct mmu_interval_notifier *notifier = &range->notifier->notifier;
	struct hmm_range hmm_range = {
		.default_flags = HMM_PFN_REQ_FAULT | (ctx->read_only ? 0 :
			HMM_PFN_REQ_WRITE),
		.notifier = notifier,
		.start = drm_gpusvm_range_start(range),
		.end = drm_gpusvm_range_end(range),
		.dev_private_owner = gpusvm->device_private_page_owner,
	};
	struct mm_struct *mm = gpusvm->mm;
	void *zdd;
	unsigned long timeout =
		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
	unsigned long i, j;
	unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
					       drm_gpusvm_range_end(range));
	unsigned long num_dma_mapped;
	unsigned int order = 0;
	unsigned long *pfns;
	int err = 0;
	struct dev_pagemap *pagemap;
	struct drm_pagemap *dpagemap;
	struct drm_gpusvm_range_flags flags;

retry:
	hmm_range.notifier_seq = mmu_interval_read_begin(notifier);
	if (drm_gpusvm_range_pages_valid_unlocked(gpusvm, range))
		goto set_seqno;

	pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
	if (!pfns)
		return -ENOMEM;

	if (!mmget_not_zero(mm)) {
		err = -EFAULT;
		goto err_free;
	}

	hmm_range.hmm_pfns = pfns;
	while (true) {
		mmap_read_lock(mm);
		err = hmm_range_fault(&hmm_range);
		mmap_read_unlock(mm);

		if (err == -EBUSY) {
			if (time_after(jiffies, timeout))
				break;

			hmm_range.notifier_seq =
				mmu_interval_read_begin(notifier);
			continue;
		}
		break;
	}
	mmput(mm);
	if (err)
		goto err_free;

map_pages:
	/*
	 * Perform all dma mappings under the notifier lock to not
	 * access freed pages. A notifier will either block on
	 * the notifier lock or unmap dma.
	 */
	drm_gpusvm_notifier_lock(gpusvm);

	flags.__flags = range->flags.__flags;
	if (flags.unmapped) {
		drm_gpusvm_notifier_unlock(gpusvm);
		err = -EFAULT;
		goto err_free;
	}

	if (mmu_interval_read_retry(notifier, hmm_range.notifier_seq)) {
		drm_gpusvm_notifier_unlock(gpusvm);
		kvfree(pfns);
		goto retry;
	}

	if (!range->dma_addr) {
		/* Unlock and restart mapping to allocate memory. */
		drm_gpusvm_notifier_unlock(gpusvm);
		range->dma_addr = kvmalloc_array(npages,
						 sizeof(*range->dma_addr),
						 GFP_KERNEL);
		if (!range->dma_addr) {
			err = -ENOMEM;
			goto err_free;
		}
		goto map_pages;
	}

	zdd = NULL;
	pagemap = NULL;
	num_dma_mapped = 0;
	for (i = 0, j = 0; i < npages; ++j) {
		struct page *page = hmm_pfn_to_page(pfns[i]);

		order = hmm_pfn_to_map_order(pfns[i]);
		if (is_device_private_page(page) ||
		    is_device_coherent_page(page)) {
			if (zdd != page->zone_device_data && i > 0) {
				err = -EOPNOTSUPP;
				goto err_unmap;
			}
			zdd = page->zone_device_data;
			if (pagemap != page_pgmap(page)) {
				if (i > 0) {
					err = -EOPNOTSUPP;
					goto err_unmap;
				}

				pagemap = page_pgmap(page);
				dpagemap = drm_pagemap_page_to_dpagemap(page);
				if (drm_WARN_ON(gpusvm->drm, !dpagemap)) {
					/*
					 * Raced. This is not supposed to happen
					 * since hmm_range_fault() should've migrated
					 * this page to system.
					 */
					err = -EAGAIN;
					goto err_unmap;
				}
			}
			range->dma_addr[j] =
				dpagemap->ops->device_map(dpagemap,
							  gpusvm->drm->dev,
							  page, order,
							  DMA_BIDIRECTIONAL);
			if (dma_mapping_error(gpusvm->drm->dev,
					      range->dma_addr[j].addr)) {
				err = -EFAULT;
				goto err_unmap;
			}
		} else {
			dma_addr_t addr;

			if (is_zone_device_page(page) || pagemap) {
				err = -EOPNOTSUPP;
				goto err_unmap;
			}

			if (ctx->devmem_only) {
				err = -EFAULT;
				goto err_unmap;
			}

			addr = dma_map_page(gpusvm->drm->dev,
					    page, 0,
					    PAGE_SIZE << order,
					    DMA_BIDIRECTIONAL);
			if (dma_mapping_error(gpusvm->drm->dev, addr)) {
				err = -EFAULT;
				goto err_unmap;
			}

			range->dma_addr[j] = drm_pagemap_device_addr_encode
				(addr, DRM_INTERCONNECT_SYSTEM, order,
				 DMA_BIDIRECTIONAL);
		}
		i += 1 << order;
		num_dma_mapped = i;
		flags.has_dma_mapping = true;
	}

	if (pagemap) {
		flags.has_devmem_pages = true;
		range->dpagemap = dpagemap;
	}

	/* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */
	WRITE_ONCE(range->flags.__flags, flags.__flags);

	drm_gpusvm_notifier_unlock(gpusvm);
	kvfree(pfns);
set_seqno:
	range->notifier_seq = hmm_range.notifier_seq;

	return 0;

err_unmap:
	__drm_gpusvm_range_unmap_pages(gpusvm, range, num_dma_mapped);
	drm_gpusvm_notifier_unlock(gpusvm);
err_free:
	kvfree(pfns);
	if (err == -EAGAIN)
		goto retry;
	return err;
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_get_pages);

/**
 * drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range
 * @gpusvm: Pointer to the GPU SVM structure
 * @range: Pointer to the GPU SVM range structure
 * @ctx: GPU SVM context
 *
 * This function unmaps pages associated with a GPU SVM range. If @in_notifier
 * is set, it is assumed that gpusvm->notifier_lock is held in write mode; if it
 * is clear, it acquires gpusvm->notifier_lock in read mode. Must be called on
 * each GPU SVM range attached to notifier in gpusvm->ops->invalidate for IOMMU
 * security model.
 */
void drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm,
				  struct drm_gpusvm_range *range,
				  const struct drm_gpusvm_ctx *ctx)
{
	unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
					       drm_gpusvm_range_end(range));

	if (ctx->in_notifier)
		lockdep_assert_held_write(&gpusvm->notifier_lock);
	else
		drm_gpusvm_notifier_lock(gpusvm);

	__drm_gpusvm_range_unmap_pages(gpusvm, range, npages);

	if (!ctx->in_notifier)
		drm_gpusvm_notifier_unlock(gpusvm);
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_unmap_pages);

/**
 * drm_gpusvm_range_evict() - Evict GPU SVM range
 * @gpusvm: Pointer to the GPU SVM structure
 * @range: Pointer to the GPU SVM range to be evicted
 *
 * This function evicts the specified GPU SVM range.
 *
 * Return: 0 on success, a negative error code on failure.
 */
int drm_gpusvm_range_evict(struct drm_gpusvm *gpusvm,
			   struct drm_gpusvm_range *range)
{
	struct mmu_interval_notifier *notifier = &range->notifier->notifier;
	struct hmm_range hmm_range = {
		.default_flags = HMM_PFN_REQ_FAULT,
		.notifier = notifier,
		.start = drm_gpusvm_range_start(range),
		.end = drm_gpusvm_range_end(range),
		.dev_private_owner = NULL,
	};
	unsigned long timeout =
		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
	unsigned long *pfns;
	unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
					       drm_gpusvm_range_end(range));
	int err = 0;
	struct mm_struct *mm = gpusvm->mm;

	if (!mmget_not_zero(mm))
		return -EFAULT;

	pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
	if (!pfns) {
		mmput(mm);
		return -ENOMEM;
	}

	hmm_range.hmm_pfns = pfns;
	while (!time_after(jiffies, timeout)) {
		hmm_range.notifier_seq = mmu_interval_read_begin(notifier);
		if (time_after(jiffies, timeout)) {
			err = -ETIME;
			break;
		}

		mmap_read_lock(mm);
		err = hmm_range_fault(&hmm_range);
		mmap_read_unlock(mm);
		if (err != -EBUSY)
			break;
	}

	kvfree(pfns);
	mmput(mm);

	return err;
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_evict);

/**
 * drm_gpusvm_has_mapping() - Check if GPU SVM has mapping for the given address range
 * @gpusvm: Pointer to the GPU SVM structure.
 * @start: Start address
 * @end: End address
 *
 * Return: True if GPU SVM has mapping, False otherwise
 */
bool drm_gpusvm_has_mapping(struct drm_gpusvm *gpusvm, unsigned long start,
			    unsigned long end)
{
	struct drm_gpusvm_notifier *notifier;

	drm_gpusvm_for_each_notifier(notifier, gpusvm, start, end) {
		struct drm_gpusvm_range *range = NULL;

		drm_gpusvm_for_each_range(range, notifier, start, end)
			return true;
	}

	return false;
}
EXPORT_SYMBOL_GPL(drm_gpusvm_has_mapping);

/**
 * drm_gpusvm_range_set_unmapped() - Mark a GPU SVM range as unmapped
 * @range: Pointer to the GPU SVM range structure.
 * @mmu_range: Pointer to the MMU notifier range structure.
 *
 * This function marks a GPU SVM range as unmapped and sets the partial_unmap flag
 * if the range partially falls within the provided MMU notifier range.
 */
void drm_gpusvm_range_set_unmapped(struct drm_gpusvm_range *range,
				   const struct mmu_notifier_range *mmu_range)
{
	lockdep_assert_held_write(&range->gpusvm->notifier_lock);

	range->flags.unmapped = true;
	if (drm_gpusvm_range_start(range) < mmu_range->start ||
	    drm_gpusvm_range_end(range) > mmu_range->end)
		range->flags.partial_unmap = true;
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_set_unmapped);

MODULE_DESCRIPTION("DRM GPUSVM");
MODULE_LICENSE("GPL");