// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
 * Copyright © 2024 Intel Corporation
 *
 * Authors:
 *     Matthew Brost <matthew.brost@intel.com>
 */

#include <linux/dma-mapping.h>
#include <linux/export.h>
#include <linux/hmm.h>
#include <linux/hugetlb_inline.h>
#include <linux/memremap.h>
#include <linux/mm_types.h>
#include <linux/slab.h>

#include <drm/drm_device.h>
#include <drm/drm_gpusvm.h>
#include <drm/drm_pagemap.h>
#include <drm/drm_print.h>

/**
 * DOC: Overview
 *
 * The GPU Shared Virtual Memory (GPU SVM) layer for the Direct Rendering
 * Manager (DRM) is a component of the DRM framework designed to manage shared
 * virtual memory between the CPU and GPU. It enables efficient data exchange
 * and processing for GPU-accelerated applications by allowing memory sharing
 * and synchronization between the CPU's and GPU's virtual address spaces.
 *
 * Key GPU SVM Components:
 *
 * - Notifiers:
 *	Used for tracking memory intervals and notifying the GPU of changes,
 *	notifiers are sized based on a GPU SVM initialization parameter, with a
 *	recommendation of 512M or larger. They maintain a Red-Black tree and a
 *	list of ranges that fall within the notifier interval. Notifiers are
 *	tracked within a GPU SVM Red-Black tree and list and are dynamically
 *	inserted or removed as ranges within the interval are created or
 *	destroyed.
 *
 * - Ranges:
 *	Represent memory ranges mapped in a DRM device and managed by GPU SVM.
 *	They are sized based on an array of chunk sizes, which is a GPU SVM
 *	initialization parameter, and the CPU address space. Upon GPU fault,
 *	the largest aligned chunk that fits within the faulting CPU address
 *	space is chosen for the range size. Ranges are expected to be
 *	dynamically allocated on GPU fault and removed on an MMU notifier UNMAP
 *	event. As mentioned above, ranges are tracked in a notifier's Red-Black
 *	tree.
 *
 * - Operations:
 *	Define the interface for driver-specific GPU SVM operations such as
 *	range allocation, notifier allocation, and invalidations. A sketch of
 *	wiring these up follows this section.
 *
 * - Device Memory Allocations:
 *	Embedded structure containing enough information for GPU SVM to migrate
 *	to / from device memory.
 *
 * - Device Memory Operations:
 *	Define the interface for driver-specific device memory operations to
 *	release memory, populate pfns, and copy to / from device memory.
 *
 * This layer provides interfaces for allocating, mapping, migrating, and
 * releasing memory ranges between the CPU and GPU. It handles all core memory
 * management interactions (DMA mapping, HMM, and migration) and provides
 * driver-specific virtual functions (vfuncs). This infrastructure is sufficient
 * to build the expected driver components for an SVM implementation as detailed
 * below.
 *
 * Expected Driver Components:
 *
 * - GPU page fault handler:
 *	Used to create ranges and notifiers based on the fault address,
 *	optionally migrate the range to device memory, and create GPU bindings.
 *
 * - Garbage collector:
 *	Used to unmap and destroy GPU bindings for ranges. Ranges are expected
 *	to be added to the garbage collector upon an MMU_NOTIFY_UNMAP event in
 *	the notifier callback.
 *
 * - Notifier callback:
 *	Used to invalidate and DMA unmap GPU bindings for ranges.
 */
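/*
 * A minimal sketch (driver names are hypothetical) of the driver-side
 * &struct drm_gpusvm_ops wiring described above. Only the invalidate callback
 * is mandatory; the allocation callbacks are optional and default to
 * kzalloc()/kfree() of the base structures:
 *
 * .. code-block:: c
 *
 *	static void driver_invalidate(struct drm_gpusvm *gpusvm,
 *				      struct drm_gpusvm_notifier *notifier,
 *				      const struct mmu_notifier_range *mmu_range)
 *	{
 *		// See example 3) in the Examples section below
 *		driver_invalidation(gpusvm, notifier, mmu_range);
 *	}
 *
 *	static struct drm_gpusvm_range *driver_range_alloc(struct drm_gpusvm *gpusvm)
 *	{
 *		// Hypothetical driver range wrapping the base drm_gpusvm_range
 *		struct driver_range *range = kzalloc(sizeof(*range), GFP_KERNEL);
 *
 *		return range ? &range->base : NULL;
 *	}
 *
 *	static void driver_range_free(struct drm_gpusvm_range *range)
 *	{
 *		kfree(container_of(range, struct driver_range, base));
 *	}
 *
 *	static const struct drm_gpusvm_ops driver_gpusvm_ops = {
 *		.invalidate = driver_invalidate,
 *		.range_alloc = driver_range_alloc,
 *		.range_free = driver_range_free,
 *	};
 */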
/**
 * DOC: Locking
 *
 * GPU SVM handles locking for core MM interactions, i.e., it locks/unlocks the
 * mmap lock as needed.
 *
 * GPU SVM introduces a global notifier lock, which safeguards the notifier's
 * range RB tree and list, as well as the range's DMA mappings and sequence
 * number. GPU SVM manages all necessary locking and unlocking operations,
 * except for rechecking that a range's pages are still valid
 * (drm_gpusvm_range_pages_valid()) when the driver is committing GPU bindings.
 * This lock corresponds to the ``driver->update`` lock mentioned in
 * Documentation/mm/hmm.rst. Future revisions may transition from a GPU SVM
 * global lock to a per-notifier lock if finer-grained locking is deemed
 * necessary.
 *
 * In addition to the locking mentioned above, the driver should implement a
 * lock to safeguard core GPU SVM function calls that modify state, such as
 * drm_gpusvm_range_find_or_insert() and drm_gpusvm_range_remove(). This lock
 * is denoted as 'driver_svm_lock' in code examples. Finer-grained driver-side
 * locking should also be possible for concurrent GPU fault processing within a
 * single GPU SVM. The 'driver_svm_lock' can be passed to
 * drm_gpusvm_driver_set_lock() to add lockdep annotations to GPU SVM, as
 * sketched below.
 */
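/*
 * A minimal sketch (not taken from any particular driver; names are
 * hypothetical) of setting up the 'driver_svm_lock' referenced above and
 * annotating GPU SVM with it via drm_gpusvm_driver_set_lock() (see
 * drm_gpusvm.h for the exact helper), assuming a driver VM object that embeds
 * a struct drm_gpusvm:
 *
 * .. code-block:: c
 *
 *	struct driver_vm {
 *		struct drm_gpusvm svm;
 *		struct mutex svm_lock;	// 'driver_svm_lock' in the examples
 *	};
 *
 *	static void driver_svm_lock_init(struct driver_vm *vm)
 *	{
 *		mutex_init(&vm->svm_lock);
 *		drm_gpusvm_driver_set_lock(&vm->svm, &vm->svm_lock);
 *	}
 *
 *	// The GPU fault and garbage-collector paths then wrap state-modifying
 *	// GPU SVM calls:
 *	//	mutex_lock(&vm->svm_lock);	// driver_svm_lock()
 *	//	... drm_gpusvm_range_find_or_insert(), drm_gpusvm_range_remove() ...
 *	//	mutex_unlock(&vm->svm_lock);	// driver_svm_unlock()
 */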
/**
 * DOC: Partial Unmapping of Ranges
 *
 * Partial unmapping of ranges (e.g., 1M out of 2M is unmapped by CPU resulting
 * in MMU_NOTIFY_UNMAP event) presents several challenges, with the main one
 * being that a subset of the range still has CPU and GPU mappings. If the
 * backing store for the range is in device memory, a subset of the backing
 * store has references. One option would be to split the range and device
 * memory backing store, but the implementation for this would be quite
 * complicated. Given that partial unmappings are rare and driver-defined range
 * sizes are relatively small, GPU SVM does not support splitting of ranges.
 *
 * With no support for range splitting, upon partial unmapping of a range, the
 * driver is expected to invalidate and destroy the entire range. If the range
 * has device memory as its backing, the driver is also expected to migrate any
 * remaining pages back to RAM.
 */

/**
 * DOC: Examples
 *
 * This section provides three examples of how to build the expected driver
 * components: the GPU page fault handler, the garbage collector, and the
 * notifier callback.
 *
 * The generic code provided does not include logic for complex migration
 * policies, optimized invalidations, fine-grained driver locking, or other
 * potentially required driver locking (e.g., DMA-resv locks).
 *
 * 1) GPU page fault handler
 *
 * .. code-block:: c
 *
 *	int driver_bind_range(struct drm_gpusvm *gpusvm, struct drm_gpusvm_range *range)
 *	{
 *		int err = 0;
 *
 *		driver_alloc_and_setup_memory_for_bind(gpusvm, range);
 *
 *		drm_gpusvm_notifier_lock(gpusvm);
 *		if (drm_gpusvm_range_pages_valid(range))
 *			driver_commit_bind(gpusvm, range);
 *		else
 *			err = -EAGAIN;
 *		drm_gpusvm_notifier_unlock(gpusvm);
 *
 *		return err;
 *	}
 *
 *	int driver_gpu_fault(struct drm_gpusvm *gpusvm, unsigned long fault_addr,
 *			     unsigned long gpuva_start, unsigned long gpuva_end)
 *	{
 *		struct drm_gpusvm_ctx ctx = {};
 *		struct drm_gpusvm_range *range;
 *		int err;
 *
 *		driver_svm_lock();
 *	retry:
 *		// Always process UNMAPs first so view of GPU SVM ranges is current
 *		driver_garbage_collector(gpusvm);
 *
 *		range = drm_gpusvm_range_find_or_insert(gpusvm, fault_addr,
 *							gpuva_start, gpuva_end,
 *							&ctx);
 *		if (IS_ERR(range)) {
 *			err = PTR_ERR(range);
 *			goto unlock;
 *		}
 *
 *		if (driver_migration_policy(range)) {
 *			mmap_read_lock(mm);
 *			devmem = driver_alloc_devmem();
 *			err = drm_pagemap_migrate_to_devmem(devmem, gpusvm->mm, gpuva_start,
 *							    gpuva_end, ctx.timeslice_ms,
 *							    driver_pgmap_owner());
 *			mmap_read_unlock(mm);
 *			if (err)	// CPU mappings may have changed
 *				goto retry;
 *		}
 *
 *		err = drm_gpusvm_range_get_pages(gpusvm, range, &ctx);
 *		if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {	// CPU mappings changed
 *			if (err == -EOPNOTSUPP)
 *				drm_gpusvm_range_evict(gpusvm, range);
 *			goto retry;
 *		} else if (err) {
 *			goto unlock;
 *		}
 *
 *		err = driver_bind_range(gpusvm, range);
 *		if (err == -EAGAIN)	// CPU mappings changed
 *			goto retry;
 *
 *	unlock:
 *		driver_svm_unlock();
 *		return err;
 *	}
 *
 * 2) Garbage Collector
 *
 * .. code-block:: c
 *
 *	void __driver_garbage_collector(struct drm_gpusvm *gpusvm,
 *					struct drm_gpusvm_range *range)
 *	{
 *		assert_driver_svm_locked(gpusvm);
 *
 *		// Partial unmap, migrate any remaining device memory pages back to RAM
 *		if (range->flags.partial_unmap)
 *			drm_gpusvm_range_evict(gpusvm, range);
 *
 *		driver_unbind_range(range);
 *		drm_gpusvm_range_remove(gpusvm, range);
 *	}
 *
 *	void driver_garbage_collector(struct drm_gpusvm *gpusvm)
 *	{
 *		assert_driver_svm_locked(gpusvm);
 *
 *		for_each_range_in_garbage_collector(gpusvm, range)
 *			__driver_garbage_collector(gpusvm, range);
 *	}
 *
 * 3) Notifier callback
 *
 * .. code-block:: c
 *
 *	void driver_invalidation(struct drm_gpusvm *gpusvm,
 *				 struct drm_gpusvm_notifier *notifier,
 *				 const struct mmu_notifier_range *mmu_range)
 *	{
 *		struct drm_gpusvm_ctx ctx = { .in_notifier = true, };
 *		struct drm_gpusvm_range *range = NULL;
 *
 *		driver_invalidate_device_pages(gpusvm, mmu_range->start, mmu_range->end);
 *
 *		drm_gpusvm_for_each_range(range, notifier, mmu_range->start,
 *					  mmu_range->end) {
 *			drm_gpusvm_range_unmap_pages(gpusvm, range, &ctx);
 *
 *			if (mmu_range->event != MMU_NOTIFY_UNMAP)
 *				continue;
 *
 *			drm_gpusvm_range_set_unmapped(range, mmu_range);
 *			driver_garbage_collector_add(gpusvm, range);
 *		}
 *	}
 */
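/*
 * The examples above assume the GPU SVM itself has already been set up. A
 * minimal sketch of that setup (sizes and driver names are illustrative only),
 * following the documented recommendations of a notifier size of 512M or
 * larger and chunk sizes in descending powers of two ending at SZ_4K:
 *
 * .. code-block:: c
 *
 *	static const unsigned long driver_chunk_sizes[] = { SZ_2M, SZ_64K, SZ_4K };
 *
 *	int driver_svm_init(struct driver_vm *vm, struct drm_device *drm,
 *			    struct mm_struct *mm)
 *	{
 *		return drm_gpusvm_init(&vm->svm, "driver-svm", drm, mm,
 *				       driver_pgmap_owner(), 0, TASK_SIZE,
 *				       SZ_512M, &driver_gpusvm_ops,
 *				       driver_chunk_sizes,
 *				       ARRAY_SIZE(driver_chunk_sizes));
 *	}
 */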
/**
 * npages_in_range() - Calculate the number of pages in a given range
 * @start: The start address of the range
 * @end: The end address of the range
 *
 * This function calculates the number of pages in a given memory range,
 * specified by the start and end addresses. It divides the difference
 * between the end and start addresses by the page size (PAGE_SIZE) to
 * determine the number of pages in the range.
 *
 * Return: The number of pages in the specified range.
 */
static unsigned long
npages_in_range(unsigned long start, unsigned long end)
{
	return (end - start) >> PAGE_SHIFT;
}

/**
 * drm_gpusvm_range_find() - Find GPU SVM range from GPU SVM notifier
 * @notifier: Pointer to the GPU SVM notifier structure.
 * @start: Start address of the range
 * @end: End address of the range
 *
 * Return: A pointer to the drm_gpusvm_range if found or NULL
 */
struct drm_gpusvm_range *
drm_gpusvm_range_find(struct drm_gpusvm_notifier *notifier, unsigned long start,
		      unsigned long end)
{
	struct interval_tree_node *itree;

	itree = interval_tree_iter_first(&notifier->root, start, end - 1);

	if (itree)
		return container_of(itree, struct drm_gpusvm_range, itree);
	else
		return NULL;
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_find);

/**
 * drm_gpusvm_for_each_range_safe() - Safely iterate over GPU SVM ranges in a notifier
 * @range__: Iterator variable for the ranges
 * @next__: Iterator variable for the ranges temporary storage
 * @notifier__: Pointer to the GPU SVM notifier
 * @start__: Start address of the range
 * @end__: End address of the range
 *
 * This macro is used to iterate over GPU SVM ranges in a notifier while
 * removing ranges from it.
 */
#define drm_gpusvm_for_each_range_safe(range__, next__, notifier__, start__, end__)	\
	for ((range__) = drm_gpusvm_range_find((notifier__), (start__), (end__)),	\
	     (next__) = __drm_gpusvm_range_next(range__);				\
	     (range__) && (drm_gpusvm_range_start(range__) < (end__));			\
	     (range__) = (next__), (next__) = __drm_gpusvm_range_next(range__))

/**
 * __drm_gpusvm_notifier_next() - get the next drm_gpusvm_notifier in the list
 * @notifier: a pointer to the current drm_gpusvm_notifier
 *
 * Return: A pointer to the next drm_gpusvm_notifier if available, or NULL if
 *         the current notifier is the last one or if the input notifier is
 *         NULL.
 */
static struct drm_gpusvm_notifier *
__drm_gpusvm_notifier_next(struct drm_gpusvm_notifier *notifier)
{
	if (notifier && !list_is_last(&notifier->entry,
				      &notifier->gpusvm->notifier_list))
		return list_next_entry(notifier, entry);

	return NULL;
}

static struct drm_gpusvm_notifier *
notifier_iter_first(struct rb_root_cached *root, unsigned long start,
		    unsigned long last)
{
	struct interval_tree_node *itree;

	itree = interval_tree_iter_first(root, start, last);

	if (itree)
		return container_of(itree, struct drm_gpusvm_notifier, itree);
	else
		return NULL;
}

/**
 * drm_gpusvm_for_each_notifier() - Iterate over GPU SVM notifiers in a gpusvm
 * @notifier__: Iterator variable for the notifiers
 * @gpusvm__: Pointer to the GPU SVM structure
 * @start__: Start address of the notifier
 * @end__: End address of the notifier
 *
 * This macro is used to iterate over GPU SVM notifiers in a gpusvm.
 */
#define drm_gpusvm_for_each_notifier(notifier__, gpusvm__, start__, end__)		\
	for ((notifier__) = notifier_iter_first(&(gpusvm__)->root, (start__), (end__) - 1);	\
	     (notifier__) && (drm_gpusvm_notifier_start(notifier__) < (end__));		\
	     (notifier__) = __drm_gpusvm_notifier_next(notifier__))

/**
 * drm_gpusvm_for_each_notifier_safe() - Safely iterate over GPU SVM notifiers in a gpusvm
 * @notifier__: Iterator variable for the notifiers
 * @next__: Iterator variable for the notifiers temporary storage
 * @gpusvm__: Pointer to the GPU SVM structure
 * @start__: Start address of the notifier
 * @end__: End address of the notifier
 *
 * This macro is used to iterate over GPU SVM notifiers in a gpusvm while
 * removing notifiers from it.
 */
#define drm_gpusvm_for_each_notifier_safe(notifier__, next__, gpusvm__, start__, end__)	\
	for ((notifier__) = notifier_iter_first(&(gpusvm__)->root, (start__), (end__) - 1),	\
	     (next__) = __drm_gpusvm_notifier_next(notifier__);				\
	     (notifier__) && (drm_gpusvm_notifier_start(notifier__) < (end__));		\
	     (notifier__) = (next__), (next__) = __drm_gpusvm_notifier_next(notifier__))

/**
 * drm_gpusvm_notifier_invalidate() - Invalidate a GPU SVM notifier.
 * @mni: Pointer to the mmu_interval_notifier structure.
 * @mmu_range: Pointer to the mmu_notifier_range structure.
 * @cur_seq: Current sequence number.
 *
 * This function serves as a generic MMU notifier for GPU SVM. It sets the MMU
 * notifier sequence number and calls the driver invalidate vfunc under
 * gpusvm->notifier_lock.
 *
 * Return: true if the operation succeeds, false otherwise.
390 */ 391 static bool 392 drm_gpusvm_notifier_invalidate(struct mmu_interval_notifier *mni, 393 const struct mmu_notifier_range *mmu_range, 394 unsigned long cur_seq) 395 { 396 struct drm_gpusvm_notifier *notifier = 397 container_of(mni, typeof(*notifier), notifier); 398 struct drm_gpusvm *gpusvm = notifier->gpusvm; 399 400 if (!mmu_notifier_range_blockable(mmu_range)) 401 return false; 402 403 down_write(&gpusvm->notifier_lock); 404 mmu_interval_set_seq(mni, cur_seq); 405 gpusvm->ops->invalidate(gpusvm, notifier, mmu_range); 406 up_write(&gpusvm->notifier_lock); 407 408 return true; 409 } 410 411 /* 412 * drm_gpusvm_notifier_ops - MMU interval notifier operations for GPU SVM 413 */ 414 static const struct mmu_interval_notifier_ops drm_gpusvm_notifier_ops = { 415 .invalidate = drm_gpusvm_notifier_invalidate, 416 }; 417 418 /** 419 * drm_gpusvm_init() - Initialize the GPU SVM. 420 * @gpusvm: Pointer to the GPU SVM structure. 421 * @name: Name of the GPU SVM. 422 * @drm: Pointer to the DRM device structure. 423 * @mm: Pointer to the mm_struct for the address space. 424 * @device_private_page_owner: Device private pages owner. 425 * @mm_start: Start address of GPU SVM. 426 * @mm_range: Range of the GPU SVM. 427 * @notifier_size: Size of individual notifiers. 428 * @ops: Pointer to the operations structure for GPU SVM. 429 * @chunk_sizes: Pointer to the array of chunk sizes used in range allocation. 430 * Entries should be powers of 2 in descending order with last 431 * entry being SZ_4K. 432 * @num_chunks: Number of chunks. 433 * 434 * This function initializes the GPU SVM. 435 * 436 * Return: 0 on success, a negative error code on failure. 437 */ 438 int drm_gpusvm_init(struct drm_gpusvm *gpusvm, 439 const char *name, struct drm_device *drm, 440 struct mm_struct *mm, void *device_private_page_owner, 441 unsigned long mm_start, unsigned long mm_range, 442 unsigned long notifier_size, 443 const struct drm_gpusvm_ops *ops, 444 const unsigned long *chunk_sizes, int num_chunks) 445 { 446 if (!ops->invalidate || !num_chunks) 447 return -EINVAL; 448 449 gpusvm->name = name; 450 gpusvm->drm = drm; 451 gpusvm->mm = mm; 452 gpusvm->device_private_page_owner = device_private_page_owner; 453 gpusvm->mm_start = mm_start; 454 gpusvm->mm_range = mm_range; 455 gpusvm->notifier_size = notifier_size; 456 gpusvm->ops = ops; 457 gpusvm->chunk_sizes = chunk_sizes; 458 gpusvm->num_chunks = num_chunks; 459 460 mmgrab(mm); 461 gpusvm->root = RB_ROOT_CACHED; 462 INIT_LIST_HEAD(&gpusvm->notifier_list); 463 464 init_rwsem(&gpusvm->notifier_lock); 465 466 fs_reclaim_acquire(GFP_KERNEL); 467 might_lock(&gpusvm->notifier_lock); 468 fs_reclaim_release(GFP_KERNEL); 469 470 #ifdef CONFIG_LOCKDEP 471 gpusvm->lock_dep_map = NULL; 472 #endif 473 474 return 0; 475 } 476 EXPORT_SYMBOL_GPL(drm_gpusvm_init); 477 478 /** 479 * drm_gpusvm_notifier_find() - Find GPU SVM notifier 480 * @gpusvm: Pointer to the GPU SVM structure 481 * @fault_addr: Fault address 482 * 483 * This function finds the GPU SVM notifier associated with the fault address. 484 * 485 * Return: Pointer to the GPU SVM notifier on success, NULL otherwise. 
486 */ 487 static struct drm_gpusvm_notifier * 488 drm_gpusvm_notifier_find(struct drm_gpusvm *gpusvm, 489 unsigned long fault_addr) 490 { 491 return notifier_iter_first(&gpusvm->root, fault_addr, fault_addr + 1); 492 } 493 494 /** 495 * to_drm_gpusvm_notifier() - retrieve the container struct for a given rbtree node 496 * @node: a pointer to the rbtree node embedded within a drm_gpusvm_notifier struct 497 * 498 * Return: A pointer to the containing drm_gpusvm_notifier structure. 499 */ 500 static struct drm_gpusvm_notifier *to_drm_gpusvm_notifier(struct rb_node *node) 501 { 502 return container_of(node, struct drm_gpusvm_notifier, itree.rb); 503 } 504 505 /** 506 * drm_gpusvm_notifier_insert() - Insert GPU SVM notifier 507 * @gpusvm: Pointer to the GPU SVM structure 508 * @notifier: Pointer to the GPU SVM notifier structure 509 * 510 * This function inserts the GPU SVM notifier into the GPU SVM RB tree and list. 511 */ 512 static void drm_gpusvm_notifier_insert(struct drm_gpusvm *gpusvm, 513 struct drm_gpusvm_notifier *notifier) 514 { 515 struct rb_node *node; 516 struct list_head *head; 517 518 interval_tree_insert(¬ifier->itree, &gpusvm->root); 519 520 node = rb_prev(¬ifier->itree.rb); 521 if (node) 522 head = &(to_drm_gpusvm_notifier(node))->entry; 523 else 524 head = &gpusvm->notifier_list; 525 526 list_add(¬ifier->entry, head); 527 } 528 529 /** 530 * drm_gpusvm_notifier_remove() - Remove GPU SVM notifier 531 * @gpusvm: Pointer to the GPU SVM tructure 532 * @notifier: Pointer to the GPU SVM notifier structure 533 * 534 * This function removes the GPU SVM notifier from the GPU SVM RB tree and list. 535 */ 536 static void drm_gpusvm_notifier_remove(struct drm_gpusvm *gpusvm, 537 struct drm_gpusvm_notifier *notifier) 538 { 539 interval_tree_remove(¬ifier->itree, &gpusvm->root); 540 list_del(¬ifier->entry); 541 } 542 543 /** 544 * drm_gpusvm_fini() - Finalize the GPU SVM. 545 * @gpusvm: Pointer to the GPU SVM structure. 546 * 547 * This function finalizes the GPU SVM by cleaning up any remaining ranges and 548 * notifiers, and dropping a reference to struct MM. 549 */ 550 void drm_gpusvm_fini(struct drm_gpusvm *gpusvm) 551 { 552 struct drm_gpusvm_notifier *notifier, *next; 553 554 drm_gpusvm_for_each_notifier_safe(notifier, next, gpusvm, 0, LONG_MAX) { 555 struct drm_gpusvm_range *range, *__next; 556 557 /* 558 * Remove notifier first to avoid racing with any invalidation 559 */ 560 mmu_interval_notifier_remove(¬ifier->notifier); 561 notifier->flags.removed = true; 562 563 drm_gpusvm_for_each_range_safe(range, __next, notifier, 0, 564 LONG_MAX) 565 drm_gpusvm_range_remove(gpusvm, range); 566 } 567 568 mmdrop(gpusvm->mm); 569 WARN_ON(!RB_EMPTY_ROOT(&gpusvm->root.rb_root)); 570 } 571 EXPORT_SYMBOL_GPL(drm_gpusvm_fini); 572 573 /** 574 * drm_gpusvm_notifier_alloc() - Allocate GPU SVM notifier 575 * @gpusvm: Pointer to the GPU SVM structure 576 * @fault_addr: Fault address 577 * 578 * This function allocates and initializes the GPU SVM notifier structure. 579 * 580 * Return: Pointer to the allocated GPU SVM notifier on success, ERR_PTR() on failure. 
581 */ 582 static struct drm_gpusvm_notifier * 583 drm_gpusvm_notifier_alloc(struct drm_gpusvm *gpusvm, unsigned long fault_addr) 584 { 585 struct drm_gpusvm_notifier *notifier; 586 587 if (gpusvm->ops->notifier_alloc) 588 notifier = gpusvm->ops->notifier_alloc(); 589 else 590 notifier = kzalloc(sizeof(*notifier), GFP_KERNEL); 591 592 if (!notifier) 593 return ERR_PTR(-ENOMEM); 594 595 notifier->gpusvm = gpusvm; 596 notifier->itree.start = ALIGN_DOWN(fault_addr, gpusvm->notifier_size); 597 notifier->itree.last = ALIGN(fault_addr + 1, gpusvm->notifier_size) - 1; 598 INIT_LIST_HEAD(¬ifier->entry); 599 notifier->root = RB_ROOT_CACHED; 600 INIT_LIST_HEAD(¬ifier->range_list); 601 602 return notifier; 603 } 604 605 /** 606 * drm_gpusvm_notifier_free() - Free GPU SVM notifier 607 * @gpusvm: Pointer to the GPU SVM structure 608 * @notifier: Pointer to the GPU SVM notifier structure 609 * 610 * This function frees the GPU SVM notifier structure. 611 */ 612 static void drm_gpusvm_notifier_free(struct drm_gpusvm *gpusvm, 613 struct drm_gpusvm_notifier *notifier) 614 { 615 WARN_ON(!RB_EMPTY_ROOT(¬ifier->root.rb_root)); 616 617 if (gpusvm->ops->notifier_free) 618 gpusvm->ops->notifier_free(notifier); 619 else 620 kfree(notifier); 621 } 622 623 /** 624 * to_drm_gpusvm_range() - retrieve the container struct for a given rbtree node 625 * @node: a pointer to the rbtree node embedded within a drm_gpusvm_range struct 626 * 627 * Return: A pointer to the containing drm_gpusvm_range structure. 628 */ 629 static struct drm_gpusvm_range *to_drm_gpusvm_range(struct rb_node *node) 630 { 631 return container_of(node, struct drm_gpusvm_range, itree.rb); 632 } 633 634 /** 635 * drm_gpusvm_range_insert() - Insert GPU SVM range 636 * @notifier: Pointer to the GPU SVM notifier structure 637 * @range: Pointer to the GPU SVM range structure 638 * 639 * This function inserts the GPU SVM range into the notifier RB tree and list. 640 */ 641 static void drm_gpusvm_range_insert(struct drm_gpusvm_notifier *notifier, 642 struct drm_gpusvm_range *range) 643 { 644 struct rb_node *node; 645 struct list_head *head; 646 647 drm_gpusvm_notifier_lock(notifier->gpusvm); 648 interval_tree_insert(&range->itree, ¬ifier->root); 649 650 node = rb_prev(&range->itree.rb); 651 if (node) 652 head = &(to_drm_gpusvm_range(node))->entry; 653 else 654 head = ¬ifier->range_list; 655 656 list_add(&range->entry, head); 657 drm_gpusvm_notifier_unlock(notifier->gpusvm); 658 } 659 660 /** 661 * __drm_gpusvm_range_remove() - Remove GPU SVM range 662 * @notifier: Pointer to the GPU SVM notifier structure 663 * @range: Pointer to the GPU SVM range structure 664 * 665 * This macro removes the GPU SVM range from the notifier RB tree and list. 666 */ 667 static void __drm_gpusvm_range_remove(struct drm_gpusvm_notifier *notifier, 668 struct drm_gpusvm_range *range) 669 { 670 interval_tree_remove(&range->itree, ¬ifier->root); 671 list_del(&range->entry); 672 } 673 674 /** 675 * drm_gpusvm_range_alloc() - Allocate GPU SVM range 676 * @gpusvm: Pointer to the GPU SVM structure 677 * @notifier: Pointer to the GPU SVM notifier structure 678 * @fault_addr: Fault address 679 * @chunk_size: Chunk size 680 * @migrate_devmem: Flag indicating whether to migrate device memory 681 * 682 * This function allocates and initializes the GPU SVM range structure. 683 * 684 * Return: Pointer to the allocated GPU SVM range on success, ERR_PTR() on failure. 
685 */ 686 static struct drm_gpusvm_range * 687 drm_gpusvm_range_alloc(struct drm_gpusvm *gpusvm, 688 struct drm_gpusvm_notifier *notifier, 689 unsigned long fault_addr, unsigned long chunk_size, 690 bool migrate_devmem) 691 { 692 struct drm_gpusvm_range *range; 693 694 if (gpusvm->ops->range_alloc) 695 range = gpusvm->ops->range_alloc(gpusvm); 696 else 697 range = kzalloc(sizeof(*range), GFP_KERNEL); 698 699 if (!range) 700 return ERR_PTR(-ENOMEM); 701 702 kref_init(&range->refcount); 703 range->gpusvm = gpusvm; 704 range->notifier = notifier; 705 range->itree.start = ALIGN_DOWN(fault_addr, chunk_size); 706 range->itree.last = ALIGN(fault_addr + 1, chunk_size) - 1; 707 INIT_LIST_HEAD(&range->entry); 708 range->notifier_seq = LONG_MAX; 709 range->flags.migrate_devmem = migrate_devmem ? 1 : 0; 710 711 return range; 712 } 713 714 /** 715 * drm_gpusvm_check_pages() - Check pages 716 * @gpusvm: Pointer to the GPU SVM structure 717 * @notifier: Pointer to the GPU SVM notifier structure 718 * @start: Start address 719 * @end: End address 720 * 721 * Check if pages between start and end have been faulted in on the CPU. Use to 722 * prevent migration of pages without CPU backing store. 723 * 724 * Return: True if pages have been faulted into CPU, False otherwise 725 */ 726 static bool drm_gpusvm_check_pages(struct drm_gpusvm *gpusvm, 727 struct drm_gpusvm_notifier *notifier, 728 unsigned long start, unsigned long end) 729 { 730 struct hmm_range hmm_range = { 731 .default_flags = 0, 732 .notifier = ¬ifier->notifier, 733 .start = start, 734 .end = end, 735 .dev_private_owner = gpusvm->device_private_page_owner, 736 }; 737 unsigned long timeout = 738 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); 739 unsigned long *pfns; 740 unsigned long npages = npages_in_range(start, end); 741 int err, i; 742 743 mmap_assert_locked(gpusvm->mm); 744 745 pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); 746 if (!pfns) 747 return false; 748 749 hmm_range.notifier_seq = mmu_interval_read_begin(¬ifier->notifier); 750 hmm_range.hmm_pfns = pfns; 751 752 while (true) { 753 err = hmm_range_fault(&hmm_range); 754 if (err == -EBUSY) { 755 if (time_after(jiffies, timeout)) 756 break; 757 758 hmm_range.notifier_seq = 759 mmu_interval_read_begin(¬ifier->notifier); 760 continue; 761 } 762 break; 763 } 764 if (err) 765 goto err_free; 766 767 for (i = 0; i < npages;) { 768 if (!(pfns[i] & HMM_PFN_VALID)) { 769 err = -EFAULT; 770 goto err_free; 771 } 772 i += 0x1 << hmm_pfn_to_map_order(pfns[i]); 773 } 774 775 err_free: 776 kvfree(pfns); 777 return err ? false : true; 778 } 779 780 /** 781 * drm_gpusvm_range_chunk_size() - Determine chunk size for GPU SVM range 782 * @gpusvm: Pointer to the GPU SVM structure 783 * @notifier: Pointer to the GPU SVM notifier structure 784 * @vas: Pointer to the virtual memory area structure 785 * @fault_addr: Fault address 786 * @gpuva_start: Start address of GPUVA which mirrors CPU 787 * @gpuva_end: End address of GPUVA which mirrors CPU 788 * @check_pages_threshold: Check CPU pages for present threshold 789 * 790 * This function determines the chunk size for the GPU SVM range based on the 791 * fault address, GPU SVM chunk sizes, existing GPU SVM ranges, and the virtual 792 * memory area boundaries. 793 * 794 * Return: Chunk size on success, LONG_MAX on failure. 
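 *
 * As a purely illustrative example (the numbers are not taken from any
 * driver): with chunk sizes of {SZ_2M, SZ_64K, SZ_4K}, a fault at 0x201000,
 * and a VMA (plus notifier and GPUVA bounds) covering 0x200000-0x240000, the
 * 2M chunk (0x200000-0x400000) is rejected because it crosses the VMA end,
 * while the 64K chunk (0x200000-0x210000) fits, so SZ_64K is returned.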
795 */ 796 static unsigned long 797 drm_gpusvm_range_chunk_size(struct drm_gpusvm *gpusvm, 798 struct drm_gpusvm_notifier *notifier, 799 struct vm_area_struct *vas, 800 unsigned long fault_addr, 801 unsigned long gpuva_start, 802 unsigned long gpuva_end, 803 unsigned long check_pages_threshold) 804 { 805 unsigned long start, end; 806 int i = 0; 807 808 retry: 809 for (; i < gpusvm->num_chunks; ++i) { 810 start = ALIGN_DOWN(fault_addr, gpusvm->chunk_sizes[i]); 811 end = ALIGN(fault_addr + 1, gpusvm->chunk_sizes[i]); 812 813 if (start >= vas->vm_start && end <= vas->vm_end && 814 start >= drm_gpusvm_notifier_start(notifier) && 815 end <= drm_gpusvm_notifier_end(notifier) && 816 start >= gpuva_start && end <= gpuva_end) 817 break; 818 } 819 820 if (i == gpusvm->num_chunks) 821 return LONG_MAX; 822 823 /* 824 * If allocation more than page, ensure not to overlap with existing 825 * ranges. 826 */ 827 if (end - start != SZ_4K) { 828 struct drm_gpusvm_range *range; 829 830 range = drm_gpusvm_range_find(notifier, start, end); 831 if (range) { 832 ++i; 833 goto retry; 834 } 835 836 /* 837 * XXX: Only create range on pages CPU has faulted in. Without 838 * this check, or prefault, on BMG 'xe_exec_system_allocator --r 839 * process-many-malloc' fails. In the failure case, each process 840 * mallocs 16k but the CPU VMA is ~128k which results in 64k SVM 841 * ranges. When migrating the SVM ranges, some processes fail in 842 * drm_pagemap_migrate_to_devmem with 'migrate.cpages != npages' 843 * and then upon drm_gpusvm_range_get_pages device pages from 844 * other processes are collected + faulted in which creates all 845 * sorts of problems. Unsure exactly how this happening, also 846 * problem goes away if 'xe_exec_system_allocator --r 847 * process-many-malloc' mallocs at least 64k at a time. 848 */ 849 if (end - start <= check_pages_threshold && 850 !drm_gpusvm_check_pages(gpusvm, notifier, start, end)) { 851 ++i; 852 goto retry; 853 } 854 } 855 856 return end - start; 857 } 858 859 #ifdef CONFIG_LOCKDEP 860 /** 861 * drm_gpusvm_driver_lock_held() - Assert GPU SVM driver lock is held 862 * @gpusvm: Pointer to the GPU SVM structure. 863 * 864 * Ensure driver lock is held. 865 */ 866 static void drm_gpusvm_driver_lock_held(struct drm_gpusvm *gpusvm) 867 { 868 if ((gpusvm)->lock_dep_map) 869 lockdep_assert(lock_is_held_type((gpusvm)->lock_dep_map, 0)); 870 } 871 #else 872 static void drm_gpusvm_driver_lock_held(struct drm_gpusvm *gpusvm) 873 { 874 } 875 #endif 876 877 /** 878 * drm_gpusvm_find_vma_start() - Find start address for first VMA in range 879 * @gpusvm: Pointer to the GPU SVM structure 880 * @start: The inclusive start user address. 881 * @end: The exclusive end user address. 882 * 883 * Returns: The start address of first VMA within the provided range, 884 * ULONG_MAX otherwise. Assumes start_addr < end_addr. 
885 */ 886 unsigned long 887 drm_gpusvm_find_vma_start(struct drm_gpusvm *gpusvm, 888 unsigned long start, 889 unsigned long end) 890 { 891 struct mm_struct *mm = gpusvm->mm; 892 struct vm_area_struct *vma; 893 unsigned long addr = ULONG_MAX; 894 895 if (!mmget_not_zero(mm)) 896 return addr; 897 898 mmap_read_lock(mm); 899 900 vma = find_vma_intersection(mm, start, end); 901 if (vma) 902 addr = vma->vm_start; 903 904 mmap_read_unlock(mm); 905 mmput(mm); 906 907 return addr; 908 } 909 EXPORT_SYMBOL_GPL(drm_gpusvm_find_vma_start); 910 911 /** 912 * drm_gpusvm_range_find_or_insert() - Find or insert GPU SVM range 913 * @gpusvm: Pointer to the GPU SVM structure 914 * @fault_addr: Fault address 915 * @gpuva_start: Start address of GPUVA which mirrors CPU 916 * @gpuva_end: End address of GPUVA which mirrors CPU 917 * @ctx: GPU SVM context 918 * 919 * This function finds or inserts a newly allocated a GPU SVM range based on the 920 * fault address. Caller must hold a lock to protect range lookup and insertion. 921 * 922 * Return: Pointer to the GPU SVM range on success, ERR_PTR() on failure. 923 */ 924 struct drm_gpusvm_range * 925 drm_gpusvm_range_find_or_insert(struct drm_gpusvm *gpusvm, 926 unsigned long fault_addr, 927 unsigned long gpuva_start, 928 unsigned long gpuva_end, 929 const struct drm_gpusvm_ctx *ctx) 930 { 931 struct drm_gpusvm_notifier *notifier; 932 struct drm_gpusvm_range *range; 933 struct mm_struct *mm = gpusvm->mm; 934 struct vm_area_struct *vas; 935 bool notifier_alloc = false; 936 unsigned long chunk_size; 937 int err; 938 bool migrate_devmem; 939 940 drm_gpusvm_driver_lock_held(gpusvm); 941 942 if (fault_addr < gpusvm->mm_start || 943 fault_addr > gpusvm->mm_start + gpusvm->mm_range) 944 return ERR_PTR(-EINVAL); 945 946 if (!mmget_not_zero(mm)) 947 return ERR_PTR(-EFAULT); 948 949 notifier = drm_gpusvm_notifier_find(gpusvm, fault_addr); 950 if (!notifier) { 951 notifier = drm_gpusvm_notifier_alloc(gpusvm, fault_addr); 952 if (IS_ERR(notifier)) { 953 err = PTR_ERR(notifier); 954 goto err_mmunlock; 955 } 956 notifier_alloc = true; 957 err = mmu_interval_notifier_insert(¬ifier->notifier, 958 mm, 959 drm_gpusvm_notifier_start(notifier), 960 drm_gpusvm_notifier_size(notifier), 961 &drm_gpusvm_notifier_ops); 962 if (err) 963 goto err_notifier; 964 } 965 966 mmap_read_lock(mm); 967 968 vas = vma_lookup(mm, fault_addr); 969 if (!vas) { 970 err = -ENOENT; 971 goto err_notifier_remove; 972 } 973 974 if (!ctx->read_only && !(vas->vm_flags & VM_WRITE)) { 975 err = -EPERM; 976 goto err_notifier_remove; 977 } 978 979 range = drm_gpusvm_range_find(notifier, fault_addr, fault_addr + 1); 980 if (range) 981 goto out_mmunlock; 982 /* 983 * XXX: Short-circuiting migration based on migrate_vma_* current 984 * limitations. If/when migrate_vma_* add more support, this logic will 985 * have to change. 
986 */ 987 migrate_devmem = ctx->devmem_possible && 988 vma_is_anonymous(vas) && !is_vm_hugetlb_page(vas); 989 990 chunk_size = drm_gpusvm_range_chunk_size(gpusvm, notifier, vas, 991 fault_addr, gpuva_start, 992 gpuva_end, 993 ctx->check_pages_threshold); 994 if (chunk_size == LONG_MAX) { 995 err = -EINVAL; 996 goto err_notifier_remove; 997 } 998 999 range = drm_gpusvm_range_alloc(gpusvm, notifier, fault_addr, chunk_size, 1000 migrate_devmem); 1001 if (IS_ERR(range)) { 1002 err = PTR_ERR(range); 1003 goto err_notifier_remove; 1004 } 1005 1006 drm_gpusvm_range_insert(notifier, range); 1007 if (notifier_alloc) 1008 drm_gpusvm_notifier_insert(gpusvm, notifier); 1009 1010 out_mmunlock: 1011 mmap_read_unlock(mm); 1012 mmput(mm); 1013 1014 return range; 1015 1016 err_notifier_remove: 1017 mmap_read_unlock(mm); 1018 if (notifier_alloc) 1019 mmu_interval_notifier_remove(¬ifier->notifier); 1020 err_notifier: 1021 if (notifier_alloc) 1022 drm_gpusvm_notifier_free(gpusvm, notifier); 1023 err_mmunlock: 1024 mmput(mm); 1025 return ERR_PTR(err); 1026 } 1027 EXPORT_SYMBOL_GPL(drm_gpusvm_range_find_or_insert); 1028 1029 /** 1030 * __drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range (internal) 1031 * @gpusvm: Pointer to the GPU SVM structure 1032 * @range: Pointer to the GPU SVM range structure 1033 * @npages: Number of pages to unmap 1034 * 1035 * This function unmap pages associated with a GPU SVM range. Assumes and 1036 * asserts correct locking is in place when called. 1037 */ 1038 static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm, 1039 struct drm_gpusvm_range *range, 1040 unsigned long npages) 1041 { 1042 unsigned long i, j; 1043 struct drm_pagemap *dpagemap = range->dpagemap; 1044 struct device *dev = gpusvm->drm->dev; 1045 1046 lockdep_assert_held(&gpusvm->notifier_lock); 1047 1048 if (range->flags.has_dma_mapping) { 1049 struct drm_gpusvm_range_flags flags = { 1050 .__flags = range->flags.__flags, 1051 }; 1052 1053 for (i = 0, j = 0; i < npages; j++) { 1054 struct drm_pagemap_device_addr *addr = &range->dma_addr[j]; 1055 1056 if (addr->proto == DRM_INTERCONNECT_SYSTEM) 1057 dma_unmap_page(dev, 1058 addr->addr, 1059 PAGE_SIZE << addr->order, 1060 addr->dir); 1061 else if (dpagemap && dpagemap->ops->device_unmap) 1062 dpagemap->ops->device_unmap(dpagemap, 1063 dev, *addr); 1064 i += 1 << addr->order; 1065 } 1066 1067 /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */ 1068 flags.has_devmem_pages = false; 1069 flags.has_dma_mapping = false; 1070 WRITE_ONCE(range->flags.__flags, flags.__flags); 1071 1072 range->dpagemap = NULL; 1073 } 1074 } 1075 1076 /** 1077 * drm_gpusvm_range_free_pages() - Free pages associated with a GPU SVM range 1078 * @gpusvm: Pointer to the GPU SVM structure 1079 * @range: Pointer to the GPU SVM range structure 1080 * 1081 * This function frees the dma address array associated with a GPU SVM range. 1082 */ 1083 static void drm_gpusvm_range_free_pages(struct drm_gpusvm *gpusvm, 1084 struct drm_gpusvm_range *range) 1085 { 1086 lockdep_assert_held(&gpusvm->notifier_lock); 1087 1088 if (range->dma_addr) { 1089 kvfree(range->dma_addr); 1090 range->dma_addr = NULL; 1091 } 1092 } 1093 1094 /** 1095 * drm_gpusvm_range_remove() - Remove GPU SVM range 1096 * @gpusvm: Pointer to the GPU SVM structure 1097 * @range: Pointer to the GPU SVM range to be removed 1098 * 1099 * This function removes the specified GPU SVM range and also removes the parent 1100 * GPU SVM notifier if no more ranges remain in the notifier. 
The caller must 1101 * hold a lock to protect range and notifier removal. 1102 */ 1103 void drm_gpusvm_range_remove(struct drm_gpusvm *gpusvm, 1104 struct drm_gpusvm_range *range) 1105 { 1106 unsigned long npages = npages_in_range(drm_gpusvm_range_start(range), 1107 drm_gpusvm_range_end(range)); 1108 struct drm_gpusvm_notifier *notifier; 1109 1110 drm_gpusvm_driver_lock_held(gpusvm); 1111 1112 notifier = drm_gpusvm_notifier_find(gpusvm, 1113 drm_gpusvm_range_start(range)); 1114 if (WARN_ON_ONCE(!notifier)) 1115 return; 1116 1117 drm_gpusvm_notifier_lock(gpusvm); 1118 __drm_gpusvm_range_unmap_pages(gpusvm, range, npages); 1119 drm_gpusvm_range_free_pages(gpusvm, range); 1120 __drm_gpusvm_range_remove(notifier, range); 1121 drm_gpusvm_notifier_unlock(gpusvm); 1122 1123 drm_gpusvm_range_put(range); 1124 1125 if (RB_EMPTY_ROOT(¬ifier->root.rb_root)) { 1126 if (!notifier->flags.removed) 1127 mmu_interval_notifier_remove(¬ifier->notifier); 1128 drm_gpusvm_notifier_remove(gpusvm, notifier); 1129 drm_gpusvm_notifier_free(gpusvm, notifier); 1130 } 1131 } 1132 EXPORT_SYMBOL_GPL(drm_gpusvm_range_remove); 1133 1134 /** 1135 * drm_gpusvm_range_get() - Get a reference to GPU SVM range 1136 * @range: Pointer to the GPU SVM range 1137 * 1138 * This function increments the reference count of the specified GPU SVM range. 1139 * 1140 * Return: Pointer to the GPU SVM range. 1141 */ 1142 struct drm_gpusvm_range * 1143 drm_gpusvm_range_get(struct drm_gpusvm_range *range) 1144 { 1145 kref_get(&range->refcount); 1146 1147 return range; 1148 } 1149 EXPORT_SYMBOL_GPL(drm_gpusvm_range_get); 1150 1151 /** 1152 * drm_gpusvm_range_destroy() - Destroy GPU SVM range 1153 * @refcount: Pointer to the reference counter embedded in the GPU SVM range 1154 * 1155 * This function destroys the specified GPU SVM range when its reference count 1156 * reaches zero. If a custom range-free function is provided, it is invoked to 1157 * free the range; otherwise, the range is deallocated using kfree(). 1158 */ 1159 static void drm_gpusvm_range_destroy(struct kref *refcount) 1160 { 1161 struct drm_gpusvm_range *range = 1162 container_of(refcount, struct drm_gpusvm_range, refcount); 1163 struct drm_gpusvm *gpusvm = range->gpusvm; 1164 1165 if (gpusvm->ops->range_free) 1166 gpusvm->ops->range_free(range); 1167 else 1168 kfree(range); 1169 } 1170 1171 /** 1172 * drm_gpusvm_range_put() - Put a reference to GPU SVM range 1173 * @range: Pointer to the GPU SVM range 1174 * 1175 * This function decrements the reference count of the specified GPU SVM range 1176 * and frees it when the count reaches zero. 1177 */ 1178 void drm_gpusvm_range_put(struct drm_gpusvm_range *range) 1179 { 1180 kref_put(&range->refcount, drm_gpusvm_range_destroy); 1181 } 1182 EXPORT_SYMBOL_GPL(drm_gpusvm_range_put); 1183 1184 /** 1185 * drm_gpusvm_range_pages_valid() - GPU SVM range pages valid 1186 * @gpusvm: Pointer to the GPU SVM structure 1187 * @range: Pointer to the GPU SVM range structure 1188 * 1189 * This function determines if a GPU SVM range pages are valid. Expected be 1190 * called holding gpusvm->notifier_lock and as the last step before committing a 1191 * GPU binding. This is akin to a notifier seqno check in the HMM documentation 1192 * but due to wider notifiers (i.e., notifiers which span multiple ranges) this 1193 * function is required for finer grained checking (i.e., per range) if pages 1194 * are valid. 
1195 * 1196 * Return: True if GPU SVM range has valid pages, False otherwise 1197 */ 1198 bool drm_gpusvm_range_pages_valid(struct drm_gpusvm *gpusvm, 1199 struct drm_gpusvm_range *range) 1200 { 1201 lockdep_assert_held(&gpusvm->notifier_lock); 1202 1203 return range->flags.has_devmem_pages || range->flags.has_dma_mapping; 1204 } 1205 EXPORT_SYMBOL_GPL(drm_gpusvm_range_pages_valid); 1206 1207 /** 1208 * drm_gpusvm_range_pages_valid_unlocked() - GPU SVM range pages valid unlocked 1209 * @gpusvm: Pointer to the GPU SVM structure 1210 * @range: Pointer to the GPU SVM range structure 1211 * 1212 * This function determines if a GPU SVM range pages are valid. Expected be 1213 * called without holding gpusvm->notifier_lock. 1214 * 1215 * Return: True if GPU SVM range has valid pages, False otherwise 1216 */ 1217 static bool 1218 drm_gpusvm_range_pages_valid_unlocked(struct drm_gpusvm *gpusvm, 1219 struct drm_gpusvm_range *range) 1220 { 1221 bool pages_valid; 1222 1223 if (!range->dma_addr) 1224 return false; 1225 1226 drm_gpusvm_notifier_lock(gpusvm); 1227 pages_valid = drm_gpusvm_range_pages_valid(gpusvm, range); 1228 if (!pages_valid) 1229 drm_gpusvm_range_free_pages(gpusvm, range); 1230 drm_gpusvm_notifier_unlock(gpusvm); 1231 1232 return pages_valid; 1233 } 1234 1235 /** 1236 * drm_gpusvm_range_get_pages() - Get pages for a GPU SVM range 1237 * @gpusvm: Pointer to the GPU SVM structure 1238 * @range: Pointer to the GPU SVM range structure 1239 * @ctx: GPU SVM context 1240 * 1241 * This function gets pages for a GPU SVM range and ensures they are mapped for 1242 * DMA access. 1243 * 1244 * Return: 0 on success, negative error code on failure. 1245 */ 1246 int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm, 1247 struct drm_gpusvm_range *range, 1248 const struct drm_gpusvm_ctx *ctx) 1249 { 1250 struct mmu_interval_notifier *notifier = &range->notifier->notifier; 1251 struct hmm_range hmm_range = { 1252 .default_flags = HMM_PFN_REQ_FAULT | (ctx->read_only ? 
0 : 1253 HMM_PFN_REQ_WRITE), 1254 .notifier = notifier, 1255 .start = drm_gpusvm_range_start(range), 1256 .end = drm_gpusvm_range_end(range), 1257 .dev_private_owner = gpusvm->device_private_page_owner, 1258 }; 1259 struct mm_struct *mm = gpusvm->mm; 1260 void *zdd; 1261 unsigned long timeout = 1262 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); 1263 unsigned long i, j; 1264 unsigned long npages = npages_in_range(drm_gpusvm_range_start(range), 1265 drm_gpusvm_range_end(range)); 1266 unsigned long num_dma_mapped; 1267 unsigned int order = 0; 1268 unsigned long *pfns; 1269 int err = 0; 1270 struct dev_pagemap *pagemap; 1271 struct drm_pagemap *dpagemap; 1272 struct drm_gpusvm_range_flags flags; 1273 1274 retry: 1275 hmm_range.notifier_seq = mmu_interval_read_begin(notifier); 1276 if (drm_gpusvm_range_pages_valid_unlocked(gpusvm, range)) 1277 goto set_seqno; 1278 1279 pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); 1280 if (!pfns) 1281 return -ENOMEM; 1282 1283 if (!mmget_not_zero(mm)) { 1284 err = -EFAULT; 1285 goto err_free; 1286 } 1287 1288 hmm_range.hmm_pfns = pfns; 1289 while (true) { 1290 mmap_read_lock(mm); 1291 err = hmm_range_fault(&hmm_range); 1292 mmap_read_unlock(mm); 1293 1294 if (err == -EBUSY) { 1295 if (time_after(jiffies, timeout)) 1296 break; 1297 1298 hmm_range.notifier_seq = 1299 mmu_interval_read_begin(notifier); 1300 continue; 1301 } 1302 break; 1303 } 1304 mmput(mm); 1305 if (err) 1306 goto err_free; 1307 1308 map_pages: 1309 /* 1310 * Perform all dma mappings under the notifier lock to not 1311 * access freed pages. A notifier will either block on 1312 * the notifier lock or unmap dma. 1313 */ 1314 drm_gpusvm_notifier_lock(gpusvm); 1315 1316 flags.__flags = range->flags.__flags; 1317 if (flags.unmapped) { 1318 drm_gpusvm_notifier_unlock(gpusvm); 1319 err = -EFAULT; 1320 goto err_free; 1321 } 1322 1323 if (mmu_interval_read_retry(notifier, hmm_range.notifier_seq)) { 1324 drm_gpusvm_notifier_unlock(gpusvm); 1325 kvfree(pfns); 1326 goto retry; 1327 } 1328 1329 if (!range->dma_addr) { 1330 /* Unlock and restart mapping to allocate memory. */ 1331 drm_gpusvm_notifier_unlock(gpusvm); 1332 range->dma_addr = kvmalloc_array(npages, 1333 sizeof(*range->dma_addr), 1334 GFP_KERNEL); 1335 if (!range->dma_addr) { 1336 err = -ENOMEM; 1337 goto err_free; 1338 } 1339 goto map_pages; 1340 } 1341 1342 zdd = NULL; 1343 pagemap = NULL; 1344 num_dma_mapped = 0; 1345 for (i = 0, j = 0; i < npages; ++j) { 1346 struct page *page = hmm_pfn_to_page(pfns[i]); 1347 1348 order = hmm_pfn_to_map_order(pfns[i]); 1349 if (is_device_private_page(page) || 1350 is_device_coherent_page(page)) { 1351 if (zdd != page->zone_device_data && i > 0) { 1352 err = -EOPNOTSUPP; 1353 goto err_unmap; 1354 } 1355 zdd = page->zone_device_data; 1356 if (pagemap != page_pgmap(page)) { 1357 if (i > 0) { 1358 err = -EOPNOTSUPP; 1359 goto err_unmap; 1360 } 1361 1362 pagemap = page_pgmap(page); 1363 dpagemap = drm_pagemap_page_to_dpagemap(page); 1364 if (drm_WARN_ON(gpusvm->drm, !dpagemap)) { 1365 /* 1366 * Raced. This is not supposed to happen 1367 * since hmm_range_fault() should've migrated 1368 * this page to system. 
1369 */ 1370 err = -EAGAIN; 1371 goto err_unmap; 1372 } 1373 } 1374 range->dma_addr[j] = 1375 dpagemap->ops->device_map(dpagemap, 1376 gpusvm->drm->dev, 1377 page, order, 1378 DMA_BIDIRECTIONAL); 1379 if (dma_mapping_error(gpusvm->drm->dev, 1380 range->dma_addr[j].addr)) { 1381 err = -EFAULT; 1382 goto err_unmap; 1383 } 1384 } else { 1385 dma_addr_t addr; 1386 1387 if (is_zone_device_page(page) || pagemap) { 1388 err = -EOPNOTSUPP; 1389 goto err_unmap; 1390 } 1391 1392 if (ctx->devmem_only) { 1393 err = -EFAULT; 1394 goto err_unmap; 1395 } 1396 1397 addr = dma_map_page(gpusvm->drm->dev, 1398 page, 0, 1399 PAGE_SIZE << order, 1400 DMA_BIDIRECTIONAL); 1401 if (dma_mapping_error(gpusvm->drm->dev, addr)) { 1402 err = -EFAULT; 1403 goto err_unmap; 1404 } 1405 1406 range->dma_addr[j] = drm_pagemap_device_addr_encode 1407 (addr, DRM_INTERCONNECT_SYSTEM, order, 1408 DMA_BIDIRECTIONAL); 1409 } 1410 i += 1 << order; 1411 num_dma_mapped = i; 1412 flags.has_dma_mapping = true; 1413 } 1414 1415 if (pagemap) { 1416 flags.has_devmem_pages = true; 1417 range->dpagemap = dpagemap; 1418 } 1419 1420 /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */ 1421 WRITE_ONCE(range->flags.__flags, flags.__flags); 1422 1423 drm_gpusvm_notifier_unlock(gpusvm); 1424 kvfree(pfns); 1425 set_seqno: 1426 range->notifier_seq = hmm_range.notifier_seq; 1427 1428 return 0; 1429 1430 err_unmap: 1431 __drm_gpusvm_range_unmap_pages(gpusvm, range, num_dma_mapped); 1432 drm_gpusvm_notifier_unlock(gpusvm); 1433 err_free: 1434 kvfree(pfns); 1435 if (err == -EAGAIN) 1436 goto retry; 1437 return err; 1438 } 1439 EXPORT_SYMBOL_GPL(drm_gpusvm_range_get_pages); 1440 1441 /** 1442 * drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range 1443 * drm_gpusvm_range_evict() - Evict GPU SVM range 1444 * @gpusvm: Pointer to the GPU SVM structure 1445 * @range: Pointer to the GPU SVM range structure 1446 * @ctx: GPU SVM context 1447 * 1448 * This function unmaps pages associated with a GPU SVM range. If @in_notifier 1449 * is set, it is assumed that gpusvm->notifier_lock is held in write mode; if it 1450 * is clear, it acquires gpusvm->notifier_lock in read mode. Must be called on 1451 * each GPU SVM range attached to notifier in gpusvm->ops->invalidate for IOMMU 1452 * security model. 1453 */ 1454 void drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm, 1455 struct drm_gpusvm_range *range, 1456 const struct drm_gpusvm_ctx *ctx) 1457 { 1458 unsigned long npages = npages_in_range(drm_gpusvm_range_start(range), 1459 drm_gpusvm_range_end(range)); 1460 1461 if (ctx->in_notifier) 1462 lockdep_assert_held_write(&gpusvm->notifier_lock); 1463 else 1464 drm_gpusvm_notifier_lock(gpusvm); 1465 1466 __drm_gpusvm_range_unmap_pages(gpusvm, range, npages); 1467 1468 if (!ctx->in_notifier) 1469 drm_gpusvm_notifier_unlock(gpusvm); 1470 } 1471 EXPORT_SYMBOL_GPL(drm_gpusvm_range_unmap_pages); 1472 1473 /** 1474 * drm_gpusvm_range_evict() - Evict GPU SVM range 1475 * @gpusvm: Pointer to the GPU SVM structure 1476 * @range: Pointer to the GPU SVM range to be removed 1477 * 1478 * This function evicts the specified GPU SVM range. 1479 * 1480 * Return: 0 on success, a negative error code on failure. 
1481 */ 1482 int drm_gpusvm_range_evict(struct drm_gpusvm *gpusvm, 1483 struct drm_gpusvm_range *range) 1484 { 1485 struct mmu_interval_notifier *notifier = &range->notifier->notifier; 1486 struct hmm_range hmm_range = { 1487 .default_flags = HMM_PFN_REQ_FAULT, 1488 .notifier = notifier, 1489 .start = drm_gpusvm_range_start(range), 1490 .end = drm_gpusvm_range_end(range), 1491 .dev_private_owner = NULL, 1492 }; 1493 unsigned long timeout = 1494 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); 1495 unsigned long *pfns; 1496 unsigned long npages = npages_in_range(drm_gpusvm_range_start(range), 1497 drm_gpusvm_range_end(range)); 1498 int err = 0; 1499 struct mm_struct *mm = gpusvm->mm; 1500 1501 if (!mmget_not_zero(mm)) 1502 return -EFAULT; 1503 1504 pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); 1505 if (!pfns) 1506 return -ENOMEM; 1507 1508 hmm_range.hmm_pfns = pfns; 1509 while (!time_after(jiffies, timeout)) { 1510 hmm_range.notifier_seq = mmu_interval_read_begin(notifier); 1511 if (time_after(jiffies, timeout)) { 1512 err = -ETIME; 1513 break; 1514 } 1515 1516 mmap_read_lock(mm); 1517 err = hmm_range_fault(&hmm_range); 1518 mmap_read_unlock(mm); 1519 if (err != -EBUSY) 1520 break; 1521 } 1522 1523 kvfree(pfns); 1524 mmput(mm); 1525 1526 return err; 1527 } 1528 EXPORT_SYMBOL_GPL(drm_gpusvm_range_evict); 1529 1530 /** 1531 * drm_gpusvm_has_mapping() - Check if GPU SVM has mapping for the given address range 1532 * @gpusvm: Pointer to the GPU SVM structure. 1533 * @start: Start address 1534 * @end: End address 1535 * 1536 * Return: True if GPU SVM has mapping, False otherwise 1537 */ 1538 bool drm_gpusvm_has_mapping(struct drm_gpusvm *gpusvm, unsigned long start, 1539 unsigned long end) 1540 { 1541 struct drm_gpusvm_notifier *notifier; 1542 1543 drm_gpusvm_for_each_notifier(notifier, gpusvm, start, end) { 1544 struct drm_gpusvm_range *range = NULL; 1545 1546 drm_gpusvm_for_each_range(range, notifier, start, end) 1547 return true; 1548 } 1549 1550 return false; 1551 } 1552 EXPORT_SYMBOL_GPL(drm_gpusvm_has_mapping); 1553 1554 /** 1555 * drm_gpusvm_range_set_unmapped() - Mark a GPU SVM range as unmapped 1556 * @range: Pointer to the GPU SVM range structure. 1557 * @mmu_range: Pointer to the MMU notifier range structure. 1558 * 1559 * This function marks a GPU SVM range as unmapped and sets the partial_unmap flag 1560 * if the range partially falls within the provided MMU notifier range. 1561 */ 1562 void drm_gpusvm_range_set_unmapped(struct drm_gpusvm_range *range, 1563 const struct mmu_notifier_range *mmu_range) 1564 { 1565 lockdep_assert_held_write(&range->gpusvm->notifier_lock); 1566 1567 range->flags.unmapped = true; 1568 if (drm_gpusvm_range_start(range) < mmu_range->start || 1569 drm_gpusvm_range_end(range) > mmu_range->end) 1570 range->flags.partial_unmap = true; 1571 } 1572 EXPORT_SYMBOL_GPL(drm_gpusvm_range_set_unmapped); 1573 1574 MODULE_DESCRIPTION("DRM GPUSVM"); 1575 MODULE_LICENSE("GPL"); 1576