// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
 * Copyright © 2024 Intel Corporation
 *
 * Authors:
 *     Matthew Brost <matthew.brost@intel.com>
 */

#include <linux/dma-mapping.h>
#include <linux/hmm.h>
#include <linux/memremap.h>
#include <linux/migrate.h>
#include <linux/mm_types.h>
#include <linux/pagemap.h>
#include <linux/slab.h>

#include <drm/drm_device.h>
#include <drm/drm_gpusvm.h>
#include <drm/drm_pagemap.h>
#include <drm/drm_print.h>

/**
 * DOC: Overview
 *
 * GPU Shared Virtual Memory (GPU SVM) layer for the Direct Rendering Manager (DRM)
 * is a component of the DRM framework designed to manage shared virtual memory
 * between the CPU and GPU. It enables efficient data exchange and processing
 * for GPU-accelerated applications by allowing memory sharing and
 * synchronization between the CPU's and GPU's virtual address spaces.
 *
 * Key GPU SVM Components:
 *
 * - Notifiers:
 *	Used for tracking memory intervals and notifying the GPU of changes,
 *	notifiers are sized based on a GPU SVM initialization parameter, with a
 *	recommendation of 512M or larger. They maintain a Red-Black tree and a
 *	list of ranges that fall within the notifier interval. Notifiers are
 *	tracked within a GPU SVM Red-Black tree and list and are dynamically
 *	inserted or removed as ranges within the interval are created or
 *	destroyed.
 * - Ranges:
 *	Represent memory ranges mapped in a DRM device and managed by GPU SVM.
 *	They are sized based on an array of chunk sizes, which is a GPU SVM
 *	initialization parameter, and the CPU address space. Upon GPU fault,
 *	the largest aligned chunk that fits within the faulting CPU address
 *	space is chosen for the range size. Ranges are expected to be
 *	dynamically allocated on GPU fault and removed on an MMU notifier UNMAP
 *	event. As mentioned above, ranges are tracked in a notifier's Red-Black
 *	tree.
 *
 * - Operations:
 *	Define the interface for driver-specific GPU SVM operations such as
 *	range allocation, notifier allocation, and invalidations.
 *
 * - Device Memory Allocations:
 *	Embedded structure containing enough information for GPU SVM to migrate
 *	to / from device memory.
 *
 * - Device Memory Operations:
 *	Define the interface for driver-specific device memory operations, such
 *	as releasing memory, populating pfns, and copying to / from device
 *	memory.
 *
 * This layer provides interfaces for allocating, mapping, migrating, and
 * releasing memory ranges between the CPU and GPU. It handles all core memory
 * management interactions (DMA mapping, HMM, and migration) and provides
 * driver-specific virtual functions (vfuncs). This infrastructure is sufficient
 * to build the expected driver components for an SVM implementation as detailed
 * below.
 *
 * Expected Driver Components:
 *
 * - GPU page fault handler:
 *	Used to create ranges and notifiers based on the fault address,
 *	optionally migrate the range to device memory, and create GPU bindings.
 *
 * - Garbage collector:
 *	Used to unmap and destroy GPU bindings for ranges. Ranges are expected
 *	to be added to the garbage collector upon a MMU_NOTIFY_UNMAP event in
 *	notifier callback.
 *
 * - Notifier callback:
 *	Used to invalidate and DMA unmap GPU bindings for ranges.
 */

/**
 * DOC: Locking
 *
 * GPU SVM handles locking for core MM interactions, i.e., it locks/unlocks the
 * mmap lock as needed.
 *
 * GPU SVM introduces a global notifier lock, which safeguards the notifier's
 * range RB tree and list, as well as the range's DMA mappings and sequence
 * number. GPU SVM manages all necessary locking and unlocking operations,
 * except for the recheck of the range's pages being valid
 * (drm_gpusvm_range_pages_valid) when the driver is committing GPU bindings.
 * This lock corresponds to the ``driver->update`` lock mentioned in
 * Documentation/mm/hmm.rst. Future revisions may transition from a GPU SVM
 * global lock to a per-notifier lock if finer-grained locking is deemed
 * necessary.
 *
 * In addition to the locking mentioned above, the driver should implement a
 * lock to safeguard core GPU SVM function calls that modify state, such as
 * drm_gpusvm_range_find_or_insert and drm_gpusvm_range_remove. This lock is
 * denoted as 'driver_svm_lock' in code examples. Finer-grained driver-side
 * locking should also be possible for concurrent GPU fault processing within a
 * single GPU SVM. The 'driver_svm_lock' can be passed to GPU SVM via
 * drm_gpusvm_driver_set_lock to add annotations to GPU SVM.
 */

/**
 * DOC: Migration
 *
 * The migration support is quite simple, allowing migration between RAM and
 * device memory at the range granularity. For example, GPU SVM currently does
 * not support mixing RAM and device memory pages within a range. This means
 * that upon GPU fault, the entire range can be migrated to device memory, and
 * upon CPU fault, the entire range is migrated to RAM. Mixed RAM and device
 * memory storage within a range could be added in the future if required.
 *
 * The reasoning for only supporting range granularity is as follows: it
 * simplifies the implementation, and range sizes are driver-defined and should
 * be relatively small.
 */

/**
 * DOC: Partial Unmapping of Ranges
 *
 * Partial unmapping of ranges (e.g., 1M out of 2M is unmapped by the CPU,
 * resulting in an MMU_NOTIFY_UNMAP event) presents several challenges, with
 * the main one being that a subset of the range still has CPU and GPU
 * mappings. If the backing store for the range is in device memory, a subset
 * of the backing store has references. One option would be to split the range
 * and device memory backing store, but the implementation for this would be
 * quite complicated. Given that partial unmappings are rare and driver-defined
 * range sizes are relatively small, GPU SVM does not support splitting of
 * ranges.
 *
 * With no support for range splitting, upon partial unmapping of a range, the
 * driver is expected to invalidate and destroy the entire range. If the range
 * has device memory as its backing, the driver is also expected to migrate any
 * remaining pages back to RAM.
 */

/**
 * DOC: Examples
 *
 * This section provides three examples of how to build the expected driver
 * components: the GPU page fault handler, the garbage collector, and the
 * notifier callback. A short, illustrative driver setup sketch precedes them.
 *
 * The generic code provided does not include logic for complex migration
 * policies, optimized invalidations, fine-grained driver locking, or other
 * potentially required driver locking (e.g., DMA-resv locks).
 *
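 * The setup sketch below is illustrative only; the embedded driver structure
 * (struct driver_gpuvm), its fields, driver_gpusvm_ops, and the chunk size
 * table are assumptions rather than part of the GPU SVM API. It shows one way
 * a driver might initialize GPU SVM and register its 'driver_svm_lock' for
 * lockdep annotations via drm_gpusvm_driver_set_lock.
 *
 * .. code-block:: c
 *
 *	// Illustrative driver-side container; field names are hypothetical.
 *	struct driver_gpuvm {
 *		struct drm_gpusvm gpusvm;
 *		struct mutex driver_svm_lock;
 *	};
 *
 *	// Powers of 2, descending, last entry SZ_4K as required by GPU SVM.
 *	static const unsigned long driver_chunk_sizes[] = { SZ_2M, SZ_64K, SZ_4K };
 *
 *	int driver_gpuvm_svm_init(struct driver_gpuvm *gpuvm, struct drm_device *drm)
 *	{
 *		int err;
 *
 *		mutex_init(&gpuvm->driver_svm_lock);
 *
 *		// Mirror the whole user address space with 512M notifiers.
 *		// NULL device_private_page_owner assumes no device memory use.
 *		err = drm_gpusvm_init(&gpuvm->gpusvm, "driver-svm", drm,
 *				      current->mm, NULL, 0, TASK_SIZE, SZ_512M,
 *				      &driver_gpusvm_ops, driver_chunk_sizes,
 *				      ARRAY_SIZE(driver_chunk_sizes));
 *		if (err)
 *			return err;
 *
 *		// Annotate the driver lock so GPU SVM can assert it is held.
 *		drm_gpusvm_driver_set_lock(&gpuvm->gpusvm, &gpuvm->driver_svm_lock);
 *
 *		return 0;
 *	}
 *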
 * 1) GPU page fault handler
 *
 * .. code-block:: c
 *
 *	int driver_bind_range(struct drm_gpusvm *gpusvm, struct drm_gpusvm_range *range)
 *	{
 *		int err = 0;
 *
 *		driver_alloc_and_setup_memory_for_bind(gpusvm, range);
 *
 *		drm_gpusvm_notifier_lock(gpusvm);
 *		if (drm_gpusvm_range_pages_valid(range))
 *			driver_commit_bind(gpusvm, range);
 *		else
 *			err = -EAGAIN;
 *		drm_gpusvm_notifier_unlock(gpusvm);
 *
 *		return err;
 *	}
 *
 *	int driver_gpu_fault(struct drm_gpusvm *gpusvm, unsigned long fault_addr,
 *			     unsigned long gpuva_start, unsigned long gpuva_end)
 *	{
 *		struct drm_gpusvm_ctx ctx = {};
 *		struct drm_gpusvm_range *range;
 *		struct drm_gpusvm_devmem *devmem;
 *		struct mm_struct *mm = gpusvm->mm;
 *		int err;
 *
 *		driver_svm_lock();
 *	retry:
 *		// Always process UNMAPs first so view of GPU SVM ranges is current
 *		driver_garbage_collector(gpusvm);
 *
 *		range = drm_gpusvm_range_find_or_insert(gpusvm, fault_addr,
 *							gpuva_start, gpuva_end,
 *							&ctx);
 *		if (IS_ERR(range)) {
 *			err = PTR_ERR(range);
 *			goto unlock;
 *		}
 *
 *		if (driver_migration_policy(range)) {
 *			mmap_read_lock(mm);
 *			devmem = driver_alloc_devmem();
 *			err = drm_gpusvm_migrate_to_devmem(gpusvm, range,
 *							   devmem, &ctx);
 *			mmap_read_unlock(mm);
 *			if (err)	// CPU mappings may have changed
 *				goto retry;
 *		}
 *
 *		err = drm_gpusvm_range_get_pages(gpusvm, range, &ctx);
 *		if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {	// CPU mappings changed
 *			if (err == -EOPNOTSUPP)
 *				drm_gpusvm_range_evict(gpusvm, range);
 *			goto retry;
 *		} else if (err) {
 *			goto unlock;
 *		}
 *
 *		err = driver_bind_range(gpusvm, range);
 *		if (err == -EAGAIN)	// CPU mappings changed
 *			goto retry;
 *
 *	unlock:
 *		driver_svm_unlock();
 *		return err;
 *	}
 *
 * 2) Garbage Collector
 *
 * .. code-block:: c
 *
 *	void __driver_garbage_collector(struct drm_gpusvm *gpusvm,
 *					struct drm_gpusvm_range *range)
 *	{
 *		assert_driver_svm_locked(gpusvm);
 *
 *		// Partial unmap, migrate any remaining device memory pages back to RAM
 *		if (range->flags.partial_unmap)
 *			drm_gpusvm_range_evict(gpusvm, range);
 *
 *		driver_unbind_range(range);
 *		drm_gpusvm_range_remove(gpusvm, range);
 *	}
 *
 *	void driver_garbage_collector(struct drm_gpusvm *gpusvm)
 *	{
 *		assert_driver_svm_locked(gpusvm);
 *
 *		for_each_range_in_garbage_collector(gpusvm, range)
 *			__driver_garbage_collector(gpusvm, range);
 *	}
 *
 * 3) Notifier callback
 *
 * .. code-block:: c
 *
 *	void driver_invalidation(struct drm_gpusvm *gpusvm,
 *				 struct drm_gpusvm_notifier *notifier,
 *				 const struct mmu_notifier_range *mmu_range)
 *	{
 *		struct drm_gpusvm_ctx ctx = { .in_notifier = true, };
 *		struct drm_gpusvm_range *range = NULL;
 *
 *		driver_invalidate_device_pages(gpusvm, mmu_range->start, mmu_range->end);
 *
 *		drm_gpusvm_for_each_range(range, notifier, mmu_range->start,
 *					  mmu_range->end) {
 *			drm_gpusvm_range_unmap_pages(gpusvm, range, &ctx);
 *
 *			if (mmu_range->event != MMU_NOTIFY_UNMAP)
 *				continue;
 *
 *			drm_gpusvm_range_set_unmapped(range, mmu_range);
 *			driver_garbage_collector_add(gpusvm, range);
 *		}
 *	}
 */

/**
 * npages_in_range() - Calculate the number of pages in a given range
 * @start: The start address of the range
 * @end: The end address of the range
 *
 * This function calculates the number of pages in a given memory range,
 * specified by the start and end addresses. It divides the difference
 * between the end and start addresses by the page size (PAGE_SIZE) to
 * determine the number of pages in the range.
 *
 * Return: The number of pages in the specified range.
 */
static unsigned long
npages_in_range(unsigned long start, unsigned long end)
{
	return (end - start) >> PAGE_SHIFT;
}

/**
 * struct drm_gpusvm_zdd - GPU SVM zone device data
 *
 * @refcount: Reference count for the zdd
 * @devmem_allocation: device memory allocation
 * @device_private_page_owner: Device private pages owner
 *
 * This structure serves as a generic wrapper installed in
 * page->zone_device_data. It provides infrastructure for looking up a device
 * memory allocation upon CPU page fault and asynchronously releasing device
 * memory once the CPU has no page references. Asynchronous release is useful
 * because CPU page references can be dropped in IRQ contexts, while releasing
 * device memory likely requires sleeping locks.
 */
struct drm_gpusvm_zdd {
	struct kref refcount;
	struct drm_gpusvm_devmem *devmem_allocation;
	void *device_private_page_owner;
};

/**
 * drm_gpusvm_zdd_alloc() - Allocate a zdd structure.
 * @device_private_page_owner: Device private pages owner
 *
 * This function allocates and initializes a new zdd structure. It sets up the
 * reference count and the device private page owner.
 *
 * Return: Pointer to the allocated zdd on success, NULL on failure.
 */
static struct drm_gpusvm_zdd *
drm_gpusvm_zdd_alloc(void *device_private_page_owner)
{
	struct drm_gpusvm_zdd *zdd;

	zdd = kmalloc(sizeof(*zdd), GFP_KERNEL);
	if (!zdd)
		return NULL;

	kref_init(&zdd->refcount);
	zdd->devmem_allocation = NULL;
	zdd->device_private_page_owner = device_private_page_owner;

	return zdd;
}

/**
 * drm_gpusvm_zdd_get() - Get a reference to a zdd structure.
 * @zdd: Pointer to the zdd structure.
 *
 * This function increments the reference count of the provided zdd structure.
 *
 * Return: Pointer to the zdd structure.
 */
static struct drm_gpusvm_zdd *drm_gpusvm_zdd_get(struct drm_gpusvm_zdd *zdd)
{
	kref_get(&zdd->refcount);
	return zdd;
}

/**
 * drm_gpusvm_zdd_destroy() - Destroy a zdd structure.
 * @ref: Pointer to the reference count structure.
 *
 * This function releases the zdd's device memory allocation, if any, by
 * signalling its detachment and calling the driver's devmem_release operation,
 * and then frees the zdd.
 */
static void drm_gpusvm_zdd_destroy(struct kref *ref)
{
	struct drm_gpusvm_zdd *zdd =
		container_of(ref, struct drm_gpusvm_zdd, refcount);
	struct drm_gpusvm_devmem *devmem = zdd->devmem_allocation;

	if (devmem) {
		complete_all(&devmem->detached);
		if (devmem->ops->devmem_release)
			devmem->ops->devmem_release(devmem);
	}
	kfree(zdd);
}

/**
 * drm_gpusvm_zdd_put() - Put a zdd reference.
 * @zdd: Pointer to the zdd structure.
 *
 * This function decrements the reference count of the provided zdd structure
 * and destroys it if the count drops to zero.
 */
static void drm_gpusvm_zdd_put(struct drm_gpusvm_zdd *zdd)
{
	kref_put(&zdd->refcount, drm_gpusvm_zdd_destroy);
}

/**
 * drm_gpusvm_range_find() - Find GPU SVM range from GPU SVM notifier
 * @notifier: Pointer to the GPU SVM notifier structure.
 * @start: Start address of the range
 * @end: End address of the range
 *
 * Return: A pointer to the drm_gpusvm_range if found or NULL
 */
struct drm_gpusvm_range *
drm_gpusvm_range_find(struct drm_gpusvm_notifier *notifier, unsigned long start,
		      unsigned long end)
{
	struct interval_tree_node *itree;

	itree = interval_tree_iter_first(&notifier->root, start, end - 1);

	if (itree)
		return container_of(itree, struct drm_gpusvm_range, itree);
	else
		return NULL;
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_find);

/**
 * drm_gpusvm_for_each_range_safe() - Safely iterate over GPU SVM ranges in a notifier
 * @range__: Iterator variable for the ranges
 * @next__: Iterator variable for the ranges temporary storage
 * @notifier__: Pointer to the GPU SVM notifier
 * @start__: Start address of the range
 * @end__: End address of the range
 *
 * This macro is used to iterate over GPU SVM ranges in a notifier while
 * removing ranges from it.
 */
#define drm_gpusvm_for_each_range_safe(range__, next__, notifier__, start__, end__)	\
	for ((range__) = drm_gpusvm_range_find((notifier__), (start__), (end__)),	\
	     (next__) = __drm_gpusvm_range_next(range__);				\
	     (range__) && (drm_gpusvm_range_start(range__) < (end__));			\
	     (range__) = (next__), (next__) = __drm_gpusvm_range_next(range__))

/**
 * __drm_gpusvm_notifier_next() - get the next drm_gpusvm_notifier in the list
 * @notifier: a pointer to the current drm_gpusvm_notifier
 *
 * Return: A pointer to the next drm_gpusvm_notifier if available, or NULL if
 *         the current notifier is the last one or if the input notifier is
 *         NULL.
 */
static struct drm_gpusvm_notifier *
__drm_gpusvm_notifier_next(struct drm_gpusvm_notifier *notifier)
{
	if (notifier && !list_is_last(&notifier->entry,
				      &notifier->gpusvm->notifier_list))
		return list_next_entry(notifier, entry);

	return NULL;
}

static struct drm_gpusvm_notifier *
notifier_iter_first(struct rb_root_cached *root, unsigned long start,
		    unsigned long last)
{
	struct interval_tree_node *itree;

	itree = interval_tree_iter_first(root, start, last);

	if (itree)
		return container_of(itree, struct drm_gpusvm_notifier, itree);
	else
		return NULL;
}

/**
 * drm_gpusvm_for_each_notifier() - Iterate over GPU SVM notifiers in a gpusvm
 * @notifier__: Iterator variable for the notifiers
 * @gpusvm__: Pointer to the GPU SVM structure
 * @start__: Start address of the notifier
 * @end__: End address of the notifier
 *
 * This macro is used to iterate over GPU SVM notifiers in a gpusvm.
 */
#define drm_gpusvm_for_each_notifier(notifier__, gpusvm__, start__, end__)		\
	for ((notifier__) = notifier_iter_first(&(gpusvm__)->root, (start__), (end__) - 1);	\
	     (notifier__) && (drm_gpusvm_notifier_start(notifier__) < (end__));		\
	     (notifier__) = __drm_gpusvm_notifier_next(notifier__))

/**
 * drm_gpusvm_for_each_notifier_safe() - Safely iterate over GPU SVM notifiers in a gpusvm
 * @notifier__: Iterator variable for the notifiers
 * @next__: Iterator variable for the notifiers temporary storage
 * @gpusvm__: Pointer to the GPU SVM structure
 * @start__: Start address of the notifier
 * @end__: End address of the notifier
 *
 * This macro is used to iterate over GPU SVM notifiers in a gpusvm while
 * removing notifiers from it.
 */
#define drm_gpusvm_for_each_notifier_safe(notifier__, next__, gpusvm__, start__, end__)	\
	for ((notifier__) = notifier_iter_first(&(gpusvm__)->root, (start__), (end__) - 1),	\
	     (next__) = __drm_gpusvm_notifier_next(notifier__);				\
	     (notifier__) && (drm_gpusvm_notifier_start(notifier__) < (end__));		\
	     (notifier__) = (next__), (next__) = __drm_gpusvm_notifier_next(notifier__))

/**
 * drm_gpusvm_notifier_invalidate() - Invalidate a GPU SVM notifier.
 * @mni: Pointer to the mmu_interval_notifier structure.
 * @mmu_range: Pointer to the mmu_notifier_range structure.
 * @cur_seq: Current sequence number.
 *
 * This function serves as a generic MMU notifier for GPU SVM. It sets the MMU
 * notifier sequence number and calls the driver invalidate vfunc under
 * gpusvm->notifier_lock.
 *
 * Return: true if the operation succeeds, false otherwise.
 */
static bool
drm_gpusvm_notifier_invalidate(struct mmu_interval_notifier *mni,
			       const struct mmu_notifier_range *mmu_range,
			       unsigned long cur_seq)
{
	struct drm_gpusvm_notifier *notifier =
		container_of(mni, typeof(*notifier), notifier);
	struct drm_gpusvm *gpusvm = notifier->gpusvm;

	if (!mmu_notifier_range_blockable(mmu_range))
		return false;

	down_write(&gpusvm->notifier_lock);
	mmu_interval_set_seq(mni, cur_seq);
	gpusvm->ops->invalidate(gpusvm, notifier, mmu_range);
	up_write(&gpusvm->notifier_lock);

	return true;
}

/*
 * drm_gpusvm_notifier_ops - MMU interval notifier operations for GPU SVM
 */
static const struct mmu_interval_notifier_ops drm_gpusvm_notifier_ops = {
	.invalidate = drm_gpusvm_notifier_invalidate,
};

/**
 * drm_gpusvm_init() - Initialize the GPU SVM.
 * @gpusvm: Pointer to the GPU SVM structure.
 * @name: Name of the GPU SVM.
 * @drm: Pointer to the DRM device structure.
 * @mm: Pointer to the mm_struct for the address space.
 * @device_private_page_owner: Device private pages owner.
 * @mm_start: Start address of GPU SVM.
 * @mm_range: Range of the GPU SVM.
 * @notifier_size: Size of individual notifiers.
 * @ops: Pointer to the operations structure for GPU SVM.
 * @chunk_sizes: Pointer to the array of chunk sizes used in range allocation.
 *               Entries should be powers of 2 in descending order with last
 *               entry being SZ_4K.
 * @num_chunks: Number of chunks.
 *
 * This function initializes the GPU SVM.
 *
 * Return: 0 on success, a negative error code on failure.
 */
int drm_gpusvm_init(struct drm_gpusvm *gpusvm,
		    const char *name, struct drm_device *drm,
		    struct mm_struct *mm, void *device_private_page_owner,
		    unsigned long mm_start, unsigned long mm_range,
		    unsigned long notifier_size,
		    const struct drm_gpusvm_ops *ops,
		    const unsigned long *chunk_sizes, int num_chunks)
{
	if (!ops->invalidate || !num_chunks)
		return -EINVAL;

	gpusvm->name = name;
	gpusvm->drm = drm;
	gpusvm->mm = mm;
	gpusvm->device_private_page_owner = device_private_page_owner;
	gpusvm->mm_start = mm_start;
	gpusvm->mm_range = mm_range;
	gpusvm->notifier_size = notifier_size;
	gpusvm->ops = ops;
	gpusvm->chunk_sizes = chunk_sizes;
	gpusvm->num_chunks = num_chunks;

	mmgrab(mm);
	gpusvm->root = RB_ROOT_CACHED;
	INIT_LIST_HEAD(&gpusvm->notifier_list);

	init_rwsem(&gpusvm->notifier_lock);

	fs_reclaim_acquire(GFP_KERNEL);
	might_lock(&gpusvm->notifier_lock);
	fs_reclaim_release(GFP_KERNEL);

#ifdef CONFIG_LOCKDEP
	gpusvm->lock_dep_map = NULL;
#endif

	return 0;
}
EXPORT_SYMBOL_GPL(drm_gpusvm_init);

/**
 * drm_gpusvm_notifier_find() - Find GPU SVM notifier
 * @gpusvm: Pointer to the GPU SVM structure
 * @fault_addr: Fault address
 *
 * This function finds the GPU SVM notifier associated with the fault address.
 *
 * Return: Pointer to the GPU SVM notifier on success, NULL otherwise.
 */
static struct drm_gpusvm_notifier *
drm_gpusvm_notifier_find(struct drm_gpusvm *gpusvm,
			 unsigned long fault_addr)
{
	return notifier_iter_first(&gpusvm->root, fault_addr, fault_addr + 1);
}

/**
 * to_drm_gpusvm_notifier() - retrieve the container struct for a given rbtree node
 * @node: a pointer to the rbtree node embedded within a drm_gpusvm_notifier struct
 *
 * Return: A pointer to the containing drm_gpusvm_notifier structure.
 */
static struct drm_gpusvm_notifier *to_drm_gpusvm_notifier(struct rb_node *node)
{
	return container_of(node, struct drm_gpusvm_notifier, itree.rb);
}

/**
 * drm_gpusvm_notifier_insert() - Insert GPU SVM notifier
 * @gpusvm: Pointer to the GPU SVM structure
 * @notifier: Pointer to the GPU SVM notifier structure
 *
 * This function inserts the GPU SVM notifier into the GPU SVM RB tree and list.
 */
static void drm_gpusvm_notifier_insert(struct drm_gpusvm *gpusvm,
				       struct drm_gpusvm_notifier *notifier)
{
	struct rb_node *node;
	struct list_head *head;

	interval_tree_insert(&notifier->itree, &gpusvm->root);

	node = rb_prev(&notifier->itree.rb);
	if (node)
		head = &(to_drm_gpusvm_notifier(node))->entry;
	else
		head = &gpusvm->notifier_list;

	list_add(&notifier->entry, head);
}

/**
 * drm_gpusvm_notifier_remove() - Remove GPU SVM notifier
 * @gpusvm: Pointer to the GPU SVM structure
 * @notifier: Pointer to the GPU SVM notifier structure
 *
 * This function removes the GPU SVM notifier from the GPU SVM RB tree and list.
 */
static void drm_gpusvm_notifier_remove(struct drm_gpusvm *gpusvm,
				       struct drm_gpusvm_notifier *notifier)
{
	interval_tree_remove(&notifier->itree, &gpusvm->root);
	list_del(&notifier->entry);
}

/**
 * drm_gpusvm_fini() - Finalize the GPU SVM.
 * @gpusvm: Pointer to the GPU SVM structure.
 *
 * This function finalizes the GPU SVM by cleaning up any remaining ranges and
 * notifiers, and dropping a reference to struct MM.
 */
void drm_gpusvm_fini(struct drm_gpusvm *gpusvm)
{
	struct drm_gpusvm_notifier *notifier, *next;

	drm_gpusvm_for_each_notifier_safe(notifier, next, gpusvm, 0, LONG_MAX) {
		struct drm_gpusvm_range *range, *__next;

		/*
		 * Remove notifier first to avoid racing with any invalidation
		 */
		mmu_interval_notifier_remove(&notifier->notifier);
		notifier->flags.removed = true;

		drm_gpusvm_for_each_range_safe(range, __next, notifier, 0,
					       LONG_MAX)
			drm_gpusvm_range_remove(gpusvm, range);
	}

	mmdrop(gpusvm->mm);
	WARN_ON(!RB_EMPTY_ROOT(&gpusvm->root.rb_root));
}
EXPORT_SYMBOL_GPL(drm_gpusvm_fini);

/**
 * drm_gpusvm_notifier_alloc() - Allocate GPU SVM notifier
 * @gpusvm: Pointer to the GPU SVM structure
 * @fault_addr: Fault address
 *
 * This function allocates and initializes the GPU SVM notifier structure.
 *
 * Return: Pointer to the allocated GPU SVM notifier on success, ERR_PTR() on failure.
 */
static struct drm_gpusvm_notifier *
drm_gpusvm_notifier_alloc(struct drm_gpusvm *gpusvm, unsigned long fault_addr)
{
	struct drm_gpusvm_notifier *notifier;

	if (gpusvm->ops->notifier_alloc)
		notifier = gpusvm->ops->notifier_alloc();
	else
		notifier = kzalloc(sizeof(*notifier), GFP_KERNEL);

	if (!notifier)
		return ERR_PTR(-ENOMEM);

	notifier->gpusvm = gpusvm;
	notifier->itree.start = ALIGN_DOWN(fault_addr, gpusvm->notifier_size);
	notifier->itree.last = ALIGN(fault_addr + 1, gpusvm->notifier_size) - 1;
	INIT_LIST_HEAD(&notifier->entry);
	notifier->root = RB_ROOT_CACHED;
	INIT_LIST_HEAD(&notifier->range_list);

	return notifier;
}

/**
 * drm_gpusvm_notifier_free() - Free GPU SVM notifier
 * @gpusvm: Pointer to the GPU SVM structure
 * @notifier: Pointer to the GPU SVM notifier structure
 *
 * This function frees the GPU SVM notifier structure.
 */
static void drm_gpusvm_notifier_free(struct drm_gpusvm *gpusvm,
				     struct drm_gpusvm_notifier *notifier)
{
	WARN_ON(!RB_EMPTY_ROOT(&notifier->root.rb_root));

	if (gpusvm->ops->notifier_free)
		gpusvm->ops->notifier_free(notifier);
	else
		kfree(notifier);
}

/**
 * to_drm_gpusvm_range() - retrieve the container struct for a given rbtree node
 * @node: a pointer to the rbtree node embedded within a drm_gpusvm_range struct
 *
 * Return: A pointer to the containing drm_gpusvm_range structure.
 */
static struct drm_gpusvm_range *to_drm_gpusvm_range(struct rb_node *node)
{
	return container_of(node, struct drm_gpusvm_range, itree.rb);
}

/**
 * drm_gpusvm_range_insert() - Insert GPU SVM range
 * @notifier: Pointer to the GPU SVM notifier structure
 * @range: Pointer to the GPU SVM range structure
 *
 * This function inserts the GPU SVM range into the notifier RB tree and list.
 */
static void drm_gpusvm_range_insert(struct drm_gpusvm_notifier *notifier,
				    struct drm_gpusvm_range *range)
{
	struct rb_node *node;
	struct list_head *head;

	drm_gpusvm_notifier_lock(notifier->gpusvm);
	interval_tree_insert(&range->itree, &notifier->root);

	node = rb_prev(&range->itree.rb);
	if (node)
		head = &(to_drm_gpusvm_range(node))->entry;
	else
		head = &notifier->range_list;

	list_add(&range->entry, head);
	drm_gpusvm_notifier_unlock(notifier->gpusvm);
}

/**
 * __drm_gpusvm_range_remove() - Remove GPU SVM range
 * @notifier: Pointer to the GPU SVM notifier structure
 * @range: Pointer to the GPU SVM range structure
 *
 * This function removes the GPU SVM range from the notifier RB tree and list.
 */
static void __drm_gpusvm_range_remove(struct drm_gpusvm_notifier *notifier,
				      struct drm_gpusvm_range *range)
{
	interval_tree_remove(&range->itree, &notifier->root);
	list_del(&range->entry);
}

/**
 * drm_gpusvm_range_alloc() - Allocate GPU SVM range
 * @gpusvm: Pointer to the GPU SVM structure
 * @notifier: Pointer to the GPU SVM notifier structure
 * @fault_addr: Fault address
 * @chunk_size: Chunk size
 * @migrate_devmem: Flag indicating whether to migrate device memory
 *
 * This function allocates and initializes the GPU SVM range structure.
 *
 * Return: Pointer to the allocated GPU SVM range on success, ERR_PTR() on failure.
 */
static struct drm_gpusvm_range *
drm_gpusvm_range_alloc(struct drm_gpusvm *gpusvm,
		       struct drm_gpusvm_notifier *notifier,
		       unsigned long fault_addr, unsigned long chunk_size,
		       bool migrate_devmem)
{
	struct drm_gpusvm_range *range;

	if (gpusvm->ops->range_alloc)
		range = gpusvm->ops->range_alloc(gpusvm);
	else
		range = kzalloc(sizeof(*range), GFP_KERNEL);

	if (!range)
		return ERR_PTR(-ENOMEM);

	kref_init(&range->refcount);
	range->gpusvm = gpusvm;
	range->notifier = notifier;
	range->itree.start = ALIGN_DOWN(fault_addr, chunk_size);
	range->itree.last = ALIGN(fault_addr + 1, chunk_size) - 1;
	INIT_LIST_HEAD(&range->entry);
	range->notifier_seq = LONG_MAX;
	range->flags.migrate_devmem = migrate_devmem ? 1 : 0;

	return range;
}

/**
 * drm_gpusvm_check_pages() - Check pages
 * @gpusvm: Pointer to the GPU SVM structure
 * @notifier: Pointer to the GPU SVM notifier structure
 * @start: Start address
 * @end: End address
 *
 * Check if pages between start and end have been faulted in on the CPU. Used
 * to prevent migration of pages without CPU backing store.
 *
 * Return: True if pages have been faulted into CPU, False otherwise
 */
static bool drm_gpusvm_check_pages(struct drm_gpusvm *gpusvm,
				   struct drm_gpusvm_notifier *notifier,
				   unsigned long start, unsigned long end)
{
	struct hmm_range hmm_range = {
		.default_flags = 0,
		.notifier = &notifier->notifier,
		.start = start,
		.end = end,
		.dev_private_owner = gpusvm->device_private_page_owner,
	};
	unsigned long timeout =
		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
	unsigned long *pfns;
	unsigned long npages = npages_in_range(start, end);
	int err, i;

	mmap_assert_locked(gpusvm->mm);

	pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
	if (!pfns)
		return false;

	hmm_range.notifier_seq = mmu_interval_read_begin(&notifier->notifier);
	hmm_range.hmm_pfns = pfns;

	while (true) {
		err = hmm_range_fault(&hmm_range);
		if (err == -EBUSY) {
			if (time_after(jiffies, timeout))
				break;

			hmm_range.notifier_seq =
				mmu_interval_read_begin(&notifier->notifier);
			continue;
		}
		break;
	}
	if (err)
		goto err_free;

	for (i = 0; i < npages;) {
		if (!(pfns[i] & HMM_PFN_VALID)) {
			err = -EFAULT;
			goto err_free;
		}
		i += 0x1 << hmm_pfn_to_map_order(pfns[i]);
	}

err_free:
	kvfree(pfns);
	return err ? false : true;
}

/**
 * drm_gpusvm_range_chunk_size() - Determine chunk size for GPU SVM range
 * @gpusvm: Pointer to the GPU SVM structure
 * @notifier: Pointer to the GPU SVM notifier structure
 * @vas: Pointer to the virtual memory area structure
 * @fault_addr: Fault address
 * @gpuva_start: Start address of GPUVA which mirrors CPU
 * @gpuva_end: End address of GPUVA which mirrors CPU
 * @check_pages_threshold: Size threshold below which CPU pages are checked for
 *                         presence
 *
 * This function determines the chunk size for the GPU SVM range based on the
 * fault address, GPU SVM chunk sizes, existing GPU SVM ranges, and the virtual
 * memory area boundaries.
 *
 * Return: Chunk size on success, LONG_MAX on failure.
 */
static unsigned long
drm_gpusvm_range_chunk_size(struct drm_gpusvm *gpusvm,
			    struct drm_gpusvm_notifier *notifier,
			    struct vm_area_struct *vas,
			    unsigned long fault_addr,
			    unsigned long gpuva_start,
			    unsigned long gpuva_end,
			    unsigned long check_pages_threshold)
{
	unsigned long start, end;
	int i = 0;

retry:
	for (; i < gpusvm->num_chunks; ++i) {
		start = ALIGN_DOWN(fault_addr, gpusvm->chunk_sizes[i]);
		end = ALIGN(fault_addr + 1, gpusvm->chunk_sizes[i]);

		if (start >= vas->vm_start && end <= vas->vm_end &&
		    start >= drm_gpusvm_notifier_start(notifier) &&
		    end <= drm_gpusvm_notifier_end(notifier) &&
		    start >= gpuva_start && end <= gpuva_end)
			break;
	}

	if (i == gpusvm->num_chunks)
		return LONG_MAX;

	/*
	 * If allocating more than a page, ensure not to overlap with existing
	 * ranges.
	 */
	if (end - start != SZ_4K) {
		struct drm_gpusvm_range *range;

		range = drm_gpusvm_range_find(notifier, start, end);
		if (range) {
			++i;
			goto retry;
		}

		/*
		 * XXX: Only create range on pages CPU has faulted in. Without
		 * this check, or prefault, on BMG 'xe_exec_system_allocator --r
		 * process-many-malloc' fails. In the failure case, each process
		 * mallocs 16k but the CPU VMA is ~128k which results in 64k SVM
		 * ranges. When migrating the SVM ranges, some processes fail in
		 * drm_gpusvm_migrate_to_devmem with 'migrate.cpages != npages'
		 * and then upon drm_gpusvm_range_get_pages device pages from
		 * other processes are collected + faulted in which creates all
		 * sorts of problems. Unsure exactly how this is happening; the
		 * problem also goes away if 'xe_exec_system_allocator --r
		 * process-many-malloc' mallocs at least 64k at a time.
		 */
		if (end - start <= check_pages_threshold &&
		    !drm_gpusvm_check_pages(gpusvm, notifier, start, end)) {
			++i;
			goto retry;
		}
	}

	return end - start;
}

#ifdef CONFIG_LOCKDEP
/**
 * drm_gpusvm_driver_lock_held() - Assert GPU SVM driver lock is held
 * @gpusvm: Pointer to the GPU SVM structure.
 *
 * Ensure driver lock is held.
 */
static void drm_gpusvm_driver_lock_held(struct drm_gpusvm *gpusvm)
{
	if ((gpusvm)->lock_dep_map)
		lockdep_assert(lock_is_held_type((gpusvm)->lock_dep_map, 0));
}
#else
static void drm_gpusvm_driver_lock_held(struct drm_gpusvm *gpusvm)
{
}
#endif

/**
 * drm_gpusvm_find_vma_start() - Find start address for first VMA in range
 * @gpusvm: Pointer to the GPU SVM structure
 * @start: The inclusive start user address.
 * @end: The exclusive end user address.
 *
 * Return: The start address of the first VMA within the provided range,
 *	   ULONG_MAX otherwise. Assumes start < end.
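 *
 * An illustrative use (the driver helper and the GPU VA bounds below are
 * assumptions, not part of the GPU SVM API) is to skip ahead to the first
 * CPU-populated address when walking a large GPU VA window, e.g. during
 * prefetch:
 *
 * .. code-block:: c
 *
 *	unsigned long addr = drm_gpusvm_find_vma_start(gpusvm, gpuva_start,
 *						       gpuva_end);
 *
 *	if (addr == ULONG_MAX)
 *		return 0;	// Nothing mapped by the CPU in this window.
 *
 *	return driver_prefetch_from(gpusvm, addr, gpuva_end);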
 */
unsigned long
drm_gpusvm_find_vma_start(struct drm_gpusvm *gpusvm,
			  unsigned long start,
			  unsigned long end)
{
	struct mm_struct *mm = gpusvm->mm;
	struct vm_area_struct *vma;
	unsigned long addr = ULONG_MAX;

	if (!mmget_not_zero(mm))
		return addr;

	mmap_read_lock(mm);

	vma = find_vma_intersection(mm, start, end);
	if (vma)
		addr = vma->vm_start;

	mmap_read_unlock(mm);
	mmput(mm);

	return addr;
}
EXPORT_SYMBOL_GPL(drm_gpusvm_find_vma_start);

/**
 * drm_gpusvm_range_find_or_insert() - Find or insert GPU SVM range
 * @gpusvm: Pointer to the GPU SVM structure
 * @fault_addr: Fault address
 * @gpuva_start: Start address of GPUVA which mirrors CPU
 * @gpuva_end: End address of GPUVA which mirrors CPU
 * @ctx: GPU SVM context
 *
 * This function finds or inserts a newly allocated GPU SVM range based on the
 * fault address. Caller must hold a lock to protect range lookup and insertion.
 *
 * Return: Pointer to the GPU SVM range on success, ERR_PTR() on failure.
 */
struct drm_gpusvm_range *
drm_gpusvm_range_find_or_insert(struct drm_gpusvm *gpusvm,
				unsigned long fault_addr,
				unsigned long gpuva_start,
				unsigned long gpuva_end,
				const struct drm_gpusvm_ctx *ctx)
{
	struct drm_gpusvm_notifier *notifier;
	struct drm_gpusvm_range *range;
	struct mm_struct *mm = gpusvm->mm;
	struct vm_area_struct *vas;
	bool notifier_alloc = false;
	unsigned long chunk_size;
	int err;
	bool migrate_devmem;

	drm_gpusvm_driver_lock_held(gpusvm);

	if (fault_addr < gpusvm->mm_start ||
	    fault_addr > gpusvm->mm_start + gpusvm->mm_range)
		return ERR_PTR(-EINVAL);

	if (!mmget_not_zero(mm))
		return ERR_PTR(-EFAULT);

	notifier = drm_gpusvm_notifier_find(gpusvm, fault_addr);
	if (!notifier) {
		notifier = drm_gpusvm_notifier_alloc(gpusvm, fault_addr);
		if (IS_ERR(notifier)) {
			err = PTR_ERR(notifier);
			goto err_mmunlock;
		}
		notifier_alloc = true;
		err = mmu_interval_notifier_insert(&notifier->notifier,
						   mm,
						   drm_gpusvm_notifier_start(notifier),
						   drm_gpusvm_notifier_size(notifier),
						   &drm_gpusvm_notifier_ops);
		if (err)
			goto err_notifier;
	}

	mmap_read_lock(mm);

	vas = vma_lookup(mm, fault_addr);
	if (!vas) {
		err = -ENOENT;
		goto err_notifier_remove;
	}

	if (!ctx->read_only && !(vas->vm_flags & VM_WRITE)) {
		err = -EPERM;
		goto err_notifier_remove;
	}

	range = drm_gpusvm_range_find(notifier, fault_addr, fault_addr + 1);
	if (range)
		goto out_mmunlock;
	/*
	 * XXX: Short-circuiting migration based on migrate_vma_* current
	 * limitations. If/when migrate_vma_* add more support, this logic will
	 * have to change.
	 */
	migrate_devmem = ctx->devmem_possible &&
		vma_is_anonymous(vas) && !is_vm_hugetlb_page(vas);

	chunk_size = drm_gpusvm_range_chunk_size(gpusvm, notifier, vas,
						 fault_addr, gpuva_start,
						 gpuva_end,
						 ctx->check_pages_threshold);
	if (chunk_size == LONG_MAX) {
		err = -EINVAL;
		goto err_notifier_remove;
	}

	range = drm_gpusvm_range_alloc(gpusvm, notifier, fault_addr, chunk_size,
				       migrate_devmem);
	if (IS_ERR(range)) {
		err = PTR_ERR(range);
		goto err_notifier_remove;
	}

	drm_gpusvm_range_insert(notifier, range);
	if (notifier_alloc)
		drm_gpusvm_notifier_insert(gpusvm, notifier);

out_mmunlock:
	mmap_read_unlock(mm);
	mmput(mm);

	return range;

err_notifier_remove:
	mmap_read_unlock(mm);
	if (notifier_alloc)
		mmu_interval_notifier_remove(&notifier->notifier);
err_notifier:
	if (notifier_alloc)
		drm_gpusvm_notifier_free(gpusvm, notifier);
err_mmunlock:
	mmput(mm);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_find_or_insert);

/**
 * __drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range (internal)
 * @gpusvm: Pointer to the GPU SVM structure
 * @range: Pointer to the GPU SVM range structure
 * @npages: Number of pages to unmap
 *
 * This function unmaps pages associated with a GPU SVM range. Assumes and
 * asserts correct locking is in place when called.
 */
static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm,
					   struct drm_gpusvm_range *range,
					   unsigned long npages)
{
	unsigned long i, j;
	struct drm_pagemap *dpagemap = range->dpagemap;
	struct device *dev = gpusvm->drm->dev;

	lockdep_assert_held(&gpusvm->notifier_lock);

	if (range->flags.has_dma_mapping) {
		struct drm_gpusvm_range_flags flags = {
			.__flags = range->flags.__flags,
		};

		for (i = 0, j = 0; i < npages; j++) {
			struct drm_pagemap_device_addr *addr = &range->dma_addr[j];

			if (addr->proto == DRM_INTERCONNECT_SYSTEM)
				dma_unmap_page(dev,
					       addr->addr,
					       PAGE_SIZE << addr->order,
					       addr->dir);
			else if (dpagemap && dpagemap->ops->device_unmap)
				dpagemap->ops->device_unmap(dpagemap,
							    dev, *addr);
			i += 1 << addr->order;
		}

		/* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */
		flags.has_devmem_pages = false;
		flags.has_dma_mapping = false;
		WRITE_ONCE(range->flags.__flags, flags.__flags);

		range->dpagemap = NULL;
	}
}

/**
 * drm_gpusvm_range_free_pages() - Free pages associated with a GPU SVM range
 * @gpusvm: Pointer to the GPU SVM structure
 * @range: Pointer to the GPU SVM range structure
 *
 * This function frees the dma address array associated with a GPU SVM range.
 */
static void drm_gpusvm_range_free_pages(struct drm_gpusvm *gpusvm,
					struct drm_gpusvm_range *range)
{
	lockdep_assert_held(&gpusvm->notifier_lock);

	if (range->dma_addr) {
		kvfree(range->dma_addr);
		range->dma_addr = NULL;
	}
}

/**
 * drm_gpusvm_range_remove() - Remove GPU SVM range
 * @gpusvm: Pointer to the GPU SVM structure
 * @range: Pointer to the GPU SVM range to be removed
 *
 * This function removes the specified GPU SVM range and also removes the parent
 * GPU SVM notifier if no more ranges remain in the notifier.
 * The caller must hold a lock to protect range and notifier removal.
 */
void drm_gpusvm_range_remove(struct drm_gpusvm *gpusvm,
			     struct drm_gpusvm_range *range)
{
	unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
					       drm_gpusvm_range_end(range));
	struct drm_gpusvm_notifier *notifier;

	drm_gpusvm_driver_lock_held(gpusvm);

	notifier = drm_gpusvm_notifier_find(gpusvm,
					    drm_gpusvm_range_start(range));
	if (WARN_ON_ONCE(!notifier))
		return;

	drm_gpusvm_notifier_lock(gpusvm);
	__drm_gpusvm_range_unmap_pages(gpusvm, range, npages);
	drm_gpusvm_range_free_pages(gpusvm, range);
	__drm_gpusvm_range_remove(notifier, range);
	drm_gpusvm_notifier_unlock(gpusvm);

	drm_gpusvm_range_put(range);

	if (RB_EMPTY_ROOT(&notifier->root.rb_root)) {
		if (!notifier->flags.removed)
			mmu_interval_notifier_remove(&notifier->notifier);
		drm_gpusvm_notifier_remove(gpusvm, notifier);
		drm_gpusvm_notifier_free(gpusvm, notifier);
	}
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_remove);

/**
 * drm_gpusvm_range_get() - Get a reference to GPU SVM range
 * @range: Pointer to the GPU SVM range
 *
 * This function increments the reference count of the specified GPU SVM range.
 *
 * Return: Pointer to the GPU SVM range.
 */
struct drm_gpusvm_range *
drm_gpusvm_range_get(struct drm_gpusvm_range *range)
{
	kref_get(&range->refcount);

	return range;
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_get);

/**
 * drm_gpusvm_range_destroy() - Destroy GPU SVM range
 * @refcount: Pointer to the reference counter embedded in the GPU SVM range
 *
 * This function destroys the specified GPU SVM range when its reference count
 * reaches zero. If a custom range-free function is provided, it is invoked to
 * free the range; otherwise, the range is deallocated using kfree().
 */
static void drm_gpusvm_range_destroy(struct kref *refcount)
{
	struct drm_gpusvm_range *range =
		container_of(refcount, struct drm_gpusvm_range, refcount);
	struct drm_gpusvm *gpusvm = range->gpusvm;

	if (gpusvm->ops->range_free)
		gpusvm->ops->range_free(range);
	else
		kfree(range);
}

/**
 * drm_gpusvm_range_put() - Put a reference to GPU SVM range
 * @range: Pointer to the GPU SVM range
 *
 * This function decrements the reference count of the specified GPU SVM range
 * and frees it when the count reaches zero.
 */
void drm_gpusvm_range_put(struct drm_gpusvm_range *range)
{
	kref_put(&range->refcount, drm_gpusvm_range_destroy);
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_put);

/**
 * drm_gpusvm_range_pages_valid() - GPU SVM range pages valid
 * @gpusvm: Pointer to the GPU SVM structure
 * @range: Pointer to the GPU SVM range structure
 *
 * This function determines if a GPU SVM range's pages are valid. It is expected
 * to be called while holding gpusvm->notifier_lock and as the last step before
 * committing a GPU binding. This is akin to a notifier seqno check in the HMM
 * documentation but due to wider notifiers (i.e., notifiers which span multiple
 * ranges) this function is required for finer-grained (i.e., per-range)
 * checking of whether pages are valid.
 *
 * Return: True if GPU SVM range has valid pages, False otherwise
 */
bool drm_gpusvm_range_pages_valid(struct drm_gpusvm *gpusvm,
				  struct drm_gpusvm_range *range)
{
	lockdep_assert_held(&gpusvm->notifier_lock);

	return range->flags.has_devmem_pages || range->flags.has_dma_mapping;
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_pages_valid);

/**
 * drm_gpusvm_range_pages_valid_unlocked() - GPU SVM range pages valid unlocked
 * @gpusvm: Pointer to the GPU SVM structure
 * @range: Pointer to the GPU SVM range structure
 *
 * This function determines if a GPU SVM range's pages are valid. It is expected
 * to be called without holding gpusvm->notifier_lock.
 *
 * Return: True if GPU SVM range has valid pages, False otherwise
 */
static bool
drm_gpusvm_range_pages_valid_unlocked(struct drm_gpusvm *gpusvm,
				      struct drm_gpusvm_range *range)
{
	bool pages_valid;

	if (!range->dma_addr)
		return false;

	drm_gpusvm_notifier_lock(gpusvm);
	pages_valid = drm_gpusvm_range_pages_valid(gpusvm, range);
	if (!pages_valid)
		drm_gpusvm_range_free_pages(gpusvm, range);
	drm_gpusvm_notifier_unlock(gpusvm);

	return pages_valid;
}

/**
 * drm_gpusvm_range_get_pages() - Get pages for a GPU SVM range
 * @gpusvm: Pointer to the GPU SVM structure
 * @range: Pointer to the GPU SVM range structure
 * @ctx: GPU SVM context
 *
 * This function gets pages for a GPU SVM range and ensures they are mapped for
 * DMA access.
 *
 * Return: 0 on success, negative error code on failure.
 */
int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm,
			       struct drm_gpusvm_range *range,
			       const struct drm_gpusvm_ctx *ctx)
{
	struct mmu_interval_notifier *notifier = &range->notifier->notifier;
	struct hmm_range hmm_range = {
		.default_flags = HMM_PFN_REQ_FAULT | (ctx->read_only ? 0 :
						      HMM_PFN_REQ_WRITE),
		.notifier = notifier,
		.start = drm_gpusvm_range_start(range),
		.end = drm_gpusvm_range_end(range),
		.dev_private_owner = gpusvm->device_private_page_owner,
	};
	struct mm_struct *mm = gpusvm->mm;
	struct drm_gpusvm_zdd *zdd;
	unsigned long timeout =
		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
	unsigned long i, j;
	unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
					       drm_gpusvm_range_end(range));
	unsigned long num_dma_mapped;
	unsigned int order = 0;
	unsigned long *pfns;
	struct page **pages;
	int err = 0;
	struct dev_pagemap *pagemap;
	struct drm_pagemap *dpagemap;
	struct drm_gpusvm_range_flags flags;

retry:
	hmm_range.notifier_seq = mmu_interval_read_begin(notifier);
	if (drm_gpusvm_range_pages_valid_unlocked(gpusvm, range))
		goto set_seqno;

	pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
	if (!pfns)
		return -ENOMEM;

	if (!mmget_not_zero(mm)) {
		err = -EFAULT;
		goto err_free;
	}

	hmm_range.hmm_pfns = pfns;
	while (true) {
		mmap_read_lock(mm);
		err = hmm_range_fault(&hmm_range);
		mmap_read_unlock(mm);

		if (err == -EBUSY) {
			if (time_after(jiffies, timeout))
				break;

			hmm_range.notifier_seq =
				mmu_interval_read_begin(notifier);
			continue;
		}
		break;
	}
	mmput(mm);
	if (err)
		goto err_free;

	pages = (struct page **)pfns;
map_pages:
	/*
	 * Perform all dma mappings under the notifier lock to not
	 * access freed pages. A notifier will either block on
	 * the notifier lock or unmap dma.
	 */
	drm_gpusvm_notifier_lock(gpusvm);

	flags.__flags = range->flags.__flags;
	if (flags.unmapped) {
		drm_gpusvm_notifier_unlock(gpusvm);
		err = -EFAULT;
		goto err_free;
	}

	if (mmu_interval_read_retry(notifier, hmm_range.notifier_seq)) {
		drm_gpusvm_notifier_unlock(gpusvm);
		kvfree(pfns);
		goto retry;
	}

	if (!range->dma_addr) {
		/* Unlock and restart mapping to allocate memory. */
		drm_gpusvm_notifier_unlock(gpusvm);
		range->dma_addr = kvmalloc_array(npages,
						 sizeof(*range->dma_addr),
						 GFP_KERNEL);
		if (!range->dma_addr) {
			err = -ENOMEM;
			goto err_free;
		}
		goto map_pages;
	}

	zdd = NULL;
	num_dma_mapped = 0;
	for (i = 0, j = 0; i < npages; ++j) {
		struct page *page = hmm_pfn_to_page(pfns[i]);

		order = hmm_pfn_to_map_order(pfns[i]);
		if (is_device_private_page(page) ||
		    is_device_coherent_page(page)) {
			if (zdd != page->zone_device_data && i > 0) {
				err = -EOPNOTSUPP;
				goto err_unmap;
			}
			zdd = page->zone_device_data;
			if (pagemap != page_pgmap(page)) {
				if (i > 0) {
					err = -EOPNOTSUPP;
					goto err_unmap;
				}

				pagemap = page_pgmap(page);
				dpagemap = zdd->devmem_allocation->dpagemap;
				if (drm_WARN_ON(gpusvm->drm, !dpagemap)) {
					/*
					 * Raced. This is not supposed to happen
					 * since hmm_range_fault() should've migrated
					 * this page to system.
					 */
					err = -EAGAIN;
					goto err_unmap;
				}
			}
			range->dma_addr[j] =
				dpagemap->ops->device_map(dpagemap,
							  gpusvm->drm->dev,
							  page, order,
							  DMA_BIDIRECTIONAL);
			if (dma_mapping_error(gpusvm->drm->dev,
					      range->dma_addr[j].addr)) {
				err = -EFAULT;
				goto err_unmap;
			}

			pages[i] = page;
		} else {
			dma_addr_t addr;

			if (is_zone_device_page(page) || zdd) {
				err = -EOPNOTSUPP;
				goto err_unmap;
			}

			if (ctx->devmem_only) {
				err = -EFAULT;
				goto err_unmap;
			}

			addr = dma_map_page(gpusvm->drm->dev,
					    page, 0,
					    PAGE_SIZE << order,
					    DMA_BIDIRECTIONAL);
			if (dma_mapping_error(gpusvm->drm->dev, addr)) {
				err = -EFAULT;
				goto err_unmap;
			}

			range->dma_addr[j] = drm_pagemap_device_addr_encode
				(addr, DRM_INTERCONNECT_SYSTEM, order,
				 DMA_BIDIRECTIONAL);
		}
		i += 1 << order;
		num_dma_mapped = i;
		flags.has_dma_mapping = true;
	}

	if (zdd) {
		flags.has_devmem_pages = true;
		range->dpagemap = dpagemap;
	}

	/* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */
	WRITE_ONCE(range->flags.__flags, flags.__flags);

	drm_gpusvm_notifier_unlock(gpusvm);
	kvfree(pfns);
set_seqno:
	range->notifier_seq = hmm_range.notifier_seq;

	return 0;

err_unmap:
	__drm_gpusvm_range_unmap_pages(gpusvm, range, num_dma_mapped);
	drm_gpusvm_notifier_unlock(gpusvm);
err_free:
	kvfree(pfns);
	if (err == -EAGAIN)
		goto retry;
	return err;
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_get_pages);

/**
 * drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range
 * @gpusvm: Pointer to the GPU SVM structure
 * @range: Pointer to the GPU SVM range structure
 * @ctx: GPU SVM context
 *
 * This function unmaps pages associated with a GPU SVM range. If @in_notifier
 * is set, it is assumed that gpusvm->notifier_lock is held in write mode; if it
 * is clear, it acquires gpusvm->notifier_lock in read mode. Must be called on
 * each GPU SVM range attached to the notifier in gpusvm->ops->invalidate for
 * the IOMMU security model.
 */
void drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm,
				  struct drm_gpusvm_range *range,
				  const struct drm_gpusvm_ctx *ctx)
{
	unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
					       drm_gpusvm_range_end(range));

	if (ctx->in_notifier)
		lockdep_assert_held_write(&gpusvm->notifier_lock);
	else
		drm_gpusvm_notifier_lock(gpusvm);

	__drm_gpusvm_range_unmap_pages(gpusvm, range, npages);

	if (!ctx->in_notifier)
		drm_gpusvm_notifier_unlock(gpusvm);
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_unmap_pages);

/**
 * drm_gpusvm_migration_unlock_put_page() - Put a migration page
 * @page: Pointer to the page to put
 *
 * This function unlocks and puts a page.
 */
static void drm_gpusvm_migration_unlock_put_page(struct page *page)
{
	unlock_page(page);
	put_page(page);
}

/**
 * drm_gpusvm_migration_unlock_put_pages() - Put migration pages
 * @npages: Number of pages
 * @migrate_pfn: Array of migrate page frame numbers
 *
 * This function unlocks and puts an array of pages.
 */
static void drm_gpusvm_migration_unlock_put_pages(unsigned long npages,
						  unsigned long *migrate_pfn)
{
	unsigned long i;

	for (i = 0; i < npages; ++i) {
		struct page *page;

		if (!migrate_pfn[i])
			continue;

		page = migrate_pfn_to_page(migrate_pfn[i]);
		drm_gpusvm_migration_unlock_put_page(page);
		migrate_pfn[i] = 0;
	}
}

/**
 * drm_gpusvm_get_devmem_page() - Get a reference to a device memory page
 * @page: Pointer to the page
 * @zdd: Pointer to the GPU SVM zone device data
 *
 * This function associates the given page with the specified GPU SVM zone
 * device data and initializes it for zone device usage.
 */
static void drm_gpusvm_get_devmem_page(struct page *page,
				       struct drm_gpusvm_zdd *zdd)
{
	page->zone_device_data = drm_gpusvm_zdd_get(zdd);
	zone_device_page_init(page);
}

/**
 * drm_gpusvm_migrate_map_pages() - Map migration pages for GPU SVM migration
 * @dev: The device for which the pages are being mapped
 * @dma_addr: Array to store DMA addresses corresponding to mapped pages
 * @migrate_pfn: Array of migrate page frame numbers to map
 * @npages: Number of pages to map
 * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL)
 *
 * This function maps pages of memory for migration usage in GPU SVM. It
 * iterates over each page frame number provided in @migrate_pfn, maps the
 * corresponding page, and stores the DMA address in the provided @dma_addr
 * array.
 *
 * Return: 0 on success, -EFAULT if an error occurs during mapping.
 */
static int drm_gpusvm_migrate_map_pages(struct device *dev,
					dma_addr_t *dma_addr,
					unsigned long *migrate_pfn,
					unsigned long npages,
					enum dma_data_direction dir)
{
	unsigned long i;

	for (i = 0; i < npages; ++i) {
		struct page *page = migrate_pfn_to_page(migrate_pfn[i]);

		if (!page)
			continue;

		if (WARN_ON_ONCE(is_zone_device_page(page)))
			return -EFAULT;

		dma_addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir);
		if (dma_mapping_error(dev, dma_addr[i]))
			return -EFAULT;
	}

	return 0;
}

/**
 * drm_gpusvm_migrate_unmap_pages() - Unmap pages previously mapped for GPU SVM migration
 * @dev: The device for which the pages were mapped
 * @dma_addr: Array of DMA addresses corresponding to mapped pages
 * @npages: Number of pages to unmap
 * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL)
 *
 * This function unmaps previously mapped pages of memory for GPU Shared Virtual
 * Memory (SVM). It iterates over each DMA address provided in @dma_addr, checks
 * if it's valid and not already unmapped, and unmaps the corresponding page.
 */
static void drm_gpusvm_migrate_unmap_pages(struct device *dev,
					   dma_addr_t *dma_addr,
					   unsigned long npages,
					   enum dma_data_direction dir)
{
	unsigned long i;

	for (i = 0; i < npages; ++i) {
		if (!dma_addr[i] || dma_mapping_error(dev, dma_addr[i]))
			continue;

		dma_unmap_page(dev, dma_addr[i], PAGE_SIZE, dir);
	}
}

/**
 * drm_gpusvm_migrate_to_devmem() - Migrate GPU SVM range to device memory
 * @gpusvm: Pointer to the GPU SVM structure
 * @range: Pointer to the GPU SVM range structure
 * @devmem_allocation: Pointer to the device memory allocation. The caller
 *                     should hold a reference to the device memory allocation,
 *                     which should be dropped via ops->devmem_release or upon
 *                     the failure of this function.
 * @ctx: GPU SVM context
 *
 * This function migrates the specified GPU SVM range to device memory. It
 * performs the necessary setup and invokes the driver-specific operations for
 * migration to device memory. Upon successful return, @devmem_allocation can
 * safely reference @range until ops->devmem_release is called, which only
 * happens upon a successful return of this function. Expected to be called
 * while holding the mmap lock in read mode.
 *
 * Return: 0 on success, negative error code on failure.
 */
int drm_gpusvm_migrate_to_devmem(struct drm_gpusvm *gpusvm,
				 struct drm_gpusvm_range *range,
				 struct drm_gpusvm_devmem *devmem_allocation,
				 const struct drm_gpusvm_ctx *ctx)
{
	const struct drm_gpusvm_devmem_ops *ops = devmem_allocation->ops;
	unsigned long start = drm_gpusvm_range_start(range),
		      end = drm_gpusvm_range_end(range);
	struct migrate_vma migrate = {
		.start = start,
		.end = end,
		.pgmap_owner = gpusvm->device_private_page_owner,
		.flags = MIGRATE_VMA_SELECT_SYSTEM,
	};
	struct mm_struct *mm = gpusvm->mm;
	unsigned long i, npages = npages_in_range(start, end);
	struct vm_area_struct *vas;
	struct drm_gpusvm_zdd *zdd = NULL;
	struct page **pages;
	dma_addr_t *dma_addr;
	void *buf;
	int err;

	mmap_assert_locked(gpusvm->mm);

	if (!range->flags.migrate_devmem)
		return -EINVAL;

	if (!ops->populate_devmem_pfn || !ops->copy_to_devmem ||
	    !ops->copy_to_ram)
		return -EOPNOTSUPP;

	vas = vma_lookup(mm, start);
	if (!vas) {
		err = -ENOENT;
		goto err_out;
	}

	if (end > vas->vm_end || start < vas->vm_start) {
		err = -EINVAL;
		goto err_out;
	}

	if (!vma_is_anonymous(vas)) {
		err = -EBUSY;
		goto err_out;
	}

	buf = kvcalloc(npages, 2 * sizeof(*migrate.src) + sizeof(*dma_addr) +
		       sizeof(*pages), GFP_KERNEL);
	if (!buf) {
		err = -ENOMEM;
		goto err_out;
	}
	dma_addr = buf + (2 * sizeof(*migrate.src) * npages);
	pages = buf + (2 * sizeof(*migrate.src) + sizeof(*dma_addr)) * npages;

	zdd = drm_gpusvm_zdd_alloc(gpusvm->device_private_page_owner);
	if (!zdd) {
		err = -ENOMEM;
		goto err_free;
	}

	migrate.vma = vas;
	migrate.src = buf;
	migrate.dst = migrate.src + npages;

	err = migrate_vma_setup(&migrate);
	if (err)
		goto err_free;

	if (!migrate.cpages) {
		err = -EFAULT;
		goto err_free;
	}

	if (migrate.cpages != npages) {
		err = -EBUSY;
		goto err_finalize;
	}

	err = ops->populate_devmem_pfn(devmem_allocation, npages, migrate.dst);
	if (err)
		goto err_finalize;

	err = drm_gpusvm_migrate_map_pages(devmem_allocation->dev, dma_addr,
					   migrate.src, npages, DMA_TO_DEVICE);
	if (err)
		goto err_finalize;

	for (i = 0; i < npages; ++i) {
		struct page *page = pfn_to_page(migrate.dst[i]);

		pages[i] = page;
		migrate.dst[i] = migrate_pfn(migrate.dst[i]);
		drm_gpusvm_get_devmem_page(page, zdd);
	}

	err = ops->copy_to_devmem(pages, dma_addr, npages);
	if (err)
		goto err_finalize;

	/* Upon success bind devmem allocation to range and zdd */
	devmem_allocation->timeslice_expiration =
get_jiffies_64() + 1821 msecs_to_jiffies(ctx->timeslice_ms); 1822 zdd->devmem_allocation = devmem_allocation; /* Owns ref */ 1823 1824 err_finalize: 1825 if (err) 1826 drm_gpusvm_migration_unlock_put_pages(npages, migrate.dst); 1827 migrate_vma_pages(&migrate); 1828 migrate_vma_finalize(&migrate); 1829 drm_gpusvm_migrate_unmap_pages(devmem_allocation->dev, dma_addr, npages, 1830 DMA_TO_DEVICE); 1831 err_free: 1832 if (zdd) 1833 drm_gpusvm_zdd_put(zdd); 1834 kvfree(buf); 1835 err_out: 1836 return err; 1837 } 1838 EXPORT_SYMBOL_GPL(drm_gpusvm_migrate_to_devmem); 1839 1840 /** 1841 * drm_gpusvm_migrate_populate_ram_pfn() - Populate RAM PFNs for a VM area 1842 * @vas: Pointer to the VM area structure, can be NULL 1843 * @fault_page: Fault page 1844 * @npages: Number of pages to populate 1845 * @mpages: Number of pages to migrate 1846 * @src_mpfn: Source array of migrate PFNs 1847 * @mpfn: Array of migrate PFNs to populate 1848 * @addr: Start address for PFN allocation 1849 * 1850 * This function populates the RAM migrate page frame numbers (PFNs) for the 1851 * specified VM area structure. It allocates and locks pages in the VM area for 1852 * RAM usage. If vas is non-NULL use alloc_page_vma for allocation, if NULL use 1853 * alloc_page for allocation. 1854 * 1855 * Return: 0 on success, negative error code on failure. 1856 */ 1857 static int drm_gpusvm_migrate_populate_ram_pfn(struct vm_area_struct *vas, 1858 struct page *fault_page, 1859 unsigned long npages, 1860 unsigned long *mpages, 1861 unsigned long *src_mpfn, 1862 unsigned long *mpfn, 1863 unsigned long addr) 1864 { 1865 unsigned long i; 1866 1867 for (i = 0; i < npages; ++i, addr += PAGE_SIZE) { 1868 struct page *page, *src_page; 1869 1870 if (!(src_mpfn[i] & MIGRATE_PFN_MIGRATE)) 1871 continue; 1872 1873 src_page = migrate_pfn_to_page(src_mpfn[i]); 1874 if (!src_page) 1875 continue; 1876 1877 if (fault_page) { 1878 if (src_page->zone_device_data != 1879 fault_page->zone_device_data) 1880 continue; 1881 } 1882 1883 if (vas) 1884 page = alloc_page_vma(GFP_HIGHUSER, vas, addr); 1885 else 1886 page = alloc_page(GFP_HIGHUSER); 1887 1888 if (!page) 1889 goto free_pages; 1890 1891 mpfn[i] = migrate_pfn(page_to_pfn(page)); 1892 } 1893 1894 for (i = 0; i < npages; ++i) { 1895 struct page *page = migrate_pfn_to_page(mpfn[i]); 1896 1897 if (!page) 1898 continue; 1899 1900 WARN_ON_ONCE(!trylock_page(page)); 1901 ++*mpages; 1902 } 1903 1904 return 0; 1905 1906 free_pages: 1907 for (i = 0; i < npages; ++i) { 1908 struct page *page = migrate_pfn_to_page(mpfn[i]); 1909 1910 if (!page) 1911 continue; 1912 1913 put_page(page); 1914 mpfn[i] = 0; 1915 } 1916 return -ENOMEM; 1917 } 1918 1919 /** 1920 * drm_gpusvm_evict_to_ram() - Evict GPU SVM range to RAM 1921 * @devmem_allocation: Pointer to the device memory allocation 1922 * 1923 * Similar to __drm_gpusvm_migrate_to_ram but does not require mmap lock and 1924 * migration done via migrate_device_* functions. 1925 * 1926 * Return: 0 on success, negative error code on failure. 
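 *
 * A minimal sketch of a driver eviction path, assuming an illustrative
 * struct my_devmem_bo that embeds struct drm_gpusvm_devmem (the my_* names
 * are not part of GPU SVM):
 *
 * .. code-block:: c
 *
 *	int my_driver_evict_bo(struct my_devmem_bo *bo)
 *	{
 *		// Migrates all pages of the allocation back to RAM; retries
 *		// internally and returns -EBUSY if the allocation could not
 *		// be detached.
 *		return drm_gpusvm_evict_to_ram(&bo->devmem);
 *	}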
1927 */ 1928 int drm_gpusvm_evict_to_ram(struct drm_gpusvm_devmem *devmem_allocation) 1929 { 1930 const struct drm_gpusvm_devmem_ops *ops = devmem_allocation->ops; 1931 unsigned long npages, mpages = 0; 1932 struct page **pages; 1933 unsigned long *src, *dst; 1934 dma_addr_t *dma_addr; 1935 void *buf; 1936 int i, err = 0; 1937 unsigned int retry_count = 2; 1938 1939 npages = devmem_allocation->size >> PAGE_SHIFT; 1940 1941 retry: 1942 if (!mmget_not_zero(devmem_allocation->mm)) 1943 return -EFAULT; 1944 1945 buf = kvcalloc(npages, 2 * sizeof(*src) + sizeof(*dma_addr) + 1946 sizeof(*pages), GFP_KERNEL); 1947 if (!buf) { 1948 err = -ENOMEM; 1949 goto err_out; 1950 } 1951 src = buf; 1952 dst = buf + (sizeof(*src) * npages); 1953 dma_addr = buf + (2 * sizeof(*src) * npages); 1954 pages = buf + (2 * sizeof(*src) + sizeof(*dma_addr)) * npages; 1955 1956 err = ops->populate_devmem_pfn(devmem_allocation, npages, src); 1957 if (err) 1958 goto err_free; 1959 1960 err = migrate_device_pfns(src, npages); 1961 if (err) 1962 goto err_free; 1963 1964 err = drm_gpusvm_migrate_populate_ram_pfn(NULL, NULL, npages, &mpages, 1965 src, dst, 0); 1966 if (err || !mpages) 1967 goto err_finalize; 1968 1969 err = drm_gpusvm_migrate_map_pages(devmem_allocation->dev, dma_addr, 1970 dst, npages, DMA_FROM_DEVICE); 1971 if (err) 1972 goto err_finalize; 1973 1974 for (i = 0; i < npages; ++i) 1975 pages[i] = migrate_pfn_to_page(src[i]); 1976 1977 err = ops->copy_to_ram(pages, dma_addr, npages); 1978 if (err) 1979 goto err_finalize; 1980 1981 err_finalize: 1982 if (err) 1983 drm_gpusvm_migration_unlock_put_pages(npages, dst); 1984 migrate_device_pages(src, dst, npages); 1985 migrate_device_finalize(src, dst, npages); 1986 drm_gpusvm_migrate_unmap_pages(devmem_allocation->dev, dma_addr, npages, 1987 DMA_FROM_DEVICE); 1988 err_free: 1989 kvfree(buf); 1990 err_out: 1991 mmput_async(devmem_allocation->mm); 1992 1993 if (completion_done(&devmem_allocation->detached)) 1994 return 0; 1995 1996 if (retry_count--) { 1997 cond_resched(); 1998 goto retry; 1999 } 2000 2001 return err ?: -EBUSY; 2002 } 2003 EXPORT_SYMBOL_GPL(drm_gpusvm_evict_to_ram); 2004 2005 /** 2006 * __drm_gpusvm_migrate_to_ram() - Migrate GPU SVM range to RAM (internal) 2007 * @vas: Pointer to the VM area structure 2008 * @device_private_page_owner: Device private pages owner 2009 * @page: Pointer to the page for fault handling (can be NULL) 2010 * @fault_addr: Fault address 2011 * @size: Size of migration 2012 * 2013 * This internal function performs the migration of the specified GPU SVM range 2014 * to RAM. It sets up the migration, populates + dma maps RAM PFNs, and 2015 * invokes the driver-specific operations for migration to RAM. 2016 * 2017 * Return: 0 on success, negative error code on failure. 
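 *
 * The migration window is the @size-aligned region containing @fault_addr,
 * clamped to the VMA bounds. For example (illustrative values only):
 *
 * .. code-block:: c
 *
 *	// size == SZ_2M, fault_addr == 0x7f0000180000
 *	start = ALIGN_DOWN(fault_addr, size);	// 0x7f0000000000
 *	end = ALIGN(fault_addr + 1, size);	// 0x7f0000200000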
2018 */ 2019 static int __drm_gpusvm_migrate_to_ram(struct vm_area_struct *vas, 2020 void *device_private_page_owner, 2021 struct page *page, 2022 unsigned long fault_addr, 2023 unsigned long size) 2024 { 2025 struct migrate_vma migrate = { 2026 .vma = vas, 2027 .pgmap_owner = device_private_page_owner, 2028 .flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE | 2029 MIGRATE_VMA_SELECT_DEVICE_COHERENT, 2030 .fault_page = page, 2031 }; 2032 struct drm_gpusvm_zdd *zdd; 2033 const struct drm_gpusvm_devmem_ops *ops; 2034 struct device *dev = NULL; 2035 unsigned long npages, mpages = 0; 2036 struct page **pages; 2037 dma_addr_t *dma_addr; 2038 unsigned long start, end; 2039 void *buf; 2040 int i, err = 0; 2041 2042 if (page) { 2043 zdd = page->zone_device_data; 2044 if (time_before64(get_jiffies_64(), 2045 zdd->devmem_allocation->timeslice_expiration)) 2046 return 0; 2047 } 2048 2049 start = ALIGN_DOWN(fault_addr, size); 2050 end = ALIGN(fault_addr + 1, size); 2051 2052 /* Corner where VMA area struct has been partially unmapped */ 2053 if (start < vas->vm_start) 2054 start = vas->vm_start; 2055 if (end > vas->vm_end) 2056 end = vas->vm_end; 2057 2058 migrate.start = start; 2059 migrate.end = end; 2060 npages = npages_in_range(start, end); 2061 2062 buf = kvcalloc(npages, 2 * sizeof(*migrate.src) + sizeof(*dma_addr) + 2063 sizeof(*pages), GFP_KERNEL); 2064 if (!buf) { 2065 err = -ENOMEM; 2066 goto err_out; 2067 } 2068 dma_addr = buf + (2 * sizeof(*migrate.src) * npages); 2069 pages = buf + (2 * sizeof(*migrate.src) + sizeof(*dma_addr)) * npages; 2070 2071 migrate.vma = vas; 2072 migrate.src = buf; 2073 migrate.dst = migrate.src + npages; 2074 2075 err = migrate_vma_setup(&migrate); 2076 if (err) 2077 goto err_free; 2078 2079 /* Raced with another CPU fault, nothing to do */ 2080 if (!migrate.cpages) 2081 goto err_free; 2082 2083 if (!page) { 2084 for (i = 0; i < npages; ++i) { 2085 if (!(migrate.src[i] & MIGRATE_PFN_MIGRATE)) 2086 continue; 2087 2088 page = migrate_pfn_to_page(migrate.src[i]); 2089 break; 2090 } 2091 2092 if (!page) 2093 goto err_finalize; 2094 } 2095 zdd = page->zone_device_data; 2096 ops = zdd->devmem_allocation->ops; 2097 dev = zdd->devmem_allocation->dev; 2098 2099 err = drm_gpusvm_migrate_populate_ram_pfn(vas, page, npages, &mpages, 2100 migrate.src, migrate.dst, 2101 start); 2102 if (err) 2103 goto err_finalize; 2104 2105 err = drm_gpusvm_migrate_map_pages(dev, dma_addr, migrate.dst, npages, 2106 DMA_FROM_DEVICE); 2107 if (err) 2108 goto err_finalize; 2109 2110 for (i = 0; i < npages; ++i) 2111 pages[i] = migrate_pfn_to_page(migrate.src[i]); 2112 2113 err = ops->copy_to_ram(pages, dma_addr, npages); 2114 if (err) 2115 goto err_finalize; 2116 2117 err_finalize: 2118 if (err) 2119 drm_gpusvm_migration_unlock_put_pages(npages, migrate.dst); 2120 migrate_vma_pages(&migrate); 2121 migrate_vma_finalize(&migrate); 2122 if (dev) 2123 drm_gpusvm_migrate_unmap_pages(dev, dma_addr, npages, 2124 DMA_FROM_DEVICE); 2125 err_free: 2126 kvfree(buf); 2127 err_out: 2128 2129 return err; 2130 } 2131 2132 /** 2133 * drm_gpusvm_range_evict - Evict GPU SVM range 2134 * @range: Pointer to the GPU SVM range to be removed 2135 * 2136 * This function evicts the specified GPU SVM range. This function will not 2137 * evict coherent pages. 2138 * 2139 * Return: 0 on success, a negative error code on failure. 
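 *
 * A minimal caller sketch; driver_release_range() is an illustrative name and
 * not part of GPU SVM:
 *
 * .. code-block:: c
 *
 *	static int driver_release_range(struct drm_gpusvm *gpusvm,
 *					struct drm_gpusvm_range *range)
 *	{
 *		// Faults the CPU mappings of the range, which migrates any
 *		// device-private pages backing it to RAM via the CPU fault
 *		// path.
 *		return drm_gpusvm_range_evict(gpusvm, range);
 *	}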
2140 */ 2141 int drm_gpusvm_range_evict(struct drm_gpusvm *gpusvm, 2142 struct drm_gpusvm_range *range) 2143 { 2144 struct mmu_interval_notifier *notifier = &range->notifier->notifier; 2145 struct hmm_range hmm_range = { 2146 .default_flags = HMM_PFN_REQ_FAULT, 2147 .notifier = notifier, 2148 .start = drm_gpusvm_range_start(range), 2149 .end = drm_gpusvm_range_end(range), 2150 .dev_private_owner = NULL, 2151 }; 2152 unsigned long timeout = 2153 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); 2154 unsigned long *pfns; 2155 unsigned long npages = npages_in_range(drm_gpusvm_range_start(range), 2156 drm_gpusvm_range_end(range)); 2157 int err = 0; 2158 struct mm_struct *mm = gpusvm->mm; 2159 2160 if (!mmget_not_zero(mm)) 2161 return -EFAULT; 2162 2163 pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); 2164 if (!pfns) 2165 return -ENOMEM; 2166 2167 hmm_range.hmm_pfns = pfns; 2168 while (!time_after(jiffies, timeout)) { 2169 hmm_range.notifier_seq = mmu_interval_read_begin(notifier); 2170 if (time_after(jiffies, timeout)) { 2171 err = -ETIME; 2172 break; 2173 } 2174 2175 mmap_read_lock(mm); 2176 err = hmm_range_fault(&hmm_range); 2177 mmap_read_unlock(mm); 2178 if (err != -EBUSY) 2179 break; 2180 } 2181 2182 kvfree(pfns); 2183 mmput(mm); 2184 2185 return err; 2186 } 2187 EXPORT_SYMBOL_GPL(drm_gpusvm_range_evict); 2188 2189 /** 2190 * drm_gpusvm_page_free() - Put GPU SVM zone device data associated with a page 2191 * @page: Pointer to the page 2192 * 2193 * This function is a callback used to put the GPU SVM zone device data 2194 * associated with a page when it is being released. 2195 */ 2196 static void drm_gpusvm_page_free(struct page *page) 2197 { 2198 drm_gpusvm_zdd_put(page->zone_device_data); 2199 } 2200 2201 /** 2202 * drm_gpusvm_migrate_to_ram() - Migrate GPU SVM range to RAM (page fault handler) 2203 * @vmf: Pointer to the fault information structure 2204 * 2205 * This function is a page fault handler used to migrate a GPU SVM range to RAM. 2206 * It retrieves the GPU SVM range information from the faulting page and invokes 2207 * the internal migration function to migrate the range back to RAM. 2208 * 2209 * Return: VM_FAULT_SIGBUS on failure, 0 on success. 2210 */ 2211 static vm_fault_t drm_gpusvm_migrate_to_ram(struct vm_fault *vmf) 2212 { 2213 struct drm_gpusvm_zdd *zdd = vmf->page->zone_device_data; 2214 int err; 2215 2216 err = __drm_gpusvm_migrate_to_ram(vmf->vma, 2217 zdd->device_private_page_owner, 2218 vmf->page, vmf->address, 2219 zdd->devmem_allocation->size); 2220 2221 return err ? VM_FAULT_SIGBUS : 0; 2222 } 2223 2224 /* 2225 * drm_gpusvm_pagemap_ops - Device page map operations for GPU SVM 2226 */ 2227 static const struct dev_pagemap_ops drm_gpusvm_pagemap_ops = { 2228 .page_free = drm_gpusvm_page_free, 2229 .migrate_to_ram = drm_gpusvm_migrate_to_ram, 2230 }; 2231 2232 /** 2233 * drm_gpusvm_pagemap_ops_get() - Retrieve GPU SVM device page map operations 2234 * 2235 * Return: Pointer to the GPU SVM device page map operations structure. 2236 */ 2237 const struct dev_pagemap_ops *drm_gpusvm_pagemap_ops_get(void) 2238 { 2239 return &drm_gpusvm_pagemap_ops; 2240 } 2241 EXPORT_SYMBOL_GPL(drm_gpusvm_pagemap_ops_get); 2242 2243 /** 2244 * drm_gpusvm_has_mapping() - Check if GPU SVM has mapping for the given address range 2245 * @gpusvm: Pointer to the GPU SVM structure. 
2246 * @start: Start address 2247 * @end: End address 2248 * 2249 * Return: True if GPU SVM has mapping, False otherwise 2250 */ 2251 bool drm_gpusvm_has_mapping(struct drm_gpusvm *gpusvm, unsigned long start, 2252 unsigned long end) 2253 { 2254 struct drm_gpusvm_notifier *notifier; 2255 2256 drm_gpusvm_for_each_notifier(notifier, gpusvm, start, end) { 2257 struct drm_gpusvm_range *range = NULL; 2258 2259 drm_gpusvm_for_each_range(range, notifier, start, end) 2260 return true; 2261 } 2262 2263 return false; 2264 } 2265 EXPORT_SYMBOL_GPL(drm_gpusvm_has_mapping); 2266 2267 /** 2268 * drm_gpusvm_range_set_unmapped() - Mark a GPU SVM range as unmapped 2269 * @range: Pointer to the GPU SVM range structure. 2270 * @mmu_range: Pointer to the MMU notifier range structure. 2271 * 2272 * This function marks a GPU SVM range as unmapped and sets the partial_unmap flag 2273 * if the MMU notifier range only partially covers the GPU SVM range. 2274 */ 2275 void drm_gpusvm_range_set_unmapped(struct drm_gpusvm_range *range, 2276 const struct mmu_notifier_range *mmu_range) 2277 { 2278 lockdep_assert_held_write(&range->gpusvm->notifier_lock); 2279 2280 range->flags.unmapped = true; 2281 if (drm_gpusvm_range_start(range) < mmu_range->start || 2282 drm_gpusvm_range_end(range) > mmu_range->end) 2283 range->flags.partial_unmap = true; 2284 } 2285 EXPORT_SYMBOL_GPL(drm_gpusvm_range_set_unmapped); 2286 2287 /** 2288 * drm_gpusvm_devmem_init() - Initialize a GPU SVM device memory allocation 2289 * @devmem_allocation: The struct drm_gpusvm_devmem to initialize 2290 * @dev: Pointer to the device structure to which the device memory allocation belongs 2291 * @mm: Pointer to the mm_struct for the address space 2292 * @ops: Pointer to the operations structure for GPU SVM device memory 2293 * @dpagemap: The struct drm_pagemap we're allocating from. 2294 * @size: Size of device memory allocation 2295 */ 2296 void drm_gpusvm_devmem_init(struct drm_gpusvm_devmem *devmem_allocation, 2297 struct device *dev, struct mm_struct *mm, 2298 const struct drm_gpusvm_devmem_ops *ops, 2299 struct drm_pagemap *dpagemap, size_t size) 2300 { 2301 init_completion(&devmem_allocation->detached); 2302 devmem_allocation->dev = dev; 2303 devmem_allocation->mm = mm; 2304 devmem_allocation->ops = ops; 2305 devmem_allocation->dpagemap = dpagemap; 2306 devmem_allocation->size = size; 2307 } 2308 EXPORT_SYMBOL_GPL(drm_gpusvm_devmem_init); 2309 2310 MODULE_DESCRIPTION("DRM GPUSVM"); 2311 MODULE_LICENSE("GPL"); 2312
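/*
 * Example: minimal driver-side wiring of the device memory interfaces above,
 * assuming an illustrative struct my_devmem_bo embedding struct
 * drm_gpusvm_devmem and a driver-defined my_devmem_ops (none of the my_*
 * names are part of GPU SVM):
 *
 * .. code-block:: c
 *
 *	// Route CPU faults on device pages back through GPU SVM
 *	my_drv->pagemap.type = MEMORY_DEVICE_PRIVATE;
 *	my_drv->pagemap.ops = drm_gpusvm_pagemap_ops_get();
 *
 *	// Describe an allocation to GPU SVM before using it as a migration
 *	// target with drm_gpusvm_migrate_to_devmem()
 *	drm_gpusvm_devmem_init(&bo->devmem, my_drv->dev, my_drv->gpusvm.mm,
 *			       &my_devmem_ops, &my_drv->dpagemap, bo->size);
 */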