// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
 * Copyright © 2024 Intel Corporation
 *
 * Authors:
 *     Matthew Brost <matthew.brost@intel.com>
 */

#include <linux/dma-mapping.h>
#include <linux/export.h>
#include <linux/hmm.h>
#include <linux/memremap.h>
#include <linux/migrate.h>
#include <linux/mm_types.h>
#include <linux/pagemap.h>
#include <linux/slab.h>

#include <drm/drm_device.h>
#include <drm/drm_gpusvm.h>
#include <drm/drm_pagemap.h>
#include <drm/drm_print.h>

/**
 * DOC: Overview
 *
 * GPU Shared Virtual Memory (GPU SVM) layer for the Direct Rendering Manager (DRM)
 * is a component of the DRM framework designed to manage shared virtual memory
 * between the CPU and GPU. It enables efficient data exchange and processing
 * for GPU-accelerated applications by allowing memory sharing and
 * synchronization between the CPU's and GPU's virtual address spaces.
 *
 * Key GPU SVM Components:
 *
 * - Notifiers:
 *	Used for tracking memory intervals and notifying the GPU of changes,
 *	notifiers are sized based on a GPU SVM initialization parameter, with a
 *	recommendation of 512M or larger. They maintain a Red-Black tree and a
 *	list of ranges that fall within the notifier interval. Notifiers are
 *	tracked within a GPU SVM Red-Black tree and list and are dynamically
 *	inserted or removed as ranges within the interval are created or
 *	destroyed.
 *
 * - Ranges:
 *	Represent memory ranges mapped in a DRM device and managed by GPU SVM.
 *	They are sized based on an array of chunk sizes, which is a GPU SVM
 *	initialization parameter, and the CPU address space. Upon GPU fault,
 *	the largest aligned chunk that fits within the faulting CPU address
 *	space is chosen for the range size. Ranges are expected to be
 *	dynamically allocated on GPU fault and removed on an MMU notifier UNMAP
 *	event. As mentioned above, ranges are tracked in a notifier's Red-Black
 *	tree.
 *
 * - Operations:
 *	Define the interface for driver-specific GPU SVM operations such as
 *	range allocation, notifier allocation, and invalidations.
 *
 * - Device Memory Allocations:
 *	Embedded structure containing enough information for GPU SVM to migrate
 *	to / from device memory.
 *
 * - Device Memory Operations:
 *	Define the interface for driver-specific device memory operations, such
 *	as releasing memory, populating pfns, and copying to / from device
 *	memory.
 *
 * This layer provides interfaces for allocating, mapping, migrating, and
 * releasing memory ranges between the CPU and GPU. It handles all core memory
 * management interactions (DMA mapping, HMM, and migration) and provides
 * driver-specific virtual functions (vfuncs). This infrastructure is sufficient
 * to build the expected driver components for an SVM implementation as detailed
 * below.
 *
 * Expected Driver Components:
 *
 * - GPU page fault handler:
 *	Used to create ranges and notifiers based on the fault address,
 *	optionally migrate the range to device memory, and create GPU bindings.
 *
 * - Garbage collector:
 *	Used to unmap and destroy GPU bindings for ranges. Ranges are expected
 *	to be added to the garbage collector upon a MMU_NOTIFY_UNMAP event in
 *	notifier callback.
 *
 * - Notifier callback:
 *	Used to invalidate and DMA unmap GPU bindings for ranges.
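 *
 * The sketch below shows how a driver might tie the pieces above together at
 * initialization time. It is illustrative only: struct driver_svm, its
 * driver_svm_lock member, driver_svm_init(), and the chosen sizes are
 * assumptions of this example rather than requirements of GPU SVM. The
 * operations table must provide at least the invalidate vfunc (here the
 * driver_invalidation() callback from the examples below), the notifier size
 * follows the 512M recommendation above, and the chunk array is powers of 2
 * in descending order ending in SZ_4K.
 *
 * .. code-block:: c
 *
 *	static const struct drm_gpusvm_ops driver_gpusvm_ops = {
 *		.invalidate = driver_invalidation,
 *	};
 *
 *	static const unsigned long driver_chunk_sizes[] = {
 *		SZ_2M, SZ_64K, SZ_4K,
 *	};
 *
 *	int driver_svm_init(struct driver_svm *d, struct drm_device *drm,
 *			    unsigned long start, unsigned long range)
 *	{
 *		int err;
 *
 *		// Owner pointer identifies this driver's device private pages
 *		err = drm_gpusvm_init(&d->gpusvm, "driver-svm", drm,
 *				      current->mm, d, start, range, SZ_512M,
 *				      &driver_gpusvm_ops, driver_chunk_sizes,
 *				      ARRAY_SIZE(driver_chunk_sizes));
 *		if (err)
 *			return err;
 *
 *		// Register the driver lock for GPU SVM lockdep annotations
 *		drm_gpusvm_driver_set_lock(&d->gpusvm, &d->driver_svm_lock);
 *
 *		return 0;
 *	}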
 */

/**
 * DOC: Locking
 *
 * GPU SVM handles locking for core MM interactions, i.e., it locks/unlocks the
 * mmap lock as needed.
 *
 * GPU SVM introduces a global notifier lock, which safeguards the notifier's
 * range RB tree and list, as well as the range's DMA mappings and sequence
 * number. GPU SVM manages all necessary locking and unlocking operations,
 * except for rechecking that a range's pages are still valid
 * (drm_gpusvm_range_pages_valid()) when the driver is committing GPU bindings.
 * This lock corresponds to the ``driver->update`` lock mentioned in
 * Documentation/mm/hmm.rst. Future revisions may transition from a GPU SVM
 * global lock to a per-notifier lock if finer-grained locking is deemed
 * necessary.
 *
 * In addition to the locking mentioned above, the driver should implement a
 * lock to safeguard core GPU SVM function calls that modify state, such as
 * drm_gpusvm_range_find_or_insert and drm_gpusvm_range_remove. This lock is
 * denoted as 'driver_svm_lock' in code examples. Finer-grained driver-side
 * locking should also be possible for concurrent GPU fault processing within a
 * single GPU SVM. The 'driver_svm_lock' can be passed to
 * drm_gpusvm_driver_set_lock() to add lockdep annotations to GPU SVM.
 */

/**
 * DOC: Migration
 *
 * The migration support is quite simple, allowing migration between RAM and
 * device memory at the range granularity. For example, GPU SVM currently does
 * not support mixing RAM and device memory pages within a range. This means
 * that upon GPU fault, the entire range can be migrated to device memory, and
 * upon CPU fault, the entire range is migrated to RAM. Mixed RAM and device
 * memory storage within a range could be added in the future if required.
 *
 * The reasoning for only supporting range granularity is as follows: it
 * simplifies the implementation, and range sizes are driver-defined and should
 * be relatively small.
 */

/**
 * DOC: Partial Unmapping of Ranges
 *
 * Partial unmapping of ranges (e.g., 1M out of 2M is unmapped by CPU resulting
 * in MMU_NOTIFY_UNMAP event) presents several challenges, with the main one
 * being that a subset of the range still has CPU and GPU mappings. If the
 * backing store for the range is in device memory, a subset of the backing
 * store has references. One option would be to split the range and device
 * memory backing store, but the implementation for this would be quite
 * complicated. Given that partial unmappings are rare and driver-defined range
 * sizes are relatively small, GPU SVM does not support splitting of ranges.
 *
 * With no support for range splitting, upon partial unmapping of a range, the
 * driver is expected to invalidate and destroy the entire range. If the range
 * has device memory as its backing, the driver is also expected to migrate any
 * remaining pages back to RAM.
 */

/**
 * DOC: Examples
 *
 * This section provides three examples of how to build the expected driver
 * components: the GPU page fault handler, the garbage collector, and the
 * notifier callback.
 *
 * The generic code provided does not include logic for complex migration
 * policies, optimized invalidations, fine-grained driver locking, or other
 * potentially required driver locking (e.g., DMA-resv locks).
 *
 * 1) GPU page fault handler
 *
 * .. code-block:: c
 *
 *	int driver_bind_range(struct drm_gpusvm *gpusvm, struct drm_gpusvm_range *range)
 *	{
 *		int err = 0;
 *
 *		driver_alloc_and_setup_memory_for_bind(gpusvm, range);
 *
 *		drm_gpusvm_notifier_lock(gpusvm);
 *		if (drm_gpusvm_range_pages_valid(gpusvm, range))
 *			driver_commit_bind(gpusvm, range);
 *		else
 *			err = -EAGAIN;
 *		drm_gpusvm_notifier_unlock(gpusvm);
 *
 *		return err;
 *	}
 *
 *	int driver_gpu_fault(struct drm_gpusvm *gpusvm, unsigned long fault_addr,
 *			     unsigned long gpuva_start, unsigned long gpuva_end)
 *	{
 *		struct drm_gpusvm_devmem *devmem;
 *		struct drm_gpusvm_range *range;
 *		struct drm_gpusvm_ctx ctx = {};
 *		struct mm_struct *mm = gpusvm->mm;
 *		int err;
 *
 *		driver_svm_lock();
 *	retry:
 *		// Always process UNMAPs first so view of GPU SVM ranges is current
 *		driver_garbage_collector(gpusvm);
 *
 *		range = drm_gpusvm_range_find_or_insert(gpusvm, fault_addr,
 *							gpuva_start, gpuva_end,
 *							&ctx);
 *		if (IS_ERR(range)) {
 *			err = PTR_ERR(range);
 *			goto unlock;
 *		}
 *
 *		if (driver_migration_policy(range)) {
 *			mmap_read_lock(mm);
 *			devmem = driver_alloc_devmem();
 *			err = drm_gpusvm_migrate_to_devmem(gpusvm, range,
 *							   devmem, &ctx);
 *			mmap_read_unlock(mm);
 *			if (err)	// CPU mappings may have changed
 *				goto retry;
 *		}
 *
 *		err = drm_gpusvm_range_get_pages(gpusvm, range, &ctx);
 *		if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {	// CPU mappings changed
 *			if (err == -EOPNOTSUPP)
 *				drm_gpusvm_range_evict(gpusvm, range);
 *			goto retry;
 *		} else if (err) {
 *			goto unlock;
 *		}
 *
 *		err = driver_bind_range(gpusvm, range);
 *		if (err == -EAGAIN)	// CPU mappings changed
 *			goto retry;
 *
 *	unlock:
 *		driver_svm_unlock();
 *		return err;
 *	}
 *
 * 2) Garbage Collector
 *
 * .. code-block:: c
 *
 *	void __driver_garbage_collector(struct drm_gpusvm *gpusvm,
 *					struct drm_gpusvm_range *range)
 *	{
 *		assert_driver_svm_locked(gpusvm);
 *
 *		// Partial unmap, migrate any remaining device memory pages back to RAM
 *		if (range->flags.partial_unmap)
 *			drm_gpusvm_range_evict(gpusvm, range);
 *
 *		driver_unbind_range(range);
 *		drm_gpusvm_range_remove(gpusvm, range);
 *	}
 *
 *	void driver_garbage_collector(struct drm_gpusvm *gpusvm)
 *	{
 *		assert_driver_svm_locked(gpusvm);
 *
 *		for_each_range_in_garbage_collector(gpusvm, range)
 *			__driver_garbage_collector(gpusvm, range);
 *	}
 *
 * 3) Notifier callback
 *
 * ..
code-block:: c 251 * 252 * void driver_invalidation(struct drm_gpusvm *gpusvm, 253 * struct drm_gpusvm_notifier *notifier, 254 * const struct mmu_notifier_range *mmu_range) 255 * { 256 * struct drm_gpusvm_ctx ctx = { .in_notifier = true, }; 257 * struct drm_gpusvm_range *range = NULL; 258 * 259 * driver_invalidate_device_pages(gpusvm, mmu_range->start, mmu_range->end); 260 * 261 * drm_gpusvm_for_each_range(range, notifier, mmu_range->start, 262 * mmu_range->end) { 263 * drm_gpusvm_range_unmap_pages(gpusvm, range, &ctx); 264 * 265 * if (mmu_range->event != MMU_NOTIFY_UNMAP) 266 * continue; 267 * 268 * drm_gpusvm_range_set_unmapped(range, mmu_range); 269 * driver_garbage_collector_add(gpusvm, range); 270 * } 271 * } 272 */ 273 274 /** 275 * npages_in_range() - Calculate the number of pages in a given range 276 * @start: The start address of the range 277 * @end: The end address of the range 278 * 279 * This macro calculates the number of pages in a given memory range, 280 * specified by the start and end addresses. It divides the difference 281 * between the end and start addresses by the page size (PAGE_SIZE) to 282 * determine the number of pages in the range. 283 * 284 * Return: The number of pages in the specified range. 285 */ 286 static unsigned long 287 npages_in_range(unsigned long start, unsigned long end) 288 { 289 return (end - start) >> PAGE_SHIFT; 290 } 291 292 /** 293 * struct drm_gpusvm_zdd - GPU SVM zone device data 294 * 295 * @refcount: Reference count for the zdd 296 * @devmem_allocation: device memory allocation 297 * @device_private_page_owner: Device private pages owner 298 * 299 * This structure serves as a generic wrapper installed in 300 * page->zone_device_data. It provides infrastructure for looking up a device 301 * memory allocation upon CPU page fault and asynchronously releasing device 302 * memory once the CPU has no page references. Asynchronous release is useful 303 * because CPU page references can be dropped in IRQ contexts, while releasing 304 * device memory likely requires sleeping locks. 305 */ 306 struct drm_gpusvm_zdd { 307 struct kref refcount; 308 struct drm_gpusvm_devmem *devmem_allocation; 309 void *device_private_page_owner; 310 }; 311 312 /** 313 * drm_gpusvm_zdd_alloc() - Allocate a zdd structure. 314 * @device_private_page_owner: Device private pages owner 315 * 316 * This function allocates and initializes a new zdd structure. It sets up the 317 * reference count and initializes the destroy work. 318 * 319 * Return: Pointer to the allocated zdd on success, ERR_PTR() on failure. 320 */ 321 static struct drm_gpusvm_zdd * 322 drm_gpusvm_zdd_alloc(void *device_private_page_owner) 323 { 324 struct drm_gpusvm_zdd *zdd; 325 326 zdd = kmalloc(sizeof(*zdd), GFP_KERNEL); 327 if (!zdd) 328 return NULL; 329 330 kref_init(&zdd->refcount); 331 zdd->devmem_allocation = NULL; 332 zdd->device_private_page_owner = device_private_page_owner; 333 334 return zdd; 335 } 336 337 /** 338 * drm_gpusvm_zdd_get() - Get a reference to a zdd structure. 339 * @zdd: Pointer to the zdd structure. 340 * 341 * This function increments the reference count of the provided zdd structure. 342 * 343 * Return: Pointer to the zdd structure. 344 */ 345 static struct drm_gpusvm_zdd *drm_gpusvm_zdd_get(struct drm_gpusvm_zdd *zdd) 346 { 347 kref_get(&zdd->refcount); 348 return zdd; 349 } 350 351 /** 352 * drm_gpusvm_zdd_destroy() - Destroy a zdd structure. 353 * @ref: Pointer to the reference count structure. 
 *
 * This function releases the zdd's device memory allocation, if one is
 * attached, by completing its detach and calling the driver's
 * devmem_release() operation, and then frees the zdd.
 */
static void drm_gpusvm_zdd_destroy(struct kref *ref)
{
	struct drm_gpusvm_zdd *zdd =
		container_of(ref, struct drm_gpusvm_zdd, refcount);
	struct drm_gpusvm_devmem *devmem = zdd->devmem_allocation;

	if (devmem) {
		complete_all(&devmem->detached);
		if (devmem->ops->devmem_release)
			devmem->ops->devmem_release(devmem);
	}
	kfree(zdd);
}

/**
 * drm_gpusvm_zdd_put() - Put a zdd reference.
 * @zdd: Pointer to the zdd structure.
 *
 * This function decrements the reference count of the provided zdd structure
 * and destroys it if the count drops to zero.
 */
static void drm_gpusvm_zdd_put(struct drm_gpusvm_zdd *zdd)
{
	kref_put(&zdd->refcount, drm_gpusvm_zdd_destroy);
}

/**
 * drm_gpusvm_range_find() - Find GPU SVM range from GPU SVM notifier
 * @notifier: Pointer to the GPU SVM notifier structure.
 * @start: Start address of the range
 * @end: End address of the range
 *
 * Return: A pointer to the drm_gpusvm_range if found or NULL
 */
struct drm_gpusvm_range *
drm_gpusvm_range_find(struct drm_gpusvm_notifier *notifier, unsigned long start,
		      unsigned long end)
{
	struct interval_tree_node *itree;

	itree = interval_tree_iter_first(&notifier->root, start, end - 1);

	if (itree)
		return container_of(itree, struct drm_gpusvm_range, itree);
	else
		return NULL;
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_find);

/**
 * drm_gpusvm_for_each_range_safe() - Safely iterate over GPU SVM ranges in a notifier
 * @range__: Iterator variable for the ranges
 * @next__: Iterator variable for the ranges temporary storage
 * @notifier__: Pointer to the GPU SVM notifier
 * @start__: Start address of the range
 * @end__: End address of the range
 *
 * This macro is used to iterate over GPU SVM ranges in a notifier while
 * removing ranges from it.
 */
#define drm_gpusvm_for_each_range_safe(range__, next__, notifier__, start__, end__)	\
	for ((range__) = drm_gpusvm_range_find((notifier__), (start__), (end__)),	\
	     (next__) = __drm_gpusvm_range_next(range__);				\
	     (range__) && (drm_gpusvm_range_start(range__) < (end__));			\
	     (range__) = (next__), (next__) = __drm_gpusvm_range_next(range__))

/**
 * __drm_gpusvm_notifier_next() - get the next drm_gpusvm_notifier in the list
 * @notifier: a pointer to the current drm_gpusvm_notifier
 *
 * Return: A pointer to the next drm_gpusvm_notifier if available, or NULL if
 *         the current notifier is the last one or if the input notifier is
 *         NULL.
 */
static struct drm_gpusvm_notifier *
__drm_gpusvm_notifier_next(struct drm_gpusvm_notifier *notifier)
{
	if (notifier && !list_is_last(&notifier->entry,
				      &notifier->gpusvm->notifier_list))
		return list_next_entry(notifier, entry);

	return NULL;
}

static struct drm_gpusvm_notifier *
notifier_iter_first(struct rb_root_cached *root, unsigned long start,
		    unsigned long last)
{
	struct interval_tree_node *itree;

	itree = interval_tree_iter_first(root, start, last);

	if (itree)
		return container_of(itree, struct drm_gpusvm_notifier, itree);
	else
		return NULL;
}

/**
 * drm_gpusvm_for_each_notifier() - Iterate over GPU SVM notifiers in a gpusvm
 * @notifier__: Iterator variable for the notifiers
 * @gpusvm__: Pointer to the GPU SVM structure
 * @start__: Start address of the notifier
 * @end__: End address of the notifier
 *
 * This macro is used to iterate over GPU SVM notifiers in a gpusvm.
 */
#define drm_gpusvm_for_each_notifier(notifier__, gpusvm__, start__, end__)		\
	for ((notifier__) = notifier_iter_first(&(gpusvm__)->root, (start__), (end__) - 1);	\
	     (notifier__) && (drm_gpusvm_notifier_start(notifier__) < (end__));		\
	     (notifier__) = __drm_gpusvm_notifier_next(notifier__))

/**
 * drm_gpusvm_for_each_notifier_safe() - Safely iterate over GPU SVM notifiers in a gpusvm
 * @notifier__: Iterator variable for the notifiers
 * @next__: Iterator variable for the notifiers temporary storage
 * @gpusvm__: Pointer to the GPU SVM structure
 * @start__: Start address of the notifier
 * @end__: End address of the notifier
 *
 * This macro is used to iterate over GPU SVM notifiers in a gpusvm while
 * removing notifiers from it.
 */
#define drm_gpusvm_for_each_notifier_safe(notifier__, next__, gpusvm__, start__, end__)	\
	for ((notifier__) = notifier_iter_first(&(gpusvm__)->root, (start__), (end__) - 1),	\
	     (next__) = __drm_gpusvm_notifier_next(notifier__);				\
	     (notifier__) && (drm_gpusvm_notifier_start(notifier__) < (end__));		\
	     (notifier__) = (next__), (next__) = __drm_gpusvm_notifier_next(notifier__))

/**
 * drm_gpusvm_notifier_invalidate() - Invalidate a GPU SVM notifier.
 * @mni: Pointer to the mmu_interval_notifier structure.
 * @mmu_range: Pointer to the mmu_notifier_range structure.
 * @cur_seq: Current sequence number.
 *
 * This function serves as a generic MMU notifier for GPU SVM. It sets the MMU
 * notifier sequence number and calls the driver invalidate vfunc under
 * gpusvm->notifier_lock.
 *
 * Return: true if the operation succeeds, false otherwise
497 */ 498 static bool 499 drm_gpusvm_notifier_invalidate(struct mmu_interval_notifier *mni, 500 const struct mmu_notifier_range *mmu_range, 501 unsigned long cur_seq) 502 { 503 struct drm_gpusvm_notifier *notifier = 504 container_of(mni, typeof(*notifier), notifier); 505 struct drm_gpusvm *gpusvm = notifier->gpusvm; 506 507 if (!mmu_notifier_range_blockable(mmu_range)) 508 return false; 509 510 down_write(&gpusvm->notifier_lock); 511 mmu_interval_set_seq(mni, cur_seq); 512 gpusvm->ops->invalidate(gpusvm, notifier, mmu_range); 513 up_write(&gpusvm->notifier_lock); 514 515 return true; 516 } 517 518 /* 519 * drm_gpusvm_notifier_ops - MMU interval notifier operations for GPU SVM 520 */ 521 static const struct mmu_interval_notifier_ops drm_gpusvm_notifier_ops = { 522 .invalidate = drm_gpusvm_notifier_invalidate, 523 }; 524 525 /** 526 * drm_gpusvm_init() - Initialize the GPU SVM. 527 * @gpusvm: Pointer to the GPU SVM structure. 528 * @name: Name of the GPU SVM. 529 * @drm: Pointer to the DRM device structure. 530 * @mm: Pointer to the mm_struct for the address space. 531 * @device_private_page_owner: Device private pages owner. 532 * @mm_start: Start address of GPU SVM. 533 * @mm_range: Range of the GPU SVM. 534 * @notifier_size: Size of individual notifiers. 535 * @ops: Pointer to the operations structure for GPU SVM. 536 * @chunk_sizes: Pointer to the array of chunk sizes used in range allocation. 537 * Entries should be powers of 2 in descending order with last 538 * entry being SZ_4K. 539 * @num_chunks: Number of chunks. 540 * 541 * This function initializes the GPU SVM. 542 * 543 * Return: 0 on success, a negative error code on failure. 544 */ 545 int drm_gpusvm_init(struct drm_gpusvm *gpusvm, 546 const char *name, struct drm_device *drm, 547 struct mm_struct *mm, void *device_private_page_owner, 548 unsigned long mm_start, unsigned long mm_range, 549 unsigned long notifier_size, 550 const struct drm_gpusvm_ops *ops, 551 const unsigned long *chunk_sizes, int num_chunks) 552 { 553 if (!ops->invalidate || !num_chunks) 554 return -EINVAL; 555 556 gpusvm->name = name; 557 gpusvm->drm = drm; 558 gpusvm->mm = mm; 559 gpusvm->device_private_page_owner = device_private_page_owner; 560 gpusvm->mm_start = mm_start; 561 gpusvm->mm_range = mm_range; 562 gpusvm->notifier_size = notifier_size; 563 gpusvm->ops = ops; 564 gpusvm->chunk_sizes = chunk_sizes; 565 gpusvm->num_chunks = num_chunks; 566 567 mmgrab(mm); 568 gpusvm->root = RB_ROOT_CACHED; 569 INIT_LIST_HEAD(&gpusvm->notifier_list); 570 571 init_rwsem(&gpusvm->notifier_lock); 572 573 fs_reclaim_acquire(GFP_KERNEL); 574 might_lock(&gpusvm->notifier_lock); 575 fs_reclaim_release(GFP_KERNEL); 576 577 #ifdef CONFIG_LOCKDEP 578 gpusvm->lock_dep_map = NULL; 579 #endif 580 581 return 0; 582 } 583 EXPORT_SYMBOL_GPL(drm_gpusvm_init); 584 585 /** 586 * drm_gpusvm_notifier_find() - Find GPU SVM notifier 587 * @gpusvm: Pointer to the GPU SVM structure 588 * @fault_addr: Fault address 589 * 590 * This function finds the GPU SVM notifier associated with the fault address. 591 * 592 * Return: Pointer to the GPU SVM notifier on success, NULL otherwise. 
593 */ 594 static struct drm_gpusvm_notifier * 595 drm_gpusvm_notifier_find(struct drm_gpusvm *gpusvm, 596 unsigned long fault_addr) 597 { 598 return notifier_iter_first(&gpusvm->root, fault_addr, fault_addr + 1); 599 } 600 601 /** 602 * to_drm_gpusvm_notifier() - retrieve the container struct for a given rbtree node 603 * @node: a pointer to the rbtree node embedded within a drm_gpusvm_notifier struct 604 * 605 * Return: A pointer to the containing drm_gpusvm_notifier structure. 606 */ 607 static struct drm_gpusvm_notifier *to_drm_gpusvm_notifier(struct rb_node *node) 608 { 609 return container_of(node, struct drm_gpusvm_notifier, itree.rb); 610 } 611 612 /** 613 * drm_gpusvm_notifier_insert() - Insert GPU SVM notifier 614 * @gpusvm: Pointer to the GPU SVM structure 615 * @notifier: Pointer to the GPU SVM notifier structure 616 * 617 * This function inserts the GPU SVM notifier into the GPU SVM RB tree and list. 618 */ 619 static void drm_gpusvm_notifier_insert(struct drm_gpusvm *gpusvm, 620 struct drm_gpusvm_notifier *notifier) 621 { 622 struct rb_node *node; 623 struct list_head *head; 624 625 interval_tree_insert(¬ifier->itree, &gpusvm->root); 626 627 node = rb_prev(¬ifier->itree.rb); 628 if (node) 629 head = &(to_drm_gpusvm_notifier(node))->entry; 630 else 631 head = &gpusvm->notifier_list; 632 633 list_add(¬ifier->entry, head); 634 } 635 636 /** 637 * drm_gpusvm_notifier_remove() - Remove GPU SVM notifier 638 * @gpusvm: Pointer to the GPU SVM tructure 639 * @notifier: Pointer to the GPU SVM notifier structure 640 * 641 * This function removes the GPU SVM notifier from the GPU SVM RB tree and list. 642 */ 643 static void drm_gpusvm_notifier_remove(struct drm_gpusvm *gpusvm, 644 struct drm_gpusvm_notifier *notifier) 645 { 646 interval_tree_remove(¬ifier->itree, &gpusvm->root); 647 list_del(¬ifier->entry); 648 } 649 650 /** 651 * drm_gpusvm_fini() - Finalize the GPU SVM. 652 * @gpusvm: Pointer to the GPU SVM structure. 653 * 654 * This function finalizes the GPU SVM by cleaning up any remaining ranges and 655 * notifiers, and dropping a reference to struct MM. 656 */ 657 void drm_gpusvm_fini(struct drm_gpusvm *gpusvm) 658 { 659 struct drm_gpusvm_notifier *notifier, *next; 660 661 drm_gpusvm_for_each_notifier_safe(notifier, next, gpusvm, 0, LONG_MAX) { 662 struct drm_gpusvm_range *range, *__next; 663 664 /* 665 * Remove notifier first to avoid racing with any invalidation 666 */ 667 mmu_interval_notifier_remove(¬ifier->notifier); 668 notifier->flags.removed = true; 669 670 drm_gpusvm_for_each_range_safe(range, __next, notifier, 0, 671 LONG_MAX) 672 drm_gpusvm_range_remove(gpusvm, range); 673 } 674 675 mmdrop(gpusvm->mm); 676 WARN_ON(!RB_EMPTY_ROOT(&gpusvm->root.rb_root)); 677 } 678 EXPORT_SYMBOL_GPL(drm_gpusvm_fini); 679 680 /** 681 * drm_gpusvm_notifier_alloc() - Allocate GPU SVM notifier 682 * @gpusvm: Pointer to the GPU SVM structure 683 * @fault_addr: Fault address 684 * 685 * This function allocates and initializes the GPU SVM notifier structure. 686 * 687 * Return: Pointer to the allocated GPU SVM notifier on success, ERR_PTR() on failure. 
688 */ 689 static struct drm_gpusvm_notifier * 690 drm_gpusvm_notifier_alloc(struct drm_gpusvm *gpusvm, unsigned long fault_addr) 691 { 692 struct drm_gpusvm_notifier *notifier; 693 694 if (gpusvm->ops->notifier_alloc) 695 notifier = gpusvm->ops->notifier_alloc(); 696 else 697 notifier = kzalloc(sizeof(*notifier), GFP_KERNEL); 698 699 if (!notifier) 700 return ERR_PTR(-ENOMEM); 701 702 notifier->gpusvm = gpusvm; 703 notifier->itree.start = ALIGN_DOWN(fault_addr, gpusvm->notifier_size); 704 notifier->itree.last = ALIGN(fault_addr + 1, gpusvm->notifier_size) - 1; 705 INIT_LIST_HEAD(¬ifier->entry); 706 notifier->root = RB_ROOT_CACHED; 707 INIT_LIST_HEAD(¬ifier->range_list); 708 709 return notifier; 710 } 711 712 /** 713 * drm_gpusvm_notifier_free() - Free GPU SVM notifier 714 * @gpusvm: Pointer to the GPU SVM structure 715 * @notifier: Pointer to the GPU SVM notifier structure 716 * 717 * This function frees the GPU SVM notifier structure. 718 */ 719 static void drm_gpusvm_notifier_free(struct drm_gpusvm *gpusvm, 720 struct drm_gpusvm_notifier *notifier) 721 { 722 WARN_ON(!RB_EMPTY_ROOT(¬ifier->root.rb_root)); 723 724 if (gpusvm->ops->notifier_free) 725 gpusvm->ops->notifier_free(notifier); 726 else 727 kfree(notifier); 728 } 729 730 /** 731 * to_drm_gpusvm_range() - retrieve the container struct for a given rbtree node 732 * @node: a pointer to the rbtree node embedded within a drm_gpusvm_range struct 733 * 734 * Return: A pointer to the containing drm_gpusvm_range structure. 735 */ 736 static struct drm_gpusvm_range *to_drm_gpusvm_range(struct rb_node *node) 737 { 738 return container_of(node, struct drm_gpusvm_range, itree.rb); 739 } 740 741 /** 742 * drm_gpusvm_range_insert() - Insert GPU SVM range 743 * @notifier: Pointer to the GPU SVM notifier structure 744 * @range: Pointer to the GPU SVM range structure 745 * 746 * This function inserts the GPU SVM range into the notifier RB tree and list. 747 */ 748 static void drm_gpusvm_range_insert(struct drm_gpusvm_notifier *notifier, 749 struct drm_gpusvm_range *range) 750 { 751 struct rb_node *node; 752 struct list_head *head; 753 754 drm_gpusvm_notifier_lock(notifier->gpusvm); 755 interval_tree_insert(&range->itree, ¬ifier->root); 756 757 node = rb_prev(&range->itree.rb); 758 if (node) 759 head = &(to_drm_gpusvm_range(node))->entry; 760 else 761 head = ¬ifier->range_list; 762 763 list_add(&range->entry, head); 764 drm_gpusvm_notifier_unlock(notifier->gpusvm); 765 } 766 767 /** 768 * __drm_gpusvm_range_remove() - Remove GPU SVM range 769 * @notifier: Pointer to the GPU SVM notifier structure 770 * @range: Pointer to the GPU SVM range structure 771 * 772 * This macro removes the GPU SVM range from the notifier RB tree and list. 773 */ 774 static void __drm_gpusvm_range_remove(struct drm_gpusvm_notifier *notifier, 775 struct drm_gpusvm_range *range) 776 { 777 interval_tree_remove(&range->itree, ¬ifier->root); 778 list_del(&range->entry); 779 } 780 781 /** 782 * drm_gpusvm_range_alloc() - Allocate GPU SVM range 783 * @gpusvm: Pointer to the GPU SVM structure 784 * @notifier: Pointer to the GPU SVM notifier structure 785 * @fault_addr: Fault address 786 * @chunk_size: Chunk size 787 * @migrate_devmem: Flag indicating whether to migrate device memory 788 * 789 * This function allocates and initializes the GPU SVM range structure. 790 * 791 * Return: Pointer to the allocated GPU SVM range on success, ERR_PTR() on failure. 
792 */ 793 static struct drm_gpusvm_range * 794 drm_gpusvm_range_alloc(struct drm_gpusvm *gpusvm, 795 struct drm_gpusvm_notifier *notifier, 796 unsigned long fault_addr, unsigned long chunk_size, 797 bool migrate_devmem) 798 { 799 struct drm_gpusvm_range *range; 800 801 if (gpusvm->ops->range_alloc) 802 range = gpusvm->ops->range_alloc(gpusvm); 803 else 804 range = kzalloc(sizeof(*range), GFP_KERNEL); 805 806 if (!range) 807 return ERR_PTR(-ENOMEM); 808 809 kref_init(&range->refcount); 810 range->gpusvm = gpusvm; 811 range->notifier = notifier; 812 range->itree.start = ALIGN_DOWN(fault_addr, chunk_size); 813 range->itree.last = ALIGN(fault_addr + 1, chunk_size) - 1; 814 INIT_LIST_HEAD(&range->entry); 815 range->notifier_seq = LONG_MAX; 816 range->flags.migrate_devmem = migrate_devmem ? 1 : 0; 817 818 return range; 819 } 820 821 /** 822 * drm_gpusvm_check_pages() - Check pages 823 * @gpusvm: Pointer to the GPU SVM structure 824 * @notifier: Pointer to the GPU SVM notifier structure 825 * @start: Start address 826 * @end: End address 827 * 828 * Check if pages between start and end have been faulted in on the CPU. Use to 829 * prevent migration of pages without CPU backing store. 830 * 831 * Return: True if pages have been faulted into CPU, False otherwise 832 */ 833 static bool drm_gpusvm_check_pages(struct drm_gpusvm *gpusvm, 834 struct drm_gpusvm_notifier *notifier, 835 unsigned long start, unsigned long end) 836 { 837 struct hmm_range hmm_range = { 838 .default_flags = 0, 839 .notifier = ¬ifier->notifier, 840 .start = start, 841 .end = end, 842 .dev_private_owner = gpusvm->device_private_page_owner, 843 }; 844 unsigned long timeout = 845 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); 846 unsigned long *pfns; 847 unsigned long npages = npages_in_range(start, end); 848 int err, i; 849 850 mmap_assert_locked(gpusvm->mm); 851 852 pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); 853 if (!pfns) 854 return false; 855 856 hmm_range.notifier_seq = mmu_interval_read_begin(¬ifier->notifier); 857 hmm_range.hmm_pfns = pfns; 858 859 while (true) { 860 err = hmm_range_fault(&hmm_range); 861 if (err == -EBUSY) { 862 if (time_after(jiffies, timeout)) 863 break; 864 865 hmm_range.notifier_seq = 866 mmu_interval_read_begin(¬ifier->notifier); 867 continue; 868 } 869 break; 870 } 871 if (err) 872 goto err_free; 873 874 for (i = 0; i < npages;) { 875 if (!(pfns[i] & HMM_PFN_VALID)) { 876 err = -EFAULT; 877 goto err_free; 878 } 879 i += 0x1 << hmm_pfn_to_map_order(pfns[i]); 880 } 881 882 err_free: 883 kvfree(pfns); 884 return err ? false : true; 885 } 886 887 /** 888 * drm_gpusvm_range_chunk_size() - Determine chunk size for GPU SVM range 889 * @gpusvm: Pointer to the GPU SVM structure 890 * @notifier: Pointer to the GPU SVM notifier structure 891 * @vas: Pointer to the virtual memory area structure 892 * @fault_addr: Fault address 893 * @gpuva_start: Start address of GPUVA which mirrors CPU 894 * @gpuva_end: End address of GPUVA which mirrors CPU 895 * @check_pages_threshold: Check CPU pages for present threshold 896 * 897 * This function determines the chunk size for the GPU SVM range based on the 898 * fault address, GPU SVM chunk sizes, existing GPU SVM ranges, and the virtual 899 * memory area boundaries. 900 * 901 * Return: Chunk size on success, LONG_MAX on failure. 
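 *
 * As a worked example (illustrative values): with chunk sizes of
 * {SZ_2M, SZ_64K, SZ_4K} and a fault at 0x7f1234567000 inside a CPU VMA
 * spanning only [0x7f1234500000, 0x7f1234580000), the 2M candidate
 * [0x7f1234400000, 0x7f1234600000) is rejected because it crosses the VMA
 * bounds, while the 64K candidate [0x7f1234560000, 0x7f1234570000) fits and
 * is selected, provided it also lies within the notifier and GPUVA bounds,
 * does not overlap an existing range, and passes the check-pages threshold
 * test.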
902 */ 903 static unsigned long 904 drm_gpusvm_range_chunk_size(struct drm_gpusvm *gpusvm, 905 struct drm_gpusvm_notifier *notifier, 906 struct vm_area_struct *vas, 907 unsigned long fault_addr, 908 unsigned long gpuva_start, 909 unsigned long gpuva_end, 910 unsigned long check_pages_threshold) 911 { 912 unsigned long start, end; 913 int i = 0; 914 915 retry: 916 for (; i < gpusvm->num_chunks; ++i) { 917 start = ALIGN_DOWN(fault_addr, gpusvm->chunk_sizes[i]); 918 end = ALIGN(fault_addr + 1, gpusvm->chunk_sizes[i]); 919 920 if (start >= vas->vm_start && end <= vas->vm_end && 921 start >= drm_gpusvm_notifier_start(notifier) && 922 end <= drm_gpusvm_notifier_end(notifier) && 923 start >= gpuva_start && end <= gpuva_end) 924 break; 925 } 926 927 if (i == gpusvm->num_chunks) 928 return LONG_MAX; 929 930 /* 931 * If allocation more than page, ensure not to overlap with existing 932 * ranges. 933 */ 934 if (end - start != SZ_4K) { 935 struct drm_gpusvm_range *range; 936 937 range = drm_gpusvm_range_find(notifier, start, end); 938 if (range) { 939 ++i; 940 goto retry; 941 } 942 943 /* 944 * XXX: Only create range on pages CPU has faulted in. Without 945 * this check, or prefault, on BMG 'xe_exec_system_allocator --r 946 * process-many-malloc' fails. In the failure case, each process 947 * mallocs 16k but the CPU VMA is ~128k which results in 64k SVM 948 * ranges. When migrating the SVM ranges, some processes fail in 949 * drm_gpusvm_migrate_to_devmem with 'migrate.cpages != npages' 950 * and then upon drm_gpusvm_range_get_pages device pages from 951 * other processes are collected + faulted in which creates all 952 * sorts of problems. Unsure exactly how this happening, also 953 * problem goes away if 'xe_exec_system_allocator --r 954 * process-many-malloc' mallocs at least 64k at a time. 955 */ 956 if (end - start <= check_pages_threshold && 957 !drm_gpusvm_check_pages(gpusvm, notifier, start, end)) { 958 ++i; 959 goto retry; 960 } 961 } 962 963 return end - start; 964 } 965 966 #ifdef CONFIG_LOCKDEP 967 /** 968 * drm_gpusvm_driver_lock_held() - Assert GPU SVM driver lock is held 969 * @gpusvm: Pointer to the GPU SVM structure. 970 * 971 * Ensure driver lock is held. 972 */ 973 static void drm_gpusvm_driver_lock_held(struct drm_gpusvm *gpusvm) 974 { 975 if ((gpusvm)->lock_dep_map) 976 lockdep_assert(lock_is_held_type((gpusvm)->lock_dep_map, 0)); 977 } 978 #else 979 static void drm_gpusvm_driver_lock_held(struct drm_gpusvm *gpusvm) 980 { 981 } 982 #endif 983 984 /** 985 * drm_gpusvm_find_vma_start() - Find start address for first VMA in range 986 * @gpusvm: Pointer to the GPU SVM structure 987 * @start: The inclusive start user address. 988 * @end: The exclusive end user address. 989 * 990 * Returns: The start address of first VMA within the provided range, 991 * ULONG_MAX otherwise. Assumes start_addr < end_addr. 
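 *
 * For example, a driver can use this to cheaply test whether any CPU mapping
 * backs a GPU VA window before doing further work (an illustrative use, not
 * something this helper requires):
 *
 * .. code-block:: c
 *
 *	if (drm_gpusvm_find_vma_start(gpusvm, start, end) == ULONG_MAX)
 *		return -ENOENT;	// no CPU VMA intersects [start, end)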
992 */ 993 unsigned long 994 drm_gpusvm_find_vma_start(struct drm_gpusvm *gpusvm, 995 unsigned long start, 996 unsigned long end) 997 { 998 struct mm_struct *mm = gpusvm->mm; 999 struct vm_area_struct *vma; 1000 unsigned long addr = ULONG_MAX; 1001 1002 if (!mmget_not_zero(mm)) 1003 return addr; 1004 1005 mmap_read_lock(mm); 1006 1007 vma = find_vma_intersection(mm, start, end); 1008 if (vma) 1009 addr = vma->vm_start; 1010 1011 mmap_read_unlock(mm); 1012 mmput(mm); 1013 1014 return addr; 1015 } 1016 EXPORT_SYMBOL_GPL(drm_gpusvm_find_vma_start); 1017 1018 /** 1019 * drm_gpusvm_range_find_or_insert() - Find or insert GPU SVM range 1020 * @gpusvm: Pointer to the GPU SVM structure 1021 * @fault_addr: Fault address 1022 * @gpuva_start: Start address of GPUVA which mirrors CPU 1023 * @gpuva_end: End address of GPUVA which mirrors CPU 1024 * @ctx: GPU SVM context 1025 * 1026 * This function finds or inserts a newly allocated a GPU SVM range based on the 1027 * fault address. Caller must hold a lock to protect range lookup and insertion. 1028 * 1029 * Return: Pointer to the GPU SVM range on success, ERR_PTR() on failure. 1030 */ 1031 struct drm_gpusvm_range * 1032 drm_gpusvm_range_find_or_insert(struct drm_gpusvm *gpusvm, 1033 unsigned long fault_addr, 1034 unsigned long gpuva_start, 1035 unsigned long gpuva_end, 1036 const struct drm_gpusvm_ctx *ctx) 1037 { 1038 struct drm_gpusvm_notifier *notifier; 1039 struct drm_gpusvm_range *range; 1040 struct mm_struct *mm = gpusvm->mm; 1041 struct vm_area_struct *vas; 1042 bool notifier_alloc = false; 1043 unsigned long chunk_size; 1044 int err; 1045 bool migrate_devmem; 1046 1047 drm_gpusvm_driver_lock_held(gpusvm); 1048 1049 if (fault_addr < gpusvm->mm_start || 1050 fault_addr > gpusvm->mm_start + gpusvm->mm_range) 1051 return ERR_PTR(-EINVAL); 1052 1053 if (!mmget_not_zero(mm)) 1054 return ERR_PTR(-EFAULT); 1055 1056 notifier = drm_gpusvm_notifier_find(gpusvm, fault_addr); 1057 if (!notifier) { 1058 notifier = drm_gpusvm_notifier_alloc(gpusvm, fault_addr); 1059 if (IS_ERR(notifier)) { 1060 err = PTR_ERR(notifier); 1061 goto err_mmunlock; 1062 } 1063 notifier_alloc = true; 1064 err = mmu_interval_notifier_insert(¬ifier->notifier, 1065 mm, 1066 drm_gpusvm_notifier_start(notifier), 1067 drm_gpusvm_notifier_size(notifier), 1068 &drm_gpusvm_notifier_ops); 1069 if (err) 1070 goto err_notifier; 1071 } 1072 1073 mmap_read_lock(mm); 1074 1075 vas = vma_lookup(mm, fault_addr); 1076 if (!vas) { 1077 err = -ENOENT; 1078 goto err_notifier_remove; 1079 } 1080 1081 if (!ctx->read_only && !(vas->vm_flags & VM_WRITE)) { 1082 err = -EPERM; 1083 goto err_notifier_remove; 1084 } 1085 1086 range = drm_gpusvm_range_find(notifier, fault_addr, fault_addr + 1); 1087 if (range) 1088 goto out_mmunlock; 1089 /* 1090 * XXX: Short-circuiting migration based on migrate_vma_* current 1091 * limitations. If/when migrate_vma_* add more support, this logic will 1092 * have to change. 
1093 */ 1094 migrate_devmem = ctx->devmem_possible && 1095 vma_is_anonymous(vas) && !is_vm_hugetlb_page(vas); 1096 1097 chunk_size = drm_gpusvm_range_chunk_size(gpusvm, notifier, vas, 1098 fault_addr, gpuva_start, 1099 gpuva_end, 1100 ctx->check_pages_threshold); 1101 if (chunk_size == LONG_MAX) { 1102 err = -EINVAL; 1103 goto err_notifier_remove; 1104 } 1105 1106 range = drm_gpusvm_range_alloc(gpusvm, notifier, fault_addr, chunk_size, 1107 migrate_devmem); 1108 if (IS_ERR(range)) { 1109 err = PTR_ERR(range); 1110 goto err_notifier_remove; 1111 } 1112 1113 drm_gpusvm_range_insert(notifier, range); 1114 if (notifier_alloc) 1115 drm_gpusvm_notifier_insert(gpusvm, notifier); 1116 1117 out_mmunlock: 1118 mmap_read_unlock(mm); 1119 mmput(mm); 1120 1121 return range; 1122 1123 err_notifier_remove: 1124 mmap_read_unlock(mm); 1125 if (notifier_alloc) 1126 mmu_interval_notifier_remove(¬ifier->notifier); 1127 err_notifier: 1128 if (notifier_alloc) 1129 drm_gpusvm_notifier_free(gpusvm, notifier); 1130 err_mmunlock: 1131 mmput(mm); 1132 return ERR_PTR(err); 1133 } 1134 EXPORT_SYMBOL_GPL(drm_gpusvm_range_find_or_insert); 1135 1136 /** 1137 * __drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range (internal) 1138 * @gpusvm: Pointer to the GPU SVM structure 1139 * @range: Pointer to the GPU SVM range structure 1140 * @npages: Number of pages to unmap 1141 * 1142 * This function unmap pages associated with a GPU SVM range. Assumes and 1143 * asserts correct locking is in place when called. 1144 */ 1145 static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm, 1146 struct drm_gpusvm_range *range, 1147 unsigned long npages) 1148 { 1149 unsigned long i, j; 1150 struct drm_pagemap *dpagemap = range->dpagemap; 1151 struct device *dev = gpusvm->drm->dev; 1152 1153 lockdep_assert_held(&gpusvm->notifier_lock); 1154 1155 if (range->flags.has_dma_mapping) { 1156 struct drm_gpusvm_range_flags flags = { 1157 .__flags = range->flags.__flags, 1158 }; 1159 1160 for (i = 0, j = 0; i < npages; j++) { 1161 struct drm_pagemap_device_addr *addr = &range->dma_addr[j]; 1162 1163 if (addr->proto == DRM_INTERCONNECT_SYSTEM) 1164 dma_unmap_page(dev, 1165 addr->addr, 1166 PAGE_SIZE << addr->order, 1167 addr->dir); 1168 else if (dpagemap && dpagemap->ops->device_unmap) 1169 dpagemap->ops->device_unmap(dpagemap, 1170 dev, *addr); 1171 i += 1 << addr->order; 1172 } 1173 1174 /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */ 1175 flags.has_devmem_pages = false; 1176 flags.has_dma_mapping = false; 1177 WRITE_ONCE(range->flags.__flags, flags.__flags); 1178 1179 range->dpagemap = NULL; 1180 } 1181 } 1182 1183 /** 1184 * drm_gpusvm_range_free_pages() - Free pages associated with a GPU SVM range 1185 * @gpusvm: Pointer to the GPU SVM structure 1186 * @range: Pointer to the GPU SVM range structure 1187 * 1188 * This function frees the dma address array associated with a GPU SVM range. 1189 */ 1190 static void drm_gpusvm_range_free_pages(struct drm_gpusvm *gpusvm, 1191 struct drm_gpusvm_range *range) 1192 { 1193 lockdep_assert_held(&gpusvm->notifier_lock); 1194 1195 if (range->dma_addr) { 1196 kvfree(range->dma_addr); 1197 range->dma_addr = NULL; 1198 } 1199 } 1200 1201 /** 1202 * drm_gpusvm_range_remove() - Remove GPU SVM range 1203 * @gpusvm: Pointer to the GPU SVM structure 1204 * @range: Pointer to the GPU SVM range to be removed 1205 * 1206 * This function removes the specified GPU SVM range and also removes the parent 1207 * GPU SVM notifier if no more ranges remain in the notifier. 
The caller must 1208 * hold a lock to protect range and notifier removal. 1209 */ 1210 void drm_gpusvm_range_remove(struct drm_gpusvm *gpusvm, 1211 struct drm_gpusvm_range *range) 1212 { 1213 unsigned long npages = npages_in_range(drm_gpusvm_range_start(range), 1214 drm_gpusvm_range_end(range)); 1215 struct drm_gpusvm_notifier *notifier; 1216 1217 drm_gpusvm_driver_lock_held(gpusvm); 1218 1219 notifier = drm_gpusvm_notifier_find(gpusvm, 1220 drm_gpusvm_range_start(range)); 1221 if (WARN_ON_ONCE(!notifier)) 1222 return; 1223 1224 drm_gpusvm_notifier_lock(gpusvm); 1225 __drm_gpusvm_range_unmap_pages(gpusvm, range, npages); 1226 drm_gpusvm_range_free_pages(gpusvm, range); 1227 __drm_gpusvm_range_remove(notifier, range); 1228 drm_gpusvm_notifier_unlock(gpusvm); 1229 1230 drm_gpusvm_range_put(range); 1231 1232 if (RB_EMPTY_ROOT(¬ifier->root.rb_root)) { 1233 if (!notifier->flags.removed) 1234 mmu_interval_notifier_remove(¬ifier->notifier); 1235 drm_gpusvm_notifier_remove(gpusvm, notifier); 1236 drm_gpusvm_notifier_free(gpusvm, notifier); 1237 } 1238 } 1239 EXPORT_SYMBOL_GPL(drm_gpusvm_range_remove); 1240 1241 /** 1242 * drm_gpusvm_range_get() - Get a reference to GPU SVM range 1243 * @range: Pointer to the GPU SVM range 1244 * 1245 * This function increments the reference count of the specified GPU SVM range. 1246 * 1247 * Return: Pointer to the GPU SVM range. 1248 */ 1249 struct drm_gpusvm_range * 1250 drm_gpusvm_range_get(struct drm_gpusvm_range *range) 1251 { 1252 kref_get(&range->refcount); 1253 1254 return range; 1255 } 1256 EXPORT_SYMBOL_GPL(drm_gpusvm_range_get); 1257 1258 /** 1259 * drm_gpusvm_range_destroy() - Destroy GPU SVM range 1260 * @refcount: Pointer to the reference counter embedded in the GPU SVM range 1261 * 1262 * This function destroys the specified GPU SVM range when its reference count 1263 * reaches zero. If a custom range-free function is provided, it is invoked to 1264 * free the range; otherwise, the range is deallocated using kfree(). 1265 */ 1266 static void drm_gpusvm_range_destroy(struct kref *refcount) 1267 { 1268 struct drm_gpusvm_range *range = 1269 container_of(refcount, struct drm_gpusvm_range, refcount); 1270 struct drm_gpusvm *gpusvm = range->gpusvm; 1271 1272 if (gpusvm->ops->range_free) 1273 gpusvm->ops->range_free(range); 1274 else 1275 kfree(range); 1276 } 1277 1278 /** 1279 * drm_gpusvm_range_put() - Put a reference to GPU SVM range 1280 * @range: Pointer to the GPU SVM range 1281 * 1282 * This function decrements the reference count of the specified GPU SVM range 1283 * and frees it when the count reaches zero. 1284 */ 1285 void drm_gpusvm_range_put(struct drm_gpusvm_range *range) 1286 { 1287 kref_put(&range->refcount, drm_gpusvm_range_destroy); 1288 } 1289 EXPORT_SYMBOL_GPL(drm_gpusvm_range_put); 1290 1291 /** 1292 * drm_gpusvm_range_pages_valid() - GPU SVM range pages valid 1293 * @gpusvm: Pointer to the GPU SVM structure 1294 * @range: Pointer to the GPU SVM range structure 1295 * 1296 * This function determines if a GPU SVM range pages are valid. Expected be 1297 * called holding gpusvm->notifier_lock and as the last step before committing a 1298 * GPU binding. This is akin to a notifier seqno check in the HMM documentation 1299 * but due to wider notifiers (i.e., notifiers which span multiple ranges) this 1300 * function is required for finer grained checking (i.e., per range) if pages 1301 * are valid. 
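 *
 * The expected call pattern mirrors the driver_bind_range() sketch in the
 * Examples section at the top of this file, where driver_commit_bind() is the
 * driver-specific commit step and err is handled as in that sketch:
 *
 * .. code-block:: c
 *
 *	drm_gpusvm_notifier_lock(gpusvm);
 *	if (drm_gpusvm_range_pages_valid(gpusvm, range))
 *		driver_commit_bind(gpusvm, range);
 *	else
 *		err = -EAGAIN;	// CPU mappings changed, caller should retry
 *	drm_gpusvm_notifier_unlock(gpusvm);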
1302 * 1303 * Return: True if GPU SVM range has valid pages, False otherwise 1304 */ 1305 bool drm_gpusvm_range_pages_valid(struct drm_gpusvm *gpusvm, 1306 struct drm_gpusvm_range *range) 1307 { 1308 lockdep_assert_held(&gpusvm->notifier_lock); 1309 1310 return range->flags.has_devmem_pages || range->flags.has_dma_mapping; 1311 } 1312 EXPORT_SYMBOL_GPL(drm_gpusvm_range_pages_valid); 1313 1314 /** 1315 * drm_gpusvm_range_pages_valid_unlocked() - GPU SVM range pages valid unlocked 1316 * @gpusvm: Pointer to the GPU SVM structure 1317 * @range: Pointer to the GPU SVM range structure 1318 * 1319 * This function determines if a GPU SVM range pages are valid. Expected be 1320 * called without holding gpusvm->notifier_lock. 1321 * 1322 * Return: True if GPU SVM range has valid pages, False otherwise 1323 */ 1324 static bool 1325 drm_gpusvm_range_pages_valid_unlocked(struct drm_gpusvm *gpusvm, 1326 struct drm_gpusvm_range *range) 1327 { 1328 bool pages_valid; 1329 1330 if (!range->dma_addr) 1331 return false; 1332 1333 drm_gpusvm_notifier_lock(gpusvm); 1334 pages_valid = drm_gpusvm_range_pages_valid(gpusvm, range); 1335 if (!pages_valid) 1336 drm_gpusvm_range_free_pages(gpusvm, range); 1337 drm_gpusvm_notifier_unlock(gpusvm); 1338 1339 return pages_valid; 1340 } 1341 1342 /** 1343 * drm_gpusvm_range_get_pages() - Get pages for a GPU SVM range 1344 * @gpusvm: Pointer to the GPU SVM structure 1345 * @range: Pointer to the GPU SVM range structure 1346 * @ctx: GPU SVM context 1347 * 1348 * This function gets pages for a GPU SVM range and ensures they are mapped for 1349 * DMA access. 1350 * 1351 * Return: 0 on success, negative error code on failure. 1352 */ 1353 int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm, 1354 struct drm_gpusvm_range *range, 1355 const struct drm_gpusvm_ctx *ctx) 1356 { 1357 struct mmu_interval_notifier *notifier = &range->notifier->notifier; 1358 struct hmm_range hmm_range = { 1359 .default_flags = HMM_PFN_REQ_FAULT | (ctx->read_only ? 
0 : 1360 HMM_PFN_REQ_WRITE), 1361 .notifier = notifier, 1362 .start = drm_gpusvm_range_start(range), 1363 .end = drm_gpusvm_range_end(range), 1364 .dev_private_owner = gpusvm->device_private_page_owner, 1365 }; 1366 struct mm_struct *mm = gpusvm->mm; 1367 struct drm_gpusvm_zdd *zdd; 1368 unsigned long timeout = 1369 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); 1370 unsigned long i, j; 1371 unsigned long npages = npages_in_range(drm_gpusvm_range_start(range), 1372 drm_gpusvm_range_end(range)); 1373 unsigned long num_dma_mapped; 1374 unsigned int order = 0; 1375 unsigned long *pfns; 1376 int err = 0; 1377 struct dev_pagemap *pagemap; 1378 struct drm_pagemap *dpagemap; 1379 struct drm_gpusvm_range_flags flags; 1380 1381 retry: 1382 hmm_range.notifier_seq = mmu_interval_read_begin(notifier); 1383 if (drm_gpusvm_range_pages_valid_unlocked(gpusvm, range)) 1384 goto set_seqno; 1385 1386 pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); 1387 if (!pfns) 1388 return -ENOMEM; 1389 1390 if (!mmget_not_zero(mm)) { 1391 err = -EFAULT; 1392 goto err_free; 1393 } 1394 1395 hmm_range.hmm_pfns = pfns; 1396 while (true) { 1397 mmap_read_lock(mm); 1398 err = hmm_range_fault(&hmm_range); 1399 mmap_read_unlock(mm); 1400 1401 if (err == -EBUSY) { 1402 if (time_after(jiffies, timeout)) 1403 break; 1404 1405 hmm_range.notifier_seq = 1406 mmu_interval_read_begin(notifier); 1407 continue; 1408 } 1409 break; 1410 } 1411 mmput(mm); 1412 if (err) 1413 goto err_free; 1414 1415 map_pages: 1416 /* 1417 * Perform all dma mappings under the notifier lock to not 1418 * access freed pages. A notifier will either block on 1419 * the notifier lock or unmap dma. 1420 */ 1421 drm_gpusvm_notifier_lock(gpusvm); 1422 1423 flags.__flags = range->flags.__flags; 1424 if (flags.unmapped) { 1425 drm_gpusvm_notifier_unlock(gpusvm); 1426 err = -EFAULT; 1427 goto err_free; 1428 } 1429 1430 if (mmu_interval_read_retry(notifier, hmm_range.notifier_seq)) { 1431 drm_gpusvm_notifier_unlock(gpusvm); 1432 kvfree(pfns); 1433 goto retry; 1434 } 1435 1436 if (!range->dma_addr) { 1437 /* Unlock and restart mapping to allocate memory. */ 1438 drm_gpusvm_notifier_unlock(gpusvm); 1439 range->dma_addr = kvmalloc_array(npages, 1440 sizeof(*range->dma_addr), 1441 GFP_KERNEL); 1442 if (!range->dma_addr) { 1443 err = -ENOMEM; 1444 goto err_free; 1445 } 1446 goto map_pages; 1447 } 1448 1449 zdd = NULL; 1450 num_dma_mapped = 0; 1451 for (i = 0, j = 0; i < npages; ++j) { 1452 struct page *page = hmm_pfn_to_page(pfns[i]); 1453 1454 order = hmm_pfn_to_map_order(pfns[i]); 1455 if (is_device_private_page(page) || 1456 is_device_coherent_page(page)) { 1457 if (zdd != page->zone_device_data && i > 0) { 1458 err = -EOPNOTSUPP; 1459 goto err_unmap; 1460 } 1461 zdd = page->zone_device_data; 1462 if (pagemap != page_pgmap(page)) { 1463 if (i > 0) { 1464 err = -EOPNOTSUPP; 1465 goto err_unmap; 1466 } 1467 1468 pagemap = page_pgmap(page); 1469 dpagemap = zdd->devmem_allocation->dpagemap; 1470 if (drm_WARN_ON(gpusvm->drm, !dpagemap)) { 1471 /* 1472 * Raced. This is not supposed to happen 1473 * since hmm_range_fault() should've migrated 1474 * this page to system. 
1475 */ 1476 err = -EAGAIN; 1477 goto err_unmap; 1478 } 1479 } 1480 range->dma_addr[j] = 1481 dpagemap->ops->device_map(dpagemap, 1482 gpusvm->drm->dev, 1483 page, order, 1484 DMA_BIDIRECTIONAL); 1485 if (dma_mapping_error(gpusvm->drm->dev, 1486 range->dma_addr[j].addr)) { 1487 err = -EFAULT; 1488 goto err_unmap; 1489 } 1490 } else { 1491 dma_addr_t addr; 1492 1493 if (is_zone_device_page(page) || zdd) { 1494 err = -EOPNOTSUPP; 1495 goto err_unmap; 1496 } 1497 1498 if (ctx->devmem_only) { 1499 err = -EFAULT; 1500 goto err_unmap; 1501 } 1502 1503 addr = dma_map_page(gpusvm->drm->dev, 1504 page, 0, 1505 PAGE_SIZE << order, 1506 DMA_BIDIRECTIONAL); 1507 if (dma_mapping_error(gpusvm->drm->dev, addr)) { 1508 err = -EFAULT; 1509 goto err_unmap; 1510 } 1511 1512 range->dma_addr[j] = drm_pagemap_device_addr_encode 1513 (addr, DRM_INTERCONNECT_SYSTEM, order, 1514 DMA_BIDIRECTIONAL); 1515 } 1516 i += 1 << order; 1517 num_dma_mapped = i; 1518 flags.has_dma_mapping = true; 1519 } 1520 1521 if (zdd) { 1522 flags.has_devmem_pages = true; 1523 range->dpagemap = dpagemap; 1524 } 1525 1526 /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */ 1527 WRITE_ONCE(range->flags.__flags, flags.__flags); 1528 1529 drm_gpusvm_notifier_unlock(gpusvm); 1530 kvfree(pfns); 1531 set_seqno: 1532 range->notifier_seq = hmm_range.notifier_seq; 1533 1534 return 0; 1535 1536 err_unmap: 1537 __drm_gpusvm_range_unmap_pages(gpusvm, range, num_dma_mapped); 1538 drm_gpusvm_notifier_unlock(gpusvm); 1539 err_free: 1540 kvfree(pfns); 1541 if (err == -EAGAIN) 1542 goto retry; 1543 return err; 1544 } 1545 EXPORT_SYMBOL_GPL(drm_gpusvm_range_get_pages); 1546 1547 /** 1548 * drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range 1549 * @gpusvm: Pointer to the GPU SVM structure 1550 * @range: Pointer to the GPU SVM range structure 1551 * @ctx: GPU SVM context 1552 * 1553 * This function unmaps pages associated with a GPU SVM range. If @in_notifier 1554 * is set, it is assumed that gpusvm->notifier_lock is held in write mode; if it 1555 * is clear, it acquires gpusvm->notifier_lock in read mode. Must be called on 1556 * each GPU SVM range attached to notifier in gpusvm->ops->invalidate for IOMMU 1557 * security model. 1558 */ 1559 void drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm, 1560 struct drm_gpusvm_range *range, 1561 const struct drm_gpusvm_ctx *ctx) 1562 { 1563 unsigned long npages = npages_in_range(drm_gpusvm_range_start(range), 1564 drm_gpusvm_range_end(range)); 1565 1566 if (ctx->in_notifier) 1567 lockdep_assert_held_write(&gpusvm->notifier_lock); 1568 else 1569 drm_gpusvm_notifier_lock(gpusvm); 1570 1571 __drm_gpusvm_range_unmap_pages(gpusvm, range, npages); 1572 1573 if (!ctx->in_notifier) 1574 drm_gpusvm_notifier_unlock(gpusvm); 1575 } 1576 EXPORT_SYMBOL_GPL(drm_gpusvm_range_unmap_pages); 1577 1578 /** 1579 * drm_gpusvm_migration_unlock_put_page() - Put a migration page 1580 * @page: Pointer to the page to put 1581 * 1582 * This function unlocks and puts a page. 1583 */ 1584 static void drm_gpusvm_migration_unlock_put_page(struct page *page) 1585 { 1586 unlock_page(page); 1587 put_page(page); 1588 } 1589 1590 /** 1591 * drm_gpusvm_migration_unlock_put_pages() - Put migration pages 1592 * @npages: Number of pages 1593 * @migrate_pfn: Array of migrate page frame numbers 1594 * 1595 * This function unlocks and puts an array of pages. 
1596 */ 1597 static void drm_gpusvm_migration_unlock_put_pages(unsigned long npages, 1598 unsigned long *migrate_pfn) 1599 { 1600 unsigned long i; 1601 1602 for (i = 0; i < npages; ++i) { 1603 struct page *page; 1604 1605 if (!migrate_pfn[i]) 1606 continue; 1607 1608 page = migrate_pfn_to_page(migrate_pfn[i]); 1609 drm_gpusvm_migration_unlock_put_page(page); 1610 migrate_pfn[i] = 0; 1611 } 1612 } 1613 1614 /** 1615 * drm_gpusvm_get_devmem_page() - Get a reference to a device memory page 1616 * @page: Pointer to the page 1617 * @zdd: Pointer to the GPU SVM zone device data 1618 * 1619 * This function associates the given page with the specified GPU SVM zone 1620 * device data and initializes it for zone device usage. 1621 */ 1622 static void drm_gpusvm_get_devmem_page(struct page *page, 1623 struct drm_gpusvm_zdd *zdd) 1624 { 1625 page->zone_device_data = drm_gpusvm_zdd_get(zdd); 1626 zone_device_page_init(page); 1627 } 1628 1629 /** 1630 * drm_gpusvm_migrate_map_pages() - Map migration pages for GPU SVM migration 1631 * @dev: The device for which the pages are being mapped 1632 * @dma_addr: Array to store DMA addresses corresponding to mapped pages 1633 * @migrate_pfn: Array of migrate page frame numbers to map 1634 * @npages: Number of pages to map 1635 * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL) 1636 * 1637 * This function maps pages of memory for migration usage in GPU SVM. It 1638 * iterates over each page frame number provided in @migrate_pfn, maps the 1639 * corresponding page, and stores the DMA address in the provided @dma_addr 1640 * array. 1641 * 1642 * Return: 0 on success, -EFAULT if an error occurs during mapping. 1643 */ 1644 static int drm_gpusvm_migrate_map_pages(struct device *dev, 1645 dma_addr_t *dma_addr, 1646 unsigned long *migrate_pfn, 1647 unsigned long npages, 1648 enum dma_data_direction dir) 1649 { 1650 unsigned long i; 1651 1652 for (i = 0; i < npages; ++i) { 1653 struct page *page = migrate_pfn_to_page(migrate_pfn[i]); 1654 1655 if (!page) 1656 continue; 1657 1658 if (WARN_ON_ONCE(is_zone_device_page(page))) 1659 return -EFAULT; 1660 1661 dma_addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir); 1662 if (dma_mapping_error(dev, dma_addr[i])) 1663 return -EFAULT; 1664 } 1665 1666 return 0; 1667 } 1668 1669 /** 1670 * drm_gpusvm_migrate_unmap_pages() - Unmap pages previously mapped for GPU SVM migration 1671 * @dev: The device for which the pages were mapped 1672 * @dma_addr: Array of DMA addresses corresponding to mapped pages 1673 * @npages: Number of pages to unmap 1674 * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL) 1675 * 1676 * This function unmaps previously mapped pages of memory for GPU Shared Virtual 1677 * Memory (SVM). It iterates over each DMA address provided in @dma_addr, checks 1678 * if it's valid and not already unmapped, and unmaps the corresponding page. 1679 */ 1680 static void drm_gpusvm_migrate_unmap_pages(struct device *dev, 1681 dma_addr_t *dma_addr, 1682 unsigned long npages, 1683 enum dma_data_direction dir) 1684 { 1685 unsigned long i; 1686 1687 for (i = 0; i < npages; ++i) { 1688 if (!dma_addr[i] || dma_mapping_error(dev, dma_addr[i])) 1689 continue; 1690 1691 dma_unmap_page(dev, dma_addr[i], PAGE_SIZE, dir); 1692 } 1693 } 1694 1695 /** 1696 * drm_gpusvm_migrate_to_devmem() - Migrate GPU SVM range to device memory 1697 * @gpusvm: Pointer to the GPU SVM structure 1698 * @range: Pointer to the GPU SVM range structure 1699 * @devmem_allocation: Pointer to the device memory allocation. 
The caller 1700 * should hold a reference to the device memory allocation, 1701 * which should be dropped via ops->devmem_release or upon 1702 * the failure of this function. 1703 * @ctx: GPU SVM context 1704 * 1705 * This function migrates the specified GPU SVM range to device memory. It 1706 * performs the necessary setup and invokes the driver-specific operations for 1707 * migration to device memory. Upon successful return, @devmem_allocation can 1708 * safely reference @range until ops->devmem_release is called which only upon 1709 * successful return. Expected to be called while holding the mmap lock in read 1710 * mode. 1711 * 1712 * Return: 0 on success, negative error code on failure. 1713 */ 1714 int drm_gpusvm_migrate_to_devmem(struct drm_gpusvm *gpusvm, 1715 struct drm_gpusvm_range *range, 1716 struct drm_gpusvm_devmem *devmem_allocation, 1717 const struct drm_gpusvm_ctx *ctx) 1718 { 1719 const struct drm_gpusvm_devmem_ops *ops = devmem_allocation->ops; 1720 unsigned long start = drm_gpusvm_range_start(range), 1721 end = drm_gpusvm_range_end(range); 1722 struct migrate_vma migrate = { 1723 .start = start, 1724 .end = end, 1725 .pgmap_owner = gpusvm->device_private_page_owner, 1726 .flags = MIGRATE_VMA_SELECT_SYSTEM, 1727 }; 1728 struct mm_struct *mm = gpusvm->mm; 1729 unsigned long i, npages = npages_in_range(start, end); 1730 struct vm_area_struct *vas; 1731 struct drm_gpusvm_zdd *zdd = NULL; 1732 struct page **pages; 1733 dma_addr_t *dma_addr; 1734 void *buf; 1735 int err; 1736 1737 mmap_assert_locked(gpusvm->mm); 1738 1739 if (!range->flags.migrate_devmem) 1740 return -EINVAL; 1741 1742 if (!ops->populate_devmem_pfn || !ops->copy_to_devmem || 1743 !ops->copy_to_ram) 1744 return -EOPNOTSUPP; 1745 1746 vas = vma_lookup(mm, start); 1747 if (!vas) { 1748 err = -ENOENT; 1749 goto err_out; 1750 } 1751 1752 if (end > vas->vm_end || start < vas->vm_start) { 1753 err = -EINVAL; 1754 goto err_out; 1755 } 1756 1757 if (!vma_is_anonymous(vas)) { 1758 err = -EBUSY; 1759 goto err_out; 1760 } 1761 1762 buf = kvcalloc(npages, 2 * sizeof(*migrate.src) + sizeof(*dma_addr) + 1763 sizeof(*pages), GFP_KERNEL); 1764 if (!buf) { 1765 err = -ENOMEM; 1766 goto err_out; 1767 } 1768 dma_addr = buf + (2 * sizeof(*migrate.src) * npages); 1769 pages = buf + (2 * sizeof(*migrate.src) + sizeof(*dma_addr)) * npages; 1770 1771 zdd = drm_gpusvm_zdd_alloc(gpusvm->device_private_page_owner); 1772 if (!zdd) { 1773 err = -ENOMEM; 1774 goto err_free; 1775 } 1776 1777 migrate.vma = vas; 1778 migrate.src = buf; 1779 migrate.dst = migrate.src + npages; 1780 1781 err = migrate_vma_setup(&migrate); 1782 if (err) 1783 goto err_free; 1784 1785 if (!migrate.cpages) { 1786 err = -EFAULT; 1787 goto err_free; 1788 } 1789 1790 if (migrate.cpages != npages) { 1791 err = -EBUSY; 1792 goto err_finalize; 1793 } 1794 1795 err = ops->populate_devmem_pfn(devmem_allocation, npages, migrate.dst); 1796 if (err) 1797 goto err_finalize; 1798 1799 err = drm_gpusvm_migrate_map_pages(devmem_allocation->dev, dma_addr, 1800 migrate.src, npages, DMA_TO_DEVICE); 1801 if (err) 1802 goto err_finalize; 1803 1804 for (i = 0; i < npages; ++i) { 1805 struct page *page = pfn_to_page(migrate.dst[i]); 1806 1807 pages[i] = page; 1808 migrate.dst[i] = migrate_pfn(migrate.dst[i]); 1809 drm_gpusvm_get_devmem_page(page, zdd); 1810 } 1811 1812 err = ops->copy_to_devmem(pages, dma_addr, npages); 1813 if (err) 1814 goto err_finalize; 1815 1816 /* Upon success bind devmem allocation to range and zdd */ 1817 devmem_allocation->timeslice_expiration = 

/**
 * drm_gpusvm_migrate_populate_ram_pfn() - Populate RAM PFNs for a VM area
 * @vas: Pointer to the VM area structure, can be NULL
 * @fault_page: Fault page
 * @npages: Number of pages to populate
 * @mpages: Updated with the number of pages successfully populated and locked
 * @src_mpfn: Source array of migrate PFNs
 * @mpfn: Array of migrate PFNs to populate
 * @addr: Start address for PFN allocation
 *
 * This function populates the RAM migrate page frame numbers (PFNs) for the
 * specified VM area structure. It allocates and locks pages in the VM area for
 * RAM usage. If @vas is non-NULL, alloc_page_vma() is used for allocation;
 * otherwise alloc_page() is used.
 *
 * Return: 0 on success, negative error code on failure.
 */
static int drm_gpusvm_migrate_populate_ram_pfn(struct vm_area_struct *vas,
					       struct page *fault_page,
					       unsigned long npages,
					       unsigned long *mpages,
					       unsigned long *src_mpfn,
					       unsigned long *mpfn,
					       unsigned long addr)
{
	unsigned long i;

	for (i = 0; i < npages; ++i, addr += PAGE_SIZE) {
		struct page *page, *src_page;

		if (!(src_mpfn[i] & MIGRATE_PFN_MIGRATE))
			continue;

		src_page = migrate_pfn_to_page(src_mpfn[i]);
		if (!src_page)
			continue;

		if (fault_page) {
			if (src_page->zone_device_data !=
			    fault_page->zone_device_data)
				continue;
		}

		if (vas)
			page = alloc_page_vma(GFP_HIGHUSER, vas, addr);
		else
			page = alloc_page(GFP_HIGHUSER);

		if (!page)
			goto free_pages;

		mpfn[i] = migrate_pfn(page_to_pfn(page));
	}

	for (i = 0; i < npages; ++i) {
		struct page *page = migrate_pfn_to_page(mpfn[i]);

		if (!page)
			continue;

		WARN_ON_ONCE(!trylock_page(page));
		++*mpages;
	}

	return 0;

free_pages:
	for (i = 0; i < npages; ++i) {
		struct page *page = migrate_pfn_to_page(mpfn[i]);

		if (!page)
			continue;

		put_page(page);
		mpfn[i] = 0;
	}
	return -ENOMEM;
}

/**
 * drm_gpusvm_evict_to_ram() - Evict GPU SVM range to RAM
 * @devmem_allocation: Pointer to the device memory allocation
 *
 * Similar to __drm_gpusvm_migrate_to_ram(), but it does not require the mmap
 * lock; migration is done via the migrate_device_* functions.
 *
 * Return: 0 on success, negative error code on failure.
 */
int drm_gpusvm_evict_to_ram(struct drm_gpusvm_devmem *devmem_allocation)
{
	const struct drm_gpusvm_devmem_ops *ops = devmem_allocation->ops;
	unsigned long npages, mpages = 0;
	struct page **pages;
	unsigned long *src, *dst;
	dma_addr_t *dma_addr;
	void *buf;
	int i, err = 0;
	unsigned int retry_count = 2;

	npages = devmem_allocation->size >> PAGE_SHIFT;

retry:
	if (!mmget_not_zero(devmem_allocation->mm))
		return -EFAULT;

	buf = kvcalloc(npages, 2 * sizeof(*src) + sizeof(*dma_addr) +
		       sizeof(*pages), GFP_KERNEL);
	if (!buf) {
		err = -ENOMEM;
		goto err_out;
	}
	src = buf;
	dst = buf + (sizeof(*src) * npages);
	dma_addr = buf + (2 * sizeof(*src) * npages);
	pages = buf + (2 * sizeof(*src) + sizeof(*dma_addr)) * npages;

	err = ops->populate_devmem_pfn(devmem_allocation, npages, src);
	if (err)
		goto err_free;

	err = migrate_device_pfns(src, npages);
	if (err)
		goto err_free;

	err = drm_gpusvm_migrate_populate_ram_pfn(NULL, NULL, npages, &mpages,
						  src, dst, 0);
	if (err || !mpages)
		goto err_finalize;

	err = drm_gpusvm_migrate_map_pages(devmem_allocation->dev, dma_addr,
					   dst, npages, DMA_FROM_DEVICE);
	if (err)
		goto err_finalize;

	for (i = 0; i < npages; ++i)
		pages[i] = migrate_pfn_to_page(src[i]);

	err = ops->copy_to_ram(pages, dma_addr, npages);
	if (err)
		goto err_finalize;

err_finalize:
	if (err)
		drm_gpusvm_migration_unlock_put_pages(npages, dst);
	migrate_device_pages(src, dst, npages);
	migrate_device_finalize(src, dst, npages);
	drm_gpusvm_migrate_unmap_pages(devmem_allocation->dev, dma_addr, npages,
				       DMA_FROM_DEVICE);
err_free:
	kvfree(buf);
err_out:
	mmput_async(devmem_allocation->mm);

	if (completion_done(&devmem_allocation->detached))
		return 0;

	if (retry_count--) {
		cond_resched();
		goto retry;
	}

	return err ?: -EBUSY;
}
EXPORT_SYMBOL_GPL(drm_gpusvm_evict_to_ram);
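
/*
 * Example (illustrative sketch): a driver eviction path invoked when the
 * device memory backing an SVM allocation must be reclaimed. The driver_bo
 * type, driver_bo_to_devmem() helper, and driver_bo_evict_svm() wrapper are
 * hypothetical; only drm_gpusvm_evict_to_ram() is real. On a zero return, the
 * allocation's pages have been copied back to RAM and the allocation is
 * detached, so the driver may subsequently release the underlying device
 * memory.
 *
 *	int driver_bo_evict_svm(struct driver_bo *bo)
 *	{
 *		struct drm_gpusvm_devmem *devmem = driver_bo_to_devmem(bo);
 *
 *		// May be called without the mmap lock; retries internally.
 *		return drm_gpusvm_evict_to_ram(devmem);
 *	}
 */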

/**
 * __drm_gpusvm_migrate_to_ram() - Migrate GPU SVM range to RAM (internal)
 * @vas: Pointer to the VM area structure
 * @device_private_page_owner: Device private pages owner
 * @page: Pointer to the page for fault handling (can be NULL)
 * @fault_addr: Fault address
 * @size: Size of migration
 *
 * This internal function performs the migration of the specified GPU SVM range
 * to RAM. It sets up the migration, populates and DMA-maps RAM PFNs, and
 * invokes the driver-specific operations for migration to RAM.
 *
 * Return: 0 on success, negative error code on failure.
 */
static int __drm_gpusvm_migrate_to_ram(struct vm_area_struct *vas,
				       void *device_private_page_owner,
				       struct page *page,
				       unsigned long fault_addr,
				       unsigned long size)
{
	struct migrate_vma migrate = {
		.vma		= vas,
		.pgmap_owner	= device_private_page_owner,
		.flags		= MIGRATE_VMA_SELECT_DEVICE_PRIVATE |
				  MIGRATE_VMA_SELECT_DEVICE_COHERENT,
		.fault_page	= page,
	};
	struct drm_gpusvm_zdd *zdd;
	const struct drm_gpusvm_devmem_ops *ops;
	struct device *dev = NULL;
	unsigned long npages, mpages = 0;
	struct page **pages;
	dma_addr_t *dma_addr;
	unsigned long start, end;
	void *buf;
	int i, err = 0;

	if (page) {
		zdd = page->zone_device_data;
		if (time_before64(get_jiffies_64(),
				  zdd->devmem_allocation->timeslice_expiration))
			return 0;
	}

	start = ALIGN_DOWN(fault_addr, size);
	end = ALIGN(fault_addr + 1, size);

	/* Corner case where the VM area has been partially unmapped */
	if (start < vas->vm_start)
		start = vas->vm_start;
	if (end > vas->vm_end)
		end = vas->vm_end;

	migrate.start = start;
	migrate.end = end;
	npages = npages_in_range(start, end);

	buf = kvcalloc(npages, 2 * sizeof(*migrate.src) + sizeof(*dma_addr) +
		       sizeof(*pages), GFP_KERNEL);
	if (!buf) {
		err = -ENOMEM;
		goto err_out;
	}
	dma_addr = buf + (2 * sizeof(*migrate.src) * npages);
	pages = buf + (2 * sizeof(*migrate.src) + sizeof(*dma_addr)) * npages;

	migrate.vma = vas;
	migrate.src = buf;
	migrate.dst = migrate.src + npages;

	err = migrate_vma_setup(&migrate);
	if (err)
		goto err_free;

	/* Raced with another CPU fault, nothing to do */
	if (!migrate.cpages)
		goto err_free;

	if (!page) {
		for (i = 0; i < npages; ++i) {
			if (!(migrate.src[i] & MIGRATE_PFN_MIGRATE))
				continue;

			page = migrate_pfn_to_page(migrate.src[i]);
			break;
		}

		if (!page)
			goto err_finalize;
	}
	zdd = page->zone_device_data;
	ops = zdd->devmem_allocation->ops;
	dev = zdd->devmem_allocation->dev;

	err = drm_gpusvm_migrate_populate_ram_pfn(vas, page, npages, &mpages,
						  migrate.src, migrate.dst,
						  start);
	if (err)
		goto err_finalize;

	err = drm_gpusvm_migrate_map_pages(dev, dma_addr, migrate.dst, npages,
					   DMA_FROM_DEVICE);
	if (err)
		goto err_finalize;

	for (i = 0; i < npages; ++i)
		pages[i] = migrate_pfn_to_page(migrate.src[i]);

	err = ops->copy_to_ram(pages, dma_addr, npages);
	if (err)
		goto err_finalize;

err_finalize:
	if (err)
		drm_gpusvm_migration_unlock_put_pages(npages, migrate.dst);
	migrate_vma_pages(&migrate);
	migrate_vma_finalize(&migrate);
	if (dev)
		drm_gpusvm_migrate_unmap_pages(dev, dma_addr, npages,
					       DMA_FROM_DEVICE);
err_free:
	kvfree(buf);
err_out:
	return err;
}

/**
 * drm_gpusvm_range_evict() - Evict GPU SVM range
 * @gpusvm: Pointer to the GPU SVM structure
 * @range: Pointer to the GPU SVM range to be evicted
 *
 * This function evicts the specified GPU SVM range. It will not evict coherent
 * pages.
 *
 * Return: 0 on success, a negative error code on failure.
 */
int drm_gpusvm_range_evict(struct drm_gpusvm *gpusvm,
			   struct drm_gpusvm_range *range)
{
	struct mmu_interval_notifier *notifier = &range->notifier->notifier;
	struct hmm_range hmm_range = {
		.default_flags = HMM_PFN_REQ_FAULT,
		.notifier = notifier,
		.start = drm_gpusvm_range_start(range),
		.end = drm_gpusvm_range_end(range),
		.dev_private_owner = NULL,
	};
	unsigned long timeout =
		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
	unsigned long *pfns;
	unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
					       drm_gpusvm_range_end(range));
	int err = 0;
	struct mm_struct *mm = gpusvm->mm;

	if (!mmget_not_zero(mm))
		return -EFAULT;

	pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
	if (!pfns) {
		mmput(mm);
		return -ENOMEM;
	}

	hmm_range.hmm_pfns = pfns;
	while (!time_after(jiffies, timeout)) {
		hmm_range.notifier_seq = mmu_interval_read_begin(notifier);
		if (time_after(jiffies, timeout)) {
			err = -ETIME;
			break;
		}

		mmap_read_lock(mm);
		err = hmm_range_fault(&hmm_range);
		mmap_read_unlock(mm);
		if (err != -EBUSY)
			break;
	}

	kvfree(pfns);
	mmput(mm);

	return err;
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_evict);
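
/*
 * Example (illustrative sketch): forcing a range out of device memory. A
 * driver may call drm_gpusvm_range_evict() when it wants the CPU fault path
 * to pull all pages of a range back to RAM, e.g. before retrying a GPU fault
 * with system memory after migration to device memory has repeatedly failed.
 * The driver_fault_retry_in_ram() and driver_bind_range_in_ram() names are
 * hypothetical.
 *
 *	int driver_fault_retry_in_ram(struct drm_gpusvm *gpusvm,
 *				      struct drm_gpusvm_range *range)
 *	{
 *		int err;
 *
 *		// Faults the range on the CPU side, which triggers the
 *		// migrate_to_ram() pagemap callback for device-private pages.
 *		err = drm_gpusvm_range_evict(gpusvm, range);
 *		if (err)
 *			return err;
 *
 *		// Range pages are now expected in RAM; rebind using system
 *		// memory per driver policy.
 *		return driver_bind_range_in_ram(gpusvm, range);
 *	}
 */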

/**
 * drm_gpusvm_page_free() - Put GPU SVM zone device data associated with a page
 * @page: Pointer to the page
 *
 * This function is a callback used to put the GPU SVM zone device data
 * associated with a page when it is being released.
 */
static void drm_gpusvm_page_free(struct page *page)
{
	drm_gpusvm_zdd_put(page->zone_device_data);
}

/**
 * drm_gpusvm_migrate_to_ram() - Migrate GPU SVM range to RAM (page fault handler)
 * @vmf: Pointer to the fault information structure
 *
 * This function is a page fault handler used to migrate a GPU SVM range to RAM.
 * It retrieves the GPU SVM range information from the faulting page and invokes
 * the internal migration function to migrate the range back to RAM.
 *
 * Return: VM_FAULT_SIGBUS on failure, 0 on success.
 */
static vm_fault_t drm_gpusvm_migrate_to_ram(struct vm_fault *vmf)
{
	struct drm_gpusvm_zdd *zdd = vmf->page->zone_device_data;
	int err;

	err = __drm_gpusvm_migrate_to_ram(vmf->vma,
					  zdd->device_private_page_owner,
					  vmf->page, vmf->address,
					  zdd->devmem_allocation->size);

	return err ? VM_FAULT_SIGBUS : 0;
}

/*
 * drm_gpusvm_pagemap_ops - Device page map operations for GPU SVM
 */
static const struct dev_pagemap_ops drm_gpusvm_pagemap_ops = {
	.page_free = drm_gpusvm_page_free,
	.migrate_to_ram = drm_gpusvm_migrate_to_ram,
};

/**
 * drm_gpusvm_pagemap_ops_get() - Retrieve GPU SVM device page map operations
 *
 * Return: Pointer to the GPU SVM device page map operations structure.
 */
const struct dev_pagemap_ops *drm_gpusvm_pagemap_ops_get(void)
{
	return &drm_gpusvm_pagemap_ops;
}
EXPORT_SYMBOL_GPL(drm_gpusvm_pagemap_ops_get);
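
/*
 * Example (illustrative sketch): wiring the GPU SVM dev_pagemap_ops into a
 * driver's device-private pagemap at probe time. The
 * driver_devmem_pagemap_init() name is hypothetical; the dev_pagemap setup
 * follows the usual MEMORY_DEVICE_PRIVATE pattern.
 *
 *	int driver_devmem_pagemap_init(struct device *dev,
 *				       struct dev_pagemap *pagemap,
 *				       void *owner, resource_size_t size)
 *	{
 *		struct resource *res;
 *		void *addr;
 *
 *		res = devm_request_free_mem_region(dev, &iomem_resource, size);
 *		if (IS_ERR(res))
 *			return PTR_ERR(res);
 *
 *		pagemap->type = MEMORY_DEVICE_PRIVATE;
 *		pagemap->range.start = res->start;
 *		pagemap->range.end = res->end;
 *		pagemap->nr_range = 1;
 *		// Must match the device_private_page_owner used with GPU SVM
 *		pagemap->owner = owner;
 *		pagemap->ops = drm_gpusvm_pagemap_ops_get();
 *
 *		addr = devm_memremap_pages(dev, pagemap);
 *		return PTR_ERR_OR_ZERO(addr);
 *	}
 */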

/**
 * drm_gpusvm_has_mapping() - Check if GPU SVM has mapping for the given address range
 * @gpusvm: Pointer to the GPU SVM structure.
 * @start: Start address
 * @end: End address
 *
 * Return: True if GPU SVM has a mapping, false otherwise
 */
bool drm_gpusvm_has_mapping(struct drm_gpusvm *gpusvm, unsigned long start,
			    unsigned long end)
{
	struct drm_gpusvm_notifier *notifier;

	drm_gpusvm_for_each_notifier(notifier, gpusvm, start, end) {
		struct drm_gpusvm_range *range = NULL;

		drm_gpusvm_for_each_range(range, notifier, start, end)
			return true;
	}

	return false;
}
EXPORT_SYMBOL_GPL(drm_gpusvm_has_mapping);

/**
 * drm_gpusvm_range_set_unmapped() - Mark a GPU SVM range as unmapped
 * @range: Pointer to the GPU SVM range structure.
 * @mmu_range: Pointer to the MMU notifier range structure.
 *
 * This function marks a GPU SVM range as unmapped and sets the partial_unmap
 * flag if the MMU notifier range only partially covers the GPU SVM range.
 */
void drm_gpusvm_range_set_unmapped(struct drm_gpusvm_range *range,
				   const struct mmu_notifier_range *mmu_range)
{
	lockdep_assert_held_write(&range->gpusvm->notifier_lock);

	range->flags.unmapped = true;
	if (drm_gpusvm_range_start(range) < mmu_range->start ||
	    drm_gpusvm_range_end(range) > mmu_range->end)
		range->flags.partial_unmap = true;
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_set_unmapped);

/**
 * drm_gpusvm_devmem_init() - Initialize a GPU SVM device memory allocation
 * @devmem_allocation: Pointer to the device memory allocation to initialize
 * @dev: Pointer to the device structure to which the device memory allocation belongs
 * @mm: Pointer to the mm_struct for the address space
 * @ops: Pointer to the operations structure for GPU SVM device memory
 * @dpagemap: The struct drm_pagemap we're allocating from.
 * @size: Size of device memory allocation
 */
void drm_gpusvm_devmem_init(struct drm_gpusvm_devmem *devmem_allocation,
			    struct device *dev, struct mm_struct *mm,
			    const struct drm_gpusvm_devmem_ops *ops,
			    struct drm_pagemap *dpagemap, size_t size)
{
	init_completion(&devmem_allocation->detached);
	devmem_allocation->dev = dev;
	devmem_allocation->mm = mm;
	devmem_allocation->ops = ops;
	devmem_allocation->dpagemap = dpagemap;
	devmem_allocation->size = size;
}
EXPORT_SYMBOL_GPL(drm_gpusvm_devmem_init);

MODULE_DESCRIPTION("DRM GPUSVM");
MODULE_LICENSE("GPL");