// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
 * Copyright © 2024-2025 Intel Corporation
 */

#include <linux/dma-fence.h>
#include <linux/dma-mapping.h>
#include <linux/migrate.h>
#include <linux/pagemap.h>
#include <drm/drm_drv.h>
#include <drm/drm_pagemap.h>
#include <drm/drm_pagemap_util.h>
#include <drm/drm_print.h>

/**
 * DOC: Overview
 *
 * The DRM pagemap layer is intended to augment the dev_pagemap functionality by
 * providing a way to populate a struct mm_struct virtual range with device
 * private pages and to provide helpers to abstract device memory allocations,
 * to migrate memory back and forth between device memory and system RAM and
 * to handle access (and in the future migration) between devices implementing
 * a fast interconnect that is not necessarily visible to the rest of the
 * system.
 *
 * Typically the DRM pagemap receives requests from one or more DRM GPU SVM
 * instances to populate struct mm_struct virtual ranges with memory. The
 * migration is best effort only and may thus fail. The implementation should
 * also handle device unbinding by blocking (returning an -ENODEV error) new
 * population requests and, after that, migrating all device pages to system
 * RAM.
 */

/**
 * DOC: Migration
 *
 * Migration granularity typically follows the GPU SVM range requests, but
 * if there are clashes, due to races or due to the fact that multiple GPU
 * SVM instances have different views of the ranges used, and parts of a
 * requested range are therefore already present in the requested device
 * memory, the implementation has a variety of options. It can fail, it can
 * choose to populate only the part of the range that isn't already in device
 * memory, or it can evict the range to system memory before trying to
 * migrate. Ideally an implementation would just try to migrate the missing
 * part of the range and allocate just enough memory to do so.
 *
 * When migrating to system memory as a response to a CPU fault or a device
 * memory eviction request, currently a full device memory allocation is
 * migrated back to system memory. Moving forward this might need improvement
 * for situations where a single page needs bouncing between system memory
 * and device memory due to, for example, atomic operations.
 *
 * Key DRM pagemap components:
 *
 * - Device Memory Allocations:
 *	Embedded structure containing enough information for the drm_pagemap to
 *	migrate to / from device memory.
 *
 * - Device Memory Operations:
 *	Define the interface for driver-specific device memory operations to
 *	release memory, populate pfns, and copy to / from device memory.
 */
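
/*
 * A driver provides the memory operations above through a struct
 * drm_pagemap_devmem_ops instance. A minimal sketch, in which all foo_*
 * names are hypothetical driver code, could look like this:
 *
 *	static const struct drm_pagemap_devmem_ops foo_devmem_ops = {
 *		.devmem_release = foo_devmem_release,
 *		.populate_devmem_pfn = foo_populate_devmem_pfn,
 *		.copy_to_devmem = foo_copy_to_devmem,
 *		.copy_to_ram = foo_copy_to_ram,
 *	};
 *
 * drm_pagemap_migrate_to_devmem() requires populate_devmem_pfn(),
 * copy_to_devmem() and copy_to_ram() and returns -EOPNOTSUPP if any of
 * them is missing.
 */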

/**
 * struct drm_pagemap_zdd - GPU SVM zone device data
 *
 * @refcount: Reference count for the zdd
 * @devmem_allocation: device memory allocation
 * @dpagemap: Refcounted pointer to the underlying struct drm_pagemap.
 *
 * This structure serves as a generic wrapper installed in
 * page->zone_device_data. It provides infrastructure for looking up a device
 * memory allocation upon CPU page fault and asynchronously releasing device
 * memory once the CPU has no page references. Asynchronous release is useful
 * because CPU page references can be dropped in IRQ contexts, while releasing
 * device memory likely requires sleeping locks.
 */
struct drm_pagemap_zdd {
	struct kref refcount;
	struct drm_pagemap_devmem *devmem_allocation;
	struct drm_pagemap *dpagemap;
};

/**
 * drm_pagemap_zdd_alloc() - Allocate a zdd structure.
 * @dpagemap: Pointer to the underlying struct drm_pagemap.
 *
 * This function allocates and initializes a new zdd structure. It sets up the
 * reference count and takes a reference on @dpagemap.
 *
 * Return: Pointer to the allocated zdd on success, NULL on failure.
 */
static struct drm_pagemap_zdd *
drm_pagemap_zdd_alloc(struct drm_pagemap *dpagemap)
{
	struct drm_pagemap_zdd *zdd;

	zdd = kmalloc(sizeof(*zdd), GFP_KERNEL);
	if (!zdd)
		return NULL;

	kref_init(&zdd->refcount);
	zdd->devmem_allocation = NULL;
	zdd->dpagemap = drm_pagemap_get(dpagemap);

	return zdd;
}

/**
 * drm_pagemap_zdd_get() - Get a reference to a zdd structure.
 * @zdd: Pointer to the zdd structure.
 *
 * This function increments the reference count of the provided zdd structure.
 *
 * Return: Pointer to the zdd structure.
 */
static struct drm_pagemap_zdd *drm_pagemap_zdd_get(struct drm_pagemap_zdd *zdd)
{
	kref_get(&zdd->refcount);
	return zdd;
}

/**
 * drm_pagemap_zdd_destroy() - Destroy a zdd structure.
 * @ref: Pointer to the reference count structure.
 *
 * This function releases the device memory allocation, if any, puts the
 * drm_pagemap reference and frees the zdd.
 */
static void drm_pagemap_zdd_destroy(struct kref *ref)
{
	struct drm_pagemap_zdd *zdd =
		container_of(ref, struct drm_pagemap_zdd, refcount);
	struct drm_pagemap_devmem *devmem = zdd->devmem_allocation;
	struct drm_pagemap *dpagemap = zdd->dpagemap;

	if (devmem) {
		complete_all(&devmem->detached);
		if (devmem->ops->devmem_release)
			devmem->ops->devmem_release(devmem);
	}
	kfree(zdd);
	drm_pagemap_put(dpagemap);
}

/**
 * drm_pagemap_zdd_put() - Put a zdd reference.
 * @zdd: Pointer to the zdd structure.
 *
 * This function decrements the reference count of the provided zdd structure
 * and destroys it if the count drops to zero.
 */
static void drm_pagemap_zdd_put(struct drm_pagemap_zdd *zdd)
{
	kref_put(&zdd->refcount, drm_pagemap_zdd_destroy);
}
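
/*
 * The zdd is what ties a device-private struct page back to its device
 * memory allocation. For example, a handler that receives a device
 * private page can recover the owning allocation directly from the page
 * (sketch):
 *
 *	struct drm_pagemap_zdd *zdd = page->zone_device_data;
 *	struct drm_pagemap_devmem *devmem = zdd->devmem_allocation;
 *
 * This is how drm_pagemap_migrate_to_ram() below finds the allocation
 * to migrate on CPU fault.
 */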

/**
 * drm_pagemap_migration_unlock_put_page() - Put a migration page
 * @page: Pointer to the page to put
 *
 * This function unlocks and puts a page.
 */
static void drm_pagemap_migration_unlock_put_page(struct page *page)
{
	unlock_page(page);
	put_page(page);
}

/**
 * drm_pagemap_migration_unlock_put_pages() - Put migration pages
 * @npages: Number of pages
 * @migrate_pfn: Array of migrate page frame numbers
 *
 * This function unlocks and puts an array of pages.
 */
static void drm_pagemap_migration_unlock_put_pages(unsigned long npages,
						   unsigned long *migrate_pfn)
{
	unsigned long i;

	for (i = 0; i < npages; ++i) {
		struct page *page;

		if (!migrate_pfn[i])
			continue;

		page = migrate_pfn_to_page(migrate_pfn[i]);
		drm_pagemap_migration_unlock_put_page(page);
		migrate_pfn[i] = 0;
	}
}

/**
 * drm_pagemap_get_devmem_page() - Get a reference to a device memory page
 * @page: Pointer to the page
 * @zdd: Pointer to the GPU SVM zone device data
 *
 * This function associates the given page with the specified GPU SVM zone
 * device data and initializes it for zone device usage.
 */
static void drm_pagemap_get_devmem_page(struct page *page,
					struct drm_pagemap_zdd *zdd)
{
	page->zone_device_data = drm_pagemap_zdd_get(zdd);
	zone_device_page_init(page, page_pgmap(page), 0);
}
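
/*
 * Note that the migrate_pfn arrays used throughout this file hold
 * entries encoded as defined by <linux/migrate.h>, not raw pfns.
 * For example (sketch):
 *
 *	unsigned long mpfn = migrate_pfn(page_to_pfn(page));
 *	struct page *back = migrate_pfn_to_page(mpfn);
 *
 * A zero entry means "no page / skip this slot", and the core mm sets
 * MIGRATE_PFN_MIGRATE in the source entries it has collected for
 * migration.
 */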

/**
 * drm_pagemap_migrate_map_pages() - Map migration pages for GPU SVM migration
 * @dev: The device performing the migration.
 * @local_dpagemap: The drm_pagemap local to the migrating device.
 * @pagemap_addr: Array to store DMA information corresponding to mapped pages.
 * @migrate_pfn: Array of page frame numbers of system pages or peer pages to map.
 * @npages: Number of system pages or peer pages to map.
 * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL)
 * @mdetails: Details governing the migration behaviour.
 *
 * This function maps pages of memory for migration usage in GPU SVM. It
 * iterates over each page frame number provided in @migrate_pfn, maps the
 * corresponding page, and stores the DMA information in the provided
 * @pagemap_addr array.
 *
 * Returns: 0 on success, -EFAULT if an error occurs during mapping.
 */
static int drm_pagemap_migrate_map_pages(struct device *dev,
					 struct drm_pagemap *local_dpagemap,
					 struct drm_pagemap_addr *pagemap_addr,
					 unsigned long *migrate_pfn,
					 unsigned long npages,
					 enum dma_data_direction dir,
					 const struct drm_pagemap_migrate_details *mdetails)
{
	unsigned long num_peer_pages = 0, num_local_pages = 0, i;

	for (i = 0; i < npages;) {
		struct page *page = migrate_pfn_to_page(migrate_pfn[i]);
		dma_addr_t dma_addr;
		struct folio *folio;
		unsigned int order = 0;

		if (!page)
			goto next;

		folio = page_folio(page);
		order = folio_order(folio);

		if (is_device_private_page(page)) {
			struct drm_pagemap_zdd *zdd = page->zone_device_data;
			struct drm_pagemap *dpagemap = zdd->dpagemap;
			struct drm_pagemap_addr addr;

			if (dpagemap == local_dpagemap) {
				if (!mdetails->can_migrate_same_pagemap)
					goto next;

				num_local_pages += NR_PAGES(order);
			} else {
				num_peer_pages += NR_PAGES(order);
			}

			addr = dpagemap->ops->device_map(dpagemap, dev, page, order, dir);
			if (dma_mapping_error(dev, addr.addr))
				return -EFAULT;

			pagemap_addr[i] = addr;
		} else {
			dma_addr = dma_map_page(dev, page, 0, page_size(page), dir);
			if (dma_mapping_error(dev, dma_addr))
				return -EFAULT;

			pagemap_addr[i] =
				drm_pagemap_addr_encode(dma_addr,
							DRM_INTERCONNECT_SYSTEM,
							order, dir);
		}

next:
		i += NR_PAGES(order);
	}

	if (num_peer_pages)
		drm_dbg(local_dpagemap->drm, "Migrating %lu peer pages over interconnect.\n",
			num_peer_pages);
	if (num_local_pages)
		drm_dbg(local_dpagemap->drm, "Migrating %lu local pages over interconnect.\n",
			num_local_pages);

	return 0;
}
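
/*
 * The resulting pagemap_addr entries are what the driver copy functions
 * consume. A copy implementation could walk them as in this sketch,
 * where foo_copy_chunk() is a hypothetical driver helper:
 *
 *	for (i = 0; i < npages; i += NR_PAGES(pagemap_addr[i].order)) {
 *		if (!pagemap_addr[i].addr)
 *			continue;
 *
 *		foo_copy_chunk(foo, pagemap_addr[i].addr,
 *			       PAGE_SIZE << pagemap_addr[i].order);
 *	}
 *
 * Entries mapped from peer device memory carry the peer's interconnect
 * protocol rather than DRM_INTERCONNECT_SYSTEM.
 */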

/**
 * drm_pagemap_migrate_unmap_pages() - Unmap pages previously mapped for GPU SVM migration
 * @dev: The device for which the pages were mapped
 * @pagemap_addr: Array of DMA information corresponding to mapped pages
 * @migrate_pfn: Array of migrate pfns set up for the mapped pages. Used to
 * determine the drm_pagemap of a peer device private page.
 * @npages: Number of pages to unmap
 * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL)
 *
 * This function unmaps previously mapped pages of memory for GPU Shared Virtual
 * Memory (SVM). It iterates over each entry provided in @pagemap_addr, checks
 * if it's valid and not already unmapped, and unmaps the corresponding page.
 */
static void drm_pagemap_migrate_unmap_pages(struct device *dev,
					    struct drm_pagemap_addr *pagemap_addr,
					    unsigned long *migrate_pfn,
					    unsigned long npages,
					    enum dma_data_direction dir)
{
	unsigned long i;

	for (i = 0; i < npages;) {
		struct page *page = migrate_pfn_to_page(migrate_pfn[i]);

		if (!page || !pagemap_addr[i].addr || dma_mapping_error(dev, pagemap_addr[i].addr))
			goto next;

		if (is_zone_device_page(page)) {
			struct drm_pagemap_zdd *zdd = page->zone_device_data;
			struct drm_pagemap *dpagemap = zdd->dpagemap;

			dpagemap->ops->device_unmap(dpagemap, dev, pagemap_addr[i]);
		} else {
			dma_unmap_page(dev, pagemap_addr[i].addr,
				       PAGE_SIZE << pagemap_addr[i].order, dir);
		}

next:
		i += NR_PAGES(pagemap_addr[i].order);
	}
}

static unsigned long
npages_in_range(unsigned long start, unsigned long end)
{
	return (end - start) >> PAGE_SHIFT;
}

static int
drm_pagemap_migrate_remote_to_local(struct drm_pagemap_devmem *devmem,
				    struct device *remote_device,
				    struct drm_pagemap *remote_dpagemap,
				    unsigned long local_pfns[],
				    struct page *remote_pages[],
				    struct drm_pagemap_addr pagemap_addr[],
				    unsigned long npages,
				    const struct drm_pagemap_devmem_ops *ops,
				    const struct drm_pagemap_migrate_details *mdetails)
{
	int err = drm_pagemap_migrate_map_pages(remote_device, remote_dpagemap,
						pagemap_addr, local_pfns,
						npages, DMA_FROM_DEVICE, mdetails);

	if (err)
		goto out;

	err = ops->copy_to_ram(remote_pages, pagemap_addr, npages,
			       devmem->pre_migrate_fence);
out:
	drm_pagemap_migrate_unmap_pages(remote_device, pagemap_addr, local_pfns,
					npages, DMA_FROM_DEVICE);
	return err;
}

static int
drm_pagemap_migrate_sys_to_dev(struct drm_pagemap_devmem *devmem,
			       unsigned long sys_pfns[],
			       struct page *local_pages[],
			       struct drm_pagemap_addr pagemap_addr[],
			       unsigned long npages,
			       const struct drm_pagemap_devmem_ops *ops,
			       const struct drm_pagemap_migrate_details *mdetails)
{
	int err = drm_pagemap_migrate_map_pages(devmem->dev, devmem->dpagemap,
						pagemap_addr, sys_pfns, npages,
						DMA_TO_DEVICE, mdetails);

	if (err)
		goto out;

	err = ops->copy_to_devmem(local_pages, pagemap_addr, npages,
				  devmem->pre_migrate_fence);
out:
	drm_pagemap_migrate_unmap_pages(devmem->dev, pagemap_addr, sys_pfns, npages,
					DMA_TO_DEVICE);
	return err;
}
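
/*
 * Copies to device memory are batched per source: the loop in
 * drm_pagemap_migrate_to_devmem() accumulates a run of pages sharing
 * the same source drm_pagemap (or system memory) and flushes the run
 * using one of the two helpers above whenever the source changes. As an
 * example, with pages [0, 4) resident on a peer device and pages [4, 8)
 * in system memory, the migration becomes one
 * drm_pagemap_migrate_remote_to_local() call for [0, 4) followed by one
 * drm_pagemap_migrate_sys_to_dev() call for [4, 8).
 */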

/**
 * struct migrate_range_loc - Cursor into the loop over migrate_pfns for migrating to device.
 * @start: The current loop index.
 * @device: The migrating device.
 * @dpagemap: Pointer to struct drm_pagemap used by the migrating device.
 * @ops: The copy ops to be used for the migrating device.
 */
struct migrate_range_loc {
	unsigned long start;
	struct device *device;
	struct drm_pagemap *dpagemap;
	const struct drm_pagemap_devmem_ops *ops;
};

static int drm_pagemap_migrate_range(struct drm_pagemap_devmem *devmem,
				     unsigned long src_pfns[],
				     unsigned long dst_pfns[],
				     struct page *pages[],
				     struct drm_pagemap_addr pagemap_addr[],
				     struct migrate_range_loc *last,
				     const struct migrate_range_loc *cur,
				     const struct drm_pagemap_migrate_details *mdetails)
{
	int ret = 0;

	if (cur->start == 0)
		goto out;

	if (cur->start <= last->start)
		return 0;

	if (cur->dpagemap == last->dpagemap && cur->ops == last->ops)
		return 0;

	if (last->dpagemap)
		ret = drm_pagemap_migrate_remote_to_local(devmem,
							  last->device,
							  last->dpagemap,
							  &dst_pfns[last->start],
							  &pages[last->start],
							  &pagemap_addr[last->start],
							  cur->start - last->start,
							  last->ops, mdetails);
	else
		ret = drm_pagemap_migrate_sys_to_dev(devmem,
						     &src_pfns[last->start],
						     &pages[last->start],
						     &pagemap_addr[last->start],
						     cur->start - last->start,
						     last->ops, mdetails);

out:
	*last = *cur;
	return ret;
}

/**
 * drm_pagemap_migrate_to_devmem() - Migrate a struct mm_struct range to device memory
 * @devmem_allocation: The device memory allocation to migrate to.
 * The caller should hold a reference to the device memory allocation,
 * and the reference is consumed by this function even if it returns with
 * an error.
 * @mm: Pointer to the struct mm_struct.
 * @start: Start of the virtual address range to migrate.
 * @end: End of the virtual address range to migrate.
 * @mdetails: Details to govern the migration.
 *
 * This function migrates the specified virtual address range to device memory.
 * It performs the necessary setup and invokes the driver-specific operations for
 * migration to device memory. Expected to be called while holding the mmap lock in
 * at least read mode.
 *
 * Note: The timeslice_ms field of @mdetails can typically be used to force data
 * to remain in pagemap pages long enough for a GPU to perform a task and to
 * prevent a migration livelock. One alternative would be for the GPU driver to
 * block in a mmu_notifier for the specified amount of time, but adding the
 * functionality to the pagemap is likely nicer to the system as a whole.
 *
 * Return: %0 on success, negative error code on failure.
 */
int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation,
				  struct mm_struct *mm,
				  unsigned long start, unsigned long end,
				  const struct drm_pagemap_migrate_details *mdetails)
{
	const struct drm_pagemap_devmem_ops *ops = devmem_allocation->ops;
	struct drm_pagemap *dpagemap = devmem_allocation->dpagemap;
	struct dev_pagemap *pagemap = dpagemap->pagemap;
	struct migrate_vma migrate = {
		.start = start,
		.end = end,
		.pgmap_owner = pagemap->owner,
		/*
		 * FIXME: MIGRATE_VMA_SELECT_DEVICE_PRIVATE intermittently
		 * causes 'xe_exec_system_allocator --r *race*no*' to trigger
		 * an engine reset and a hard hang due to getting stuck on a
		 * folio lock. This should work and needs to be root-caused.
		 * The only downside of not selecting
		 * MIGRATE_VMA_SELECT_DEVICE_PRIVATE is that device-to-device
		 * migrations won't work; instead, memory will bounce through
		 * system memory. This path should be rare and only occur when
		 * the madvise attributes of memory are changed or atomics are
		 * being used.
		 */
		.flags = MIGRATE_VMA_SELECT_SYSTEM | MIGRATE_VMA_SELECT_DEVICE_COHERENT,
	};
	unsigned long i, npages = npages_in_range(start, end);
	unsigned long own_pages = 0, migrated_pages = 0;
	struct migrate_range_loc cur, last = {.device = dpagemap->drm->dev, .ops = ops};
	struct vm_area_struct *vas;
	struct drm_pagemap_zdd *zdd = NULL;
	struct page **pages;
	struct drm_pagemap_addr *pagemap_addr;
	void *buf;
	int err;

	mmap_assert_locked(mm);

	if (!ops->populate_devmem_pfn || !ops->copy_to_devmem ||
	    !ops->copy_to_ram)
		return -EOPNOTSUPP;

	vas = vma_lookup(mm, start);
	if (!vas) {
		err = -ENOENT;
		goto err_out;
	}

	if (end > vas->vm_end || start < vas->vm_start) {
		err = -EINVAL;
		goto err_out;
	}

	if (!vma_is_anonymous(vas)) {
		err = -EBUSY;
		goto err_out;
	}

	buf = kvcalloc(npages, 2 * sizeof(*migrate.src) + sizeof(*pagemap_addr) +
		       sizeof(*pages), GFP_KERNEL);
	if (!buf) {
		err = -ENOMEM;
		goto err_out;
	}
	pagemap_addr = buf + (2 * sizeof(*migrate.src) * npages);
	pages = buf + (2 * sizeof(*migrate.src) + sizeof(*pagemap_addr)) * npages;

	zdd = drm_pagemap_zdd_alloc(dpagemap);
	if (!zdd) {
		err = -ENOMEM;
		kvfree(buf);
		goto err_out;
	}
	zdd->devmem_allocation = devmem_allocation;	/* Owns ref */

	migrate.vma = vas;
	migrate.src = buf;
	migrate.dst = migrate.src + npages;

	err = migrate_vma_setup(&migrate);
	if (err)
		goto err_free;

	if (!migrate.cpages) {
		/* No pages to migrate. Raced or unknown device pages. */
		err = -EBUSY;
		goto err_free;
	}

	if (migrate.cpages != npages) {
		/*
		 * Some pages to migrate. But we want to migrate all or
		 * nothing. Raced or unknown device pages.
		 */
		err = -EBUSY;
		goto err_aborted_migration;
	}

	/* Count device-private pages to migrate */
	for (i = 0; i < npages;) {
		struct page *src_page = migrate_pfn_to_page(migrate.src[i]);
		unsigned long nr_pages = src_page ?
			NR_PAGES(folio_order(page_folio(src_page))) : 1;

		if (src_page && is_zone_device_page(src_page)) {
			if (page_pgmap(src_page) == pagemap)
				own_pages += nr_pages;
		}

		i += nr_pages;
	}

	drm_dbg(dpagemap->drm, "Total pages %lu; Own pages: %lu.\n",
		npages, own_pages);
	if (own_pages == npages) {
		err = 0;
		drm_dbg(dpagemap->drm, "Migration wasn't necessary.\n");
		goto err_aborted_migration;
	} else if (own_pages && !mdetails->can_migrate_same_pagemap) {
		err = -EBUSY;
		drm_dbg(dpagemap->drm, "Migration aborted due to fragmentation.\n");
		goto err_aborted_migration;
	}

	err = ops->populate_devmem_pfn(devmem_allocation, npages, migrate.dst);
	if (err)
		goto err_aborted_migration;

	own_pages = 0;

	for (i = 0; i < npages; ++i) {
		struct page *page = pfn_to_page(migrate.dst[i]);
		struct page *src_page = migrate_pfn_to_page(migrate.src[i]);

		cur.start = i;
		pages[i] = NULL;
		if (src_page && is_device_private_page(src_page)) {
			struct drm_pagemap_zdd *src_zdd = src_page->zone_device_data;

			if (page_pgmap(src_page) == pagemap &&
			    !mdetails->can_migrate_same_pagemap) {
				migrate.dst[i] = 0;
				own_pages++;
				continue;
			}
			if (mdetails->source_peer_migrates) {
				cur.dpagemap = src_zdd->dpagemap;
				cur.ops = src_zdd->devmem_allocation->ops;
				cur.device = cur.dpagemap->drm->dev;
				pages[i] = src_page;
			}
		}
		if (!pages[i]) {
			cur.dpagemap = NULL;
			cur.ops = ops;
			cur.device = dpagemap->drm->dev;
			pages[i] = page;
		}
		migrate.dst[i] = migrate_pfn(migrate.dst[i]);
		drm_pagemap_get_devmem_page(page, zdd);

		/* If we switched the migrating drm_pagemap, migrate previous pages now */
		err = drm_pagemap_migrate_range(devmem_allocation, migrate.src, migrate.dst,
						pages, pagemap_addr, &last, &cur,
						mdetails);
		if (err) {
			npages = i + 1;
			goto err_finalize;
		}
	}
	cur.start = npages;
	cur.ops = NULL;	/* Force migration */
	err = drm_pagemap_migrate_range(devmem_allocation, migrate.src, migrate.dst,
					pages, pagemap_addr, &last, &cur, mdetails);
	if (err)
		goto err_finalize;

	drm_WARN_ON(dpagemap->drm, !!own_pages);

	dma_fence_put(devmem_allocation->pre_migrate_fence);
	devmem_allocation->pre_migrate_fence = NULL;

	/* Upon success bind devmem allocation to range and zdd */
	devmem_allocation->timeslice_expiration = get_jiffies_64() +
		msecs_to_jiffies(mdetails->timeslice_ms);

err_finalize:
	if (err)
		drm_pagemap_migration_unlock_put_pages(npages, migrate.dst);
err_aborted_migration:
	migrate_vma_pages(&migrate);

	for (i = 0; !err && i < npages;) {
		struct page *page = migrate_pfn_to_page(migrate.src[i]);
		unsigned long nr_pages = page ?
			NR_PAGES(folio_order(page_folio(page))) : 1;

		if (migrate.src[i] & MIGRATE_PFN_MIGRATE)
			migrated_pages += nr_pages;

		i += nr_pages;
	}

	if (!err && migrated_pages < npages - own_pages) {
		drm_dbg(dpagemap->drm, "Raced while finalizing migration.\n");
		err = -EBUSY;
	}

	migrate_vma_finalize(&migrate);
err_free:
	drm_pagemap_zdd_put(zdd);
	kvfree(buf);
	return err;

err_out:
	devmem_allocation->ops->devmem_release(devmem_allocation);
	return err;
}
EXPORT_SYMBOL_GPL(drm_pagemap_migrate_to_devmem);
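
/*
 * A typical caller resolves the virtual address range from a GPU SVM
 * range, allocates device memory and attempts the migration while
 * holding the mmap lock in read mode. A hedged sketch, where foo_alloc
 * is hypothetical driver state:
 *
 *	struct drm_pagemap_migrate_details mdetails = {
 *		.timeslice_ms = 5,
 *	};
 *	int err;
 *
 *	mmap_read_lock(mm);
 *	err = drm_pagemap_migrate_to_devmem(&foo_alloc->devmem, mm,
 *					    start, end, &mdetails);
 *	mmap_read_unlock(mm);
 *
 * Since migration is best effort, an -EBUSY return here is typically
 * not fatal; the range simply remains in its current placement.
 */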

/**
 * drm_pagemap_migrate_populate_ram_pfn() - Populate RAM PFNs for a VM area
 * @vas: Pointer to the VM area structure, can be NULL
 * @fault_page: Fault page
 * @npages: Number of pages to populate
 * @mpages: Number of pages to migrate
 * @src_mpfn: Source array of migrate PFNs
 * @mpfn: Array of migrate PFNs to populate
 * @addr: Start address for PFN allocation
 *
 * This function populates the RAM migrate page frame numbers (PFNs) for the
 * specified VM area structure. It allocates and locks folios in the VM area
 * for RAM usage. If @vas is non-NULL, use vma_alloc_folio() for allocation;
 * if NULL, use folio_alloc().
 *
 * Return: 0 on success, negative error code on failure.
 */
static int drm_pagemap_migrate_populate_ram_pfn(struct vm_area_struct *vas,
						struct page *fault_page,
						unsigned long npages,
						unsigned long *mpages,
						unsigned long *src_mpfn,
						unsigned long *mpfn,
						unsigned long addr)
{
	unsigned long i;

	for (i = 0; i < npages;) {
		struct page *page = NULL, *src_page;
		struct folio *folio;
		unsigned int order = 0;

		if (!(src_mpfn[i] & MIGRATE_PFN_MIGRATE))
			goto next;

		src_page = migrate_pfn_to_page(src_mpfn[i]);
		if (!src_page)
			goto next;

		if (fault_page) {
			if (src_page->zone_device_data !=
			    fault_page->zone_device_data)
				goto next;
		}

		order = folio_order(page_folio(src_page));

		/* TODO: Support fallback to single pages if THP allocation fails */
		if (vas)
			folio = vma_alloc_folio(GFP_HIGHUSER, order, vas, addr);
		else
			folio = folio_alloc(GFP_HIGHUSER, order);

		if (!folio)
			goto free_pages;

		page = folio_page(folio, 0);
		mpfn[i] = migrate_pfn(page_to_pfn(page));

next:
		if (page)
			addr += page_size(page);
		else
			addr += PAGE_SIZE;

		i += NR_PAGES(order);
	}

	for (i = 0; i < npages;) {
		struct page *page = migrate_pfn_to_page(mpfn[i]);
		unsigned int order = 0;

		if (!page)
			goto next_lock;

		WARN_ON_ONCE(!folio_trylock(page_folio(page)));

		order = folio_order(page_folio(page));
		*mpages += NR_PAGES(order);

next_lock:
		i += NR_PAGES(order);
	}

	return 0;

free_pages:
	for (i = 0; i < npages;) {
		struct page *page = migrate_pfn_to_page(mpfn[i]);
		unsigned int order = 0;

		if (!page)
			goto next_put;

		/* Read the order before the put, which may free the folio. */
		order = folio_order(page_folio(page));
		put_page(page);
		mpfn[i] = 0;

next_put:
		i += NR_PAGES(order);
	}
	return -ENOMEM;
}

static void drm_pagemap_dev_unhold_work(struct work_struct *work);
static LLIST_HEAD(drm_pagemap_unhold_list);
static DECLARE_WORK(drm_pagemap_work, drm_pagemap_dev_unhold_work);

/**
 * struct drm_pagemap_dev_hold - Struct to aid in drm_device release.
 * @link: Link into drm_pagemap_unhold_list for deferred reference releases.
 * @drm: drm device to put.
 *
 * When a struct drm_pagemap is released, we also need to release the
 * reference it holds on the drm device. However, typically that needs
 * to be deferred to a system-wide workqueue. Each time a struct
 * drm_pagemap is initialized (or re-initialized if cached), we therefore
 * allocate a separate drm_pagemap_dev_hold item, through which we put
 * the drm device and associated module.
 */
struct drm_pagemap_dev_hold {
	struct llist_node link;
	struct drm_device *drm;
};

static void drm_pagemap_release(struct kref *ref)
{
	struct drm_pagemap *dpagemap = container_of(ref, typeof(*dpagemap), ref);
	struct drm_pagemap_dev_hold *dev_hold = dpagemap->dev_hold;

	/*
	 * We know the pagemap provider is alive at this point, since
	 * the struct drm_pagemap_dev_hold holds a reference to the
	 * pagemap provider drm_device and its module.
	 */
	dpagemap->dev_hold = NULL;
	drm_pagemap_shrinker_add(dpagemap);
	llist_add(&dev_hold->link, &drm_pagemap_unhold_list);
	schedule_work(&drm_pagemap_work);
	/*
	 * Here, either the provider device is still alive, since if called from
	 * page_free(), the caller is holding a reference on the dev_pagemap,
	 * or if called from drm_pagemap_put(), the direct caller is still alive.
	 * This ensures we can't race with THIS module unload.
	 */
}

static void drm_pagemap_dev_unhold_work(struct work_struct *work)
{
	struct llist_node *node = llist_del_all(&drm_pagemap_unhold_list);
	struct drm_pagemap_dev_hold *dev_hold, *next;

	/*
	 * Deferred release of drm_pagemap provider device and module.
	 * THIS module is kept alive during the release by the
	 * flush_work() in the drm_pagemap_exit() function.
	 */
	llist_for_each_entry_safe(dev_hold, next, node, link) {
		struct drm_device *drm = dev_hold->drm;
		struct module *module = drm->driver->fops->owner;

		drm_dbg(drm, "Releasing reference on provider device and module.\n");
		drm_dev_put(drm);
		module_put(module);
		kfree(dev_hold);
	}
}

static struct drm_pagemap_dev_hold *
drm_pagemap_dev_hold(struct drm_pagemap *dpagemap)
{
	struct drm_pagemap_dev_hold *dev_hold;
	struct drm_device *drm = dpagemap->drm;

	dev_hold = kzalloc(sizeof(*dev_hold), GFP_KERNEL);
	if (!dev_hold)
		return ERR_PTR(-ENOMEM);

	init_llist_node(&dev_hold->link);
	dev_hold->drm = drm;
	(void)try_module_get(drm->driver->fops->owner);
	drm_dev_get(drm);

	return dev_hold;
}

/**
 * drm_pagemap_reinit() - Reinitialize a drm_pagemap
 * @dpagemap: The drm_pagemap to reinitialize
 *
 * Reinitialize a drm_pagemap, for which drm_pagemap_release()
 * has already been called. This interface is intended for the
 * situation where the driver caches a destroyed drm_pagemap.
 *
 * Return: 0 on success, negative error code on failure.
 */
int drm_pagemap_reinit(struct drm_pagemap *dpagemap)
{
	dpagemap->dev_hold = drm_pagemap_dev_hold(dpagemap);
	if (IS_ERR(dpagemap->dev_hold))
		return PTR_ERR(dpagemap->dev_hold);

	kref_init(&dpagemap->ref);
	return 0;
}
EXPORT_SYMBOL(drm_pagemap_reinit);

/**
 * drm_pagemap_init() - Initialize a pre-allocated drm_pagemap
 * @dpagemap: The drm_pagemap to initialize.
 * @pagemap: The associated dev_pagemap providing the device
 * private pages.
 * @drm: The drm device. The drm_pagemap holds a reference on the
 * drm_device and the module owning the drm_device until
 * drm_pagemap_release(). This facilitates drm_pagemap exporting.
 * @ops: The drm_pagemap ops.
 *
 * Initialize and take an initial reference on a drm_pagemap.
 * After successful return, use drm_pagemap_put() to destroy.
 *
 * Return: 0 on success, negative error code on error.
 */
int drm_pagemap_init(struct drm_pagemap *dpagemap,
		     struct dev_pagemap *pagemap,
		     struct drm_device *drm,
		     const struct drm_pagemap_ops *ops)
{
	kref_init(&dpagemap->ref);
	dpagemap->ops = ops;
	dpagemap->pagemap = pagemap;
	dpagemap->drm = drm;
	dpagemap->cache = NULL;
	INIT_LIST_HEAD(&dpagemap->shrink_link);

	return drm_pagemap_reinit(dpagemap);
}
EXPORT_SYMBOL(drm_pagemap_init);

/**
 * drm_pagemap_put() - Put a struct drm_pagemap reference
 * @dpagemap: Pointer to a struct drm_pagemap object.
 *
 * Puts a struct drm_pagemap reference and frees the drm_pagemap object
 * if the refcount reaches zero.
 */
void drm_pagemap_put(struct drm_pagemap *dpagemap)
{
	if (likely(dpagemap)) {
		drm_pagemap_shrinker_might_lock(dpagemap);
		kref_put(&dpagemap->ref, drm_pagemap_release);
	}
}
EXPORT_SYMBOL(drm_pagemap_put);
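
/*
 * A provider typically embeds the struct drm_pagemap in a larger,
 * driver-private structure and initializes it once the backing
 * dev_pagemap has been set up. A minimal sketch, where the foo_* names
 * are hypothetical:
 *
 *	err = drm_pagemap_init(&foo->dpagemap, &foo->pagemap, drm,
 *			       &foo_dpagemap_ops);
 *	if (err)
 *		return err;
 *
 * The initial reference is dropped with drm_pagemap_put(), at which
 * point drm_pagemap_release() hands the object to the shrinker and
 * defers the drm device and module puts to a worker.
 */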

/**
 * drm_pagemap_evict_to_ram() - Evict GPU SVM range to RAM
 * @devmem_allocation: Pointer to the device memory allocation
 *
 * Similar to __drm_pagemap_migrate_to_ram() but does not require the mmap
 * lock, and migration is done via the migrate_device_* functions.
 *
 * Return: 0 on success, negative error code on failure.
 */
int drm_pagemap_evict_to_ram(struct drm_pagemap_devmem *devmem_allocation)
{
	const struct drm_pagemap_devmem_ops *ops = devmem_allocation->ops;
	struct drm_pagemap_migrate_details mdetails = {};
	unsigned long npages, mpages = 0;
	struct page **pages;
	unsigned long *src, *dst;
	struct drm_pagemap_addr *pagemap_addr;
	void *buf;
	int i, err = 0;
	unsigned int retry_count = 2;

	npages = devmem_allocation->size >> PAGE_SHIFT;

retry:
	if (!mmget_not_zero(devmem_allocation->mm))
		return -EFAULT;

	buf = kvcalloc(npages, 2 * sizeof(*src) + sizeof(*pagemap_addr) +
		       sizeof(*pages), GFP_KERNEL);
	if (!buf) {
		err = -ENOMEM;
		goto err_out;
	}
	src = buf;
	dst = buf + (sizeof(*src) * npages);
	pagemap_addr = buf + (2 * sizeof(*src) * npages);
	pages = buf + (2 * sizeof(*src) + sizeof(*pagemap_addr)) * npages;

	err = ops->populate_devmem_pfn(devmem_allocation, npages, src);
	if (err)
		goto err_free;

	err = migrate_device_pfns(src, npages);
	if (err)
		goto err_free;

	err = drm_pagemap_migrate_populate_ram_pfn(NULL, NULL, npages, &mpages,
						   src, dst, 0);
	if (err || !mpages)
		goto err_finalize;

	err = drm_pagemap_migrate_map_pages(devmem_allocation->dev,
					    devmem_allocation->dpagemap, pagemap_addr,
					    dst, npages, DMA_FROM_DEVICE,
					    &mdetails);
	if (err)
		goto err_finalize;

	for (i = 0; i < npages; ++i)
		pages[i] = migrate_pfn_to_page(src[i]);

	err = ops->copy_to_ram(pages, pagemap_addr, npages, NULL);
	if (err)
		goto err_finalize;

err_finalize:
	if (err)
		drm_pagemap_migration_unlock_put_pages(npages, dst);
	migrate_device_pages(src, dst, npages);
	migrate_device_finalize(src, dst, npages);
	drm_pagemap_migrate_unmap_pages(devmem_allocation->dev, pagemap_addr, dst, npages,
					DMA_FROM_DEVICE);

err_free:
	kvfree(buf);
err_out:
	mmput_async(devmem_allocation->mm);

	if (completion_done(&devmem_allocation->detached))
		return 0;

	if (retry_count--) {
		cond_resched();
		goto retry;
	}

	return err ?: -EBUSY;
}
EXPORT_SYMBOL_GPL(drm_pagemap_evict_to_ram);
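
/*
 * Eviction is typically triggered outside of a CPU fault, for example
 * from a shrinker or at device unbind, where no mmap lock or VMA is
 * available. A hedged sketch of an unbind-time loop over a hypothetical
 * driver-side list of device memory allocations:
 *
 *	list_for_each_entry_safe(alloc, next, &foo->devmem_list, link) {
 *		err = drm_pagemap_evict_to_ram(&alloc->devmem);
 *		if (err)
 *			drm_warn(&foo->drm, "Eviction failed: %pe\n",
 *				 ERR_PTR(err));
 *	}
 */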

/**
 * __drm_pagemap_migrate_to_ram() - Migrate GPU SVM range to RAM (internal)
 * @vas: Pointer to the VM area structure
 * @page: Pointer to the page for fault handling.
 * @fault_addr: Fault address
 * @size: Size of migration
 *
 * This internal function performs the migration of the specified GPU SVM range
 * to RAM. It sets up the migration, populates and DMA-maps RAM PFNs, and
 * invokes the driver-specific operations for migration to RAM.
 *
 * Return: 0 on success, negative error code on failure.
 */
static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas,
					struct page *page,
					unsigned long fault_addr,
					unsigned long size)
{
	struct migrate_vma migrate = {
		.vma = vas,
		.pgmap_owner = page_pgmap(page)->owner,
		.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE |
			MIGRATE_VMA_SELECT_DEVICE_COHERENT,
		.fault_page = page,
	};
	struct drm_pagemap_migrate_details mdetails = {};
	struct drm_pagemap_zdd *zdd;
	const struct drm_pagemap_devmem_ops *ops;
	struct device *dev = NULL;
	unsigned long npages, mpages = 0;
	struct page **pages;
	struct drm_pagemap_addr *pagemap_addr;
	unsigned long start, end;
	void *buf;
	int i, err = 0;

	zdd = page->zone_device_data;
	if (time_before64(get_jiffies_64(), zdd->devmem_allocation->timeslice_expiration))
		return 0;

	start = ALIGN_DOWN(fault_addr, size);
	end = ALIGN(fault_addr + 1, size);

	/* Corner case where the VMA has been partially unmapped */
	if (start < vas->vm_start)
		start = vas->vm_start;
	if (end > vas->vm_end)
		end = vas->vm_end;

	migrate.start = start;
	migrate.end = end;
	npages = npages_in_range(start, end);

	buf = kvcalloc(npages, 2 * sizeof(*migrate.src) + sizeof(*pagemap_addr) +
		       sizeof(*pages), GFP_KERNEL);
	if (!buf) {
		err = -ENOMEM;
		goto err_out;
	}
	pagemap_addr = buf + (2 * sizeof(*migrate.src) * npages);
	pages = buf + (2 * sizeof(*migrate.src) + sizeof(*pagemap_addr)) * npages;

	migrate.vma = vas;
	migrate.src = buf;
	migrate.dst = migrate.src + npages;

	err = migrate_vma_setup(&migrate);
	if (err)
		goto err_free;

	/* Raced with another CPU fault, nothing to do */
	if (!migrate.cpages)
		goto err_free;

	ops = zdd->devmem_allocation->ops;
	dev = zdd->devmem_allocation->dev;

	err = drm_pagemap_migrate_populate_ram_pfn(vas, page, npages, &mpages,
						   migrate.src, migrate.dst,
						   start);
	if (err)
		goto err_finalize;

	err = drm_pagemap_migrate_map_pages(dev, zdd->dpagemap, pagemap_addr, migrate.dst, npages,
					    DMA_FROM_DEVICE, &mdetails);
	if (err)
		goto err_finalize;

	for (i = 0; i < npages; ++i)
		pages[i] = migrate_pfn_to_page(migrate.src[i]);

	err = ops->copy_to_ram(pages, pagemap_addr, npages, NULL);
	if (err)
		goto err_finalize;

err_finalize:
	if (err)
		drm_pagemap_migration_unlock_put_pages(npages, migrate.dst);
	migrate_vma_pages(&migrate);
	migrate_vma_finalize(&migrate);
	if (dev)
		drm_pagemap_migrate_unmap_pages(dev, pagemap_addr, migrate.dst,
						npages, DMA_FROM_DEVICE);
err_free:
	kvfree(buf);
err_out:
	return err;
}

/**
 * drm_pagemap_folio_free() - Put GPU SVM zone device data associated with a folio
 * @folio: Pointer to the folio
 *
 * This function is a callback used to put the GPU SVM zone device data
 * associated with a folio when it is being released.
 */
static void drm_pagemap_folio_free(struct folio *folio)
{
	drm_pagemap_zdd_put(folio->page.zone_device_data);
}

/**
 * drm_pagemap_migrate_to_ram() - Migrate a virtual range to RAM (page fault handler)
 * @vmf: Pointer to the fault information structure
 *
 * This function is a page fault handler used to migrate a virtual range
 * to RAM. The device memory allocation in which the device page is found is
 * migrated in its entirety.
 *
 * Returns:
 * VM_FAULT_SIGBUS on failure, 0 on success.
 */
static vm_fault_t drm_pagemap_migrate_to_ram(struct vm_fault *vmf)
{
	struct drm_pagemap_zdd *zdd = vmf->page->zone_device_data;
	int err;

	err = __drm_pagemap_migrate_to_ram(vmf->vma,
					   vmf->page, vmf->address,
					   zdd->devmem_allocation->size);

	return err ? VM_FAULT_SIGBUS : 0;
}

static const struct dev_pagemap_ops drm_pagemap_pagemap_ops = {
	.folio_free = drm_pagemap_folio_free,
	.migrate_to_ram = drm_pagemap_migrate_to_ram,
};

/**
 * drm_pagemap_pagemap_ops_get() - Retrieve GPU SVM device page map operations
 *
 * Returns:
 * Pointer to the GPU SVM device page map operations structure.
 */
const struct dev_pagemap_ops *drm_pagemap_pagemap_ops_get(void)
{
	return &drm_pagemap_pagemap_ops;
}
EXPORT_SYMBOL_GPL(drm_pagemap_pagemap_ops_get);
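
/*
 * The returned ops are what a provider plugs into its dev_pagemap
 * before mapping the device memory. A sketch, where res and foo_owner
 * are assumptions about driver-side resource handling:
 *
 *	pagemap->type = MEMORY_DEVICE_PRIVATE;
 *	pagemap->range.start = res->start;
 *	pagemap->range.end = res->end;
 *	pagemap->nr_range = 1;
 *	pagemap->ops = drm_pagemap_pagemap_ops_get();
 *	pagemap->owner = foo_owner;
 *	addr = devm_memremap_pages(dev, pagemap);
 *
 * The pgmap owner is what migrate_vma_setup() matches against when
 * collecting device-private pages, so providers that want to allow
 * migration between their devices should agree on a common owner.
 */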

/**
 * drm_pagemap_devmem_init() - Initialize a drm_pagemap device memory allocation
 *
 * @devmem_allocation: The struct drm_pagemap_devmem to initialize.
 * @dev: Pointer to the device structure to which the device memory allocation belongs
 * @mm: Pointer to the mm_struct for the address space
 * @ops: Pointer to the operations structure for GPU SVM device memory
 * @dpagemap: The struct drm_pagemap we're allocating from.
 * @size: Size of device memory allocation
 * @pre_migrate_fence: Fence to wait for or pipeline behind before migration starts.
 * (May be NULL).
 */
void drm_pagemap_devmem_init(struct drm_pagemap_devmem *devmem_allocation,
			     struct device *dev, struct mm_struct *mm,
			     const struct drm_pagemap_devmem_ops *ops,
			     struct drm_pagemap *dpagemap, size_t size,
			     struct dma_fence *pre_migrate_fence)
{
	init_completion(&devmem_allocation->detached);
	devmem_allocation->dev = dev;
	devmem_allocation->mm = mm;
	devmem_allocation->ops = ops;
	devmem_allocation->dpagemap = dpagemap;
	devmem_allocation->size = size;
	devmem_allocation->pre_migrate_fence = pre_migrate_fence;
}
EXPORT_SYMBOL_GPL(drm_pagemap_devmem_init);

/**
 * drm_pagemap_page_to_dpagemap() - Return a pointer to the drm_pagemap of a page
 * @page: The struct page.
 *
 * Return: A pointer to the struct drm_pagemap of a device private page that
 * was populated from the struct drm_pagemap. If the page was *not* populated
 * from a struct drm_pagemap, the result is undefined and the function call
 * may result in dereferencing an invalid address.
 */
struct drm_pagemap *drm_pagemap_page_to_dpagemap(struct page *page)
{
	struct drm_pagemap_zdd *zdd = page->zone_device_data;

	return zdd->devmem_allocation->dpagemap;
}
EXPORT_SYMBOL_GPL(drm_pagemap_page_to_dpagemap);

/**
 * drm_pagemap_populate_mm() - Populate a virtual range with device memory pages
 * @dpagemap: Pointer to the drm_pagemap managing the device memory
 * @start: Start of the virtual range to populate.
 * @end: End of the virtual range to populate.
 * @mm: Pointer to the virtual address space.
 * @timeslice_ms: The time requested for the migrated pagemap pages to
 * be present in @mm before being allowed to be migrated back.
 *
 * Attempt to populate a virtual range with device memory pages,
 * clearing them or migrating data from the existing pages if necessary.
 * The function is best effort only, and implementations may vary
 * in how hard they try to satisfy the request.
 *
 * Return: %0 on success, negative error code on error. If the hardware
 * device was removed / unbound the function will return %-ENODEV.
 */
int drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
			    unsigned long start, unsigned long end,
			    struct mm_struct *mm,
			    unsigned long timeslice_ms)
{
	int err;

	if (!mmget_not_zero(mm))
		return -EFAULT;
	mmap_read_lock(mm);
	err = dpagemap->ops->populate_mm(dpagemap, start, end, mm,
					 timeslice_ms);
	mmap_read_unlock(mm);
	mmput(mm);

	return err;
}
EXPORT_SYMBOL(drm_pagemap_populate_mm);

void drm_pagemap_destroy(struct drm_pagemap *dpagemap, bool is_atomic_or_reclaim)
{
	if (dpagemap->ops->destroy)
		dpagemap->ops->destroy(dpagemap, is_atomic_or_reclaim);
	else
		kfree(dpagemap);
}

static void drm_pagemap_exit(void)
{
	flush_work(&drm_pagemap_work);
	if (WARN_ON(!llist_empty(&drm_pagemap_unhold_list)))
		disable_work_sync(&drm_pagemap_work);
}
module_exit(drm_pagemap_exit);