// SPDX-License-Identifier: GPL-2.0
/*
 * This is a module to test the HMM (Heterogeneous Memory Management)
 * mirror and zone device private memory migration APIs of the kernel.
 * Userspace programs can register with the driver to mirror their own address
 * space and can use the device to read/write any valid virtual address.
 */
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/memremap.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/delay.h>
#include <linux/pagemap.h>
#include <linux/hmm.h>
#include <linux/vmalloc.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/sched/mm.h>
#include <linux/platform_device.h>
#include <linux/rmap.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>

#include "test_hmm_uapi.h"

#define DMIRROR_NDEVICES		4
#define DMIRROR_RANGE_FAULT_TIMEOUT	1000
#define DEVMEM_CHUNK_SIZE		(256 * 1024 * 1024U)
#define DEVMEM_CHUNKS_RESERVE		16

/*
 * For device_private pages, dpage is just a dummy struct page
 * representing a piece of device memory. dmirror_devmem_alloc_page
 * allocates a real system memory page as backing storage to fake a
 * real device. zone_device_data points to that backing page. But
 * for device_coherent memory, the struct page represents real
 * physical CPU-accessible memory that we can use directly.
 */
#define BACKING_PAGE(page) (is_device_private_page((page)) ? \
			   (page)->zone_device_data : (page))

static unsigned long spm_addr_dev0;
module_param(spm_addr_dev0, long, 0644);
MODULE_PARM_DESC(spm_addr_dev0,
		"Specify start address for SPM (special purpose memory) used for device 0. By setting this Coherent device type will be used. Make sure spm_addr_dev1 is set too. Minimum SPM size should be DEVMEM_CHUNK_SIZE.");

static unsigned long spm_addr_dev1;
module_param(spm_addr_dev1, long, 0644);
MODULE_PARM_DESC(spm_addr_dev1,
		"Specify start address for SPM (special purpose memory) used for device 1. By setting this Coherent device type will be used. Make sure spm_addr_dev0 is set too. Minimum SPM size should be DEVMEM_CHUNK_SIZE.");

static const struct dev_pagemap_ops dmirror_devmem_ops;
static const struct mmu_interval_notifier_ops dmirror_min_ops;
static dev_t dmirror_dev;

struct dmirror_device;

struct dmirror_bounce {
	void			*ptr;
	unsigned long		size;
	unsigned long		addr;
	unsigned long		cpages;
};

#define DPT_XA_TAG_ATOMIC 1UL
#define DPT_XA_TAG_WRITE 3UL

/*
 * Data structure to track address ranges and register for mmu interval
 * notifier updates.
 */
struct dmirror_interval {
	struct mmu_interval_notifier	notifier;
	struct dmirror			*dmirror;
};

/*
 * Data attached to the open device file.
 * Note that it might be shared after a fork().
 */
struct dmirror {
	struct dmirror_device	*mdevice;
	struct xarray		pt;
	struct mmu_interval_notifier	notifier;
	struct mutex		mutex;
};

/*
 * ZONE_DEVICE pages for migration and simulating device memory.
 */
struct dmirror_chunk {
	struct dev_pagemap	pagemap;
	struct dmirror_device	*mdevice;
	bool remove;
};

/*
 * Per device data.
 */
struct dmirror_device {
	struct cdev		cdevice;
	unsigned int		zone_device_type;
	struct device		device;

	unsigned int		devmem_capacity;
	unsigned int		devmem_count;
	struct dmirror_chunk	**devmem_chunks;
	struct mutex		devmem_lock;	/* protects the above */

	unsigned long		calloc;
	unsigned long		cfree;
	struct page		*free_pages;
	spinlock_t		lock;		/* protects the above */
};

static struct dmirror_device dmirror_devices[DMIRROR_NDEVICES];

static int dmirror_bounce_init(struct dmirror_bounce *bounce,
			       unsigned long addr,
			       unsigned long size)
{
	bounce->addr = addr;
	bounce->size = size;
	bounce->cpages = 0;
	bounce->ptr = vmalloc(size);
	if (!bounce->ptr)
		return -ENOMEM;
	return 0;
}

static bool dmirror_is_private_zone(struct dmirror_device *mdevice)
{
	return (mdevice->zone_device_type ==
		HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ? true : false;
}

static enum migrate_vma_direction
dmirror_select_device(struct dmirror *dmirror)
{
	return (dmirror->mdevice->zone_device_type ==
		HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ?
		MIGRATE_VMA_SELECT_DEVICE_PRIVATE :
		MIGRATE_VMA_SELECT_DEVICE_COHERENT;
}

static void dmirror_bounce_fini(struct dmirror_bounce *bounce)
{
	vfree(bounce->ptr);
}

static int dmirror_fops_open(struct inode *inode, struct file *filp)
{
	struct cdev *cdev = inode->i_cdev;
	struct dmirror *dmirror;
	int ret;

	/* Mirror this process address space */
	dmirror = kzalloc(sizeof(*dmirror), GFP_KERNEL);
	if (dmirror == NULL)
		return -ENOMEM;

	dmirror->mdevice = container_of(cdev, struct dmirror_device, cdevice);
	mutex_init(&dmirror->mutex);
	xa_init(&dmirror->pt);

	ret = mmu_interval_notifier_insert(&dmirror->notifier, current->mm,
				0, ULONG_MAX & PAGE_MASK, &dmirror_min_ops);
	if (ret) {
		kfree(dmirror);
		return ret;
	}

	filp->private_data = dmirror;
	return 0;
}

static int dmirror_fops_release(struct inode *inode, struct file *filp)
{
	struct dmirror *dmirror = filp->private_data;

	mmu_interval_notifier_remove(&dmirror->notifier);
	xa_destroy(&dmirror->pt);
	kfree(dmirror);
	return 0;
}

static struct dmirror_chunk *dmirror_page_to_chunk(struct page *page)
{
	return container_of(page_pgmap(page), struct dmirror_chunk,
			    pagemap);
}

static struct dmirror_device *dmirror_page_to_device(struct page *page)

{
	return dmirror_page_to_chunk(page)->mdevice;
}

static int dmirror_do_fault(struct dmirror *dmirror, struct hmm_range *range)
{
	unsigned long *pfns = range->hmm_pfns;
	unsigned long pfn;

	for (pfn = (range->start >> PAGE_SHIFT);
	     pfn < (range->end >> PAGE_SHIFT);
	     pfn++, pfns++) {
		struct page *page;
		void *entry;

		/*
		 * Since we asked for hmm_range_fault() to populate pages,
		 * it shouldn't return an error entry on success.
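		 *
		 * The mirror "page table" is just an xarray indexed by the
		 * virtual PFN; writable mappings are recorded by tagging the
		 * stored page pointer with DPT_XA_TAG_WRITE below.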
		 */
		WARN_ON(*pfns & HMM_PFN_ERROR);
		WARN_ON(!(*pfns & HMM_PFN_VALID));

		page = hmm_pfn_to_page(*pfns);
		WARN_ON(!page);

		entry = page;
		if (*pfns & HMM_PFN_WRITE)
			entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
		else if (WARN_ON(range->default_flags & HMM_PFN_WRITE))
			return -EFAULT;
		entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
		if (xa_is_err(entry))
			return xa_err(entry);
	}

	return 0;
}

static void dmirror_do_update(struct dmirror *dmirror, unsigned long start,
			      unsigned long end)
{
	unsigned long pfn;
	void *entry;

	/*
	 * The XArray doesn't hold references to pages since it relies on
	 * the mmu notifier to clear page pointers when they become stale.
	 * Therefore, it is OK to just clear the entry.
	 */
	xa_for_each_range(&dmirror->pt, pfn, entry, start >> PAGE_SHIFT,
			  end >> PAGE_SHIFT)
		xa_erase(&dmirror->pt, pfn);
}

static bool dmirror_interval_invalidate(struct mmu_interval_notifier *mni,
				const struct mmu_notifier_range *range,
				unsigned long cur_seq)
{
	struct dmirror *dmirror = container_of(mni, struct dmirror, notifier);

	/*
	 * Ignore invalidation callbacks for device private pages since
	 * the invalidation is handled as part of the migration process.
	 */
	if (range->event == MMU_NOTIFY_MIGRATE &&
	    range->owner == dmirror->mdevice)
		return true;

	if (mmu_notifier_range_blockable(range))
		mutex_lock(&dmirror->mutex);
	else if (!mutex_trylock(&dmirror->mutex))
		return false;

	mmu_interval_set_seq(mni, cur_seq);
	dmirror_do_update(dmirror, range->start, range->end);

	mutex_unlock(&dmirror->mutex);
	return true;
}

static const struct mmu_interval_notifier_ops dmirror_min_ops = {
	.invalidate = dmirror_interval_invalidate,
};

static int dmirror_range_fault(struct dmirror *dmirror,
				struct hmm_range *range)
{
	struct mm_struct *mm = dmirror->notifier.mm;
	unsigned long timeout =
		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
	int ret;

	while (true) {
		if (time_after(jiffies, timeout)) {
			ret = -EBUSY;
			goto out;
		}

		range->notifier_seq = mmu_interval_read_begin(range->notifier);
		mmap_read_lock(mm);
		ret = hmm_range_fault(range);
		mmap_read_unlock(mm);
		if (ret) {
			if (ret == -EBUSY)
				continue;
			goto out;
		}

		mutex_lock(&dmirror->mutex);
		if (mmu_interval_read_retry(range->notifier,
					    range->notifier_seq)) {
			mutex_unlock(&dmirror->mutex);
			continue;
		}
		break;
	}

	ret = dmirror_do_fault(dmirror, range);

	mutex_unlock(&dmirror->mutex);
out:
	return ret;
}

static int dmirror_fault(struct dmirror *dmirror, unsigned long start,
			 unsigned long end, bool write)
{
	struct mm_struct *mm = dmirror->notifier.mm;
	unsigned long addr;
	unsigned long pfns[32];
	struct hmm_range range = {
		.notifier = &dmirror->notifier,
		.hmm_pfns = pfns,
		.pfn_flags_mask = 0,
		.default_flags =
			HMM_PFN_REQ_FAULT | (write ? HMM_PFN_REQ_WRITE : 0),
		.dev_private_owner = dmirror->mdevice,
	};
	int ret = 0;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return 0;

	for (addr = start; addr < end; addr = range.end) {
		range.start = addr;
		range.end = min(addr + (ARRAY_SIZE(pfns) << PAGE_SHIFT), end);

		ret = dmirror_range_fault(dmirror, &range);
		if (ret)
			break;
	}

	mmput(mm);
	return ret;
}

static int dmirror_do_read(struct dmirror *dmirror, unsigned long start,
			   unsigned long end, struct dmirror_bounce *bounce)
{
	unsigned long pfn;
	void *ptr;

	ptr = bounce->ptr + ((start - bounce->addr) & PAGE_MASK);

	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
		void *entry;
		struct page *page;

		entry = xa_load(&dmirror->pt, pfn);
		page = xa_untag_pointer(entry);
		if (!page)
			return -ENOENT;

		memcpy_from_page(ptr, page, 0, PAGE_SIZE);

		ptr += PAGE_SIZE;
		bounce->cpages++;
	}

	return 0;
}

static int dmirror_read(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd)
{
	struct dmirror_bounce bounce;
	unsigned long start, end;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	ret = dmirror_bounce_init(&bounce, start, size);
	if (ret)
		return ret;

	while (1) {
		mutex_lock(&dmirror->mutex);
		ret = dmirror_do_read(dmirror, start, end, &bounce);
		mutex_unlock(&dmirror->mutex);
		if (ret != -ENOENT)
			break;

		start = cmd->addr + (bounce.cpages << PAGE_SHIFT);
		ret = dmirror_fault(dmirror, start, end, false);
		if (ret)
			break;
		cmd->faults++;
	}

	if (ret == 0) {
		if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
				 bounce.size))
			ret = -EFAULT;
	}
	cmd->cpages = bounce.cpages;
	dmirror_bounce_fini(&bounce);
	return ret;
}

static int dmirror_do_write(struct dmirror *dmirror, unsigned long start,
			    unsigned long end, struct dmirror_bounce *bounce)
{
	unsigned long pfn;
	void *ptr;

	ptr = bounce->ptr + ((start - bounce->addr) & PAGE_MASK);

	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
		void *entry;
		struct page *page;

		entry = xa_load(&dmirror->pt, pfn);
		page = xa_untag_pointer(entry);
		if (!page || xa_pointer_tag(entry) != DPT_XA_TAG_WRITE)
			return -ENOENT;

		memcpy_to_page(page, 0, ptr, PAGE_SIZE);

		ptr += PAGE_SIZE;
		bounce->cpages++;
	}

	return 0;
}

static int dmirror_write(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd)
{
	struct dmirror_bounce bounce;
	unsigned long start, end;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	ret = dmirror_bounce_init(&bounce, start, size);
	if (ret)
		return ret;
	if (copy_from_user(bounce.ptr, u64_to_user_ptr(cmd->ptr),
			   bounce.size)) {
		ret = -EFAULT;
		goto fini;
	}

	while (1) {
		mutex_lock(&dmirror->mutex);
		ret = dmirror_do_write(dmirror, start, end, &bounce);
		mutex_unlock(&dmirror->mutex);
		if (ret != -ENOENT)
			break;

		start = cmd->addr + (bounce.cpages << PAGE_SHIFT);
		ret = dmirror_fault(dmirror, start, end, true);
		if (ret)
			break;
		cmd->faults++;
	}

fini:
	cmd->cpages = bounce.cpages;
	dmirror_bounce_fini(&bounce);
	return ret;
}

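/*
 * Allocate one DEVMEM_CHUNK_SIZE (256 MB) chunk of simulated device memory.
 * Device private chunks are carved out of a free physical range obtained
 * from request_free_mem_region(); device coherent chunks use the address
 * given by the spm_addr_dev0/spm_addr_dev1 module parameters. All pages of
 * the chunk are threaded onto mdevice->free_pages via zone_device_data.
 */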
static int dmirror_allocate_chunk(struct dmirror_device *mdevice,
				  struct page **ppage)
{
	struct dmirror_chunk *devmem;
	struct resource *res = NULL;
	unsigned long pfn;
	unsigned long pfn_first;
	unsigned long pfn_last;
	void *ptr;
	int ret = -ENOMEM;

	devmem = kzalloc(sizeof(*devmem), GFP_KERNEL);
	if (!devmem)
		return ret;

	switch (mdevice->zone_device_type) {
	case HMM_DMIRROR_MEMORY_DEVICE_PRIVATE:
		res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE,
					      "hmm_dmirror");
		if (IS_ERR_OR_NULL(res))
			goto err_devmem;
		devmem->pagemap.range.start = res->start;
		devmem->pagemap.range.end = res->end;
		devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
		break;
	case HMM_DMIRROR_MEMORY_DEVICE_COHERENT:
		devmem->pagemap.range.start = (MINOR(mdevice->cdevice.dev) - 2) ?
							spm_addr_dev0 :
							spm_addr_dev1;
		devmem->pagemap.range.end = devmem->pagemap.range.start +
					    DEVMEM_CHUNK_SIZE - 1;
		devmem->pagemap.type = MEMORY_DEVICE_COHERENT;
		break;
	default:
		ret = -EINVAL;
		goto err_devmem;
	}

	devmem->pagemap.nr_range = 1;
	devmem->pagemap.ops = &dmirror_devmem_ops;
	devmem->pagemap.owner = mdevice;

	mutex_lock(&mdevice->devmem_lock);

	if (mdevice->devmem_count == mdevice->devmem_capacity) {
		struct dmirror_chunk **new_chunks;
		unsigned int new_capacity;

		new_capacity = mdevice->devmem_capacity +
				DEVMEM_CHUNKS_RESERVE;
		new_chunks = krealloc(mdevice->devmem_chunks,
				sizeof(new_chunks[0]) * new_capacity,
				GFP_KERNEL);
		if (!new_chunks)
			goto err_release;
		mdevice->devmem_capacity = new_capacity;
		mdevice->devmem_chunks = new_chunks;
	}
	ptr = memremap_pages(&devmem->pagemap, numa_node_id());
	if (IS_ERR_OR_NULL(ptr)) {
		if (ptr)
			ret = PTR_ERR(ptr);
		else
			ret = -EFAULT;
		goto err_release;
	}

	devmem->mdevice = mdevice;
	pfn_first = devmem->pagemap.range.start >> PAGE_SHIFT;
	pfn_last = pfn_first + (range_len(&devmem->pagemap.range) >> PAGE_SHIFT);
	mdevice->devmem_chunks[mdevice->devmem_count++] = devmem;

	mutex_unlock(&mdevice->devmem_lock);

	pr_info("added new %u MB chunk (total %u chunks, %u MB) PFNs [0x%lx 0x%lx)\n",
		DEVMEM_CHUNK_SIZE / (1024 * 1024),
		mdevice->devmem_count,
		mdevice->devmem_count * (DEVMEM_CHUNK_SIZE / (1024 * 1024)),
		pfn_first, pfn_last);

	spin_lock(&mdevice->lock);
	for (pfn = pfn_first; pfn < pfn_last; pfn++) {
		struct page *page = pfn_to_page(pfn);

		page->zone_device_data = mdevice->free_pages;
		mdevice->free_pages = page;
	}
	if (ppage) {
		*ppage = mdevice->free_pages;
		mdevice->free_pages = (*ppage)->zone_device_data;
		mdevice->calloc++;
	}
	spin_unlock(&mdevice->lock);

	return 0;

err_release:
	mutex_unlock(&mdevice->devmem_lock);
	if (res && devmem->pagemap.type == MEMORY_DEVICE_PRIVATE)
		release_mem_region(devmem->pagemap.range.start,
				   range_len(&devmem->pagemap.range));
err_devmem:
	kfree(devmem);

	return ret;
}

static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
{
	struct page *dpage = NULL;
	struct page *rpage = NULL;

	/*
	 * For ZONE_DEVICE private type, this is a fake device so we allocate
	 * real system memory to store our device memory.
	 * For ZONE_DEVICE coherent type we use the actual dpage to store the
	 * data and ignore rpage.
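	 *
	 * In the private case the backing page ends up in
	 * dpage->zone_device_data below, which is what the BACKING_PAGE()
	 * macro retrieves later.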
	 */
	if (dmirror_is_private_zone(mdevice)) {
		rpage = alloc_page(GFP_HIGHUSER);
		if (!rpage)
			return NULL;
	}
	spin_lock(&mdevice->lock);

	if (mdevice->free_pages) {
		dpage = mdevice->free_pages;
		mdevice->free_pages = dpage->zone_device_data;
		mdevice->calloc++;
		spin_unlock(&mdevice->lock);
	} else {
		spin_unlock(&mdevice->lock);
		if (dmirror_allocate_chunk(mdevice, &dpage))
			goto error;
	}

	zone_device_page_init(dpage);
	dpage->zone_device_data = rpage;
	return dpage;

error:
	if (rpage)
		__free_page(rpage);
	return NULL;
}

static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
					   struct dmirror *dmirror)
{
	struct dmirror_device *mdevice = dmirror->mdevice;
	const unsigned long *src = args->src;
	unsigned long *dst = args->dst;
	unsigned long addr;

	for (addr = args->start; addr < args->end; addr += PAGE_SIZE,
						   src++, dst++) {
		struct page *spage;
		struct page *dpage;
		struct page *rpage;

		if (!(*src & MIGRATE_PFN_MIGRATE))
			continue;

		/*
		 * Note that spage might be NULL which is OK since it is an
		 * unallocated pte_none() or read-only zero page.
		 */
		spage = migrate_pfn_to_page(*src);
		if (WARN(spage && is_zone_device_page(spage),
		     "page already in device spage pfn: 0x%lx\n",
		     page_to_pfn(spage)))
			continue;

		dpage = dmirror_devmem_alloc_page(mdevice);
		if (!dpage)
			continue;

		rpage = BACKING_PAGE(dpage);
		if (spage)
			copy_highpage(rpage, spage);
		else
			clear_highpage(rpage);

		/*
		 * Normally, a device would use the page->zone_device_data to
		 * point to the mirror but here we use it to hold the page for
		 * the simulated device memory and that page holds the pointer
		 * to the mirror.
		 */
		rpage->zone_device_data = dmirror;

		pr_debug("migrating from sys to dev pfn src: 0x%lx pfn dst: 0x%lx\n",
			 page_to_pfn(spage), page_to_pfn(dpage));
		*dst = migrate_pfn(page_to_pfn(dpage));
		if ((*src & MIGRATE_PFN_WRITE) ||
		    (!spage && args->vma->vm_flags & VM_WRITE))
			*dst |= MIGRATE_PFN_WRITE;
	}
}

static int dmirror_check_atomic(struct dmirror *dmirror, unsigned long start,
				unsigned long end)
{
	unsigned long pfn;

	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
		void *entry;

		entry = xa_load(&dmirror->pt, pfn);
		if (xa_pointer_tag(entry) == DPT_XA_TAG_ATOMIC)
			return -EPERM;
	}

	return 0;
}

static int dmirror_atomic_map(unsigned long addr, struct page *page,
			      struct dmirror *dmirror)
{
	void *entry;

	/* Map the migrated pages into the device's page tables. */
	mutex_lock(&dmirror->mutex);

	entry = xa_tag_pointer(page, DPT_XA_TAG_ATOMIC);
	entry = xa_store(&dmirror->pt, addr >> PAGE_SHIFT, entry, GFP_ATOMIC);
	if (xa_is_err(entry)) {
		mutex_unlock(&dmirror->mutex);
		return xa_err(entry);
	}

	mutex_unlock(&dmirror->mutex);
	return 0;
}

static int dmirror_migrate_finalize_and_map(struct migrate_vma *args,
					    struct dmirror *dmirror)
{
	unsigned long start = args->start;
	unsigned long end = args->end;
	const unsigned long *src = args->src;
	const unsigned long *dst = args->dst;
	unsigned long pfn;

	/* Map the migrated pages into the device's page tables. */
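	/*
	 * The stored entries point at the backing page (BACKING_PAGE()), so
	 * dmirror_do_read()/dmirror_do_write() operate on the simulated
	 * device memory; writable mappings keep the DPT_XA_TAG_WRITE tag.
	 */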
	mutex_lock(&dmirror->mutex);

	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++,
								src++, dst++) {
		struct page *dpage;
		void *entry;

		if (!(*src & MIGRATE_PFN_MIGRATE))
			continue;

		dpage = migrate_pfn_to_page(*dst);
		if (!dpage)
			continue;

		entry = BACKING_PAGE(dpage);
		if (*dst & MIGRATE_PFN_WRITE)
			entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
		entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
		if (xa_is_err(entry)) {
			mutex_unlock(&dmirror->mutex);
			return xa_err(entry);
		}
	}

	mutex_unlock(&dmirror->mutex);
	return 0;
}

static int dmirror_exclusive(struct dmirror *dmirror,
			     struct hmm_dmirror_cmd *cmd)
{
	unsigned long start, end, addr;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	struct mm_struct *mm = dmirror->notifier.mm;
	struct dmirror_bounce bounce;
	int ret = 0;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return -EINVAL;

	mmap_read_lock(mm);
	for (addr = start; !ret && addr < end; addr += PAGE_SIZE) {
		struct folio *folio;
		struct page *page;

		page = make_device_exclusive(mm, addr, NULL, &folio);
		if (IS_ERR(page)) {
			ret = PTR_ERR(page);
			break;
		}

		ret = dmirror_atomic_map(addr, page, dmirror);
		folio_unlock(folio);
		folio_put(folio);
	}
	mmap_read_unlock(mm);
	mmput(mm);

	if (ret)
		return ret;

	/* Return the migrated data for verification. */
	ret = dmirror_bounce_init(&bounce, start, size);
	if (ret)
		return ret;
	mutex_lock(&dmirror->mutex);
	ret = dmirror_do_read(dmirror, start, end, &bounce);
	mutex_unlock(&dmirror->mutex);
	if (ret == 0) {
		if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
				 bounce.size))
			ret = -EFAULT;
	}

	cmd->cpages = bounce.cpages;
	dmirror_bounce_fini(&bounce);
	return ret;
}

static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
						      struct dmirror *dmirror)
{
	const unsigned long *src = args->src;
	unsigned long *dst = args->dst;
	unsigned long start = args->start;
	unsigned long end = args->end;
	unsigned long addr;

	for (addr = start; addr < end; addr += PAGE_SIZE,
				       src++, dst++) {
		struct page *dpage, *spage;

		spage = migrate_pfn_to_page(*src);
		if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
			continue;

		if (WARN_ON(!is_device_private_page(spage) &&
			    !is_device_coherent_page(spage)))
			continue;
		spage = BACKING_PAGE(spage);
		dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
		if (!dpage)
			continue;
		pr_debug("migrating from dev to sys pfn src: 0x%lx pfn dst: 0x%lx\n",
			 page_to_pfn(spage), page_to_pfn(dpage));

		lock_page(dpage);
		xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
		copy_highpage(dpage, spage);
		*dst = migrate_pfn(page_to_pfn(dpage));
		if (*src & MIGRATE_PFN_WRITE)
			*dst |= MIGRATE_PFN_WRITE;
	}
	return 0;
}

static unsigned long
dmirror_successful_migrated_pages(struct migrate_vma *migrate)
{
	unsigned long cpages = 0;
	unsigned long i;

	for (i = 0; i < migrate->npages; i++) {
		if (migrate->src[i] & MIGRATE_PFN_VALID &&
		    migrate->src[i] & MIGRATE_PFN_MIGRATE)
			cpages++;
	}
	return cpages;
}

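/*
 * Migrate device memory back to system memory for a range of the mirrored
 * process, in chunks of at most 32 pages (the size of the pfn arrays).
 * Device private or device coherent pages are selected according to
 * dmirror_select_device().
 */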
static int dmirror_migrate_to_system(struct dmirror *dmirror,
				     struct hmm_dmirror_cmd *cmd)
{
	unsigned long start, end, addr;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	struct mm_struct *mm = dmirror->notifier.mm;
	struct vm_area_struct *vma;
	unsigned long src_pfns[32] = { 0 };
	unsigned long dst_pfns[32] = { 0 };
	struct migrate_vma args = { 0 };
	unsigned long next;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return -EINVAL;

	cmd->cpages = 0;
	mmap_read_lock(mm);
	for (addr = start; addr < end; addr = next) {
		vma = vma_lookup(mm, addr);
		if (!vma || !(vma->vm_flags & VM_READ)) {
			ret = -EINVAL;
			goto out;
		}
		next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT));
		if (next > vma->vm_end)
			next = vma->vm_end;

		args.vma = vma;
		args.src = src_pfns;
		args.dst = dst_pfns;
		args.start = addr;
		args.end = next;
		args.pgmap_owner = dmirror->mdevice;
		args.flags = dmirror_select_device(dmirror);

		ret = migrate_vma_setup(&args);
		if (ret)
			goto out;

		pr_debug("Migrating from device mem to sys mem\n");
		dmirror_devmem_fault_alloc_and_copy(&args, dmirror);

		migrate_vma_pages(&args);
		cmd->cpages += dmirror_successful_migrated_pages(&args);
		migrate_vma_finalize(&args);
	}
out:
	mmap_read_unlock(mm);
	mmput(mm);

	return ret;
}

static int dmirror_migrate_to_device(struct dmirror *dmirror,
				struct hmm_dmirror_cmd *cmd)
{
	unsigned long start, end, addr;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	struct mm_struct *mm = dmirror->notifier.mm;
	struct vm_area_struct *vma;
	unsigned long src_pfns[32] = { 0 };
	unsigned long dst_pfns[32] = { 0 };
	struct dmirror_bounce bounce;
	struct migrate_vma args = { 0 };
	unsigned long next;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return -EINVAL;

	mmap_read_lock(mm);
	for (addr = start; addr < end; addr = next) {
		vma = vma_lookup(mm, addr);
		if (!vma || !(vma->vm_flags & VM_READ)) {
			ret = -EINVAL;
			goto out;
		}
		next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT));
		if (next > vma->vm_end)
			next = vma->vm_end;

		args.vma = vma;
		args.src = src_pfns;
		args.dst = dst_pfns;
		args.start = addr;
		args.end = next;
		args.pgmap_owner = dmirror->mdevice;
		args.flags = MIGRATE_VMA_SELECT_SYSTEM;
		ret = migrate_vma_setup(&args);
		if (ret)
			goto out;

		pr_debug("Migrating from sys mem to device mem\n");
		dmirror_migrate_alloc_and_copy(&args, dmirror);
		migrate_vma_pages(&args);
		dmirror_migrate_finalize_and_map(&args, dmirror);
		migrate_vma_finalize(&args);
	}
	mmap_read_unlock(mm);
	mmput(mm);

	/*
	 * Return the migrated data for verification.
	 * Only for pages in device zone
	 */
	ret = dmirror_bounce_init(&bounce, start, size);
	if (ret)
		return ret;
	mutex_lock(&dmirror->mutex);
	ret = dmirror_do_read(dmirror, start, end, &bounce);
	mutex_unlock(&dmirror->mutex);
	if (ret == 0) {
		if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
				 bounce.size))
			ret = -EFAULT;
	}
	cmd->cpages = bounce.cpages;
	dmirror_bounce_fini(&bounce);
	return ret;

out:
	mmap_read_unlock(mm);
	mmput(mm);
	return ret;
}

static void dmirror_mkentry(struct dmirror *dmirror, struct hmm_range *range,
			    unsigned char *perm, unsigned long entry)
{
	struct page *page;

	if (entry & HMM_PFN_ERROR) {
		*perm = HMM_DMIRROR_PROT_ERROR;
		return;
	}
	if (!(entry & HMM_PFN_VALID)) {
		*perm = HMM_DMIRROR_PROT_NONE;
		return;
	}

	page = hmm_pfn_to_page(entry);
	if (is_device_private_page(page)) {
		/* Is the page migrated to this device or some other? */
		if (dmirror->mdevice == dmirror_page_to_device(page))
			*perm = HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL;
		else
			*perm = HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE;
	} else if (is_device_coherent_page(page)) {
		/* Is the page migrated to this device or some other? */
		if (dmirror->mdevice == dmirror_page_to_device(page))
			*perm = HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL;
		else
			*perm = HMM_DMIRROR_PROT_DEV_COHERENT_REMOTE;
	} else if (is_zero_pfn(page_to_pfn(page)))
		*perm = HMM_DMIRROR_PROT_ZERO;
	else
		*perm = HMM_DMIRROR_PROT_NONE;
	if (entry & HMM_PFN_WRITE)
		*perm |= HMM_DMIRROR_PROT_WRITE;
	else
		*perm |= HMM_DMIRROR_PROT_READ;
	if (hmm_pfn_to_map_order(entry) + PAGE_SHIFT == PMD_SHIFT)
		*perm |= HMM_DMIRROR_PROT_PMD;
	else if (hmm_pfn_to_map_order(entry) + PAGE_SHIFT == PUD_SHIFT)
		*perm |= HMM_DMIRROR_PROT_PUD;
}

static bool dmirror_snapshot_invalidate(struct mmu_interval_notifier *mni,
				const struct mmu_notifier_range *range,
				unsigned long cur_seq)
{
	struct dmirror_interval *dmi =
		container_of(mni, struct dmirror_interval, notifier);
	struct dmirror *dmirror = dmi->dmirror;

	if (mmu_notifier_range_blockable(range))
		mutex_lock(&dmirror->mutex);
	else if (!mutex_trylock(&dmirror->mutex))
		return false;

	/*
	 * Snapshots only need to set the sequence number since any
	 * invalidation in the interval invalidates the whole snapshot.
	 */
	mmu_interval_set_seq(mni, cur_seq);

	mutex_unlock(&dmirror->mutex);
	return true;
}

static const struct mmu_interval_notifier_ops dmirror_mrn_ops = {
	.invalidate = dmirror_snapshot_invalidate,
};

static int dmirror_range_snapshot(struct dmirror *dmirror,
				  struct hmm_range *range,
				  unsigned char *perm)
{
	struct mm_struct *mm = dmirror->notifier.mm;
	struct dmirror_interval notifier;
	unsigned long timeout =
		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
	unsigned long i;
	unsigned long n;
	int ret = 0;

	notifier.dmirror = dmirror;
	range->notifier = &notifier.notifier;

	ret = mmu_interval_notifier_insert(range->notifier, mm,
			range->start, range->end - range->start,
			&dmirror_mrn_ops);
	if (ret)
		return ret;

	while (true) {
		if (time_after(jiffies, timeout)) {
			ret = -EBUSY;
			goto out;
		}

		range->notifier_seq = mmu_interval_read_begin(range->notifier);

		mmap_read_lock(mm);
		ret = hmm_range_fault(range);
		mmap_read_unlock(mm);
		if (ret) {
			if (ret == -EBUSY)
				continue;
			goto out;
		}

		mutex_lock(&dmirror->mutex);
		if (mmu_interval_read_retry(range->notifier,
					    range->notifier_seq)) {
			mutex_unlock(&dmirror->mutex);
			continue;
		}
		break;
	}

	n = (range->end - range->start) >> PAGE_SHIFT;
	for (i = 0; i < n; i++)
		dmirror_mkentry(dmirror, range, perm + i, range->hmm_pfns[i]);

	mutex_unlock(&dmirror->mutex);
out:
	mmu_interval_notifier_remove(range->notifier);
	return ret;
}

static int dmirror_snapshot(struct dmirror *dmirror,
			    struct hmm_dmirror_cmd *cmd)
{
	struct mm_struct *mm = dmirror->notifier.mm;
	unsigned long start, end;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	unsigned long addr;
	unsigned long next;
	unsigned long pfns[32];
	unsigned char perm[32];
	char __user *uptr;
	struct hmm_range range = {
		.hmm_pfns = pfns,
		.dev_private_owner = dmirror->mdevice,
	};
	int ret = 0;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return -EINVAL;

	/*
	 * Register a temporary notifier to detect invalidations even if it
	 * overlaps with other mmu_interval_notifiers.
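	 *
	 * Each page in the range is reported back to userspace as one
	 * HMM_DMIRROR_PROT_* byte filled in by dmirror_mkentry().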
	 */
	uptr = u64_to_user_ptr(cmd->ptr);
	for (addr = start; addr < end; addr = next) {
		unsigned long n;

		next = min(addr + (ARRAY_SIZE(pfns) << PAGE_SHIFT), end);
		range.start = addr;
		range.end = next;

		ret = dmirror_range_snapshot(dmirror, &range, perm);
		if (ret)
			break;

		n = (range.end - range.start) >> PAGE_SHIFT;
		if (copy_to_user(uptr, perm, n)) {
			ret = -EFAULT;
			break;
		}

		cmd->cpages += n;
		uptr += n;
	}
	mmput(mm);

	return ret;
}

static void dmirror_device_evict_chunk(struct dmirror_chunk *chunk)
{
	unsigned long start_pfn = chunk->pagemap.range.start >> PAGE_SHIFT;
	unsigned long end_pfn = chunk->pagemap.range.end >> PAGE_SHIFT;
	unsigned long npages = end_pfn - start_pfn + 1;
	unsigned long i;
	unsigned long *src_pfns;
	unsigned long *dst_pfns;

	src_pfns = kvcalloc(npages, sizeof(*src_pfns), GFP_KERNEL | __GFP_NOFAIL);
	dst_pfns = kvcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL | __GFP_NOFAIL);

	migrate_device_range(src_pfns, start_pfn, npages);
	for (i = 0; i < npages; i++) {
		struct page *dpage, *spage;

		spage = migrate_pfn_to_page(src_pfns[i]);
		if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE))
			continue;

		if (WARN_ON(!is_device_private_page(spage) &&
			    !is_device_coherent_page(spage)))
			continue;
		spage = BACKING_PAGE(spage);
		dpage = alloc_page(GFP_HIGHUSER_MOVABLE | __GFP_NOFAIL);
		lock_page(dpage);
		copy_highpage(dpage, spage);
		dst_pfns[i] = migrate_pfn(page_to_pfn(dpage));
		if (src_pfns[i] & MIGRATE_PFN_WRITE)
			dst_pfns[i] |= MIGRATE_PFN_WRITE;
	}
	migrate_device_pages(src_pfns, dst_pfns, npages);
	migrate_device_finalize(src_pfns, dst_pfns, npages);
	kvfree(src_pfns);
	kvfree(dst_pfns);
}

/* Removes free pages from the free list so they can't be re-allocated */
static void dmirror_remove_free_pages(struct dmirror_chunk *devmem)
{
	struct dmirror_device *mdevice = devmem->mdevice;
	struct page *page;

	for (page = mdevice->free_pages; page; page = page->zone_device_data)
		if (dmirror_page_to_chunk(page) == devmem)
			mdevice->free_pages = page->zone_device_data;
}

static void dmirror_device_remove_chunks(struct dmirror_device *mdevice)
{
	unsigned int i;

	mutex_lock(&mdevice->devmem_lock);
	if (mdevice->devmem_chunks) {
		for (i = 0; i < mdevice->devmem_count; i++) {
			struct dmirror_chunk *devmem =
				mdevice->devmem_chunks[i];

			spin_lock(&mdevice->lock);
			devmem->remove = true;
			dmirror_remove_free_pages(devmem);
			spin_unlock(&mdevice->lock);

			dmirror_device_evict_chunk(devmem);
			memunmap_pages(&devmem->pagemap);
			if (devmem->pagemap.type == MEMORY_DEVICE_PRIVATE)
				release_mem_region(devmem->pagemap.range.start,
						   range_len(&devmem->pagemap.range));
			kfree(devmem);
		}
		mdevice->devmem_count = 0;
		mdevice->devmem_capacity = 0;
		mdevice->free_pages = NULL;
		kfree(mdevice->devmem_chunks);
		mdevice->devmem_chunks = NULL;
	}
	mutex_unlock(&mdevice->devmem_lock);
}

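/*
 * ioctl() entry point. Every command takes a struct hmm_dmirror_cmd whose
 * addr/ptr/npages fields describe the mirrored range and the user buffer;
 * cpages and faults are filled in on return.
 *
 * A rough sketch of how a userspace test might drive this interface (the
 * device node name is assumed to follow the "hmm_dmirror%u" pattern used in
 * dmirror_device_init(); buf, out and npages are placeholders and error
 * handling is omitted):
 *
 *	int fd = open("/dev/hmm_dmirror0", O_RDWR);
 *	struct hmm_dmirror_cmd cmd = {
 *		.addr	= (uintptr_t)buf,	// page-aligned range to mirror
 *		.ptr	= (uintptr_t)out,	// npages * PAGE_SIZE byte buffer
 *		.npages	= npages,
 *	};
 *	ioctl(fd, HMM_DMIRROR_READ, &cmd);	// fault + read through the mirror
 *	ioctl(fd, HMM_DMIRROR_SNAPSHOT, &cmd);	// one PROT byte per page
 */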
static long dmirror_fops_unlocked_ioctl(struct file *filp,
					unsigned int command,
					unsigned long arg)
{
	void __user *uarg = (void __user *)arg;
	struct hmm_dmirror_cmd cmd;
	struct dmirror *dmirror;
	int ret;

	dmirror = filp->private_data;
	if (!dmirror)
		return -EINVAL;

	if (copy_from_user(&cmd, uarg, sizeof(cmd)))
		return -EFAULT;

	if (cmd.addr & ~PAGE_MASK)
		return -EINVAL;
	if (cmd.addr >= (cmd.addr + (cmd.npages << PAGE_SHIFT)))
		return -EINVAL;

	cmd.cpages = 0;
	cmd.faults = 0;

	switch (command) {
	case HMM_DMIRROR_READ:
		ret = dmirror_read(dmirror, &cmd);
		break;

	case HMM_DMIRROR_WRITE:
		ret = dmirror_write(dmirror, &cmd);
		break;

	case HMM_DMIRROR_MIGRATE_TO_DEV:
		ret = dmirror_migrate_to_device(dmirror, &cmd);
		break;

	case HMM_DMIRROR_MIGRATE_TO_SYS:
		ret = dmirror_migrate_to_system(dmirror, &cmd);
		break;

	case HMM_DMIRROR_EXCLUSIVE:
		ret = dmirror_exclusive(dmirror, &cmd);
		break;

	case HMM_DMIRROR_CHECK_EXCLUSIVE:
		ret = dmirror_check_atomic(dmirror, cmd.addr,
					cmd.addr + (cmd.npages << PAGE_SHIFT));
		break;

	case HMM_DMIRROR_SNAPSHOT:
		ret = dmirror_snapshot(dmirror, &cmd);
		break;

	case HMM_DMIRROR_RELEASE:
		dmirror_device_remove_chunks(dmirror->mdevice);
		ret = 0;
		break;

	default:
		return -EINVAL;
	}
	if (ret)
		return ret;

	if (copy_to_user(uarg, &cmd, sizeof(cmd)))
		return -EFAULT;

	return 0;
}

static int dmirror_fops_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long addr;

	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
		struct page *page;
		int ret;

		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
		if (!page)
			return -ENOMEM;

		ret = vm_insert_page(vma, addr, page);
		if (ret) {
			__free_page(page);
			return ret;
		}
		put_page(page);
	}

	return 0;
}

static const struct file_operations dmirror_fops = {
	.open		= dmirror_fops_open,
	.release	= dmirror_fops_release,
	.mmap		= dmirror_fops_mmap,
	.unlocked_ioctl	= dmirror_fops_unlocked_ioctl,
	.llseek		= default_llseek,
	.owner		= THIS_MODULE,
};

static void dmirror_devmem_free(struct page *page)
{
	struct page *rpage = BACKING_PAGE(page);
	struct dmirror_device *mdevice;

	if (rpage != page)
		__free_page(rpage);

	mdevice = dmirror_page_to_device(page);
	spin_lock(&mdevice->lock);

	/* Return page to our allocator if not freeing the chunk */
	if (!dmirror_page_to_chunk(page)->remove) {
		mdevice->cfree++;
		page->zone_device_data = mdevice->free_pages;
		mdevice->free_pages = page;
	}
	spin_unlock(&mdevice->lock);
}

static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
{
	struct migrate_vma args = { 0 };
	unsigned long src_pfns = 0;
	unsigned long dst_pfns = 0;
	struct page *rpage;
	struct dmirror *dmirror;
	vm_fault_t ret;

	/*
	 * Normally, a device would use the page->zone_device_data to point to
	 * the mirror but here we use it to hold the page for the simulated
	 * device memory and that page holds the pointer to the mirror.
	 */
	rpage = vmf->page->zone_device_data;
	dmirror = rpage->zone_device_data;

	/* FIXME demonstrate how we can adjust migrate range */
	args.vma = vmf->vma;
	args.start = vmf->address;
	args.end = args.start + PAGE_SIZE;
	args.src = &src_pfns;
	args.dst = &dst_pfns;
	args.pgmap_owner = dmirror->mdevice;
	args.flags = dmirror_select_device(dmirror);
	args.fault_page = vmf->page;

	if (migrate_vma_setup(&args))
		return VM_FAULT_SIGBUS;

	ret = dmirror_devmem_fault_alloc_and_copy(&args, dmirror);
	if (ret)
		return ret;
	migrate_vma_pages(&args);
	/*
	 * No device finalize step is needed since
	 * dmirror_devmem_fault_alloc_and_copy() will have already
	 * invalidated the device page table.
	 */
	migrate_vma_finalize(&args);
	return 0;
}

static const struct dev_pagemap_ops dmirror_devmem_ops = {
	.page_free	= dmirror_devmem_free,
	.migrate_to_ram	= dmirror_devmem_fault,
};

static int dmirror_device_init(struct dmirror_device *mdevice, int id)
{
	dev_t dev;
	int ret;

	dev = MKDEV(MAJOR(dmirror_dev), id);
	mutex_init(&mdevice->devmem_lock);
	spin_lock_init(&mdevice->lock);

	cdev_init(&mdevice->cdevice, &dmirror_fops);
	mdevice->cdevice.owner = THIS_MODULE;
	device_initialize(&mdevice->device);
	mdevice->device.devt = dev;

	ret = dev_set_name(&mdevice->device, "hmm_dmirror%u", id);
	if (ret)
		return ret;

	ret = cdev_device_add(&mdevice->cdevice, &mdevice->device);
	if (ret)
		return ret;

	/* Build a list of free ZONE_DEVICE struct pages */
	return dmirror_allocate_chunk(mdevice, NULL);
}

static void dmirror_device_remove(struct dmirror_device *mdevice)
{
	dmirror_device_remove_chunks(mdevice);
	cdev_device_del(&mdevice->cdevice, &mdevice->device);
}

static int __init hmm_dmirror_init(void)
{
	int ret;
	int id = 0;
	int ndevices = 0;

	ret = alloc_chrdev_region(&dmirror_dev, 0, DMIRROR_NDEVICES,
				  "HMM_DMIRROR");
	if (ret)
		goto err_unreg;

	memset(dmirror_devices, 0, DMIRROR_NDEVICES * sizeof(dmirror_devices[0]));
	dmirror_devices[ndevices++].zone_device_type =
				HMM_DMIRROR_MEMORY_DEVICE_PRIVATE;
	dmirror_devices[ndevices++].zone_device_type =
				HMM_DMIRROR_MEMORY_DEVICE_PRIVATE;
	if (spm_addr_dev0 && spm_addr_dev1) {
		dmirror_devices[ndevices++].zone_device_type =
					HMM_DMIRROR_MEMORY_DEVICE_COHERENT;
		dmirror_devices[ndevices++].zone_device_type =
					HMM_DMIRROR_MEMORY_DEVICE_COHERENT;
	}
	for (id = 0; id < ndevices; id++) {
		ret = dmirror_device_init(dmirror_devices + id, id);
		if (ret)
			goto err_chrdev;
	}

	pr_info("HMM test module loaded. This is only for testing HMM.\n");
	return 0;

err_chrdev:
	while (--id >= 0)
		dmirror_device_remove(dmirror_devices + id);
	unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES);
err_unreg:
	return ret;
}

static void __exit hmm_dmirror_exit(void)
{
	int id;

	for (id = 0; id < DMIRROR_NDEVICES; id++)
		if (dmirror_devices[id].zone_device_type)
			dmirror_device_remove(dmirror_devices + id);
	unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES);
}

module_init(hmm_dmirror_init);
module_exit(hmm_dmirror_exit);
MODULE_DESCRIPTION("HMM (Heterogeneous Memory Management) test module");
MODULE_LICENSE("GPL");