1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * HMM stands for Heterogeneous Memory Management, it is a helper layer inside 4 * the linux kernel to help device drivers mirror a process address space in 5 * the device. This allows the device to use the same address space which 6 * makes communication and data exchange a lot easier. 7 * 8 * This framework's sole purpose is to exercise various code paths inside 9 * the kernel to make sure that HMM performs as expected and to flush out any 10 * bugs. 11 */ 12 13 #include "kselftest_harness.h" 14 #include "hugepage_settings.h" 15 16 #include <errno.h> 17 #include <fcntl.h> 18 #include <stdio.h> 19 #include <stdlib.h> 20 #include <stdint.h> 21 #include <unistd.h> 22 #include <strings.h> 23 #include <time.h> 24 #include <pthread.h> 25 #include <limits.h> 26 #include <sys/types.h> 27 #include <sys/stat.h> 28 #include <sys/mman.h> 29 #include <sys/ioctl.h> 30 #include <sys/time.h> 31 32 /* 33 * This is a private UAPI to the kernel test module so it isn't exported 34 * in the usual include/uapi/... directory. 35 */ 36 #include <lib/test_hmm_uapi.h> 37 #include <mm/gup_test.h> 38 #include <mm/vm_util.h> 39 40 struct hmm_buffer { 41 void *ptr; 42 void *mirror; 43 unsigned long size; 44 int fd; 45 uint64_t cpages; 46 uint64_t faults; 47 }; 48 49 enum { 50 HMM_PRIVATE_DEVICE_ONE, 51 HMM_PRIVATE_DEVICE_TWO, 52 HMM_COHERENCE_DEVICE_ONE, 53 HMM_COHERENCE_DEVICE_TWO, 54 }; 55 56 #define ONEKB (1 << 10) 57 #define ONEMEG (1 << 20) 58 #define TWOMEG (1 << 21) 59 #define HMM_BUFFER_SIZE (1024 << 12) 60 #define HMM_PATH_MAX 64 61 #define NTIMES 10 62 63 #define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1))) 64 /* Just the flags we need, copied from mm.h: */ 65 66 #ifndef FOLL_WRITE 67 #define FOLL_WRITE 0x01 /* check pte is writable */ 68 #endif 69 70 #ifndef FOLL_LONGTERM 71 #define FOLL_LONGTERM 0x100 /* mapping lifetime is indefinite */ 72 #endif 73 74 HUGETLB_SETUP_DEFAULT_PAGES(1) 75 76 FIXTURE(hmm) 77 { 78 int fd; 79 unsigned int page_size; 80 unsigned int page_shift; 81 }; 82 83 FIXTURE_VARIANT(hmm) 84 { 85 int device_number; 86 }; 87 88 FIXTURE_VARIANT_ADD(hmm, hmm_device_private) 89 { 90 .device_number = HMM_PRIVATE_DEVICE_ONE, 91 }; 92 93 FIXTURE_VARIANT_ADD(hmm, hmm_device_coherent) 94 { 95 .device_number = HMM_COHERENCE_DEVICE_ONE, 96 }; 97 98 FIXTURE(hmm2) 99 { 100 int fd0; 101 int fd1; 102 unsigned int page_size; 103 unsigned int page_shift; 104 }; 105 106 FIXTURE_VARIANT(hmm2) 107 { 108 int device_number0; 109 int device_number1; 110 }; 111 112 FIXTURE_VARIANT_ADD(hmm2, hmm2_device_private) 113 { 114 .device_number0 = HMM_PRIVATE_DEVICE_ONE, 115 .device_number1 = HMM_PRIVATE_DEVICE_TWO, 116 }; 117 118 FIXTURE_VARIANT_ADD(hmm2, hmm2_device_coherent) 119 { 120 .device_number0 = HMM_COHERENCE_DEVICE_ONE, 121 .device_number1 = HMM_COHERENCE_DEVICE_TWO, 122 }; 123 124 static int hmm_open(int unit) 125 { 126 char pathname[HMM_PATH_MAX]; 127 int fd; 128 129 snprintf(pathname, sizeof(pathname), "/dev/hmm_dmirror%d", unit); 130 fd = open(pathname, O_RDWR, 0); 131 if (fd < 0) 132 fprintf(stderr, "could not open hmm dmirror driver (%s)\n", 133 pathname); 134 return fd; 135 } 136 137 static bool hmm_is_coherent_type(int dev_num) 138 { 139 return (dev_num >= HMM_COHERENCE_DEVICE_ONE); 140 } 141 142 FIXTURE_SETUP(hmm) 143 { 144 self->page_size = sysconf(_SC_PAGE_SIZE); 145 self->page_shift = ffs(self->page_size) - 1; 146 147 self->fd = hmm_open(variant->device_number); 148 if (self->fd < 0 && hmm_is_coherent_type(variant->device_number)) 149 SKIP(return, "DEVICE_COHERENT not available"); 150 ASSERT_GE(self->fd, 0); 151 } 152 153 FIXTURE_SETUP(hmm2) 154 { 155 self->page_size = sysconf(_SC_PAGE_SIZE); 156 self->page_shift = ffs(self->page_size) - 1; 157 158 self->fd0 = hmm_open(variant->device_number0); 159 if (self->fd0 < 0 && hmm_is_coherent_type(variant->device_number0)) 160 SKIP(return, "DEVICE_COHERENT not available"); 161 ASSERT_GE(self->fd0, 0); 162 self->fd1 = hmm_open(variant->device_number1); 163 ASSERT_GE(self->fd1, 0); 164 } 165 166 FIXTURE_TEARDOWN(hmm) 167 { 168 int ret = close(self->fd); 169 170 ASSERT_EQ(ret, 0); 171 self->fd = -1; 172 } 173 174 FIXTURE_TEARDOWN(hmm2) 175 { 176 int ret = close(self->fd0); 177 178 ASSERT_EQ(ret, 0); 179 self->fd0 = -1; 180 181 ret = close(self->fd1); 182 ASSERT_EQ(ret, 0); 183 self->fd1 = -1; 184 } 185 186 static int hmm_dmirror_cmd(int fd, 187 unsigned long request, 188 struct hmm_buffer *buffer, 189 unsigned long npages) 190 { 191 struct hmm_dmirror_cmd cmd; 192 int ret; 193 194 /* Simulate a device reading system memory. */ 195 cmd.addr = (__u64)buffer->ptr; 196 cmd.ptr = (__u64)buffer->mirror; 197 cmd.npages = npages; 198 199 for (;;) { 200 ret = ioctl(fd, request, &cmd); 201 if (ret == 0) 202 break; 203 if (errno == EINTR) 204 continue; 205 return -errno; 206 } 207 buffer->cpages = cmd.cpages; 208 buffer->faults = cmd.faults; 209 210 return 0; 211 } 212 213 static void hmm_buffer_free(struct hmm_buffer *buffer) 214 { 215 if (buffer == NULL) 216 return; 217 218 if (buffer->ptr) { 219 munmap(buffer->ptr, buffer->size); 220 buffer->ptr = NULL; 221 } 222 free(buffer->mirror); 223 free(buffer); 224 } 225 226 /* 227 * Create a temporary file that will be deleted on close. 228 */ 229 static int hmm_create_file(unsigned long size) 230 { 231 char path[HMM_PATH_MAX]; 232 int fd; 233 234 strcpy(path, "/tmp"); 235 fd = open(path, O_TMPFILE | O_EXCL | O_RDWR, 0600); 236 if (fd >= 0) { 237 int r; 238 239 do { 240 r = ftruncate(fd, size); 241 } while (r == -1 && errno == EINTR); 242 if (!r) 243 return fd; 244 close(fd); 245 } 246 return -1; 247 } 248 249 /* 250 * Return a random unsigned number. 251 */ 252 static unsigned int hmm_random(void) 253 { 254 static int fd = -1; 255 unsigned int r; 256 257 if (fd < 0) { 258 fd = open("/dev/urandom", O_RDONLY); 259 if (fd < 0) { 260 fprintf(stderr, "%s:%d failed to open /dev/urandom\n", 261 __FILE__, __LINE__); 262 return ~0U; 263 } 264 } 265 read(fd, &r, sizeof(r)); 266 return r; 267 } 268 269 static void hmm_nanosleep(unsigned int n) 270 { 271 struct timespec t; 272 273 t.tv_sec = 0; 274 t.tv_nsec = n; 275 nanosleep(&t, NULL); 276 } 277 278 static int hmm_migrate_sys_to_dev(int fd, 279 struct hmm_buffer *buffer, 280 unsigned long npages) 281 { 282 return hmm_dmirror_cmd(fd, HMM_DMIRROR_MIGRATE_TO_DEV, buffer, npages); 283 } 284 285 static int hmm_migrate_dev_to_sys(int fd, 286 struct hmm_buffer *buffer, 287 unsigned long npages) 288 { 289 return hmm_dmirror_cmd(fd, HMM_DMIRROR_MIGRATE_TO_SYS, buffer, npages); 290 } 291 292 /* 293 * Simple NULL test of device open/close. 294 */ 295 TEST_F(hmm, open_close) 296 { 297 } 298 299 /* 300 * Read private anonymous memory. 301 */ 302 TEST_F(hmm, anon_read) 303 { 304 struct hmm_buffer *buffer; 305 unsigned long npages; 306 unsigned long size; 307 unsigned long i; 308 int *ptr; 309 int ret; 310 int val; 311 312 npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; 313 ASSERT_NE(npages, 0); 314 size = npages << self->page_shift; 315 316 buffer = malloc(sizeof(*buffer)); 317 ASSERT_NE(buffer, NULL); 318 319 buffer->fd = -1; 320 buffer->size = size; 321 buffer->mirror = malloc(size); 322 ASSERT_NE(buffer->mirror, NULL); 323 324 buffer->ptr = mmap(NULL, size, 325 PROT_READ | PROT_WRITE, 326 MAP_PRIVATE | MAP_ANONYMOUS, 327 buffer->fd, 0); 328 ASSERT_NE(buffer->ptr, MAP_FAILED); 329 330 /* 331 * Initialize buffer in system memory but leave the first two pages 332 * zero (pte_none and pfn_zero). 333 */ 334 i = 2 * self->page_size / sizeof(*ptr); 335 for (ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 336 ptr[i] = i; 337 338 /* Set buffer permission to read-only. */ 339 ret = mprotect(buffer->ptr, size, PROT_READ); 340 ASSERT_EQ(ret, 0); 341 342 /* Populate the CPU page table with a special zero page. */ 343 val = *(int *)(buffer->ptr + self->page_size); 344 ASSERT_EQ(val, 0); 345 346 /* Simulate a device reading system memory. */ 347 ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages); 348 ASSERT_EQ(ret, 0); 349 ASSERT_EQ(buffer->cpages, npages); 350 ASSERT_EQ(buffer->faults, 1); 351 352 /* Check what the device read. */ 353 ptr = buffer->mirror; 354 for (i = 0; i < 2 * self->page_size / sizeof(*ptr); ++i) 355 ASSERT_EQ(ptr[i], 0); 356 for (; i < size / sizeof(*ptr); ++i) 357 ASSERT_EQ(ptr[i], i); 358 359 hmm_buffer_free(buffer); 360 } 361 362 /* 363 * Read private anonymous memory which has been protected with 364 * mprotect() PROT_NONE. 365 */ 366 TEST_F(hmm, anon_read_prot) 367 { 368 struct hmm_buffer *buffer; 369 unsigned long npages; 370 unsigned long size; 371 unsigned long i; 372 int *ptr; 373 int ret; 374 375 npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; 376 ASSERT_NE(npages, 0); 377 size = npages << self->page_shift; 378 379 buffer = malloc(sizeof(*buffer)); 380 ASSERT_NE(buffer, NULL); 381 382 buffer->fd = -1; 383 buffer->size = size; 384 buffer->mirror = malloc(size); 385 ASSERT_NE(buffer->mirror, NULL); 386 387 buffer->ptr = mmap(NULL, size, 388 PROT_READ | PROT_WRITE, 389 MAP_PRIVATE | MAP_ANONYMOUS, 390 buffer->fd, 0); 391 ASSERT_NE(buffer->ptr, MAP_FAILED); 392 393 /* Initialize buffer in system memory. */ 394 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 395 ptr[i] = i; 396 397 /* Initialize mirror buffer so we can verify it isn't written. */ 398 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 399 ptr[i] = -i; 400 401 /* Protect buffer from reading. */ 402 ret = mprotect(buffer->ptr, size, PROT_NONE); 403 ASSERT_EQ(ret, 0); 404 405 /* Simulate a device reading system memory. */ 406 ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages); 407 ASSERT_EQ(ret, -EFAULT); 408 409 /* Allow CPU to read the buffer so we can check it. */ 410 ret = mprotect(buffer->ptr, size, PROT_READ); 411 ASSERT_EQ(ret, 0); 412 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 413 ASSERT_EQ(ptr[i], i); 414 415 /* Check what the device read. */ 416 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 417 ASSERT_EQ(ptr[i], -i); 418 419 hmm_buffer_free(buffer); 420 } 421 422 /* 423 * Write private anonymous memory. 424 */ 425 TEST_F(hmm, anon_write) 426 { 427 struct hmm_buffer *buffer; 428 unsigned long npages; 429 unsigned long size; 430 unsigned long i; 431 int *ptr; 432 int ret; 433 434 npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; 435 ASSERT_NE(npages, 0); 436 size = npages << self->page_shift; 437 438 buffer = malloc(sizeof(*buffer)); 439 ASSERT_NE(buffer, NULL); 440 441 buffer->fd = -1; 442 buffer->size = size; 443 buffer->mirror = malloc(size); 444 ASSERT_NE(buffer->mirror, NULL); 445 446 buffer->ptr = mmap(NULL, size, 447 PROT_READ | PROT_WRITE, 448 MAP_PRIVATE | MAP_ANONYMOUS, 449 buffer->fd, 0); 450 ASSERT_NE(buffer->ptr, MAP_FAILED); 451 452 /* Initialize data that the device will write to buffer->ptr. */ 453 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 454 ptr[i] = i; 455 456 /* Simulate a device writing system memory. */ 457 ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); 458 ASSERT_EQ(ret, 0); 459 ASSERT_EQ(buffer->cpages, npages); 460 ASSERT_EQ(buffer->faults, 1); 461 462 /* Check what the device wrote. */ 463 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 464 ASSERT_EQ(ptr[i], i); 465 466 hmm_buffer_free(buffer); 467 } 468 469 /* 470 * Write private anonymous memory which has been protected with 471 * mprotect() PROT_READ. 472 */ 473 TEST_F(hmm, anon_write_prot) 474 { 475 struct hmm_buffer *buffer; 476 unsigned long npages; 477 unsigned long size; 478 unsigned long i; 479 int *ptr; 480 int ret; 481 482 npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; 483 ASSERT_NE(npages, 0); 484 size = npages << self->page_shift; 485 486 buffer = malloc(sizeof(*buffer)); 487 ASSERT_NE(buffer, NULL); 488 489 buffer->fd = -1; 490 buffer->size = size; 491 buffer->mirror = malloc(size); 492 ASSERT_NE(buffer->mirror, NULL); 493 494 buffer->ptr = mmap(NULL, size, 495 PROT_READ, 496 MAP_PRIVATE | MAP_ANONYMOUS, 497 buffer->fd, 0); 498 ASSERT_NE(buffer->ptr, MAP_FAILED); 499 500 /* Simulate a device reading a zero page of memory. */ 501 ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, 1); 502 ASSERT_EQ(ret, 0); 503 ASSERT_EQ(buffer->cpages, 1); 504 ASSERT_EQ(buffer->faults, 1); 505 506 /* Initialize data that the device will write to buffer->ptr. */ 507 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 508 ptr[i] = i; 509 510 /* Simulate a device writing system memory. */ 511 ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); 512 ASSERT_EQ(ret, -EPERM); 513 514 /* Check what the device wrote. */ 515 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 516 ASSERT_EQ(ptr[i], 0); 517 518 /* Now allow writing and see that the zero page is replaced. */ 519 ret = mprotect(buffer->ptr, size, PROT_WRITE | PROT_READ); 520 ASSERT_EQ(ret, 0); 521 522 /* Simulate a device writing system memory. */ 523 ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); 524 ASSERT_EQ(ret, 0); 525 ASSERT_EQ(buffer->cpages, npages); 526 ASSERT_EQ(buffer->faults, 1); 527 528 /* Check what the device wrote. */ 529 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 530 ASSERT_EQ(ptr[i], i); 531 532 hmm_buffer_free(buffer); 533 } 534 535 /* 536 * Check that a device writing an anonymous private mapping 537 * will copy-on-write if a child process inherits the mapping. 538 * 539 * Also verifies after fork() memory the device can be read by child. 540 */ 541 TEST_F(hmm, anon_write_child) 542 { 543 struct hmm_buffer *buffer; 544 unsigned long npages; 545 unsigned long size; 546 unsigned long i; 547 void *old_ptr; 548 void *map; 549 int *ptr; 550 pid_t pid; 551 int child_fd; 552 int ret, use_thp, migrate; 553 554 for (migrate = 0; migrate < 2; ++migrate) { 555 for (use_thp = 0; use_thp < 2; ++use_thp) { 556 npages = ALIGN(use_thp ? read_pmd_pagesize() : HMM_BUFFER_SIZE, 557 self->page_size) >> self->page_shift; 558 ASSERT_NE(npages, 0); 559 size = npages << self->page_shift; 560 561 buffer = malloc(sizeof(*buffer)); 562 ASSERT_NE(buffer, NULL); 563 564 buffer->fd = -1; 565 buffer->size = size * 2; 566 buffer->mirror = malloc(size); 567 ASSERT_NE(buffer->mirror, NULL); 568 569 buffer->ptr = mmap(NULL, size * 2, 570 PROT_READ | PROT_WRITE, 571 MAP_PRIVATE | MAP_ANONYMOUS, 572 buffer->fd, 0); 573 ASSERT_NE(buffer->ptr, MAP_FAILED); 574 575 old_ptr = buffer->ptr; 576 if (use_thp) { 577 map = (void *)ALIGN((uintptr_t)buffer->ptr, size); 578 ret = madvise(map, size, MADV_HUGEPAGE); 579 ASSERT_EQ(ret, 0); 580 buffer->ptr = map; 581 } 582 583 /* Initialize buffer->ptr so we can tell if it is written. */ 584 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 585 ptr[i] = i; 586 587 /* Initialize data that the device will write to buffer->ptr. */ 588 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 589 ptr[i] = -i; 590 591 if (migrate) { 592 ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); 593 ASSERT_EQ(ret, 0); 594 ASSERT_EQ(buffer->cpages, npages); 595 596 } 597 598 pid = fork(); 599 if (pid == -1) 600 ASSERT_EQ(pid, 0); 601 if (pid != 0) { 602 waitpid(pid, &ret, 0); 603 ASSERT_EQ(WIFEXITED(ret), 1); 604 605 /* Check that the parent's buffer did not change. */ 606 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 607 ASSERT_EQ(ptr[i], i); 608 609 buffer->ptr = old_ptr; 610 hmm_buffer_free(buffer); 611 continue; 612 } 613 614 /* Check that we see the parent's values. */ 615 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 616 ASSERT_EQ(ptr[i], i); 617 if (!migrate) { 618 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 619 ASSERT_EQ(ptr[i], -i); 620 } 621 622 /* The child process needs its own mirror to its own mm. */ 623 child_fd = hmm_open(0); 624 ASSERT_GE(child_fd, 0); 625 626 /* Simulate a device writing system memory. */ 627 ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages); 628 ASSERT_EQ(ret, 0); 629 ASSERT_EQ(buffer->cpages, npages); 630 ASSERT_EQ(buffer->faults, 1); 631 632 /* Check what the device wrote. */ 633 if (!migrate) { 634 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 635 ASSERT_EQ(ptr[i], -i); 636 } 637 638 close(child_fd); 639 _exit(0); 640 } 641 } 642 } 643 644 /* 645 * Check that a device writing an anonymous shared mapping 646 * will not copy-on-write if a child process inherits the mapping. 647 */ 648 TEST_F(hmm, anon_write_child_shared) 649 { 650 struct hmm_buffer *buffer; 651 unsigned long npages; 652 unsigned long size; 653 unsigned long i; 654 int *ptr; 655 pid_t pid; 656 int child_fd; 657 int ret; 658 659 npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; 660 ASSERT_NE(npages, 0); 661 size = npages << self->page_shift; 662 663 buffer = malloc(sizeof(*buffer)); 664 ASSERT_NE(buffer, NULL); 665 666 buffer->fd = -1; 667 buffer->size = size; 668 buffer->mirror = malloc(size); 669 ASSERT_NE(buffer->mirror, NULL); 670 671 buffer->ptr = mmap(NULL, size, 672 PROT_READ | PROT_WRITE, 673 MAP_SHARED | MAP_ANONYMOUS, 674 buffer->fd, 0); 675 ASSERT_NE(buffer->ptr, MAP_FAILED); 676 677 /* Initialize buffer->ptr so we can tell if it is written. */ 678 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 679 ptr[i] = i; 680 681 /* Initialize data that the device will write to buffer->ptr. */ 682 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 683 ptr[i] = -i; 684 685 pid = fork(); 686 if (pid == -1) 687 ASSERT_EQ(pid, 0); 688 if (pid != 0) { 689 waitpid(pid, &ret, 0); 690 ASSERT_EQ(WIFEXITED(ret), 1); 691 692 /* Check that the parent's buffer did change. */ 693 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 694 ASSERT_EQ(ptr[i], -i); 695 return; 696 } 697 698 /* Check that we see the parent's values. */ 699 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 700 ASSERT_EQ(ptr[i], i); 701 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 702 ASSERT_EQ(ptr[i], -i); 703 704 /* The child process needs its own mirror to its own mm. */ 705 child_fd = hmm_open(0); 706 ASSERT_GE(child_fd, 0); 707 708 /* Simulate a device writing system memory. */ 709 ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages); 710 ASSERT_EQ(ret, 0); 711 ASSERT_EQ(buffer->cpages, npages); 712 ASSERT_EQ(buffer->faults, 1); 713 714 /* Check what the device wrote. */ 715 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 716 ASSERT_EQ(ptr[i], -i); 717 718 close(child_fd); 719 _exit(0); 720 } 721 722 /* 723 * Write private anonymous huge page. 724 */ 725 TEST_F(hmm, anon_write_huge) 726 { 727 struct hmm_buffer *buffer; 728 unsigned long npages; 729 unsigned long size; 730 unsigned long i; 731 void *old_ptr; 732 void *map; 733 int *ptr; 734 int ret; 735 736 size = 2 * read_pmd_pagesize(); 737 738 buffer = malloc(sizeof(*buffer)); 739 ASSERT_NE(buffer, NULL); 740 741 buffer->fd = -1; 742 buffer->size = size; 743 buffer->mirror = malloc(size); 744 ASSERT_NE(buffer->mirror, NULL); 745 746 buffer->ptr = mmap(NULL, size, 747 PROT_READ | PROT_WRITE, 748 MAP_PRIVATE | MAP_ANONYMOUS, 749 buffer->fd, 0); 750 ASSERT_NE(buffer->ptr, MAP_FAILED); 751 752 size /= 2; 753 npages = size >> self->page_shift; 754 map = (void *)ALIGN((uintptr_t)buffer->ptr, size); 755 ret = madvise(map, size, MADV_HUGEPAGE); 756 ASSERT_EQ(ret, 0); 757 old_ptr = buffer->ptr; 758 buffer->ptr = map; 759 760 /* Initialize data that the device will write to buffer->ptr. */ 761 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 762 ptr[i] = i; 763 764 /* Simulate a device writing system memory. */ 765 ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); 766 ASSERT_EQ(ret, 0); 767 ASSERT_EQ(buffer->cpages, npages); 768 ASSERT_EQ(buffer->faults, 1); 769 770 /* Check what the device wrote. */ 771 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 772 ASSERT_EQ(ptr[i], i); 773 774 buffer->ptr = old_ptr; 775 hmm_buffer_free(buffer); 776 } 777 778 /* 779 * Write huge TLBFS page. 780 */ 781 TEST_F(hmm, anon_write_hugetlbfs) 782 { 783 struct hmm_buffer *buffer; 784 unsigned long npages; 785 unsigned long size; 786 unsigned long default_hsize = default_huge_page_size(); 787 unsigned long i; 788 int *ptr; 789 int ret; 790 791 if (!hugetlb_free_default_pages()) 792 SKIP(return, "Not enough huge pages"); 793 794 size = ALIGN(TWOMEG, default_hsize); 795 npages = size >> self->page_shift; 796 797 buffer = malloc(sizeof(*buffer)); 798 ASSERT_NE(buffer, NULL); 799 800 buffer->ptr = mmap(NULL, size, 801 PROT_READ | PROT_WRITE, 802 MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, 803 -1, 0); 804 if (buffer->ptr == MAP_FAILED) { 805 free(buffer); 806 SKIP(return, "Huge page could not be allocated"); 807 } 808 809 buffer->fd = -1; 810 buffer->size = size; 811 buffer->mirror = malloc(size); 812 ASSERT_NE(buffer->mirror, NULL); 813 814 /* Initialize data that the device will write to buffer->ptr. */ 815 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 816 ptr[i] = i; 817 818 /* Simulate a device writing system memory. */ 819 ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); 820 ASSERT_EQ(ret, 0); 821 ASSERT_EQ(buffer->cpages, npages); 822 ASSERT_EQ(buffer->faults, 1); 823 824 /* Check what the device wrote. */ 825 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 826 ASSERT_EQ(ptr[i], i); 827 828 munmap(buffer->ptr, buffer->size); 829 buffer->ptr = NULL; 830 hmm_buffer_free(buffer); 831 } 832 833 /* 834 * Read mmap'ed file memory. 835 */ 836 TEST_F(hmm, file_read) 837 { 838 struct hmm_buffer *buffer; 839 unsigned long npages; 840 unsigned long size; 841 unsigned long i; 842 int *ptr; 843 int ret; 844 int fd; 845 ssize_t len; 846 847 npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; 848 ASSERT_NE(npages, 0); 849 size = npages << self->page_shift; 850 851 fd = hmm_create_file(size); 852 ASSERT_GE(fd, 0); 853 854 buffer = malloc(sizeof(*buffer)); 855 ASSERT_NE(buffer, NULL); 856 857 buffer->fd = fd; 858 buffer->size = size; 859 buffer->mirror = malloc(size); 860 ASSERT_NE(buffer->mirror, NULL); 861 862 /* Write initial contents of the file. */ 863 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 864 ptr[i] = i; 865 len = pwrite(fd, buffer->mirror, size, 0); 866 ASSERT_EQ(len, size); 867 memset(buffer->mirror, 0, size); 868 869 buffer->ptr = mmap(NULL, size, 870 PROT_READ, 871 MAP_SHARED, 872 buffer->fd, 0); 873 ASSERT_NE(buffer->ptr, MAP_FAILED); 874 875 /* Simulate a device reading system memory. */ 876 ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages); 877 ASSERT_EQ(ret, 0); 878 ASSERT_EQ(buffer->cpages, npages); 879 ASSERT_EQ(buffer->faults, 1); 880 881 /* Check what the device read. */ 882 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 883 ASSERT_EQ(ptr[i], i); 884 885 hmm_buffer_free(buffer); 886 } 887 888 /* 889 * Write mmap'ed file memory. 890 */ 891 TEST_F(hmm, file_write) 892 { 893 struct hmm_buffer *buffer; 894 unsigned long npages; 895 unsigned long size; 896 unsigned long i; 897 int *ptr; 898 int ret; 899 int fd; 900 ssize_t len; 901 902 npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; 903 ASSERT_NE(npages, 0); 904 size = npages << self->page_shift; 905 906 fd = hmm_create_file(size); 907 ASSERT_GE(fd, 0); 908 909 buffer = malloc(sizeof(*buffer)); 910 ASSERT_NE(buffer, NULL); 911 912 buffer->fd = fd; 913 buffer->size = size; 914 buffer->mirror = malloc(size); 915 ASSERT_NE(buffer->mirror, NULL); 916 917 buffer->ptr = mmap(NULL, size, 918 PROT_READ | PROT_WRITE, 919 MAP_SHARED, 920 buffer->fd, 0); 921 ASSERT_NE(buffer->ptr, MAP_FAILED); 922 923 /* Initialize data that the device will write to buffer->ptr. */ 924 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 925 ptr[i] = i; 926 927 /* Simulate a device writing system memory. */ 928 ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); 929 ASSERT_EQ(ret, 0); 930 ASSERT_EQ(buffer->cpages, npages); 931 ASSERT_EQ(buffer->faults, 1); 932 933 /* Check what the device wrote. */ 934 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 935 ASSERT_EQ(ptr[i], i); 936 937 /* Check that the device also wrote the file. */ 938 len = pread(fd, buffer->mirror, size, 0); 939 ASSERT_EQ(len, size); 940 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 941 ASSERT_EQ(ptr[i], i); 942 943 hmm_buffer_free(buffer); 944 } 945 946 /* 947 * Migrate anonymous memory to device private memory. 948 */ 949 TEST_F(hmm, migrate) 950 { 951 struct hmm_buffer *buffer; 952 unsigned long npages; 953 unsigned long size; 954 unsigned long i; 955 int *ptr; 956 int ret; 957 958 npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; 959 ASSERT_NE(npages, 0); 960 size = npages << self->page_shift; 961 962 buffer = malloc(sizeof(*buffer)); 963 ASSERT_NE(buffer, NULL); 964 965 buffer->fd = -1; 966 buffer->size = size; 967 buffer->mirror = malloc(size); 968 ASSERT_NE(buffer->mirror, NULL); 969 970 buffer->ptr = mmap(NULL, size, 971 PROT_READ | PROT_WRITE, 972 MAP_PRIVATE | MAP_ANONYMOUS, 973 buffer->fd, 0); 974 ASSERT_NE(buffer->ptr, MAP_FAILED); 975 976 /* Initialize buffer in system memory. */ 977 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 978 ptr[i] = i; 979 980 /* Migrate memory to device. */ 981 ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); 982 ASSERT_EQ(ret, 0); 983 ASSERT_EQ(buffer->cpages, npages); 984 985 /* Check what the device read. */ 986 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 987 ASSERT_EQ(ptr[i], i); 988 989 hmm_buffer_free(buffer); 990 } 991 992 /* 993 * Migrate private file memory to device private memory. 994 */ 995 TEST_F(hmm, migrate_file_private) 996 { 997 struct hmm_buffer *buffer; 998 unsigned long npages; 999 unsigned long size; 1000 unsigned long i; 1001 int *ptr; 1002 int ret; 1003 int fd; 1004 1005 npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; 1006 ASSERT_NE(npages, 0); 1007 size = npages << self->page_shift; 1008 1009 fd = hmm_create_file(size); 1010 ASSERT_GE(fd, 0); 1011 1012 buffer = malloc(sizeof(*buffer)); 1013 ASSERT_NE(buffer, NULL); 1014 1015 buffer->fd = fd; 1016 buffer->size = size; 1017 buffer->mirror = malloc(size); 1018 ASSERT_NE(buffer->mirror, NULL); 1019 1020 buffer->ptr = mmap(NULL, size, 1021 PROT_READ | PROT_WRITE, 1022 MAP_PRIVATE, 1023 buffer->fd, 0); 1024 ASSERT_NE(buffer->ptr, MAP_FAILED); 1025 1026 /* Initialize buffer in system memory. */ 1027 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 1028 ptr[i] = i; 1029 1030 /* Migrate memory to device. */ 1031 ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); 1032 ASSERT_EQ(ret, 0); 1033 ASSERT_EQ(buffer->cpages, npages); 1034 1035 /* Check what the device read. */ 1036 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 1037 ASSERT_EQ(ptr[i], i); 1038 1039 hmm_buffer_free(buffer); 1040 } 1041 1042 /* 1043 * Migrate anonymous memory to device private memory and fault some of it back 1044 * to system memory, then try migrating the resulting mix of system and device 1045 * private memory to the device. 1046 */ 1047 TEST_F(hmm, migrate_fault) 1048 { 1049 struct hmm_buffer *buffer; 1050 unsigned long npages; 1051 unsigned long size; 1052 unsigned long i; 1053 int *ptr; 1054 int ret; 1055 1056 npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; 1057 ASSERT_NE(npages, 0); 1058 size = npages << self->page_shift; 1059 1060 buffer = malloc(sizeof(*buffer)); 1061 ASSERT_NE(buffer, NULL); 1062 1063 buffer->fd = -1; 1064 buffer->size = size; 1065 buffer->mirror = malloc(size); 1066 ASSERT_NE(buffer->mirror, NULL); 1067 1068 buffer->ptr = mmap(NULL, size, 1069 PROT_READ | PROT_WRITE, 1070 MAP_PRIVATE | MAP_ANONYMOUS, 1071 buffer->fd, 0); 1072 ASSERT_NE(buffer->ptr, MAP_FAILED); 1073 1074 /* Initialize buffer in system memory. */ 1075 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 1076 ptr[i] = i; 1077 1078 /* Migrate memory to device. */ 1079 ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); 1080 ASSERT_EQ(ret, 0); 1081 ASSERT_EQ(buffer->cpages, npages); 1082 1083 /* Check what the device read. */ 1084 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 1085 ASSERT_EQ(ptr[i], i); 1086 1087 /* Fault half the pages back to system memory and check them. */ 1088 for (i = 0, ptr = buffer->ptr; i < size / (2 * sizeof(*ptr)); ++i) 1089 ASSERT_EQ(ptr[i], i); 1090 1091 /* Migrate memory to the device again. */ 1092 ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); 1093 ASSERT_EQ(ret, 0); 1094 ASSERT_EQ(buffer->cpages, npages); 1095 1096 /* Check what the device read. */ 1097 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 1098 ASSERT_EQ(ptr[i], i); 1099 1100 hmm_buffer_free(buffer); 1101 } 1102 1103 TEST_F(hmm, migrate_release) 1104 { 1105 struct hmm_buffer *buffer; 1106 unsigned long npages; 1107 unsigned long size; 1108 unsigned long i; 1109 int *ptr; 1110 int ret; 1111 1112 npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; 1113 ASSERT_NE(npages, 0); 1114 size = npages << self->page_shift; 1115 1116 buffer = malloc(sizeof(*buffer)); 1117 ASSERT_NE(buffer, NULL); 1118 1119 buffer->fd = -1; 1120 buffer->size = size; 1121 buffer->mirror = malloc(size); 1122 ASSERT_NE(buffer->mirror, NULL); 1123 1124 buffer->ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, 1125 MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0); 1126 ASSERT_NE(buffer->ptr, MAP_FAILED); 1127 1128 /* Initialize buffer in system memory. */ 1129 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 1130 ptr[i] = i; 1131 1132 /* Migrate memory to device. */ 1133 ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); 1134 ASSERT_EQ(ret, 0); 1135 ASSERT_EQ(buffer->cpages, npages); 1136 1137 /* Check what the device read. */ 1138 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 1139 ASSERT_EQ(ptr[i], i); 1140 1141 /* Release device memory. */ 1142 ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_RELEASE, buffer, npages); 1143 ASSERT_EQ(ret, 0); 1144 1145 /* Fault pages back to system memory and check them. */ 1146 for (i = 0, ptr = buffer->ptr; i < size / (2 * sizeof(*ptr)); ++i) 1147 ASSERT_EQ(ptr[i], i); 1148 1149 hmm_buffer_free(buffer); 1150 } 1151 1152 /* 1153 * Migrate anonymous shared memory to device private memory. 1154 */ 1155 TEST_F(hmm, migrate_shared) 1156 { 1157 struct hmm_buffer *buffer; 1158 unsigned long npages; 1159 unsigned long size; 1160 int ret; 1161 1162 npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; 1163 ASSERT_NE(npages, 0); 1164 size = npages << self->page_shift; 1165 1166 buffer = malloc(sizeof(*buffer)); 1167 ASSERT_NE(buffer, NULL); 1168 1169 buffer->fd = -1; 1170 buffer->size = size; 1171 buffer->mirror = malloc(size); 1172 ASSERT_NE(buffer->mirror, NULL); 1173 1174 buffer->ptr = mmap(NULL, size, 1175 PROT_READ | PROT_WRITE, 1176 MAP_SHARED | MAP_ANONYMOUS, 1177 buffer->fd, 0); 1178 ASSERT_NE(buffer->ptr, MAP_FAILED); 1179 1180 /* Migrate memory to device. */ 1181 ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); 1182 ASSERT_EQ(ret, -ENOENT); 1183 1184 hmm_buffer_free(buffer); 1185 } 1186 1187 /* 1188 * Try to migrate various memory types to device private memory. 1189 */ 1190 TEST_F(hmm2, migrate_mixed) 1191 { 1192 struct hmm_buffer *buffer; 1193 unsigned long npages; 1194 unsigned long size; 1195 int *ptr; 1196 unsigned char *p; 1197 int ret; 1198 int val; 1199 1200 npages = 6; 1201 size = npages << self->page_shift; 1202 1203 buffer = malloc(sizeof(*buffer)); 1204 ASSERT_NE(buffer, NULL); 1205 1206 buffer->fd = -1; 1207 buffer->size = size; 1208 buffer->mirror = malloc(size); 1209 ASSERT_NE(buffer->mirror, NULL); 1210 1211 /* Reserve a range of addresses. */ 1212 buffer->ptr = mmap(NULL, size, 1213 PROT_NONE, 1214 MAP_PRIVATE | MAP_ANONYMOUS, 1215 buffer->fd, 0); 1216 ASSERT_NE(buffer->ptr, MAP_FAILED); 1217 p = buffer->ptr; 1218 1219 /* Migrating a protected area should be an error. */ 1220 ret = hmm_migrate_sys_to_dev(self->fd1, buffer, npages); 1221 ASSERT_EQ(ret, -EINVAL); 1222 1223 /* Punch a hole after the first page address. */ 1224 ret = munmap(buffer->ptr + self->page_size, self->page_size); 1225 ASSERT_EQ(ret, 0); 1226 1227 /* We expect an error if the vma doesn't cover the range. */ 1228 ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 3); 1229 ASSERT_EQ(ret, -EINVAL); 1230 1231 /* Page 2 will be a read-only zero page. */ 1232 ret = mprotect(buffer->ptr + 2 * self->page_size, self->page_size, 1233 PROT_READ); 1234 ASSERT_EQ(ret, 0); 1235 ptr = (int *)(buffer->ptr + 2 * self->page_size); 1236 val = *ptr + 3; 1237 ASSERT_EQ(val, 3); 1238 1239 /* Page 3 will be read-only. */ 1240 ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size, 1241 PROT_READ | PROT_WRITE); 1242 ASSERT_EQ(ret, 0); 1243 ptr = (int *)(buffer->ptr + 3 * self->page_size); 1244 *ptr = val; 1245 ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size, 1246 PROT_READ); 1247 ASSERT_EQ(ret, 0); 1248 1249 /* Page 4-5 will be read-write. */ 1250 ret = mprotect(buffer->ptr + 4 * self->page_size, 2 * self->page_size, 1251 PROT_READ | PROT_WRITE); 1252 ASSERT_EQ(ret, 0); 1253 ptr = (int *)(buffer->ptr + 4 * self->page_size); 1254 *ptr = val; 1255 ptr = (int *)(buffer->ptr + 5 * self->page_size); 1256 *ptr = val; 1257 1258 /* Now try to migrate pages 2-5 to device 1. */ 1259 buffer->ptr = p + 2 * self->page_size; 1260 ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 4); 1261 ASSERT_EQ(ret, 0); 1262 ASSERT_EQ(buffer->cpages, 4); 1263 1264 /* Page 5 won't be migrated to device 0 because it's on device 1. */ 1265 buffer->ptr = p + 5 * self->page_size; 1266 ret = hmm_migrate_sys_to_dev(self->fd0, buffer, 1); 1267 ASSERT_EQ(ret, -ENOENT); 1268 buffer->ptr = p; 1269 1270 buffer->ptr = p; 1271 hmm_buffer_free(buffer); 1272 } 1273 1274 /* 1275 * Migrate anonymous memory to device memory and back to system memory 1276 * multiple times. In case of private zone configuration, this is done 1277 * through fault pages accessed by CPU. In case of coherent zone configuration, 1278 * the pages from the device should be explicitly migrated back to system memory. 1279 * The reason is Coherent device zone has coherent access by CPU, therefore 1280 * it will not generate any page fault. 1281 */ 1282 TEST_F(hmm, migrate_multiple) 1283 { 1284 struct hmm_buffer *buffer; 1285 unsigned long npages; 1286 unsigned long size; 1287 unsigned long i; 1288 unsigned long c; 1289 int *ptr; 1290 int ret; 1291 1292 npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; 1293 ASSERT_NE(npages, 0); 1294 size = npages << self->page_shift; 1295 1296 for (c = 0; c < NTIMES; c++) { 1297 buffer = malloc(sizeof(*buffer)); 1298 ASSERT_NE(buffer, NULL); 1299 1300 buffer->fd = -1; 1301 buffer->size = size; 1302 buffer->mirror = malloc(size); 1303 ASSERT_NE(buffer->mirror, NULL); 1304 1305 buffer->ptr = mmap(NULL, size, 1306 PROT_READ | PROT_WRITE, 1307 MAP_PRIVATE | MAP_ANONYMOUS, 1308 buffer->fd, 0); 1309 ASSERT_NE(buffer->ptr, MAP_FAILED); 1310 1311 /* Initialize buffer in system memory. */ 1312 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 1313 ptr[i] = i; 1314 1315 /* Migrate memory to device. */ 1316 ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); 1317 ASSERT_EQ(ret, 0); 1318 ASSERT_EQ(buffer->cpages, npages); 1319 1320 /* Check what the device read. */ 1321 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 1322 ASSERT_EQ(ptr[i], i); 1323 1324 /* Migrate back to system memory and check them. */ 1325 if (hmm_is_coherent_type(variant->device_number)) { 1326 ret = hmm_migrate_dev_to_sys(self->fd, buffer, npages); 1327 ASSERT_EQ(ret, 0); 1328 ASSERT_EQ(buffer->cpages, npages); 1329 } 1330 1331 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 1332 ASSERT_EQ(ptr[i], i); 1333 1334 hmm_buffer_free(buffer); 1335 } 1336 } 1337 1338 /* 1339 * Read anonymous memory multiple times. 1340 */ 1341 TEST_F(hmm, anon_read_multiple) 1342 { 1343 struct hmm_buffer *buffer; 1344 unsigned long npages; 1345 unsigned long size; 1346 unsigned long i; 1347 unsigned long c; 1348 int *ptr; 1349 int ret; 1350 1351 npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; 1352 ASSERT_NE(npages, 0); 1353 size = npages << self->page_shift; 1354 1355 for (c = 0; c < NTIMES; c++) { 1356 buffer = malloc(sizeof(*buffer)); 1357 ASSERT_NE(buffer, NULL); 1358 1359 buffer->fd = -1; 1360 buffer->size = size; 1361 buffer->mirror = malloc(size); 1362 ASSERT_NE(buffer->mirror, NULL); 1363 1364 buffer->ptr = mmap(NULL, size, 1365 PROT_READ | PROT_WRITE, 1366 MAP_PRIVATE | MAP_ANONYMOUS, 1367 buffer->fd, 0); 1368 ASSERT_NE(buffer->ptr, MAP_FAILED); 1369 1370 /* Initialize buffer in system memory. */ 1371 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 1372 ptr[i] = i + c; 1373 1374 /* Simulate a device reading system memory. */ 1375 ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, 1376 npages); 1377 ASSERT_EQ(ret, 0); 1378 ASSERT_EQ(buffer->cpages, npages); 1379 ASSERT_EQ(buffer->faults, 1); 1380 1381 /* Check what the device read. */ 1382 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 1383 ASSERT_EQ(ptr[i], i + c); 1384 1385 hmm_buffer_free(buffer); 1386 } 1387 } 1388 1389 void *unmap_buffer(void *p) 1390 { 1391 struct hmm_buffer *buffer = p; 1392 1393 /* Delay for a bit and then unmap buffer while it is being read. */ 1394 hmm_nanosleep(hmm_random() % 32000); 1395 munmap(buffer->ptr + buffer->size / 2, buffer->size / 2); 1396 buffer->ptr = NULL; 1397 1398 return NULL; 1399 } 1400 1401 /* 1402 * Try reading anonymous memory while it is being unmapped. 1403 */ 1404 TEST_F(hmm, anon_teardown) 1405 { 1406 unsigned long npages; 1407 unsigned long size; 1408 unsigned long c; 1409 void *ret; 1410 1411 npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; 1412 ASSERT_NE(npages, 0); 1413 size = npages << self->page_shift; 1414 1415 for (c = 0; c < NTIMES; ++c) { 1416 pthread_t thread; 1417 struct hmm_buffer *buffer; 1418 unsigned long i; 1419 int *ptr; 1420 int rc; 1421 1422 buffer = malloc(sizeof(*buffer)); 1423 ASSERT_NE(buffer, NULL); 1424 1425 buffer->fd = -1; 1426 buffer->size = size; 1427 buffer->mirror = malloc(size); 1428 ASSERT_NE(buffer->mirror, NULL); 1429 1430 buffer->ptr = mmap(NULL, size, 1431 PROT_READ | PROT_WRITE, 1432 MAP_PRIVATE | MAP_ANONYMOUS, 1433 buffer->fd, 0); 1434 ASSERT_NE(buffer->ptr, MAP_FAILED); 1435 1436 /* Initialize buffer in system memory. */ 1437 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 1438 ptr[i] = i + c; 1439 1440 rc = pthread_create(&thread, NULL, unmap_buffer, buffer); 1441 ASSERT_EQ(rc, 0); 1442 1443 /* Simulate a device reading system memory. */ 1444 rc = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, 1445 npages); 1446 if (rc == 0) { 1447 ASSERT_EQ(buffer->cpages, npages); 1448 ASSERT_EQ(buffer->faults, 1); 1449 1450 /* Check what the device read. */ 1451 for (i = 0, ptr = buffer->mirror; 1452 i < size / sizeof(*ptr); 1453 ++i) 1454 ASSERT_EQ(ptr[i], i + c); 1455 } 1456 1457 pthread_join(thread, &ret); 1458 hmm_buffer_free(buffer); 1459 } 1460 } 1461 1462 /* 1463 * Test memory snapshot without faulting in pages accessed by the device. 1464 */ 1465 TEST_F(hmm, mixedmap) 1466 { 1467 struct hmm_buffer *buffer; 1468 unsigned long npages; 1469 unsigned long size; 1470 unsigned char *m; 1471 int ret; 1472 1473 npages = 1; 1474 size = npages << self->page_shift; 1475 1476 buffer = malloc(sizeof(*buffer)); 1477 ASSERT_NE(buffer, NULL); 1478 1479 buffer->fd = -1; 1480 buffer->size = size; 1481 buffer->mirror = malloc(npages); 1482 ASSERT_NE(buffer->mirror, NULL); 1483 1484 1485 /* Reserve a range of addresses. */ 1486 buffer->ptr = mmap(NULL, size, 1487 PROT_READ | PROT_WRITE, 1488 MAP_PRIVATE, 1489 self->fd, 0); 1490 ASSERT_NE(buffer->ptr, MAP_FAILED); 1491 1492 /* Simulate a device snapshotting CPU pagetables. */ 1493 ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages); 1494 ASSERT_EQ(ret, 0); 1495 ASSERT_EQ(buffer->cpages, npages); 1496 1497 /* Check what the device saw. */ 1498 m = buffer->mirror; 1499 ASSERT_EQ(m[0], HMM_DMIRROR_PROT_READ); 1500 1501 hmm_buffer_free(buffer); 1502 } 1503 1504 /* 1505 * Test memory snapshot without faulting in pages accessed by the device. 1506 */ 1507 TEST_F(hmm2, snapshot) 1508 { 1509 struct hmm_buffer *buffer; 1510 unsigned long npages; 1511 unsigned long size; 1512 int *ptr; 1513 unsigned char *p; 1514 unsigned char *m; 1515 int ret; 1516 int val; 1517 1518 npages = 7; 1519 size = npages << self->page_shift; 1520 1521 buffer = malloc(sizeof(*buffer)); 1522 ASSERT_NE(buffer, NULL); 1523 1524 buffer->fd = -1; 1525 buffer->size = size; 1526 buffer->mirror = malloc(npages); 1527 ASSERT_NE(buffer->mirror, NULL); 1528 1529 /* Reserve a range of addresses. */ 1530 buffer->ptr = mmap(NULL, size, 1531 PROT_NONE, 1532 MAP_PRIVATE | MAP_ANONYMOUS, 1533 buffer->fd, 0); 1534 ASSERT_NE(buffer->ptr, MAP_FAILED); 1535 p = buffer->ptr; 1536 1537 /* Punch a hole after the first page address. */ 1538 ret = munmap(buffer->ptr + self->page_size, self->page_size); 1539 ASSERT_EQ(ret, 0); 1540 1541 /* Page 2 will be read-only zero page. */ 1542 ret = mprotect(buffer->ptr + 2 * self->page_size, self->page_size, 1543 PROT_READ); 1544 ASSERT_EQ(ret, 0); 1545 ptr = (int *)(buffer->ptr + 2 * self->page_size); 1546 val = *ptr + 3; 1547 ASSERT_EQ(val, 3); 1548 1549 /* Page 3 will be read-only. */ 1550 ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size, 1551 PROT_READ | PROT_WRITE); 1552 ASSERT_EQ(ret, 0); 1553 ptr = (int *)(buffer->ptr + 3 * self->page_size); 1554 *ptr = val; 1555 ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size, 1556 PROT_READ); 1557 ASSERT_EQ(ret, 0); 1558 1559 /* Page 4-6 will be read-write. */ 1560 ret = mprotect(buffer->ptr + 4 * self->page_size, 3 * self->page_size, 1561 PROT_READ | PROT_WRITE); 1562 ASSERT_EQ(ret, 0); 1563 ptr = (int *)(buffer->ptr + 4 * self->page_size); 1564 *ptr = val; 1565 1566 /* Page 5 will be migrated to device 0. */ 1567 buffer->ptr = p + 5 * self->page_size; 1568 ret = hmm_migrate_sys_to_dev(self->fd0, buffer, 1); 1569 ASSERT_EQ(ret, 0); 1570 ASSERT_EQ(buffer->cpages, 1); 1571 1572 /* Page 6 will be migrated to device 1. */ 1573 buffer->ptr = p + 6 * self->page_size; 1574 ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 1); 1575 ASSERT_EQ(ret, 0); 1576 ASSERT_EQ(buffer->cpages, 1); 1577 1578 /* Simulate a device snapshotting CPU pagetables. */ 1579 buffer->ptr = p; 1580 ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_SNAPSHOT, buffer, npages); 1581 ASSERT_EQ(ret, 0); 1582 ASSERT_EQ(buffer->cpages, npages); 1583 1584 /* Check what the device saw. */ 1585 m = buffer->mirror; 1586 ASSERT_EQ(m[0], HMM_DMIRROR_PROT_ERROR); 1587 ASSERT_EQ(m[1], HMM_DMIRROR_PROT_ERROR); 1588 ASSERT_EQ(m[2], HMM_DMIRROR_PROT_ZERO | HMM_DMIRROR_PROT_READ); 1589 ASSERT_EQ(m[3], HMM_DMIRROR_PROT_READ); 1590 ASSERT_EQ(m[4], HMM_DMIRROR_PROT_WRITE); 1591 if (!hmm_is_coherent_type(variant->device_number0)) { 1592 ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL | 1593 HMM_DMIRROR_PROT_WRITE); 1594 ASSERT_EQ(m[6], HMM_DMIRROR_PROT_NONE); 1595 } else { 1596 ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL | 1597 HMM_DMIRROR_PROT_WRITE); 1598 ASSERT_EQ(m[6], HMM_DMIRROR_PROT_DEV_COHERENT_REMOTE | 1599 HMM_DMIRROR_PROT_WRITE); 1600 } 1601 1602 hmm_buffer_free(buffer); 1603 } 1604 1605 /* 1606 * Test the hmm_range_fault() handling of large pages (PMD or PUD) 1607 * that should be mapped by a large page table entry. 1608 */ 1609 TEST_F(hmm, compound) 1610 { 1611 struct hmm_buffer *buffer; 1612 unsigned long npages; 1613 unsigned long size; 1614 unsigned long default_hsize = default_huge_page_size(); 1615 int *ptr; 1616 unsigned char *m; 1617 unsigned char prot; 1618 int ret; 1619 unsigned long i; 1620 1621 /* Skip test if we can't allocate a hugetlbfs page. */ 1622 if (!hugetlb_free_default_pages()) 1623 SKIP(return, "Not enough huge pages"); 1624 1625 size = ALIGN(TWOMEG, default_hsize); 1626 npages = size >> self->page_shift; 1627 1628 buffer = malloc(sizeof(*buffer)); 1629 ASSERT_NE(buffer, NULL); 1630 1631 buffer->ptr = mmap(NULL, size, 1632 PROT_READ | PROT_WRITE, 1633 MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, 1634 -1, 0); 1635 if (buffer->ptr == MAP_FAILED) { 1636 free(buffer); 1637 return; 1638 } 1639 1640 buffer->size = size; 1641 buffer->mirror = malloc(npages); 1642 ASSERT_NE(buffer->mirror, NULL); 1643 1644 /* Initialize the pages the device will snapshot in buffer->ptr. */ 1645 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 1646 ptr[i] = i; 1647 1648 /* Simulate a device snapshotting CPU pagetables. */ 1649 ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages); 1650 ASSERT_EQ(ret, 0); 1651 ASSERT_EQ(buffer->cpages, npages); 1652 1653 /* 1654 * Check what the device saw. The region is backed by a single huge 1655 * page that the device reports either at PMD or at PUD level depending 1656 * on the configured default hugepage size. Determine that level from 1657 * the first page and require every page in the range to match it 1658 * exactly, so that a fragmented mapping mixing levels (or a missing 1659 * large-page bit) is still caught and reported with its actual value. 1660 */ 1661 m = buffer->mirror; 1662 prot = HMM_DMIRROR_PROT_WRITE | 1663 ((m[0] & HMM_DMIRROR_PROT_PUD) ? HMM_DMIRROR_PROT_PUD : 1664 HMM_DMIRROR_PROT_PMD); 1665 for (i = 0; i < npages; ++i) 1666 ASSERT_EQ(m[i], prot); 1667 1668 /* Make the region read-only. */ 1669 ret = mprotect(buffer->ptr, size, PROT_READ); 1670 ASSERT_EQ(ret, 0); 1671 1672 /* Simulate a device snapshotting CPU pagetables. */ 1673 ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages); 1674 ASSERT_EQ(ret, 0); 1675 ASSERT_EQ(buffer->cpages, npages); 1676 1677 /* 1678 * Check what the device saw after mprotect(PROT_READ). Same 1679 * approach as above: determine the mapping level from the first 1680 * page and require every page to match it exactly. 1681 */ 1682 m = buffer->mirror; 1683 prot = HMM_DMIRROR_PROT_READ | 1684 ((m[0] & HMM_DMIRROR_PROT_PUD) ? HMM_DMIRROR_PROT_PUD : 1685 HMM_DMIRROR_PROT_PMD); 1686 for (i = 0; i < npages; ++i) 1687 ASSERT_EQ(m[i], prot); 1688 1689 munmap(buffer->ptr, buffer->size); 1690 buffer->ptr = NULL; 1691 hmm_buffer_free(buffer); 1692 } 1693 1694 /* 1695 * Test two devices reading the same memory (double mapped). 1696 */ 1697 TEST_F(hmm2, double_map) 1698 { 1699 struct hmm_buffer *buffer; 1700 unsigned long npages; 1701 unsigned long size; 1702 unsigned long i; 1703 int *ptr; 1704 int ret; 1705 1706 npages = 6; 1707 size = npages << self->page_shift; 1708 1709 buffer = malloc(sizeof(*buffer)); 1710 ASSERT_NE(buffer, NULL); 1711 1712 buffer->fd = -1; 1713 buffer->size = size; 1714 buffer->mirror = malloc(size); 1715 ASSERT_NE(buffer->mirror, NULL); 1716 1717 /* Reserve a range of addresses. */ 1718 buffer->ptr = mmap(NULL, size, 1719 PROT_READ | PROT_WRITE, 1720 MAP_PRIVATE | MAP_ANONYMOUS, 1721 buffer->fd, 0); 1722 ASSERT_NE(buffer->ptr, MAP_FAILED); 1723 1724 /* Initialize buffer in system memory. */ 1725 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 1726 ptr[i] = i; 1727 1728 /* Make region read-only. */ 1729 ret = mprotect(buffer->ptr, size, PROT_READ); 1730 ASSERT_EQ(ret, 0); 1731 1732 /* Simulate device 0 reading system memory. */ 1733 ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_READ, buffer, npages); 1734 ASSERT_EQ(ret, 0); 1735 ASSERT_EQ(buffer->cpages, npages); 1736 ASSERT_EQ(buffer->faults, 1); 1737 1738 /* Check what the device read. */ 1739 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 1740 ASSERT_EQ(ptr[i], i); 1741 1742 /* Simulate device 1 reading system memory. */ 1743 ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_READ, buffer, npages); 1744 ASSERT_EQ(ret, 0); 1745 ASSERT_EQ(buffer->cpages, npages); 1746 ASSERT_EQ(buffer->faults, 1); 1747 1748 /* Check what the device read. */ 1749 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 1750 ASSERT_EQ(ptr[i], i); 1751 1752 /* Migrate pages to device 1 and try to read from device 0. */ 1753 ret = hmm_migrate_sys_to_dev(self->fd1, buffer, npages); 1754 ASSERT_EQ(ret, 0); 1755 ASSERT_EQ(buffer->cpages, npages); 1756 1757 ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_READ, buffer, npages); 1758 ASSERT_EQ(ret, 0); 1759 ASSERT_EQ(buffer->cpages, npages); 1760 ASSERT_EQ(buffer->faults, 1); 1761 1762 /* Check what device 0 read. */ 1763 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 1764 ASSERT_EQ(ptr[i], i); 1765 1766 hmm_buffer_free(buffer); 1767 } 1768 1769 /* 1770 * Basic check of exclusive faulting. 1771 */ 1772 TEST_F(hmm, exclusive) 1773 { 1774 struct hmm_buffer *buffer; 1775 unsigned long npages; 1776 unsigned long size; 1777 unsigned long i; 1778 int *ptr; 1779 int ret; 1780 1781 npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; 1782 ASSERT_NE(npages, 0); 1783 size = npages << self->page_shift; 1784 1785 buffer = malloc(sizeof(*buffer)); 1786 ASSERT_NE(buffer, NULL); 1787 1788 buffer->fd = -1; 1789 buffer->size = size; 1790 buffer->mirror = malloc(size); 1791 ASSERT_NE(buffer->mirror, NULL); 1792 1793 buffer->ptr = mmap(NULL, size, 1794 PROT_READ | PROT_WRITE, 1795 MAP_PRIVATE | MAP_ANONYMOUS, 1796 buffer->fd, 0); 1797 ASSERT_NE(buffer->ptr, MAP_FAILED); 1798 1799 /* Initialize buffer in system memory. */ 1800 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 1801 ptr[i] = i; 1802 1803 /* Map memory exclusively for device access. */ 1804 ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_EXCLUSIVE, buffer, npages); 1805 ASSERT_EQ(ret, 0); 1806 ASSERT_EQ(buffer->cpages, npages); 1807 1808 /* Check what the device read. */ 1809 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 1810 ASSERT_EQ(ptr[i], i); 1811 1812 /* Fault pages back to system memory and check them. */ 1813 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 1814 ASSERT_EQ(ptr[i]++, i); 1815 1816 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 1817 ASSERT_EQ(ptr[i], i+1); 1818 1819 /* Check atomic access revoked */ 1820 ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_CHECK_EXCLUSIVE, buffer, npages); 1821 ASSERT_EQ(ret, 0); 1822 1823 hmm_buffer_free(buffer); 1824 } 1825 1826 TEST_F(hmm, exclusive_mprotect) 1827 { 1828 struct hmm_buffer *buffer; 1829 unsigned long npages; 1830 unsigned long size; 1831 unsigned long i; 1832 int *ptr; 1833 int ret; 1834 1835 npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; 1836 ASSERT_NE(npages, 0); 1837 size = npages << self->page_shift; 1838 1839 buffer = malloc(sizeof(*buffer)); 1840 ASSERT_NE(buffer, NULL); 1841 1842 buffer->fd = -1; 1843 buffer->size = size; 1844 buffer->mirror = malloc(size); 1845 ASSERT_NE(buffer->mirror, NULL); 1846 1847 buffer->ptr = mmap(NULL, size, 1848 PROT_READ | PROT_WRITE, 1849 MAP_PRIVATE | MAP_ANONYMOUS, 1850 buffer->fd, 0); 1851 ASSERT_NE(buffer->ptr, MAP_FAILED); 1852 1853 /* Initialize buffer in system memory. */ 1854 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 1855 ptr[i] = i; 1856 1857 /* Map memory exclusively for device access. */ 1858 ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_EXCLUSIVE, buffer, npages); 1859 ASSERT_EQ(ret, 0); 1860 ASSERT_EQ(buffer->cpages, npages); 1861 1862 /* Check what the device read. */ 1863 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 1864 ASSERT_EQ(ptr[i], i); 1865 1866 ret = mprotect(buffer->ptr, size, PROT_READ); 1867 ASSERT_EQ(ret, 0); 1868 1869 /* Simulate a device writing system memory. */ 1870 ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); 1871 ASSERT_EQ(ret, -EPERM); 1872 1873 hmm_buffer_free(buffer); 1874 } 1875 1876 /* 1877 * Check copy-on-write works. 1878 */ 1879 TEST_F(hmm, exclusive_cow) 1880 { 1881 struct hmm_buffer *buffer; 1882 unsigned long npages; 1883 unsigned long size; 1884 unsigned long i; 1885 int *ptr; 1886 int ret; 1887 pid_t pid; 1888 int status; 1889 1890 npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; 1891 ASSERT_NE(npages, 0); 1892 size = npages << self->page_shift; 1893 1894 buffer = malloc(sizeof(*buffer)); 1895 ASSERT_NE(buffer, NULL); 1896 1897 buffer->fd = -1; 1898 buffer->size = size; 1899 buffer->mirror = malloc(size); 1900 ASSERT_NE(buffer->mirror, NULL); 1901 1902 buffer->ptr = mmap(NULL, size, 1903 PROT_READ | PROT_WRITE, 1904 MAP_PRIVATE | MAP_ANONYMOUS, 1905 buffer->fd, 0); 1906 ASSERT_NE(buffer->ptr, MAP_FAILED); 1907 1908 /* Initialize buffer in system memory. */ 1909 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 1910 ptr[i] = i; 1911 1912 /* Map memory exclusively for device access. */ 1913 ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_EXCLUSIVE, buffer, npages); 1914 ASSERT_EQ(ret, 0); 1915 ASSERT_EQ(buffer->cpages, npages); 1916 1917 pid = fork(); 1918 if (pid == -1) 1919 ASSERT_EQ(pid, 0); 1920 1921 if (pid == 0) { 1922 /* 1923 * Child verifies COW independently, then _exit(0)s so it does 1924 * not run the test teardown. A failed ASSERT_* here makes the 1925 * harness abort() the child, so the parent sees 1926 * !WIFEXITED(status) below and fails in turn. 1927 */ 1928 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 1929 ASSERT_EQ(ptr[i]++, i); 1930 1931 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 1932 ASSERT_EQ(ptr[i], i + 1); 1933 1934 _exit(0); 1935 } 1936 1937 /* Parent: also increment to verify COW works for both processes. */ 1938 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 1939 ASSERT_EQ(ptr[i]++, i); 1940 1941 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 1942 ASSERT_EQ(ptr[i], i + 1); 1943 1944 /* Parent: wait for child and then free the buffer. */ 1945 ASSERT_EQ(waitpid(pid, &status, 0), pid); 1946 ASSERT_TRUE(WIFEXITED(status)); 1947 ASSERT_EQ(WEXITSTATUS(status), 0); 1948 1949 hmm_buffer_free(buffer); 1950 } 1951 1952 static int gup_test_exec(int gup_fd, unsigned long addr, int cmd, 1953 int npages, int size, int flags) 1954 { 1955 struct gup_test gup = { 1956 .nr_pages_per_call = npages, 1957 .addr = addr, 1958 .gup_flags = FOLL_WRITE | flags, 1959 .size = size, 1960 }; 1961 1962 if (ioctl(gup_fd, cmd, &gup)) { 1963 perror("ioctl on error\n"); 1964 return errno; 1965 } 1966 1967 return 0; 1968 } 1969 1970 /* 1971 * Test get user device pages through gup_test. Setting PIN_LONGTERM flag. 1972 * This should trigger a migration back to system memory for both, private 1973 * and coherent type pages. 1974 * This test makes use of gup_test module. Make sure GUP_TEST_CONFIG is added 1975 * to your configuration before you run it. 1976 */ 1977 TEST_F(hmm, hmm_gup_test) 1978 { 1979 struct hmm_buffer *buffer; 1980 int gup_fd; 1981 unsigned long npages; 1982 unsigned long size; 1983 unsigned long i; 1984 int *ptr; 1985 int ret; 1986 unsigned char *m; 1987 1988 gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR); 1989 if (gup_fd == -1) 1990 SKIP(return, "Skipping test, could not find gup_test driver"); 1991 1992 npages = 4; 1993 size = npages << self->page_shift; 1994 1995 buffer = malloc(sizeof(*buffer)); 1996 ASSERT_NE(buffer, NULL); 1997 1998 buffer->fd = -1; 1999 buffer->size = size; 2000 buffer->mirror = malloc(size); 2001 ASSERT_NE(buffer->mirror, NULL); 2002 2003 buffer->ptr = mmap(NULL, size, 2004 PROT_READ | PROT_WRITE, 2005 MAP_PRIVATE | MAP_ANONYMOUS, 2006 buffer->fd, 0); 2007 ASSERT_NE(buffer->ptr, MAP_FAILED); 2008 2009 /* Initialize buffer in system memory. */ 2010 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 2011 ptr[i] = i; 2012 2013 /* Migrate memory to device. */ 2014 ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); 2015 ASSERT_EQ(ret, 0); 2016 ASSERT_EQ(buffer->cpages, npages); 2017 /* Check what the device read. */ 2018 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 2019 ASSERT_EQ(ptr[i], i); 2020 2021 ASSERT_EQ(gup_test_exec(gup_fd, 2022 (unsigned long)buffer->ptr, 2023 GUP_BASIC_TEST, 1, self->page_size, 0), 0); 2024 ASSERT_EQ(gup_test_exec(gup_fd, 2025 (unsigned long)buffer->ptr + 1 * self->page_size, 2026 GUP_FAST_BENCHMARK, 1, self->page_size, 0), 0); 2027 ASSERT_EQ(gup_test_exec(gup_fd, 2028 (unsigned long)buffer->ptr + 2 * self->page_size, 2029 PIN_FAST_BENCHMARK, 1, self->page_size, FOLL_LONGTERM), 0); 2030 ASSERT_EQ(gup_test_exec(gup_fd, 2031 (unsigned long)buffer->ptr + 3 * self->page_size, 2032 PIN_LONGTERM_BENCHMARK, 1, self->page_size, 0), 0); 2033 2034 /* Take snapshot to CPU pagetables */ 2035 ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages); 2036 ASSERT_EQ(ret, 0); 2037 ASSERT_EQ(buffer->cpages, npages); 2038 m = buffer->mirror; 2039 if (hmm_is_coherent_type(variant->device_number)) { 2040 ASSERT_EQ(HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL | HMM_DMIRROR_PROT_WRITE, m[0]); 2041 ASSERT_EQ(HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL | HMM_DMIRROR_PROT_WRITE, m[1]); 2042 } else { 2043 ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[0]); 2044 ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[1]); 2045 } 2046 ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[2]); 2047 ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[3]); 2048 /* 2049 * Check again the content on the pages. Make sure there's no 2050 * corrupted data. 2051 */ 2052 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 2053 ASSERT_EQ(ptr[i], i); 2054 2055 close(gup_fd); 2056 hmm_buffer_free(buffer); 2057 } 2058 2059 /* 2060 * Test copy-on-write in device pages. 2061 * In case of writing to COW private page(s), a page fault will migrate pages 2062 * back to system memory first. Then, these pages will be duplicated. In case 2063 * of COW device coherent type, pages are duplicated directly from device 2064 * memory. 2065 */ 2066 TEST_F(hmm, hmm_cow_in_device) 2067 { 2068 struct hmm_buffer *buffer; 2069 unsigned long npages; 2070 unsigned long size; 2071 unsigned long i; 2072 int *ptr; 2073 int ret; 2074 unsigned char *m; 2075 pid_t pid; 2076 int status; 2077 2078 npages = 4; 2079 size = npages << self->page_shift; 2080 2081 buffer = malloc(sizeof(*buffer)); 2082 ASSERT_NE(buffer, NULL); 2083 2084 buffer->fd = -1; 2085 buffer->size = size; 2086 buffer->mirror = malloc(size); 2087 ASSERT_NE(buffer->mirror, NULL); 2088 2089 buffer->ptr = mmap(NULL, size, 2090 PROT_READ | PROT_WRITE, 2091 MAP_PRIVATE | MAP_ANONYMOUS, 2092 buffer->fd, 0); 2093 ASSERT_NE(buffer->ptr, MAP_FAILED); 2094 2095 /* Initialize buffer in system memory. */ 2096 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 2097 ptr[i] = i; 2098 2099 /* Migrate memory to device. */ 2100 2101 ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); 2102 ASSERT_EQ(ret, 0); 2103 ASSERT_EQ(buffer->cpages, npages); 2104 2105 pid = fork(); 2106 if (pid == -1) 2107 ASSERT_EQ(pid, 0); 2108 if (!pid) { 2109 /* Child process waits for SIGKILL from the parent. */ 2110 while (1) { 2111 } 2112 /* Should not reach this */ 2113 } 2114 /* Parent process writes to COW pages(s) and gets a 2115 * new copy in system. In case of device private pages, 2116 * this write causes a migration to system mem first. 2117 */ 2118 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 2119 ptr[i] = i; 2120 2121 /* Terminate child and wait */ 2122 EXPECT_EQ(0, kill(pid, SIGKILL)); 2123 EXPECT_EQ(pid, waitpid(pid, &status, 0)); 2124 EXPECT_NE(0, WIFSIGNALED(status)); 2125 EXPECT_EQ(SIGKILL, WTERMSIG(status)); 2126 2127 /* Take snapshot to CPU pagetables */ 2128 ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages); 2129 ASSERT_EQ(ret, 0); 2130 ASSERT_EQ(buffer->cpages, npages); 2131 m = buffer->mirror; 2132 for (i = 0; i < npages; i++) 2133 ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[i]); 2134 2135 hmm_buffer_free(buffer); 2136 } 2137 2138 /* 2139 * Migrate private anonymous huge empty page. 2140 */ 2141 TEST_F(hmm, migrate_anon_huge_empty) 2142 { 2143 struct hmm_buffer *buffer; 2144 unsigned long npages; 2145 unsigned long size; 2146 unsigned long i; 2147 void *old_ptr; 2148 void *map; 2149 int *ptr; 2150 int ret; 2151 2152 size = read_pmd_pagesize(); 2153 2154 buffer = malloc(sizeof(*buffer)); 2155 ASSERT_NE(buffer, NULL); 2156 2157 buffer->fd = -1; 2158 buffer->size = 2 * size; 2159 buffer->mirror = malloc(size); 2160 ASSERT_NE(buffer->mirror, NULL); 2161 memset(buffer->mirror, 0xFF, size); 2162 2163 buffer->ptr = mmap(NULL, 2 * size, 2164 PROT_READ, 2165 MAP_PRIVATE | MAP_ANONYMOUS, 2166 buffer->fd, 0); 2167 ASSERT_NE(buffer->ptr, MAP_FAILED); 2168 2169 npages = size >> self->page_shift; 2170 map = (void *)ALIGN((uintptr_t)buffer->ptr, size); 2171 ret = madvise(map, size, MADV_HUGEPAGE); 2172 ASSERT_EQ(ret, 0); 2173 old_ptr = buffer->ptr; 2174 buffer->ptr = map; 2175 2176 /* Migrate memory to device. */ 2177 ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); 2178 ASSERT_EQ(ret, 0); 2179 ASSERT_EQ(buffer->cpages, npages); 2180 2181 /* Check what the device read. */ 2182 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 2183 ASSERT_EQ(ptr[i], 0); 2184 2185 buffer->ptr = old_ptr; 2186 hmm_buffer_free(buffer); 2187 } 2188 2189 /* 2190 * Migrate private anonymous huge zero page. 2191 */ 2192 TEST_F(hmm, migrate_anon_huge_zero) 2193 { 2194 struct hmm_buffer *buffer; 2195 unsigned long npages; 2196 unsigned long size; 2197 unsigned long i; 2198 void *old_ptr; 2199 void *map; 2200 int *ptr; 2201 int ret; 2202 int val; 2203 2204 size = read_pmd_pagesize(); 2205 2206 buffer = malloc(sizeof(*buffer)); 2207 ASSERT_NE(buffer, NULL); 2208 2209 buffer->fd = -1; 2210 buffer->size = 2 * size; 2211 buffer->mirror = malloc(size); 2212 ASSERT_NE(buffer->mirror, NULL); 2213 memset(buffer->mirror, 0xFF, size); 2214 2215 buffer->ptr = mmap(NULL, 2 * size, 2216 PROT_READ, 2217 MAP_PRIVATE | MAP_ANONYMOUS, 2218 buffer->fd, 0); 2219 ASSERT_NE(buffer->ptr, MAP_FAILED); 2220 2221 npages = size >> self->page_shift; 2222 map = (void *)ALIGN((uintptr_t)buffer->ptr, size); 2223 ret = madvise(map, size, MADV_HUGEPAGE); 2224 ASSERT_EQ(ret, 0); 2225 old_ptr = buffer->ptr; 2226 buffer->ptr = map; 2227 2228 /* Initialize a read-only zero huge page. */ 2229 val = *(int *)buffer->ptr; 2230 ASSERT_EQ(val, 0); 2231 2232 /* Migrate memory to device. */ 2233 ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); 2234 ASSERT_EQ(ret, 0); 2235 ASSERT_EQ(buffer->cpages, npages); 2236 2237 /* Check what the device read. */ 2238 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 2239 ASSERT_EQ(ptr[i], 0); 2240 2241 /* Fault pages back to system memory and check them. */ 2242 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) { 2243 ASSERT_EQ(ptr[i], 0); 2244 /* If it asserts once, it probably will 500,000 times */ 2245 if (ptr[i] != 0) 2246 break; 2247 } 2248 2249 buffer->ptr = old_ptr; 2250 hmm_buffer_free(buffer); 2251 } 2252 2253 /* 2254 * Migrate private anonymous huge page and free. 2255 */ 2256 TEST_F(hmm, migrate_anon_huge_free) 2257 { 2258 struct hmm_buffer *buffer; 2259 unsigned long npages; 2260 unsigned long size; 2261 unsigned long i; 2262 void *old_ptr; 2263 void *map; 2264 int *ptr; 2265 int ret; 2266 2267 size = read_pmd_pagesize(); 2268 2269 buffer = malloc(sizeof(*buffer)); 2270 ASSERT_NE(buffer, NULL); 2271 2272 buffer->fd = -1; 2273 buffer->size = 2 * size; 2274 buffer->mirror = malloc(size); 2275 ASSERT_NE(buffer->mirror, NULL); 2276 memset(buffer->mirror, 0xFF, size); 2277 2278 buffer->ptr = mmap(NULL, 2 * size, 2279 PROT_READ | PROT_WRITE, 2280 MAP_PRIVATE | MAP_ANONYMOUS, 2281 buffer->fd, 0); 2282 ASSERT_NE(buffer->ptr, MAP_FAILED); 2283 2284 npages = size >> self->page_shift; 2285 map = (void *)ALIGN((uintptr_t)buffer->ptr, size); 2286 ret = madvise(map, size, MADV_HUGEPAGE); 2287 ASSERT_EQ(ret, 0); 2288 old_ptr = buffer->ptr; 2289 buffer->ptr = map; 2290 2291 /* Initialize buffer in system memory. */ 2292 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 2293 ptr[i] = i; 2294 2295 /* Migrate memory to device. */ 2296 ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); 2297 ASSERT_EQ(ret, 0); 2298 ASSERT_EQ(buffer->cpages, npages); 2299 2300 /* Check what the device read. */ 2301 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 2302 ASSERT_EQ(ptr[i], i); 2303 2304 /* Try freeing it. */ 2305 ret = madvise(map, size, MADV_FREE); 2306 ASSERT_EQ(ret, 0); 2307 2308 buffer->ptr = old_ptr; 2309 hmm_buffer_free(buffer); 2310 } 2311 2312 /* 2313 * Migrate private anonymous huge page and fault back to sysmem. 2314 */ 2315 TEST_F(hmm, migrate_anon_huge_fault) 2316 { 2317 struct hmm_buffer *buffer; 2318 unsigned long npages; 2319 unsigned long size; 2320 unsigned long i; 2321 unsigned char *m; 2322 uint64_t entry; 2323 void *old_ptr; 2324 void *map; 2325 int pagemap_fd; 2326 int *ptr; 2327 int ret; 2328 2329 size = read_pmd_pagesize(); 2330 2331 buffer = malloc(sizeof(*buffer)); 2332 ASSERT_NE(buffer, NULL); 2333 2334 buffer->fd = -1; 2335 buffer->size = 2 * size; 2336 buffer->mirror = malloc(size); 2337 ASSERT_NE(buffer->mirror, NULL); 2338 memset(buffer->mirror, 0xFF, size); 2339 2340 buffer->ptr = mmap(NULL, 2 * size, 2341 PROT_READ | PROT_WRITE, 2342 MAP_PRIVATE | MAP_ANONYMOUS, 2343 buffer->fd, 0); 2344 ASSERT_NE(buffer->ptr, MAP_FAILED); 2345 2346 npages = size >> self->page_shift; 2347 map = (void *)ALIGN((uintptr_t)buffer->ptr, size); 2348 old_ptr = buffer->ptr; 2349 buffer->ptr = map; 2350 2351 /* Initialize buffer in system memory. */ 2352 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 2353 ptr[i] = i; 2354 2355 ret = madvise(map, size, MADV_COLLAPSE); 2356 ASSERT_EQ(ret, 0); 2357 2358 /* Migrate memory to device. */ 2359 ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); 2360 ASSERT_EQ(ret, 0); 2361 ASSERT_EQ(buffer->cpages, npages); 2362 2363 /* Check what the device read. */ 2364 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 2365 ASSERT_EQ(ptr[i], i); 2366 2367 if (!hmm_is_coherent_type(variant->device_number)) { 2368 ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, 2369 buffer, npages); 2370 ASSERT_EQ(ret, 0); 2371 ASSERT_EQ(buffer->cpages, npages); 2372 2373 m = buffer->mirror; 2374 for (i = 0; i < npages; ++i) 2375 ASSERT_EQ(m[i], HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL | 2376 HMM_DMIRROR_PROT_WRITE | 2377 HMM_DMIRROR_PROT_PMD); 2378 2379 pagemap_fd = open("/proc/self/pagemap", O_RDONLY); 2380 ASSERT_GE(pagemap_fd, 0); 2381 2382 for (i = 0; i < npages; ++i) { 2383 entry = pagemap_get_entry(pagemap_fd, 2384 (char *)buffer->ptr + i * self->page_size); 2385 2386 ASSERT_NE(entry & PM_SWAP, 0); 2387 ASSERT_FALSE(PAGEMAP_PRESENT(entry)); 2388 } 2389 2390 close(pagemap_fd); 2391 } 2392 2393 /* Fault pages back to system memory and check them. */ 2394 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 2395 ASSERT_EQ(ptr[i], i); 2396 2397 buffer->ptr = old_ptr; 2398 hmm_buffer_free(buffer); 2399 } 2400 2401 /* 2402 * Migrate memory and fault back to sysmem after partially unmapping. 2403 */ 2404 TEST_F(hmm, migrate_partial_unmap_fault) 2405 { 2406 struct hmm_buffer *buffer; 2407 unsigned long npages; 2408 unsigned long size = read_pmd_pagesize(); 2409 unsigned long unmap_size; 2410 unsigned long offsets[3]; 2411 unsigned long i; 2412 void *old_ptr; 2413 void *map; 2414 int *ptr; 2415 int ret, j, use_thp; 2416 2417 if (!size) 2418 size = TWOMEG; 2419 2420 unmap_size = size / 2; 2421 offsets[0] = 0; 2422 offsets[1] = size / 4; 2423 offsets[2] = size / 2; 2424 2425 for (use_thp = 0; use_thp < 2; ++use_thp) { 2426 for (j = 0; j < ARRAY_SIZE(offsets); ++j) { 2427 buffer = malloc(sizeof(*buffer)); 2428 ASSERT_NE(buffer, NULL); 2429 2430 buffer->fd = -1; 2431 buffer->size = 2 * size; 2432 buffer->mirror = malloc(size); 2433 ASSERT_NE(buffer->mirror, NULL); 2434 memset(buffer->mirror, 0xFF, size); 2435 2436 buffer->ptr = mmap(NULL, 2 * size, 2437 PROT_READ | PROT_WRITE, 2438 MAP_PRIVATE | MAP_ANONYMOUS, 2439 buffer->fd, 0); 2440 ASSERT_NE(buffer->ptr, MAP_FAILED); 2441 2442 npages = size >> self->page_shift; 2443 map = (void *)ALIGN((uintptr_t)buffer->ptr, size); 2444 if (use_thp) 2445 ret = madvise(map, size, MADV_HUGEPAGE); 2446 else 2447 ret = madvise(map, size, MADV_NOHUGEPAGE); 2448 ASSERT_EQ(ret, 0); 2449 old_ptr = buffer->ptr; 2450 buffer->ptr = map; 2451 2452 /* Initialize buffer in system memory. */ 2453 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 2454 ptr[i] = i; 2455 2456 /* Migrate memory to device. */ 2457 ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); 2458 ASSERT_EQ(ret, 0); 2459 ASSERT_EQ(buffer->cpages, npages); 2460 2461 /* Check what the device read. */ 2462 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 2463 ASSERT_EQ(ptr[i], i); 2464 2465 munmap(buffer->ptr + offsets[j], unmap_size); 2466 2467 /* Fault pages back to system memory and check them. */ 2468 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 2469 if (i * sizeof(int) < offsets[j] || 2470 i * sizeof(int) >= offsets[j] + unmap_size) 2471 ASSERT_EQ(ptr[i], i); 2472 2473 buffer->ptr = old_ptr; 2474 hmm_buffer_free(buffer); 2475 } 2476 } 2477 } 2478 2479 TEST_F(hmm, migrate_remap_fault) 2480 { 2481 struct hmm_buffer *buffer; 2482 unsigned long npages; 2483 unsigned long size = read_pmd_pagesize(); 2484 unsigned long offsets[3]; 2485 unsigned long i; 2486 void *old_ptr, *new_ptr = NULL; 2487 void *map; 2488 int *ptr; 2489 int ret, j, use_thp, dont_unmap, before; 2490 2491 if (!size) 2492 size = TWOMEG; 2493 2494 offsets[0] = 0; 2495 offsets[1] = size / 4; 2496 offsets[2] = size / 2; 2497 2498 for (before = 0; before < 2; ++before) { 2499 for (dont_unmap = 0; dont_unmap < 2; ++dont_unmap) { 2500 for (use_thp = 0; use_thp < 2; ++use_thp) { 2501 for (j = 0; j < ARRAY_SIZE(offsets); ++j) { 2502 int flags = MREMAP_MAYMOVE | MREMAP_FIXED; 2503 2504 if (dont_unmap) 2505 flags |= MREMAP_DONTUNMAP; 2506 2507 buffer = malloc(sizeof(*buffer)); 2508 ASSERT_NE(buffer, NULL); 2509 2510 buffer->fd = -1; 2511 buffer->size = 8 * size; 2512 buffer->mirror = malloc(size); 2513 ASSERT_NE(buffer->mirror, NULL); 2514 memset(buffer->mirror, 0xFF, size); 2515 2516 buffer->ptr = mmap(NULL, buffer->size, 2517 PROT_READ | PROT_WRITE, 2518 MAP_PRIVATE | MAP_ANONYMOUS, 2519 buffer->fd, 0); 2520 ASSERT_NE(buffer->ptr, MAP_FAILED); 2521 2522 npages = size >> self->page_shift; 2523 map = (void *)ALIGN((uintptr_t)buffer->ptr, size); 2524 if (use_thp) 2525 ret = madvise(map, size, MADV_HUGEPAGE); 2526 else 2527 ret = madvise(map, size, MADV_NOHUGEPAGE); 2528 ASSERT_EQ(ret, 0); 2529 old_ptr = buffer->ptr; 2530 munmap(map + size, size * 2); 2531 buffer->ptr = map; 2532 2533 /* Initialize buffer in system memory. */ 2534 for (i = 0, ptr = buffer->ptr; 2535 i < size / sizeof(*ptr); ++i) 2536 ptr[i] = i; 2537 2538 if (before) { 2539 new_ptr = mremap((void *)map, size, size, flags, 2540 map + size + offsets[j]); 2541 ASSERT_NE(new_ptr, MAP_FAILED); 2542 buffer->ptr = new_ptr; 2543 } 2544 2545 /* Migrate memory to device. */ 2546 ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); 2547 ASSERT_EQ(ret, 0); 2548 ASSERT_EQ(buffer->cpages, npages); 2549 2550 /* Check what the device read. */ 2551 for (i = 0, ptr = buffer->mirror; 2552 i < size / sizeof(*ptr); ++i) 2553 ASSERT_EQ(ptr[i], i); 2554 2555 if (!before) { 2556 new_ptr = mremap((void *)map, size, size, flags, 2557 map + size + offsets[j]); 2558 ASSERT_NE(new_ptr, MAP_FAILED); 2559 buffer->ptr = new_ptr; 2560 } 2561 2562 /* Fault pages back to system memory and check them. */ 2563 for (i = 0, ptr = buffer->ptr; 2564 i < size / sizeof(*ptr); ++i) 2565 ASSERT_EQ(ptr[i], i); 2566 2567 munmap(new_ptr, size); 2568 buffer->ptr = old_ptr; 2569 hmm_buffer_free(buffer); 2570 } 2571 } 2572 } 2573 } 2574 } 2575 2576 /* 2577 * Migrate private anonymous huge page with allocation errors. 2578 */ 2579 TEST_F(hmm, migrate_anon_huge_err) 2580 { 2581 struct hmm_buffer *buffer; 2582 unsigned long npages; 2583 unsigned long size; 2584 unsigned long i; 2585 void *old_ptr; 2586 void *map; 2587 int *ptr; 2588 int ret; 2589 2590 size = read_pmd_pagesize(); 2591 2592 buffer = malloc(sizeof(*buffer)); 2593 ASSERT_NE(buffer, NULL); 2594 2595 buffer->fd = -1; 2596 buffer->size = 2 * size; 2597 buffer->mirror = malloc(2 * size); 2598 ASSERT_NE(buffer->mirror, NULL); 2599 memset(buffer->mirror, 0xFF, 2 * size); 2600 2601 old_ptr = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE, 2602 MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0); 2603 ASSERT_NE(old_ptr, MAP_FAILED); 2604 2605 npages = size >> self->page_shift; 2606 map = (void *)ALIGN((uintptr_t)old_ptr, size); 2607 ret = madvise(map, size, MADV_HUGEPAGE); 2608 ASSERT_EQ(ret, 0); 2609 buffer->ptr = map; 2610 2611 /* Initialize buffer in system memory. */ 2612 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 2613 ptr[i] = i; 2614 2615 /* Migrate memory to device but force a THP allocation error. */ 2616 ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer, 2617 HMM_DMIRROR_FLAG_FAIL_ALLOC); 2618 ASSERT_EQ(ret, 0); 2619 ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); 2620 ASSERT_EQ(ret, 0); 2621 ASSERT_EQ(buffer->cpages, npages); 2622 2623 /* Check what the device read. */ 2624 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) { 2625 ASSERT_EQ(ptr[i], i); 2626 if (ptr[i] != i) 2627 break; 2628 } 2629 2630 /* Try faulting back a single (PAGE_SIZE) page. */ 2631 ptr = buffer->ptr; 2632 ASSERT_EQ(ptr[2048], 2048); 2633 2634 /* unmap and remap the region to reset things. */ 2635 ret = munmap(old_ptr, 2 * size); 2636 ASSERT_EQ(ret, 0); 2637 old_ptr = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE, 2638 MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0); 2639 ASSERT_NE(old_ptr, MAP_FAILED); 2640 map = (void *)ALIGN((uintptr_t)old_ptr, size); 2641 ret = madvise(map, size, MADV_HUGEPAGE); 2642 ASSERT_EQ(ret, 0); 2643 buffer->ptr = map; 2644 2645 /* Initialize buffer in system memory. */ 2646 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 2647 ptr[i] = i; 2648 2649 /* Migrate THP to device. */ 2650 ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); 2651 ASSERT_EQ(ret, 0); 2652 ASSERT_EQ(buffer->cpages, npages); 2653 2654 /* 2655 * Force an allocation error when faulting back a THP resident in the 2656 * device. 2657 */ 2658 ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer, 2659 HMM_DMIRROR_FLAG_FAIL_ALLOC); 2660 ASSERT_EQ(ret, 0); 2661 2662 ret = hmm_migrate_dev_to_sys(self->fd, buffer, npages); 2663 ASSERT_EQ(ret, 0); 2664 ptr = buffer->ptr; 2665 ASSERT_EQ(ptr[2048], 2048); 2666 2667 buffer->ptr = old_ptr; 2668 hmm_buffer_free(buffer); 2669 } 2670 2671 /* 2672 * Migrate private anonymous huge zero page with allocation errors. 2673 */ 2674 TEST_F(hmm, migrate_anon_huge_zero_err) 2675 { 2676 struct hmm_buffer *buffer; 2677 unsigned long npages; 2678 unsigned long size; 2679 unsigned long i; 2680 void *old_ptr; 2681 void *map; 2682 int *ptr; 2683 int ret; 2684 2685 size = read_pmd_pagesize(); 2686 2687 buffer = malloc(sizeof(*buffer)); 2688 ASSERT_NE(buffer, NULL); 2689 2690 buffer->fd = -1; 2691 buffer->size = 2 * size; 2692 buffer->mirror = malloc(2 * size); 2693 ASSERT_NE(buffer->mirror, NULL); 2694 memset(buffer->mirror, 0xFF, 2 * size); 2695 2696 old_ptr = mmap(NULL, 2 * size, PROT_READ, 2697 MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0); 2698 ASSERT_NE(old_ptr, MAP_FAILED); 2699 2700 npages = size >> self->page_shift; 2701 map = (void *)ALIGN((uintptr_t)old_ptr, size); 2702 ret = madvise(map, size, MADV_HUGEPAGE); 2703 ASSERT_EQ(ret, 0); 2704 buffer->ptr = map; 2705 2706 /* Migrate memory to device but force a THP allocation error. */ 2707 ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer, 2708 HMM_DMIRROR_FLAG_FAIL_ALLOC); 2709 ASSERT_EQ(ret, 0); 2710 ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); 2711 ASSERT_EQ(ret, 0); 2712 ASSERT_EQ(buffer->cpages, npages); 2713 2714 /* Check what the device read. */ 2715 for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 2716 ASSERT_EQ(ptr[i], 0); 2717 2718 /* Try faulting back a single (PAGE_SIZE) page. */ 2719 ptr = buffer->ptr; 2720 ASSERT_EQ(ptr[2048], 0); 2721 2722 /* unmap and remap the region to reset things. */ 2723 ret = munmap(old_ptr, 2 * size); 2724 ASSERT_EQ(ret, 0); 2725 old_ptr = mmap(NULL, 2 * size, PROT_READ, 2726 MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0); 2727 ASSERT_NE(old_ptr, MAP_FAILED); 2728 map = (void *)ALIGN((uintptr_t)old_ptr, size); 2729 ret = madvise(map, size, MADV_HUGEPAGE); 2730 ASSERT_EQ(ret, 0); 2731 buffer->ptr = map; 2732 2733 /* Initialize buffer in system memory (zero THP page). */ 2734 ret = ptr[0]; 2735 ASSERT_EQ(ret, 0); 2736 2737 /* Migrate memory to device but force a THP allocation error. */ 2738 ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer, 2739 HMM_DMIRROR_FLAG_FAIL_ALLOC); 2740 ASSERT_EQ(ret, 0); 2741 ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); 2742 ASSERT_EQ(ret, 0); 2743 ASSERT_EQ(buffer->cpages, npages); 2744 2745 /* Fault the device memory back and check it. */ 2746 for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 2747 ASSERT_EQ(ptr[i], 0); 2748 2749 buffer->ptr = old_ptr; 2750 hmm_buffer_free(buffer); 2751 } 2752 2753 struct benchmark_results { 2754 double sys_to_dev_time; 2755 double dev_to_sys_time; 2756 double throughput_s2d; 2757 double throughput_d2s; 2758 }; 2759 2760 static double get_time_ms(void) 2761 { 2762 struct timeval tv; 2763 2764 gettimeofday(&tv, NULL); 2765 return (tv.tv_sec * 1000.0) + (tv.tv_usec / 1000.0); 2766 } 2767 2768 static inline struct hmm_buffer *hmm_buffer_alloc(unsigned long size) 2769 { 2770 struct hmm_buffer *buffer; 2771 2772 buffer = malloc(sizeof(*buffer)); 2773 2774 buffer->fd = -1; 2775 buffer->size = size; 2776 buffer->mirror = malloc(size); 2777 memset(buffer->mirror, 0xFF, size); 2778 return buffer; 2779 } 2780 2781 static void print_benchmark_results(const char *test_name, size_t buffer_size, 2782 struct benchmark_results *thp, 2783 struct benchmark_results *regular) 2784 { 2785 double s2d_improvement = ((regular->sys_to_dev_time - thp->sys_to_dev_time) / 2786 regular->sys_to_dev_time) * 100.0; 2787 double d2s_improvement = ((regular->dev_to_sys_time - thp->dev_to_sys_time) / 2788 regular->dev_to_sys_time) * 100.0; 2789 double throughput_s2d_improvement = ((thp->throughput_s2d - regular->throughput_s2d) / 2790 regular->throughput_s2d) * 100.0; 2791 double throughput_d2s_improvement = ((thp->throughput_d2s - regular->throughput_d2s) / 2792 regular->throughput_d2s) * 100.0; 2793 2794 printf("\n=== %s (%.1f MB) ===\n", test_name, buffer_size / (1024.0 * 1024.0)); 2795 printf(" | With THP | Without THP | Improvement\n"); 2796 printf("---------------------------------------------------------------------\n"); 2797 printf("Sys->Dev Migration | %.3f ms | %.3f ms | %.1f%%\n", 2798 thp->sys_to_dev_time, regular->sys_to_dev_time, s2d_improvement); 2799 printf("Dev->Sys Migration | %.3f ms | %.3f ms | %.1f%%\n", 2800 thp->dev_to_sys_time, regular->dev_to_sys_time, d2s_improvement); 2801 printf("S->D Throughput | %.2f GB/s | %.2f GB/s | %.1f%%\n", 2802 thp->throughput_s2d, regular->throughput_s2d, throughput_s2d_improvement); 2803 printf("D->S Throughput | %.2f GB/s | %.2f GB/s | %.1f%%\n", 2804 thp->throughput_d2s, regular->throughput_d2s, throughput_d2s_improvement); 2805 } 2806 2807 /* 2808 * Run a single migration benchmark 2809 * fd: file descriptor for hmm device 2810 * use_thp: whether to use THP 2811 * buffer_size: size of buffer to allocate 2812 * iterations: number of iterations 2813 * results: where to store results 2814 */ 2815 static inline int run_migration_benchmark(int fd, int use_thp, size_t buffer_size, 2816 int iterations, struct benchmark_results *results) 2817 { 2818 struct hmm_buffer *buffer; 2819 unsigned long npages = buffer_size / sysconf(_SC_PAGESIZE); 2820 double start, end; 2821 double s2d_total = 0, d2s_total = 0; 2822 int ret, i; 2823 int *ptr; 2824 2825 buffer = hmm_buffer_alloc(buffer_size); 2826 2827 /* Map memory */ 2828 buffer->ptr = mmap(NULL, buffer_size, PROT_READ | PROT_WRITE, 2829 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 2830 2831 if (buffer->ptr == MAP_FAILED) 2832 return -1; 2833 2834 /* Apply THP hint if requested */ 2835 if (use_thp) 2836 ret = madvise(buffer->ptr, buffer_size, MADV_HUGEPAGE); 2837 else 2838 ret = madvise(buffer->ptr, buffer_size, MADV_NOHUGEPAGE); 2839 2840 if (ret) 2841 return ret; 2842 2843 /* Initialize memory to make sure pages are allocated */ 2844 ptr = (int *)buffer->ptr; 2845 for (i = 0; i < buffer_size / sizeof(int); i++) 2846 ptr[i] = i & 0xFF; 2847 2848 /* Warmup iteration */ 2849 ret = hmm_migrate_sys_to_dev(fd, buffer, npages); 2850 if (ret) 2851 return ret; 2852 2853 ret = hmm_migrate_dev_to_sys(fd, buffer, npages); 2854 if (ret) 2855 return ret; 2856 2857 /* Benchmark iterations */ 2858 for (i = 0; i < iterations; i++) { 2859 /* System to device migration */ 2860 start = get_time_ms(); 2861 2862 ret = hmm_migrate_sys_to_dev(fd, buffer, npages); 2863 if (ret) 2864 return ret; 2865 2866 end = get_time_ms(); 2867 s2d_total += (end - start); 2868 2869 /* Device to system migration */ 2870 start = get_time_ms(); 2871 2872 ret = hmm_migrate_dev_to_sys(fd, buffer, npages); 2873 if (ret) 2874 return ret; 2875 2876 end = get_time_ms(); 2877 d2s_total += (end - start); 2878 } 2879 2880 /* Calculate average times and throughput */ 2881 results->sys_to_dev_time = s2d_total / iterations; 2882 results->dev_to_sys_time = d2s_total / iterations; 2883 results->throughput_s2d = (buffer_size / (1024.0 * 1024.0 * 1024.0)) / 2884 (results->sys_to_dev_time / 1000.0); 2885 results->throughput_d2s = (buffer_size / (1024.0 * 1024.0 * 1024.0)) / 2886 (results->dev_to_sys_time / 1000.0); 2887 2888 /* Cleanup */ 2889 hmm_buffer_free(buffer); 2890 return 0; 2891 } 2892 2893 /* 2894 * Benchmark THP migration with different buffer sizes 2895 */ 2896 TEST_F_TIMEOUT(hmm, benchmark_thp_migration, 120) 2897 { 2898 struct benchmark_results thp_results, regular_results; 2899 size_t thp_size = read_pmd_pagesize(); 2900 int iterations = 5; 2901 2902 if (!thp_size) 2903 thp_size = TWOMEG; 2904 2905 printf("\nHMM THP Migration Benchmark\n"); 2906 printf("---------------------------\n"); 2907 printf("System page size: %ld bytes\n", sysconf(_SC_PAGESIZE)); 2908 2909 /* Test different buffer sizes */ 2910 size_t test_sizes[] = { 2911 thp_size / 4, /* quarter THP */ 2912 thp_size / 2, /* half THP */ 2913 thp_size, /* single THP */ 2914 thp_size * 2, /* two THPs */ 2915 thp_size * 4, /* four THPs */ 2916 thp_size * 8, /* eight THPs */ 2917 thp_size * 128, /* one twenty eight THPs */ 2918 }; 2919 2920 static const char *const test_names[] = { 2921 "Small Buffer", 2922 "Half THP Size", 2923 "Single THP Size", 2924 "Two THP Size", 2925 "Four THP Size", 2926 "Eight THP Size", 2927 "One twenty eight THP Size" 2928 }; 2929 2930 int num_tests = ARRAY_SIZE(test_sizes); 2931 2932 /* Run all tests */ 2933 for (int i = 0; i < num_tests; i++) { 2934 /* Skip test sizes exceeding INT_MAX to avoid overflow */ 2935 if (test_sizes[i] > INT_MAX) 2936 break; 2937 2938 /* Test with THP */ 2939 ASSERT_EQ(run_migration_benchmark(self->fd, 1, test_sizes[i], 2940 iterations, &thp_results), 0); 2941 2942 /* Test without THP */ 2943 ASSERT_EQ(run_migration_benchmark(self->fd, 0, test_sizes[i], 2944 iterations, ®ular_results), 0); 2945 2946 /* Print results */ 2947 print_benchmark_results(test_names[i], test_sizes[i], 2948 &thp_results, ®ular_results); 2949 } 2950 } 2951 TEST_HARNESS_MAIN 2952