// SPDX-License-Identifier: GPL-2.0-only
/*
 * COW (Copy On Write) tests.
 *
 * Copyright 2022, Red Hat, Inc.
 *
 * Author(s): David Hildenbrand <david@redhat.com>
 */
#define _GNU_SOURCE
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <stdint.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <dirent.h>
#include <assert.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/wait.h>
#include <linux/memfd.h>

#include "local_config.h"
#ifdef LOCAL_CONFIG_HAVE_LIBURING
#include <liburing.h>
#endif /* LOCAL_CONFIG_HAVE_LIBURING */

#include "../../../../mm/gup_test.h"
#include "../kselftest.h"
#include "vm_util.h"

#ifndef MADV_PAGEOUT
#define MADV_PAGEOUT 21
#endif
#ifndef MADV_COLLAPSE
#define MADV_COLLAPSE 25
#endif

static size_t pagesize;
static int pagemap_fd;
static size_t thpsize;
static int nr_hugetlbsizes;
static size_t hugetlbsizes[10];
static int gup_fd;
static bool has_huge_zeropage;

static void detect_huge_zeropage(void)
{
	int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page",
		      O_RDONLY);
	size_t enabled = 0;
	char buf[15];
	int ret;

	if (fd < 0)
		return;

	ret = pread(fd, buf, sizeof(buf), 0);
	if (ret > 0 && ret < sizeof(buf)) {
		buf[ret] = 0;

		enabled = strtoul(buf, NULL, 10);
		if (enabled == 1) {
			has_huge_zeropage = true;
			ksft_print_msg("[INFO] huge zeropage is enabled\n");
		}
	}

	close(fd);
}

static void detect_hugetlbsizes(void)
{
	DIR *dir = opendir("/sys/kernel/mm/hugepages/");

	if (!dir)
		return;

	while (nr_hugetlbsizes < ARRAY_SIZE(hugetlbsizes)) {
		struct dirent *entry = readdir(dir);
		size_t kb;

		if (!entry)
			break;
		if (entry->d_type != DT_DIR)
			continue;
		if (sscanf(entry->d_name, "hugepages-%zukB", &kb) != 1)
			continue;
		hugetlbsizes[nr_hugetlbsizes] = kb * 1024;
		nr_hugetlbsizes++;
		ksft_print_msg("[INFO] detected hugetlb size: %zu KiB\n",
			       kb);
	}
	closedir(dir);
}

static bool range_is_swapped(void *addr, size_t size)
{
	for (; size; addr += pagesize, size -= pagesize)
		if (!pagemap_is_swapped(pagemap_fd, addr))
			return false;
	return true;
}

struct comm_pipes {
	int child_ready[2];
	int parent_ready[2];
};

static int setup_comm_pipes(struct comm_pipes *comm_pipes)
{
	if (pipe(comm_pipes->child_ready) < 0)
		return -errno;
	if (pipe(comm_pipes->parent_ready) < 0) {
		close(comm_pipes->child_ready[0]);
		close(comm_pipes->child_ready[1]);
		return -errno;
	}

	return 0;
}

static void close_comm_pipes(struct comm_pipes *comm_pipes)
{
	close(comm_pipes->child_ready[0]);
	close(comm_pipes->child_ready[1]);
	close(comm_pipes->parent_ready[0]);
	close(comm_pipes->parent_ready[1]);
}

static int child_memcmp_fn(char *mem, size_t size,
			   struct comm_pipes *comm_pipes)
{
	char *old = malloc(size);
	char buf;

	/* Backup the original content. */
	memcpy(old, mem, size);

	/* Wait until the parent modified the page. */
	write(comm_pipes->child_ready[1], "0", 1);
	while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
		;

	/* See if we still read the old values. */
	return memcmp(old, mem, size);
}

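/*
 * Child-side variant that additionally takes a R/O GUP pin via vmsplice()
 * and then unmaps the memory: even after the parent modified its copy, the
 * pipe must still hand out the old content, otherwise the modification
 * leaked into the pinned page.
 */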
static int child_vmsplice_memcmp_fn(char *mem, size_t size,
				    struct comm_pipes *comm_pipes)
{
	struct iovec iov = {
		.iov_base = mem,
		.iov_len = size,
	};
	ssize_t cur, total, transferred;
	char *old, *new;
	int fds[2];
	char buf;

	old = malloc(size);
	new = malloc(size);

	/* Backup the original content. */
	memcpy(old, mem, size);

	if (pipe(fds) < 0)
		return -errno;

	/* Trigger a read-only pin. */
	transferred = vmsplice(fds[1], &iov, 1, 0);
	if (transferred < 0)
		return -errno;
	if (transferred == 0)
		return -EINVAL;

	/* Unmap it from our page tables. */
	if (munmap(mem, size) < 0)
		return -errno;

	/* Wait until the parent modified it. */
	write(comm_pipes->child_ready[1], "0", 1);
	while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
		;

	/* See if we still read the old values via the pipe. */
	for (total = 0; total < transferred; total += cur) {
		cur = read(fds[0], new + total, transferred - total);
		if (cur < 0)
			return -errno;
	}

	return memcmp(old, new, transferred);
}

typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes);

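/*
 * Skeleton for the COW-in-parent tests: fork() a child running @fn, modify
 * the memory in the parent and let the child verify that it still observes
 * the old content. With @do_mprotect, a PROT_READ -> PROT_READ|PROT_WRITE
 * round-trip is performed first to also exercise the mprotect()
 * write-fault-avoidance path.
 */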
static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
				  child_fn fn)
{
	struct comm_pipes comm_pipes;
	char buf;
	int ret;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		return;
	}

	ret = fork();
	if (ret < 0) {
		ksft_test_result_fail("fork() failed\n");
		goto close_comm_pipes;
	} else if (!ret) {
		exit(fn(mem, size, &comm_pipes));
	}

	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
		;

	if (do_mprotect) {
		/*
		 * mprotect() optimizations might try avoiding
		 * write-faults by directly mapping pages writable.
		 */
		ret = mprotect(mem, size, PROT_READ);
		ret |= mprotect(mem, size, PROT_READ|PROT_WRITE);
		if (ret) {
			ksft_test_result_fail("mprotect() failed\n");
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}
	}

	/* Modify the page. */
	memset(mem, 0xff, size);
	write(comm_pipes.parent_ready[1], "0", 1);

	wait(&ret);
	if (WIFEXITED(ret))
		ret = WEXITSTATUS(ret);
	else
		ret = -EINVAL;

	ksft_test_result(!ret, "No leak from parent into child\n");
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
}

static void test_cow_in_parent(char *mem, size_t size)
{
	do_test_cow_in_parent(mem, size, false, child_memcmp_fn);
}

static void test_cow_in_parent_mprotect(char *mem, size_t size)
{
	do_test_cow_in_parent(mem, size, true, child_memcmp_fn);
}

static void test_vmsplice_in_child(char *mem, size_t size)
{
	do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn);
}

static void test_vmsplice_in_child_mprotect(char *mem, size_t size)
{
	do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn);
}

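/*
 * Mirror image of the child tests: the parent takes the R/O pin via
 * vmsplice() (either before or after fork()) and unmaps the memory, while
 * the child modifies it. Reading from the pipe must still return the old
 * content.
 */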
static void do_test_vmsplice_in_parent(char *mem, size_t size,
				       bool before_fork)
{
	struct iovec iov = {
		.iov_base = mem,
		.iov_len = size,
	};
	ssize_t cur, total, transferred;
	struct comm_pipes comm_pipes;
	char *old, *new;
	int ret, fds[2];
	char buf;

	old = malloc(size);
	new = malloc(size);

	memcpy(old, mem, size);

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		goto free;
	}

	if (pipe(fds) < 0) {
		ksft_test_result_fail("pipe() failed\n");
		goto close_comm_pipes;
	}

	if (before_fork) {
		transferred = vmsplice(fds[1], &iov, 1, 0);
		if (transferred <= 0) {
			ksft_test_result_fail("vmsplice() failed\n");
			goto close_pipe;
		}
	}

	ret = fork();
	if (ret < 0) {
		ksft_test_result_fail("fork() failed\n");
		goto close_pipe;
	} else if (!ret) {
		write(comm_pipes.child_ready[1], "0", 1);
		while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
			;
		/* Modify page content in the child. */
		memset(mem, 0xff, size);
		exit(0);
	}

	if (!before_fork) {
		transferred = vmsplice(fds[1], &iov, 1, 0);
		if (transferred <= 0) {
			ksft_test_result_fail("vmsplice() failed\n");
			wait(&ret);
			goto close_pipe;
		}
	}

	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
		;
	if (munmap(mem, size) < 0) {
		ksft_test_result_fail("munmap() failed\n");
		goto close_pipe;
	}
	write(comm_pipes.parent_ready[1], "0", 1);

	/* Wait until the child is done writing. */
	wait(&ret);
	if (!WIFEXITED(ret)) {
		ksft_test_result_fail("wait() failed\n");
		goto close_pipe;
	}

	/* See if we still read the old values. */
	for (total = 0; total < transferred; total += cur) {
		cur = read(fds[0], new + total, transferred - total);
		if (cur < 0) {
			ksft_test_result_fail("read() failed\n");
			goto close_pipe;
		}
	}

	ksft_test_result(!memcmp(old, new, transferred),
			 "No leak from child into parent\n");
close_pipe:
	close(fds[0]);
	close(fds[1]);
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
free:
	free(old);
	free(new);
}

static void test_vmsplice_before_fork(char *mem, size_t size)
{
	do_test_vmsplice_in_parent(mem, size, true);
}

static void test_vmsplice_after_fork(char *mem, size_t size)
{
	do_test_vmsplice_in_parent(mem, size, false);
}

#ifdef LOCAL_CONFIG_HAVE_LIBURING
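/*
 * io_uring fixed buffers take a longterm R/W pin on the registered range.
 * After modifying the memory, writing it out via IORING_OP_WRITE_FIXED and
 * reading the file back must observe the new content: a pin left pointing
 * at a stale page would write out the old content instead.
 */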
static void do_test_iouring(char *mem, size_t size, bool use_fork)
{
	struct comm_pipes comm_pipes;
	struct io_uring_cqe *cqe;
	struct io_uring_sqe *sqe;
	struct io_uring ring;
	ssize_t cur, total;
	struct iovec iov;
	char buf, *tmp;
	int ret, fd;
	FILE *file;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		return;
	}

	file = tmpfile();
	if (!file) {
		ksft_test_result_fail("tmpfile() failed\n");
		goto close_comm_pipes;
	}
	fd = fileno(file);
	assert(fd >= 0);

	tmp = malloc(size);
	if (!tmp) {
		ksft_test_result_fail("malloc() failed\n");
		goto close_file;
	}

	/* Skip on errors, as we might just lack kernel support. */
	ret = io_uring_queue_init(1, &ring, 0);
	if (ret < 0) {
		ksft_test_result_skip("io_uring_queue_init() failed\n");
		goto free_tmp;
	}

	/*
	 * Register the range as a fixed buffer. This will FOLL_WRITE | FOLL_PIN
	 * | FOLL_LONGTERM the range.
	 *
	 * Skip on errors, as we might just lack kernel support or might not
	 * have sufficient MEMLOCK permissions.
	 */
	iov.iov_base = mem;
	iov.iov_len = size;
	ret = io_uring_register_buffers(&ring, &iov, 1);
	if (ret) {
		ksft_test_result_skip("io_uring_register_buffers() failed\n");
		goto queue_exit;
	}

	if (use_fork) {
		/*
		 * fork() and keep the child alive until we're done. Note that
		 * we expect the pinned page to not get shared with the child.
		 */
		ret = fork();
		if (ret < 0) {
			ksft_test_result_fail("fork() failed\n");
			goto unregister_buffers;
		} else if (!ret) {
			write(comm_pipes.child_ready[1], "0", 1);
			while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
				;
			exit(0);
		}

		while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
			;
	} else {
		/*
		 * Map the page R/O into the page table. Enable softdirty
		 * tracking to stop the page from getting mapped R/W immediately
		 * again by mprotect() optimizations. Note that we don't have an
		 * easy way to test if that worked (the pagemap does not export
		 * if the page is mapped R/O vs. R/W).
		 */
		ret = mprotect(mem, size, PROT_READ);
		clear_softdirty();
		ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
		if (ret) {
			ksft_test_result_fail("mprotect() failed\n");
			goto unregister_buffers;
		}
	}

	/*
	 * Modify the page and write page content as observed by the fixed
	 * buffer pin to the file so we can verify it.
	 */
	memset(mem, 0xff, size);
	sqe = io_uring_get_sqe(&ring);
	if (!sqe) {
		ksft_test_result_fail("io_uring_get_sqe() failed\n");
		goto quit_child;
	}
	io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0);

	ret = io_uring_submit(&ring);
	if (ret < 0) {
		ksft_test_result_fail("io_uring_submit() failed\n");
		goto quit_child;
	}

	ret = io_uring_wait_cqe(&ring, &cqe);
	if (ret < 0) {
		ksft_test_result_fail("io_uring_wait_cqe() failed\n");
		goto quit_child;
	}

	if (cqe->res != size) {
		ksft_test_result_fail("write_fixed failed\n");
		goto quit_child;
	}
	io_uring_cqe_seen(&ring, cqe);

	/* Read back the file content to the temporary buffer. */
	total = 0;
	while (total < size) {
		cur = pread(fd, tmp + total, size - total, total);
		if (cur < 0) {
			ksft_test_result_fail("pread() failed\n");
			goto quit_child;
		}
		total += cur;
	}

	/* Finally, check if we read what we expected. */
	ksft_test_result(!memcmp(mem, tmp, size),
			 "Longterm R/W pin is reliable\n");

quit_child:
	if (use_fork) {
		write(comm_pipes.parent_ready[1], "0", 1);
		wait(&ret);
	}
unregister_buffers:
	io_uring_unregister_buffers(&ring);
queue_exit:
	io_uring_queue_exit(&ring);
free_tmp:
	free(tmp);
close_file:
	fclose(file);
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
}

static void test_iouring_ro(char *mem, size_t size)
{
	do_test_iouring(mem, size, false);
}

static void test_iouring_fork(char *mem, size_t size)
{
	do_test_iouring(mem, size, true);
}

#endif /* LOCAL_CONFIG_HAVE_LIBURING */

enum ro_pin_test {
	RO_PIN_TEST,
	RO_PIN_TEST_SHARED,
	RO_PIN_TEST_PREVIOUSLY_SHARED,
	RO_PIN_TEST_RO_EXCLUSIVE,
};

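/*
 * R/O pin tests, driven via the gup_test debugfs interface:
 * - RO_PIN_TEST: pin the page in whatever state the runner set up
 * - RO_PIN_TEST_SHARED: pin while the page is COW-shared with a child
 * - RO_PIN_TEST_PREVIOUSLY_SHARED: pin after the sharing child quit
 * - RO_PIN_TEST_RO_EXCLUSIVE: pin an exclusive page mapped R/O
 * In all cases, later modifications via the page table must be observable
 * through the pin.
 */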
static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
			   bool fast)
{
	struct pin_longterm_test args;
	struct comm_pipes comm_pipes;
	char *tmp, buf;
	__u64 tmp_val;
	int ret;

	if (gup_fd < 0) {
		ksft_test_result_skip("gup_test not available\n");
		return;
	}

	tmp = malloc(size);
	if (!tmp) {
		ksft_test_result_fail("malloc() failed\n");
		return;
	}

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		goto free_tmp;
	}

	switch (test) {
	case RO_PIN_TEST:
		break;
	case RO_PIN_TEST_SHARED:
	case RO_PIN_TEST_PREVIOUSLY_SHARED:
		/*
		 * Share the pages with our child. As the pages are not pinned,
		 * this should just work.
		 */
		ret = fork();
		if (ret < 0) {
			ksft_test_result_fail("fork() failed\n");
			goto close_comm_pipes;
		} else if (!ret) {
			write(comm_pipes.child_ready[1], "0", 1);
			while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
				;
			exit(0);
		}

		/* Wait until our child is ready. */
		while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
			;

		if (test == RO_PIN_TEST_PREVIOUSLY_SHARED) {
			/*
			 * Tell the child to quit now and wait until it has
			 * quit. The pages should now be mapped R/O into our
			 * page tables, but they are no longer shared.
			 */
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			if (!WIFEXITED(ret))
				ksft_print_msg("[INFO] wait() failed\n");
		}
		break;
	case RO_PIN_TEST_RO_EXCLUSIVE:
		/*
		 * Map the page R/O into the page table. Enable softdirty
		 * tracking to stop the page from getting mapped R/W immediately
		 * again by mprotect() optimizations. Note that we don't have an
		 * easy way to test if that worked (the pagemap does not export
		 * if the page is mapped R/O vs. R/W).
		 */
		ret = mprotect(mem, size, PROT_READ);
		clear_softdirty();
		ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
		if (ret) {
			ksft_test_result_fail("mprotect() failed\n");
			goto close_comm_pipes;
		}
		break;
	default:
		assert(false);
	}

	/* Take a R/O pin. This should trigger unsharing. */
	args.addr = (__u64)(uintptr_t)mem;
	args.size = size;
	args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0;
	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
	if (ret) {
		if (errno == EINVAL)
			ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n");
		else
			ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n");
		goto wait;
	}

	/* Modify the page. */
	memset(mem, 0xff, size);

	/*
	 * Read back the content via the pin to the temporary buffer and
	 * test if we observed the modification.
	 */
	tmp_val = (__u64)(uintptr_t)tmp;
	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val);
	if (ret)
		ksft_test_result_fail("PIN_LONGTERM_TEST_READ failed\n");
	else
		ksft_test_result(!memcmp(mem, tmp, size),
				 "Longterm R/O pin is reliable\n");

	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP);
	if (ret)
		ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n");
wait:
	switch (test) {
	case RO_PIN_TEST_SHARED:
		write(comm_pipes.parent_ready[1], "0", 1);
		wait(&ret);
		if (!WIFEXITED(ret))
			ksft_print_msg("[INFO] wait() failed\n");
		break;
	default:
		break;
	}
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
free_tmp:
	free(tmp);
}

static void test_ro_pin_on_shared(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, false);
}

static void test_ro_fast_pin_on_shared(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, true);
}

static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, false);
}

static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, true);
}

static void test_ro_pin_on_ro_exclusive(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, false);
}

static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, true);
}

typedef void (*test_fn)(char *mem, size_t size);

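/*
 * Note that each runner below reports exactly one ksft result per
 * invocation; tests_per_anon_test_case() and friends rely on that when
 * the test plan is computed in main().
 */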
static void do_run_with_base_page(test_fn fn, bool swapout)
{
	char *mem;
	int ret;

	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return;
	}

	ret = madvise(mem, pagesize, MADV_NOHUGEPAGE);
	/* Ignore if not around on this kernel. */
	if (ret && errno != EINVAL) {
		ksft_test_result_fail("MADV_NOHUGEPAGE failed\n");
		goto munmap;
	}

	/* Populate a base page. */
	memset(mem, 0, pagesize);

	if (swapout) {
		madvise(mem, pagesize, MADV_PAGEOUT);
		if (!pagemap_is_swapped(pagemap_fd, mem)) {
			ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
			goto munmap;
		}
	}

	fn(mem, pagesize);
munmap:
	munmap(mem, pagesize);
}

static void run_with_base_page(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with base page\n", desc);
	do_run_with_base_page(fn, false);
}

static void run_with_base_page_swap(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with swapped out base page\n", desc);
	do_run_with_base_page(fn, true);
}

enum thp_run {
	THP_RUN_PMD,
	THP_RUN_PMD_SWAPOUT,
	THP_RUN_PTE,
	THP_RUN_PTE_SWAPOUT,
	THP_RUN_SINGLE_PTE,
	THP_RUN_SINGLE_PTE_SWAPOUT,
	THP_RUN_PARTIAL_MREMAP,
	THP_RUN_PARTIAL_SHARED,
};

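/*
 * Prepare a THP in the state selected by @thp_run before handing it to the
 * test: keep it PMD-mapped, remap it using PTEs, reduce it to a single PTE,
 * mremap() or COW-share only parts of it, and optionally swap it out first.
 */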
static void do_run_with_thp(test_fn fn, enum thp_run thp_run)
{
	char *mem, *mmap_mem, *tmp, *mremap_mem = MAP_FAILED;
	size_t size, mmap_size, mremap_size;
	int ret;

	/* For alignment purposes, we need twice the thp size. */
	mmap_size = 2 * thpsize;
	mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mmap_mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return;
	}

	/* We need a THP-aligned memory area. */
	mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1));

	ret = madvise(mem, thpsize, MADV_HUGEPAGE);
	if (ret) {
		ksft_test_result_fail("MADV_HUGEPAGE failed\n");
		goto munmap;
	}

	/*
	 * Try to populate a THP. Touch the first sub-page and test if we get
	 * another sub-page populated automatically.
	 */
	mem[0] = 0;
	if (!pagemap_is_populated(pagemap_fd, mem + pagesize)) {
		ksft_test_result_skip("Did not get a THP populated\n");
		goto munmap;
	}
	memset(mem, 0, thpsize);

	size = thpsize;
	switch (thp_run) {
	case THP_RUN_PMD:
	case THP_RUN_PMD_SWAPOUT:
		break;
	case THP_RUN_PTE:
	case THP_RUN_PTE_SWAPOUT:
		/*
		 * Trigger PTE-mapping the THP by temporarily mapping a single
		 * subpage R/O.
		 */
		ret = mprotect(mem + pagesize, pagesize, PROT_READ);
		if (ret) {
			ksft_test_result_fail("mprotect() failed\n");
			goto munmap;
		}
		ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
		if (ret) {
			ksft_test_result_fail("mprotect() failed\n");
			goto munmap;
		}
		break;
	case THP_RUN_SINGLE_PTE:
	case THP_RUN_SINGLE_PTE_SWAPOUT:
		/*
		 * Discard all but a single subpage of that PTE-mapped THP. What
		 * remains is a single PTE mapping a single subpage.
		 */
		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED);
		if (ret) {
			ksft_test_result_fail("MADV_DONTNEED failed\n");
			goto munmap;
		}
		size = pagesize;
		break;
	case THP_RUN_PARTIAL_MREMAP:
		/*
		 * Remap half of the THP. We need some new memory location
		 * for that.
		 */
		mremap_size = thpsize / 2;
		mremap_mem = mmap(NULL, mremap_size, PROT_NONE,
				  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (mremap_mem == MAP_FAILED) {
			ksft_test_result_fail("mmap() failed\n");
			goto munmap;
		}
		tmp = mremap(mem + mremap_size, mremap_size, mremap_size,
			     MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem);
		if (tmp != mremap_mem) {
			ksft_test_result_fail("mremap() failed\n");
			goto munmap;
		}
		size = mremap_size;
		break;
	case THP_RUN_PARTIAL_SHARED:
		/*
		 * Share the first page of the THP with a child and quit the
		 * child. This will result in some parts of the THP never
		 * having been shared.
		 */
		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK);
		if (ret) {
			ksft_test_result_fail("MADV_DONTFORK failed\n");
			goto munmap;
		}
		ret = fork();
		if (ret < 0) {
			ksft_test_result_fail("fork() failed\n");
			goto munmap;
		} else if (!ret) {
			exit(0);
		}
		wait(&ret);
		/* Allow for sharing all pages again. */
		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK);
		if (ret) {
			ksft_test_result_fail("MADV_DOFORK failed\n");
			goto munmap;
		}
		break;
	default:
		assert(false);
	}

	switch (thp_run) {
	case THP_RUN_PMD_SWAPOUT:
	case THP_RUN_PTE_SWAPOUT:
	case THP_RUN_SINGLE_PTE_SWAPOUT:
		madvise(mem, size, MADV_PAGEOUT);
		if (!range_is_swapped(mem, size)) {
			ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
			goto munmap;
		}
		break;
	default:
		break;
	}

	fn(mem, size);
munmap:
	munmap(mmap_mem, mmap_size);
	if (mremap_mem != MAP_FAILED)
		munmap(mremap_mem, mremap_size);
}

static void run_with_thp(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PMD);
}

static void run_with_thp_swap(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with swapped-out THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT);
}

static void run_with_pte_mapped_thp(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with PTE-mapped THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PTE);
}

static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT);
}

static void run_with_single_pte_of_thp(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with single PTE of THP\n", desc);
	do_run_with_thp(fn, THP_RUN_SINGLE_PTE);
}

static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP\n", desc);
	do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT);
}

static void run_with_partial_mremap_thp(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP);
}

static void run_with_partial_shared_thp(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with partially shared THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED);
}

static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize)
{
	int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
	char *mem, *dummy;

	ksft_print_msg("[RUN] %s ... with hugetlb (%zu kB)\n", desc,
		       hugetlbsize / 1024);

	flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT;

	mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_skip("need more free huge pages\n");
		return;
	}

	/* Populate a huge page. */
	memset(mem, 0, hugetlbsize);

	/*
	 * We need a total of two hugetlb pages to handle COW/unsharing
	 * properly, otherwise we might get zapped by a SIGBUS.
	 */
	dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
	if (dummy == MAP_FAILED) {
		ksft_test_result_skip("need more free huge pages\n");
		goto munmap;
	}
	munmap(dummy, hugetlbsize);

	fn(mem, hugetlbsize);
munmap:
	munmap(mem, hugetlbsize);
}

struct test_case {
	const char *desc;
	test_fn fn;
};

/*
 * Test cases that are specific to anonymous pages: pages in private mappings
 * that may get shared via COW during fork().
 */
static const struct test_case anon_test_cases[] = {
	/*
	 * Basic COW tests for fork() without any GUP. If we fail to break COW,
	 * either the child can observe modifications by the parent or the
	 * other way around.
	 */
	{
		"Basic COW after fork()",
		test_cow_in_parent,
	},
	/*
	 * Basic test, but do an additional mprotect(PROT_READ)+
	 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
	 */
	{
		"Basic COW after fork() with mprotect() optimization",
		test_cow_in_parent_mprotect,
	},
	/*
	 * vmsplice() [R/O GUP] + unmap in the child; modify in the parent. If
	 * we fail to break COW, the child observes modifications by the
	 * parent. This is CVE-2020-29374 reported by Jann Horn.
	 */
	{
		"vmsplice() + unmap in child",
		test_vmsplice_in_child
	},
	/*
	 * vmsplice() test, but do an additional mprotect(PROT_READ)+
	 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
	 */
	{
		"vmsplice() + unmap in child with mprotect() optimization",
		test_vmsplice_in_child_mprotect
	},
	/*
	 * vmsplice() [R/O GUP] in parent before fork(), unmap in parent after
	 * fork(); modify in the child. If we fail to break COW, the parent
	 * observes modifications by the child.
	 */
	{
		"vmsplice() before fork(), unmap in parent after fork()",
		test_vmsplice_before_fork,
	},
	/*
	 * vmsplice() [R/O GUP] + unmap in parent after fork(); modify in the
	 * child. If we fail to break COW, the parent observes modifications
	 * by the child.
	 */
	{
		"vmsplice() + unmap in parent after fork()",
		test_vmsplice_after_fork,
	},
#ifdef LOCAL_CONFIG_HAVE_LIBURING
	/*
	 * Take a R/W longterm pin and then map the page R/O into the page
	 * table to trigger a write fault on next access. When modifying the
	 * page, the page content must be visible via the pin.
	 */
	{
		"R/O-mapping a page registered as iouring fixed buffer",
		test_iouring_ro,
	},
	/*
	 * Take a R/W longterm pin and then fork() a child. When modifying the
	 * page, the page content must be visible via the pin. We expect the
	 * pinned page to not get shared with the child.
	 */
	{
		"fork() with an iouring fixed buffer",
		test_iouring_fork,
	},
#endif /* LOCAL_CONFIG_HAVE_LIBURING */
	/*
	 * Take a R/O longterm pin on a R/O-mapped shared anonymous page.
	 * When modifying the page via the page table, the page content change
	 * must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped shared page",
		test_ro_pin_on_shared,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped shared page",
		test_ro_fast_pin_on_shared,
	},
	/*
	 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page that
	 * was previously shared. When modifying the page via the page table,
	 * the page content change must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped previously-shared page",
		test_ro_pin_on_ro_previously_shared,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped previously-shared page",
		test_ro_fast_pin_on_ro_previously_shared,
	},
	/*
	 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page.
	 * When modifying the page via the page table, the page content change
	 * must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped exclusive page",
		test_ro_pin_on_ro_exclusive,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped exclusive page",
		test_ro_fast_pin_on_ro_exclusive,
	},
};

static void run_anon_test_case(struct test_case const *test_case)
{
	int i;

	run_with_base_page(test_case->fn, test_case->desc);
	run_with_base_page_swap(test_case->fn, test_case->desc);
	if (thpsize) {
		run_with_thp(test_case->fn, test_case->desc);
		run_with_thp_swap(test_case->fn, test_case->desc);
		run_with_pte_mapped_thp(test_case->fn, test_case->desc);
		run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc);
		run_with_single_pte_of_thp(test_case->fn, test_case->desc);
		run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc);
		run_with_partial_mremap_thp(test_case->fn, test_case->desc);
		run_with_partial_shared_thp(test_case->fn, test_case->desc);
	}
	for (i = 0; i < nr_hugetlbsizes; i++)
		run_with_hugetlb(test_case->fn, test_case->desc,
				 hugetlbsizes[i]);
}

static void run_anon_test_cases(void)
{
	int i;

	ksft_print_msg("[INFO] Anonymous memory tests in private mappings\n");

	for (i = 0; i < ARRAY_SIZE(anon_test_cases); i++)
		run_anon_test_case(&anon_test_cases[i]);
}

static int tests_per_anon_test_case(void)
{
	int tests = 2 + nr_hugetlbsizes;

	if (thpsize)
		tests += 8;
	return tests;
}

enum anon_thp_collapse_test {
	ANON_THP_COLLAPSE_UNSHARED,
	ANON_THP_COLLAPSE_FULLY_SHARED,
	ANON_THP_COLLAPSE_LOWER_SHARED,
	ANON_THP_COLLAPSE_UPPER_SHARED,
};

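/*
 * PTE-map a THP, optionally COW-share (parts of) it with a child, collapse
 * it back into a PMD-mapped THP using MADV_COLLAPSE, and run the basic COW
 * check: collapsing must not lose the per-subpage exclusivity information.
 */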
static void do_test_anon_thp_collapse(char *mem, size_t size,
				      enum anon_thp_collapse_test test)
{
	struct comm_pipes comm_pipes;
	char buf;
	int ret;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		return;
	}

	/*
	 * Trigger PTE-mapping the THP by temporarily mapping a single subpage
	 * R/O, such that we can try collapsing it later.
	 */
	ret = mprotect(mem + pagesize, pagesize, PROT_READ);
	if (ret) {
		ksft_test_result_fail("mprotect() failed\n");
		goto close_comm_pipes;
	}
	ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
	if (ret) {
		ksft_test_result_fail("mprotect() failed\n");
		goto close_comm_pipes;
	}

	switch (test) {
	case ANON_THP_COLLAPSE_UNSHARED:
		/* Collapse before actually COW-sharing the page. */
		ret = madvise(mem, size, MADV_COLLAPSE);
		if (ret) {
			ksft_test_result_skip("MADV_COLLAPSE failed: %s\n",
					      strerror(errno));
			goto close_comm_pipes;
		}
		break;
	case ANON_THP_COLLAPSE_FULLY_SHARED:
		/* COW-share the full PTE-mapped THP. */
		break;
	case ANON_THP_COLLAPSE_LOWER_SHARED:
		/* Don't COW-share the upper part of the THP. */
		ret = madvise(mem + size / 2, size / 2, MADV_DONTFORK);
		if (ret) {
			ksft_test_result_fail("MADV_DONTFORK failed\n");
			goto close_comm_pipes;
		}
		break;
	case ANON_THP_COLLAPSE_UPPER_SHARED:
		/* Don't COW-share the lower part of the THP. */
		ret = madvise(mem, size / 2, MADV_DONTFORK);
		if (ret) {
			ksft_test_result_fail("MADV_DONTFORK failed\n");
			goto close_comm_pipes;
		}
		break;
	default:
		assert(false);
	}

	ret = fork();
	if (ret < 0) {
		ksft_test_result_fail("fork() failed\n");
		goto close_comm_pipes;
	} else if (!ret) {
		switch (test) {
		case ANON_THP_COLLAPSE_UNSHARED:
		case ANON_THP_COLLAPSE_FULLY_SHARED:
			exit(child_memcmp_fn(mem, size, &comm_pipes));
			break;
		case ANON_THP_COLLAPSE_LOWER_SHARED:
			exit(child_memcmp_fn(mem, size / 2, &comm_pipes));
			break;
		case ANON_THP_COLLAPSE_UPPER_SHARED:
			exit(child_memcmp_fn(mem + size / 2, size / 2,
					     &comm_pipes));
			break;
		default:
			assert(false);
		}
	}

	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
		;

	switch (test) {
	case ANON_THP_COLLAPSE_UNSHARED:
		break;
	case ANON_THP_COLLAPSE_UPPER_SHARED:
	case ANON_THP_COLLAPSE_LOWER_SHARED:
		/*
		 * Revert MADV_DONTFORK such that we merge the VMAs and are
		 * able to actually collapse.
		 */
		ret = madvise(mem, size, MADV_DOFORK);
		if (ret) {
			ksft_test_result_fail("MADV_DOFORK failed\n");
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}
		/* FALLTHROUGH */
	case ANON_THP_COLLAPSE_FULLY_SHARED:
		/* Collapse before anyone modified the COW-shared page. */
		ret = madvise(mem, size, MADV_COLLAPSE);
		if (ret) {
			ksft_test_result_skip("MADV_COLLAPSE failed: %s\n",
					      strerror(errno));
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}
		break;
	default:
		assert(false);
	}

	/* Modify the page. */
	memset(mem, 0xff, size);
	write(comm_pipes.parent_ready[1], "0", 1);

	wait(&ret);
	if (WIFEXITED(ret))
		ret = WEXITSTATUS(ret);
	else
		ret = -EINVAL;

	ksft_test_result(!ret, "No leak from parent into child\n");
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
}

static void test_anon_thp_collapse_unshared(char *mem, size_t size)
{
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UNSHARED);
}

static void test_anon_thp_collapse_fully_shared(char *mem, size_t size)
{
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_FULLY_SHARED);
}

static void test_anon_thp_collapse_lower_shared(char *mem, size_t size)
{
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_LOWER_SHARED);
}

static void test_anon_thp_collapse_upper_shared(char *mem, size_t size)
{
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UPPER_SHARED);
}

/*
 * Test cases that are specific to anonymous THP: pages in private mappings
 * that may get shared via COW during fork().
 */
static const struct test_case anon_thp_test_cases[] = {
	/*
	 * Basic COW test for fork() without any GUP when collapsing a THP
	 * before fork().
	 *
	 * Re-mapping a PTE-mapped anon THP using a single PMD ("in-place
	 * collapse") might easily get COW handling wrong when not collapsing
	 * exclusivity information properly.
	 */
	{
		"Basic COW after fork() when collapsing before fork()",
		test_anon_thp_collapse_unshared,
	},
	/* Basic COW test, but collapse after COW-sharing a full THP. */
	{
		"Basic COW after fork() when collapsing after fork() (fully shared)",
		test_anon_thp_collapse_fully_shared,
	},
	/*
	 * Basic COW test, but collapse after COW-sharing the lower half of a
	 * THP.
	 */
	{
		"Basic COW after fork() when collapsing after fork() (lower shared)",
		test_anon_thp_collapse_lower_shared,
	},
	/*
	 * Basic COW test, but collapse after COW-sharing the upper half of a
	 * THP.
	 */
	{
		"Basic COW after fork() when collapsing after fork() (upper shared)",
		test_anon_thp_collapse_upper_shared,
	},
};

static void run_anon_thp_test_cases(void)
{
	int i;

	if (!thpsize)
		return;

	ksft_print_msg("[INFO] Anonymous THP tests\n");

	for (i = 0; i < ARRAY_SIZE(anon_thp_test_cases); i++) {
		struct test_case const *test_case = &anon_thp_test_cases[i];

		ksft_print_msg("[RUN] %s\n", test_case->desc);
		do_run_with_thp(test_case->fn, THP_RUN_PMD);
	}
}

static int tests_per_anon_thp_test_case(void)
{
	return thpsize ? 1 : 0;
}

typedef void (*non_anon_test_fn)(char *mem, const char *smem, size_t size);

static void test_cow(char *mem, const char *smem, size_t size)
{
	char *old = malloc(size);

	/* Backup the original content. */
	memcpy(old, smem, size);

	/* Modify the page. */
	memset(mem, 0xff, size);

	/* See if we still read the old values via the other mapping. */
	ksft_test_result(!memcmp(smem, old, size),
			 "Other mapping not modified\n");
	free(old);
}

static void test_ro_pin(char *mem, const char *smem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST, false);
}

static void test_ro_fast_pin(char *mem, const char *smem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST, true);
}

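/*
 * A read fault on an anonymous private mapping populates the shared
 * zeropage, so @mem and @smem initially map the same page; a write to @mem
 * must trigger COW instead of modifying that page.
 */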
static void run_with_zeropage(non_anon_test_fn fn, const char *desc)
{
	char *mem, *smem, tmp;

	ksft_print_msg("[RUN] %s ... with shared zeropage\n", desc);

	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANON, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return;
	}

	smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
	if (smem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto munmap;
	}

	/* Read from the page to populate the shared zeropage. */
	tmp = *mem + *smem;
	asm volatile("" : "+r" (tmp));

	fn(mem, smem, pagesize);
munmap:
	munmap(mem, pagesize);
	if (smem != MAP_FAILED)
		munmap(smem, pagesize);
}

static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
{
	char *mem, *smem, *mmap_mem, *mmap_smem, tmp;
	size_t mmap_size;
	int ret;

	ksft_print_msg("[RUN] %s ... with huge zeropage\n", desc);

	if (!has_huge_zeropage) {
		ksft_test_result_skip("Huge zeropage not enabled\n");
		return;
	}

	/* For alignment purposes, we need twice the thp size. */
	mmap_size = 2 * thpsize;
	mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mmap_mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return;
	}
	mmap_smem = mmap(NULL, mmap_size, PROT_READ,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mmap_smem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto munmap;
	}

	/* We need a THP-aligned memory area. */
	mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1));
	smem = (char *)(((uintptr_t)mmap_smem + thpsize) & ~(thpsize - 1));

	ret = madvise(mem, thpsize, MADV_HUGEPAGE);
	ret |= madvise(smem, thpsize, MADV_HUGEPAGE);
	if (ret) {
		ksft_test_result_fail("MADV_HUGEPAGE failed\n");
		goto munmap;
	}

	/*
	 * Read from the memory to populate the huge shared zeropage. Read from
	 * the first sub-page and test if we get another sub-page populated
	 * automatically.
	 */
	tmp = *mem + *smem;
	asm volatile("" : "+r" (tmp));
	if (!pagemap_is_populated(pagemap_fd, mem + pagesize) ||
	    !pagemap_is_populated(pagemap_fd, smem + pagesize)) {
		ksft_test_result_skip("Did not get THPs populated\n");
		goto munmap;
	}

	fn(mem, smem, thpsize);
munmap:
	munmap(mmap_mem, mmap_size);
	if (mmap_smem != MAP_FAILED)
		munmap(mmap_smem, mmap_size);
}

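/*
 * For the file-backed runners, @mem is a MAP_PRIVATE mapping that has to be
 * COWed on write, while @smem is a MAP_SHARED mapping used to observe the
 * (ideally unmodified) pagecache page.
 */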
static void run_with_memfd(non_anon_test_fn fn, const char *desc)
{
	char *mem, *smem, tmp;
	int fd;

	ksft_print_msg("[RUN] %s ... with memfd\n", desc);

	fd = memfd_create("test", 0);
	if (fd < 0) {
		ksft_test_result_fail("memfd_create() failed\n");
		return;
	}

	/* File consists of a single page filled with zeroes. */
	if (fallocate(fd, 0, 0, pagesize)) {
		ksft_test_result_fail("fallocate() failed\n");
		goto close;
	}

	/* Create a private mapping of the memfd. */
	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto close;
	}
	smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
	if (smem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto munmap;
	}

	/* Fault the page in. */
	tmp = *mem + *smem;
	asm volatile("" : "+r" (tmp));

	fn(mem, smem, pagesize);
munmap:
	munmap(mem, pagesize);
	if (smem != MAP_FAILED)
		munmap(smem, pagesize);
close:
	close(fd);
}

static void run_with_tmpfile(non_anon_test_fn fn, const char *desc)
{
	char *mem, *smem, tmp;
	FILE *file;
	int fd;

	ksft_print_msg("[RUN] %s ... with tmpfile\n", desc);

	file = tmpfile();
	if (!file) {
		ksft_test_result_fail("tmpfile() failed\n");
		return;
	}

	fd = fileno(file);
	if (fd < 0) {
		ksft_test_result_skip("fileno() failed\n");
		goto close;
	}

	/* File consists of a single page filled with zeroes. */
	if (fallocate(fd, 0, 0, pagesize)) {
		ksft_test_result_fail("fallocate() failed\n");
		goto close;
	}

	/* Create a private mapping of the file. */
	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto close;
	}
	smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
	if (smem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto munmap;
	}

	/* Fault the page in. */
	tmp = *mem + *smem;
	asm volatile("" : "+r" (tmp));

	fn(mem, smem, pagesize);
munmap:
	munmap(mem, pagesize);
	if (smem != MAP_FAILED)
		munmap(smem, pagesize);
close:
	fclose(file);
}

static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc,
				   size_t hugetlbsize)
{
	int flags = MFD_HUGETLB;
	char *mem, *smem, tmp;
	int fd;

	ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc,
		       hugetlbsize / 1024);

	flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;

	fd = memfd_create("test", flags);
	if (fd < 0) {
		ksft_test_result_skip("memfd_create() failed\n");
		return;
	}

	/* File consists of a single page filled with zeroes. */
	if (fallocate(fd, 0, 0, hugetlbsize)) {
		ksft_test_result_skip("need more free huge pages\n");
		goto close;
	}

	/* Create a private mapping of the memfd. */
	mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd,
		   0);
	if (mem == MAP_FAILED) {
		ksft_test_result_skip("need more free huge pages\n");
		goto close;
	}
	smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0);
	if (smem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto munmap;
	}

	/* Fault the page in. */
	tmp = *mem + *smem;
	asm volatile("" : "+r" (tmp));

	fn(mem, smem, hugetlbsize);
munmap:
	munmap(mem, hugetlbsize);
	if (smem != MAP_FAILED)
		munmap(smem, hugetlbsize);
close:
	close(fd);
}

struct non_anon_test_case {
	const char *desc;
	non_anon_test_fn fn;
};

/*
 * Test cases that target any pages in private mappings that are not anonymous:
 * pages that may get shared via COW independent of fork(). This includes
 * the shared zeropage(s), pagecache pages, ...
 */
static const struct non_anon_test_case non_anon_test_cases[] = {
	/*
	 * Basic COW test without any GUP. If we fail to break COW, changes are
	 * visible via other private/shared mappings.
	 */
	{
		"Basic COW",
		test_cow,
	},
	/*
	 * Take a R/O longterm pin. When modifying the page via the page table,
	 * the page content change must be visible via the pin.
	 */
	{
		"R/O longterm GUP pin",
		test_ro_pin,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O longterm GUP-fast pin",
		test_ro_fast_pin,
	},
};

static void run_non_anon_test_case(struct non_anon_test_case const *test_case)
{
	int i;

	run_with_zeropage(test_case->fn, test_case->desc);
	run_with_memfd(test_case->fn, test_case->desc);
	run_with_tmpfile(test_case->fn, test_case->desc);
	if (thpsize)
		run_with_huge_zeropage(test_case->fn, test_case->desc);
	for (i = 0; i < nr_hugetlbsizes; i++)
		run_with_memfd_hugetlb(test_case->fn, test_case->desc,
				       hugetlbsizes[i]);
}

static void run_non_anon_test_cases(void)
{
	int i;

	ksft_print_msg("[INFO] Non-anonymous memory tests in private mappings\n");

	for (i = 0; i < ARRAY_SIZE(non_anon_test_cases); i++)
		run_non_anon_test_case(&non_anon_test_cases[i]);
}

static int tests_per_non_anon_test_case(void)
{
	int tests = 3 + nr_hugetlbsizes;

	if (thpsize)
		tests += 1;
	return tests;
}

int main(int argc, char **argv)
{
	int err;

	pagesize = getpagesize();
	thpsize = read_pmd_pagesize();
	if (thpsize)
		ksft_print_msg("[INFO] detected THP size: %zu KiB\n",
			       thpsize / 1024);
	detect_hugetlbsizes();
	detect_huge_zeropage();

	ksft_print_header();
	ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() +
		      ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() +
		      ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case());

	gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
	if (pagemap_fd < 0)
		ksft_exit_fail_msg("opening pagemap failed\n");

	run_anon_test_cases();
	run_anon_thp_test_cases();
	run_non_anon_test_cases();

	err = ksft_get_fail_cnt();
	if (err)
		ksft_exit_fail_msg("%d out of %d tests failed\n",
				   err, ksft_test_num());
	return ksft_exit_pass();
}