// SPDX-License-Identifier: GPL-2.0-only
/*
 * COW (Copy On Write) tests.
 *
 * Copyright 2022, Red Hat, Inc.
 *
 * Author(s): David Hildenbrand <david@redhat.com>
 */
#define _GNU_SOURCE
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <stdint.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <assert.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/wait.h>
#include <linux/memfd.h>

#include "local_config.h"
#ifdef LOCAL_CONFIG_HAVE_LIBURING
#include <liburing.h>
#endif /* LOCAL_CONFIG_HAVE_LIBURING */

#include "../../../../mm/gup_test.h"
#include "../kselftest.h"
#include "vm_util.h"

#ifndef MADV_PAGEOUT
#define MADV_PAGEOUT 21
#endif
#ifndef MADV_COLLAPSE
#define MADV_COLLAPSE 25
#endif

static size_t pagesize;
static int pagemap_fd;
static size_t thpsize;
static int nr_hugetlbsizes;
static size_t hugetlbsizes[10];
static int gup_fd;
static bool has_huge_zeropage;

static void detect_huge_zeropage(void)
{
	int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page",
		      O_RDONLY);
	size_t enabled = 0;
	char buf[15];
	int ret;

	if (fd < 0)
		return;

	ret = pread(fd, buf, sizeof(buf), 0);
	if (ret > 0 && ret < sizeof(buf)) {
		buf[ret] = 0;

		enabled = strtoul(buf, NULL, 10);
		if (enabled == 1) {
			has_huge_zeropage = true;
			ksft_print_msg("[INFO] huge zeropage is enabled\n");
		}
	}

	close(fd);
}

static bool range_is_swapped(void *addr, size_t size)
{
	for (; size; addr += pagesize, size -= pagesize)
		if (!pagemap_is_swapped(pagemap_fd, addr))
			return false;
	return true;
}

struct comm_pipes {
	int child_ready[2];
	int parent_ready[2];
};

static int setup_comm_pipes(struct comm_pipes *comm_pipes)
{
	if (pipe(comm_pipes->child_ready) < 0)
		return -errno;
	if (pipe(comm_pipes->parent_ready) < 0) {
		close(comm_pipes->child_ready[0]);
		close(comm_pipes->child_ready[1]);
		return -errno;
	}

	return 0;
}

static void close_comm_pipes(struct comm_pipes *comm_pipes)
{
	close(comm_pipes->child_ready[0]);
	close(comm_pipes->child_ready[1]);
	close(comm_pipes->parent_ready[0]);
	close(comm_pipes->parent_ready[1]);
}

static int child_memcmp_fn(char *mem, size_t size,
			   struct comm_pipes *comm_pipes)
{
	char *old = malloc(size);
	char buf;

	/* Back up the original content. */
	memcpy(old, mem, size);

	/* Wait until the parent modified the page. */
	write(comm_pipes->child_ready[1], "0", 1);
	while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
		;

	/* See if we still read the old values. */
	return memcmp(old, mem, size);
}

static int child_vmsplice_memcmp_fn(char *mem, size_t size,
				    struct comm_pipes *comm_pipes)
{
	struct iovec iov = {
		.iov_base = mem,
		.iov_len = size,
	};
	ssize_t cur, total, transferred;
	char *old, *new;
	int fds[2];
	char buf;

	old = malloc(size);
	new = malloc(size);

	/* Back up the original content. */
	memcpy(old, mem, size);

	if (pipe(fds) < 0)
		return -errno;

	/* Trigger a read-only pin. */
	transferred = vmsplice(fds[1], &iov, 1, 0);
	if (transferred < 0)
		return -errno;
	if (transferred == 0)
		return -EINVAL;

	/* Unmap it from our page tables. */
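	/*
	 * The pipe still holds references to the spliced pages, so their
	 * content remains readable via fds[0] even without any mapping.
	 */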
	if (munmap(mem, size) < 0)
		return -errno;

	/* Wait until the parent modified it. */
	write(comm_pipes->child_ready[1], "0", 1);
	while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
		;

	/* See if we still read the old values via the pipe. */
	for (total = 0; total < transferred; total += cur) {
		cur = read(fds[0], new + total, transferred - total);
		if (cur < 0)
			return -errno;
	}

	return memcmp(old, new, transferred);
}

typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes);

static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
				  child_fn fn)
{
	struct comm_pipes comm_pipes;
	char buf;
	int ret;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		return;
	}

	ret = fork();
	if (ret < 0) {
		ksft_test_result_fail("fork() failed\n");
		goto close_comm_pipes;
	} else if (!ret) {
		exit(fn(mem, size, &comm_pipes));
	}

	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
		;

	if (do_mprotect) {
		/*
		 * mprotect() optimizations might try avoiding
		 * write-faults by directly mapping pages writable.
		 */
		ret = mprotect(mem, size, PROT_READ);
		ret |= mprotect(mem, size, PROT_READ|PROT_WRITE);
		if (ret) {
			ksft_test_result_fail("mprotect() failed\n");
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}
	}

	/* Modify the page. */
	memset(mem, 0xff, size);
	write(comm_pipes.parent_ready[1], "0", 1);

	wait(&ret);
	if (WIFEXITED(ret))
		ret = WEXITSTATUS(ret);
	else
		ret = -EINVAL;

	ksft_test_result(!ret, "No leak from parent into child\n");
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
}

static void test_cow_in_parent(char *mem, size_t size)
{
	do_test_cow_in_parent(mem, size, false, child_memcmp_fn);
}

static void test_cow_in_parent_mprotect(char *mem, size_t size)
{
	do_test_cow_in_parent(mem, size, true, child_memcmp_fn);
}

static void test_vmsplice_in_child(char *mem, size_t size)
{
	do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn);
}

static void test_vmsplice_in_child_mprotect(char *mem, size_t size)
{
	do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn);
}

static void do_test_vmsplice_in_parent(char *mem, size_t size,
				       bool before_fork)
{
	struct iovec iov = {
		.iov_base = mem,
		.iov_len = size,
	};
	ssize_t cur, total, transferred;
	struct comm_pipes comm_pipes;
	char *old, *new;
	int ret, fds[2];
	char buf;

	old = malloc(size);
	new = malloc(size);

	memcpy(old, mem, size);

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		goto free;
	}

	if (pipe(fds) < 0) {
		ksft_test_result_fail("pipe() failed\n");
		goto close_comm_pipes;
	}

	if (before_fork) {
		transferred = vmsplice(fds[1], &iov, 1, 0);
		if (transferred <= 0) {
			ksft_test_result_fail("vmsplice() failed\n");
			goto close_pipe;
		}
	}

	ret = fork();
	if (ret < 0) {
		ksft_test_result_fail("fork() failed\n");
		goto close_pipe;
	} else if (!ret) {
		write(comm_pipes.child_ready[1], "0", 1);
		while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
			;
		/* Modify page content in the child. */
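		/*
		 * If COW is handled correctly, this write lands in a fresh
		 * copy and the parent keeps reading the old content via the
		 * pipe.
		 */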
		memset(mem, 0xff, size);
		exit(0);
	}

	if (!before_fork) {
		transferred = vmsplice(fds[1], &iov, 1, 0);
		if (transferred <= 0) {
			ksft_test_result_fail("vmsplice() failed\n");
			wait(&ret);
			goto close_pipe;
		}
	}

	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
		;
	if (munmap(mem, size) < 0) {
		ksft_test_result_fail("munmap() failed\n");
		goto close_pipe;
	}
	write(comm_pipes.parent_ready[1], "0", 1);

	/* Wait until the child is done writing. */
	wait(&ret);
	if (!WIFEXITED(ret)) {
		ksft_test_result_fail("wait() failed\n");
		goto close_pipe;
	}

	/* See if we still read the old values. */
	for (total = 0; total < transferred; total += cur) {
		cur = read(fds[0], new + total, transferred - total);
		if (cur < 0) {
			ksft_test_result_fail("read() failed\n");
			goto close_pipe;
		}
	}

	ksft_test_result(!memcmp(old, new, transferred),
			 "No leak from child into parent\n");
close_pipe:
	close(fds[0]);
	close(fds[1]);
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
free:
	free(old);
	free(new);
}

static void test_vmsplice_before_fork(char *mem, size_t size)
{
	do_test_vmsplice_in_parent(mem, size, true);
}

static void test_vmsplice_after_fork(char *mem, size_t size)
{
	do_test_vmsplice_in_parent(mem, size, false);
}

#ifdef LOCAL_CONFIG_HAVE_LIBURING
static void do_test_iouring(char *mem, size_t size, bool use_fork)
{
	struct comm_pipes comm_pipes;
	struct io_uring_cqe *cqe;
	struct io_uring_sqe *sqe;
	struct io_uring ring;
	ssize_t cur, total;
	struct iovec iov;
	char *tmp, buf;
	int ret, fd;
	FILE *file;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		return;
	}

	file = tmpfile();
	if (!file) {
		ksft_test_result_fail("tmpfile() failed\n");
		goto close_comm_pipes;
	}
	fd = fileno(file);
	assert(fd >= 0);

	tmp = malloc(size);
	if (!tmp) {
		ksft_test_result_fail("malloc() failed\n");
		goto close_file;
	}

	/* Skip on errors, as we might just lack kernel support. */
	ret = io_uring_queue_init(1, &ring, 0);
	if (ret < 0) {
		ksft_test_result_skip("io_uring_queue_init() failed\n");
		goto free_tmp;
	}

	/*
	 * Register the range as a fixed buffer. This will FOLL_WRITE | FOLL_PIN
	 * | FOLL_LONGTERM the range.
	 *
	 * Skip on errors, as we might just lack kernel support or might not
	 * have sufficient MEMLOCK permissions.
	 */
	iov.iov_base = mem;
	iov.iov_len = size;
	ret = io_uring_register_buffers(&ring, &iov, 1);
	if (ret) {
		ksft_test_result_skip("io_uring_register_buffers() failed\n");
		goto queue_exit;
	}

	if (use_fork) {
		/*
		 * fork() and keep the child alive until we're done. Note that
		 * we expect the pinned page to not get shared with the child.
		 */
		ret = fork();
		if (ret < 0) {
			ksft_test_result_fail("fork() failed\n");
			goto unregister_buffers;
		} else if (!ret) {
			write(comm_pipes.child_ready[1], "0", 1);
			while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
				;
			exit(0);
		}

		while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
			;
	} else {
		/*
		 * Map the page R/O into the page table. Enable softdirty
		 * tracking to stop the page from getting mapped R/W immediately
		 * again by mprotect() optimizations.
		 */
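		/*
		 * Clearing softdirty write-protects the PTEs, so the next
		 * write triggers a fault where COW vs. reuse gets decided.
		 */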
		/*
		 * Note that we don't have an easy way to test if that worked
		 * (the pagemap does not export if the page is mapped R/O vs.
		 * R/W).
		 */
		ret = mprotect(mem, size, PROT_READ);
		clear_softdirty();
		ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
		if (ret) {
			ksft_test_result_fail("mprotect() failed\n");
			goto unregister_buffers;
		}
	}

	/*
	 * Modify the page and write page content as observed by the fixed
	 * buffer pin to the file so we can verify it.
	 */
	memset(mem, 0xff, size);
	sqe = io_uring_get_sqe(&ring);
	if (!sqe) {
		ksft_test_result_fail("io_uring_get_sqe() failed\n");
		goto quit_child;
	}
	io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0);

	ret = io_uring_submit(&ring);
	if (ret < 0) {
		ksft_test_result_fail("io_uring_submit() failed\n");
		goto quit_child;
	}

	ret = io_uring_wait_cqe(&ring, &cqe);
	if (ret < 0) {
		ksft_test_result_fail("io_uring_wait_cqe() failed\n");
		goto quit_child;
	}

	if (cqe->res != size) {
		ksft_test_result_fail("write_fixed failed\n");
		goto quit_child;
	}
	io_uring_cqe_seen(&ring, cqe);

	/* Read back the file content to the temporary buffer. */
	total = 0;
	while (total < size) {
		cur = pread(fd, tmp + total, size - total, total);
		if (cur < 0) {
			ksft_test_result_fail("pread() failed\n");
			goto quit_child;
		}
		total += cur;
	}

	/* Finally, check if we read what we expected. */
	ksft_test_result(!memcmp(mem, tmp, size),
			 "Longterm R/W pin is reliable\n");

quit_child:
	if (use_fork) {
		write(comm_pipes.parent_ready[1], "0", 1);
		wait(&ret);
	}
unregister_buffers:
	io_uring_unregister_buffers(&ring);
queue_exit:
	io_uring_queue_exit(&ring);
free_tmp:
	free(tmp);
close_file:
	fclose(file);
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
}

static void test_iouring_ro(char *mem, size_t size)
{
	do_test_iouring(mem, size, false);
}

static void test_iouring_fork(char *mem, size_t size)
{
	do_test_iouring(mem, size, true);
}

#endif /* LOCAL_CONFIG_HAVE_LIBURING */

enum ro_pin_test {
	RO_PIN_TEST,
	RO_PIN_TEST_SHARED,
	RO_PIN_TEST_PREVIOUSLY_SHARED,
	RO_PIN_TEST_RO_EXCLUSIVE,
};

static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
			   bool fast)
{
	struct pin_longterm_test args;
	struct comm_pipes comm_pipes;
	char *tmp, buf;
	__u64 tmp_val;
	int ret;

	if (gup_fd < 0) {
		ksft_test_result_skip("gup_test not available\n");
		return;
	}

	tmp = malloc(size);
	if (!tmp) {
		ksft_test_result_fail("malloc() failed\n");
		return;
	}

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		goto free_tmp;
	}

	switch (test) {
	case RO_PIN_TEST:
		break;
	case RO_PIN_TEST_SHARED:
	case RO_PIN_TEST_PREVIOUSLY_SHARED:
		/*
		 * Share the pages with our child. As the pages are not pinned,
		 * this should just work.
		 */
		ret = fork();
		if (ret < 0) {
			ksft_test_result_fail("fork() failed\n");
			goto close_comm_pipes;
		} else if (!ret) {
			write(comm_pipes.child_ready[1], "0", 1);
			while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
				;
			exit(0);
		}

		/* Wait until our child is ready. */
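		/*
		 * After fork(), the pages are COW-shared and mapped R/O in
		 * both parent and child.
		 */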
		while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
			;

		if (test == RO_PIN_TEST_PREVIOUSLY_SHARED) {
			/*
			 * Tell the child to quit now and wait until it quit.
			 * The pages should now be mapped R/O into our page
			 * tables, but they are no longer shared.
			 */
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			if (!WIFEXITED(ret))
				ksft_print_msg("[INFO] wait() failed\n");
		}
		break;
	case RO_PIN_TEST_RO_EXCLUSIVE:
		/*
		 * Map the page R/O into the page table. Enable softdirty
		 * tracking to stop the page from getting mapped R/W immediately
		 * again by mprotect() optimizations. Note that we don't have an
		 * easy way to test if that worked (the pagemap does not export
		 * if the page is mapped R/O vs. R/W).
		 */
		ret = mprotect(mem, size, PROT_READ);
		clear_softdirty();
		ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
		if (ret) {
			ksft_test_result_fail("mprotect() failed\n");
			goto close_comm_pipes;
		}
		break;
	default:
		assert(false);
	}

	/* Take a R/O pin. This should trigger unsharing. */
	args.addr = (__u64)(uintptr_t)mem;
	args.size = size;
	args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0;
	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
	if (ret) {
		if (errno == EINVAL)
			ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n");
		else
			ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n");
		goto wait;
	}

	/* Modify the page. */
	memset(mem, 0xff, size);

	/*
	 * Read back the content via the pin to the temporary buffer and
	 * test if we observed the modification.
	 */
	tmp_val = (__u64)(uintptr_t)tmp;
	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val);
	if (ret)
		ksft_test_result_fail("PIN_LONGTERM_TEST_READ failed\n");
	else
		ksft_test_result(!memcmp(mem, tmp, size),
				 "Longterm R/O pin is reliable\n");

	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP);
	if (ret)
		ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n");
wait:
	switch (test) {
	case RO_PIN_TEST_SHARED:
		write(comm_pipes.parent_ready[1], "0", 1);
		wait(&ret);
		if (!WIFEXITED(ret))
			ksft_print_msg("[INFO] wait() failed\n");
		break;
	default:
		break;
	}
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
free_tmp:
	free(tmp);
}

static void test_ro_pin_on_shared(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, false);
}

static void test_ro_fast_pin_on_shared(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, true);
}

static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, false);
}

static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, true);
}

static void test_ro_pin_on_ro_exclusive(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, false);
}

static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, true);
}

typedef void (*test_fn)(char *mem, size_t size);
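/*
 * Run a test on a single, freshly populated base (order-0) anonymous page,
 * optionally swapping it out via MADV_PAGEOUT first.
 */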
static void do_run_with_base_page(test_fn fn, bool swapout)
{
	char *mem;
	int ret;

	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return;
	}

	ret = madvise(mem, pagesize, MADV_NOHUGEPAGE);
	/* Ignore if MADV_NOHUGEPAGE is not available on this kernel. */
	if (ret && errno != EINVAL) {
		ksft_test_result_fail("MADV_NOHUGEPAGE failed\n");
		goto munmap;
	}

	/* Populate a base page. */
	memset(mem, 0, pagesize);

	if (swapout) {
		madvise(mem, pagesize, MADV_PAGEOUT);
		if (!pagemap_is_swapped(pagemap_fd, mem)) {
			ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
			goto munmap;
		}
	}

	fn(mem, pagesize);
munmap:
	munmap(mem, pagesize);
}

static void run_with_base_page(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with base page\n", desc);
	do_run_with_base_page(fn, false);
}

static void run_with_base_page_swap(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with swapped out base page\n", desc);
	do_run_with_base_page(fn, true);
}

enum thp_run {
	THP_RUN_PMD,
	THP_RUN_PMD_SWAPOUT,
	THP_RUN_PTE,
	THP_RUN_PTE_SWAPOUT,
	THP_RUN_SINGLE_PTE,
	THP_RUN_SINGLE_PTE_SWAPOUT,
	THP_RUN_PARTIAL_MREMAP,
	THP_RUN_PARTIAL_SHARED,
};

static void do_run_with_thp(test_fn fn, enum thp_run thp_run)
{
	char *mem, *mmap_mem, *tmp, *mremap_mem = MAP_FAILED;
	size_t size, mmap_size, mremap_size;
	int ret;

	/* For alignment purposes, we need twice the thp size. */
	mmap_size = 2 * thpsize;
	mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mmap_mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return;
	}

	/* We need a THP-aligned memory area. */
	mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1));

	ret = madvise(mem, thpsize, MADV_HUGEPAGE);
	if (ret) {
		ksft_test_result_fail("MADV_HUGEPAGE failed\n");
		goto munmap;
	}

	/*
	 * Try to populate a THP. Touch the first sub-page and test if we get
	 * another sub-page populated automatically.
	 */
	mem[0] = 0;
	if (!pagemap_is_populated(pagemap_fd, mem + pagesize)) {
		ksft_test_result_skip("Did not get a THP populated\n");
		goto munmap;
	}
	memset(mem, 0, thpsize);

	size = thpsize;
	switch (thp_run) {
	case THP_RUN_PMD:
	case THP_RUN_PMD_SWAPOUT:
		break;
	case THP_RUN_PTE:
	case THP_RUN_PTE_SWAPOUT:
		/*
		 * Trigger PTE-mapping the THP by temporarily mapping a single
		 * subpage R/O.
		 */
		ret = mprotect(mem + pagesize, pagesize, PROT_READ);
		if (ret) {
			ksft_test_result_fail("mprotect() failed\n");
			goto munmap;
		}
		ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
		if (ret) {
			ksft_test_result_fail("mprotect() failed\n");
			goto munmap;
		}
		break;
	case THP_RUN_SINGLE_PTE:
	case THP_RUN_SINGLE_PTE_SWAPOUT:
		/*
		 * Discard all but a single subpage of that PTE-mapped THP. What
		 * remains is a single PTE mapping a single subpage.
		 */
		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED);
		if (ret) {
			ksft_test_result_fail("MADV_DONTNEED failed\n");
			goto munmap;
		}
		size = pagesize;
		break;
	case THP_RUN_PARTIAL_MREMAP:
		/*
		 * Remap half of the THP. We need some new memory location
		 * for that.
		 */
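		/*
		 * Moving only half of the THP requires splitting the PMD
		 * mapping into PTEs.
		 */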
		mremap_size = thpsize / 2;
		mremap_mem = mmap(NULL, mremap_size, PROT_NONE,
				  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (mremap_mem == MAP_FAILED) {
			ksft_test_result_fail("mmap() failed\n");
			goto munmap;
		}
		tmp = mremap(mem + mremap_size, mremap_size, mremap_size,
			     MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem);
		if (tmp != mremap_mem) {
			ksft_test_result_fail("mremap() failed\n");
			goto munmap;
		}
		size = mremap_size;
		break;
	case THP_RUN_PARTIAL_SHARED:
		/*
		 * Share the first page of the THP with a child and quit the
		 * child. This will result in some parts of the THP never
		 * having been shared.
		 */
		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK);
		if (ret) {
			ksft_test_result_fail("MADV_DONTFORK failed\n");
			goto munmap;
		}
		ret = fork();
		if (ret < 0) {
			ksft_test_result_fail("fork() failed\n");
			goto munmap;
		} else if (!ret) {
			exit(0);
		}
		wait(&ret);
		/* Allow for sharing all pages again. */
		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK);
		if (ret) {
			ksft_test_result_fail("MADV_DOFORK failed\n");
			goto munmap;
		}
		break;
	default:
		assert(false);
	}

	switch (thp_run) {
	case THP_RUN_PMD_SWAPOUT:
	case THP_RUN_PTE_SWAPOUT:
	case THP_RUN_SINGLE_PTE_SWAPOUT:
		madvise(mem, size, MADV_PAGEOUT);
		if (!range_is_swapped(mem, size)) {
			ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
			goto munmap;
		}
		break;
	default:
		break;
	}

	fn(mem, size);
munmap:
	munmap(mmap_mem, mmap_size);
	if (mremap_mem != MAP_FAILED)
		munmap(mremap_mem, mremap_size);
}

static void run_with_thp(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PMD);
}

static void run_with_thp_swap(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with swapped-out THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT);
}

static void run_with_pte_mapped_thp(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with PTE-mapped THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PTE);
}

static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT);
}

static void run_with_single_pte_of_thp(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with single PTE of THP\n", desc);
	do_run_with_thp(fn, THP_RUN_SINGLE_PTE);
}

static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP\n", desc);
	do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT);
}

static void run_with_partial_mremap_thp(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP);
}

static void run_with_partial_shared_thp(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with partially shared THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED);
}
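/*
 * mmap() expects the hugetlb page size encoded as log2(size) in the
 * MAP_HUGE_SHIFT flag bits; __builtin_ctzll() computes that log2 for the
 * power-of-two hugetlb sizes.
 */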
static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize)
{
	int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
	char *mem, *dummy;

	ksft_print_msg("[RUN] %s ... with hugetlb (%zu kB)\n", desc,
		       hugetlbsize / 1024);

	flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT;

	mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_skip("need more free huge pages\n");
		return;
	}

	/* Populate a huge page. */
	memset(mem, 0, hugetlbsize);

	/*
	 * We need a total of two hugetlb pages to handle COW/unsharing
	 * properly, otherwise we might get zapped by a SIGBUS.
	 */
	dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
	if (dummy == MAP_FAILED) {
		ksft_test_result_skip("need more free huge pages\n");
		goto munmap;
	}
	munmap(dummy, hugetlbsize);

	fn(mem, hugetlbsize);
munmap:
	munmap(mem, hugetlbsize);
}

struct test_case {
	const char *desc;
	test_fn fn;
};

/*
 * Test cases that are specific to anonymous pages: pages in private mappings
 * that may get shared via COW during fork().
 */
static const struct test_case anon_test_cases[] = {
	/*
	 * Basic COW tests for fork() without any GUP. If we fail to break COW,
	 * either the child can observe modifications by the parent or the
	 * other way around.
	 */
	{
		"Basic COW after fork()",
		test_cow_in_parent,
	},
	/*
	 * Basic test, but do an additional mprotect(PROT_READ)+
	 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
	 */
	{
		"Basic COW after fork() with mprotect() optimization",
		test_cow_in_parent_mprotect,
	},
	/*
	 * vmsplice() [R/O GUP] + unmap in the child; modify in the parent. If
	 * we fail to break COW, the child observes modifications by the
	 * parent. This is CVE-2020-29374 reported by Jann Horn.
	 */
	{
		"vmsplice() + unmap in child",
		test_vmsplice_in_child,
	},
	/*
	 * vmsplice() test, but do an additional mprotect(PROT_READ)+
	 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
	 */
	{
		"vmsplice() + unmap in child with mprotect() optimization",
		test_vmsplice_in_child_mprotect,
	},
	/*
	 * vmsplice() [R/O GUP] in parent before fork(), unmap in parent after
	 * fork(); modify in the child. If we fail to break COW, the parent
	 * observes modifications by the child.
	 */
	{
		"vmsplice() before fork(), unmap in parent after fork()",
		test_vmsplice_before_fork,
	},
	/*
	 * vmsplice() [R/O GUP] + unmap in parent after fork(); modify in the
	 * child. If we fail to break COW, the parent observes modifications
	 * by the child.
	 */
	{
		"vmsplice() + unmap in parent after fork()",
		test_vmsplice_after_fork,
	},
#ifdef LOCAL_CONFIG_HAVE_LIBURING
	/*
	 * Take a R/W longterm pin and then map the page R/O into the page
	 * table to trigger a write fault on next access. When modifying the
	 * page, the page content must be visible via the pin.
	 */
	{
		"R/O-mapping a page registered as iouring fixed buffer",
		test_iouring_ro,
	},
	/*
	 * Take a R/W longterm pin and then fork() a child. When modifying the
	 * page, the page content must be visible via the pin. We expect the
	 * pinned page to not get shared with the child.
	 */
	{
		"fork() with an iouring fixed buffer",
		test_iouring_fork,
	},
#endif /* LOCAL_CONFIG_HAVE_LIBURING */
	/*
	 * Take a R/O longterm pin on a R/O-mapped shared anonymous page.
	 * When modifying the page via the page table, the page content change
	 * must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped shared page",
		test_ro_pin_on_shared,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped shared page",
		test_ro_fast_pin_on_shared,
	},
	/*
	 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page
	 * that was previously shared. When modifying the page via the page
	 * table, the page content change must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped previously-shared page",
		test_ro_pin_on_ro_previously_shared,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped previously-shared page",
		test_ro_fast_pin_on_ro_previously_shared,
	},
	/*
	 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page.
	 * When modifying the page via the page table, the page content change
	 * must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped exclusive page",
		test_ro_pin_on_ro_exclusive,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped exclusive page",
		test_ro_fast_pin_on_ro_exclusive,
	},
};

static void run_anon_test_case(struct test_case const *test_case)
{
	int i;

	run_with_base_page(test_case->fn, test_case->desc);
	run_with_base_page_swap(test_case->fn, test_case->desc);
	if (thpsize) {
		run_with_thp(test_case->fn, test_case->desc);
		run_with_thp_swap(test_case->fn, test_case->desc);
		run_with_pte_mapped_thp(test_case->fn, test_case->desc);
		run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc);
		run_with_single_pte_of_thp(test_case->fn, test_case->desc);
		run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc);
		run_with_partial_mremap_thp(test_case->fn, test_case->desc);
		run_with_partial_shared_thp(test_case->fn, test_case->desc);
	}
	for (i = 0; i < nr_hugetlbsizes; i++)
		run_with_hugetlb(test_case->fn, test_case->desc,
				 hugetlbsizes[i]);
}

static void run_anon_test_cases(void)
{
	int i;

	ksft_print_msg("[INFO] Anonymous memory tests in private mappings\n");

	for (i = 0; i < ARRAY_SIZE(anon_test_cases); i++)
		run_anon_test_case(&anon_test_cases[i]);
}

static int tests_per_anon_test_case(void)
{
	int tests = 2 + nr_hugetlbsizes;

	if (thpsize)
		tests += 8;
	return tests;
}

enum anon_thp_collapse_test {
	ANON_THP_COLLAPSE_UNSHARED,
	ANON_THP_COLLAPSE_FULLY_SHARED,
	ANON_THP_COLLAPSE_LOWER_SHARED,
	ANON_THP_COLLAPSE_UPPER_SHARED,
};

static void do_test_anon_thp_collapse(char *mem, size_t size,
				      enum anon_thp_collapse_test test)
{
	struct comm_pipes comm_pipes;
	char buf;
	int ret;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		return;
	}

	/*
	 * Trigger PTE-mapping the THP by temporarily mapping a single subpage
	 * R/O, such that we can try collapsing it later.
	 */
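	/*
	 * Changing the protection of a single subpage cannot be represented
	 * by the PMD mapping, so the kernel falls back to PTE-mapping the THP.
	 */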
	ret = mprotect(mem + pagesize, pagesize, PROT_READ);
	if (ret) {
		ksft_test_result_fail("mprotect() failed\n");
		goto close_comm_pipes;
	}
	ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
	if (ret) {
		ksft_test_result_fail("mprotect() failed\n");
		goto close_comm_pipes;
	}

	switch (test) {
	case ANON_THP_COLLAPSE_UNSHARED:
		/* Collapse before actually COW-sharing the page. */
		ret = madvise(mem, size, MADV_COLLAPSE);
		if (ret) {
			ksft_test_result_skip("MADV_COLLAPSE failed: %s\n",
					      strerror(errno));
			goto close_comm_pipes;
		}
		break;
	case ANON_THP_COLLAPSE_FULLY_SHARED:
		/* COW-share the full PTE-mapped THP. */
		break;
	case ANON_THP_COLLAPSE_LOWER_SHARED:
		/* Don't COW-share the upper part of the THP. */
		ret = madvise(mem + size / 2, size / 2, MADV_DONTFORK);
		if (ret) {
			ksft_test_result_fail("MADV_DONTFORK failed\n");
			goto close_comm_pipes;
		}
		break;
	case ANON_THP_COLLAPSE_UPPER_SHARED:
		/* Don't COW-share the lower part of the THP. */
		ret = madvise(mem, size / 2, MADV_DONTFORK);
		if (ret) {
			ksft_test_result_fail("MADV_DONTFORK failed\n");
			goto close_comm_pipes;
		}
		break;
	default:
		assert(false);
	}

	ret = fork();
	if (ret < 0) {
		ksft_test_result_fail("fork() failed\n");
		goto close_comm_pipes;
	} else if (!ret) {
		switch (test) {
		case ANON_THP_COLLAPSE_UNSHARED:
		case ANON_THP_COLLAPSE_FULLY_SHARED:
			exit(child_memcmp_fn(mem, size, &comm_pipes));
			break;
		case ANON_THP_COLLAPSE_LOWER_SHARED:
			exit(child_memcmp_fn(mem, size / 2, &comm_pipes));
			break;
		case ANON_THP_COLLAPSE_UPPER_SHARED:
			exit(child_memcmp_fn(mem + size / 2, size / 2,
					     &comm_pipes));
			break;
		default:
			assert(false);
		}
	}

	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
		;

	switch (test) {
	case ANON_THP_COLLAPSE_UNSHARED:
		break;
	case ANON_THP_COLLAPSE_UPPER_SHARED:
	case ANON_THP_COLLAPSE_LOWER_SHARED:
		/*
		 * Revert MADV_DONTFORK such that we merge the VMAs and are
		 * able to actually collapse.
		 */
		ret = madvise(mem, size, MADV_DOFORK);
		if (ret) {
			ksft_test_result_fail("MADV_DOFORK failed\n");
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}
		/* FALLTHROUGH */
	case ANON_THP_COLLAPSE_FULLY_SHARED:
		/* Collapse before anyone modified the COW-shared page. */
		ret = madvise(mem, size, MADV_COLLAPSE);
		if (ret) {
			ksft_test_result_skip("MADV_COLLAPSE failed: %s\n",
					      strerror(errno));
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}
		break;
	default:
		assert(false);
	}

	/* Modify the page. */
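	/*
	 * If MADV_COLLAPSE mismanaged the exclusivity information, this write
	 * might reuse a page that is still COW-shared with the child.
	 */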
	memset(mem, 0xff, size);
	write(comm_pipes.parent_ready[1], "0", 1);

	wait(&ret);
	if (WIFEXITED(ret))
		ret = WEXITSTATUS(ret);
	else
		ret = -EINVAL;

	ksft_test_result(!ret, "No leak from parent into child\n");
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
}

static void test_anon_thp_collapse_unshared(char *mem, size_t size)
{
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UNSHARED);
}

static void test_anon_thp_collapse_fully_shared(char *mem, size_t size)
{
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_FULLY_SHARED);
}

static void test_anon_thp_collapse_lower_shared(char *mem, size_t size)
{
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_LOWER_SHARED);
}

static void test_anon_thp_collapse_upper_shared(char *mem, size_t size)
{
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UPPER_SHARED);
}

/*
 * Test cases that are specific to anonymous THP: pages in private mappings
 * that may get shared via COW during fork().
 */
static const struct test_case anon_thp_test_cases[] = {
	/*
	 * Basic COW test for fork() without any GUP when collapsing a THP
	 * before fork().
	 *
	 * Re-mapping a PTE-mapped anon THP using a single PMD ("in-place
	 * collapse") might easily get COW handling wrong when not collapsing
	 * exclusivity information properly.
	 */
	{
		"Basic COW after fork() when collapsing before fork()",
		test_anon_thp_collapse_unshared,
	},
	/* Basic COW test, but collapse after COW-sharing a full THP. */
	{
		"Basic COW after fork() when collapsing after fork() (fully shared)",
		test_anon_thp_collapse_fully_shared,
	},
	/*
	 * Basic COW test, but collapse after COW-sharing the lower half of a
	 * THP.
	 */
	{
		"Basic COW after fork() when collapsing after fork() (lower shared)",
		test_anon_thp_collapse_lower_shared,
	},
	/*
	 * Basic COW test, but collapse after COW-sharing the upper half of a
	 * THP.
	 */
	{
		"Basic COW after fork() when collapsing after fork() (upper shared)",
		test_anon_thp_collapse_upper_shared,
	},
};

static void run_anon_thp_test_cases(void)
{
	int i;

	if (!thpsize)
		return;

	ksft_print_msg("[INFO] Anonymous THP tests\n");

	for (i = 0; i < ARRAY_SIZE(anon_thp_test_cases); i++) {
		struct test_case const *test_case = &anon_thp_test_cases[i];

		ksft_print_msg("[RUN] %s\n", test_case->desc);
		do_run_with_thp(test_case->fn, THP_RUN_PMD);
	}
}

static int tests_per_anon_thp_test_case(void)
{
	return thpsize ? 1 : 0;
}

typedef void (*non_anon_test_fn)(char *mem, const char *smem, size_t size);

static void test_cow(char *mem, const char *smem, size_t size)
{
	char *old = malloc(size);

	/* Back up the original content. */
	memcpy(old, smem, size);

	/* Modify the page. */
	memset(mem, 0xff, size);

	/* See if we still read the old values via the other mapping. */
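	/*
	 * smem maps the same backing page; a correctly broken COW leaves it
	 * unmodified.
	 */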
	ksft_test_result(!memcmp(smem, old, size),
			 "Other mapping not modified\n");
	free(old);
}

static void test_ro_pin(char *mem, const char *smem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST, false);
}

static void test_ro_fast_pin(char *mem, const char *smem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST, true);
}

static void run_with_zeropage(non_anon_test_fn fn, const char *desc)
{
	char *mem, *smem, tmp;

	ksft_print_msg("[RUN] %s ... with shared zeropage\n", desc);

	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANON, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return;
	}

	smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
	if (smem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto munmap;
	}

	/* Read from the page to populate the shared zeropage. */
	tmp = *mem + *smem;
	asm volatile("" : "+r" (tmp));

	fn(mem, smem, pagesize);
munmap:
	munmap(mem, pagesize);
	if (smem != MAP_FAILED)
		munmap(smem, pagesize);
}

static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
{
	char *mem, *smem, *mmap_mem, *mmap_smem, tmp;
	size_t mmap_size;
	int ret;

	ksft_print_msg("[RUN] %s ... with huge zeropage\n", desc);

	if (!has_huge_zeropage) {
		ksft_test_result_skip("Huge zeropage not enabled\n");
		return;
	}

	/* For alignment purposes, we need twice the thp size. */
	mmap_size = 2 * thpsize;
	mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mmap_mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return;
	}
	mmap_smem = mmap(NULL, mmap_size, PROT_READ,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mmap_smem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto munmap;
	}

	/* We need THP-aligned memory areas. */
	mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1));
	smem = (char *)(((uintptr_t)mmap_smem + thpsize) & ~(thpsize - 1));

	ret = madvise(mem, thpsize, MADV_HUGEPAGE);
	ret |= madvise(smem, thpsize, MADV_HUGEPAGE);
	if (ret) {
		ksft_test_result_fail("MADV_HUGEPAGE failed\n");
		goto munmap;
	}

	/*
	 * Read from the memory to populate the huge shared zeropage. Read from
	 * the first sub-page and test if we get another sub-page populated
	 * automatically.
	 */
	tmp = *mem + *smem;
	asm volatile("" : "+r" (tmp));
	if (!pagemap_is_populated(pagemap_fd, mem + pagesize) ||
	    !pagemap_is_populated(pagemap_fd, smem + pagesize)) {
		ksft_test_result_skip("Did not get THPs populated\n");
		goto munmap;
	}

	fn(mem, smem, thpsize);
munmap:
	munmap(mmap_mem, mmap_size);
	if (mmap_smem != MAP_FAILED)
		munmap(mmap_smem, mmap_size);
}

static void run_with_memfd(non_anon_test_fn fn, const char *desc)
{
	char *mem, *smem, tmp;
	int fd;

	ksft_print_msg("[RUN] %s ... with memfd\n", desc);

	fd = memfd_create("test", 0);
	if (fd < 0) {
		ksft_test_result_fail("memfd_create() failed\n");
		return;
	}

	/* File consists of a single page filled with zeroes. */
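	/*
	 * fallocate() instantiates the zeroed pagecache page that both
	 * mappings below will map until a write COWs the private one.
	 */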
	if (fallocate(fd, 0, 0, pagesize)) {
		ksft_test_result_fail("fallocate() failed\n");
		goto close;
	}

	/* Create a private mapping of the memfd. */
	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto close;
	}
	smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
	if (smem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto munmap;
	}

	/* Fault the page in. */
	tmp = *mem + *smem;
	asm volatile("" : "+r" (tmp));

	fn(mem, smem, pagesize);
munmap:
	munmap(mem, pagesize);
	if (smem != MAP_FAILED)
		munmap(smem, pagesize);
close:
	close(fd);
}

static void run_with_tmpfile(non_anon_test_fn fn, const char *desc)
{
	char *mem, *smem, tmp;
	FILE *file;
	int fd;

	ksft_print_msg("[RUN] %s ... with tmpfile\n", desc);

	file = tmpfile();
	if (!file) {
		ksft_test_result_fail("tmpfile() failed\n");
		return;
	}

	fd = fileno(file);
	if (fd < 0) {
		ksft_test_result_skip("fileno() failed\n");
		goto close;
	}

	/* File consists of a single page filled with zeroes. */
	if (fallocate(fd, 0, 0, pagesize)) {
		ksft_test_result_fail("fallocate() failed\n");
		goto close;
	}

	/* Create a private mapping of the tmpfile. */
	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto close;
	}
	smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
	if (smem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto munmap;
	}

	/* Fault the page in. */
	tmp = *mem + *smem;
	asm volatile("" : "+r" (tmp));

	fn(mem, smem, pagesize);
munmap:
	munmap(mem, pagesize);
	if (smem != MAP_FAILED)
		munmap(smem, pagesize);
close:
	fclose(file);
}

static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc,
				   size_t hugetlbsize)
{
	int flags = MFD_HUGETLB;
	char *mem, *smem, tmp;
	int fd;

	ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc,
		       hugetlbsize / 1024);

	flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;

	fd = memfd_create("test", flags);
	if (fd < 0) {
		ksft_test_result_skip("memfd_create() failed\n");
		return;
	}

	/* File consists of a single huge page filled with zeroes. */
	if (fallocate(fd, 0, 0, hugetlbsize)) {
		ksft_test_result_skip("need more free huge pages\n");
		goto close;
	}

	/* Create a private mapping of the memfd. */
	mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd,
		   0);
	if (mem == MAP_FAILED) {
		ksft_test_result_skip("need more free huge pages\n");
		goto close;
	}
	smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0);
	if (smem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto munmap;
	}

	/* Fault the page in. */
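	/*
	 * The empty asm keeps tmp live so the compiler cannot optimize out
	 * the faulting reads.
	 */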
	tmp = *mem + *smem;
	asm volatile("" : "+r" (tmp));

	fn(mem, smem, hugetlbsize);
munmap:
	munmap(mem, hugetlbsize);
	if (smem != MAP_FAILED)
		munmap(smem, hugetlbsize);
close:
	close(fd);
}

struct non_anon_test_case {
	const char *desc;
	non_anon_test_fn fn;
};

/*
 * Test cases that target any pages in private mappings that are not anonymous:
 * pages that may get shared via COW independent of fork(). This includes
 * the shared zeropage(s), pagecache pages, ...
 */
static const struct non_anon_test_case non_anon_test_cases[] = {
	/*
	 * Basic COW test without any GUP. If we fail to break COW, changes are
	 * visible via other private/shared mappings.
	 */
	{
		"Basic COW",
		test_cow,
	},
	/*
	 * Take a R/O longterm pin. When modifying the page via the page table,
	 * the page content change must be visible via the pin.
	 */
	{
		"R/O longterm GUP pin",
		test_ro_pin,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O longterm GUP-fast pin",
		test_ro_fast_pin,
	},
};

static void run_non_anon_test_case(struct non_anon_test_case const *test_case)
{
	int i;

	run_with_zeropage(test_case->fn, test_case->desc);
	run_with_memfd(test_case->fn, test_case->desc);
	run_with_tmpfile(test_case->fn, test_case->desc);
	if (thpsize)
		run_with_huge_zeropage(test_case->fn, test_case->desc);
	for (i = 0; i < nr_hugetlbsizes; i++)
		run_with_memfd_hugetlb(test_case->fn, test_case->desc,
				       hugetlbsizes[i]);
}

static void run_non_anon_test_cases(void)
{
	int i;

	ksft_print_msg("[INFO] Non-anonymous memory tests in private mappings\n");

	for (i = 0; i < ARRAY_SIZE(non_anon_test_cases); i++)
		run_non_anon_test_case(&non_anon_test_cases[i]);
}

static int tests_per_non_anon_test_case(void)
{
	int tests = 3 + nr_hugetlbsizes;

	if (thpsize)
		tests += 1;
	return tests;
}

int main(int argc, char **argv)
{
	int err;

	pagesize = getpagesize();
	thpsize = read_pmd_pagesize();
	if (thpsize)
		ksft_print_msg("[INFO] detected THP size: %zu KiB\n",
			       thpsize / 1024);
	nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes,
						    ARRAY_SIZE(hugetlbsizes));
	detect_huge_zeropage();

	ksft_print_header();
	ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() +
		      ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() +
		      ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case());

	gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
	if (pagemap_fd < 0)
		ksft_exit_fail_msg("opening pagemap failed\n");

	run_anon_test_cases();
	run_anon_thp_test_cases();
	run_non_anon_test_cases();

	err = ksft_get_fail_cnt();
	if (err)
		ksft_exit_fail_msg("%d out of %d tests failed\n",
				   err, ksft_test_num());
	return ksft_exit_pass();
}