1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * COW (Copy On Write) tests. 4 * 5 * Copyright 2022, Red Hat, Inc. 6 * 7 * Author(s): David Hildenbrand <david@redhat.com> 8 */ 9 #define _GNU_SOURCE 10 #include <stdlib.h> 11 #include <string.h> 12 #include <stdbool.h> 13 #include <stdint.h> 14 #include <unistd.h> 15 #include <errno.h> 16 #include <fcntl.h> 17 #include <assert.h> 18 #include <linux/mman.h> 19 #include <sys/mman.h> 20 #include <sys/ioctl.h> 21 #include <sys/wait.h> 22 #include <linux/memfd.h> 23 24 #include "local_config.h" 25 #ifdef LOCAL_CONFIG_HAVE_LIBURING 26 #include <liburing.h> 27 #endif /* LOCAL_CONFIG_HAVE_LIBURING */ 28 29 #include "../../../../mm/gup_test.h" 30 #include "kselftest.h" 31 #include "vm_util.h" 32 #include "thp_settings.h" 33 34 static size_t pagesize; 35 static int pagemap_fd; 36 static size_t pmdsize; 37 static int nr_thpsizes; 38 static size_t thpsizes[20]; 39 static int nr_hugetlbsizes; 40 static size_t hugetlbsizes[10]; 41 static int gup_fd; 42 static bool has_huge_zeropage; 43 44 static int detect_thp_sizes(size_t sizes[], int max) 45 { 46 int count = 0; 47 unsigned long orders; 48 size_t kb; 49 int i; 50 51 /* thp not supported at all. */ 52 if (!pmdsize) 53 return 0; 54 55 orders = 1UL << sz2ord(pmdsize, pagesize); 56 orders |= thp_supported_orders(); 57 58 for (i = 0; orders && count < max; i++) { 59 if (!(orders & (1UL << i))) 60 continue; 61 orders &= ~(1UL << i); 62 kb = (pagesize >> 10) << i; 63 sizes[count++] = kb * 1024; 64 ksft_print_msg("[INFO] detected THP size: %zu KiB\n", kb); 65 } 66 67 return count; 68 } 69 70 static bool range_is_swapped(void *addr, size_t size) 71 { 72 for (; size; addr += pagesize, size -= pagesize) 73 if (!pagemap_is_swapped(pagemap_fd, addr)) 74 return false; 75 return true; 76 } 77 78 static bool populate_page_checked(char *addr) 79 { 80 bool ret; 81 82 FORCE_READ(*addr); 83 ret = pagemap_is_populated(pagemap_fd, addr); 84 if (!ret) 85 ksft_print_msg("Failed to populate page\n"); 86 87 return ret; 88 } 89 90 struct comm_pipes { 91 int child_ready[2]; 92 int parent_ready[2]; 93 }; 94 95 static int setup_comm_pipes(struct comm_pipes *comm_pipes) 96 { 97 if (pipe(comm_pipes->child_ready) < 0) { 98 ksft_perror("pipe() failed"); 99 return -errno; 100 } 101 if (pipe(comm_pipes->parent_ready) < 0) { 102 ksft_perror("pipe() failed"); 103 close(comm_pipes->child_ready[0]); 104 close(comm_pipes->child_ready[1]); 105 return -errno; 106 } 107 108 return 0; 109 } 110 111 static void close_comm_pipes(struct comm_pipes *comm_pipes) 112 { 113 close(comm_pipes->child_ready[0]); 114 close(comm_pipes->child_ready[1]); 115 close(comm_pipes->parent_ready[0]); 116 close(comm_pipes->parent_ready[1]); 117 } 118 119 static int child_memcmp_fn(char *mem, size_t size, 120 struct comm_pipes *comm_pipes) 121 { 122 char *old = malloc(size); 123 char buf; 124 125 /* Backup the original content. */ 126 memcpy(old, mem, size); 127 128 /* Wait until the parent modified the page. */ 129 write(comm_pipes->child_ready[1], "0", 1); 130 while (read(comm_pipes->parent_ready[0], &buf, 1) != 1) 131 ; 132 133 /* See if we still read the old values. */ 134 return memcmp(old, mem, size); 135 } 136 137 static int child_vmsplice_memcmp_fn(char *mem, size_t size, 138 struct comm_pipes *comm_pipes) 139 { 140 struct iovec iov = { 141 .iov_base = mem, 142 .iov_len = size, 143 }; 144 ssize_t cur, total, transferred; 145 char *old, *new; 146 int fds[2]; 147 char buf; 148 149 old = malloc(size); 150 new = malloc(size); 151 152 /* Backup the original content. */ 153 memcpy(old, mem, size); 154 155 if (pipe(fds) < 0) 156 return -errno; 157 158 /* Trigger a read-only pin. */ 159 transferred = vmsplice(fds[1], &iov, 1, 0); 160 if (transferred < 0) 161 return -errno; 162 if (transferred == 0) 163 return -EINVAL; 164 165 /* Unmap it from our page tables. */ 166 if (munmap(mem, size) < 0) 167 return -errno; 168 169 /* Wait until the parent modified it. */ 170 write(comm_pipes->child_ready[1], "0", 1); 171 while (read(comm_pipes->parent_ready[0], &buf, 1) != 1) 172 ; 173 174 /* See if we still read the old values via the pipe. */ 175 for (total = 0; total < transferred; total += cur) { 176 cur = read(fds[0], new + total, transferred - total); 177 if (cur < 0) 178 return -errno; 179 } 180 181 return memcmp(old, new, transferred); 182 } 183 184 typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes); 185 186 static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect, 187 child_fn fn, bool xfail) 188 { 189 struct comm_pipes comm_pipes; 190 char buf; 191 int ret; 192 193 ret = setup_comm_pipes(&comm_pipes); 194 if (ret) { 195 log_test_result(KSFT_FAIL); 196 return; 197 } 198 199 ret = fork(); 200 if (ret < 0) { 201 ksft_perror("fork() failed"); 202 log_test_result(KSFT_FAIL); 203 goto close_comm_pipes; 204 } else if (!ret) { 205 exit(fn(mem, size, &comm_pipes)); 206 } 207 208 while (read(comm_pipes.child_ready[0], &buf, 1) != 1) 209 ; 210 211 if (do_mprotect) { 212 /* 213 * mprotect() optimizations might try avoiding 214 * write-faults by directly mapping pages writable. 215 */ 216 ret = mprotect(mem, size, PROT_READ); 217 if (ret) { 218 ksft_perror("mprotect() failed"); 219 log_test_result(KSFT_FAIL); 220 write(comm_pipes.parent_ready[1], "0", 1); 221 wait(&ret); 222 goto close_comm_pipes; 223 } 224 225 ret = mprotect(mem, size, PROT_READ|PROT_WRITE); 226 if (ret) { 227 ksft_perror("mprotect() failed"); 228 log_test_result(KSFT_FAIL); 229 write(comm_pipes.parent_ready[1], "0", 1); 230 wait(&ret); 231 goto close_comm_pipes; 232 } 233 } 234 235 /* Modify the page. */ 236 memset(mem, 0xff, size); 237 write(comm_pipes.parent_ready[1], "0", 1); 238 239 wait(&ret); 240 if (WIFEXITED(ret)) 241 ret = WEXITSTATUS(ret); 242 else 243 ret = -EINVAL; 244 245 if (!ret) { 246 log_test_result(KSFT_PASS); 247 } else if (xfail) { 248 /* 249 * With hugetlb, some vmsplice() tests are currently expected to 250 * fail because (a) harder to fix and (b) nobody really cares. 251 * Flag them as expected failure for now. 252 */ 253 ksft_print_msg("Leak from parent into child\n"); 254 log_test_result(KSFT_XFAIL); 255 } else { 256 ksft_print_msg("Leak from parent into child\n"); 257 log_test_result(KSFT_FAIL); 258 } 259 close_comm_pipes: 260 close_comm_pipes(&comm_pipes); 261 } 262 263 static void test_cow_in_parent(char *mem, size_t size, bool is_hugetlb) 264 { 265 do_test_cow_in_parent(mem, size, false, child_memcmp_fn, false); 266 } 267 268 static void test_cow_in_parent_mprotect(char *mem, size_t size, bool is_hugetlb) 269 { 270 do_test_cow_in_parent(mem, size, true, child_memcmp_fn, false); 271 } 272 273 static void test_vmsplice_in_child(char *mem, size_t size, bool is_hugetlb) 274 { 275 do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn, 276 is_hugetlb); 277 } 278 279 static void test_vmsplice_in_child_mprotect(char *mem, size_t size, 280 bool is_hugetlb) 281 { 282 do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn, 283 is_hugetlb); 284 } 285 286 static void do_test_vmsplice_in_parent(char *mem, size_t size, 287 bool before_fork, bool xfail) 288 { 289 struct iovec iov = { 290 .iov_base = mem, 291 .iov_len = size, 292 }; 293 ssize_t cur, total, transferred = 0; 294 struct comm_pipes comm_pipes; 295 char *old, *new; 296 int ret, fds[2]; 297 char buf; 298 299 old = malloc(size); 300 new = malloc(size); 301 302 memcpy(old, mem, size); 303 304 ret = setup_comm_pipes(&comm_pipes); 305 if (ret) { 306 log_test_result(KSFT_FAIL); 307 goto free; 308 } 309 310 if (pipe(fds) < 0) { 311 ksft_perror("pipe() failed"); 312 log_test_result(KSFT_FAIL); 313 goto close_comm_pipes; 314 } 315 316 if (before_fork) { 317 transferred = vmsplice(fds[1], &iov, 1, 0); 318 if (transferred <= 0) { 319 ksft_perror("vmsplice() failed\n"); 320 log_test_result(KSFT_FAIL); 321 goto close_pipe; 322 } 323 } 324 325 ret = fork(); 326 if (ret < 0) { 327 ksft_perror("fork() failed\n"); 328 log_test_result(KSFT_FAIL); 329 goto close_pipe; 330 } else if (!ret) { 331 write(comm_pipes.child_ready[1], "0", 1); 332 while (read(comm_pipes.parent_ready[0], &buf, 1) != 1) 333 ; 334 /* Modify page content in the child. */ 335 memset(mem, 0xff, size); 336 exit(0); 337 } 338 339 if (!before_fork) { 340 transferred = vmsplice(fds[1], &iov, 1, 0); 341 if (transferred <= 0) { 342 ksft_perror("vmsplice() failed"); 343 log_test_result(KSFT_FAIL); 344 wait(&ret); 345 goto close_pipe; 346 } 347 } 348 349 while (read(comm_pipes.child_ready[0], &buf, 1) != 1) 350 ; 351 if (munmap(mem, size) < 0) { 352 ksft_perror("munmap() failed"); 353 log_test_result(KSFT_FAIL); 354 goto close_pipe; 355 } 356 write(comm_pipes.parent_ready[1], "0", 1); 357 358 /* Wait until the child is done writing. */ 359 wait(&ret); 360 if (!WIFEXITED(ret)) { 361 ksft_perror("wait() failed"); 362 log_test_result(KSFT_FAIL); 363 goto close_pipe; 364 } 365 366 /* See if we still read the old values. */ 367 for (total = 0; total < transferred; total += cur) { 368 cur = read(fds[0], new + total, transferred - total); 369 if (cur < 0) { 370 ksft_perror("read() failed"); 371 log_test_result(KSFT_FAIL); 372 goto close_pipe; 373 } 374 } 375 376 if (!memcmp(old, new, transferred)) { 377 log_test_result(KSFT_PASS); 378 } else if (xfail) { 379 /* 380 * With hugetlb, some vmsplice() tests are currently expected to 381 * fail because (a) harder to fix and (b) nobody really cares. 382 * Flag them as expected failure for now. 383 */ 384 ksft_print_msg("Leak from child into parent\n"); 385 log_test_result(KSFT_XFAIL); 386 } else { 387 ksft_print_msg("Leak from child into parent\n"); 388 log_test_result(KSFT_FAIL); 389 } 390 close_pipe: 391 close(fds[0]); 392 close(fds[1]); 393 close_comm_pipes: 394 close_comm_pipes(&comm_pipes); 395 free: 396 free(old); 397 free(new); 398 } 399 400 static void test_vmsplice_before_fork(char *mem, size_t size, bool is_hugetlb) 401 { 402 do_test_vmsplice_in_parent(mem, size, true, is_hugetlb); 403 } 404 405 static void test_vmsplice_after_fork(char *mem, size_t size, bool is_hugetlb) 406 { 407 do_test_vmsplice_in_parent(mem, size, false, is_hugetlb); 408 } 409 410 #ifdef LOCAL_CONFIG_HAVE_LIBURING 411 static void do_test_iouring(char *mem, size_t size, bool use_fork) 412 { 413 struct comm_pipes comm_pipes; 414 struct io_uring_cqe *cqe; 415 struct io_uring_sqe *sqe; 416 struct io_uring ring; 417 ssize_t cur, total; 418 struct iovec iov; 419 char *buf, *tmp; 420 int ret, fd; 421 FILE *file; 422 423 ret = setup_comm_pipes(&comm_pipes); 424 if (ret) { 425 log_test_result(KSFT_FAIL); 426 return; 427 } 428 429 file = tmpfile(); 430 if (!file) { 431 ksft_perror("tmpfile() failed"); 432 log_test_result(KSFT_FAIL); 433 goto close_comm_pipes; 434 } 435 fd = fileno(file); 436 assert(fd); 437 438 tmp = malloc(size); 439 if (!tmp) { 440 ksft_print_msg("malloc() failed\n"); 441 log_test_result(KSFT_FAIL); 442 goto close_file; 443 } 444 445 /* Skip on errors, as we might just lack kernel support. */ 446 ret = io_uring_queue_init(1, &ring, 0); 447 if (ret < 0) { 448 ksft_print_msg("io_uring_queue_init() failed\n"); 449 log_test_result(KSFT_SKIP); 450 goto free_tmp; 451 } 452 453 /* 454 * Register the range as a fixed buffer. This will FOLL_WRITE | FOLL_PIN 455 * | FOLL_LONGTERM the range. 456 * 457 * Skip on errors, as we might just lack kernel support or might not 458 * have sufficient MEMLOCK permissions. 459 */ 460 iov.iov_base = mem; 461 iov.iov_len = size; 462 ret = io_uring_register_buffers(&ring, &iov, 1); 463 if (ret) { 464 ksft_print_msg("io_uring_register_buffers() failed\n"); 465 log_test_result(KSFT_SKIP); 466 goto queue_exit; 467 } 468 469 if (use_fork) { 470 /* 471 * fork() and keep the child alive until we're done. Note that 472 * we expect the pinned page to not get shared with the child. 473 */ 474 ret = fork(); 475 if (ret < 0) { 476 ksft_perror("fork() failed"); 477 log_test_result(KSFT_FAIL); 478 goto unregister_buffers; 479 } else if (!ret) { 480 write(comm_pipes.child_ready[1], "0", 1); 481 while (read(comm_pipes.parent_ready[0], &buf, 1) != 1) 482 ; 483 exit(0); 484 } 485 486 while (read(comm_pipes.child_ready[0], &buf, 1) != 1) 487 ; 488 } else { 489 /* 490 * Map the page R/O into the page table. Enable softdirty 491 * tracking to stop the page from getting mapped R/W immediately 492 * again by mprotect() optimizations. Note that we don't have an 493 * easy way to test if that worked (the pagemap does not export 494 * if the page is mapped R/O vs. R/W). 495 */ 496 ret = mprotect(mem, size, PROT_READ); 497 if (ret) { 498 ksft_perror("mprotect() failed"); 499 log_test_result(KSFT_FAIL); 500 goto unregister_buffers; 501 } 502 503 clear_softdirty(); 504 ret = mprotect(mem, size, PROT_READ | PROT_WRITE); 505 if (ret) { 506 ksft_perror("mprotect() failed"); 507 log_test_result(KSFT_FAIL); 508 goto unregister_buffers; 509 } 510 } 511 512 /* 513 * Modify the page and write page content as observed by the fixed 514 * buffer pin to the file so we can verify it. 515 */ 516 memset(mem, 0xff, size); 517 sqe = io_uring_get_sqe(&ring); 518 if (!sqe) { 519 ksft_print_msg("io_uring_get_sqe() failed\n"); 520 log_test_result(KSFT_FAIL); 521 goto quit_child; 522 } 523 io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0); 524 525 ret = io_uring_submit(&ring); 526 if (ret < 0) { 527 ksft_print_msg("io_uring_submit() failed\n"); 528 log_test_result(KSFT_FAIL); 529 goto quit_child; 530 } 531 532 ret = io_uring_wait_cqe(&ring, &cqe); 533 if (ret < 0) { 534 ksft_print_msg("io_uring_wait_cqe() failed\n"); 535 log_test_result(KSFT_FAIL); 536 goto quit_child; 537 } 538 539 if (cqe->res != size) { 540 ksft_print_msg("write_fixed failed\n"); 541 log_test_result(KSFT_FAIL); 542 goto quit_child; 543 } 544 io_uring_cqe_seen(&ring, cqe); 545 546 /* Read back the file content to the temporary buffer. */ 547 total = 0; 548 while (total < size) { 549 cur = pread(fd, tmp + total, size - total, total); 550 if (cur < 0) { 551 ksft_perror("pread() failed\n"); 552 log_test_result(KSFT_FAIL); 553 goto quit_child; 554 } 555 total += cur; 556 } 557 558 /* Finally, check if we read what we expected. */ 559 if (!memcmp(mem, tmp, size)) { 560 log_test_result(KSFT_PASS); 561 } else { 562 ksft_print_msg("Longtom R/W pin is not reliable\n"); 563 log_test_result(KSFT_FAIL); 564 } 565 566 quit_child: 567 if (use_fork) { 568 write(comm_pipes.parent_ready[1], "0", 1); 569 wait(&ret); 570 } 571 unregister_buffers: 572 io_uring_unregister_buffers(&ring); 573 queue_exit: 574 io_uring_queue_exit(&ring); 575 free_tmp: 576 free(tmp); 577 close_file: 578 fclose(file); 579 close_comm_pipes: 580 close_comm_pipes(&comm_pipes); 581 } 582 583 static void test_iouring_ro(char *mem, size_t size, bool is_hugetlb) 584 { 585 do_test_iouring(mem, size, false); 586 } 587 588 static void test_iouring_fork(char *mem, size_t size, bool is_hugetlb) 589 { 590 do_test_iouring(mem, size, true); 591 } 592 593 #endif /* LOCAL_CONFIG_HAVE_LIBURING */ 594 595 enum ro_pin_test { 596 RO_PIN_TEST, 597 RO_PIN_TEST_SHARED, 598 RO_PIN_TEST_PREVIOUSLY_SHARED, 599 RO_PIN_TEST_RO_EXCLUSIVE, 600 }; 601 602 static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, 603 bool fast) 604 { 605 struct pin_longterm_test args; 606 struct comm_pipes comm_pipes; 607 char *tmp, buf; 608 __u64 tmp_val; 609 int ret; 610 611 if (gup_fd < 0) { 612 ksft_print_msg("gup_test not available\n"); 613 log_test_result(KSFT_SKIP); 614 return; 615 } 616 617 tmp = malloc(size); 618 if (!tmp) { 619 ksft_perror("malloc() failed\n"); 620 log_test_result(KSFT_FAIL); 621 return; 622 } 623 624 ret = setup_comm_pipes(&comm_pipes); 625 if (ret) { 626 log_test_result(KSFT_FAIL); 627 goto free_tmp; 628 } 629 630 switch (test) { 631 case RO_PIN_TEST: 632 break; 633 case RO_PIN_TEST_SHARED: 634 case RO_PIN_TEST_PREVIOUSLY_SHARED: 635 /* 636 * Share the pages with our child. As the pages are not pinned, 637 * this should just work. 638 */ 639 ret = fork(); 640 if (ret < 0) { 641 ksft_perror("fork() failed"); 642 log_test_result(KSFT_FAIL); 643 goto close_comm_pipes; 644 } else if (!ret) { 645 write(comm_pipes.child_ready[1], "0", 1); 646 while (read(comm_pipes.parent_ready[0], &buf, 1) != 1) 647 ; 648 exit(0); 649 } 650 651 /* Wait until our child is ready. */ 652 while (read(comm_pipes.child_ready[0], &buf, 1) != 1) 653 ; 654 655 if (test == RO_PIN_TEST_PREVIOUSLY_SHARED) { 656 /* 657 * Tell the child to quit now and wait until it quit. 658 * The pages should now be mapped R/O into our page 659 * tables, but they are no longer shared. 660 */ 661 write(comm_pipes.parent_ready[1], "0", 1); 662 wait(&ret); 663 if (!WIFEXITED(ret)) 664 ksft_print_msg("[INFO] wait() failed\n"); 665 } 666 break; 667 case RO_PIN_TEST_RO_EXCLUSIVE: 668 /* 669 * Map the page R/O into the page table. Enable softdirty 670 * tracking to stop the page from getting mapped R/W immediately 671 * again by mprotect() optimizations. Note that we don't have an 672 * easy way to test if that worked (the pagemap does not export 673 * if the page is mapped R/O vs. R/W). 674 */ 675 ret = mprotect(mem, size, PROT_READ); 676 clear_softdirty(); 677 ret |= mprotect(mem, size, PROT_READ | PROT_WRITE); 678 if (ret) { 679 ksft_perror("mprotect() failed"); 680 log_test_result(KSFT_FAIL); 681 goto close_comm_pipes; 682 } 683 break; 684 default: 685 assert(false); 686 } 687 688 /* Take a R/O pin. This should trigger unsharing. */ 689 args.addr = (__u64)(uintptr_t)mem; 690 args.size = size; 691 args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0; 692 ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args); 693 if (ret) { 694 if (errno == EINVAL) 695 ret = KSFT_SKIP; 696 else 697 ret = KSFT_FAIL; 698 ksft_perror("PIN_LONGTERM_TEST_START failed"); 699 log_test_result(ret); 700 goto wait; 701 } 702 703 /* Modify the page. */ 704 memset(mem, 0xff, size); 705 706 /* 707 * Read back the content via the pin to the temporary buffer and 708 * test if we observed the modification. 709 */ 710 tmp_val = (__u64)(uintptr_t)tmp; 711 ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val); 712 if (ret) { 713 ksft_perror("PIN_LONGTERM_TEST_READ failed"); 714 log_test_result(KSFT_FAIL); 715 } else { 716 if (!memcmp(mem, tmp, size)) { 717 log_test_result(KSFT_PASS); 718 } else { 719 ksft_print_msg("Longterm R/O pin is not reliable\n"); 720 log_test_result(KSFT_FAIL); 721 } 722 } 723 724 ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP); 725 if (ret) 726 ksft_perror("PIN_LONGTERM_TEST_STOP failed"); 727 wait: 728 switch (test) { 729 case RO_PIN_TEST_SHARED: 730 write(comm_pipes.parent_ready[1], "0", 1); 731 wait(&ret); 732 if (!WIFEXITED(ret)) 733 ksft_perror("wait() failed"); 734 break; 735 default: 736 break; 737 } 738 close_comm_pipes: 739 close_comm_pipes(&comm_pipes); 740 free_tmp: 741 free(tmp); 742 } 743 744 static void test_ro_pin_on_shared(char *mem, size_t size, bool is_hugetlb) 745 { 746 do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, false); 747 } 748 749 static void test_ro_fast_pin_on_shared(char *mem, size_t size, bool is_hugetlb) 750 { 751 do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, true); 752 } 753 754 static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size, 755 bool is_hugetlb) 756 { 757 do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, false); 758 } 759 760 static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size, 761 bool is_hugetlb) 762 { 763 do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, true); 764 } 765 766 static void test_ro_pin_on_ro_exclusive(char *mem, size_t size, 767 bool is_hugetlb) 768 { 769 do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, false); 770 } 771 772 static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size, 773 bool is_hugetlb) 774 { 775 do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, true); 776 } 777 778 typedef void (*test_fn)(char *mem, size_t size, bool hugetlb); 779 780 static void do_run_with_base_page(test_fn fn, bool swapout) 781 { 782 char *mem; 783 int ret; 784 785 mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, 786 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 787 if (mem == MAP_FAILED) { 788 ksft_perror("mmap() failed"); 789 log_test_result(KSFT_FAIL); 790 return; 791 } 792 793 ret = madvise(mem, pagesize, MADV_NOHUGEPAGE); 794 /* Ignore if not around on a kernel. */ 795 if (ret && errno != EINVAL) { 796 ksft_perror("MADV_NOHUGEPAGE failed"); 797 log_test_result(KSFT_FAIL); 798 goto munmap; 799 } 800 801 /* Populate a base page. */ 802 memset(mem, 1, pagesize); 803 804 if (swapout) { 805 madvise(mem, pagesize, MADV_PAGEOUT); 806 if (!pagemap_is_swapped(pagemap_fd, mem)) { 807 ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n"); 808 log_test_result(KSFT_SKIP); 809 goto munmap; 810 } 811 } 812 813 fn(mem, pagesize, false); 814 munmap: 815 munmap(mem, pagesize); 816 } 817 818 static void run_with_base_page(test_fn fn, const char *desc) 819 { 820 log_test_start("%s ... with base page", desc); 821 do_run_with_base_page(fn, false); 822 } 823 824 static void run_with_base_page_swap(test_fn fn, const char *desc) 825 { 826 log_test_start("%s ... with swapped out base page", desc); 827 do_run_with_base_page(fn, true); 828 } 829 830 enum thp_run { 831 THP_RUN_PMD, 832 THP_RUN_PMD_SWAPOUT, 833 THP_RUN_PTE, 834 THP_RUN_PTE_SWAPOUT, 835 THP_RUN_SINGLE_PTE, 836 THP_RUN_SINGLE_PTE_SWAPOUT, 837 THP_RUN_PARTIAL_MREMAP, 838 THP_RUN_PARTIAL_SHARED, 839 }; 840 841 static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) 842 { 843 char *mem, *mmap_mem, *tmp, *mremap_mem = MAP_FAILED; 844 size_t size, mmap_size, mremap_size; 845 int ret; 846 847 /* For alignment purposes, we need twice the thp size. */ 848 mmap_size = 2 * thpsize; 849 mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, 850 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 851 if (mmap_mem == MAP_FAILED) { 852 ksft_perror("mmap() failed"); 853 log_test_result(KSFT_FAIL); 854 return; 855 } 856 857 /* We need a THP-aligned memory area. */ 858 mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1)); 859 860 ret = madvise(mem, thpsize, MADV_HUGEPAGE); 861 if (ret) { 862 ksft_perror("MADV_HUGEPAGE failed"); 863 log_test_result(KSFT_FAIL); 864 goto munmap; 865 } 866 867 /* 868 * Try to populate a THP. Touch the first sub-page and test if 869 * we get the last sub-page populated automatically. 870 */ 871 mem[0] = 1; 872 if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) { 873 ksft_print_msg("Did not get a THP populated\n"); 874 log_test_result(KSFT_SKIP); 875 goto munmap; 876 } 877 memset(mem, 1, thpsize); 878 879 size = thpsize; 880 switch (thp_run) { 881 case THP_RUN_PMD: 882 case THP_RUN_PMD_SWAPOUT: 883 assert(thpsize == pmdsize); 884 break; 885 case THP_RUN_PTE: 886 case THP_RUN_PTE_SWAPOUT: 887 /* 888 * Trigger PTE-mapping the THP by temporarily mapping a single 889 * subpage R/O. This is a noop if the THP is not pmdsize (and 890 * therefore already PTE-mapped). 891 */ 892 ret = mprotect(mem + pagesize, pagesize, PROT_READ); 893 if (ret) { 894 ksft_perror("mprotect() failed"); 895 log_test_result(KSFT_FAIL); 896 goto munmap; 897 } 898 ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE); 899 if (ret) { 900 ksft_perror("mprotect() failed"); 901 log_test_result(KSFT_FAIL); 902 goto munmap; 903 } 904 break; 905 case THP_RUN_SINGLE_PTE: 906 case THP_RUN_SINGLE_PTE_SWAPOUT: 907 /* 908 * Discard all but a single subpage of that PTE-mapped THP. What 909 * remains is a single PTE mapping a single subpage. 910 */ 911 ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED); 912 if (ret) { 913 ksft_perror("MADV_DONTNEED failed"); 914 log_test_result(KSFT_FAIL); 915 goto munmap; 916 } 917 size = pagesize; 918 break; 919 case THP_RUN_PARTIAL_MREMAP: 920 /* 921 * Remap half of the THP. We need some new memory location 922 * for that. 923 */ 924 mremap_size = thpsize / 2; 925 mremap_mem = mmap(NULL, mremap_size, PROT_NONE, 926 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 927 if (mremap_mem == MAP_FAILED) { 928 ksft_perror("mmap() failed"); 929 log_test_result(KSFT_FAIL); 930 goto munmap; 931 } 932 tmp = mremap(mem + mremap_size, mremap_size, mremap_size, 933 MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem); 934 if (tmp != mremap_mem) { 935 ksft_perror("mremap() failed"); 936 log_test_result(KSFT_FAIL); 937 goto munmap; 938 } 939 size = mremap_size; 940 break; 941 case THP_RUN_PARTIAL_SHARED: 942 /* 943 * Share the first page of the THP with a child and quit the 944 * child. This will result in some parts of the THP never 945 * have been shared. 946 */ 947 ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK); 948 if (ret) { 949 ksft_perror("MADV_DONTFORK failed"); 950 log_test_result(KSFT_FAIL); 951 goto munmap; 952 } 953 ret = fork(); 954 if (ret < 0) { 955 ksft_perror("fork() failed"); 956 log_test_result(KSFT_FAIL); 957 goto munmap; 958 } else if (!ret) { 959 exit(0); 960 } 961 wait(&ret); 962 /* Allow for sharing all pages again. */ 963 ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK); 964 if (ret) { 965 ksft_perror("MADV_DOFORK failed"); 966 log_test_result(KSFT_FAIL); 967 goto munmap; 968 } 969 break; 970 default: 971 assert(false); 972 } 973 974 switch (thp_run) { 975 case THP_RUN_PMD_SWAPOUT: 976 case THP_RUN_PTE_SWAPOUT: 977 case THP_RUN_SINGLE_PTE_SWAPOUT: 978 madvise(mem, size, MADV_PAGEOUT); 979 if (!range_is_swapped(mem, size)) { 980 ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n"); 981 log_test_result(KSFT_SKIP); 982 goto munmap; 983 } 984 break; 985 default: 986 break; 987 } 988 989 fn(mem, size, false); 990 munmap: 991 munmap(mmap_mem, mmap_size); 992 if (mremap_mem != MAP_FAILED) 993 munmap(mremap_mem, mremap_size); 994 } 995 996 static void run_with_thp(test_fn fn, const char *desc, size_t size) 997 { 998 log_test_start("%s ... with THP (%zu kB)", 999 desc, size / 1024); 1000 do_run_with_thp(fn, THP_RUN_PMD, size); 1001 } 1002 1003 static void run_with_thp_swap(test_fn fn, const char *desc, size_t size) 1004 { 1005 log_test_start("%s ... with swapped-out THP (%zu kB)", 1006 desc, size / 1024); 1007 do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT, size); 1008 } 1009 1010 static void run_with_pte_mapped_thp(test_fn fn, const char *desc, size_t size) 1011 { 1012 log_test_start("%s ... with PTE-mapped THP (%zu kB)", 1013 desc, size / 1024); 1014 do_run_with_thp(fn, THP_RUN_PTE, size); 1015 } 1016 1017 static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc, size_t size) 1018 { 1019 log_test_start("%s ... with swapped-out, PTE-mapped THP (%zu kB)", 1020 desc, size / 1024); 1021 do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT, size); 1022 } 1023 1024 static void run_with_single_pte_of_thp(test_fn fn, const char *desc, size_t size) 1025 { 1026 log_test_start("%s ... with single PTE of THP (%zu kB)", 1027 desc, size / 1024); 1028 do_run_with_thp(fn, THP_RUN_SINGLE_PTE, size); 1029 } 1030 1031 static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc, size_t size) 1032 { 1033 log_test_start("%s ... with single PTE of swapped-out THP (%zu kB)", 1034 desc, size / 1024); 1035 do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT, size); 1036 } 1037 1038 static void run_with_partial_mremap_thp(test_fn fn, const char *desc, size_t size) 1039 { 1040 log_test_start("%s ... with partially mremap()'ed THP (%zu kB)", 1041 desc, size / 1024); 1042 do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP, size); 1043 } 1044 1045 static void run_with_partial_shared_thp(test_fn fn, const char *desc, size_t size) 1046 { 1047 log_test_start("%s ... with partially shared THP (%zu kB)", 1048 desc, size / 1024); 1049 do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED, size); 1050 } 1051 1052 static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize) 1053 { 1054 int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB; 1055 char *mem, *dummy; 1056 1057 log_test_start("%s ... with hugetlb (%zu kB)", desc, 1058 hugetlbsize / 1024); 1059 1060 flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT; 1061 1062 mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0); 1063 if (mem == MAP_FAILED) { 1064 ksft_perror("need more free huge pages"); 1065 log_test_result(KSFT_SKIP); 1066 return; 1067 } 1068 1069 /* Populate an huge page. */ 1070 memset(mem, 1, hugetlbsize); 1071 1072 /* 1073 * We need a total of two hugetlb pages to handle COW/unsharing 1074 * properly, otherwise we might get zapped by a SIGBUS. 1075 */ 1076 dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0); 1077 if (dummy == MAP_FAILED) { 1078 ksft_perror("need more free huge pages"); 1079 log_test_result(KSFT_SKIP); 1080 goto munmap; 1081 } 1082 munmap(dummy, hugetlbsize); 1083 1084 fn(mem, hugetlbsize, true); 1085 munmap: 1086 munmap(mem, hugetlbsize); 1087 } 1088 1089 struct test_case { 1090 const char *desc; 1091 test_fn fn; 1092 }; 1093 1094 /* 1095 * Test cases that are specific to anonymous pages: pages in private mappings 1096 * that may get shared via COW during fork(). 1097 */ 1098 static const struct test_case anon_test_cases[] = { 1099 /* 1100 * Basic COW tests for fork() without any GUP. If we miss to break COW, 1101 * either the child can observe modifications by the parent or the 1102 * other way around. 1103 */ 1104 { 1105 "Basic COW after fork()", 1106 test_cow_in_parent, 1107 }, 1108 /* 1109 * Basic test, but do an additional mprotect(PROT_READ)+ 1110 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access. 1111 */ 1112 { 1113 "Basic COW after fork() with mprotect() optimization", 1114 test_cow_in_parent_mprotect, 1115 }, 1116 /* 1117 * vmsplice() [R/O GUP] + unmap in the child; modify in the parent. If 1118 * we miss to break COW, the child observes modifications by the parent. 1119 * This is CVE-2020-29374 reported by Jann Horn. 1120 */ 1121 { 1122 "vmsplice() + unmap in child", 1123 test_vmsplice_in_child, 1124 }, 1125 /* 1126 * vmsplice() test, but do an additional mprotect(PROT_READ)+ 1127 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access. 1128 */ 1129 { 1130 "vmsplice() + unmap in child with mprotect() optimization", 1131 test_vmsplice_in_child_mprotect, 1132 }, 1133 /* 1134 * vmsplice() [R/O GUP] in parent before fork(), unmap in parent after 1135 * fork(); modify in the child. If we miss to break COW, the parent 1136 * observes modifications by the child. 1137 */ 1138 { 1139 "vmsplice() before fork(), unmap in parent after fork()", 1140 test_vmsplice_before_fork, 1141 }, 1142 /* 1143 * vmsplice() [R/O GUP] + unmap in parent after fork(); modify in the 1144 * child. If we miss to break COW, the parent observes modifications by 1145 * the child. 1146 */ 1147 { 1148 "vmsplice() + unmap in parent after fork()", 1149 test_vmsplice_after_fork, 1150 }, 1151 #ifdef LOCAL_CONFIG_HAVE_LIBURING 1152 /* 1153 * Take a R/W longterm pin and then map the page R/O into the page 1154 * table to trigger a write fault on next access. When modifying the 1155 * page, the page content must be visible via the pin. 1156 */ 1157 { 1158 "R/O-mapping a page registered as iouring fixed buffer", 1159 test_iouring_ro, 1160 }, 1161 /* 1162 * Take a R/W longterm pin and then fork() a child. When modifying the 1163 * page, the page content must be visible via the pin. We expect the 1164 * pinned page to not get shared with the child. 1165 */ 1166 { 1167 "fork() with an iouring fixed buffer", 1168 test_iouring_fork, 1169 }, 1170 1171 #endif /* LOCAL_CONFIG_HAVE_LIBURING */ 1172 /* 1173 * Take a R/O longterm pin on a R/O-mapped shared anonymous page. 1174 * When modifying the page via the page table, the page content change 1175 * must be visible via the pin. 1176 */ 1177 { 1178 "R/O GUP pin on R/O-mapped shared page", 1179 test_ro_pin_on_shared, 1180 }, 1181 /* Same as above, but using GUP-fast. */ 1182 { 1183 "R/O GUP-fast pin on R/O-mapped shared page", 1184 test_ro_fast_pin_on_shared, 1185 }, 1186 /* 1187 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page that 1188 * was previously shared. When modifying the page via the page table, 1189 * the page content change must be visible via the pin. 1190 */ 1191 { 1192 "R/O GUP pin on R/O-mapped previously-shared page", 1193 test_ro_pin_on_ro_previously_shared, 1194 }, 1195 /* Same as above, but using GUP-fast. */ 1196 { 1197 "R/O GUP-fast pin on R/O-mapped previously-shared page", 1198 test_ro_fast_pin_on_ro_previously_shared, 1199 }, 1200 /* 1201 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page. 1202 * When modifying the page via the page table, the page content change 1203 * must be visible via the pin. 1204 */ 1205 { 1206 "R/O GUP pin on R/O-mapped exclusive page", 1207 test_ro_pin_on_ro_exclusive, 1208 }, 1209 /* Same as above, but using GUP-fast. */ 1210 { 1211 "R/O GUP-fast pin on R/O-mapped exclusive page", 1212 test_ro_fast_pin_on_ro_exclusive, 1213 }, 1214 }; 1215 1216 static void run_anon_test_case(struct test_case const *test_case) 1217 { 1218 int i; 1219 1220 run_with_base_page(test_case->fn, test_case->desc); 1221 run_with_base_page_swap(test_case->fn, test_case->desc); 1222 for (i = 0; i < nr_thpsizes; i++) { 1223 size_t size = thpsizes[i]; 1224 struct thp_settings settings = *thp_current_settings(); 1225 1226 settings.hugepages[sz2ord(pmdsize, pagesize)].enabled = THP_NEVER; 1227 settings.hugepages[sz2ord(size, pagesize)].enabled = THP_ALWAYS; 1228 thp_push_settings(&settings); 1229 1230 if (size == pmdsize) { 1231 run_with_thp(test_case->fn, test_case->desc, size); 1232 run_with_thp_swap(test_case->fn, test_case->desc, size); 1233 } 1234 1235 run_with_pte_mapped_thp(test_case->fn, test_case->desc, size); 1236 run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc, size); 1237 run_with_single_pte_of_thp(test_case->fn, test_case->desc, size); 1238 run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc, size); 1239 run_with_partial_mremap_thp(test_case->fn, test_case->desc, size); 1240 run_with_partial_shared_thp(test_case->fn, test_case->desc, size); 1241 1242 thp_pop_settings(); 1243 } 1244 for (i = 0; i < nr_hugetlbsizes; i++) 1245 run_with_hugetlb(test_case->fn, test_case->desc, 1246 hugetlbsizes[i]); 1247 } 1248 1249 static void run_anon_test_cases(void) 1250 { 1251 int i; 1252 1253 ksft_print_msg("[INFO] Anonymous memory tests in private mappings\n"); 1254 1255 for (i = 0; i < ARRAY_SIZE(anon_test_cases); i++) 1256 run_anon_test_case(&anon_test_cases[i]); 1257 } 1258 1259 static int tests_per_anon_test_case(void) 1260 { 1261 int tests = 2 + nr_hugetlbsizes; 1262 1263 tests += 6 * nr_thpsizes; 1264 if (pmdsize) 1265 tests += 2; 1266 return tests; 1267 } 1268 1269 enum anon_thp_collapse_test { 1270 ANON_THP_COLLAPSE_UNSHARED, 1271 ANON_THP_COLLAPSE_FULLY_SHARED, 1272 ANON_THP_COLLAPSE_LOWER_SHARED, 1273 ANON_THP_COLLAPSE_UPPER_SHARED, 1274 }; 1275 1276 static void do_test_anon_thp_collapse(char *mem, size_t size, 1277 enum anon_thp_collapse_test test) 1278 { 1279 struct comm_pipes comm_pipes; 1280 char buf; 1281 int ret; 1282 1283 ret = setup_comm_pipes(&comm_pipes); 1284 if (ret) { 1285 log_test_result(KSFT_FAIL); 1286 return; 1287 } 1288 1289 /* 1290 * Trigger PTE-mapping the THP by temporarily mapping a single subpage 1291 * R/O, such that we can try collapsing it later. 1292 */ 1293 ret = mprotect(mem + pagesize, pagesize, PROT_READ); 1294 if (ret) { 1295 ksft_perror("mprotect() failed"); 1296 log_test_result(KSFT_FAIL); 1297 goto close_comm_pipes; 1298 } 1299 ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE); 1300 if (ret) { 1301 ksft_perror("mprotect() failed"); 1302 log_test_result(KSFT_FAIL); 1303 goto close_comm_pipes; 1304 } 1305 1306 switch (test) { 1307 case ANON_THP_COLLAPSE_UNSHARED: 1308 /* Collapse before actually COW-sharing the page. */ 1309 ret = madvise(mem, size, MADV_COLLAPSE); 1310 if (ret) { 1311 ksft_perror("MADV_COLLAPSE failed"); 1312 log_test_result(KSFT_SKIP); 1313 goto close_comm_pipes; 1314 } 1315 break; 1316 case ANON_THP_COLLAPSE_FULLY_SHARED: 1317 /* COW-share the full PTE-mapped THP. */ 1318 break; 1319 case ANON_THP_COLLAPSE_LOWER_SHARED: 1320 /* Don't COW-share the upper part of the THP. */ 1321 ret = madvise(mem + size / 2, size / 2, MADV_DONTFORK); 1322 if (ret) { 1323 ksft_perror("MADV_DONTFORK failed"); 1324 log_test_result(KSFT_FAIL); 1325 goto close_comm_pipes; 1326 } 1327 break; 1328 case ANON_THP_COLLAPSE_UPPER_SHARED: 1329 /* Don't COW-share the lower part of the THP. */ 1330 ret = madvise(mem, size / 2, MADV_DONTFORK); 1331 if (ret) { 1332 ksft_perror("MADV_DONTFORK failed"); 1333 log_test_result(KSFT_FAIL); 1334 goto close_comm_pipes; 1335 } 1336 break; 1337 default: 1338 assert(false); 1339 } 1340 1341 ret = fork(); 1342 if (ret < 0) { 1343 ksft_perror("fork() failed"); 1344 log_test_result(KSFT_FAIL); 1345 goto close_comm_pipes; 1346 } else if (!ret) { 1347 switch (test) { 1348 case ANON_THP_COLLAPSE_UNSHARED: 1349 case ANON_THP_COLLAPSE_FULLY_SHARED: 1350 exit(child_memcmp_fn(mem, size, &comm_pipes)); 1351 break; 1352 case ANON_THP_COLLAPSE_LOWER_SHARED: 1353 exit(child_memcmp_fn(mem, size / 2, &comm_pipes)); 1354 break; 1355 case ANON_THP_COLLAPSE_UPPER_SHARED: 1356 exit(child_memcmp_fn(mem + size / 2, size / 2, 1357 &comm_pipes)); 1358 break; 1359 default: 1360 assert(false); 1361 } 1362 } 1363 1364 while (read(comm_pipes.child_ready[0], &buf, 1) != 1) 1365 ; 1366 1367 switch (test) { 1368 case ANON_THP_COLLAPSE_UNSHARED: 1369 break; 1370 case ANON_THP_COLLAPSE_UPPER_SHARED: 1371 case ANON_THP_COLLAPSE_LOWER_SHARED: 1372 /* 1373 * Revert MADV_DONTFORK such that we merge the VMAs and are 1374 * able to actually collapse. 1375 */ 1376 ret = madvise(mem, size, MADV_DOFORK); 1377 if (ret) { 1378 ksft_perror("MADV_DOFORK failed"); 1379 log_test_result(KSFT_FAIL); 1380 write(comm_pipes.parent_ready[1], "0", 1); 1381 wait(&ret); 1382 goto close_comm_pipes; 1383 } 1384 /* FALLTHROUGH */ 1385 case ANON_THP_COLLAPSE_FULLY_SHARED: 1386 /* Collapse before anyone modified the COW-shared page. */ 1387 ret = madvise(mem, size, MADV_COLLAPSE); 1388 if (ret) { 1389 ksft_perror("MADV_COLLAPSE failed"); 1390 log_test_result(KSFT_SKIP); 1391 write(comm_pipes.parent_ready[1], "0", 1); 1392 wait(&ret); 1393 goto close_comm_pipes; 1394 } 1395 break; 1396 default: 1397 assert(false); 1398 } 1399 1400 /* Modify the page. */ 1401 memset(mem, 0xff, size); 1402 write(comm_pipes.parent_ready[1], "0", 1); 1403 1404 wait(&ret); 1405 if (WIFEXITED(ret)) 1406 ret = WEXITSTATUS(ret); 1407 else 1408 ret = -EINVAL; 1409 1410 if (!ret) { 1411 log_test_result(KSFT_PASS); 1412 } else { 1413 ksft_print_msg("Leak from parent into child\n"); 1414 log_test_result(KSFT_FAIL); 1415 } 1416 close_comm_pipes: 1417 close_comm_pipes(&comm_pipes); 1418 } 1419 1420 static void test_anon_thp_collapse_unshared(char *mem, size_t size, 1421 bool is_hugetlb) 1422 { 1423 assert(!is_hugetlb); 1424 do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UNSHARED); 1425 } 1426 1427 static void test_anon_thp_collapse_fully_shared(char *mem, size_t size, 1428 bool is_hugetlb) 1429 { 1430 assert(!is_hugetlb); 1431 do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_FULLY_SHARED); 1432 } 1433 1434 static void test_anon_thp_collapse_lower_shared(char *mem, size_t size, 1435 bool is_hugetlb) 1436 { 1437 assert(!is_hugetlb); 1438 do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_LOWER_SHARED); 1439 } 1440 1441 static void test_anon_thp_collapse_upper_shared(char *mem, size_t size, 1442 bool is_hugetlb) 1443 { 1444 assert(!is_hugetlb); 1445 do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UPPER_SHARED); 1446 } 1447 1448 /* 1449 * Test cases that are specific to anonymous THP: pages in private mappings 1450 * that may get shared via COW during fork(). 1451 */ 1452 static const struct test_case anon_thp_test_cases[] = { 1453 /* 1454 * Basic COW test for fork() without any GUP when collapsing a THP 1455 * before fork(). 1456 * 1457 * Re-mapping a PTE-mapped anon THP using a single PMD ("in-place 1458 * collapse") might easily get COW handling wrong when not collapsing 1459 * exclusivity information properly. 1460 */ 1461 { 1462 "Basic COW after fork() when collapsing before fork()", 1463 test_anon_thp_collapse_unshared, 1464 }, 1465 /* Basic COW test, but collapse after COW-sharing a full THP. */ 1466 { 1467 "Basic COW after fork() when collapsing after fork() (fully shared)", 1468 test_anon_thp_collapse_fully_shared, 1469 }, 1470 /* 1471 * Basic COW test, but collapse after COW-sharing the lower half of a 1472 * THP. 1473 */ 1474 { 1475 "Basic COW after fork() when collapsing after fork() (lower shared)", 1476 test_anon_thp_collapse_lower_shared, 1477 }, 1478 /* 1479 * Basic COW test, but collapse after COW-sharing the upper half of a 1480 * THP. 1481 */ 1482 { 1483 "Basic COW after fork() when collapsing after fork() (upper shared)", 1484 test_anon_thp_collapse_upper_shared, 1485 }, 1486 }; 1487 1488 static void run_anon_thp_test_cases(void) 1489 { 1490 int i; 1491 1492 if (!pmdsize) 1493 return; 1494 1495 ksft_print_msg("[INFO] Anonymous THP tests\n"); 1496 1497 for (i = 0; i < ARRAY_SIZE(anon_thp_test_cases); i++) { 1498 struct test_case const *test_case = &anon_thp_test_cases[i]; 1499 1500 log_test_start("%s", test_case->desc); 1501 do_run_with_thp(test_case->fn, THP_RUN_PMD, pmdsize); 1502 } 1503 } 1504 1505 static int tests_per_anon_thp_test_case(void) 1506 { 1507 return pmdsize ? 1 : 0; 1508 } 1509 1510 typedef void (*non_anon_test_fn)(char *mem, const char *smem, size_t size); 1511 1512 static void test_cow(char *mem, const char *smem, size_t size) 1513 { 1514 char *old = malloc(size); 1515 1516 /* Backup the original content. */ 1517 memcpy(old, smem, size); 1518 1519 /* Modify the page. */ 1520 memset(mem, 0xff, size); 1521 1522 /* See if we still read the old values via the other mapping. */ 1523 if (!memcmp(smem, old, size)) { 1524 log_test_result(KSFT_PASS); 1525 } else { 1526 ksft_print_msg("Other mapping modified\n"); 1527 log_test_result(KSFT_FAIL); 1528 } 1529 free(old); 1530 } 1531 1532 static void test_ro_pin(char *mem, const char *smem, size_t size) 1533 { 1534 do_test_ro_pin(mem, size, RO_PIN_TEST, false); 1535 } 1536 1537 static void test_ro_fast_pin(char *mem, const char *smem, size_t size) 1538 { 1539 do_test_ro_pin(mem, size, RO_PIN_TEST, true); 1540 } 1541 1542 static void run_with_zeropage(non_anon_test_fn fn, const char *desc) 1543 { 1544 char *mem, *smem; 1545 1546 log_test_start("%s ... with shared zeropage", desc); 1547 1548 mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, 1549 MAP_PRIVATE | MAP_ANON, -1, 0); 1550 if (mem == MAP_FAILED) { 1551 ksft_perror("mmap() failed"); 1552 log_test_result(KSFT_FAIL); 1553 return; 1554 } 1555 1556 smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0); 1557 if (smem == MAP_FAILED) { 1558 ksft_perror("mmap() failed"); 1559 log_test_result(KSFT_FAIL); 1560 goto munmap; 1561 } 1562 1563 /* Read from the page to populate the shared zeropage. */ 1564 if (!populate_page_checked(mem) || !populate_page_checked(smem)) { 1565 log_test_result(KSFT_FAIL); 1566 goto munmap; 1567 } 1568 1569 fn(mem, smem, pagesize); 1570 munmap: 1571 munmap(mem, pagesize); 1572 if (smem != MAP_FAILED) 1573 munmap(smem, pagesize); 1574 } 1575 1576 static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) 1577 { 1578 char *mem, *smem, *mmap_mem, *mmap_smem; 1579 size_t mmap_size; 1580 int ret; 1581 1582 log_test_start("%s ... with huge zeropage", desc); 1583 1584 if (!has_huge_zeropage) { 1585 ksft_print_msg("Huge zeropage not enabled\n"); 1586 log_test_result(KSFT_SKIP); 1587 return; 1588 } 1589 1590 /* For alignment purposes, we need twice the thp size. */ 1591 mmap_size = 2 * pmdsize; 1592 mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, 1593 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 1594 if (mmap_mem == MAP_FAILED) { 1595 ksft_perror("mmap() failed"); 1596 log_test_result(KSFT_FAIL); 1597 return; 1598 } 1599 mmap_smem = mmap(NULL, mmap_size, PROT_READ, 1600 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 1601 if (mmap_smem == MAP_FAILED) { 1602 ksft_perror("mmap() failed"); 1603 log_test_result(KSFT_FAIL); 1604 goto munmap; 1605 } 1606 1607 /* We need a THP-aligned memory area. */ 1608 mem = (char *)(((uintptr_t)mmap_mem + pmdsize) & ~(pmdsize - 1)); 1609 smem = (char *)(((uintptr_t)mmap_smem + pmdsize) & ~(pmdsize - 1)); 1610 1611 ret = madvise(mem, pmdsize, MADV_HUGEPAGE); 1612 if (ret) { 1613 ksft_perror("madvise()"); 1614 log_test_result(KSFT_FAIL); 1615 goto munmap; 1616 } 1617 ret = madvise(smem, pmdsize, MADV_HUGEPAGE); 1618 if (ret) { 1619 ksft_perror("madvise()"); 1620 log_test_result(KSFT_FAIL); 1621 goto munmap; 1622 } 1623 1624 /* 1625 * Read from the memory to populate the huge shared zeropage. Read from 1626 * the first sub-page and test if we get another sub-page populated 1627 * automatically. 1628 */ 1629 if (!populate_page_checked(mem) || !populate_page_checked(smem)) { 1630 log_test_result(KSFT_FAIL); 1631 goto munmap; 1632 } 1633 1634 if (!pagemap_is_populated(pagemap_fd, mem + pagesize) || 1635 !pagemap_is_populated(pagemap_fd, smem + pagesize)) { 1636 ksft_test_result_skip("Did not get THPs populated\n"); 1637 goto munmap; 1638 } 1639 1640 fn(mem, smem, pmdsize); 1641 munmap: 1642 munmap(mmap_mem, mmap_size); 1643 if (mmap_smem != MAP_FAILED) 1644 munmap(mmap_smem, mmap_size); 1645 } 1646 1647 static void run_with_memfd(non_anon_test_fn fn, const char *desc) 1648 { 1649 char *mem, *smem; 1650 int fd; 1651 1652 log_test_start("%s ... with memfd", desc); 1653 1654 fd = memfd_create("test", 0); 1655 if (fd < 0) { 1656 ksft_perror("memfd_create() failed"); 1657 log_test_result(KSFT_FAIL); 1658 return; 1659 } 1660 1661 /* File consists of a single page filled with zeroes. */ 1662 if (fallocate(fd, 0, 0, pagesize)) { 1663 ksft_perror("fallocate() failed"); 1664 log_test_result(KSFT_FAIL); 1665 goto close; 1666 } 1667 1668 /* Create a private mapping of the memfd. */ 1669 mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); 1670 if (mem == MAP_FAILED) { 1671 ksft_perror("mmap() failed"); 1672 log_test_result(KSFT_FAIL); 1673 goto close; 1674 } 1675 smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); 1676 if (smem == MAP_FAILED) { 1677 ksft_perror("mmap() failed"); 1678 log_test_result(KSFT_FAIL); 1679 goto munmap; 1680 } 1681 1682 /* Fault the page in. */ 1683 if (!populate_page_checked(mem) || !populate_page_checked(smem)) { 1684 log_test_result(KSFT_FAIL); 1685 goto munmap; 1686 } 1687 1688 fn(mem, smem, pagesize); 1689 munmap: 1690 munmap(mem, pagesize); 1691 if (smem != MAP_FAILED) 1692 munmap(smem, pagesize); 1693 close: 1694 close(fd); 1695 } 1696 1697 static void run_with_tmpfile(non_anon_test_fn fn, const char *desc) 1698 { 1699 char *mem, *smem; 1700 FILE *file; 1701 int fd; 1702 1703 log_test_start("%s ... with tmpfile", desc); 1704 1705 file = tmpfile(); 1706 if (!file) { 1707 ksft_perror("tmpfile() failed"); 1708 log_test_result(KSFT_FAIL); 1709 return; 1710 } 1711 1712 fd = fileno(file); 1713 if (fd < 0) { 1714 ksft_perror("fileno() failed"); 1715 log_test_result(KSFT_SKIP); 1716 return; 1717 } 1718 1719 /* File consists of a single page filled with zeroes. */ 1720 if (fallocate(fd, 0, 0, pagesize)) { 1721 ksft_perror("fallocate() failed"); 1722 log_test_result(KSFT_FAIL); 1723 goto close; 1724 } 1725 1726 /* Create a private mapping of the memfd. */ 1727 mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); 1728 if (mem == MAP_FAILED) { 1729 ksft_perror("mmap() failed"); 1730 log_test_result(KSFT_FAIL); 1731 goto close; 1732 } 1733 smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); 1734 if (smem == MAP_FAILED) { 1735 ksft_perror("mmap() failed"); 1736 log_test_result(KSFT_FAIL); 1737 goto munmap; 1738 } 1739 1740 /* Fault the page in. */ 1741 if (!populate_page_checked(mem) || !populate_page_checked(smem)) { 1742 log_test_result(KSFT_FAIL); 1743 goto munmap; 1744 } 1745 1746 fn(mem, smem, pagesize); 1747 munmap: 1748 munmap(mem, pagesize); 1749 if (smem != MAP_FAILED) 1750 munmap(smem, pagesize); 1751 close: 1752 fclose(file); 1753 } 1754 1755 static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc, 1756 size_t hugetlbsize) 1757 { 1758 int flags = MFD_HUGETLB; 1759 char *mem, *smem; 1760 int fd; 1761 1762 log_test_start("%s ... with memfd hugetlb (%zu kB)", desc, 1763 hugetlbsize / 1024); 1764 1765 flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT; 1766 1767 fd = memfd_create("test", flags); 1768 if (fd < 0) { 1769 ksft_perror("memfd_create() failed"); 1770 log_test_result(KSFT_SKIP); 1771 return; 1772 } 1773 1774 /* File consists of a single page filled with zeroes. */ 1775 if (fallocate(fd, 0, 0, hugetlbsize)) { 1776 ksft_perror("need more free huge pages"); 1777 log_test_result(KSFT_SKIP); 1778 goto close; 1779 } 1780 1781 /* Create a private mapping of the memfd. */ 1782 mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 1783 0); 1784 if (mem == MAP_FAILED) { 1785 ksft_perror("need more free huge pages"); 1786 log_test_result(KSFT_SKIP); 1787 goto close; 1788 } 1789 smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0); 1790 if (smem == MAP_FAILED) { 1791 ksft_perror("mmap() failed"); 1792 log_test_result(KSFT_FAIL); 1793 goto munmap; 1794 } 1795 1796 /* Fault the page in. */ 1797 if (!populate_page_checked(mem) || !populate_page_checked(smem)) { 1798 log_test_result(KSFT_FAIL); 1799 goto munmap; 1800 } 1801 1802 fn(mem, smem, hugetlbsize); 1803 munmap: 1804 munmap(mem, hugetlbsize); 1805 if (smem != MAP_FAILED) 1806 munmap(smem, hugetlbsize); 1807 close: 1808 close(fd); 1809 } 1810 1811 struct non_anon_test_case { 1812 const char *desc; 1813 non_anon_test_fn fn; 1814 }; 1815 1816 /* 1817 * Test cases that target any pages in private mappings that are not anonymous: 1818 * pages that may get shared via COW ndependent of fork(). This includes 1819 * the shared zeropage(s), pagecache pages, ... 1820 */ 1821 static const struct non_anon_test_case non_anon_test_cases[] = { 1822 /* 1823 * Basic COW test without any GUP. If we miss to break COW, changes are 1824 * visible via other private/shared mappings. 1825 */ 1826 { 1827 "Basic COW", 1828 test_cow, 1829 }, 1830 /* 1831 * Take a R/O longterm pin. When modifying the page via the page table, 1832 * the page content change must be visible via the pin. 1833 */ 1834 { 1835 "R/O longterm GUP pin", 1836 test_ro_pin, 1837 }, 1838 /* Same as above, but using GUP-fast. */ 1839 { 1840 "R/O longterm GUP-fast pin", 1841 test_ro_fast_pin, 1842 }, 1843 }; 1844 1845 static void run_non_anon_test_case(struct non_anon_test_case const *test_case) 1846 { 1847 int i; 1848 1849 run_with_zeropage(test_case->fn, test_case->desc); 1850 run_with_memfd(test_case->fn, test_case->desc); 1851 run_with_tmpfile(test_case->fn, test_case->desc); 1852 if (pmdsize) 1853 run_with_huge_zeropage(test_case->fn, test_case->desc); 1854 for (i = 0; i < nr_hugetlbsizes; i++) 1855 run_with_memfd_hugetlb(test_case->fn, test_case->desc, 1856 hugetlbsizes[i]); 1857 } 1858 1859 static void run_non_anon_test_cases(void) 1860 { 1861 int i; 1862 1863 ksft_print_msg("[RUN] Non-anonymous memory tests in private mappings\n"); 1864 1865 for (i = 0; i < ARRAY_SIZE(non_anon_test_cases); i++) 1866 run_non_anon_test_case(&non_anon_test_cases[i]); 1867 } 1868 1869 static int tests_per_non_anon_test_case(void) 1870 { 1871 int tests = 3 + nr_hugetlbsizes; 1872 1873 if (pmdsize) 1874 tests += 1; 1875 return tests; 1876 } 1877 1878 int main(int argc, char **argv) 1879 { 1880 struct thp_settings default_settings; 1881 1882 ksft_print_header(); 1883 1884 pagesize = getpagesize(); 1885 pmdsize = read_pmd_pagesize(); 1886 if (pmdsize) { 1887 /* Only if THP is supported. */ 1888 thp_read_settings(&default_settings); 1889 default_settings.hugepages[sz2ord(pmdsize, pagesize)].enabled = THP_INHERIT; 1890 thp_save_settings(); 1891 thp_push_settings(&default_settings); 1892 1893 ksft_print_msg("[INFO] detected PMD size: %zu KiB\n", 1894 pmdsize / 1024); 1895 nr_thpsizes = detect_thp_sizes(thpsizes, ARRAY_SIZE(thpsizes)); 1896 } 1897 nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes, 1898 ARRAY_SIZE(hugetlbsizes)); 1899 has_huge_zeropage = detect_huge_zeropage(); 1900 1901 ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() + 1902 ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() + 1903 ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case()); 1904 1905 gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR); 1906 pagemap_fd = open("/proc/self/pagemap", O_RDONLY); 1907 if (pagemap_fd < 0) 1908 ksft_exit_fail_msg("opening pagemap failed\n"); 1909 1910 run_anon_test_cases(); 1911 run_anon_thp_test_cases(); 1912 run_non_anon_test_cases(); 1913 1914 if (pmdsize) { 1915 /* Only if THP is supported. */ 1916 thp_restore_settings(); 1917 } 1918 1919 ksft_finished(); 1920 } 1921