1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * COW (Copy On Write) tests. 4 * 5 * Copyright 2022, Red Hat, Inc. 6 * 7 * Author(s): David Hildenbrand <david@redhat.com> 8 */ 9 #define _GNU_SOURCE 10 #include <stdlib.h> 11 #include <string.h> 12 #include <stdbool.h> 13 #include <stdint.h> 14 #include <unistd.h> 15 #include <errno.h> 16 #include <fcntl.h> 17 #include <assert.h> 18 #include <linux/mman.h> 19 #include <sys/mman.h> 20 #include <sys/ioctl.h> 21 #include <sys/wait.h> 22 #include <linux/memfd.h> 23 24 #include "local_config.h" 25 #ifdef LOCAL_CONFIG_HAVE_LIBURING 26 #include <liburing.h> 27 #endif /* LOCAL_CONFIG_HAVE_LIBURING */ 28 29 #include "../../../../mm/gup_test.h" 30 #include "../kselftest.h" 31 #include "vm_util.h" 32 #include "thp_settings.h" 33 34 static size_t pagesize; 35 static int pagemap_fd; 36 static size_t pmdsize; 37 static int nr_thpsizes; 38 static size_t thpsizes[20]; 39 static int nr_hugetlbsizes; 40 static size_t hugetlbsizes[10]; 41 static int gup_fd; 42 static bool has_huge_zeropage; 43 44 static int sz2ord(size_t size) 45 { 46 return __builtin_ctzll(size / pagesize); 47 } 48 49 static int detect_thp_sizes(size_t sizes[], int max) 50 { 51 int count = 0; 52 unsigned long orders; 53 size_t kb; 54 int i; 55 56 /* thp not supported at all. */ 57 if (!pmdsize) 58 return 0; 59 60 orders = 1UL << sz2ord(pmdsize); 61 orders |= thp_supported_orders(); 62 63 for (i = 0; orders && count < max; i++) { 64 if (!(orders & (1UL << i))) 65 continue; 66 orders &= ~(1UL << i); 67 kb = (pagesize >> 10) << i; 68 sizes[count++] = kb * 1024; 69 ksft_print_msg("[INFO] detected THP size: %zu KiB\n", kb); 70 } 71 72 return count; 73 } 74 75 static void detect_huge_zeropage(void) 76 { 77 int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page", 78 O_RDONLY); 79 size_t enabled = 0; 80 char buf[15]; 81 int ret; 82 83 if (fd < 0) 84 return; 85 86 ret = pread(fd, buf, sizeof(buf), 0); 87 if (ret > 0 && ret < sizeof(buf)) { 88 buf[ret] = 0; 89 90 enabled = strtoul(buf, NULL, 10); 91 if (enabled == 1) { 92 has_huge_zeropage = true; 93 ksft_print_msg("[INFO] huge zeropage is enabled\n"); 94 } 95 } 96 97 close(fd); 98 } 99 100 static bool range_is_swapped(void *addr, size_t size) 101 { 102 for (; size; addr += pagesize, size -= pagesize) 103 if (!pagemap_is_swapped(pagemap_fd, addr)) 104 return false; 105 return true; 106 } 107 108 struct comm_pipes { 109 int child_ready[2]; 110 int parent_ready[2]; 111 }; 112 113 static int setup_comm_pipes(struct comm_pipes *comm_pipes) 114 { 115 if (pipe(comm_pipes->child_ready) < 0) { 116 ksft_perror("pipe() failed"); 117 return -errno; 118 } 119 if (pipe(comm_pipes->parent_ready) < 0) { 120 ksft_perror("pipe() failed"); 121 close(comm_pipes->child_ready[0]); 122 close(comm_pipes->child_ready[1]); 123 return -errno; 124 } 125 126 return 0; 127 } 128 129 static void close_comm_pipes(struct comm_pipes *comm_pipes) 130 { 131 close(comm_pipes->child_ready[0]); 132 close(comm_pipes->child_ready[1]); 133 close(comm_pipes->parent_ready[0]); 134 close(comm_pipes->parent_ready[1]); 135 } 136 137 static int child_memcmp_fn(char *mem, size_t size, 138 struct comm_pipes *comm_pipes) 139 { 140 char *old = malloc(size); 141 char buf; 142 143 /* Backup the original content. */ 144 memcpy(old, mem, size); 145 146 /* Wait until the parent modified the page. */ 147 write(comm_pipes->child_ready[1], "0", 1); 148 while (read(comm_pipes->parent_ready[0], &buf, 1) != 1) 149 ; 150 151 /* See if we still read the old values. */ 152 return memcmp(old, mem, size); 153 } 154 155 static int child_vmsplice_memcmp_fn(char *mem, size_t size, 156 struct comm_pipes *comm_pipes) 157 { 158 struct iovec iov = { 159 .iov_base = mem, 160 .iov_len = size, 161 }; 162 ssize_t cur, total, transferred; 163 char *old, *new; 164 int fds[2]; 165 char buf; 166 167 old = malloc(size); 168 new = malloc(size); 169 170 /* Backup the original content. */ 171 memcpy(old, mem, size); 172 173 if (pipe(fds) < 0) 174 return -errno; 175 176 /* Trigger a read-only pin. */ 177 transferred = vmsplice(fds[1], &iov, 1, 0); 178 if (transferred < 0) 179 return -errno; 180 if (transferred == 0) 181 return -EINVAL; 182 183 /* Unmap it from our page tables. */ 184 if (munmap(mem, size) < 0) 185 return -errno; 186 187 /* Wait until the parent modified it. */ 188 write(comm_pipes->child_ready[1], "0", 1); 189 while (read(comm_pipes->parent_ready[0], &buf, 1) != 1) 190 ; 191 192 /* See if we still read the old values via the pipe. */ 193 for (total = 0; total < transferred; total += cur) { 194 cur = read(fds[0], new + total, transferred - total); 195 if (cur < 0) 196 return -errno; 197 } 198 199 return memcmp(old, new, transferred); 200 } 201 202 typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes); 203 204 static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect, 205 child_fn fn, bool xfail) 206 { 207 struct comm_pipes comm_pipes; 208 char buf; 209 int ret; 210 211 ret = setup_comm_pipes(&comm_pipes); 212 if (ret) { 213 log_test_result(KSFT_FAIL); 214 return; 215 } 216 217 ret = fork(); 218 if (ret < 0) { 219 ksft_perror("fork() failed"); 220 log_test_result(KSFT_FAIL); 221 goto close_comm_pipes; 222 } else if (!ret) { 223 exit(fn(mem, size, &comm_pipes)); 224 } 225 226 while (read(comm_pipes.child_ready[0], &buf, 1) != 1) 227 ; 228 229 if (do_mprotect) { 230 /* 231 * mprotect() optimizations might try avoiding 232 * write-faults by directly mapping pages writable. 233 */ 234 ret = mprotect(mem, size, PROT_READ); 235 if (ret) { 236 ksft_perror("mprotect() failed"); 237 log_test_result(KSFT_FAIL); 238 write(comm_pipes.parent_ready[1], "0", 1); 239 wait(&ret); 240 goto close_comm_pipes; 241 } 242 243 ret = mprotect(mem, size, PROT_READ|PROT_WRITE); 244 if (ret) { 245 ksft_perror("mprotect() failed"); 246 log_test_result(KSFT_FAIL); 247 write(comm_pipes.parent_ready[1], "0", 1); 248 wait(&ret); 249 goto close_comm_pipes; 250 } 251 } 252 253 /* Modify the page. */ 254 memset(mem, 0xff, size); 255 write(comm_pipes.parent_ready[1], "0", 1); 256 257 wait(&ret); 258 if (WIFEXITED(ret)) 259 ret = WEXITSTATUS(ret); 260 else 261 ret = -EINVAL; 262 263 if (!ret) { 264 log_test_result(KSFT_PASS); 265 } else if (xfail) { 266 /* 267 * With hugetlb, some vmsplice() tests are currently expected to 268 * fail because (a) harder to fix and (b) nobody really cares. 269 * Flag them as expected failure for now. 270 */ 271 ksft_print_msg("Leak from parent into child\n"); 272 log_test_result(KSFT_XFAIL); 273 } else { 274 ksft_print_msg("Leak from parent into child\n"); 275 log_test_result(KSFT_FAIL); 276 } 277 close_comm_pipes: 278 close_comm_pipes(&comm_pipes); 279 } 280 281 static void test_cow_in_parent(char *mem, size_t size, bool is_hugetlb) 282 { 283 do_test_cow_in_parent(mem, size, false, child_memcmp_fn, false); 284 } 285 286 static void test_cow_in_parent_mprotect(char *mem, size_t size, bool is_hugetlb) 287 { 288 do_test_cow_in_parent(mem, size, true, child_memcmp_fn, false); 289 } 290 291 static void test_vmsplice_in_child(char *mem, size_t size, bool is_hugetlb) 292 { 293 do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn, 294 is_hugetlb); 295 } 296 297 static void test_vmsplice_in_child_mprotect(char *mem, size_t size, 298 bool is_hugetlb) 299 { 300 do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn, 301 is_hugetlb); 302 } 303 304 static void do_test_vmsplice_in_parent(char *mem, size_t size, 305 bool before_fork, bool xfail) 306 { 307 struct iovec iov = { 308 .iov_base = mem, 309 .iov_len = size, 310 }; 311 ssize_t cur, total, transferred = 0; 312 struct comm_pipes comm_pipes; 313 char *old, *new; 314 int ret, fds[2]; 315 char buf; 316 317 old = malloc(size); 318 new = malloc(size); 319 320 memcpy(old, mem, size); 321 322 ret = setup_comm_pipes(&comm_pipes); 323 if (ret) { 324 log_test_result(KSFT_FAIL); 325 goto free; 326 } 327 328 if (pipe(fds) < 0) { 329 ksft_perror("pipe() failed"); 330 log_test_result(KSFT_FAIL); 331 goto close_comm_pipes; 332 } 333 334 if (before_fork) { 335 transferred = vmsplice(fds[1], &iov, 1, 0); 336 if (transferred <= 0) { 337 ksft_perror("vmsplice() failed\n"); 338 log_test_result(KSFT_FAIL); 339 goto close_pipe; 340 } 341 } 342 343 ret = fork(); 344 if (ret < 0) { 345 ksft_perror("fork() failed\n"); 346 log_test_result(KSFT_FAIL); 347 goto close_pipe; 348 } else if (!ret) { 349 write(comm_pipes.child_ready[1], "0", 1); 350 while (read(comm_pipes.parent_ready[0], &buf, 1) != 1) 351 ; 352 /* Modify page content in the child. */ 353 memset(mem, 0xff, size); 354 exit(0); 355 } 356 357 if (!before_fork) { 358 transferred = vmsplice(fds[1], &iov, 1, 0); 359 if (transferred <= 0) { 360 ksft_perror("vmsplice() failed"); 361 log_test_result(KSFT_FAIL); 362 wait(&ret); 363 goto close_pipe; 364 } 365 } 366 367 while (read(comm_pipes.child_ready[0], &buf, 1) != 1) 368 ; 369 if (munmap(mem, size) < 0) { 370 ksft_perror("munmap() failed"); 371 log_test_result(KSFT_FAIL); 372 goto close_pipe; 373 } 374 write(comm_pipes.parent_ready[1], "0", 1); 375 376 /* Wait until the child is done writing. */ 377 wait(&ret); 378 if (!WIFEXITED(ret)) { 379 ksft_perror("wait() failed"); 380 log_test_result(KSFT_FAIL); 381 goto close_pipe; 382 } 383 384 /* See if we still read the old values. */ 385 for (total = 0; total < transferred; total += cur) { 386 cur = read(fds[0], new + total, transferred - total); 387 if (cur < 0) { 388 ksft_perror("read() failed"); 389 log_test_result(KSFT_FAIL); 390 goto close_pipe; 391 } 392 } 393 394 if (!memcmp(old, new, transferred)) { 395 log_test_result(KSFT_PASS); 396 } else if (xfail) { 397 /* 398 * With hugetlb, some vmsplice() tests are currently expected to 399 * fail because (a) harder to fix and (b) nobody really cares. 400 * Flag them as expected failure for now. 401 */ 402 ksft_print_msg("Leak from child into parent\n"); 403 log_test_result(KSFT_XFAIL); 404 } else { 405 ksft_print_msg("Leak from child into parent\n"); 406 log_test_result(KSFT_FAIL); 407 } 408 close_pipe: 409 close(fds[0]); 410 close(fds[1]); 411 close_comm_pipes: 412 close_comm_pipes(&comm_pipes); 413 free: 414 free(old); 415 free(new); 416 } 417 418 static void test_vmsplice_before_fork(char *mem, size_t size, bool is_hugetlb) 419 { 420 do_test_vmsplice_in_parent(mem, size, true, is_hugetlb); 421 } 422 423 static void test_vmsplice_after_fork(char *mem, size_t size, bool is_hugetlb) 424 { 425 do_test_vmsplice_in_parent(mem, size, false, is_hugetlb); 426 } 427 428 #ifdef LOCAL_CONFIG_HAVE_LIBURING 429 static void do_test_iouring(char *mem, size_t size, bool use_fork) 430 { 431 struct comm_pipes comm_pipes; 432 struct io_uring_cqe *cqe; 433 struct io_uring_sqe *sqe; 434 struct io_uring ring; 435 ssize_t cur, total; 436 struct iovec iov; 437 char *buf, *tmp; 438 int ret, fd; 439 FILE *file; 440 441 ret = setup_comm_pipes(&comm_pipes); 442 if (ret) { 443 log_test_result(KSFT_FAIL); 444 return; 445 } 446 447 file = tmpfile(); 448 if (!file) { 449 ksft_perror("tmpfile() failed"); 450 log_test_result(KSFT_FAIL); 451 goto close_comm_pipes; 452 } 453 fd = fileno(file); 454 assert(fd); 455 456 tmp = malloc(size); 457 if (!tmp) { 458 ksft_print_msg("malloc() failed\n"); 459 log_test_result(KSFT_FAIL); 460 goto close_file; 461 } 462 463 /* Skip on errors, as we might just lack kernel support. */ 464 ret = io_uring_queue_init(1, &ring, 0); 465 if (ret < 0) { 466 ksft_print_msg("io_uring_queue_init() failed\n"); 467 log_test_result(KSFT_SKIP); 468 goto free_tmp; 469 } 470 471 /* 472 * Register the range as a fixed buffer. This will FOLL_WRITE | FOLL_PIN 473 * | FOLL_LONGTERM the range. 474 * 475 * Skip on errors, as we might just lack kernel support or might not 476 * have sufficient MEMLOCK permissions. 477 */ 478 iov.iov_base = mem; 479 iov.iov_len = size; 480 ret = io_uring_register_buffers(&ring, &iov, 1); 481 if (ret) { 482 ksft_print_msg("io_uring_register_buffers() failed\n"); 483 log_test_result(KSFT_SKIP); 484 goto queue_exit; 485 } 486 487 if (use_fork) { 488 /* 489 * fork() and keep the child alive until we're done. Note that 490 * we expect the pinned page to not get shared with the child. 491 */ 492 ret = fork(); 493 if (ret < 0) { 494 ksft_perror("fork() failed"); 495 log_test_result(KSFT_FAIL); 496 goto unregister_buffers; 497 } else if (!ret) { 498 write(comm_pipes.child_ready[1], "0", 1); 499 while (read(comm_pipes.parent_ready[0], &buf, 1) != 1) 500 ; 501 exit(0); 502 } 503 504 while (read(comm_pipes.child_ready[0], &buf, 1) != 1) 505 ; 506 } else { 507 /* 508 * Map the page R/O into the page table. Enable softdirty 509 * tracking to stop the page from getting mapped R/W immediately 510 * again by mprotect() optimizations. Note that we don't have an 511 * easy way to test if that worked (the pagemap does not export 512 * if the page is mapped R/O vs. R/W). 513 */ 514 ret = mprotect(mem, size, PROT_READ); 515 if (ret) { 516 ksft_perror("mprotect() failed"); 517 log_test_result(KSFT_FAIL); 518 goto unregister_buffers; 519 } 520 521 clear_softdirty(); 522 ret = mprotect(mem, size, PROT_READ | PROT_WRITE); 523 if (ret) { 524 ksft_perror("mprotect() failed"); 525 log_test_result(KSFT_FAIL); 526 goto unregister_buffers; 527 } 528 } 529 530 /* 531 * Modify the page and write page content as observed by the fixed 532 * buffer pin to the file so we can verify it. 533 */ 534 memset(mem, 0xff, size); 535 sqe = io_uring_get_sqe(&ring); 536 if (!sqe) { 537 ksft_print_msg("io_uring_get_sqe() failed\n"); 538 log_test_result(KSFT_FAIL); 539 goto quit_child; 540 } 541 io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0); 542 543 ret = io_uring_submit(&ring); 544 if (ret < 0) { 545 ksft_print_msg("io_uring_submit() failed\n"); 546 log_test_result(KSFT_FAIL); 547 goto quit_child; 548 } 549 550 ret = io_uring_wait_cqe(&ring, &cqe); 551 if (ret < 0) { 552 ksft_print_msg("io_uring_wait_cqe() failed\n"); 553 log_test_result(KSFT_FAIL); 554 goto quit_child; 555 } 556 557 if (cqe->res != size) { 558 ksft_print_msg("write_fixed failed\n"); 559 log_test_result(KSFT_FAIL); 560 goto quit_child; 561 } 562 io_uring_cqe_seen(&ring, cqe); 563 564 /* Read back the file content to the temporary buffer. */ 565 total = 0; 566 while (total < size) { 567 cur = pread(fd, tmp + total, size - total, total); 568 if (cur < 0) { 569 ksft_perror("pread() failed\n"); 570 log_test_result(KSFT_FAIL); 571 goto quit_child; 572 } 573 total += cur; 574 } 575 576 /* Finally, check if we read what we expected. */ 577 if (!memcmp(mem, tmp, size)) { 578 log_test_result(KSFT_PASS); 579 } else { 580 ksft_print_msg("Longtom R/W pin is not reliable\n"); 581 log_test_result(KSFT_FAIL); 582 } 583 584 quit_child: 585 if (use_fork) { 586 write(comm_pipes.parent_ready[1], "0", 1); 587 wait(&ret); 588 } 589 unregister_buffers: 590 io_uring_unregister_buffers(&ring); 591 queue_exit: 592 io_uring_queue_exit(&ring); 593 free_tmp: 594 free(tmp); 595 close_file: 596 fclose(file); 597 close_comm_pipes: 598 close_comm_pipes(&comm_pipes); 599 } 600 601 static void test_iouring_ro(char *mem, size_t size, bool is_hugetlb) 602 { 603 do_test_iouring(mem, size, false); 604 } 605 606 static void test_iouring_fork(char *mem, size_t size, bool is_hugetlb) 607 { 608 do_test_iouring(mem, size, true); 609 } 610 611 #endif /* LOCAL_CONFIG_HAVE_LIBURING */ 612 613 enum ro_pin_test { 614 RO_PIN_TEST, 615 RO_PIN_TEST_SHARED, 616 RO_PIN_TEST_PREVIOUSLY_SHARED, 617 RO_PIN_TEST_RO_EXCLUSIVE, 618 }; 619 620 static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, 621 bool fast) 622 { 623 struct pin_longterm_test args; 624 struct comm_pipes comm_pipes; 625 char *tmp, buf; 626 __u64 tmp_val; 627 int ret; 628 629 if (gup_fd < 0) { 630 ksft_print_msg("gup_test not available\n"); 631 log_test_result(KSFT_SKIP); 632 return; 633 } 634 635 tmp = malloc(size); 636 if (!tmp) { 637 ksft_perror("malloc() failed\n"); 638 log_test_result(KSFT_FAIL); 639 return; 640 } 641 642 ret = setup_comm_pipes(&comm_pipes); 643 if (ret) { 644 log_test_result(KSFT_FAIL); 645 goto free_tmp; 646 } 647 648 switch (test) { 649 case RO_PIN_TEST: 650 break; 651 case RO_PIN_TEST_SHARED: 652 case RO_PIN_TEST_PREVIOUSLY_SHARED: 653 /* 654 * Share the pages with our child. As the pages are not pinned, 655 * this should just work. 656 */ 657 ret = fork(); 658 if (ret < 0) { 659 ksft_perror("fork() failed"); 660 log_test_result(KSFT_FAIL); 661 goto close_comm_pipes; 662 } else if (!ret) { 663 write(comm_pipes.child_ready[1], "0", 1); 664 while (read(comm_pipes.parent_ready[0], &buf, 1) != 1) 665 ; 666 exit(0); 667 } 668 669 /* Wait until our child is ready. */ 670 while (read(comm_pipes.child_ready[0], &buf, 1) != 1) 671 ; 672 673 if (test == RO_PIN_TEST_PREVIOUSLY_SHARED) { 674 /* 675 * Tell the child to quit now and wait until it quit. 676 * The pages should now be mapped R/O into our page 677 * tables, but they are no longer shared. 678 */ 679 write(comm_pipes.parent_ready[1], "0", 1); 680 wait(&ret); 681 if (!WIFEXITED(ret)) 682 ksft_print_msg("[INFO] wait() failed\n"); 683 } 684 break; 685 case RO_PIN_TEST_RO_EXCLUSIVE: 686 /* 687 * Map the page R/O into the page table. Enable softdirty 688 * tracking to stop the page from getting mapped R/W immediately 689 * again by mprotect() optimizations. Note that we don't have an 690 * easy way to test if that worked (the pagemap does not export 691 * if the page is mapped R/O vs. R/W). 692 */ 693 ret = mprotect(mem, size, PROT_READ); 694 clear_softdirty(); 695 ret |= mprotect(mem, size, PROT_READ | PROT_WRITE); 696 if (ret) { 697 ksft_perror("mprotect() failed"); 698 log_test_result(KSFT_FAIL); 699 goto close_comm_pipes; 700 } 701 break; 702 default: 703 assert(false); 704 } 705 706 /* Take a R/O pin. This should trigger unsharing. */ 707 args.addr = (__u64)(uintptr_t)mem; 708 args.size = size; 709 args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0; 710 ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args); 711 if (ret) { 712 if (errno == EINVAL) 713 ret = KSFT_SKIP; 714 else 715 ret = KSFT_FAIL; 716 ksft_perror("PIN_LONGTERM_TEST_START failed"); 717 log_test_result(ret); 718 goto wait; 719 } 720 721 /* Modify the page. */ 722 memset(mem, 0xff, size); 723 724 /* 725 * Read back the content via the pin to the temporary buffer and 726 * test if we observed the modification. 727 */ 728 tmp_val = (__u64)(uintptr_t)tmp; 729 ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val); 730 if (ret) { 731 ksft_perror("PIN_LONGTERM_TEST_READ failed"); 732 log_test_result(KSFT_FAIL); 733 } else { 734 if (!memcmp(mem, tmp, size)) { 735 log_test_result(KSFT_PASS); 736 } else { 737 ksft_print_msg("Longterm R/O pin is not reliable\n"); 738 log_test_result(KSFT_FAIL); 739 } 740 } 741 742 ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP); 743 if (ret) 744 ksft_perror("PIN_LONGTERM_TEST_STOP failed"); 745 wait: 746 switch (test) { 747 case RO_PIN_TEST_SHARED: 748 write(comm_pipes.parent_ready[1], "0", 1); 749 wait(&ret); 750 if (!WIFEXITED(ret)) 751 ksft_perror("wait() failed"); 752 break; 753 default: 754 break; 755 } 756 close_comm_pipes: 757 close_comm_pipes(&comm_pipes); 758 free_tmp: 759 free(tmp); 760 } 761 762 static void test_ro_pin_on_shared(char *mem, size_t size, bool is_hugetlb) 763 { 764 do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, false); 765 } 766 767 static void test_ro_fast_pin_on_shared(char *mem, size_t size, bool is_hugetlb) 768 { 769 do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, true); 770 } 771 772 static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size, 773 bool is_hugetlb) 774 { 775 do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, false); 776 } 777 778 static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size, 779 bool is_hugetlb) 780 { 781 do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, true); 782 } 783 784 static void test_ro_pin_on_ro_exclusive(char *mem, size_t size, 785 bool is_hugetlb) 786 { 787 do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, false); 788 } 789 790 static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size, 791 bool is_hugetlb) 792 { 793 do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, true); 794 } 795 796 typedef void (*test_fn)(char *mem, size_t size, bool hugetlb); 797 798 static void do_run_with_base_page(test_fn fn, bool swapout) 799 { 800 char *mem; 801 int ret; 802 803 mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, 804 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 805 if (mem == MAP_FAILED) { 806 ksft_perror("mmap() failed"); 807 log_test_result(KSFT_FAIL); 808 return; 809 } 810 811 ret = madvise(mem, pagesize, MADV_NOHUGEPAGE); 812 /* Ignore if not around on a kernel. */ 813 if (ret && errno != EINVAL) { 814 ksft_perror("MADV_NOHUGEPAGE failed"); 815 log_test_result(KSFT_FAIL); 816 goto munmap; 817 } 818 819 /* Populate a base page. */ 820 memset(mem, 1, pagesize); 821 822 if (swapout) { 823 madvise(mem, pagesize, MADV_PAGEOUT); 824 if (!pagemap_is_swapped(pagemap_fd, mem)) { 825 ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n"); 826 log_test_result(KSFT_SKIP); 827 goto munmap; 828 } 829 } 830 831 fn(mem, pagesize, false); 832 munmap: 833 munmap(mem, pagesize); 834 } 835 836 static void run_with_base_page(test_fn fn, const char *desc) 837 { 838 log_test_start("%s ... with base page", desc); 839 do_run_with_base_page(fn, false); 840 } 841 842 static void run_with_base_page_swap(test_fn fn, const char *desc) 843 { 844 log_test_start("%s ... with swapped out base page", desc); 845 do_run_with_base_page(fn, true); 846 } 847 848 enum thp_run { 849 THP_RUN_PMD, 850 THP_RUN_PMD_SWAPOUT, 851 THP_RUN_PTE, 852 THP_RUN_PTE_SWAPOUT, 853 THP_RUN_SINGLE_PTE, 854 THP_RUN_SINGLE_PTE_SWAPOUT, 855 THP_RUN_PARTIAL_MREMAP, 856 THP_RUN_PARTIAL_SHARED, 857 }; 858 859 static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) 860 { 861 char *mem, *mmap_mem, *tmp, *mremap_mem = MAP_FAILED; 862 size_t size, mmap_size, mremap_size; 863 int ret; 864 865 /* For alignment purposes, we need twice the thp size. */ 866 mmap_size = 2 * thpsize; 867 mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, 868 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 869 if (mmap_mem == MAP_FAILED) { 870 ksft_perror("mmap() failed"); 871 log_test_result(KSFT_FAIL); 872 return; 873 } 874 875 /* We need a THP-aligned memory area. */ 876 mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1)); 877 878 ret = madvise(mem, thpsize, MADV_HUGEPAGE); 879 if (ret) { 880 ksft_perror("MADV_HUGEPAGE failed"); 881 log_test_result(KSFT_FAIL); 882 goto munmap; 883 } 884 885 /* 886 * Try to populate a THP. Touch the first sub-page and test if 887 * we get the last sub-page populated automatically. 888 */ 889 mem[0] = 1; 890 if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) { 891 ksft_print_msg("Did not get a THP populated\n"); 892 log_test_result(KSFT_SKIP); 893 goto munmap; 894 } 895 memset(mem, 1, thpsize); 896 897 size = thpsize; 898 switch (thp_run) { 899 case THP_RUN_PMD: 900 case THP_RUN_PMD_SWAPOUT: 901 assert(thpsize == pmdsize); 902 break; 903 case THP_RUN_PTE: 904 case THP_RUN_PTE_SWAPOUT: 905 /* 906 * Trigger PTE-mapping the THP by temporarily mapping a single 907 * subpage R/O. This is a noop if the THP is not pmdsize (and 908 * therefore already PTE-mapped). 909 */ 910 ret = mprotect(mem + pagesize, pagesize, PROT_READ); 911 if (ret) { 912 ksft_perror("mprotect() failed"); 913 log_test_result(KSFT_FAIL); 914 goto munmap; 915 } 916 ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE); 917 if (ret) { 918 ksft_perror("mprotect() failed"); 919 log_test_result(KSFT_FAIL); 920 goto munmap; 921 } 922 break; 923 case THP_RUN_SINGLE_PTE: 924 case THP_RUN_SINGLE_PTE_SWAPOUT: 925 /* 926 * Discard all but a single subpage of that PTE-mapped THP. What 927 * remains is a single PTE mapping a single subpage. 928 */ 929 ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED); 930 if (ret) { 931 ksft_perror("MADV_DONTNEED failed"); 932 log_test_result(KSFT_FAIL); 933 goto munmap; 934 } 935 size = pagesize; 936 break; 937 case THP_RUN_PARTIAL_MREMAP: 938 /* 939 * Remap half of the THP. We need some new memory location 940 * for that. 941 */ 942 mremap_size = thpsize / 2; 943 mremap_mem = mmap(NULL, mremap_size, PROT_NONE, 944 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 945 if (mremap_mem == MAP_FAILED) { 946 ksft_perror("mmap() failed"); 947 log_test_result(KSFT_FAIL); 948 goto munmap; 949 } 950 tmp = mremap(mem + mremap_size, mremap_size, mremap_size, 951 MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem); 952 if (tmp != mremap_mem) { 953 ksft_perror("mremap() failed"); 954 log_test_result(KSFT_FAIL); 955 goto munmap; 956 } 957 size = mremap_size; 958 break; 959 case THP_RUN_PARTIAL_SHARED: 960 /* 961 * Share the first page of the THP with a child and quit the 962 * child. This will result in some parts of the THP never 963 * have been shared. 964 */ 965 ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK); 966 if (ret) { 967 ksft_perror("MADV_DONTFORK failed"); 968 log_test_result(KSFT_FAIL); 969 goto munmap; 970 } 971 ret = fork(); 972 if (ret < 0) { 973 ksft_perror("fork() failed"); 974 log_test_result(KSFT_FAIL); 975 goto munmap; 976 } else if (!ret) { 977 exit(0); 978 } 979 wait(&ret); 980 /* Allow for sharing all pages again. */ 981 ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK); 982 if (ret) { 983 ksft_perror("MADV_DOFORK failed"); 984 log_test_result(KSFT_FAIL); 985 goto munmap; 986 } 987 break; 988 default: 989 assert(false); 990 } 991 992 switch (thp_run) { 993 case THP_RUN_PMD_SWAPOUT: 994 case THP_RUN_PTE_SWAPOUT: 995 case THP_RUN_SINGLE_PTE_SWAPOUT: 996 madvise(mem, size, MADV_PAGEOUT); 997 if (!range_is_swapped(mem, size)) { 998 ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n"); 999 log_test_result(KSFT_SKIP); 1000 goto munmap; 1001 } 1002 break; 1003 default: 1004 break; 1005 } 1006 1007 fn(mem, size, false); 1008 munmap: 1009 munmap(mmap_mem, mmap_size); 1010 if (mremap_mem != MAP_FAILED) 1011 munmap(mremap_mem, mremap_size); 1012 } 1013 1014 static void run_with_thp(test_fn fn, const char *desc, size_t size) 1015 { 1016 log_test_start("%s ... with THP (%zu kB)", 1017 desc, size / 1024); 1018 do_run_with_thp(fn, THP_RUN_PMD, size); 1019 } 1020 1021 static void run_with_thp_swap(test_fn fn, const char *desc, size_t size) 1022 { 1023 log_test_start("%s ... with swapped-out THP (%zu kB)", 1024 desc, size / 1024); 1025 do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT, size); 1026 } 1027 1028 static void run_with_pte_mapped_thp(test_fn fn, const char *desc, size_t size) 1029 { 1030 log_test_start("%s ... with PTE-mapped THP (%zu kB)", 1031 desc, size / 1024); 1032 do_run_with_thp(fn, THP_RUN_PTE, size); 1033 } 1034 1035 static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc, size_t size) 1036 { 1037 log_test_start("%s ... with swapped-out, PTE-mapped THP (%zu kB)", 1038 desc, size / 1024); 1039 do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT, size); 1040 } 1041 1042 static void run_with_single_pte_of_thp(test_fn fn, const char *desc, size_t size) 1043 { 1044 log_test_start("%s ... with single PTE of THP (%zu kB)", 1045 desc, size / 1024); 1046 do_run_with_thp(fn, THP_RUN_SINGLE_PTE, size); 1047 } 1048 1049 static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc, size_t size) 1050 { 1051 log_test_start("%s ... with single PTE of swapped-out THP (%zu kB)", 1052 desc, size / 1024); 1053 do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT, size); 1054 } 1055 1056 static void run_with_partial_mremap_thp(test_fn fn, const char *desc, size_t size) 1057 { 1058 log_test_start("%s ... with partially mremap()'ed THP (%zu kB)", 1059 desc, size / 1024); 1060 do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP, size); 1061 } 1062 1063 static void run_with_partial_shared_thp(test_fn fn, const char *desc, size_t size) 1064 { 1065 log_test_start("%s ... with partially shared THP (%zu kB)", 1066 desc, size / 1024); 1067 do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED, size); 1068 } 1069 1070 static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize) 1071 { 1072 int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB; 1073 char *mem, *dummy; 1074 1075 log_test_start("%s ... with hugetlb (%zu kB)", desc, 1076 hugetlbsize / 1024); 1077 1078 flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT; 1079 1080 mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0); 1081 if (mem == MAP_FAILED) { 1082 ksft_perror("need more free huge pages"); 1083 log_test_result(KSFT_SKIP); 1084 return; 1085 } 1086 1087 /* Populate an huge page. */ 1088 memset(mem, 1, hugetlbsize); 1089 1090 /* 1091 * We need a total of two hugetlb pages to handle COW/unsharing 1092 * properly, otherwise we might get zapped by a SIGBUS. 1093 */ 1094 dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0); 1095 if (dummy == MAP_FAILED) { 1096 ksft_perror("need more free huge pages"); 1097 log_test_result(KSFT_SKIP); 1098 goto munmap; 1099 } 1100 munmap(dummy, hugetlbsize); 1101 1102 fn(mem, hugetlbsize, true); 1103 munmap: 1104 munmap(mem, hugetlbsize); 1105 } 1106 1107 struct test_case { 1108 const char *desc; 1109 test_fn fn; 1110 }; 1111 1112 /* 1113 * Test cases that are specific to anonymous pages: pages in private mappings 1114 * that may get shared via COW during fork(). 1115 */ 1116 static const struct test_case anon_test_cases[] = { 1117 /* 1118 * Basic COW tests for fork() without any GUP. If we miss to break COW, 1119 * either the child can observe modifications by the parent or the 1120 * other way around. 1121 */ 1122 { 1123 "Basic COW after fork()", 1124 test_cow_in_parent, 1125 }, 1126 /* 1127 * Basic test, but do an additional mprotect(PROT_READ)+ 1128 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access. 1129 */ 1130 { 1131 "Basic COW after fork() with mprotect() optimization", 1132 test_cow_in_parent_mprotect, 1133 }, 1134 /* 1135 * vmsplice() [R/O GUP] + unmap in the child; modify in the parent. If 1136 * we miss to break COW, the child observes modifications by the parent. 1137 * This is CVE-2020-29374 reported by Jann Horn. 1138 */ 1139 { 1140 "vmsplice() + unmap in child", 1141 test_vmsplice_in_child, 1142 }, 1143 /* 1144 * vmsplice() test, but do an additional mprotect(PROT_READ)+ 1145 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access. 1146 */ 1147 { 1148 "vmsplice() + unmap in child with mprotect() optimization", 1149 test_vmsplice_in_child_mprotect, 1150 }, 1151 /* 1152 * vmsplice() [R/O GUP] in parent before fork(), unmap in parent after 1153 * fork(); modify in the child. If we miss to break COW, the parent 1154 * observes modifications by the child. 1155 */ 1156 { 1157 "vmsplice() before fork(), unmap in parent after fork()", 1158 test_vmsplice_before_fork, 1159 }, 1160 /* 1161 * vmsplice() [R/O GUP] + unmap in parent after fork(); modify in the 1162 * child. If we miss to break COW, the parent observes modifications by 1163 * the child. 1164 */ 1165 { 1166 "vmsplice() + unmap in parent after fork()", 1167 test_vmsplice_after_fork, 1168 }, 1169 #ifdef LOCAL_CONFIG_HAVE_LIBURING 1170 /* 1171 * Take a R/W longterm pin and then map the page R/O into the page 1172 * table to trigger a write fault on next access. When modifying the 1173 * page, the page content must be visible via the pin. 1174 */ 1175 { 1176 "R/O-mapping a page registered as iouring fixed buffer", 1177 test_iouring_ro, 1178 }, 1179 /* 1180 * Take a R/W longterm pin and then fork() a child. When modifying the 1181 * page, the page content must be visible via the pin. We expect the 1182 * pinned page to not get shared with the child. 1183 */ 1184 { 1185 "fork() with an iouring fixed buffer", 1186 test_iouring_fork, 1187 }, 1188 1189 #endif /* LOCAL_CONFIG_HAVE_LIBURING */ 1190 /* 1191 * Take a R/O longterm pin on a R/O-mapped shared anonymous page. 1192 * When modifying the page via the page table, the page content change 1193 * must be visible via the pin. 1194 */ 1195 { 1196 "R/O GUP pin on R/O-mapped shared page", 1197 test_ro_pin_on_shared, 1198 }, 1199 /* Same as above, but using GUP-fast. */ 1200 { 1201 "R/O GUP-fast pin on R/O-mapped shared page", 1202 test_ro_fast_pin_on_shared, 1203 }, 1204 /* 1205 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page that 1206 * was previously shared. When modifying the page via the page table, 1207 * the page content change must be visible via the pin. 1208 */ 1209 { 1210 "R/O GUP pin on R/O-mapped previously-shared page", 1211 test_ro_pin_on_ro_previously_shared, 1212 }, 1213 /* Same as above, but using GUP-fast. */ 1214 { 1215 "R/O GUP-fast pin on R/O-mapped previously-shared page", 1216 test_ro_fast_pin_on_ro_previously_shared, 1217 }, 1218 /* 1219 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page. 1220 * When modifying the page via the page table, the page content change 1221 * must be visible via the pin. 1222 */ 1223 { 1224 "R/O GUP pin on R/O-mapped exclusive page", 1225 test_ro_pin_on_ro_exclusive, 1226 }, 1227 /* Same as above, but using GUP-fast. */ 1228 { 1229 "R/O GUP-fast pin on R/O-mapped exclusive page", 1230 test_ro_fast_pin_on_ro_exclusive, 1231 }, 1232 }; 1233 1234 static void run_anon_test_case(struct test_case const *test_case) 1235 { 1236 int i; 1237 1238 run_with_base_page(test_case->fn, test_case->desc); 1239 run_with_base_page_swap(test_case->fn, test_case->desc); 1240 for (i = 0; i < nr_thpsizes; i++) { 1241 size_t size = thpsizes[i]; 1242 struct thp_settings settings = *thp_current_settings(); 1243 1244 settings.hugepages[sz2ord(pmdsize)].enabled = THP_NEVER; 1245 settings.hugepages[sz2ord(size)].enabled = THP_ALWAYS; 1246 thp_push_settings(&settings); 1247 1248 if (size == pmdsize) { 1249 run_with_thp(test_case->fn, test_case->desc, size); 1250 run_with_thp_swap(test_case->fn, test_case->desc, size); 1251 } 1252 1253 run_with_pte_mapped_thp(test_case->fn, test_case->desc, size); 1254 run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc, size); 1255 run_with_single_pte_of_thp(test_case->fn, test_case->desc, size); 1256 run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc, size); 1257 run_with_partial_mremap_thp(test_case->fn, test_case->desc, size); 1258 run_with_partial_shared_thp(test_case->fn, test_case->desc, size); 1259 1260 thp_pop_settings(); 1261 } 1262 for (i = 0; i < nr_hugetlbsizes; i++) 1263 run_with_hugetlb(test_case->fn, test_case->desc, 1264 hugetlbsizes[i]); 1265 } 1266 1267 static void run_anon_test_cases(void) 1268 { 1269 int i; 1270 1271 ksft_print_msg("[INFO] Anonymous memory tests in private mappings\n"); 1272 1273 for (i = 0; i < ARRAY_SIZE(anon_test_cases); i++) 1274 run_anon_test_case(&anon_test_cases[i]); 1275 } 1276 1277 static int tests_per_anon_test_case(void) 1278 { 1279 int tests = 2 + nr_hugetlbsizes; 1280 1281 tests += 6 * nr_thpsizes; 1282 if (pmdsize) 1283 tests += 2; 1284 return tests; 1285 } 1286 1287 enum anon_thp_collapse_test { 1288 ANON_THP_COLLAPSE_UNSHARED, 1289 ANON_THP_COLLAPSE_FULLY_SHARED, 1290 ANON_THP_COLLAPSE_LOWER_SHARED, 1291 ANON_THP_COLLAPSE_UPPER_SHARED, 1292 }; 1293 1294 static void do_test_anon_thp_collapse(char *mem, size_t size, 1295 enum anon_thp_collapse_test test) 1296 { 1297 struct comm_pipes comm_pipes; 1298 char buf; 1299 int ret; 1300 1301 ret = setup_comm_pipes(&comm_pipes); 1302 if (ret) { 1303 log_test_result(KSFT_FAIL); 1304 return; 1305 } 1306 1307 /* 1308 * Trigger PTE-mapping the THP by temporarily mapping a single subpage 1309 * R/O, such that we can try collapsing it later. 1310 */ 1311 ret = mprotect(mem + pagesize, pagesize, PROT_READ); 1312 if (ret) { 1313 ksft_perror("mprotect() failed"); 1314 log_test_result(KSFT_FAIL); 1315 goto close_comm_pipes; 1316 } 1317 ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE); 1318 if (ret) { 1319 ksft_perror("mprotect() failed"); 1320 log_test_result(KSFT_FAIL); 1321 goto close_comm_pipes; 1322 } 1323 1324 switch (test) { 1325 case ANON_THP_COLLAPSE_UNSHARED: 1326 /* Collapse before actually COW-sharing the page. */ 1327 ret = madvise(mem, size, MADV_COLLAPSE); 1328 if (ret) { 1329 ksft_perror("MADV_COLLAPSE failed"); 1330 log_test_result(KSFT_SKIP); 1331 goto close_comm_pipes; 1332 } 1333 break; 1334 case ANON_THP_COLLAPSE_FULLY_SHARED: 1335 /* COW-share the full PTE-mapped THP. */ 1336 break; 1337 case ANON_THP_COLLAPSE_LOWER_SHARED: 1338 /* Don't COW-share the upper part of the THP. */ 1339 ret = madvise(mem + size / 2, size / 2, MADV_DONTFORK); 1340 if (ret) { 1341 ksft_perror("MADV_DONTFORK failed"); 1342 log_test_result(KSFT_FAIL); 1343 goto close_comm_pipes; 1344 } 1345 break; 1346 case ANON_THP_COLLAPSE_UPPER_SHARED: 1347 /* Don't COW-share the lower part of the THP. */ 1348 ret = madvise(mem, size / 2, MADV_DONTFORK); 1349 if (ret) { 1350 ksft_perror("MADV_DONTFORK failed"); 1351 log_test_result(KSFT_FAIL); 1352 goto close_comm_pipes; 1353 } 1354 break; 1355 default: 1356 assert(false); 1357 } 1358 1359 ret = fork(); 1360 if (ret < 0) { 1361 ksft_perror("fork() failed"); 1362 log_test_result(KSFT_FAIL); 1363 goto close_comm_pipes; 1364 } else if (!ret) { 1365 switch (test) { 1366 case ANON_THP_COLLAPSE_UNSHARED: 1367 case ANON_THP_COLLAPSE_FULLY_SHARED: 1368 exit(child_memcmp_fn(mem, size, &comm_pipes)); 1369 break; 1370 case ANON_THP_COLLAPSE_LOWER_SHARED: 1371 exit(child_memcmp_fn(mem, size / 2, &comm_pipes)); 1372 break; 1373 case ANON_THP_COLLAPSE_UPPER_SHARED: 1374 exit(child_memcmp_fn(mem + size / 2, size / 2, 1375 &comm_pipes)); 1376 break; 1377 default: 1378 assert(false); 1379 } 1380 } 1381 1382 while (read(comm_pipes.child_ready[0], &buf, 1) != 1) 1383 ; 1384 1385 switch (test) { 1386 case ANON_THP_COLLAPSE_UNSHARED: 1387 break; 1388 case ANON_THP_COLLAPSE_UPPER_SHARED: 1389 case ANON_THP_COLLAPSE_LOWER_SHARED: 1390 /* 1391 * Revert MADV_DONTFORK such that we merge the VMAs and are 1392 * able to actually collapse. 1393 */ 1394 ret = madvise(mem, size, MADV_DOFORK); 1395 if (ret) { 1396 ksft_perror("MADV_DOFORK failed"); 1397 log_test_result(KSFT_FAIL); 1398 write(comm_pipes.parent_ready[1], "0", 1); 1399 wait(&ret); 1400 goto close_comm_pipes; 1401 } 1402 /* FALLTHROUGH */ 1403 case ANON_THP_COLLAPSE_FULLY_SHARED: 1404 /* Collapse before anyone modified the COW-shared page. */ 1405 ret = madvise(mem, size, MADV_COLLAPSE); 1406 if (ret) { 1407 ksft_perror("MADV_COLLAPSE failed"); 1408 log_test_result(KSFT_SKIP); 1409 write(comm_pipes.parent_ready[1], "0", 1); 1410 wait(&ret); 1411 goto close_comm_pipes; 1412 } 1413 break; 1414 default: 1415 assert(false); 1416 } 1417 1418 /* Modify the page. */ 1419 memset(mem, 0xff, size); 1420 write(comm_pipes.parent_ready[1], "0", 1); 1421 1422 wait(&ret); 1423 if (WIFEXITED(ret)) 1424 ret = WEXITSTATUS(ret); 1425 else 1426 ret = -EINVAL; 1427 1428 if (!ret) { 1429 log_test_result(KSFT_PASS); 1430 } else { 1431 ksft_print_msg("Leak from parent into child\n"); 1432 log_test_result(KSFT_FAIL); 1433 } 1434 close_comm_pipes: 1435 close_comm_pipes(&comm_pipes); 1436 } 1437 1438 static void test_anon_thp_collapse_unshared(char *mem, size_t size, 1439 bool is_hugetlb) 1440 { 1441 assert(!is_hugetlb); 1442 do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UNSHARED); 1443 } 1444 1445 static void test_anon_thp_collapse_fully_shared(char *mem, size_t size, 1446 bool is_hugetlb) 1447 { 1448 assert(!is_hugetlb); 1449 do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_FULLY_SHARED); 1450 } 1451 1452 static void test_anon_thp_collapse_lower_shared(char *mem, size_t size, 1453 bool is_hugetlb) 1454 { 1455 assert(!is_hugetlb); 1456 do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_LOWER_SHARED); 1457 } 1458 1459 static void test_anon_thp_collapse_upper_shared(char *mem, size_t size, 1460 bool is_hugetlb) 1461 { 1462 assert(!is_hugetlb); 1463 do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UPPER_SHARED); 1464 } 1465 1466 /* 1467 * Test cases that are specific to anonymous THP: pages in private mappings 1468 * that may get shared via COW during fork(). 1469 */ 1470 static const struct test_case anon_thp_test_cases[] = { 1471 /* 1472 * Basic COW test for fork() without any GUP when collapsing a THP 1473 * before fork(). 1474 * 1475 * Re-mapping a PTE-mapped anon THP using a single PMD ("in-place 1476 * collapse") might easily get COW handling wrong when not collapsing 1477 * exclusivity information properly. 1478 */ 1479 { 1480 "Basic COW after fork() when collapsing before fork()", 1481 test_anon_thp_collapse_unshared, 1482 }, 1483 /* Basic COW test, but collapse after COW-sharing a full THP. */ 1484 { 1485 "Basic COW after fork() when collapsing after fork() (fully shared)", 1486 test_anon_thp_collapse_fully_shared, 1487 }, 1488 /* 1489 * Basic COW test, but collapse after COW-sharing the lower half of a 1490 * THP. 1491 */ 1492 { 1493 "Basic COW after fork() when collapsing after fork() (lower shared)", 1494 test_anon_thp_collapse_lower_shared, 1495 }, 1496 /* 1497 * Basic COW test, but collapse after COW-sharing the upper half of a 1498 * THP. 1499 */ 1500 { 1501 "Basic COW after fork() when collapsing after fork() (upper shared)", 1502 test_anon_thp_collapse_upper_shared, 1503 }, 1504 }; 1505 1506 static void run_anon_thp_test_cases(void) 1507 { 1508 int i; 1509 1510 if (!pmdsize) 1511 return; 1512 1513 ksft_print_msg("[INFO] Anonymous THP tests\n"); 1514 1515 for (i = 0; i < ARRAY_SIZE(anon_thp_test_cases); i++) { 1516 struct test_case const *test_case = &anon_thp_test_cases[i]; 1517 1518 log_test_start("%s", test_case->desc); 1519 do_run_with_thp(test_case->fn, THP_RUN_PMD, pmdsize); 1520 } 1521 } 1522 1523 static int tests_per_anon_thp_test_case(void) 1524 { 1525 return pmdsize ? 1 : 0; 1526 } 1527 1528 typedef void (*non_anon_test_fn)(char *mem, const char *smem, size_t size); 1529 1530 static void test_cow(char *mem, const char *smem, size_t size) 1531 { 1532 char *old = malloc(size); 1533 1534 /* Backup the original content. */ 1535 memcpy(old, smem, size); 1536 1537 /* Modify the page. */ 1538 memset(mem, 0xff, size); 1539 1540 /* See if we still read the old values via the other mapping. */ 1541 if (!memcmp(smem, old, size)) { 1542 log_test_result(KSFT_PASS); 1543 } else { 1544 ksft_print_msg("Other mapping modified\n"); 1545 log_test_result(KSFT_FAIL); 1546 } 1547 free(old); 1548 } 1549 1550 static void test_ro_pin(char *mem, const char *smem, size_t size) 1551 { 1552 do_test_ro_pin(mem, size, RO_PIN_TEST, false); 1553 } 1554 1555 static void test_ro_fast_pin(char *mem, const char *smem, size_t size) 1556 { 1557 do_test_ro_pin(mem, size, RO_PIN_TEST, true); 1558 } 1559 1560 static void run_with_zeropage(non_anon_test_fn fn, const char *desc) 1561 { 1562 char *mem, *smem, tmp; 1563 1564 log_test_start("%s ... with shared zeropage", desc); 1565 1566 mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, 1567 MAP_PRIVATE | MAP_ANON, -1, 0); 1568 if (mem == MAP_FAILED) { 1569 ksft_perror("mmap() failed"); 1570 log_test_result(KSFT_FAIL); 1571 return; 1572 } 1573 1574 smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0); 1575 if (smem == MAP_FAILED) { 1576 ksft_perror("mmap() failed"); 1577 log_test_result(KSFT_FAIL); 1578 goto munmap; 1579 } 1580 1581 /* Read from the page to populate the shared zeropage. */ 1582 tmp = *mem + *smem; 1583 asm volatile("" : "+r" (tmp)); 1584 1585 fn(mem, smem, pagesize); 1586 munmap: 1587 munmap(mem, pagesize); 1588 if (smem != MAP_FAILED) 1589 munmap(smem, pagesize); 1590 } 1591 1592 static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) 1593 { 1594 char *mem, *smem, *mmap_mem, *mmap_smem, tmp; 1595 size_t mmap_size; 1596 int ret; 1597 1598 log_test_start("%s ... with huge zeropage", desc); 1599 1600 if (!has_huge_zeropage) { 1601 ksft_print_msg("Huge zeropage not enabled\n"); 1602 log_test_result(KSFT_SKIP); 1603 return; 1604 } 1605 1606 /* For alignment purposes, we need twice the thp size. */ 1607 mmap_size = 2 * pmdsize; 1608 mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, 1609 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 1610 if (mmap_mem == MAP_FAILED) { 1611 ksft_perror("mmap() failed"); 1612 log_test_result(KSFT_FAIL); 1613 return; 1614 } 1615 mmap_smem = mmap(NULL, mmap_size, PROT_READ, 1616 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 1617 if (mmap_smem == MAP_FAILED) { 1618 ksft_perror("mmap() failed"); 1619 log_test_result(KSFT_FAIL); 1620 goto munmap; 1621 } 1622 1623 /* We need a THP-aligned memory area. */ 1624 mem = (char *)(((uintptr_t)mmap_mem + pmdsize) & ~(pmdsize - 1)); 1625 smem = (char *)(((uintptr_t)mmap_smem + pmdsize) & ~(pmdsize - 1)); 1626 1627 ret = madvise(mem, pmdsize, MADV_HUGEPAGE); 1628 if (ret) { 1629 ksft_perror("madvise()"); 1630 log_test_result(KSFT_FAIL); 1631 goto munmap; 1632 } 1633 ret = madvise(smem, pmdsize, MADV_HUGEPAGE); 1634 if (ret) { 1635 ksft_perror("madvise()"); 1636 log_test_result(KSFT_FAIL); 1637 goto munmap; 1638 } 1639 1640 /* 1641 * Read from the memory to populate the huge shared zeropage. Read from 1642 * the first sub-page and test if we get another sub-page populated 1643 * automatically. 1644 */ 1645 tmp = *mem + *smem; 1646 asm volatile("" : "+r" (tmp)); 1647 if (!pagemap_is_populated(pagemap_fd, mem + pagesize) || 1648 !pagemap_is_populated(pagemap_fd, smem + pagesize)) { 1649 ksft_test_result_skip("Did not get THPs populated\n"); 1650 goto munmap; 1651 } 1652 1653 fn(mem, smem, pmdsize); 1654 munmap: 1655 munmap(mmap_mem, mmap_size); 1656 if (mmap_smem != MAP_FAILED) 1657 munmap(mmap_smem, mmap_size); 1658 } 1659 1660 static void run_with_memfd(non_anon_test_fn fn, const char *desc) 1661 { 1662 char *mem, *smem, tmp; 1663 int fd; 1664 1665 log_test_start("%s ... with memfd", desc); 1666 1667 fd = memfd_create("test", 0); 1668 if (fd < 0) { 1669 ksft_perror("memfd_create() failed"); 1670 log_test_result(KSFT_FAIL); 1671 return; 1672 } 1673 1674 /* File consists of a single page filled with zeroes. */ 1675 if (fallocate(fd, 0, 0, pagesize)) { 1676 ksft_perror("fallocate() failed"); 1677 log_test_result(KSFT_FAIL); 1678 goto close; 1679 } 1680 1681 /* Create a private mapping of the memfd. */ 1682 mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); 1683 if (mem == MAP_FAILED) { 1684 ksft_perror("mmap() failed"); 1685 log_test_result(KSFT_FAIL); 1686 goto close; 1687 } 1688 smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); 1689 if (smem == MAP_FAILED) { 1690 ksft_perror("mmap() failed"); 1691 log_test_result(KSFT_FAIL); 1692 goto munmap; 1693 } 1694 1695 /* Fault the page in. */ 1696 tmp = *mem + *smem; 1697 asm volatile("" : "+r" (tmp)); 1698 1699 fn(mem, smem, pagesize); 1700 munmap: 1701 munmap(mem, pagesize); 1702 if (smem != MAP_FAILED) 1703 munmap(smem, pagesize); 1704 close: 1705 close(fd); 1706 } 1707 1708 static void run_with_tmpfile(non_anon_test_fn fn, const char *desc) 1709 { 1710 char *mem, *smem, tmp; 1711 FILE *file; 1712 int fd; 1713 1714 log_test_start("%s ... with tmpfile", desc); 1715 1716 file = tmpfile(); 1717 if (!file) { 1718 ksft_perror("tmpfile() failed"); 1719 log_test_result(KSFT_FAIL); 1720 return; 1721 } 1722 1723 fd = fileno(file); 1724 if (fd < 0) { 1725 ksft_perror("fileno() failed"); 1726 log_test_result(KSFT_SKIP); 1727 return; 1728 } 1729 1730 /* File consists of a single page filled with zeroes. */ 1731 if (fallocate(fd, 0, 0, pagesize)) { 1732 ksft_perror("fallocate() failed"); 1733 log_test_result(KSFT_FAIL); 1734 goto close; 1735 } 1736 1737 /* Create a private mapping of the memfd. */ 1738 mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); 1739 if (mem == MAP_FAILED) { 1740 ksft_perror("mmap() failed"); 1741 log_test_result(KSFT_FAIL); 1742 goto close; 1743 } 1744 smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); 1745 if (smem == MAP_FAILED) { 1746 ksft_perror("mmap() failed"); 1747 log_test_result(KSFT_FAIL); 1748 goto munmap; 1749 } 1750 1751 /* Fault the page in. */ 1752 tmp = *mem + *smem; 1753 asm volatile("" : "+r" (tmp)); 1754 1755 fn(mem, smem, pagesize); 1756 munmap: 1757 munmap(mem, pagesize); 1758 if (smem != MAP_FAILED) 1759 munmap(smem, pagesize); 1760 close: 1761 fclose(file); 1762 } 1763 1764 static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc, 1765 size_t hugetlbsize) 1766 { 1767 int flags = MFD_HUGETLB; 1768 char *mem, *smem, tmp; 1769 int fd; 1770 1771 log_test_start("%s ... with memfd hugetlb (%zu kB)", desc, 1772 hugetlbsize / 1024); 1773 1774 flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT; 1775 1776 fd = memfd_create("test", flags); 1777 if (fd < 0) { 1778 ksft_perror("memfd_create() failed"); 1779 log_test_result(KSFT_SKIP); 1780 return; 1781 } 1782 1783 /* File consists of a single page filled with zeroes. */ 1784 if (fallocate(fd, 0, 0, hugetlbsize)) { 1785 ksft_perror("need more free huge pages"); 1786 log_test_result(KSFT_SKIP); 1787 goto close; 1788 } 1789 1790 /* Create a private mapping of the memfd. */ 1791 mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 1792 0); 1793 if (mem == MAP_FAILED) { 1794 ksft_perror("need more free huge pages"); 1795 log_test_result(KSFT_SKIP); 1796 goto close; 1797 } 1798 smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0); 1799 if (smem == MAP_FAILED) { 1800 ksft_perror("mmap() failed"); 1801 log_test_result(KSFT_FAIL); 1802 goto munmap; 1803 } 1804 1805 /* Fault the page in. */ 1806 tmp = *mem + *smem; 1807 asm volatile("" : "+r" (tmp)); 1808 1809 fn(mem, smem, hugetlbsize); 1810 munmap: 1811 munmap(mem, hugetlbsize); 1812 if (smem != MAP_FAILED) 1813 munmap(smem, hugetlbsize); 1814 close: 1815 close(fd); 1816 } 1817 1818 struct non_anon_test_case { 1819 const char *desc; 1820 non_anon_test_fn fn; 1821 }; 1822 1823 /* 1824 * Test cases that target any pages in private mappings that are not anonymous: 1825 * pages that may get shared via COW ndependent of fork(). This includes 1826 * the shared zeropage(s), pagecache pages, ... 1827 */ 1828 static const struct non_anon_test_case non_anon_test_cases[] = { 1829 /* 1830 * Basic COW test without any GUP. If we miss to break COW, changes are 1831 * visible via other private/shared mappings. 1832 */ 1833 { 1834 "Basic COW", 1835 test_cow, 1836 }, 1837 /* 1838 * Take a R/O longterm pin. When modifying the page via the page table, 1839 * the page content change must be visible via the pin. 1840 */ 1841 { 1842 "R/O longterm GUP pin", 1843 test_ro_pin, 1844 }, 1845 /* Same as above, but using GUP-fast. */ 1846 { 1847 "R/O longterm GUP-fast pin", 1848 test_ro_fast_pin, 1849 }, 1850 }; 1851 1852 static void run_non_anon_test_case(struct non_anon_test_case const *test_case) 1853 { 1854 int i; 1855 1856 run_with_zeropage(test_case->fn, test_case->desc); 1857 run_with_memfd(test_case->fn, test_case->desc); 1858 run_with_tmpfile(test_case->fn, test_case->desc); 1859 if (pmdsize) 1860 run_with_huge_zeropage(test_case->fn, test_case->desc); 1861 for (i = 0; i < nr_hugetlbsizes; i++) 1862 run_with_memfd_hugetlb(test_case->fn, test_case->desc, 1863 hugetlbsizes[i]); 1864 } 1865 1866 static void run_non_anon_test_cases(void) 1867 { 1868 int i; 1869 1870 ksft_print_msg("[RUN] Non-anonymous memory tests in private mappings\n"); 1871 1872 for (i = 0; i < ARRAY_SIZE(non_anon_test_cases); i++) 1873 run_non_anon_test_case(&non_anon_test_cases[i]); 1874 } 1875 1876 static int tests_per_non_anon_test_case(void) 1877 { 1878 int tests = 3 + nr_hugetlbsizes; 1879 1880 if (pmdsize) 1881 tests += 1; 1882 return tests; 1883 } 1884 1885 int main(int argc, char **argv) 1886 { 1887 struct thp_settings default_settings; 1888 1889 ksft_print_header(); 1890 1891 pagesize = getpagesize(); 1892 pmdsize = read_pmd_pagesize(); 1893 if (pmdsize) { 1894 /* Only if THP is supported. */ 1895 thp_read_settings(&default_settings); 1896 default_settings.hugepages[sz2ord(pmdsize)].enabled = THP_INHERIT; 1897 thp_save_settings(); 1898 thp_push_settings(&default_settings); 1899 1900 ksft_print_msg("[INFO] detected PMD size: %zu KiB\n", 1901 pmdsize / 1024); 1902 nr_thpsizes = detect_thp_sizes(thpsizes, ARRAY_SIZE(thpsizes)); 1903 } 1904 nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes, 1905 ARRAY_SIZE(hugetlbsizes)); 1906 detect_huge_zeropage(); 1907 1908 ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() + 1909 ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() + 1910 ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case()); 1911 1912 gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR); 1913 pagemap_fd = open("/proc/self/pagemap", O_RDONLY); 1914 if (pagemap_fd < 0) 1915 ksft_exit_fail_msg("opening pagemap failed\n"); 1916 1917 run_anon_test_cases(); 1918 run_anon_thp_test_cases(); 1919 run_non_anon_test_cases(); 1920 1921 if (pmdsize) { 1922 /* Only if THP is supported. */ 1923 thp_restore_settings(); 1924 } 1925 1926 ksft_finished(); 1927 } 1928