// SPDX-License-Identifier: GPL-2.0-only
/*
 * COW (Copy On Write) tests.
 *
 * Copyright 2022, Red Hat, Inc.
 *
 * Author(s): David Hildenbrand <david@redhat.com>
 */
#define _GNU_SOURCE
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <stdint.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <assert.h>
#include <linux/mman.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/wait.h>
#include <linux/memfd.h>

#include "local_config.h"
#ifdef LOCAL_CONFIG_HAVE_LIBURING
#include <liburing.h>
#endif /* LOCAL_CONFIG_HAVE_LIBURING */

#include "../../../../mm/gup_test.h"
#include "../kselftest.h"
#include "vm_util.h"
#include "thp_settings.h"

static size_t pagesize;
static int pagemap_fd;
static size_t pmdsize;
static int nr_thpsizes;
static size_t thpsizes[20];
static int nr_hugetlbsizes;
static size_t hugetlbsizes[10];
static int gup_fd;
static bool has_huge_zeropage;

/* Return the order corresponding to the given size: log2(size / pagesize). */
static int sz2ord(size_t size)
{
	return __builtin_ctzll(size / pagesize);
}

static int detect_thp_sizes(size_t sizes[], int max)
{
	int count = 0;
	unsigned long orders;
	size_t kb;
	int i;

	/* THP not supported at all. */
	if (!pmdsize)
		return 0;

	orders = 1UL << sz2ord(pmdsize);
	orders |= thp_supported_orders();

	for (i = 0; orders && count < max; i++) {
		if (!(orders & (1UL << i)))
			continue;
		orders &= ~(1UL << i);
		kb = (pagesize >> 10) << i;
		sizes[count++] = kb * 1024;
		ksft_print_msg("[INFO] detected THP size: %zu KiB\n", kb);
	}

	return count;
}

static bool range_is_swapped(void *addr, size_t size)
{
	for (; size; addr += pagesize, size -= pagesize)
		if (!pagemap_is_swapped(pagemap_fd, addr))
			return false;
	return true;
}

struct comm_pipes {
	int child_ready[2];
	int parent_ready[2];
};

static int setup_comm_pipes(struct comm_pipes *comm_pipes)
{
	if (pipe(comm_pipes->child_ready) < 0) {
		ksft_perror("pipe() failed");
		return -errno;
	}
	if (pipe(comm_pipes->parent_ready) < 0) {
		ksft_perror("pipe() failed");
		close(comm_pipes->child_ready[0]);
		close(comm_pipes->child_ready[1]);
		return -errno;
	}

	return 0;
}

static void close_comm_pipes(struct comm_pipes *comm_pipes)
{
	close(comm_pipes->child_ready[0]);
	close(comm_pipes->child_ready[1]);
	close(comm_pipes->parent_ready[0]);
	close(comm_pipes->parent_ready[1]);
}
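
/*
 * The two pipe pairs above implement a simple handshake between parent and
 * child. A sketch of the pattern used throughout this file:
 *
 *	// child: signal readiness, then wait for the parent to finish
 *	write(comm_pipes->child_ready[1], "0", 1);
 *	while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
 *		;
 *
 *	// parent: wait for the child, do the work, then release the child
 *	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
 *		;
 *	memset(mem, 0xff, size);
 *	write(comm_pipes.parent_ready[1], "0", 1);
 */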

static int child_memcmp_fn(char *mem, size_t size,
			   struct comm_pipes *comm_pipes)
{
	char *old = malloc(size);
	char buf;

	/* Backup the original content. */
	memcpy(old, mem, size);

	/* Wait until the parent modified the page. */
	write(comm_pipes->child_ready[1], "0", 1);
	while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
		;

	/* See if we still read the old values. */
	return memcmp(old, mem, size);
}

static int child_vmsplice_memcmp_fn(char *mem, size_t size,
				    struct comm_pipes *comm_pipes)
{
	struct iovec iov = {
		.iov_base = mem,
		.iov_len = size,
	};
	ssize_t cur, total, transferred;
	char *old, *new;
	int fds[2];
	char buf;

	old = malloc(size);
	new = malloc(size);

	/* Backup the original content. */
	memcpy(old, mem, size);

	if (pipe(fds) < 0)
		return -errno;

	/* Trigger a read-only pin. */
	transferred = vmsplice(fds[1], &iov, 1, 0);
	if (transferred < 0)
		return -errno;
	if (transferred == 0)
		return -EINVAL;

	/* Unmap it from our page tables. */
	if (munmap(mem, size) < 0)
		return -errno;

	/* Wait until the parent modified it. */
	write(comm_pipes->child_ready[1], "0", 1);
	while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
		;

	/* See if we still read the old values via the pipe. */
	for (total = 0; total < transferred; total += cur) {
		cur = read(fds[0], new + total, transferred - total);
		if (cur < 0)
			return -errno;
	}

	return memcmp(old, new, transferred);
}

typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes);

static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
				  child_fn fn, bool xfail)
{
	struct comm_pipes comm_pipes;
	char buf;
	int ret;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		log_test_result(KSFT_FAIL);
		return;
	}

	ret = fork();
	if (ret < 0) {
		ksft_perror("fork() failed");
		log_test_result(KSFT_FAIL);
		goto close_comm_pipes;
	} else if (!ret) {
		exit(fn(mem, size, &comm_pipes));
	}

	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
		;

	if (do_mprotect) {
		/*
		 * mprotect() optimizations might try avoiding
		 * write-faults by directly mapping pages writable.
		 */
		ret = mprotect(mem, size, PROT_READ);
		if (ret) {
			ksft_perror("mprotect() failed");
			log_test_result(KSFT_FAIL);
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}

		ret = mprotect(mem, size, PROT_READ|PROT_WRITE);
		if (ret) {
			ksft_perror("mprotect() failed");
			log_test_result(KSFT_FAIL);
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}
	}

	/* Modify the page. */
	memset(mem, 0xff, size);
	write(comm_pipes.parent_ready[1], "0", 1);

	wait(&ret);
	if (WIFEXITED(ret))
		ret = WEXITSTATUS(ret);
	else
		ret = -EINVAL;

	if (!ret) {
		log_test_result(KSFT_PASS);
	} else if (xfail) {
		/*
		 * With hugetlb, some vmsplice() tests are currently expected to
		 * fail because (a) harder to fix and (b) nobody really cares.
		 * Flag them as expected failure for now.
		 */
		ksft_print_msg("Leak from parent into child\n");
		log_test_result(KSFT_XFAIL);
	} else {
		ksft_print_msg("Leak from parent into child\n");
		log_test_result(KSFT_FAIL);
	}
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
}
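
/*
 * Note: a child_fn runs in the forked child and its exit status is the test
 * result: 0 iff no COW leak was observed. child_memcmp_fn() above is the
 * simplest instance: snapshot the memory, handshake while the parent writes
 * to it, then memcmp() the snapshot against what the child still reads.
 */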

static void test_cow_in_parent(char *mem, size_t size, bool is_hugetlb)
{
	do_test_cow_in_parent(mem, size, false, child_memcmp_fn, false);
}

static void test_cow_in_parent_mprotect(char *mem, size_t size, bool is_hugetlb)
{
	do_test_cow_in_parent(mem, size, true, child_memcmp_fn, false);
}

static void test_vmsplice_in_child(char *mem, size_t size, bool is_hugetlb)
{
	do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn,
			      is_hugetlb);
}

static void test_vmsplice_in_child_mprotect(char *mem, size_t size,
					    bool is_hugetlb)
{
	do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn,
			      is_hugetlb);
}

static void do_test_vmsplice_in_parent(char *mem, size_t size,
				       bool before_fork, bool xfail)
{
	struct iovec iov = {
		.iov_base = mem,
		.iov_len = size,
	};
	ssize_t cur, total, transferred = 0;
	struct comm_pipes comm_pipes;
	char *old, *new;
	int ret, fds[2];
	char buf;

	old = malloc(size);
	new = malloc(size);

	memcpy(old, mem, size);

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		log_test_result(KSFT_FAIL);
		goto free;
	}

	if (pipe(fds) < 0) {
		ksft_perror("pipe() failed");
		log_test_result(KSFT_FAIL);
		goto close_comm_pipes;
	}

	if (before_fork) {
		transferred = vmsplice(fds[1], &iov, 1, 0);
		if (transferred <= 0) {
			ksft_perror("vmsplice() failed");
			log_test_result(KSFT_FAIL);
			goto close_pipe;
		}
	}

	ret = fork();
	if (ret < 0) {
		ksft_perror("fork() failed");
		log_test_result(KSFT_FAIL);
		goto close_pipe;
	} else if (!ret) {
		write(comm_pipes.child_ready[1], "0", 1);
		while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
			;
		/* Modify page content in the child. */
		memset(mem, 0xff, size);
		exit(0);
	}

	if (!before_fork) {
		transferred = vmsplice(fds[1], &iov, 1, 0);
		if (transferred <= 0) {
			ksft_perror("vmsplice() failed");
			log_test_result(KSFT_FAIL);
			wait(&ret);
			goto close_pipe;
		}
	}

	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
		;
	if (munmap(mem, size) < 0) {
		ksft_perror("munmap() failed");
		log_test_result(KSFT_FAIL);
		goto close_pipe;
	}
	write(comm_pipes.parent_ready[1], "0", 1);

	/* Wait until the child is done writing. */
	wait(&ret);
	if (!WIFEXITED(ret)) {
		ksft_perror("wait() failed");
		log_test_result(KSFT_FAIL);
		goto close_pipe;
	}

	/* See if we still read the old values. */
	for (total = 0; total < transferred; total += cur) {
		cur = read(fds[0], new + total, transferred - total);
		if (cur < 0) {
			ksft_perror("read() failed");
			log_test_result(KSFT_FAIL);
			goto close_pipe;
		}
	}

	if (!memcmp(old, new, transferred)) {
		log_test_result(KSFT_PASS);
	} else if (xfail) {
		/*
		 * With hugetlb, some vmsplice() tests are currently expected to
		 * fail because (a) harder to fix and (b) nobody really cares.
		 * Flag them as expected failure for now.
		 */
		ksft_print_msg("Leak from child into parent\n");
		log_test_result(KSFT_XFAIL);
	} else {
		ksft_print_msg("Leak from child into parent\n");
		log_test_result(KSFT_FAIL);
	}
close_pipe:
	close(fds[0]);
	close(fds[1]);
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
free:
	free(old);
	free(new);
}

static void test_vmsplice_before_fork(char *mem, size_t size, bool is_hugetlb)
{
	do_test_vmsplice_in_parent(mem, size, true, is_hugetlb);
}

static void test_vmsplice_after_fork(char *mem, size_t size, bool is_hugetlb)
{
	do_test_vmsplice_in_parent(mem, size, false, is_hugetlb);
}
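
/*
 * Ordering sketch for the two do_test_vmsplice_in_parent() variants. The
 * parent takes the R/O pin via vmsplice() and unmaps the range while the
 * child modifies it; a COW bug makes the child's writes leak into the data
 * later read back from the pipe:
 *
 *	before_fork:	vmsplice() -> fork() -> munmap() -> child writes
 *	after_fork:	fork() -> vmsplice() -> munmap() -> child writes
 */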

#ifdef LOCAL_CONFIG_HAVE_LIBURING
static void do_test_iouring(char *mem, size_t size, bool use_fork)
{
	struct comm_pipes comm_pipes;
	struct io_uring_cqe *cqe;
	struct io_uring_sqe *sqe;
	struct io_uring ring;
	ssize_t cur, total;
	struct iovec iov;
	char *buf, *tmp;
	int ret, fd;
	FILE *file;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		log_test_result(KSFT_FAIL);
		return;
	}

	file = tmpfile();
	if (!file) {
		ksft_perror("tmpfile() failed");
		log_test_result(KSFT_FAIL);
		goto close_comm_pipes;
	}
	fd = fileno(file);
	assert(fd >= 0);

	tmp = malloc(size);
	if (!tmp) {
		ksft_print_msg("malloc() failed\n");
		log_test_result(KSFT_FAIL);
		goto close_file;
	}

	/* Skip on errors, as we might just lack kernel support. */
	ret = io_uring_queue_init(1, &ring, 0);
	if (ret < 0) {
		ksft_print_msg("io_uring_queue_init() failed\n");
		log_test_result(KSFT_SKIP);
		goto free_tmp;
	}

	/*
	 * Register the range as a fixed buffer. This will FOLL_WRITE | FOLL_PIN
	 * | FOLL_LONGTERM the range.
	 *
	 * Skip on errors, as we might just lack kernel support or might not
	 * have sufficient MEMLOCK permissions.
	 */
	iov.iov_base = mem;
	iov.iov_len = size;
	ret = io_uring_register_buffers(&ring, &iov, 1);
	if (ret) {
		ksft_print_msg("io_uring_register_buffers() failed\n");
		log_test_result(KSFT_SKIP);
		goto queue_exit;
	}

	if (use_fork) {
		/*
		 * fork() and keep the child alive until we're done. Note that
		 * we expect the pinned page to not get shared with the child.
		 */
		ret = fork();
		if (ret < 0) {
			ksft_perror("fork() failed");
			log_test_result(KSFT_FAIL);
			goto unregister_buffers;
		} else if (!ret) {
			write(comm_pipes.child_ready[1], "0", 1);
			while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
				;
			exit(0);
		}

		while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
			;
	} else {
		/*
		 * Map the page R/O into the page table. Enable softdirty
		 * tracking to stop the page from getting mapped R/W immediately
		 * again by mprotect() optimizations. Note that we don't have an
		 * easy way to test if that worked (the pagemap does not export
		 * if the page is mapped R/O vs. R/W).
		 */
		ret = mprotect(mem, size, PROT_READ);
		if (ret) {
			ksft_perror("mprotect() failed");
			log_test_result(KSFT_FAIL);
			goto unregister_buffers;
		}

		clear_softdirty();
		ret = mprotect(mem, size, PROT_READ | PROT_WRITE);
		if (ret) {
			ksft_perror("mprotect() failed");
			log_test_result(KSFT_FAIL);
			goto unregister_buffers;
		}
	}

	/*
	 * Modify the page and write page content as observed by the fixed
	 * buffer pin to the file so we can verify it.
	 */
	memset(mem, 0xff, size);
	sqe = io_uring_get_sqe(&ring);
	if (!sqe) {
		ksft_print_msg("io_uring_get_sqe() failed\n");
		log_test_result(KSFT_FAIL);
		goto quit_child;
	}
	io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0);

	ret = io_uring_submit(&ring);
	if (ret < 0) {
		ksft_print_msg("io_uring_submit() failed\n");
		log_test_result(KSFT_FAIL);
		goto quit_child;
	}

	ret = io_uring_wait_cqe(&ring, &cqe);
	if (ret < 0) {
		ksft_print_msg("io_uring_wait_cqe() failed\n");
		log_test_result(KSFT_FAIL);
		goto quit_child;
	}

	if (cqe->res != size) {
		ksft_print_msg("write_fixed failed\n");
		log_test_result(KSFT_FAIL);
		goto quit_child;
	}
	io_uring_cqe_seen(&ring, cqe);

	/* Read back the file content to the temporary buffer. */
	total = 0;
	while (total < size) {
		cur = pread(fd, tmp + total, size - total, total);
		if (cur < 0) {
			ksft_perror("pread() failed");
			log_test_result(KSFT_FAIL);
			goto quit_child;
		}
		total += cur;
	}

	/* Finally, check if we read what we expected. */
	if (!memcmp(mem, tmp, size)) {
		log_test_result(KSFT_PASS);
	} else {
		ksft_print_msg("Longterm R/W pin is not reliable\n");
		log_test_result(KSFT_FAIL);
	}

quit_child:
	if (use_fork) {
		write(comm_pipes.parent_ready[1], "0", 1);
		wait(&ret);
	}
unregister_buffers:
	io_uring_unregister_buffers(&ring);
queue_exit:
	io_uring_queue_exit(&ring);
free_tmp:
	free(tmp);
close_file:
	fclose(file);
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
}

static void test_iouring_ro(char *mem, size_t size, bool is_hugetlb)
{
	do_test_iouring(mem, size, false);
}

static void test_iouring_fork(char *mem, size_t size, bool is_hugetlb)
{
	do_test_iouring(mem, size, true);
}
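
/*
 * For reference, the liburing sequence exercised by do_test_iouring() boils
 * down to the following (error handling omitted; a sketch, not a standalone
 * program):
 *
 *	struct iovec iov = { .iov_base = mem, .iov_len = size };
 *
 *	io_uring_queue_init(1, &ring, 0);
 *	io_uring_register_buffers(&ring, &iov, 1);	// longterm R/W pin
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0);
 *	io_uring_submit(&ring);
 *	io_uring_wait_cqe(&ring, &cqe);
 *	io_uring_cqe_seen(&ring, cqe);
 *	io_uring_unregister_buffers(&ring);
 *	io_uring_queue_exit(&ring);
 */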

#endif /* LOCAL_CONFIG_HAVE_LIBURING */

enum ro_pin_test {
	RO_PIN_TEST,
	RO_PIN_TEST_SHARED,
	RO_PIN_TEST_PREVIOUSLY_SHARED,
	RO_PIN_TEST_RO_EXCLUSIVE,
};

static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
			   bool fast)
{
	struct pin_longterm_test args;
	struct comm_pipes comm_pipes;
	char *tmp, buf;
	__u64 tmp_val;
	int ret;

	if (gup_fd < 0) {
		ksft_print_msg("gup_test not available\n");
		log_test_result(KSFT_SKIP);
		return;
	}

	tmp = malloc(size);
	if (!tmp) {
		ksft_perror("malloc() failed");
		log_test_result(KSFT_FAIL);
		return;
	}

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		log_test_result(KSFT_FAIL);
		goto free_tmp;
	}

	switch (test) {
	case RO_PIN_TEST:
		break;
	case RO_PIN_TEST_SHARED:
	case RO_PIN_TEST_PREVIOUSLY_SHARED:
		/*
		 * Share the pages with our child. As the pages are not pinned,
		 * this should just work.
		 */
		ret = fork();
		if (ret < 0) {
			ksft_perror("fork() failed");
			log_test_result(KSFT_FAIL);
			goto close_comm_pipes;
		} else if (!ret) {
			write(comm_pipes.child_ready[1], "0", 1);
			while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
				;
			exit(0);
		}

		/* Wait until our child is ready. */
		while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
			;

		if (test == RO_PIN_TEST_PREVIOUSLY_SHARED) {
			/*
			 * Tell the child to quit now and wait until it quit.
			 * The pages should now be mapped R/O into our page
			 * tables, but they are no longer shared.
			 */
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			if (!WIFEXITED(ret))
				ksft_print_msg("[INFO] wait() failed\n");
		}
		break;
	case RO_PIN_TEST_RO_EXCLUSIVE:
		/*
		 * Map the page R/O into the page table. Enable softdirty
		 * tracking to stop the page from getting mapped R/W immediately
		 * again by mprotect() optimizations. Note that we don't have an
		 * easy way to test if that worked (the pagemap does not export
		 * if the page is mapped R/O vs. R/W).
		 */
		ret = mprotect(mem, size, PROT_READ);
		clear_softdirty();
		ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
		if (ret) {
			ksft_perror("mprotect() failed");
			log_test_result(KSFT_FAIL);
			goto close_comm_pipes;
		}
		break;
	default:
		assert(false);
	}

	/* Take a R/O pin. This should trigger unsharing. */
	args.addr = (__u64)(uintptr_t)mem;
	args.size = size;
	args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0;
	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
	if (ret) {
		if (errno == EINVAL)
			ret = KSFT_SKIP;
		else
			ret = KSFT_FAIL;
		ksft_perror("PIN_LONGTERM_TEST_START failed");
		log_test_result(ret);
		goto wait;
	}

	/* Modify the page. */
	memset(mem, 0xff, size);

	/*
	 * Read back the content via the pin to the temporary buffer and
	 * test if we observed the modification.
	 */
	tmp_val = (__u64)(uintptr_t)tmp;
	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val);
	if (ret) {
		ksft_perror("PIN_LONGTERM_TEST_READ failed");
		log_test_result(KSFT_FAIL);
	} else {
		if (!memcmp(mem, tmp, size)) {
			log_test_result(KSFT_PASS);
		} else {
			ksft_print_msg("Longterm R/O pin is not reliable\n");
			log_test_result(KSFT_FAIL);
		}
	}

	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP);
	if (ret)
		ksft_perror("PIN_LONGTERM_TEST_STOP failed");
wait:
	switch (test) {
	case RO_PIN_TEST_SHARED:
		write(comm_pipes.parent_ready[1], "0", 1);
		wait(&ret);
		if (!WIFEXITED(ret))
			ksft_perror("wait() failed");
		break;
	default:
		break;
	}
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
free_tmp:
	free(tmp);
}
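
/*
 * The debugfs gup_test interface used above follows a simple start/read/stop
 * protocol (a sketch; see mm/gup_test.h for the ABI):
 *
 *	struct pin_longterm_test args = {
 *		.addr  = (__u64)(uintptr_t)mem,
 *		.size  = size,
 *		.flags = 0,	// or PIN_LONGTERM_TEST_FLAG_USE_FAST
 *	};
 *
 *	ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);	 // take the pin
 *	ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val); // copy pinned pages out
 *	ioctl(gup_fd, PIN_LONGTERM_TEST_STOP);		 // drop the pin
 */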

static void test_ro_pin_on_shared(char *mem, size_t size, bool is_hugetlb)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, false);
}

static void test_ro_fast_pin_on_shared(char *mem, size_t size, bool is_hugetlb)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, true);
}

static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size,
						bool is_hugetlb)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, false);
}

static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size,
						     bool is_hugetlb)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, true);
}

static void test_ro_pin_on_ro_exclusive(char *mem, size_t size,
					bool is_hugetlb)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, false);
}

static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size,
					     bool is_hugetlb)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, true);
}

typedef void (*test_fn)(char *mem, size_t size, bool hugetlb);

static void do_run_with_base_page(test_fn fn, bool swapout)
{
	char *mem;
	int ret;

	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		return;
	}

	ret = madvise(mem, pagesize, MADV_NOHUGEPAGE);
	/* Ignore if MADV_NOHUGEPAGE is not available on this kernel. */
	if (ret && errno != EINVAL) {
		ksft_perror("MADV_NOHUGEPAGE failed");
		log_test_result(KSFT_FAIL);
		goto munmap;
	}

	/* Populate a base page. */
	memset(mem, 1, pagesize);

	if (swapout) {
		madvise(mem, pagesize, MADV_PAGEOUT);
		if (!pagemap_is_swapped(pagemap_fd, mem)) {
			ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n");
			log_test_result(KSFT_SKIP);
			goto munmap;
		}
	}

	fn(mem, pagesize, false);
munmap:
	munmap(mem, pagesize);
}

static void run_with_base_page(test_fn fn, const char *desc)
{
	log_test_start("%s ... with base page", desc);
	do_run_with_base_page(fn, false);
}

static void run_with_base_page_swap(test_fn fn, const char *desc)
{
	log_test_start("%s ... with swapped out base page", desc);
	do_run_with_base_page(fn, true);
}

enum thp_run {
	THP_RUN_PMD,
	THP_RUN_PMD_SWAPOUT,
	THP_RUN_PTE,
	THP_RUN_PTE_SWAPOUT,
	THP_RUN_SINGLE_PTE,
	THP_RUN_SINGLE_PTE_SWAPOUT,
	THP_RUN_PARTIAL_MREMAP,
	THP_RUN_PARTIAL_SHARED,
};

static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
{
	char *mem, *mmap_mem, *tmp, *mremap_mem = MAP_FAILED;
	size_t size, mmap_size, mremap_size;
	int ret;

	/* For alignment purposes, we need twice the thp size. */
	mmap_size = 2 * thpsize;
	mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mmap_mem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		return;
	}

	/* We need a THP-aligned memory area. */
	mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1));

	ret = madvise(mem, thpsize, MADV_HUGEPAGE);
	if (ret) {
		ksft_perror("MADV_HUGEPAGE failed");
		log_test_result(KSFT_FAIL);
		goto munmap;
	}

	/*
	 * Try to populate a THP. Touch the first sub-page and test if
	 * we get the last sub-page populated automatically.
	 */
	mem[0] = 1;
	if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
		ksft_print_msg("Did not get a THP populated\n");
		log_test_result(KSFT_SKIP);
		goto munmap;
	}
	memset(mem, 1, thpsize);

	size = thpsize;
	switch (thp_run) {
	case THP_RUN_PMD:
	case THP_RUN_PMD_SWAPOUT:
		assert(thpsize == pmdsize);
		break;
	case THP_RUN_PTE:
	case THP_RUN_PTE_SWAPOUT:
		/*
		 * Trigger PTE-mapping the THP by temporarily mapping a single
		 * subpage R/O. This is a noop if the THP is not pmdsize (and
		 * therefore already PTE-mapped).
		 */
		ret = mprotect(mem + pagesize, pagesize, PROT_READ);
		if (ret) {
			ksft_perror("mprotect() failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
		if (ret) {
			ksft_perror("mprotect() failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		break;
	case THP_RUN_SINGLE_PTE:
	case THP_RUN_SINGLE_PTE_SWAPOUT:
		/*
		 * Discard all but a single subpage of that PTE-mapped THP. What
		 * remains is a single PTE mapping a single subpage.
		 */
		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED);
		if (ret) {
			ksft_perror("MADV_DONTNEED failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		size = pagesize;
		break;
	case THP_RUN_PARTIAL_MREMAP:
		/*
		 * Remap half of the THP. We need some new memory location
		 * for that.
		 */
		mremap_size = thpsize / 2;
		mremap_mem = mmap(NULL, mremap_size, PROT_NONE,
				  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (mremap_mem == MAP_FAILED) {
			ksft_perror("mmap() failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		tmp = mremap(mem + mremap_size, mremap_size, mremap_size,
			     MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem);
		if (tmp != mremap_mem) {
			ksft_perror("mremap() failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		size = mremap_size;
		break;
	case THP_RUN_PARTIAL_SHARED:
		/*
		 * Share the first page of the THP with a child and quit the
		 * child. This will result in some parts of the THP never
		 * having been shared.
		 */
		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK);
		if (ret) {
			ksft_perror("MADV_DONTFORK failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		ret = fork();
		if (ret < 0) {
			ksft_perror("fork() failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		} else if (!ret) {
			exit(0);
		}
		wait(&ret);
		/* Allow for sharing all pages again. */
		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK);
		if (ret) {
			ksft_perror("MADV_DOFORK failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		break;
	default:
		assert(false);
	}

	switch (thp_run) {
	case THP_RUN_PMD_SWAPOUT:
	case THP_RUN_PTE_SWAPOUT:
	case THP_RUN_SINGLE_PTE_SWAPOUT:
		madvise(mem, size, MADV_PAGEOUT);
		if (!range_is_swapped(mem, size)) {
			ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n");
			log_test_result(KSFT_SKIP);
			goto munmap;
		}
		break;
	default:
		break;
	}

	fn(mem, size, false);
munmap:
	munmap(mmap_mem, mmap_size);
	if (mremap_mem != MAP_FAILED)
		munmap(mremap_mem, mremap_size);
}
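
/*
 * The alignment trick above rounds the mmap()ed address up to the next
 * thpsize boundary. Worked example, assuming a 2 MiB THP and
 * mmap_mem == 0x7f0000100000:
 *
 *	(0x7f0000100000 + 0x200000) & ~(0x200000 - 1) == 0x7f0000200000
 *
 * Mapping twice the THP size guarantees that such an aligned, thpsize-long
 * range exists within the mapping.
 */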

static void run_with_thp(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with THP (%zu kB)",
		       desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_PMD, size);
}

static void run_with_thp_swap(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with swapped-out THP (%zu kB)",
		       desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT, size);
}

static void run_with_pte_mapped_thp(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with PTE-mapped THP (%zu kB)",
		       desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_PTE, size);
}

static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with swapped-out, PTE-mapped THP (%zu kB)",
		       desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT, size);
}

static void run_with_single_pte_of_thp(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with single PTE of THP (%zu kB)",
		       desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_SINGLE_PTE, size);
}

static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with single PTE of swapped-out THP (%zu kB)",
		       desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT, size);
}

static void run_with_partial_mremap_thp(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with partially mremap()'ed THP (%zu kB)",
		       desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP, size);
}

static void run_with_partial_shared_thp(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with partially shared THP (%zu kB)",
		       desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED, size);
}

static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize)
{
	int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
	char *mem, *dummy;

	log_test_start("%s ... with hugetlb (%zu kB)", desc,
		       hugetlbsize / 1024);

	flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT;

	mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_perror("need more free huge pages");
		log_test_result(KSFT_SKIP);
		return;
	}

	/* Populate a huge page. */
	memset(mem, 1, hugetlbsize);

	/*
	 * We need a total of two hugetlb pages to handle COW/unsharing
	 * properly, otherwise we might get zapped by a SIGBUS.
	 */
	dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
	if (dummy == MAP_FAILED) {
		ksft_perror("need more free huge pages");
		log_test_result(KSFT_SKIP);
		goto munmap;
	}
	munmap(dummy, hugetlbsize);

	fn(mem, hugetlbsize, true);
munmap:
	munmap(mem, hugetlbsize);
}
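
/*
 * MAP_HUGETLB encodes the desired huge page size as log2(size) in the high
 * bits of the mmap() flags. Worked example for a 2 MiB hugetlb page:
 *
 *	__builtin_ctzll(2 * 1024 * 1024) == 21
 *	flags |= 21 << MAP_HUGE_SHIFT;	// equivalent to MAP_HUGE_2MB
 */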

struct test_case {
	const char *desc;
	test_fn fn;
};

/*
 * Test cases that are specific to anonymous pages: pages in private mappings
 * that may get shared via COW during fork().
 */
static const struct test_case anon_test_cases[] = {
	/*
	 * Basic COW tests for fork() without any GUP. If we fail to break COW,
	 * either the child can observe modifications by the parent or the
	 * other way around.
	 */
	{
		"Basic COW after fork()",
		test_cow_in_parent,
	},
	/*
	 * Basic test, but do an additional mprotect(PROT_READ)+
	 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
	 */
	{
		"Basic COW after fork() with mprotect() optimization",
		test_cow_in_parent_mprotect,
	},
	/*
	 * vmsplice() [R/O GUP] + unmap in the child; modify in the parent. If
	 * we fail to break COW, the child observes modifications by the parent.
	 * This is CVE-2020-29374 reported by Jann Horn.
	 */
	{
		"vmsplice() + unmap in child",
		test_vmsplice_in_child,
	},
	/*
	 * vmsplice() test, but do an additional mprotect(PROT_READ)+
	 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
	 */
	{
		"vmsplice() + unmap in child with mprotect() optimization",
		test_vmsplice_in_child_mprotect,
	},
	/*
	 * vmsplice() [R/O GUP] in parent before fork(), unmap in parent after
	 * fork(); modify in the child. If we fail to break COW, the parent
	 * observes modifications by the child.
	 */
	{
		"vmsplice() before fork(), unmap in parent after fork()",
		test_vmsplice_before_fork,
	},
	/*
	 * vmsplice() [R/O GUP] + unmap in parent after fork(); modify in the
	 * child. If we fail to break COW, the parent observes modifications by
	 * the child.
	 */
	{
		"vmsplice() + unmap in parent after fork()",
		test_vmsplice_after_fork,
	},
#ifdef LOCAL_CONFIG_HAVE_LIBURING
	/*
	 * Take a R/W longterm pin and then map the page R/O into the page
	 * table to trigger a write fault on next access. When modifying the
	 * page, the page content must be visible via the pin.
	 */
	{
		"R/O-mapping a page registered as iouring fixed buffer",
		test_iouring_ro,
	},
	/*
	 * Take a R/W longterm pin and then fork() a child. When modifying the
	 * page, the page content must be visible via the pin. We expect the
	 * pinned page to not get shared with the child.
	 */
	{
		"fork() with an iouring fixed buffer",
		test_iouring_fork,
	},
#endif /* LOCAL_CONFIG_HAVE_LIBURING */
	/*
	 * Take a R/O longterm pin on a R/O-mapped shared anonymous page.
	 * When modifying the page via the page table, the page content change
	 * must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped shared page",
		test_ro_pin_on_shared,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped shared page",
		test_ro_fast_pin_on_shared,
	},
	/*
	 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page that
	 * was previously shared. When modifying the page via the page table,
	 * the page content change must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped previously-shared page",
		test_ro_pin_on_ro_previously_shared,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped previously-shared page",
		test_ro_fast_pin_on_ro_previously_shared,
	},
	/*
	 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page.
	 * When modifying the page via the page table, the page content change
	 * must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped exclusive page",
		test_ro_pin_on_ro_exclusive,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped exclusive page",
		test_ro_fast_pin_on_ro_exclusive,
	},
};

static void run_anon_test_case(struct test_case const *test_case)
{
	int i;

	run_with_base_page(test_case->fn, test_case->desc);
	run_with_base_page_swap(test_case->fn, test_case->desc);
	for (i = 0; i < nr_thpsizes; i++) {
		size_t size = thpsizes[i];
		struct thp_settings settings = *thp_current_settings();

		settings.hugepages[sz2ord(pmdsize)].enabled = THP_NEVER;
		settings.hugepages[sz2ord(size)].enabled = THP_ALWAYS;
		thp_push_settings(&settings);

		if (size == pmdsize) {
			run_with_thp(test_case->fn, test_case->desc, size);
			run_with_thp_swap(test_case->fn, test_case->desc, size);
		}

		run_with_pte_mapped_thp(test_case->fn, test_case->desc, size);
		run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc, size);
		run_with_single_pte_of_thp(test_case->fn, test_case->desc, size);
		run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc, size);
		run_with_partial_mremap_thp(test_case->fn, test_case->desc, size);
		run_with_partial_shared_thp(test_case->fn, test_case->desc, size);

		thp_pop_settings();
	}
	for (i = 0; i < nr_hugetlbsizes; i++)
		run_with_hugetlb(test_case->fn, test_case->desc,
				 hugetlbsizes[i]);
}

static void run_anon_test_cases(void)
{
	int i;

	ksft_print_msg("[INFO] Anonymous memory tests in private mappings\n");

	for (i = 0; i < ARRAY_SIZE(anon_test_cases); i++)
		run_anon_test_case(&anon_test_cases[i]);
}

static int tests_per_anon_test_case(void)
{
	int tests = 2 + nr_hugetlbsizes;

	tests += 6 * nr_thpsizes;
	if (pmdsize)
		tests += 2;
	return tests;
}
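
/*
 * Worked example for the accounting above, assuming a single detected THP
 * size (the PMD size) and two hugetlb sizes:
 *
 *	2		base page + swapped-out base page
 *	+ 6 * 1		six THP runs per detected THP size
 *	+ 2		PMD-mapped THP + its swap-out variant (pmdsize only)
 *	+ 2		one run per hugetlb size
 *	= 12 runs per anonymous test case
 */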

enum anon_thp_collapse_test {
	ANON_THP_COLLAPSE_UNSHARED,
	ANON_THP_COLLAPSE_FULLY_SHARED,
	ANON_THP_COLLAPSE_LOWER_SHARED,
	ANON_THP_COLLAPSE_UPPER_SHARED,
};

static void do_test_anon_thp_collapse(char *mem, size_t size,
				      enum anon_thp_collapse_test test)
{
	struct comm_pipes comm_pipes;
	char buf;
	int ret;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		log_test_result(KSFT_FAIL);
		return;
	}

	/*
	 * Trigger PTE-mapping the THP by temporarily mapping a single subpage
	 * R/O, such that we can try collapsing it later.
	 */
	ret = mprotect(mem + pagesize, pagesize, PROT_READ);
	if (ret) {
		ksft_perror("mprotect() failed");
		log_test_result(KSFT_FAIL);
		goto close_comm_pipes;
	}
	ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
	if (ret) {
		ksft_perror("mprotect() failed");
		log_test_result(KSFT_FAIL);
		goto close_comm_pipes;
	}

	switch (test) {
	case ANON_THP_COLLAPSE_UNSHARED:
		/* Collapse before actually COW-sharing the page. */
		ret = madvise(mem, size, MADV_COLLAPSE);
		if (ret) {
			ksft_perror("MADV_COLLAPSE failed");
			log_test_result(KSFT_SKIP);
			goto close_comm_pipes;
		}
		break;
	case ANON_THP_COLLAPSE_FULLY_SHARED:
		/* COW-share the full PTE-mapped THP. */
		break;
	case ANON_THP_COLLAPSE_LOWER_SHARED:
		/* Don't COW-share the upper part of the THP. */
		ret = madvise(mem + size / 2, size / 2, MADV_DONTFORK);
		if (ret) {
			ksft_perror("MADV_DONTFORK failed");
			log_test_result(KSFT_FAIL);
			goto close_comm_pipes;
		}
		break;
	case ANON_THP_COLLAPSE_UPPER_SHARED:
		/* Don't COW-share the lower part of the THP. */
		ret = madvise(mem, size / 2, MADV_DONTFORK);
		if (ret) {
			ksft_perror("MADV_DONTFORK failed");
			log_test_result(KSFT_FAIL);
			goto close_comm_pipes;
		}
		break;
	default:
		assert(false);
	}

	ret = fork();
	if (ret < 0) {
		ksft_perror("fork() failed");
		log_test_result(KSFT_FAIL);
		goto close_comm_pipes;
	} else if (!ret) {
		switch (test) {
		case ANON_THP_COLLAPSE_UNSHARED:
		case ANON_THP_COLLAPSE_FULLY_SHARED:
			exit(child_memcmp_fn(mem, size, &comm_pipes));
			break;
		case ANON_THP_COLLAPSE_LOWER_SHARED:
			exit(child_memcmp_fn(mem, size / 2, &comm_pipes));
			break;
		case ANON_THP_COLLAPSE_UPPER_SHARED:
			exit(child_memcmp_fn(mem + size / 2, size / 2,
					     &comm_pipes));
			break;
		default:
			assert(false);
		}
	}

	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
		;

	switch (test) {
	case ANON_THP_COLLAPSE_UNSHARED:
		break;
	case ANON_THP_COLLAPSE_UPPER_SHARED:
	case ANON_THP_COLLAPSE_LOWER_SHARED:
		/*
		 * Revert MADV_DONTFORK such that we merge the VMAs and are
		 * able to actually collapse.
		 */
		ret = madvise(mem, size, MADV_DOFORK);
		if (ret) {
			ksft_perror("MADV_DOFORK failed");
			log_test_result(KSFT_FAIL);
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}
		/* FALLTHROUGH */
	case ANON_THP_COLLAPSE_FULLY_SHARED:
		/* Collapse before anyone modified the COW-shared page. */
		ret = madvise(mem, size, MADV_COLLAPSE);
		if (ret) {
			ksft_perror("MADV_COLLAPSE failed");
			log_test_result(KSFT_SKIP);
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}
		break;
	default:
		assert(false);
	}

	/* Modify the page. */
	memset(mem, 0xff, size);
	write(comm_pipes.parent_ready[1], "0", 1);

	wait(&ret);
	if (WIFEXITED(ret))
		ret = WEXITSTATUS(ret);
	else
		ret = -EINVAL;

	if (!ret) {
		log_test_result(KSFT_PASS);
	} else {
		ksft_print_msg("Leak from parent into child\n");
		log_test_result(KSFT_FAIL);
	}
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
}
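
/*
 * Sharing layouts exercised above, sketched for a PMD-sized THP (the child
 * memcmp()s only the COW-shared region in the partial cases):
 *
 *	FULLY_SHARED:	|----------- shared with child ------------|
 *	LOWER_SHARED:	|----- shared -----|--- MADV_DONTFORK ------|
 *	UPPER_SHARED:	|-- MADV_DONTFORK--|------- shared ---------|
 */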

static void test_anon_thp_collapse_unshared(char *mem, size_t size,
					    bool is_hugetlb)
{
	assert(!is_hugetlb);
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UNSHARED);
}

static void test_anon_thp_collapse_fully_shared(char *mem, size_t size,
						bool is_hugetlb)
{
	assert(!is_hugetlb);
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_FULLY_SHARED);
}

static void test_anon_thp_collapse_lower_shared(char *mem, size_t size,
						bool is_hugetlb)
{
	assert(!is_hugetlb);
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_LOWER_SHARED);
}

static void test_anon_thp_collapse_upper_shared(char *mem, size_t size,
						bool is_hugetlb)
{
	assert(!is_hugetlb);
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UPPER_SHARED);
}

/*
 * Test cases that are specific to anonymous THP: pages in private mappings
 * that may get shared via COW during fork().
 */
static const struct test_case anon_thp_test_cases[] = {
	/*
	 * Basic COW test for fork() without any GUP when collapsing a THP
	 * before fork().
	 *
	 * Re-mapping a PTE-mapped anon THP using a single PMD ("in-place
	 * collapse") might easily get COW handling wrong when not collapsing
	 * exclusivity information properly.
	 */
	{
		"Basic COW after fork() when collapsing before fork()",
		test_anon_thp_collapse_unshared,
	},
	/* Basic COW test, but collapse after COW-sharing a full THP. */
	{
		"Basic COW after fork() when collapsing after fork() (fully shared)",
		test_anon_thp_collapse_fully_shared,
	},
	/*
	 * Basic COW test, but collapse after COW-sharing the lower half of a
	 * THP.
	 */
	{
		"Basic COW after fork() when collapsing after fork() (lower shared)",
		test_anon_thp_collapse_lower_shared,
	},
	/*
	 * Basic COW test, but collapse after COW-sharing the upper half of a
	 * THP.
	 */
	{
		"Basic COW after fork() when collapsing after fork() (upper shared)",
		test_anon_thp_collapse_upper_shared,
	},
};

static void run_anon_thp_test_cases(void)
{
	int i;

	if (!pmdsize)
		return;

	ksft_print_msg("[INFO] Anonymous THP tests\n");

	for (i = 0; i < ARRAY_SIZE(anon_thp_test_cases); i++) {
		struct test_case const *test_case = &anon_thp_test_cases[i];

		log_test_start("%s", test_case->desc);
		do_run_with_thp(test_case->fn, THP_RUN_PMD, pmdsize);
	}
}

static int tests_per_anon_thp_test_case(void)
{
	return pmdsize ? 1 : 0;
}

typedef void (*non_anon_test_fn)(char *mem, const char *smem, size_t size);

static void test_cow(char *mem, const char *smem, size_t size)
{
	char *old = malloc(size);

	/* Backup the original content. */
	memcpy(old, smem, size);

	/* Modify the page. */
	memset(mem, 0xff, size);

	/* See if we still read the old values via the other mapping. */
	if (!memcmp(smem, old, size)) {
		log_test_result(KSFT_PASS);
	} else {
		ksft_print_msg("Other mapping modified\n");
		log_test_result(KSFT_FAIL);
	}
	free(old);
}

static void test_ro_pin(char *mem, const char *smem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST, false);
}

static void test_ro_fast_pin(char *mem, const char *smem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST, true);
}
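
/*
 * Every non_anon_test_fn receives two mappings that initially read the same
 * backing pages: "mem", a private R/W mapping the test modifies, and "smem",
 * a second R/O mapping that must keep reading the original content. The
 * run_with_*() helpers below construct these pairs (shared zeropage, memfd,
 * tmpfile, hugetlb).
 */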

static void run_with_zeropage(non_anon_test_fn fn, const char *desc)
{
	char *mem, *smem;

	log_test_start("%s ... with shared zeropage", desc);

	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANON, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		return;
	}

	smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
	if (smem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		goto munmap;
	}

	/* Read from the page to populate the shared zeropage. */
	FORCE_READ(mem);
	FORCE_READ(smem);

	fn(mem, smem, pagesize);
munmap:
	munmap(mem, pagesize);
	if (smem != MAP_FAILED)
		munmap(smem, pagesize);
}

static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
{
	char *mem, *smem, *mmap_mem, *mmap_smem;
	size_t mmap_size;
	int ret;

	log_test_start("%s ... with huge zeropage", desc);

	if (!has_huge_zeropage) {
		ksft_print_msg("Huge zeropage not enabled\n");
		log_test_result(KSFT_SKIP);
		return;
	}

	/* For alignment purposes, we need twice the thp size. */
	mmap_size = 2 * pmdsize;
	mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mmap_mem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		return;
	}
	mmap_smem = mmap(NULL, mmap_size, PROT_READ,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mmap_smem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		goto munmap;
	}

	/* We need a THP-aligned memory area. */
	mem = (char *)(((uintptr_t)mmap_mem + pmdsize) & ~(pmdsize - 1));
	smem = (char *)(((uintptr_t)mmap_smem + pmdsize) & ~(pmdsize - 1));

	ret = madvise(mem, pmdsize, MADV_HUGEPAGE);
	if (ret) {
		ksft_perror("madvise()");
		log_test_result(KSFT_FAIL);
		goto munmap;
	}
	ret = madvise(smem, pmdsize, MADV_HUGEPAGE);
	if (ret) {
		ksft_perror("madvise()");
		log_test_result(KSFT_FAIL);
		goto munmap;
	}

	/*
	 * Read from the memory to populate the huge shared zeropage. Read from
	 * the first sub-page and test if we get another sub-page populated
	 * automatically.
	 */
	FORCE_READ(mem);
	FORCE_READ(smem);
	if (!pagemap_is_populated(pagemap_fd, mem + pagesize) ||
	    !pagemap_is_populated(pagemap_fd, smem + pagesize)) {
		ksft_print_msg("Did not get THPs populated\n");
		log_test_result(KSFT_SKIP);
		goto munmap;
	}

	fn(mem, smem, pmdsize);
munmap:
	munmap(mmap_mem, mmap_size);
	if (mmap_smem != MAP_FAILED)
		munmap(mmap_smem, mmap_size);
}
with memfd", desc); 1641 1642 fd = memfd_create("test", 0); 1643 if (fd < 0) { 1644 ksft_perror("memfd_create() failed"); 1645 log_test_result(KSFT_FAIL); 1646 return; 1647 } 1648 1649 /* File consists of a single page filled with zeroes. */ 1650 if (fallocate(fd, 0, 0, pagesize)) { 1651 ksft_perror("fallocate() failed"); 1652 log_test_result(KSFT_FAIL); 1653 goto close; 1654 } 1655 1656 /* Create a private mapping of the memfd. */ 1657 mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); 1658 if (mem == MAP_FAILED) { 1659 ksft_perror("mmap() failed"); 1660 log_test_result(KSFT_FAIL); 1661 goto close; 1662 } 1663 smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); 1664 if (smem == MAP_FAILED) { 1665 ksft_perror("mmap() failed"); 1666 log_test_result(KSFT_FAIL); 1667 goto munmap; 1668 } 1669 1670 /* Fault the page in. */ 1671 FORCE_READ(mem); 1672 FORCE_READ(smem); 1673 1674 fn(mem, smem, pagesize); 1675 munmap: 1676 munmap(mem, pagesize); 1677 if (smem != MAP_FAILED) 1678 munmap(smem, pagesize); 1679 close: 1680 close(fd); 1681 } 1682 1683 static void run_with_tmpfile(non_anon_test_fn fn, const char *desc) 1684 { 1685 char *mem, *smem; 1686 FILE *file; 1687 int fd; 1688 1689 log_test_start("%s ... with tmpfile", desc); 1690 1691 file = tmpfile(); 1692 if (!file) { 1693 ksft_perror("tmpfile() failed"); 1694 log_test_result(KSFT_FAIL); 1695 return; 1696 } 1697 1698 fd = fileno(file); 1699 if (fd < 0) { 1700 ksft_perror("fileno() failed"); 1701 log_test_result(KSFT_SKIP); 1702 return; 1703 } 1704 1705 /* File consists of a single page filled with zeroes. */ 1706 if (fallocate(fd, 0, 0, pagesize)) { 1707 ksft_perror("fallocate() failed"); 1708 log_test_result(KSFT_FAIL); 1709 goto close; 1710 } 1711 1712 /* Create a private mapping of the memfd. */ 1713 mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); 1714 if (mem == MAP_FAILED) { 1715 ksft_perror("mmap() failed"); 1716 log_test_result(KSFT_FAIL); 1717 goto close; 1718 } 1719 smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); 1720 if (smem == MAP_FAILED) { 1721 ksft_perror("mmap() failed"); 1722 log_test_result(KSFT_FAIL); 1723 goto munmap; 1724 } 1725 1726 /* Fault the page in. */ 1727 FORCE_READ(mem); 1728 FORCE_READ(smem); 1729 1730 fn(mem, smem, pagesize); 1731 munmap: 1732 munmap(mem, pagesize); 1733 if (smem != MAP_FAILED) 1734 munmap(smem, pagesize); 1735 close: 1736 fclose(file); 1737 } 1738 1739 static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc, 1740 size_t hugetlbsize) 1741 { 1742 int flags = MFD_HUGETLB; 1743 char *mem, *smem; 1744 int fd; 1745 1746 log_test_start("%s ... with memfd hugetlb (%zu kB)", desc, 1747 hugetlbsize / 1024); 1748 1749 flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT; 1750 1751 fd = memfd_create("test", flags); 1752 if (fd < 0) { 1753 ksft_perror("memfd_create() failed"); 1754 log_test_result(KSFT_SKIP); 1755 return; 1756 } 1757 1758 /* File consists of a single page filled with zeroes. */ 1759 if (fallocate(fd, 0, 0, hugetlbsize)) { 1760 ksft_perror("need more free huge pages"); 1761 log_test_result(KSFT_SKIP); 1762 goto close; 1763 } 1764 1765 /* Create a private mapping of the memfd. 

static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc,
				   size_t hugetlbsize)
{
	int flags = MFD_HUGETLB;
	char *mem, *smem;
	int fd;

	log_test_start("%s ... with memfd hugetlb (%zu kB)", desc,
		       hugetlbsize / 1024);

	flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;

	fd = memfd_create("test", flags);
	if (fd < 0) {
		ksft_perror("memfd_create() failed");
		log_test_result(KSFT_SKIP);
		return;
	}

	/* File consists of a single huge page filled with zeroes. */
	if (fallocate(fd, 0, 0, hugetlbsize)) {
		ksft_perror("need more free huge pages");
		log_test_result(KSFT_SKIP);
		goto close;
	}

	/* Create a private mapping of the memfd. */
	mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd,
		   0);
	if (mem == MAP_FAILED) {
		ksft_perror("need more free huge pages");
		log_test_result(KSFT_SKIP);
		goto close;
	}
	smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0);
	if (smem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		goto munmap;
	}

	/* Fault the page in. */
	FORCE_READ(mem);
	FORCE_READ(smem);

	fn(mem, smem, hugetlbsize);
munmap:
	munmap(mem, hugetlbsize);
	if (smem != MAP_FAILED)
		munmap(smem, hugetlbsize);
close:
	close(fd);
}

struct non_anon_test_case {
	const char *desc;
	non_anon_test_fn fn;
};

/*
 * Test cases that target any pages in private mappings that are not anonymous:
 * pages that may get shared via COW independent of fork(). This includes
 * the shared zeropage(s), pagecache pages, ...
 */
static const struct non_anon_test_case non_anon_test_cases[] = {
	/*
	 * Basic COW test without any GUP. If we fail to break COW, changes are
	 * visible via other private/shared mappings.
	 */
	{
		"Basic COW",
		test_cow,
	},
	/*
	 * Take a R/O longterm pin. When modifying the page via the page table,
	 * the page content change must be visible via the pin.
	 */
	{
		"R/O longterm GUP pin",
		test_ro_pin,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O longterm GUP-fast pin",
		test_ro_fast_pin,
	},
};

static void run_non_anon_test_case(struct non_anon_test_case const *test_case)
{
	int i;

	run_with_zeropage(test_case->fn, test_case->desc);
	run_with_memfd(test_case->fn, test_case->desc);
	run_with_tmpfile(test_case->fn, test_case->desc);
	if (pmdsize)
		run_with_huge_zeropage(test_case->fn, test_case->desc);
	for (i = 0; i < nr_hugetlbsizes; i++)
		run_with_memfd_hugetlb(test_case->fn, test_case->desc,
				       hugetlbsizes[i]);
}

static void run_non_anon_test_cases(void)
{
	int i;

	ksft_print_msg("[INFO] Non-anonymous memory tests in private mappings\n");

	for (i = 0; i < ARRAY_SIZE(non_anon_test_cases); i++)
		run_non_anon_test_case(&non_anon_test_cases[i]);
}

static int tests_per_non_anon_test_case(void)
{
	int tests = 3 + nr_hugetlbsizes;

	if (pmdsize)
		tests += 1;
	return tests;
}

int main(int argc, char **argv)
{
	struct thp_settings default_settings;

	ksft_print_header();

	pagesize = getpagesize();
	pmdsize = read_pmd_pagesize();
	if (pmdsize) {
		/* Only if THP is supported. */
		thp_read_settings(&default_settings);
		default_settings.hugepages[sz2ord(pmdsize)].enabled = THP_INHERIT;
		thp_save_settings();
		thp_push_settings(&default_settings);

		ksft_print_msg("[INFO] detected PMD size: %zu KiB\n",
			       pmdsize / 1024);
		nr_thpsizes = detect_thp_sizes(thpsizes, ARRAY_SIZE(thpsizes));
	}
	nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes,
						    ARRAY_SIZE(hugetlbsizes));
	has_huge_zeropage = detect_huge_zeropage();

	ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() +
		      ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() +
		      ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case());

	gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
	if (pagemap_fd < 0)
		ksft_exit_fail_msg("opening pagemap failed\n");

	run_anon_test_cases();
	run_anon_thp_test_cases();
	run_non_anon_test_cases();

	if (pmdsize) {
		/* Only if THP is supported. */
		thp_restore_settings();
	}

	ksft_finished();
}