1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * COW (Copy On Write) tests. 4 * 5 * Copyright 2022, Red Hat, Inc. 6 * 7 * Author(s): David Hildenbrand <david@redhat.com> 8 */ 9 #define _GNU_SOURCE 10 #include <stdlib.h> 11 #include <string.h> 12 #include <stdbool.h> 13 #include <stdint.h> 14 #include <unistd.h> 15 #include <errno.h> 16 #include <fcntl.h> 17 #include <assert.h> 18 #include <linux/mman.h> 19 #include <sys/mman.h> 20 #include <sys/ioctl.h> 21 #include <sys/wait.h> 22 #include <linux/memfd.h> 23 24 #include "local_config.h" 25 #ifdef LOCAL_CONFIG_HAVE_LIBURING 26 #include <liburing.h> 27 #endif /* LOCAL_CONFIG_HAVE_LIBURING */ 28 29 #include "../../../../mm/gup_test.h" 30 #include "../kselftest.h" 31 #include "vm_util.h" 32 #include "thp_settings.h" 33 34 static size_t pagesize; 35 static int pagemap_fd; 36 static size_t pmdsize; 37 static int nr_thpsizes; 38 static size_t thpsizes[20]; 39 static int nr_hugetlbsizes; 40 static size_t hugetlbsizes[10]; 41 static int gup_fd; 42 static bool has_huge_zeropage; 43 44 static int detect_thp_sizes(size_t sizes[], int max) 45 { 46 int count = 0; 47 unsigned long orders; 48 size_t kb; 49 int i; 50 51 /* thp not supported at all. 
*/ 52 if (!pmdsize) 53 return 0; 54 55 orders = 1UL << sz2ord(pmdsize, pagesize); 56 orders |= thp_supported_orders(); 57 58 for (i = 0; orders && count < max; i++) { 59 if (!(orders & (1UL << i))) 60 continue; 61 orders &= ~(1UL << i); 62 kb = (pagesize >> 10) << i; 63 sizes[count++] = kb * 1024; 64 ksft_print_msg("[INFO] detected THP size: %zu KiB\n", kb); 65 } 66 67 return count; 68 } 69 70 static bool range_is_swapped(void *addr, size_t size) 71 { 72 for (; size; addr += pagesize, size -= pagesize) 73 if (!pagemap_is_swapped(pagemap_fd, addr)) 74 return false; 75 return true; 76 } 77 78 struct comm_pipes { 79 int child_ready[2]; 80 int parent_ready[2]; 81 }; 82 83 static int setup_comm_pipes(struct comm_pipes *comm_pipes) 84 { 85 if (pipe(comm_pipes->child_ready) < 0) { 86 ksft_perror("pipe() failed"); 87 return -errno; 88 } 89 if (pipe(comm_pipes->parent_ready) < 0) { 90 ksft_perror("pipe() failed"); 91 close(comm_pipes->child_ready[0]); 92 close(comm_pipes->child_ready[1]); 93 return -errno; 94 } 95 96 return 0; 97 } 98 99 static void close_comm_pipes(struct comm_pipes *comm_pipes) 100 { 101 close(comm_pipes->child_ready[0]); 102 close(comm_pipes->child_ready[1]); 103 close(comm_pipes->parent_ready[0]); 104 close(comm_pipes->parent_ready[1]); 105 } 106 107 static int child_memcmp_fn(char *mem, size_t size, 108 struct comm_pipes *comm_pipes) 109 { 110 char *old = malloc(size); 111 char buf; 112 113 /* Backup the original content. */ 114 memcpy(old, mem, size); 115 116 /* Wait until the parent modified the page. */ 117 write(comm_pipes->child_ready[1], "0", 1); 118 while (read(comm_pipes->parent_ready[0], &buf, 1) != 1) 119 ; 120 121 /* See if we still read the old values. 
*/ 122 return memcmp(old, mem, size); 123 } 124 125 static int child_vmsplice_memcmp_fn(char *mem, size_t size, 126 struct comm_pipes *comm_pipes) 127 { 128 struct iovec iov = { 129 .iov_base = mem, 130 .iov_len = size, 131 }; 132 ssize_t cur, total, transferred; 133 char *old, *new; 134 int fds[2]; 135 char buf; 136 137 old = malloc(size); 138 new = malloc(size); 139 140 /* Backup the original content. */ 141 memcpy(old, mem, size); 142 143 if (pipe(fds) < 0) 144 return -errno; 145 146 /* Trigger a read-only pin. */ 147 transferred = vmsplice(fds[1], &iov, 1, 0); 148 if (transferred < 0) 149 return -errno; 150 if (transferred == 0) 151 return -EINVAL; 152 153 /* Unmap it from our page tables. */ 154 if (munmap(mem, size) < 0) 155 return -errno; 156 157 /* Wait until the parent modified it. */ 158 write(comm_pipes->child_ready[1], "0", 1); 159 while (read(comm_pipes->parent_ready[0], &buf, 1) != 1) 160 ; 161 162 /* See if we still read the old values via the pipe. */ 163 for (total = 0; total < transferred; total += cur) { 164 cur = read(fds[0], new + total, transferred - total); 165 if (cur < 0) 166 return -errno; 167 } 168 169 return memcmp(old, new, transferred); 170 } 171 172 typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes); 173 174 static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect, 175 child_fn fn, bool xfail) 176 { 177 struct comm_pipes comm_pipes; 178 char buf; 179 int ret; 180 181 ret = setup_comm_pipes(&comm_pipes); 182 if (ret) { 183 log_test_result(KSFT_FAIL); 184 return; 185 } 186 187 ret = fork(); 188 if (ret < 0) { 189 ksft_perror("fork() failed"); 190 log_test_result(KSFT_FAIL); 191 goto close_comm_pipes; 192 } else if (!ret) { 193 exit(fn(mem, size, &comm_pipes)); 194 } 195 196 while (read(comm_pipes.child_ready[0], &buf, 1) != 1) 197 ; 198 199 if (do_mprotect) { 200 /* 201 * mprotect() optimizations might try avoiding 202 * write-faults by directly mapping pages writable. 
203 */ 204 ret = mprotect(mem, size, PROT_READ); 205 if (ret) { 206 ksft_perror("mprotect() failed"); 207 log_test_result(KSFT_FAIL); 208 write(comm_pipes.parent_ready[1], "0", 1); 209 wait(&ret); 210 goto close_comm_pipes; 211 } 212 213 ret = mprotect(mem, size, PROT_READ|PROT_WRITE); 214 if (ret) { 215 ksft_perror("mprotect() failed"); 216 log_test_result(KSFT_FAIL); 217 write(comm_pipes.parent_ready[1], "0", 1); 218 wait(&ret); 219 goto close_comm_pipes; 220 } 221 } 222 223 /* Modify the page. */ 224 memset(mem, 0xff, size); 225 write(comm_pipes.parent_ready[1], "0", 1); 226 227 wait(&ret); 228 if (WIFEXITED(ret)) 229 ret = WEXITSTATUS(ret); 230 else 231 ret = -EINVAL; 232 233 if (!ret) { 234 log_test_result(KSFT_PASS); 235 } else if (xfail) { 236 /* 237 * With hugetlb, some vmsplice() tests are currently expected to 238 * fail because (a) harder to fix and (b) nobody really cares. 239 * Flag them as expected failure for now. 240 */ 241 ksft_print_msg("Leak from parent into child\n"); 242 log_test_result(KSFT_XFAIL); 243 } else { 244 ksft_print_msg("Leak from parent into child\n"); 245 log_test_result(KSFT_FAIL); 246 } 247 close_comm_pipes: 248 close_comm_pipes(&comm_pipes); 249 } 250 251 static void test_cow_in_parent(char *mem, size_t size, bool is_hugetlb) 252 { 253 do_test_cow_in_parent(mem, size, false, child_memcmp_fn, false); 254 } 255 256 static void test_cow_in_parent_mprotect(char *mem, size_t size, bool is_hugetlb) 257 { 258 do_test_cow_in_parent(mem, size, true, child_memcmp_fn, false); 259 } 260 261 static void test_vmsplice_in_child(char *mem, size_t size, bool is_hugetlb) 262 { 263 do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn, 264 is_hugetlb); 265 } 266 267 static void test_vmsplice_in_child_mprotect(char *mem, size_t size, 268 bool is_hugetlb) 269 { 270 do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn, 271 is_hugetlb); 272 } 273 274 static void do_test_vmsplice_in_parent(char *mem, size_t size, 275 bool 
before_fork, bool xfail) 276 { 277 struct iovec iov = { 278 .iov_base = mem, 279 .iov_len = size, 280 }; 281 ssize_t cur, total, transferred = 0; 282 struct comm_pipes comm_pipes; 283 char *old, *new; 284 int ret, fds[2]; 285 char buf; 286 287 old = malloc(size); 288 new = malloc(size); 289 290 memcpy(old, mem, size); 291 292 ret = setup_comm_pipes(&comm_pipes); 293 if (ret) { 294 log_test_result(KSFT_FAIL); 295 goto free; 296 } 297 298 if (pipe(fds) < 0) { 299 ksft_perror("pipe() failed"); 300 log_test_result(KSFT_FAIL); 301 goto close_comm_pipes; 302 } 303 304 if (before_fork) { 305 transferred = vmsplice(fds[1], &iov, 1, 0); 306 if (transferred <= 0) { 307 ksft_perror("vmsplice() failed\n"); 308 log_test_result(KSFT_FAIL); 309 goto close_pipe; 310 } 311 } 312 313 ret = fork(); 314 if (ret < 0) { 315 ksft_perror("fork() failed\n"); 316 log_test_result(KSFT_FAIL); 317 goto close_pipe; 318 } else if (!ret) { 319 write(comm_pipes.child_ready[1], "0", 1); 320 while (read(comm_pipes.parent_ready[0], &buf, 1) != 1) 321 ; 322 /* Modify page content in the child. */ 323 memset(mem, 0xff, size); 324 exit(0); 325 } 326 327 if (!before_fork) { 328 transferred = vmsplice(fds[1], &iov, 1, 0); 329 if (transferred <= 0) { 330 ksft_perror("vmsplice() failed"); 331 log_test_result(KSFT_FAIL); 332 wait(&ret); 333 goto close_pipe; 334 } 335 } 336 337 while (read(comm_pipes.child_ready[0], &buf, 1) != 1) 338 ; 339 if (munmap(mem, size) < 0) { 340 ksft_perror("munmap() failed"); 341 log_test_result(KSFT_FAIL); 342 goto close_pipe; 343 } 344 write(comm_pipes.parent_ready[1], "0", 1); 345 346 /* Wait until the child is done writing. */ 347 wait(&ret); 348 if (!WIFEXITED(ret)) { 349 ksft_perror("wait() failed"); 350 log_test_result(KSFT_FAIL); 351 goto close_pipe; 352 } 353 354 /* See if we still read the old values. 
*/ 355 for (total = 0; total < transferred; total += cur) { 356 cur = read(fds[0], new + total, transferred - total); 357 if (cur < 0) { 358 ksft_perror("read() failed"); 359 log_test_result(KSFT_FAIL); 360 goto close_pipe; 361 } 362 } 363 364 if (!memcmp(old, new, transferred)) { 365 log_test_result(KSFT_PASS); 366 } else if (xfail) { 367 /* 368 * With hugetlb, some vmsplice() tests are currently expected to 369 * fail because (a) harder to fix and (b) nobody really cares. 370 * Flag them as expected failure for now. 371 */ 372 ksft_print_msg("Leak from child into parent\n"); 373 log_test_result(KSFT_XFAIL); 374 } else { 375 ksft_print_msg("Leak from child into parent\n"); 376 log_test_result(KSFT_FAIL); 377 } 378 close_pipe: 379 close(fds[0]); 380 close(fds[1]); 381 close_comm_pipes: 382 close_comm_pipes(&comm_pipes); 383 free: 384 free(old); 385 free(new); 386 } 387 388 static void test_vmsplice_before_fork(char *mem, size_t size, bool is_hugetlb) 389 { 390 do_test_vmsplice_in_parent(mem, size, true, is_hugetlb); 391 } 392 393 static void test_vmsplice_after_fork(char *mem, size_t size, bool is_hugetlb) 394 { 395 do_test_vmsplice_in_parent(mem, size, false, is_hugetlb); 396 } 397 398 #ifdef LOCAL_CONFIG_HAVE_LIBURING 399 static void do_test_iouring(char *mem, size_t size, bool use_fork) 400 { 401 struct comm_pipes comm_pipes; 402 struct io_uring_cqe *cqe; 403 struct io_uring_sqe *sqe; 404 struct io_uring ring; 405 ssize_t cur, total; 406 struct iovec iov; 407 char *buf, *tmp; 408 int ret, fd; 409 FILE *file; 410 411 ret = setup_comm_pipes(&comm_pipes); 412 if (ret) { 413 log_test_result(KSFT_FAIL); 414 return; 415 } 416 417 file = tmpfile(); 418 if (!file) { 419 ksft_perror("tmpfile() failed"); 420 log_test_result(KSFT_FAIL); 421 goto close_comm_pipes; 422 } 423 fd = fileno(file); 424 assert(fd); 425 426 tmp = malloc(size); 427 if (!tmp) { 428 ksft_print_msg("malloc() failed\n"); 429 log_test_result(KSFT_FAIL); 430 goto close_file; 431 } 432 433 /* Skip on 
errors, as we might just lack kernel support. */ 434 ret = io_uring_queue_init(1, &ring, 0); 435 if (ret < 0) { 436 ksft_print_msg("io_uring_queue_init() failed\n"); 437 log_test_result(KSFT_SKIP); 438 goto free_tmp; 439 } 440 441 /* 442 * Register the range as a fixed buffer. This will FOLL_WRITE | FOLL_PIN 443 * | FOLL_LONGTERM the range. 444 * 445 * Skip on errors, as we might just lack kernel support or might not 446 * have sufficient MEMLOCK permissions. 447 */ 448 iov.iov_base = mem; 449 iov.iov_len = size; 450 ret = io_uring_register_buffers(&ring, &iov, 1); 451 if (ret) { 452 ksft_print_msg("io_uring_register_buffers() failed\n"); 453 log_test_result(KSFT_SKIP); 454 goto queue_exit; 455 } 456 457 if (use_fork) { 458 /* 459 * fork() and keep the child alive until we're done. Note that 460 * we expect the pinned page to not get shared with the child. 461 */ 462 ret = fork(); 463 if (ret < 0) { 464 ksft_perror("fork() failed"); 465 log_test_result(KSFT_FAIL); 466 goto unregister_buffers; 467 } else if (!ret) { 468 write(comm_pipes.child_ready[1], "0", 1); 469 while (read(comm_pipes.parent_ready[0], &buf, 1) != 1) 470 ; 471 exit(0); 472 } 473 474 while (read(comm_pipes.child_ready[0], &buf, 1) != 1) 475 ; 476 } else { 477 /* 478 * Map the page R/O into the page table. Enable softdirty 479 * tracking to stop the page from getting mapped R/W immediately 480 * again by mprotect() optimizations. Note that we don't have an 481 * easy way to test if that worked (the pagemap does not export 482 * if the page is mapped R/O vs. R/W). 
483 */ 484 ret = mprotect(mem, size, PROT_READ); 485 if (ret) { 486 ksft_perror("mprotect() failed"); 487 log_test_result(KSFT_FAIL); 488 goto unregister_buffers; 489 } 490 491 clear_softdirty(); 492 ret = mprotect(mem, size, PROT_READ | PROT_WRITE); 493 if (ret) { 494 ksft_perror("mprotect() failed"); 495 log_test_result(KSFT_FAIL); 496 goto unregister_buffers; 497 } 498 } 499 500 /* 501 * Modify the page and write page content as observed by the fixed 502 * buffer pin to the file so we can verify it. 503 */ 504 memset(mem, 0xff, size); 505 sqe = io_uring_get_sqe(&ring); 506 if (!sqe) { 507 ksft_print_msg("io_uring_get_sqe() failed\n"); 508 log_test_result(KSFT_FAIL); 509 goto quit_child; 510 } 511 io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0); 512 513 ret = io_uring_submit(&ring); 514 if (ret < 0) { 515 ksft_print_msg("io_uring_submit() failed\n"); 516 log_test_result(KSFT_FAIL); 517 goto quit_child; 518 } 519 520 ret = io_uring_wait_cqe(&ring, &cqe); 521 if (ret < 0) { 522 ksft_print_msg("io_uring_wait_cqe() failed\n"); 523 log_test_result(KSFT_FAIL); 524 goto quit_child; 525 } 526 527 if (cqe->res != size) { 528 ksft_print_msg("write_fixed failed\n"); 529 log_test_result(KSFT_FAIL); 530 goto quit_child; 531 } 532 io_uring_cqe_seen(&ring, cqe); 533 534 /* Read back the file content to the temporary buffer. */ 535 total = 0; 536 while (total < size) { 537 cur = pread(fd, tmp + total, size - total, total); 538 if (cur < 0) { 539 ksft_perror("pread() failed\n"); 540 log_test_result(KSFT_FAIL); 541 goto quit_child; 542 } 543 total += cur; 544 } 545 546 /* Finally, check if we read what we expected. 
*/ 547 if (!memcmp(mem, tmp, size)) { 548 log_test_result(KSFT_PASS); 549 } else { 550 ksft_print_msg("Longtom R/W pin is not reliable\n"); 551 log_test_result(KSFT_FAIL); 552 } 553 554 quit_child: 555 if (use_fork) { 556 write(comm_pipes.parent_ready[1], "0", 1); 557 wait(&ret); 558 } 559 unregister_buffers: 560 io_uring_unregister_buffers(&ring); 561 queue_exit: 562 io_uring_queue_exit(&ring); 563 free_tmp: 564 free(tmp); 565 close_file: 566 fclose(file); 567 close_comm_pipes: 568 close_comm_pipes(&comm_pipes); 569 } 570 571 static void test_iouring_ro(char *mem, size_t size, bool is_hugetlb) 572 { 573 do_test_iouring(mem, size, false); 574 } 575 576 static void test_iouring_fork(char *mem, size_t size, bool is_hugetlb) 577 { 578 do_test_iouring(mem, size, true); 579 } 580 581 #endif /* LOCAL_CONFIG_HAVE_LIBURING */ 582 583 enum ro_pin_test { 584 RO_PIN_TEST, 585 RO_PIN_TEST_SHARED, 586 RO_PIN_TEST_PREVIOUSLY_SHARED, 587 RO_PIN_TEST_RO_EXCLUSIVE, 588 }; 589 590 static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, 591 bool fast) 592 { 593 struct pin_longterm_test args; 594 struct comm_pipes comm_pipes; 595 char *tmp, buf; 596 __u64 tmp_val; 597 int ret; 598 599 if (gup_fd < 0) { 600 ksft_print_msg("gup_test not available\n"); 601 log_test_result(KSFT_SKIP); 602 return; 603 } 604 605 tmp = malloc(size); 606 if (!tmp) { 607 ksft_perror("malloc() failed\n"); 608 log_test_result(KSFT_FAIL); 609 return; 610 } 611 612 ret = setup_comm_pipes(&comm_pipes); 613 if (ret) { 614 log_test_result(KSFT_FAIL); 615 goto free_tmp; 616 } 617 618 switch (test) { 619 case RO_PIN_TEST: 620 break; 621 case RO_PIN_TEST_SHARED: 622 case RO_PIN_TEST_PREVIOUSLY_SHARED: 623 /* 624 * Share the pages with our child. As the pages are not pinned, 625 * this should just work. 
626 */ 627 ret = fork(); 628 if (ret < 0) { 629 ksft_perror("fork() failed"); 630 log_test_result(KSFT_FAIL); 631 goto close_comm_pipes; 632 } else if (!ret) { 633 write(comm_pipes.child_ready[1], "0", 1); 634 while (read(comm_pipes.parent_ready[0], &buf, 1) != 1) 635 ; 636 exit(0); 637 } 638 639 /* Wait until our child is ready. */ 640 while (read(comm_pipes.child_ready[0], &buf, 1) != 1) 641 ; 642 643 if (test == RO_PIN_TEST_PREVIOUSLY_SHARED) { 644 /* 645 * Tell the child to quit now and wait until it quit. 646 * The pages should now be mapped R/O into our page 647 * tables, but they are no longer shared. 648 */ 649 write(comm_pipes.parent_ready[1], "0", 1); 650 wait(&ret); 651 if (!WIFEXITED(ret)) 652 ksft_print_msg("[INFO] wait() failed\n"); 653 } 654 break; 655 case RO_PIN_TEST_RO_EXCLUSIVE: 656 /* 657 * Map the page R/O into the page table. Enable softdirty 658 * tracking to stop the page from getting mapped R/W immediately 659 * again by mprotect() optimizations. Note that we don't have an 660 * easy way to test if that worked (the pagemap does not export 661 * if the page is mapped R/O vs. R/W). 662 */ 663 ret = mprotect(mem, size, PROT_READ); 664 clear_softdirty(); 665 ret |= mprotect(mem, size, PROT_READ | PROT_WRITE); 666 if (ret) { 667 ksft_perror("mprotect() failed"); 668 log_test_result(KSFT_FAIL); 669 goto close_comm_pipes; 670 } 671 break; 672 default: 673 assert(false); 674 } 675 676 /* Take a R/O pin. This should trigger unsharing. */ 677 args.addr = (__u64)(uintptr_t)mem; 678 args.size = size; 679 args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0; 680 ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args); 681 if (ret) { 682 if (errno == EINVAL) 683 ret = KSFT_SKIP; 684 else 685 ret = KSFT_FAIL; 686 ksft_perror("PIN_LONGTERM_TEST_START failed"); 687 log_test_result(ret); 688 goto wait; 689 } 690 691 /* Modify the page. 
*/ 692 memset(mem, 0xff, size); 693 694 /* 695 * Read back the content via the pin to the temporary buffer and 696 * test if we observed the modification. 697 */ 698 tmp_val = (__u64)(uintptr_t)tmp; 699 ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val); 700 if (ret) { 701 ksft_perror("PIN_LONGTERM_TEST_READ failed"); 702 log_test_result(KSFT_FAIL); 703 } else { 704 if (!memcmp(mem, tmp, size)) { 705 log_test_result(KSFT_PASS); 706 } else { 707 ksft_print_msg("Longterm R/O pin is not reliable\n"); 708 log_test_result(KSFT_FAIL); 709 } 710 } 711 712 ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP); 713 if (ret) 714 ksft_perror("PIN_LONGTERM_TEST_STOP failed"); 715 wait: 716 switch (test) { 717 case RO_PIN_TEST_SHARED: 718 write(comm_pipes.parent_ready[1], "0", 1); 719 wait(&ret); 720 if (!WIFEXITED(ret)) 721 ksft_perror("wait() failed"); 722 break; 723 default: 724 break; 725 } 726 close_comm_pipes: 727 close_comm_pipes(&comm_pipes); 728 free_tmp: 729 free(tmp); 730 } 731 732 static void test_ro_pin_on_shared(char *mem, size_t size, bool is_hugetlb) 733 { 734 do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, false); 735 } 736 737 static void test_ro_fast_pin_on_shared(char *mem, size_t size, bool is_hugetlb) 738 { 739 do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, true); 740 } 741 742 static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size, 743 bool is_hugetlb) 744 { 745 do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, false); 746 } 747 748 static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size, 749 bool is_hugetlb) 750 { 751 do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, true); 752 } 753 754 static void test_ro_pin_on_ro_exclusive(char *mem, size_t size, 755 bool is_hugetlb) 756 { 757 do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, false); 758 } 759 760 static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size, 761 bool is_hugetlb) 762 { 763 do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, true); 764 } 

/* Signature shared by every COW test; @hugetlb tells hugetlb-backed runs apart. */
typedef void (*test_fn)(char *mem, size_t size, bool hugetlb);

/*
 * Run @fn against a single anonymous base page (THP explicitly disabled),
 * optionally swapping the page out first via MADV_PAGEOUT.
 */
static void do_run_with_base_page(test_fn fn, bool swapout)
{
	char *mem;
	int ret;

	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		return;
	}

	ret = madvise(mem, pagesize, MADV_NOHUGEPAGE);
	/* Ignore if not around on a kernel. */
	if (ret && errno != EINVAL) {
		ksft_perror("MADV_NOHUGEPAGE failed");
		log_test_result(KSFT_FAIL);
		goto munmap;
	}

	/* Populate a base page. */
	memset(mem, 1, pagesize);

	if (swapout) {
		madvise(mem, pagesize, MADV_PAGEOUT);
		/* Verify via the pagemap that the page really went to swap. */
		if (!pagemap_is_swapped(pagemap_fd, mem)) {
			ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n");
			log_test_result(KSFT_SKIP);
			goto munmap;
		}
	}

	fn(mem, pagesize, false);
munmap:
	munmap(mem, pagesize);
}

static void run_with_base_page(test_fn fn, const char *desc)
{
	log_test_start("%s ... with base page", desc);
	do_run_with_base_page(fn, false);
}

static void run_with_base_page_swap(test_fn fn, const char *desc)
{
	log_test_start("%s ... with swapped out base page", desc);
	do_run_with_base_page(fn, true);
}

/* The different THP mapping states a test can be run against. */
enum thp_run {
	THP_RUN_PMD,			/* PMD-mapped THP */
	THP_RUN_PMD_SWAPOUT,		/* PMD-mapped THP, swapped out */
	THP_RUN_PTE,			/* PTE-mapped THP */
	THP_RUN_PTE_SWAPOUT,		/* PTE-mapped THP, swapped out */
	THP_RUN_SINGLE_PTE,		/* single remaining PTE of a THP */
	THP_RUN_SINGLE_PTE_SWAPOUT,	/* single PTE of a THP, swapped out */
	THP_RUN_PARTIAL_MREMAP,		/* half of a THP mremap()'ed away */
	THP_RUN_PARTIAL_SHARED,		/* THP partially COW-shared with a child */
};

/*
 * Allocate a THP of @thpsize, convert it into the mapping state requested
 * by @thp_run (PTE-mapping, partial discard, partial mremap, partial
 * sharing, optional swapout), then run @fn on the remaining range.
 */
static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
{
	char *mem, *mmap_mem, *tmp, *mremap_mem = MAP_FAILED;
	size_t size, mmap_size, mremap_size;
	int ret;

	/* For alignment purposes, we need twice the thp size. */
	mmap_size = 2 * thpsize;
	mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mmap_mem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		return;
	}

	/* We need a THP-aligned memory area. */
	mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1));

	ret = madvise(mem, thpsize, MADV_HUGEPAGE);
	if (ret) {
		ksft_perror("MADV_HUGEPAGE failed");
		log_test_result(KSFT_FAIL);
		goto munmap;
	}

	/*
	 * Try to populate a THP. Touch the first sub-page and test if
	 * we get the last sub-page populated automatically.
	 */
	mem[0] = 1;
	if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
		ksft_print_msg("Did not get a THP populated\n");
		log_test_result(KSFT_SKIP);
		goto munmap;
	}
	memset(mem, 1, thpsize);

	size = thpsize;
	switch (thp_run) {
	case THP_RUN_PMD:
	case THP_RUN_PMD_SWAPOUT:
		assert(thpsize == pmdsize);
		break;
	case THP_RUN_PTE:
	case THP_RUN_PTE_SWAPOUT:
		/*
		 * Trigger PTE-mapping the THP by temporarily mapping a single
		 * subpage R/O. This is a noop if the THP is not pmdsize (and
		 * therefore already PTE-mapped).
		 */
		ret = mprotect(mem + pagesize, pagesize, PROT_READ);
		if (ret) {
			ksft_perror("mprotect() failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
		if (ret) {
			ksft_perror("mprotect() failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		break;
	case THP_RUN_SINGLE_PTE:
	case THP_RUN_SINGLE_PTE_SWAPOUT:
		/*
		 * Discard all but a single subpage of that PTE-mapped THP. What
		 * remains is a single PTE mapping a single subpage.
		 */
		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED);
		if (ret) {
			ksft_perror("MADV_DONTNEED failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		size = pagesize;
		break;
	case THP_RUN_PARTIAL_MREMAP:
		/*
		 * Remap half of the THP. We need some new memory location
		 * for that.
		 */
		mremap_size = thpsize / 2;
		mremap_mem = mmap(NULL, mremap_size, PROT_NONE,
				  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (mremap_mem == MAP_FAILED) {
			ksft_perror("mmap() failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		tmp = mremap(mem + mremap_size, mremap_size, mremap_size,
			     MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem);
		if (tmp != mremap_mem) {
			ksft_perror("mremap() failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		size = mremap_size;
		break;
	case THP_RUN_PARTIAL_SHARED:
		/*
		 * Share the first page of the THP with a child and quit the
		 * child. This will result in some parts of the THP never
		 * have been shared.
		 */
		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK);
		if (ret) {
			ksft_perror("MADV_DONTFORK failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		ret = fork();
		if (ret < 0) {
			ksft_perror("fork() failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		} else if (!ret) {
			exit(0);
		}
		wait(&ret);
		/* Allow for sharing all pages again. */
		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK);
		if (ret) {
			ksft_perror("MADV_DOFORK failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		break;
	default:
		assert(false);
	}

	/* For the *_SWAPOUT variants, push the prepared range out to swap. */
	switch (thp_run) {
	case THP_RUN_PMD_SWAPOUT:
	case THP_RUN_PTE_SWAPOUT:
	case THP_RUN_SINGLE_PTE_SWAPOUT:
		madvise(mem, size, MADV_PAGEOUT);
		if (!range_is_swapped(mem, size)) {
			ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n");
			log_test_result(KSFT_SKIP);
			goto munmap;
		}
		break;
	default:
		break;
	}

	fn(mem, size, false);
munmap:
	munmap(mmap_mem, mmap_size);
	if (mremap_mem != MAP_FAILED)
		munmap(mremap_mem, mremap_size);
}

static void run_with_thp(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with THP (%zu kB)",
		desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_PMD, size);
}

static void run_with_thp_swap(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with swapped-out THP (%zu kB)",
		desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT, size);
}

static void run_with_pte_mapped_thp(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with PTE-mapped THP (%zu kB)",
		desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_PTE, size);
}

static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with swapped-out, PTE-mapped THP (%zu kB)",
		desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT, size);
}

static void run_with_single_pte_of_thp(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with single PTE of THP (%zu kB)",
		desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_SINGLE_PTE, size);
}

static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with single PTE of swapped-out THP (%zu kB)",
		desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT, size);
}

static void run_with_partial_mremap_thp(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with partially mremap()'ed THP (%zu kB)",
		desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP, size);
}

static void run_with_partial_shared_thp(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with partially shared THP (%zu kB)",
		desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED, size);
}

/* Run @fn against a hugetlb page of @hugetlbsize; skips if pages are scarce. */
static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize)
{
	int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
	char *mem, *dummy;

	log_test_start("%s ... with hugetlb (%zu kB)", desc,
		       hugetlbsize / 1024);

	/* Encode the hugetlb page size (a power of two) into the mmap flags. */
	flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT;

	mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_perror("need more free huge pages");
		log_test_result(KSFT_SKIP);
		return;
	}

	/* Populate an huge page. */
	memset(mem, 1, hugetlbsize);

	/*
	 * We need a total of two hugetlb pages to handle COW/unsharing
	 * properly, otherwise we might get zapped by a SIGBUS.
	 */
	dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
	if (dummy == MAP_FAILED) {
		ksft_perror("need more free huge pages");
		log_test_result(KSFT_SKIP);
		goto munmap;
	}
	munmap(dummy, hugetlbsize);

	fn(mem, hugetlbsize, true);
munmap:
	munmap(mem, hugetlbsize);
}

/* A named COW test, run against every backing variant by the run_* helpers. */
struct test_case {
	const char *desc;
	test_fn fn;
};

/*
 * Test cases that are specific to anonymous pages: pages in private mappings
 * that may get shared via COW during fork().
 */
static const struct test_case anon_test_cases[] = {
	/*
	 * Basic COW tests for fork() without any GUP. If we miss to break COW,
	 * either the child can observe modifications by the parent or the
	 * other way around.
	 */
	{
		"Basic COW after fork()",
		test_cow_in_parent,
	},
	/*
	 * Basic test, but do an additional mprotect(PROT_READ)+
	 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
	 */
	{
		"Basic COW after fork() with mprotect() optimization",
		test_cow_in_parent_mprotect,
	},
	/*
	 * vmsplice() [R/O GUP] + unmap in the child; modify in the parent. If
	 * we miss to break COW, the child observes modifications by the parent.
	 * This is CVE-2020-29374 reported by Jann Horn.
	 */
	{
		"vmsplice() + unmap in child",
		test_vmsplice_in_child,
	},
	/*
	 * vmsplice() test, but do an additional mprotect(PROT_READ)+
	 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
	 */
	{
		"vmsplice() + unmap in child with mprotect() optimization",
		test_vmsplice_in_child_mprotect,
	},
	/*
	 * vmsplice() [R/O GUP] in parent before fork(), unmap in parent after
	 * fork(); modify in the child. If we miss to break COW, the parent
	 * observes modifications by the child.
	 */
	{
		"vmsplice() before fork(), unmap in parent after fork()",
		test_vmsplice_before_fork,
	},
	/*
	 * vmsplice() [R/O GUP] + unmap in parent after fork(); modify in the
	 * child. If we miss to break COW, the parent observes modifications by
	 * the child.
	 */
	{
		"vmsplice() + unmap in parent after fork()",
		test_vmsplice_after_fork,
	},
#ifdef LOCAL_CONFIG_HAVE_LIBURING
	/*
	 * Take a R/W longterm pin and then map the page R/O into the page
	 * table to trigger a write fault on next access. When modifying the
	 * page, the page content must be visible via the pin.
	 */
	{
		"R/O-mapping a page registered as iouring fixed buffer",
		test_iouring_ro,
	},
	/*
	 * Take a R/W longterm pin and then fork() a child. When modifying the
	 * page, the page content must be visible via the pin. We expect the
	 * pinned page to not get shared with the child.
	 */
	{
		"fork() with an iouring fixed buffer",
		test_iouring_fork,
	},

#endif /* LOCAL_CONFIG_HAVE_LIBURING */
	/*
	 * Take a R/O longterm pin on a R/O-mapped shared anonymous page.
	 * When modifying the page via the page table, the page content change
	 * must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped shared page",
		test_ro_pin_on_shared,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped shared page",
		test_ro_fast_pin_on_shared,
	},
	/*
	 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page that
	 * was previously shared. When modifying the page via the page table,
	 * the page content change must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped previously-shared page",
		test_ro_pin_on_ro_previously_shared,
	},
	/* Same as above, but using GUP-fast.
*/ 1184 { 1185 "R/O GUP-fast pin on R/O-mapped previously-shared page", 1186 test_ro_fast_pin_on_ro_previously_shared, 1187 }, 1188 /* 1189 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page. 1190 * When modifying the page via the page table, the page content change 1191 * must be visible via the pin. 1192 */ 1193 { 1194 "R/O GUP pin on R/O-mapped exclusive page", 1195 test_ro_pin_on_ro_exclusive, 1196 }, 1197 /* Same as above, but using GUP-fast. */ 1198 { 1199 "R/O GUP-fast pin on R/O-mapped exclusive page", 1200 test_ro_fast_pin_on_ro_exclusive, 1201 }, 1202 }; 1203 1204 static void run_anon_test_case(struct test_case const *test_case) 1205 { 1206 int i; 1207 1208 run_with_base_page(test_case->fn, test_case->desc); 1209 run_with_base_page_swap(test_case->fn, test_case->desc); 1210 for (i = 0; i < nr_thpsizes; i++) { 1211 size_t size = thpsizes[i]; 1212 struct thp_settings settings = *thp_current_settings(); 1213 1214 settings.hugepages[sz2ord(pmdsize, pagesize)].enabled = THP_NEVER; 1215 settings.hugepages[sz2ord(size, pagesize)].enabled = THP_ALWAYS; 1216 thp_push_settings(&settings); 1217 1218 if (size == pmdsize) { 1219 run_with_thp(test_case->fn, test_case->desc, size); 1220 run_with_thp_swap(test_case->fn, test_case->desc, size); 1221 } 1222 1223 run_with_pte_mapped_thp(test_case->fn, test_case->desc, size); 1224 run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc, size); 1225 run_with_single_pte_of_thp(test_case->fn, test_case->desc, size); 1226 run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc, size); 1227 run_with_partial_mremap_thp(test_case->fn, test_case->desc, size); 1228 run_with_partial_shared_thp(test_case->fn, test_case->desc, size); 1229 1230 thp_pop_settings(); 1231 } 1232 for (i = 0; i < nr_hugetlbsizes; i++) 1233 run_with_hugetlb(test_case->fn, test_case->desc, 1234 hugetlbsizes[i]); 1235 } 1236 1237 static void run_anon_test_cases(void) 1238 { 1239 int i; 1240 1241 ksft_print_msg("[INFO] Anonymous memory 
tests in private mappings\n");

	for (i = 0; i < ARRAY_SIZE(anon_test_cases); i++)
		run_anon_test_case(&anon_test_cases[i]);
}

/*
 * Number of sub-tests run_anon_test_case() logs per test case; must stay in
 * sync with run_anon_test_case() so ksft_set_plan() is accurate.
 */
static int tests_per_anon_test_case(void)
{
	int tests = 2 + nr_hugetlbsizes;

	tests += 6 * nr_thpsizes;
	if (pmdsize)
		tests += 2;
	return tests;
}

/* Which part of the THP (if any) is COW-shared with the child on fork(). */
enum anon_thp_collapse_test {
	ANON_THP_COLLAPSE_UNSHARED,
	ANON_THP_COLLAPSE_FULLY_SHARED,
	ANON_THP_COLLAPSE_LOWER_SHARED,
	ANON_THP_COLLAPSE_UPPER_SHARED,
};

/*
 * Test that MADV_COLLAPSE does not lose COW tracking: after collapsing a
 * (partially) COW-shared, PTE-mapped THP and writing to it in the parent,
 * the child must not observe the parent's modifications.
 */
static void do_test_anon_thp_collapse(char *mem, size_t size,
				      enum anon_thp_collapse_test test)
{
	struct comm_pipes comm_pipes;
	char buf;
	int ret;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		log_test_result(KSFT_FAIL);
		return;
	}

	/*
	 * Trigger PTE-mapping the THP by temporarily mapping a single subpage
	 * R/O, such that we can try collapsing it later.
	 */
	ret = mprotect(mem + pagesize, pagesize, PROT_READ);
	if (ret) {
		ksft_perror("mprotect() failed");
		log_test_result(KSFT_FAIL);
		goto close_comm_pipes;
	}
	ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
	if (ret) {
		ksft_perror("mprotect() failed");
		log_test_result(KSFT_FAIL);
		goto close_comm_pipes;
	}

	switch (test) {
	case ANON_THP_COLLAPSE_UNSHARED:
		/* Collapse before actually COW-sharing the page. */
		ret = madvise(mem, size, MADV_COLLAPSE);
		if (ret) {
			ksft_perror("MADV_COLLAPSE failed");
			log_test_result(KSFT_SKIP);
			goto close_comm_pipes;
		}
		break;
	case ANON_THP_COLLAPSE_FULLY_SHARED:
		/* COW-share the full PTE-mapped THP. */
		break;
	case ANON_THP_COLLAPSE_LOWER_SHARED:
		/* Don't COW-share the upper part of the THP. */
		ret = madvise(mem + size / 2, size / 2, MADV_DONTFORK);
		if (ret) {
			ksft_perror("MADV_DONTFORK failed");
			log_test_result(KSFT_FAIL);
			goto close_comm_pipes;
		}
		break;
	case ANON_THP_COLLAPSE_UPPER_SHARED:
		/* Don't COW-share the lower part of the THP. */
		ret = madvise(mem, size / 2, MADV_DONTFORK);
		if (ret) {
			ksft_perror("MADV_DONTFORK failed");
			log_test_result(KSFT_FAIL);
			goto close_comm_pipes;
		}
		break;
	default:
		assert(false);
	}

	ret = fork();
	if (ret < 0) {
		ksft_perror("fork() failed");
		log_test_result(KSFT_FAIL);
		goto close_comm_pipes;
	} else if (!ret) {
		/* Child: memcmp() only the COW-shared part of the THP. */
		switch (test) {
		case ANON_THP_COLLAPSE_UNSHARED:
		case ANON_THP_COLLAPSE_FULLY_SHARED:
			exit(child_memcmp_fn(mem, size, &comm_pipes));
			break;
		case ANON_THP_COLLAPSE_LOWER_SHARED:
			exit(child_memcmp_fn(mem, size / 2, &comm_pipes));
			break;
		case ANON_THP_COLLAPSE_UPPER_SHARED:
			exit(child_memcmp_fn(mem + size / 2, size / 2,
					     &comm_pipes));
			break;
		default:
			assert(false);
		}
	}

	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
		;

	switch (test) {
	case ANON_THP_COLLAPSE_UNSHARED:
		break;
	case ANON_THP_COLLAPSE_UPPER_SHARED:
	case ANON_THP_COLLAPSE_LOWER_SHARED:
		/*
		 * Revert MADV_DONTFORK such that we merge the VMAs and are
		 * able to actually collapse.
		 */
		ret = madvise(mem, size, MADV_DOFORK);
		if (ret) {
			ksft_perror("MADV_DOFORK failed");
			log_test_result(KSFT_FAIL);
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}
		/* FALLTHROUGH */
	case ANON_THP_COLLAPSE_FULLY_SHARED:
		/* Collapse before anyone modified the COW-shared page. */
		ret = madvise(mem, size, MADV_COLLAPSE);
		if (ret) {
			ksft_perror("MADV_COLLAPSE failed");
			log_test_result(KSFT_SKIP);
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}
		break;
	default:
		assert(false);
	}

	/* Modify the page. */
	memset(mem, 0xff, size);
	write(comm_pipes.parent_ready[1], "0", 1);

	wait(&ret);
	if (WIFEXITED(ret))
		ret = WEXITSTATUS(ret);
	else
		ret = -EINVAL;

	if (!ret) {
		log_test_result(KSFT_PASS);
	} else {
		ksft_print_msg("Leak from parent into child\n");
		log_test_result(KSFT_FAIL);
	}
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
}

static void test_anon_thp_collapse_unshared(char *mem, size_t size,
					    bool is_hugetlb)
{
	assert(!is_hugetlb);
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UNSHARED);
}

static void test_anon_thp_collapse_fully_shared(char *mem, size_t size,
						bool is_hugetlb)
{
	assert(!is_hugetlb);
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_FULLY_SHARED);
}

static void test_anon_thp_collapse_lower_shared(char *mem, size_t size,
						bool is_hugetlb)
{
	assert(!is_hugetlb);
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_LOWER_SHARED);
}

static void test_anon_thp_collapse_upper_shared(char *mem, size_t size,
						bool is_hugetlb)
{
	assert(!is_hugetlb);
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UPPER_SHARED);
}

/*
 * Test cases that are specific to anonymous THP: pages in private mappings
 * that may get shared via COW during fork().
 */
static const struct test_case anon_thp_test_cases[] = {
	/*
	 * Basic COW test for fork() without any GUP when collapsing a THP
	 * before fork().
1444 * 1445 * Re-mapping a PTE-mapped anon THP using a single PMD ("in-place 1446 * collapse") might easily get COW handling wrong when not collapsing 1447 * exclusivity information properly. 1448 */ 1449 { 1450 "Basic COW after fork() when collapsing before fork()", 1451 test_anon_thp_collapse_unshared, 1452 }, 1453 /* Basic COW test, but collapse after COW-sharing a full THP. */ 1454 { 1455 "Basic COW after fork() when collapsing after fork() (fully shared)", 1456 test_anon_thp_collapse_fully_shared, 1457 }, 1458 /* 1459 * Basic COW test, but collapse after COW-sharing the lower half of a 1460 * THP. 1461 */ 1462 { 1463 "Basic COW after fork() when collapsing after fork() (lower shared)", 1464 test_anon_thp_collapse_lower_shared, 1465 }, 1466 /* 1467 * Basic COW test, but collapse after COW-sharing the upper half of a 1468 * THP. 1469 */ 1470 { 1471 "Basic COW after fork() when collapsing after fork() (upper shared)", 1472 test_anon_thp_collapse_upper_shared, 1473 }, 1474 }; 1475 1476 static void run_anon_thp_test_cases(void) 1477 { 1478 int i; 1479 1480 if (!pmdsize) 1481 return; 1482 1483 ksft_print_msg("[INFO] Anonymous THP tests\n"); 1484 1485 for (i = 0; i < ARRAY_SIZE(anon_thp_test_cases); i++) { 1486 struct test_case const *test_case = &anon_thp_test_cases[i]; 1487 1488 log_test_start("%s", test_case->desc); 1489 do_run_with_thp(test_case->fn, THP_RUN_PMD, pmdsize); 1490 } 1491 } 1492 1493 static int tests_per_anon_thp_test_case(void) 1494 { 1495 return pmdsize ? 1 : 0; 1496 } 1497 1498 typedef void (*non_anon_test_fn)(char *mem, const char *smem, size_t size); 1499 1500 static void test_cow(char *mem, const char *smem, size_t size) 1501 { 1502 char *old = malloc(size); 1503 1504 /* Backup the original content. */ 1505 memcpy(old, smem, size); 1506 1507 /* Modify the page. */ 1508 memset(mem, 0xff, size); 1509 1510 /* See if we still read the old values via the other mapping. 
*/ 1511 if (!memcmp(smem, old, size)) { 1512 log_test_result(KSFT_PASS); 1513 } else { 1514 ksft_print_msg("Other mapping modified\n"); 1515 log_test_result(KSFT_FAIL); 1516 } 1517 free(old); 1518 } 1519 1520 static void test_ro_pin(char *mem, const char *smem, size_t size) 1521 { 1522 do_test_ro_pin(mem, size, RO_PIN_TEST, false); 1523 } 1524 1525 static void test_ro_fast_pin(char *mem, const char *smem, size_t size) 1526 { 1527 do_test_ro_pin(mem, size, RO_PIN_TEST, true); 1528 } 1529 1530 static void run_with_zeropage(non_anon_test_fn fn, const char *desc) 1531 { 1532 char *mem, *smem; 1533 1534 log_test_start("%s ... with shared zeropage", desc); 1535 1536 mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, 1537 MAP_PRIVATE | MAP_ANON, -1, 0); 1538 if (mem == MAP_FAILED) { 1539 ksft_perror("mmap() failed"); 1540 log_test_result(KSFT_FAIL); 1541 return; 1542 } 1543 1544 smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0); 1545 if (smem == MAP_FAILED) { 1546 ksft_perror("mmap() failed"); 1547 log_test_result(KSFT_FAIL); 1548 goto munmap; 1549 } 1550 1551 /* Read from the page to populate the shared zeropage. */ 1552 FORCE_READ(*mem); 1553 FORCE_READ(*smem); 1554 1555 fn(mem, smem, pagesize); 1556 munmap: 1557 munmap(mem, pagesize); 1558 if (smem != MAP_FAILED) 1559 munmap(smem, pagesize); 1560 } 1561 1562 static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) 1563 { 1564 char *mem, *smem, *mmap_mem, *mmap_smem; 1565 size_t mmap_size; 1566 int ret; 1567 1568 log_test_start("%s ... with huge zeropage", desc); 1569 1570 if (!has_huge_zeropage) { 1571 ksft_print_msg("Huge zeropage not enabled\n"); 1572 log_test_result(KSFT_SKIP); 1573 return; 1574 } 1575 1576 /* For alignment purposes, we need twice the thp size. 
*/ 1577 mmap_size = 2 * pmdsize; 1578 mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, 1579 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 1580 if (mmap_mem == MAP_FAILED) { 1581 ksft_perror("mmap() failed"); 1582 log_test_result(KSFT_FAIL); 1583 return; 1584 } 1585 mmap_smem = mmap(NULL, mmap_size, PROT_READ, 1586 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 1587 if (mmap_smem == MAP_FAILED) { 1588 ksft_perror("mmap() failed"); 1589 log_test_result(KSFT_FAIL); 1590 goto munmap; 1591 } 1592 1593 /* We need a THP-aligned memory area. */ 1594 mem = (char *)(((uintptr_t)mmap_mem + pmdsize) & ~(pmdsize - 1)); 1595 smem = (char *)(((uintptr_t)mmap_smem + pmdsize) & ~(pmdsize - 1)); 1596 1597 ret = madvise(mem, pmdsize, MADV_HUGEPAGE); 1598 if (ret) { 1599 ksft_perror("madvise()"); 1600 log_test_result(KSFT_FAIL); 1601 goto munmap; 1602 } 1603 ret = madvise(smem, pmdsize, MADV_HUGEPAGE); 1604 if (ret) { 1605 ksft_perror("madvise()"); 1606 log_test_result(KSFT_FAIL); 1607 goto munmap; 1608 } 1609 1610 /* 1611 * Read from the memory to populate the huge shared zeropage. Read from 1612 * the first sub-page and test if we get another sub-page populated 1613 * automatically. 1614 */ 1615 FORCE_READ(mem); 1616 FORCE_READ(smem); 1617 if (!pagemap_is_populated(pagemap_fd, mem + pagesize) || 1618 !pagemap_is_populated(pagemap_fd, smem + pagesize)) { 1619 ksft_test_result_skip("Did not get THPs populated\n"); 1620 goto munmap; 1621 } 1622 1623 fn(mem, smem, pmdsize); 1624 munmap: 1625 munmap(mmap_mem, mmap_size); 1626 if (mmap_smem != MAP_FAILED) 1627 munmap(mmap_smem, mmap_size); 1628 } 1629 1630 static void run_with_memfd(non_anon_test_fn fn, const char *desc) 1631 { 1632 char *mem, *smem; 1633 int fd; 1634 1635 log_test_start("%s ... with memfd", desc); 1636 1637 fd = memfd_create("test", 0); 1638 if (fd < 0) { 1639 ksft_perror("memfd_create() failed"); 1640 log_test_result(KSFT_FAIL); 1641 return; 1642 } 1643 1644 /* File consists of a single page filled with zeroes. 
*/ 1645 if (fallocate(fd, 0, 0, pagesize)) { 1646 ksft_perror("fallocate() failed"); 1647 log_test_result(KSFT_FAIL); 1648 goto close; 1649 } 1650 1651 /* Create a private mapping of the memfd. */ 1652 mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); 1653 if (mem == MAP_FAILED) { 1654 ksft_perror("mmap() failed"); 1655 log_test_result(KSFT_FAIL); 1656 goto close; 1657 } 1658 smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); 1659 if (smem == MAP_FAILED) { 1660 ksft_perror("mmap() failed"); 1661 log_test_result(KSFT_FAIL); 1662 goto munmap; 1663 } 1664 1665 /* Fault the page in. */ 1666 FORCE_READ(mem); 1667 FORCE_READ(smem); 1668 1669 fn(mem, smem, pagesize); 1670 munmap: 1671 munmap(mem, pagesize); 1672 if (smem != MAP_FAILED) 1673 munmap(smem, pagesize); 1674 close: 1675 close(fd); 1676 } 1677 1678 static void run_with_tmpfile(non_anon_test_fn fn, const char *desc) 1679 { 1680 char *mem, *smem; 1681 FILE *file; 1682 int fd; 1683 1684 log_test_start("%s ... with tmpfile", desc); 1685 1686 file = tmpfile(); 1687 if (!file) { 1688 ksft_perror("tmpfile() failed"); 1689 log_test_result(KSFT_FAIL); 1690 return; 1691 } 1692 1693 fd = fileno(file); 1694 if (fd < 0) { 1695 ksft_perror("fileno() failed"); 1696 log_test_result(KSFT_SKIP); 1697 return; 1698 } 1699 1700 /* File consists of a single page filled with zeroes. */ 1701 if (fallocate(fd, 0, 0, pagesize)) { 1702 ksft_perror("fallocate() failed"); 1703 log_test_result(KSFT_FAIL); 1704 goto close; 1705 } 1706 1707 /* Create a private mapping of the memfd. */ 1708 mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); 1709 if (mem == MAP_FAILED) { 1710 ksft_perror("mmap() failed"); 1711 log_test_result(KSFT_FAIL); 1712 goto close; 1713 } 1714 smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); 1715 if (smem == MAP_FAILED) { 1716 ksft_perror("mmap() failed"); 1717 log_test_result(KSFT_FAIL); 1718 goto munmap; 1719 } 1720 1721 /* Fault the page in. 
	 */
	FORCE_READ(mem);
	FORCE_READ(smem);

	fn(mem, smem, pagesize);
munmap:
	munmap(mem, pagesize);
	if (smem != MAP_FAILED)
		munmap(smem, pagesize);
close:
	fclose(file);
}

/* Run a non-anon test case against a private mapping of a hugetlb memfd. */
static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc,
				   size_t hugetlbsize)
{
	int flags = MFD_HUGETLB;
	char *mem, *smem;
	int fd;

	log_test_start("%s ... with memfd hugetlb (%zu kB)", desc,
		       hugetlbsize / 1024);

	/* Request this specific hugetlb size from memfd_create(). */
	flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;

	fd = memfd_create("test", flags);
	if (fd < 0) {
		ksft_perror("memfd_create() failed");
		log_test_result(KSFT_SKIP);
		return;
	}

	/* File consists of a single page filled with zeroes. */
	if (fallocate(fd, 0, 0, hugetlbsize)) {
		ksft_perror("need more free huge pages");
		log_test_result(KSFT_SKIP);
		goto close;
	}

	/* Create a private mapping of the memfd. */
	mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd,
		   0);
	if (mem == MAP_FAILED) {
		ksft_perror("need more free huge pages");
		log_test_result(KSFT_SKIP);
		goto close;
	}
	smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0);
	if (smem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		goto munmap;
	}

	/* Fault the page in. */
	FORCE_READ(mem);
	FORCE_READ(smem);

	fn(mem, smem, hugetlbsize);
munmap:
	munmap(mem, hugetlbsize);
	if (smem != MAP_FAILED)
		munmap(smem, hugetlbsize);
close:
	close(fd);
}

/* A named non-anon COW test together with its human-readable description. */
struct non_anon_test_case {
	const char *desc;
	non_anon_test_fn fn;
};

/*
 * Test cases that target any pages in private mappings that are not anonymous:
 * pages that may get shared via COW independent of fork(). This includes
 * the shared zeropage(s), pagecache pages, ...
 */
static const struct non_anon_test_case non_anon_test_cases[] = {
	/*
	 * Basic COW test without any GUP. If we miss to break COW, changes are
	 * visible via other private/shared mappings.
	 */
	{
		"Basic COW",
		test_cow,
	},
	/*
	 * Take a R/O longterm pin. When modifying the page via the page table,
	 * the page content change must be visible via the pin.
	 */
	{
		"R/O longterm GUP pin",
		test_ro_pin,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O longterm GUP-fast pin",
		test_ro_fast_pin,
	},
};

/*
 * Run one non-anonymous test case against every supported backing: the
 * shared zeropage, a memfd, a tmpfile, the huge zeropage (if THP is
 * available), and each detected hugetlb size.
 */
static void run_non_anon_test_case(struct non_anon_test_case const *test_case)
{
	int i;

	run_with_zeropage(test_case->fn, test_case->desc);
	run_with_memfd(test_case->fn, test_case->desc);
	run_with_tmpfile(test_case->fn, test_case->desc);
	if (pmdsize)
		run_with_huge_zeropage(test_case->fn, test_case->desc);
	for (i = 0; i < nr_hugetlbsizes; i++)
		run_with_memfd_hugetlb(test_case->fn, test_case->desc,
				       hugetlbsizes[i]);
}

static void run_non_anon_test_cases(void)
{
	int i;

	ksft_print_msg("[RUN] Non-anonymous memory tests in private mappings\n");

	for (i = 0; i < ARRAY_SIZE(non_anon_test_cases); i++)
		run_non_anon_test_case(&non_anon_test_cases[i]);
}

/*
 * Number of sub-tests run_non_anon_test_case() logs per test case; must stay
 * in sync with run_non_anon_test_case() so ksft_set_plan() is accurate.
 */
static int tests_per_non_anon_test_case(void)
{
	int tests = 3 + nr_hugetlbsizes;

	if (pmdsize)
		tests += 1;
	return tests;
}

int main(int argc, char **argv)
{
	struct thp_settings default_settings;

	ksft_print_header();

	pagesize = getpagesize();
	pmdsize = read_pmd_pagesize();
	if (pmdsize) {
		/* Only if THP is supported. */
		thp_read_settings(&default_settings);
		default_settings.hugepages[sz2ord(pmdsize, pagesize)].enabled = THP_INHERIT;
		thp_save_settings();
		thp_push_settings(&default_settings);

		ksft_print_msg("[INFO] detected PMD size: %zu KiB\n",
			       pmdsize / 1024);
		nr_thpsizes = detect_thp_sizes(thpsizes, ARRAY_SIZE(thpsizes));
	}
	nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes,
						    ARRAY_SIZE(hugetlbsizes));
	has_huge_zeropage = detect_huge_zeropage();

	/* The plan must match exactly what the run_* helpers will log. */
	ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() +
		      ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() +
		      ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case());

	gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
	if (pagemap_fd < 0)
		ksft_exit_fail_msg("opening pagemap failed\n");

	run_anon_test_cases();
	run_anon_thp_test_cases();
	run_non_anon_test_cases();

	if (pmdsize) {
		/* Only if THP is supported. */
		thp_restore_settings();
	}

	ksft_finished();
}