// SPDX-License-Identifier: GPL-2.0-only
/*
 * COW (Copy On Write) tests.
 *
 * Copyright 2022, Red Hat, Inc.
 *
 * Author(s): David Hildenbrand <david@redhat.com>
 */
#define _GNU_SOURCE
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <stdint.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <assert.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/wait.h>
#include <linux/memfd.h>

#include "local_config.h"
#ifdef LOCAL_CONFIG_HAVE_LIBURING
#include <liburing.h>
#endif /* LOCAL_CONFIG_HAVE_LIBURING */

#include "../../../../mm/gup_test.h"
#include "../kselftest.h"
#include "vm_util.h"

/* Base page size of the system. */
static size_t pagesize;
/* FD for /proc/self/pagemap, used to query per-page state. */
static int pagemap_fd;
/* PMD THP size in bytes; 0 when THP is unavailable. */
static size_t thpsize;
/* Number of detected hugetlb page sizes (entries in hugetlbsizes[]). */
static int nr_hugetlbsizes;
static size_t hugetlbsizes[10];
/* FD for the gup_test debugfs interface; negative when unavailable. */
static int gup_fd;
static bool has_huge_zeropage;

/*
 * Detect whether the huge zeropage is enabled by reading
 * /sys/kernel/mm/transparent_hugepage/use_zero_page. Best effort:
 * has_huge_zeropage stays false if the file is missing or unreadable.
 */
static void detect_huge_zeropage(void)
{
	int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page",
		      O_RDONLY);
	size_t enabled = 0;
	char buf[15];
	int ret;

	if (fd < 0)
		return;

	ret = pread(fd, buf, sizeof(buf), 0);
	if (ret > 0 && ret < sizeof(buf)) {
		buf[ret] = 0;

		enabled = strtoul(buf, NULL, 10);
		if (enabled == 1) {
			has_huge_zeropage = true;
			ksft_print_msg("[INFO] huge zeropage is enabled\n");
		}
	}

	close(fd);
}

/*
 * Return true iff every page in [addr, addr + size) is swapped out
 * according to the pagemap. @size must be a multiple of pagesize.
 */
static bool range_is_swapped(void *addr, size_t size)
{
	for (; size; addr += pagesize, size -= pagesize)
		if (!pagemap_is_swapped(pagemap_fd, addr))
			return false;
	return true;
}

/* Pipe pair used to synchronize parent and child around fork(). */
struct comm_pipes {
	int child_ready[2];
	int parent_ready[2];
};

/*
 * Create both communication pipes. Returns 0 on success, -errno on
 * failure (with any partially created pipe closed again).
 */
static int setup_comm_pipes(struct comm_pipes *comm_pipes)
{
	if (pipe(comm_pipes->child_ready) < 0)
		return -errno;
	if (pipe(comm_pipes->parent_ready) < 0) {
		close(comm_pipes->child_ready[0]);
		close(comm_pipes->child_ready[1]);
		return -errno;
	}

	return 0;
}

/* Close all four pipe ends created by setup_comm_pipes(). */
static void close_comm_pipes(struct comm_pipes *comm_pipes)
{
	close(comm_pipes->child_ready[0]);
	close(comm_pipes->child_ready[1]);
	close(comm_pipes->parent_ready[0]);
	close(comm_pipes->parent_ready[1]);
}

/*
 * Child routine: snapshot the memory, signal readiness, wait for the
 * parent to modify the range, then return 0 iff the child still reads
 * the old (pre-modification) content, i.e., no leak through COW.
 *
 * NOTE(review): malloc() result is not checked; a failure in the child
 * would make memcpy() crash the child, which the parent reports as a
 * test failure anyway.
 */
static int child_memcmp_fn(char *mem, size_t size,
			   struct comm_pipes *comm_pipes)
{
	char *old = malloc(size);
	char buf;

	/* Backup the original content. */
	memcpy(old, mem, size);

	/* Wait until the parent modified the page. */
	write(comm_pipes->child_ready[1], "0", 1);
	while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
		;

	/* See if we still read the old values. */
	return memcmp(old, mem, size);
}

/*
 * Child routine: take a R/O GUP pin on the memory via vmsplice() into a
 * pipe, unmap the range, wait for the parent to modify it, and check
 * whether the data read back from the pipe still matches the original
 * content. Returns 0 iff no modification leaked through the pin.
 */
static int child_vmsplice_memcmp_fn(char *mem, size_t size,
				    struct comm_pipes *comm_pipes)
{
	struct iovec iov = {
		.iov_base = mem,
		.iov_len = size,
	};
	ssize_t cur, total, transferred;
	char *old, *new;
	int fds[2];
	char buf;

	old = malloc(size);
	new = malloc(size);

	/* Backup the original content. */
	memcpy(old, mem, size);

	if (pipe(fds) < 0)
		return -errno;

	/* Trigger a read-only pin. */
	transferred = vmsplice(fds[1], &iov, 1, 0);
	if (transferred < 0)
		return -errno;
	if (transferred == 0)
		return -EINVAL;

	/* Unmap it from our page tables. */
	if (munmap(mem, size) < 0)
		return -errno;

	/* Wait until the parent modified it. */
	write(comm_pipes->child_ready[1], "0", 1);
	while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
		;

	/* See if we still read the old values via the pipe. */
	for (total = 0; total < transferred; total += cur) {
		cur = read(fds[0], new + total, transferred - total);
		if (cur < 0)
			return -errno;
	}

	return memcmp(old, new, transferred);
}

/* Signature of a child routine run after fork() by the COW tests. */
typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes);

/*
 * Fork a child running @fn, optionally perform a mprotect() R/O->R/W
 * cycle in the parent (to trigger mprotect write-fault avoidance
 * optimizations), modify the memory in the parent, and report via ksft
 * whether the child observed the parent's modification (COW leak).
 */
static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
				  child_fn fn)
{
	struct comm_pipes comm_pipes;
	char buf;
	int ret;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		return;
	}

	ret = fork();
	if (ret < 0) {
		ksft_test_result_fail("fork() failed\n");
		goto close_comm_pipes;
	} else if (!ret) {
		/* Child: exit status is the child routine's result. */
		exit(fn(mem, size, &comm_pipes));
	}

	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
		;

	if (do_mprotect) {
		/*
		 * mprotect() optimizations might try avoiding
		 * write-faults by directly mapping pages writable.
		 */
		ret = mprotect(mem, size, PROT_READ);
		ret |= mprotect(mem, size, PROT_READ|PROT_WRITE);
		if (ret) {
			ksft_test_result_fail("mprotect() failed\n");
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}
	}

	/* Modify the page. */
	memset(mem, 0xff, size);
	write(comm_pipes.parent_ready[1], "0", 1);

	wait(&ret);
	if (WIFEXITED(ret))
		ret = WEXITSTATUS(ret);
	else
		ret = -EINVAL;

	ksft_test_result(!ret, "No leak from parent into child\n");
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
}

/* Basic COW: modify in the parent after fork(); child must not see it. */
static void test_cow_in_parent(char *mem, size_t size)
{
	do_test_cow_in_parent(mem, size, false, child_memcmp_fn);
}

/* Same, but with a mprotect() R/O->R/W cycle before the parent writes. */
static void test_cow_in_parent_mprotect(char *mem, size_t size)
{
	do_test_cow_in_parent(mem, size, true, child_memcmp_fn);
}

/* COW with a vmsplice() R/O pin taken (and range unmapped) in the child. */
static void test_vmsplice_in_child(char *mem, size_t size)
{
	do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn);
}

/* Same, but with a mprotect() R/O->R/W cycle before the parent writes. */
static void test_vmsplice_in_child_mprotect(char *mem, size_t size)
{
	do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn);
}

/*
 * Take a vmsplice() R/O pin in the parent (before or after fork()),
 * unmap the memory in the parent, let the child modify it, and verify
 * that the parent still reads the original content through the pipe
 * (i.e., the child's modification must not leak into the pinned pages).
 */
static void do_test_vmsplice_in_parent(char *mem, size_t size,
				       bool before_fork)
{
	struct iovec iov = {
		.iov_base = mem,
		.iov_len = size,
	};
	ssize_t cur, total, transferred;
	struct comm_pipes comm_pipes;
	char *old, *new;
	int ret, fds[2];
	char buf;

	old = malloc(size);
	new = malloc(size);

	memcpy(old, mem, size);

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		goto free;
	}

	if (pipe(fds) < 0) {
		ksft_test_result_fail("pipe() failed\n");
		goto close_comm_pipes;
	}

	if (before_fork) {
		transferred = vmsplice(fds[1], &iov, 1, 0);
		if (transferred <= 0) {
			ksft_test_result_fail("vmsplice() failed\n");
			goto close_pipe;
		}
	}

	ret = fork();
	if (ret < 0) {
		ksft_test_result_fail("fork() failed\n");
		goto close_pipe;
	} else if (!ret) {
		write(comm_pipes.child_ready[1], "0", 1);
		while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
			;
		/* Modify page content in the child. */
		memset(mem, 0xff, size);
		exit(0);
	}

	if (!before_fork) {
		transferred = vmsplice(fds[1], &iov, 1, 0);
		if (transferred <= 0) {
			ksft_test_result_fail("vmsplice() failed\n");
			wait(&ret);
			goto close_pipe;
		}
	}

	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
		;
	if (munmap(mem, size) < 0) {
		ksft_test_result_fail("munmap() failed\n");
		goto close_pipe;
	}
	write(comm_pipes.parent_ready[1], "0", 1);

	/* Wait until the child is done writing. */
	wait(&ret);
	if (!WIFEXITED(ret)) {
		ksft_test_result_fail("wait() failed\n");
		goto close_pipe;
	}

	/* See if we still read the old values. */
	for (total = 0; total < transferred; total += cur) {
		cur = read(fds[0], new + total, transferred - total);
		if (cur < 0) {
			ksft_test_result_fail("read() failed\n");
			goto close_pipe;
		}
	}

	ksft_test_result(!memcmp(old, new, transferred),
			 "No leak from child into parent\n");
close_pipe:
	close(fds[0]);
	close(fds[1]);
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
free:
	free(old);
	free(new);
}

/* vmsplice() R/O pin before fork(); the child modifies the memory. */
static void test_vmsplice_before_fork(char *mem, size_t size)
{
	do_test_vmsplice_in_parent(mem, size, true);
}

/* vmsplice() R/O pin in the parent after fork(); the child modifies. */
static void test_vmsplice_after_fork(char *mem, size_t size)
{
	do_test_vmsplice_in_parent(mem, size, false);
}

#ifdef LOCAL_CONFIG_HAVE_LIBURING
/*
 * Register the memory as an io_uring fixed buffer (a FOLL_WRITE |
 * FOLL_PIN | FOLL_LONGTERM pin), optionally fork() a child so the pages
 * are COW-shared, then modify the memory and verify the modification is
 * observable through the pin: write the pinned content to a temporary
 * file via write_fixed and read it back for comparison.
 */
static void do_test_iouring(char *mem, size_t size, bool use_fork)
{
	struct comm_pipes comm_pipes;
	struct io_uring_cqe *cqe;
	struct io_uring_sqe *sqe;
	struct io_uring ring;
	ssize_t cur, total;
	struct iovec iov;
	/*
	 * NOTE(review): buf is declared char * but is only ever used as a
	 * one-byte read target (&buf below); a plain char would be clearer.
	 */
	char *buf, *tmp;
	int ret, fd;
	FILE *file;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		return;
	}

	file = tmpfile();
	if (!file) {
		ksft_test_result_fail("tmpfile() failed\n");
		goto close_comm_pipes;
	}
	fd = fileno(file);
	assert(fd);

	tmp = malloc(size);
	if (!tmp) {
		ksft_test_result_fail("malloc() failed\n");
		goto close_file;
	}

	/* Skip on errors, as we might just lack kernel support. */
	ret = io_uring_queue_init(1, &ring, 0);
	if (ret < 0) {
		ksft_test_result_skip("io_uring_queue_init() failed\n");
		goto free_tmp;
	}

	/*
	 * Register the range as a fixed buffer. This will FOLL_WRITE | FOLL_PIN
	 * | FOLL_LONGTERM the range.
	 *
	 * Skip on errors, as we might just lack kernel support or might not
	 * have sufficient MEMLOCK permissions.
	 */
	iov.iov_base = mem;
	iov.iov_len = size;
	ret = io_uring_register_buffers(&ring, &iov, 1);
	if (ret) {
		ksft_test_result_skip("io_uring_register_buffers() failed\n");
		goto queue_exit;
	}

	if (use_fork) {
		/*
		 * fork() and keep the child alive until we're done. Note that
		 * we expect the pinned page to not get shared with the child.
		 */
		ret = fork();
		if (ret < 0) {
			ksft_test_result_fail("fork() failed\n");
			goto unregister_buffers;
		} else if (!ret) {
			write(comm_pipes.child_ready[1], "0", 1);
			while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
				;
			exit(0);
		}

		while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
			;
	} else {
		/*
		 * Map the page R/O into the page table. Enable softdirty
		 * tracking to stop the page from getting mapped R/W immediately
		 * again by mprotect() optimizations. Note that we don't have an
		 * easy way to test if that worked (the pagemap does not export
		 * if the page is mapped R/O vs. R/W).
		 */
		ret = mprotect(mem, size, PROT_READ);
		clear_softdirty();
		ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
		if (ret) {
			ksft_test_result_fail("mprotect() failed\n");
			goto unregister_buffers;
		}
	}

	/*
	 * Modify the page and write page content as observed by the fixed
	 * buffer pin to the file so we can verify it.
	 */
	memset(mem, 0xff, size);
	sqe = io_uring_get_sqe(&ring);
	if (!sqe) {
		ksft_test_result_fail("io_uring_get_sqe() failed\n");
		goto quit_child;
	}
	io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0);

	ret = io_uring_submit(&ring);
	if (ret < 0) {
		ksft_test_result_fail("io_uring_submit() failed\n");
		goto quit_child;
	}

	ret = io_uring_wait_cqe(&ring, &cqe);
	if (ret < 0) {
		ksft_test_result_fail("io_uring_wait_cqe() failed\n");
		goto quit_child;
	}

	/* A short write through the fixed buffer counts as a failure. */
	if (cqe->res != size) {
		ksft_test_result_fail("write_fixed failed\n");
		goto quit_child;
	}
	io_uring_cqe_seen(&ring, cqe);

	/* Read back the file content to the temporary buffer. */
	total = 0;
	while (total < size) {
		cur = pread(fd, tmp + total, size - total, total);
		if (cur < 0) {
			ksft_test_result_fail("pread() failed\n");
			goto quit_child;
		}
		total += cur;
	}

	/* Finally, check if we read what we expected. */
	ksft_test_result(!memcmp(mem, tmp, size),
			 "Longterm R/W pin is reliable\n");

quit_child:
	if (use_fork) {
		write(comm_pipes.parent_ready[1], "0", 1);
		wait(&ret);
	}
unregister_buffers:
	io_uring_unregister_buffers(&ring);
queue_exit:
	io_uring_queue_exit(&ring);
free_tmp:
	free(tmp);
close_file:
	fclose(file);
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
}

/* R/W longterm pin with the pages then mapped R/O (write fault on access). */
static void test_iouring_ro(char *mem, size_t size)
{
	do_test_iouring(mem, size, false);
}

/* R/W longterm pin with a child COW-sharing the pages. */
static void test_iouring_fork(char *mem, size_t size)
{
	do_test_iouring(mem, size, true);
}

#endif /* LOCAL_CONFIG_HAVE_LIBURING */

/* How the page is mapped/shared when the R/O pin is taken. */
enum ro_pin_test {
	RO_PIN_TEST,
	RO_PIN_TEST_SHARED,
	RO_PIN_TEST_PREVIOUSLY_SHARED,
	RO_PIN_TEST_RO_EXCLUSIVE,
};

/*
 * Take a R/O longterm pin via the gup_test debugfs interface, modify
 * the page through the page table, and verify the modification is
 * visible through the pin. @test selects how the page is prepared;
 * @fast selects GUP-fast. Skips when gup_test is not available.
 */
static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
			   bool fast)
{
	struct pin_longterm_test args;
	struct comm_pipes comm_pipes;
	char *tmp, buf;
	__u64 tmp_val;
	int ret;

	if (gup_fd < 0) {
		ksft_test_result_skip("gup_test not available\n");
		return;
	}

	tmp = malloc(size);
	if (!tmp) {
		ksft_test_result_fail("malloc() failed\n");
		return;
	}

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		goto free_tmp;
	}

	switch (test) {
	case RO_PIN_TEST:
		break;
	case RO_PIN_TEST_SHARED:
	case RO_PIN_TEST_PREVIOUSLY_SHARED:
		/*
		 * Share the pages with our child. As the pages are not pinned,
		 * this should just work.
		 */
		ret = fork();
		if (ret < 0) {
			ksft_test_result_fail("fork() failed\n");
			goto close_comm_pipes;
		} else if (!ret) {
			write(comm_pipes.child_ready[1], "0", 1);
			while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
				;
			exit(0);
		}

		/* Wait until our child is ready. */
		while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
			;

		if (test == RO_PIN_TEST_PREVIOUSLY_SHARED) {
			/*
			 * Tell the child to quit now and wait until it quit.
			 * The pages should now be mapped R/O into our page
			 * tables, but they are no longer shared.
			 */
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			if (!WIFEXITED(ret))
				ksft_print_msg("[INFO] wait() failed\n");
		}
		break;
	case RO_PIN_TEST_RO_EXCLUSIVE:
		/*
		 * Map the page R/O into the page table. Enable softdirty
		 * tracking to stop the page from getting mapped R/W immediately
		 * again by mprotect() optimizations. Note that we don't have an
		 * easy way to test if that worked (the pagemap does not export
		 * if the page is mapped R/O vs. R/W).
		 */
		ret = mprotect(mem, size, PROT_READ);
		clear_softdirty();
		ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
		if (ret) {
			ksft_test_result_fail("mprotect() failed\n");
			goto close_comm_pipes;
		}
		break;
	default:
		assert(false);
	}

	/* Take a R/O pin. This should trigger unsharing. */
	args.addr = (__u64)(uintptr_t)mem;
	args.size = size;
	args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0;
	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
	if (ret) {
		/* EINVAL indicates missing kernel support -> skip, not fail. */
		if (errno == EINVAL)
			ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n");
		else
			ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n");
		goto wait;
	}

	/* Modify the page. */
	memset(mem, 0xff, size);

	/*
	 * Read back the content via the pin to the temporary buffer and
	 * test if we observed the modification.
	 */
617 */ 618 tmp_val = (__u64)(uintptr_t)tmp; 619 ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val); 620 if (ret) 621 ksft_test_result_fail("PIN_LONGTERM_TEST_READ failed\n"); 622 else 623 ksft_test_result(!memcmp(mem, tmp, size), 624 "Longterm R/O pin is reliable\n"); 625 626 ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP); 627 if (ret) 628 ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n"); 629 wait: 630 switch (test) { 631 case RO_PIN_TEST_SHARED: 632 write(comm_pipes.parent_ready[1], "0", 1); 633 wait(&ret); 634 if (!WIFEXITED(ret)) 635 ksft_print_msg("[INFO] wait() failed\n"); 636 break; 637 default: 638 break; 639 } 640 close_comm_pipes: 641 close_comm_pipes(&comm_pipes); 642 free_tmp: 643 free(tmp); 644 } 645 646 static void test_ro_pin_on_shared(char *mem, size_t size) 647 { 648 do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, false); 649 } 650 651 static void test_ro_fast_pin_on_shared(char *mem, size_t size) 652 { 653 do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, true); 654 } 655 656 static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size) 657 { 658 do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, false); 659 } 660 661 static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size) 662 { 663 do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, true); 664 } 665 666 static void test_ro_pin_on_ro_exclusive(char *mem, size_t size) 667 { 668 do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, false); 669 } 670 671 static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size) 672 { 673 do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, true); 674 } 675 676 typedef void (*test_fn)(char *mem, size_t size); 677 678 static void do_run_with_base_page(test_fn fn, bool swapout) 679 { 680 char *mem; 681 int ret; 682 683 mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, 684 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 685 if (mem == MAP_FAILED) { 686 ksft_test_result_fail("mmap() failed\n"); 687 return; 688 } 689 690 ret = 
madvise(mem, pagesize, MADV_NOHUGEPAGE); 691 /* Ignore if not around on a kernel. */ 692 if (ret && errno != EINVAL) { 693 ksft_test_result_fail("MADV_NOHUGEPAGE failed\n"); 694 goto munmap; 695 } 696 697 /* Populate a base page. */ 698 memset(mem, 0, pagesize); 699 700 if (swapout) { 701 madvise(mem, pagesize, MADV_PAGEOUT); 702 if (!pagemap_is_swapped(pagemap_fd, mem)) { 703 ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n"); 704 goto munmap; 705 } 706 } 707 708 fn(mem, pagesize); 709 munmap: 710 munmap(mem, pagesize); 711 } 712 713 static void run_with_base_page(test_fn fn, const char *desc) 714 { 715 ksft_print_msg("[RUN] %s ... with base page\n", desc); 716 do_run_with_base_page(fn, false); 717 } 718 719 static void run_with_base_page_swap(test_fn fn, const char *desc) 720 { 721 ksft_print_msg("[RUN] %s ... with swapped out base page\n", desc); 722 do_run_with_base_page(fn, true); 723 } 724 725 enum thp_run { 726 THP_RUN_PMD, 727 THP_RUN_PMD_SWAPOUT, 728 THP_RUN_PTE, 729 THP_RUN_PTE_SWAPOUT, 730 THP_RUN_SINGLE_PTE, 731 THP_RUN_SINGLE_PTE_SWAPOUT, 732 THP_RUN_PARTIAL_MREMAP, 733 THP_RUN_PARTIAL_SHARED, 734 }; 735 736 static void do_run_with_thp(test_fn fn, enum thp_run thp_run) 737 { 738 char *mem, *mmap_mem, *tmp, *mremap_mem = MAP_FAILED; 739 size_t size, mmap_size, mremap_size; 740 int ret; 741 742 /* For alignment purposes, we need twice the thp size. */ 743 mmap_size = 2 * thpsize; 744 mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, 745 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 746 if (mmap_mem == MAP_FAILED) { 747 ksft_test_result_fail("mmap() failed\n"); 748 return; 749 } 750 751 /* We need a THP-aligned memory area. */ 752 mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1)); 753 754 ret = madvise(mem, thpsize, MADV_HUGEPAGE); 755 if (ret) { 756 ksft_test_result_fail("MADV_HUGEPAGE failed\n"); 757 goto munmap; 758 } 759 760 /* 761 * Try to populate a THP. 
Touch the first sub-page and test if we get 762 * another sub-page populated automatically. 763 */ 764 mem[0] = 0; 765 if (!pagemap_is_populated(pagemap_fd, mem + pagesize)) { 766 ksft_test_result_skip("Did not get a THP populated\n"); 767 goto munmap; 768 } 769 memset(mem, 0, thpsize); 770 771 size = thpsize; 772 switch (thp_run) { 773 case THP_RUN_PMD: 774 case THP_RUN_PMD_SWAPOUT: 775 break; 776 case THP_RUN_PTE: 777 case THP_RUN_PTE_SWAPOUT: 778 /* 779 * Trigger PTE-mapping the THP by temporarily mapping a single 780 * subpage R/O. 781 */ 782 ret = mprotect(mem + pagesize, pagesize, PROT_READ); 783 if (ret) { 784 ksft_test_result_fail("mprotect() failed\n"); 785 goto munmap; 786 } 787 ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE); 788 if (ret) { 789 ksft_test_result_fail("mprotect() failed\n"); 790 goto munmap; 791 } 792 break; 793 case THP_RUN_SINGLE_PTE: 794 case THP_RUN_SINGLE_PTE_SWAPOUT: 795 /* 796 * Discard all but a single subpage of that PTE-mapped THP. What 797 * remains is a single PTE mapping a single subpage. 798 */ 799 ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED); 800 if (ret) { 801 ksft_test_result_fail("MADV_DONTNEED failed\n"); 802 goto munmap; 803 } 804 size = pagesize; 805 break; 806 case THP_RUN_PARTIAL_MREMAP: 807 /* 808 * Remap half of the THP. We need some new memory location 809 * for that. 810 */ 811 mremap_size = thpsize / 2; 812 mremap_mem = mmap(NULL, mremap_size, PROT_NONE, 813 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 814 if (mem == MAP_FAILED) { 815 ksft_test_result_fail("mmap() failed\n"); 816 goto munmap; 817 } 818 tmp = mremap(mem + mremap_size, mremap_size, mremap_size, 819 MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem); 820 if (tmp != mremap_mem) { 821 ksft_test_result_fail("mremap() failed\n"); 822 goto munmap; 823 } 824 size = mremap_size; 825 break; 826 case THP_RUN_PARTIAL_SHARED: 827 /* 828 * Share the first page of the THP with a child and quit the 829 * child. 
This will result in some parts of the THP never 830 * have been shared. 831 */ 832 ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK); 833 if (ret) { 834 ksft_test_result_fail("MADV_DONTFORK failed\n"); 835 goto munmap; 836 } 837 ret = fork(); 838 if (ret < 0) { 839 ksft_test_result_fail("fork() failed\n"); 840 goto munmap; 841 } else if (!ret) { 842 exit(0); 843 } 844 wait(&ret); 845 /* Allow for sharing all pages again. */ 846 ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK); 847 if (ret) { 848 ksft_test_result_fail("MADV_DOFORK failed\n"); 849 goto munmap; 850 } 851 break; 852 default: 853 assert(false); 854 } 855 856 switch (thp_run) { 857 case THP_RUN_PMD_SWAPOUT: 858 case THP_RUN_PTE_SWAPOUT: 859 case THP_RUN_SINGLE_PTE_SWAPOUT: 860 madvise(mem, size, MADV_PAGEOUT); 861 if (!range_is_swapped(mem, size)) { 862 ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n"); 863 goto munmap; 864 } 865 break; 866 default: 867 break; 868 } 869 870 fn(mem, size); 871 munmap: 872 munmap(mmap_mem, mmap_size); 873 if (mremap_mem != MAP_FAILED) 874 munmap(mremap_mem, mremap_size); 875 } 876 877 static void run_with_thp(test_fn fn, const char *desc) 878 { 879 ksft_print_msg("[RUN] %s ... with THP\n", desc); 880 do_run_with_thp(fn, THP_RUN_PMD); 881 } 882 883 static void run_with_thp_swap(test_fn fn, const char *desc) 884 { 885 ksft_print_msg("[RUN] %s ... with swapped-out THP\n", desc); 886 do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT); 887 } 888 889 static void run_with_pte_mapped_thp(test_fn fn, const char *desc) 890 { 891 ksft_print_msg("[RUN] %s ... with PTE-mapped THP\n", desc); 892 do_run_with_thp(fn, THP_RUN_PTE); 893 } 894 895 static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc) 896 { 897 ksft_print_msg("[RUN] %s ... 
with swapped-out, PTE-mapped THP\n", desc); 898 do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT); 899 } 900 901 static void run_with_single_pte_of_thp(test_fn fn, const char *desc) 902 { 903 ksft_print_msg("[RUN] %s ... with single PTE of THP\n", desc); 904 do_run_with_thp(fn, THP_RUN_SINGLE_PTE); 905 } 906 907 static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc) 908 { 909 ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP\n", desc); 910 do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT); 911 } 912 913 static void run_with_partial_mremap_thp(test_fn fn, const char *desc) 914 { 915 ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP\n", desc); 916 do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP); 917 } 918 919 static void run_with_partial_shared_thp(test_fn fn, const char *desc) 920 { 921 ksft_print_msg("[RUN] %s ... with partially shared THP\n", desc); 922 do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED); 923 } 924 925 static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize) 926 { 927 int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB; 928 char *mem, *dummy; 929 930 ksft_print_msg("[RUN] %s ... with hugetlb (%zu kB)\n", desc, 931 hugetlbsize / 1024); 932 933 flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT; 934 935 mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0); 936 if (mem == MAP_FAILED) { 937 ksft_test_result_skip("need more free huge pages\n"); 938 return; 939 } 940 941 /* Populate an huge page. */ 942 memset(mem, 0, hugetlbsize); 943 944 /* 945 * We need a total of two hugetlb pages to handle COW/unsharing 946 * properly, otherwise we might get zapped by a SIGBUS. 
	dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
	if (dummy == MAP_FAILED) {
		ksft_test_result_skip("need more free huge pages\n");
		goto munmap;
	}
	/* We only needed to reserve it; the test does not use it directly. */
	munmap(dummy, hugetlbsize);

	fn(mem, hugetlbsize);
munmap:
	munmap(mem, hugetlbsize);
}

/* A named test: human-readable description plus the implementing function. */
struct test_case {
	const char *desc;
	test_fn fn;
};

/*
 * Test cases that are specific to anonymous pages: pages in private mappings
 * that may get shared via COW during fork().
 */
static const struct test_case anon_test_cases[] = {
	/*
	 * Basic COW tests for fork() without any GUP. If we miss to break COW,
	 * either the child can observe modifications by the parent or the
	 * other way around.
	 */
	{
		"Basic COW after fork()",
		test_cow_in_parent,
	},
	/*
	 * Basic test, but do an additional mprotect(PROT_READ)+
	 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
	 */
	{
		"Basic COW after fork() with mprotect() optimization",
		test_cow_in_parent_mprotect,
	},
	/*
	 * vmsplice() [R/O GUP] + unmap in the child; modify in the parent. If
	 * we miss to break COW, the child observes modifications by the parent.
	 * This is CVE-2020-29374 reported by Jann Horn.
	 */
	{
		"vmsplice() + unmap in child",
		test_vmsplice_in_child
	},
	/*
	 * vmsplice() test, but do an additional mprotect(PROT_READ)+
	 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
	 */
	{
		"vmsplice() + unmap in child with mprotect() optimization",
		test_vmsplice_in_child_mprotect
	},
	/*
	 * vmsplice() [R/O GUP] in parent before fork(), unmap in parent after
	 * fork(); modify in the child. If we miss to break COW, the parent
	 * observes modifications by the child.
	 */
	{
		"vmsplice() before fork(), unmap in parent after fork()",
		test_vmsplice_before_fork,
	},
	/*
	 * vmsplice() [R/O GUP] + unmap in parent after fork(); modify in the
	 * child. If we miss to break COW, the parent observes modifications by
	 * the child.
	 */
	{
		"vmsplice() + unmap in parent after fork()",
		test_vmsplice_after_fork,
	},
#ifdef LOCAL_CONFIG_HAVE_LIBURING
	/*
	 * Take a R/W longterm pin and then map the page R/O into the page
	 * table to trigger a write fault on next access. When modifying the
	 * page, the page content must be visible via the pin.
	 */
	{
		"R/O-mapping a page registered as iouring fixed buffer",
		test_iouring_ro,
	},
	/*
	 * Take a R/W longterm pin and then fork() a child. When modifying the
	 * page, the page content must be visible via the pin. We expect the
	 * pinned page to not get shared with the child.
	 */
	{
		"fork() with an iouring fixed buffer",
		test_iouring_fork,
	},

#endif /* LOCAL_CONFIG_HAVE_LIBURING */
	/*
	 * Take a R/O longterm pin on a R/O-mapped shared anonymous page.
	 * When modifying the page via the page table, the page content change
	 * must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped shared page",
		test_ro_pin_on_shared,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped shared page",
		test_ro_fast_pin_on_shared,
	},
	/*
	 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page that
	 * was previously shared. When modifying the page via the page table,
	 * the page content change must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped previously-shared page",
		test_ro_pin_on_ro_previously_shared,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped previously-shared page",
		test_ro_fast_pin_on_ro_previously_shared,
	},
	/*
	 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page.
	 * When modifying the page via the page table, the page content change
	 * must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped exclusive page",
		test_ro_pin_on_ro_exclusive,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped exclusive page",
		test_ro_fast_pin_on_ro_exclusive,
	},
};

/* Run one test case against every supported anonymous memory type. */
static void run_anon_test_case(struct test_case const *test_case)
{
	int i;

	run_with_base_page(test_case->fn, test_case->desc);
	run_with_base_page_swap(test_case->fn, test_case->desc);
	if (thpsize) {
		run_with_thp(test_case->fn, test_case->desc);
		run_with_thp_swap(test_case->fn, test_case->desc);
		run_with_pte_mapped_thp(test_case->fn, test_case->desc);
		run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc);
		run_with_single_pte_of_thp(test_case->fn, test_case->desc);
		run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc);
		run_with_partial_mremap_thp(test_case->fn, test_case->desc);
		run_with_partial_shared_thp(test_case->fn, test_case->desc);
	}
	for (i = 0; i < nr_hugetlbsizes; i++)
		run_with_hugetlb(test_case->fn, test_case->desc,
				 hugetlbsizes[i]);
}

/* Run every anonymous-memory test case. */
static void run_anon_test_cases(void)
{
	int i;

	ksft_print_msg("[INFO] Anonymous memory tests in private mappings\n");

	for (i = 0; i < ARRAY_SIZE(anon_test_cases); i++)
		run_anon_test_case(&anon_test_cases[i]);
}

/*
 * Number of ksft results reported per anonymous test case; must stay in
 * sync with the runs performed by run_anon_test_case() above.
 */
static int tests_per_anon_test_case(void)
{
	int tests = 2 + nr_hugetlbsizes;

	if (thpsize)
		tests += 8;
	return tests;
}

/* Which parts of the THP are COW-shared before MADV_COLLAPSE. */
enum anon_thp_collapse_test {
	ANON_THP_COLLAPSE_UNSHARED,
	ANON_THP_COLLAPSE_FULLY_SHARED,
	ANON_THP_COLLAPSE_LOWER_SHARED,
	ANON_THP_COLLAPSE_UPPER_SHARED,
};

/*
 * PTE-map a THP, optionally COW-share (parts of) it with a child,
 * collapse it again via MADV_COLLAPSE, modify it in the parent and
 * verify via the child's exit status that nothing leaked into the child.
 */
static void do_test_anon_thp_collapse(char *mem, size_t size,
				      enum anon_thp_collapse_test test)
{
	struct comm_pipes comm_pipes;
	char buf;
	int ret;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		return;
	}

	/*
	 * Trigger PTE-mapping the THP by temporarily mapping a single subpage
	 * R/O, such that we can try collapsing it later.
	 */
	ret = mprotect(mem + pagesize, pagesize, PROT_READ);
	if (ret) {
		ksft_test_result_fail("mprotect() failed\n");
		goto close_comm_pipes;
	}
	ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
	if (ret) {
		ksft_test_result_fail("mprotect() failed\n");
		goto close_comm_pipes;
	}

	switch (test) {
	case ANON_THP_COLLAPSE_UNSHARED:
		/* Collapse before actually COW-sharing the page. */
		ret = madvise(mem, size, MADV_COLLAPSE);
		if (ret) {
			ksft_test_result_skip("MADV_COLLAPSE failed: %s\n",
					      strerror(errno));
			goto close_comm_pipes;
		}
		break;
	case ANON_THP_COLLAPSE_FULLY_SHARED:
		/* COW-share the full PTE-mapped THP. */
		break;
	case ANON_THP_COLLAPSE_LOWER_SHARED:
		/* Don't COW-share the upper part of the THP. */
		ret = madvise(mem + size / 2, size / 2, MADV_DONTFORK);
		if (ret) {
			ksft_test_result_fail("MADV_DONTFORK failed\n");
			goto close_comm_pipes;
		}
		break;
	case ANON_THP_COLLAPSE_UPPER_SHARED:
		/* Don't COW-share the lower part of the THP. */
*/ 1185 ret = madvise(mem, size / 2, MADV_DONTFORK); 1186 if (ret) { 1187 ksft_test_result_fail("MADV_DONTFORK failed\n"); 1188 goto close_comm_pipes; 1189 } 1190 break; 1191 default: 1192 assert(false); 1193 } 1194 1195 ret = fork(); 1196 if (ret < 0) { 1197 ksft_test_result_fail("fork() failed\n"); 1198 goto close_comm_pipes; 1199 } else if (!ret) { 1200 switch (test) { 1201 case ANON_THP_COLLAPSE_UNSHARED: 1202 case ANON_THP_COLLAPSE_FULLY_SHARED: 1203 exit(child_memcmp_fn(mem, size, &comm_pipes)); 1204 break; 1205 case ANON_THP_COLLAPSE_LOWER_SHARED: 1206 exit(child_memcmp_fn(mem, size / 2, &comm_pipes)); 1207 break; 1208 case ANON_THP_COLLAPSE_UPPER_SHARED: 1209 exit(child_memcmp_fn(mem + size / 2, size / 2, 1210 &comm_pipes)); 1211 break; 1212 default: 1213 assert(false); 1214 } 1215 } 1216 1217 while (read(comm_pipes.child_ready[0], &buf, 1) != 1) 1218 ; 1219 1220 switch (test) { 1221 case ANON_THP_COLLAPSE_UNSHARED: 1222 break; 1223 case ANON_THP_COLLAPSE_UPPER_SHARED: 1224 case ANON_THP_COLLAPSE_LOWER_SHARED: 1225 /* 1226 * Revert MADV_DONTFORK such that we merge the VMAs and are 1227 * able to actually collapse. 1228 */ 1229 ret = madvise(mem, size, MADV_DOFORK); 1230 if (ret) { 1231 ksft_test_result_fail("MADV_DOFORK failed\n"); 1232 write(comm_pipes.parent_ready[1], "0", 1); 1233 wait(&ret); 1234 goto close_comm_pipes; 1235 } 1236 /* FALLTHROUGH */ 1237 case ANON_THP_COLLAPSE_FULLY_SHARED: 1238 /* Collapse before anyone modified the COW-shared page. */ 1239 ret = madvise(mem, size, MADV_COLLAPSE); 1240 if (ret) { 1241 ksft_test_result_skip("MADV_COLLAPSE failed: %s\n", 1242 strerror(errno)); 1243 write(comm_pipes.parent_ready[1], "0", 1); 1244 wait(&ret); 1245 goto close_comm_pipes; 1246 } 1247 break; 1248 default: 1249 assert(false); 1250 } 1251 1252 /* Modify the page. 
*/ 1253 memset(mem, 0xff, size); 1254 write(comm_pipes.parent_ready[1], "0", 1); 1255 1256 wait(&ret); 1257 if (WIFEXITED(ret)) 1258 ret = WEXITSTATUS(ret); 1259 else 1260 ret = -EINVAL; 1261 1262 ksft_test_result(!ret, "No leak from parent into child\n"); 1263 close_comm_pipes: 1264 close_comm_pipes(&comm_pipes); 1265 } 1266 1267 static void test_anon_thp_collapse_unshared(char *mem, size_t size) 1268 { 1269 do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UNSHARED); 1270 } 1271 1272 static void test_anon_thp_collapse_fully_shared(char *mem, size_t size) 1273 { 1274 do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_FULLY_SHARED); 1275 } 1276 1277 static void test_anon_thp_collapse_lower_shared(char *mem, size_t size) 1278 { 1279 do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_LOWER_SHARED); 1280 } 1281 1282 static void test_anon_thp_collapse_upper_shared(char *mem, size_t size) 1283 { 1284 do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UPPER_SHARED); 1285 } 1286 1287 /* 1288 * Test cases that are specific to anonymous THP: pages in private mappings 1289 * that may get shared via COW during fork(). 1290 */ 1291 static const struct test_case anon_thp_test_cases[] = { 1292 /* 1293 * Basic COW test for fork() without any GUP when collapsing a THP 1294 * before fork(). 1295 * 1296 * Re-mapping a PTE-mapped anon THP using a single PMD ("in-place 1297 * collapse") might easily get COW handling wrong when not collapsing 1298 * exclusivity information properly. 1299 */ 1300 { 1301 "Basic COW after fork() when collapsing before fork()", 1302 test_anon_thp_collapse_unshared, 1303 }, 1304 /* Basic COW test, but collapse after COW-sharing a full THP. */ 1305 { 1306 "Basic COW after fork() when collapsing after fork() (fully shared)", 1307 test_anon_thp_collapse_fully_shared, 1308 }, 1309 /* 1310 * Basic COW test, but collapse after COW-sharing the lower half of a 1311 * THP. 
1312 */ 1313 { 1314 "Basic COW after fork() when collapsing after fork() (lower shared)", 1315 test_anon_thp_collapse_lower_shared, 1316 }, 1317 /* 1318 * Basic COW test, but collapse after COW-sharing the upper half of a 1319 * THP. 1320 */ 1321 { 1322 "Basic COW after fork() when collapsing after fork() (upper shared)", 1323 test_anon_thp_collapse_upper_shared, 1324 }, 1325 }; 1326 1327 static void run_anon_thp_test_cases(void) 1328 { 1329 int i; 1330 1331 if (!thpsize) 1332 return; 1333 1334 ksft_print_msg("[INFO] Anonymous THP tests\n"); 1335 1336 for (i = 0; i < ARRAY_SIZE(anon_thp_test_cases); i++) { 1337 struct test_case const *test_case = &anon_thp_test_cases[i]; 1338 1339 ksft_print_msg("[RUN] %s\n", test_case->desc); 1340 do_run_with_thp(test_case->fn, THP_RUN_PMD); 1341 } 1342 } 1343 1344 static int tests_per_anon_thp_test_case(void) 1345 { 1346 return thpsize ? 1 : 0; 1347 } 1348 1349 typedef void (*non_anon_test_fn)(char *mem, const char *smem, size_t size); 1350 1351 static void test_cow(char *mem, const char *smem, size_t size) 1352 { 1353 char *old = malloc(size); 1354 1355 /* Backup the original content. */ 1356 memcpy(old, smem, size); 1357 1358 /* Modify the page. */ 1359 memset(mem, 0xff, size); 1360 1361 /* See if we still read the old values via the other mapping. */ 1362 ksft_test_result(!memcmp(smem, old, size), 1363 "Other mapping not modified\n"); 1364 free(old); 1365 } 1366 1367 static void test_ro_pin(char *mem, const char *smem, size_t size) 1368 { 1369 do_test_ro_pin(mem, size, RO_PIN_TEST, false); 1370 } 1371 1372 static void test_ro_fast_pin(char *mem, const char *smem, size_t size) 1373 { 1374 do_test_ro_pin(mem, size, RO_PIN_TEST, true); 1375 } 1376 1377 static void run_with_zeropage(non_anon_test_fn fn, const char *desc) 1378 { 1379 char *mem, *smem, tmp; 1380 1381 ksft_print_msg("[RUN] %s ... 
with shared zeropage\n", desc); 1382 1383 mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, 1384 MAP_PRIVATE | MAP_ANON, -1, 0); 1385 if (mem == MAP_FAILED) { 1386 ksft_test_result_fail("mmap() failed\n"); 1387 return; 1388 } 1389 1390 smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0); 1391 if (mem == MAP_FAILED) { 1392 ksft_test_result_fail("mmap() failed\n"); 1393 goto munmap; 1394 } 1395 1396 /* Read from the page to populate the shared zeropage. */ 1397 tmp = *mem + *smem; 1398 asm volatile("" : "+r" (tmp)); 1399 1400 fn(mem, smem, pagesize); 1401 munmap: 1402 munmap(mem, pagesize); 1403 if (smem != MAP_FAILED) 1404 munmap(smem, pagesize); 1405 } 1406 1407 static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) 1408 { 1409 char *mem, *smem, *mmap_mem, *mmap_smem, tmp; 1410 size_t mmap_size; 1411 int ret; 1412 1413 ksft_print_msg("[RUN] %s ... with huge zeropage\n", desc); 1414 1415 if (!has_huge_zeropage) { 1416 ksft_test_result_skip("Huge zeropage not enabled\n"); 1417 return; 1418 } 1419 1420 /* For alignment purposes, we need twice the thp size. */ 1421 mmap_size = 2 * thpsize; 1422 mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, 1423 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 1424 if (mmap_mem == MAP_FAILED) { 1425 ksft_test_result_fail("mmap() failed\n"); 1426 return; 1427 } 1428 mmap_smem = mmap(NULL, mmap_size, PROT_READ, 1429 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 1430 if (mmap_smem == MAP_FAILED) { 1431 ksft_test_result_fail("mmap() failed\n"); 1432 goto munmap; 1433 } 1434 1435 /* We need a THP-aligned memory area. 
*/ 1436 mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1)); 1437 smem = (char *)(((uintptr_t)mmap_smem + thpsize) & ~(thpsize - 1)); 1438 1439 ret = madvise(mem, thpsize, MADV_HUGEPAGE); 1440 ret |= madvise(smem, thpsize, MADV_HUGEPAGE); 1441 if (ret) { 1442 ksft_test_result_fail("MADV_HUGEPAGE failed\n"); 1443 goto munmap; 1444 } 1445 1446 /* 1447 * Read from the memory to populate the huge shared zeropage. Read from 1448 * the first sub-page and test if we get another sub-page populated 1449 * automatically. 1450 */ 1451 tmp = *mem + *smem; 1452 asm volatile("" : "+r" (tmp)); 1453 if (!pagemap_is_populated(pagemap_fd, mem + pagesize) || 1454 !pagemap_is_populated(pagemap_fd, smem + pagesize)) { 1455 ksft_test_result_skip("Did not get THPs populated\n"); 1456 goto munmap; 1457 } 1458 1459 fn(mem, smem, thpsize); 1460 munmap: 1461 munmap(mmap_mem, mmap_size); 1462 if (mmap_smem != MAP_FAILED) 1463 munmap(mmap_smem, mmap_size); 1464 } 1465 1466 static void run_with_memfd(non_anon_test_fn fn, const char *desc) 1467 { 1468 char *mem, *smem, tmp; 1469 int fd; 1470 1471 ksft_print_msg("[RUN] %s ... with memfd\n", desc); 1472 1473 fd = memfd_create("test", 0); 1474 if (fd < 0) { 1475 ksft_test_result_fail("memfd_create() failed\n"); 1476 return; 1477 } 1478 1479 /* File consists of a single page filled with zeroes. */ 1480 if (fallocate(fd, 0, 0, pagesize)) { 1481 ksft_test_result_fail("fallocate() failed\n"); 1482 goto close; 1483 } 1484 1485 /* Create a private mapping of the memfd. */ 1486 mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); 1487 if (mem == MAP_FAILED) { 1488 ksft_test_result_fail("mmap() failed\n"); 1489 goto close; 1490 } 1491 smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); 1492 if (mem == MAP_FAILED) { 1493 ksft_test_result_fail("mmap() failed\n"); 1494 goto munmap; 1495 } 1496 1497 /* Fault the page in. 
*/ 1498 tmp = *mem + *smem; 1499 asm volatile("" : "+r" (tmp)); 1500 1501 fn(mem, smem, pagesize); 1502 munmap: 1503 munmap(mem, pagesize); 1504 if (smem != MAP_FAILED) 1505 munmap(smem, pagesize); 1506 close: 1507 close(fd); 1508 } 1509 1510 static void run_with_tmpfile(non_anon_test_fn fn, const char *desc) 1511 { 1512 char *mem, *smem, tmp; 1513 FILE *file; 1514 int fd; 1515 1516 ksft_print_msg("[RUN] %s ... with tmpfile\n", desc); 1517 1518 file = tmpfile(); 1519 if (!file) { 1520 ksft_test_result_fail("tmpfile() failed\n"); 1521 return; 1522 } 1523 1524 fd = fileno(file); 1525 if (fd < 0) { 1526 ksft_test_result_skip("fileno() failed\n"); 1527 return; 1528 } 1529 1530 /* File consists of a single page filled with zeroes. */ 1531 if (fallocate(fd, 0, 0, pagesize)) { 1532 ksft_test_result_fail("fallocate() failed\n"); 1533 goto close; 1534 } 1535 1536 /* Create a private mapping of the memfd. */ 1537 mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); 1538 if (mem == MAP_FAILED) { 1539 ksft_test_result_fail("mmap() failed\n"); 1540 goto close; 1541 } 1542 smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); 1543 if (mem == MAP_FAILED) { 1544 ksft_test_result_fail("mmap() failed\n"); 1545 goto munmap; 1546 } 1547 1548 /* Fault the page in. */ 1549 tmp = *mem + *smem; 1550 asm volatile("" : "+r" (tmp)); 1551 1552 fn(mem, smem, pagesize); 1553 munmap: 1554 munmap(mem, pagesize); 1555 if (smem != MAP_FAILED) 1556 munmap(smem, pagesize); 1557 close: 1558 fclose(file); 1559 } 1560 1561 static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc, 1562 size_t hugetlbsize) 1563 { 1564 int flags = MFD_HUGETLB; 1565 char *mem, *smem, tmp; 1566 int fd; 1567 1568 ksft_print_msg("[RUN] %s ... 
with memfd hugetlb (%zu kB)\n", desc, 1569 hugetlbsize / 1024); 1570 1571 flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT; 1572 1573 fd = memfd_create("test", flags); 1574 if (fd < 0) { 1575 ksft_test_result_skip("memfd_create() failed\n"); 1576 return; 1577 } 1578 1579 /* File consists of a single page filled with zeroes. */ 1580 if (fallocate(fd, 0, 0, hugetlbsize)) { 1581 ksft_test_result_skip("need more free huge pages\n"); 1582 goto close; 1583 } 1584 1585 /* Create a private mapping of the memfd. */ 1586 mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 1587 0); 1588 if (mem == MAP_FAILED) { 1589 ksft_test_result_skip("need more free huge pages\n"); 1590 goto close; 1591 } 1592 smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0); 1593 if (mem == MAP_FAILED) { 1594 ksft_test_result_fail("mmap() failed\n"); 1595 goto munmap; 1596 } 1597 1598 /* Fault the page in. */ 1599 tmp = *mem + *smem; 1600 asm volatile("" : "+r" (tmp)); 1601 1602 fn(mem, smem, hugetlbsize); 1603 munmap: 1604 munmap(mem, hugetlbsize); 1605 if (mem != MAP_FAILED) 1606 munmap(smem, hugetlbsize); 1607 close: 1608 close(fd); 1609 } 1610 1611 struct non_anon_test_case { 1612 const char *desc; 1613 non_anon_test_fn fn; 1614 }; 1615 1616 /* 1617 * Test cases that target any pages in private mappings that are not anonymous: 1618 * pages that may get shared via COW ndependent of fork(). This includes 1619 * the shared zeropage(s), pagecache pages, ... 1620 */ 1621 static const struct non_anon_test_case non_anon_test_cases[] = { 1622 /* 1623 * Basic COW test without any GUP. If we miss to break COW, changes are 1624 * visible via other private/shared mappings. 1625 */ 1626 { 1627 "Basic COW", 1628 test_cow, 1629 }, 1630 /* 1631 * Take a R/O longterm pin. When modifying the page via the page table, 1632 * the page content change must be visible via the pin. 
1633 */ 1634 { 1635 "R/O longterm GUP pin", 1636 test_ro_pin, 1637 }, 1638 /* Same as above, but using GUP-fast. */ 1639 { 1640 "R/O longterm GUP-fast pin", 1641 test_ro_fast_pin, 1642 }, 1643 }; 1644 1645 static void run_non_anon_test_case(struct non_anon_test_case const *test_case) 1646 { 1647 int i; 1648 1649 run_with_zeropage(test_case->fn, test_case->desc); 1650 run_with_memfd(test_case->fn, test_case->desc); 1651 run_with_tmpfile(test_case->fn, test_case->desc); 1652 if (thpsize) 1653 run_with_huge_zeropage(test_case->fn, test_case->desc); 1654 for (i = 0; i < nr_hugetlbsizes; i++) 1655 run_with_memfd_hugetlb(test_case->fn, test_case->desc, 1656 hugetlbsizes[i]); 1657 } 1658 1659 static void run_non_anon_test_cases(void) 1660 { 1661 int i; 1662 1663 ksft_print_msg("[RUN] Non-anonymous memory tests in private mappings\n"); 1664 1665 for (i = 0; i < ARRAY_SIZE(non_anon_test_cases); i++) 1666 run_non_anon_test_case(&non_anon_test_cases[i]); 1667 } 1668 1669 static int tests_per_non_anon_test_case(void) 1670 { 1671 int tests = 3 + nr_hugetlbsizes; 1672 1673 if (thpsize) 1674 tests += 1; 1675 return tests; 1676 } 1677 1678 int main(int argc, char **argv) 1679 { 1680 int err; 1681 1682 pagesize = getpagesize(); 1683 thpsize = read_pmd_pagesize(); 1684 if (thpsize) 1685 ksft_print_msg("[INFO] detected THP size: %zu KiB\n", 1686 thpsize / 1024); 1687 nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes, 1688 ARRAY_SIZE(hugetlbsizes)); 1689 detect_huge_zeropage(); 1690 1691 ksft_print_header(); 1692 ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() + 1693 ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() + 1694 ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case()); 1695 1696 gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR); 1697 pagemap_fd = open("/proc/self/pagemap", O_RDONLY); 1698 if (pagemap_fd < 0) 1699 ksft_exit_fail_msg("opening pagemap failed\n"); 1700 1701 run_anon_test_cases(); 1702 
run_anon_thp_test_cases(); 1703 run_non_anon_test_cases(); 1704 1705 err = ksft_get_fail_cnt(); 1706 if (err) 1707 ksft_exit_fail_msg("%d out of %d tests failed\n", 1708 err, ksft_test_num()); 1709 return ksft_exit_pass(); 1710 } 1711