// SPDX-License-Identifier: GPL-2.0-only
/*
 * COW (Copy On Write) tests.
 *
 * Copyright 2022, Red Hat, Inc.
 *
 * Author(s): David Hildenbrand <david@redhat.com>
 */
#define _GNU_SOURCE
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <stdint.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <assert.h>
#include <linux/mman.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/wait.h>
#include <linux/memfd.h>

#include "local_config.h"
#ifdef LOCAL_CONFIG_HAVE_LIBURING
#include <liburing.h>
#endif /* LOCAL_CONFIG_HAVE_LIBURING */

#include "../../../../mm/gup_test.h"
#include "../kselftest.h"
#include "vm_util.h"

static size_t pagesize;
static int pagemap_fd;
static size_t thpsize;
static int nr_hugetlbsizes;
static size_t hugetlbsizes[10];
static int gup_fd;
static bool has_huge_zeropage;

static void detect_huge_zeropage(void)
{
	int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page",
		      O_RDONLY);
	size_t enabled = 0;
	char buf[15];
	int ret;

	if (fd < 0)
		return;

	ret = pread(fd, buf, sizeof(buf), 0);
	if (ret > 0 && ret < sizeof(buf)) {
		buf[ret] = 0;

		enabled = strtoul(buf, NULL, 10);
		if (enabled == 1) {
			has_huge_zeropage = true;
			ksft_print_msg("[INFO] huge zeropage is enabled\n");
		}
	}

	close(fd);
}

static bool range_is_swapped(void *addr, size_t size)
{
	for (; size; addr += pagesize, size -= pagesize)
		if (!pagemap_is_swapped(pagemap_fd, addr))
			return false;
	return true;
}

struct comm_pipes {
	int child_ready[2];
	int parent_ready[2];
};

static int setup_comm_pipes(struct comm_pipes *comm_pipes)
{
	if (pipe(comm_pipes->child_ready) < 0)
		return -errno;
	if (pipe(comm_pipes->parent_ready) < 0) {
		close(comm_pipes->child_ready[0]);
		close(comm_pipes->child_ready[1]);
		return -errno;
	}

	return 0;
}

static void close_comm_pipes(struct comm_pipes *comm_pipes)
{
	close(comm_pipes->child_ready[0]);
	close(comm_pipes->child_ready[1]);
	close(comm_pipes->parent_ready[0]);
	close(comm_pipes->parent_ready[1]);
}

static int child_memcmp_fn(char *mem, size_t size,
			   struct comm_pipes *comm_pipes)
{
	char *old = malloc(size);
	char buf;

	/* Backup the original content. */
	memcpy(old, mem, size);

	/* Wait until the parent modified the page. */
	write(comm_pipes->child_ready[1], "0", 1);
	while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
		;

	/* See if we still read the old values. */
	return memcmp(old, mem, size);
}

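/*
 * Child function for the vmsplice() tests: pin the pages R/O via vmsplice(),
 * unmap them, and, once the parent signals that it modified the memory,
 * check whether the pipe still observes the old content. This runs in the
 * child process only, which is why error handling is kept minimal.
 */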
static int child_vmsplice_memcmp_fn(char *mem, size_t size,
				    struct comm_pipes *comm_pipes)
{
	struct iovec iov = {
		.iov_base = mem,
		.iov_len = size,
	};
	ssize_t cur, total, transferred;
	char *old, *new;
	int fds[2];
	char buf;

	old = malloc(size);
	new = malloc(size);

	/* Backup the original content. */
	memcpy(old, mem, size);

	if (pipe(fds) < 0)
		return -errno;

	/* Trigger a read-only pin. */
	transferred = vmsplice(fds[1], &iov, 1, 0);
	if (transferred < 0)
		return -errno;
	if (transferred == 0)
		return -EINVAL;

	/* Unmap it from our page tables. */
	if (munmap(mem, size) < 0)
		return -errno;

	/* Wait until the parent modified it. */
	write(comm_pipes->child_ready[1], "0", 1);
	while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
		;

	/* See if we still read the old values via the pipe. */
	for (total = 0; total < transferred; total += cur) {
		cur = read(fds[0], new + total, transferred - total);
		if (cur < 0)
			return -errno;
	}

	return memcmp(old, new, transferred);
}

typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes);

static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
				  child_fn fn)
{
	struct comm_pipes comm_pipes;
	char buf;
	int ret;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		return;
	}

	ret = fork();
	if (ret < 0) {
		ksft_test_result_fail("fork() failed\n");
		goto close_comm_pipes;
	} else if (!ret) {
		exit(fn(mem, size, &comm_pipes));
	}

	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
		;

	if (do_mprotect) {
		/*
		 * mprotect() optimizations might try avoiding
		 * write-faults by directly mapping pages writable.
		 */
		ret = mprotect(mem, size, PROT_READ);
		ret |= mprotect(mem, size, PROT_READ|PROT_WRITE);
		if (ret) {
			ksft_test_result_fail("mprotect() failed\n");
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}
	}

	/* Modify the page. */
	memset(mem, 0xff, size);
	write(comm_pipes.parent_ready[1], "0", 1);

	wait(&ret);
	if (WIFEXITED(ret))
		ret = WEXITSTATUS(ret);
	else
		ret = -EINVAL;

	ksft_test_result(!ret, "No leak from parent into child\n");
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
}

static void test_cow_in_parent(char *mem, size_t size)
{
	do_test_cow_in_parent(mem, size, false, child_memcmp_fn);
}

static void test_cow_in_parent_mprotect(char *mem, size_t size)
{
	do_test_cow_in_parent(mem, size, true, child_memcmp_fn);
}

static void test_vmsplice_in_child(char *mem, size_t size)
{
	do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn);
}

static void test_vmsplice_in_child_mprotect(char *mem, size_t size)
{
	do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn);
}

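/*
 * vmsplice() the memory R/O in the parent (either before or after fork()),
 * unmap it, let the child modify the memory, and verify that the pipe still
 * observes the old content: COW in the child must not leak into the
 * parent's pin.
 */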
static void do_test_vmsplice_in_parent(char *mem, size_t size,
				       bool before_fork)
{
	struct iovec iov = {
		.iov_base = mem,
		.iov_len = size,
	};
	ssize_t cur, total, transferred;
	struct comm_pipes comm_pipes;
	char *old, *new;
	int ret, fds[2];
	char buf;

	old = malloc(size);
	new = malloc(size);

	memcpy(old, mem, size);

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		goto free;
	}

	if (pipe(fds) < 0) {
		ksft_test_result_fail("pipe() failed\n");
		goto close_comm_pipes;
	}

	if (before_fork) {
		transferred = vmsplice(fds[1], &iov, 1, 0);
		if (transferred <= 0) {
			ksft_test_result_fail("vmsplice() failed\n");
			goto close_pipe;
		}
	}

	ret = fork();
	if (ret < 0) {
		ksft_test_result_fail("fork() failed\n");
		goto close_pipe;
	} else if (!ret) {
		write(comm_pipes.child_ready[1], "0", 1);
		while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
			;
		/* Modify page content in the child. */
		memset(mem, 0xff, size);
		exit(0);
	}

	if (!before_fork) {
		transferred = vmsplice(fds[1], &iov, 1, 0);
		if (transferred <= 0) {
			ksft_test_result_fail("vmsplice() failed\n");
			wait(&ret);
			goto close_pipe;
		}
	}

	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
		;
	if (munmap(mem, size) < 0) {
		ksft_test_result_fail("munmap() failed\n");
		goto close_pipe;
	}
	write(comm_pipes.parent_ready[1], "0", 1);

	/* Wait until the child is done writing. */
	wait(&ret);
	if (!WIFEXITED(ret)) {
		ksft_test_result_fail("wait() failed\n");
		goto close_pipe;
	}

	/* See if we still read the old values. */
	for (total = 0; total < transferred; total += cur) {
		cur = read(fds[0], new + total, transferred - total);
		if (cur < 0) {
			ksft_test_result_fail("read() failed\n");
			goto close_pipe;
		}
	}

	ksft_test_result(!memcmp(old, new, transferred),
			 "No leak from child into parent\n");
close_pipe:
	close(fds[0]);
	close(fds[1]);
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
free:
	free(old);
	free(new);
}

static void test_vmsplice_before_fork(char *mem, size_t size)
{
	do_test_vmsplice_in_parent(mem, size, true);
}

static void test_vmsplice_after_fork(char *mem, size_t size)
{
	do_test_vmsplice_in_parent(mem, size, false);
}

#ifdef LOCAL_CONFIG_HAVE_LIBURING
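/*
 * Register the memory as an io_uring fixed buffer (R/W longterm pin), then
 * either fork() or remap the pages R/O, modify the memory, and verify that
 * an IORING_OP_WRITE_FIXED through the pin still observes the modification.
 */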
static void do_test_iouring(char *mem, size_t size, bool use_fork)
{
	struct comm_pipes comm_pipes;
	struct io_uring_cqe *cqe;
	struct io_uring_sqe *sqe;
	struct io_uring ring;
	ssize_t cur, total;
	struct iovec iov;
	char *tmp, buf;
	int ret, fd;
	FILE *file;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		return;
	}

	file = tmpfile();
	if (!file) {
		ksft_test_result_fail("tmpfile() failed\n");
		goto close_comm_pipes;
	}
	fd = fileno(file);
	assert(fd >= 0);

	tmp = malloc(size);
	if (!tmp) {
		ksft_test_result_fail("malloc() failed\n");
		goto close_file;
	}

	/* Skip on errors, as we might just lack kernel support. */
	ret = io_uring_queue_init(1, &ring, 0);
	if (ret < 0) {
		ksft_test_result_skip("io_uring_queue_init() failed\n");
		goto free_tmp;
	}

	/*
	 * Register the range as a fixed buffer. This will FOLL_WRITE | FOLL_PIN
	 * | FOLL_LONGTERM the range.
	 *
	 * Skip on errors, as we might just lack kernel support or might not
	 * have sufficient MEMLOCK permissions.
	 */
	iov.iov_base = mem;
	iov.iov_len = size;
	ret = io_uring_register_buffers(&ring, &iov, 1);
	if (ret) {
		ksft_test_result_skip("io_uring_register_buffers() failed\n");
		goto queue_exit;
	}

	if (use_fork) {
		/*
		 * fork() and keep the child alive until we're done. Note that
		 * we expect the pinned page to not get shared with the child.
		 */
		ret = fork();
		if (ret < 0) {
			ksft_test_result_fail("fork() failed\n");
			goto unregister_buffers;
		} else if (!ret) {
			write(comm_pipes.child_ready[1], "0", 1);
			while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
				;
			exit(0);
		}

		while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
			;
	} else {
		/*
		 * Map the page R/O into the page table. Enable softdirty
		 * tracking to stop the page from getting mapped R/W immediately
		 * again by mprotect() optimizations. Note that we don't have an
		 * easy way to test if that worked (the pagemap does not export
		 * if the page is mapped R/O vs. R/W).
		 */
		ret = mprotect(mem, size, PROT_READ);
		clear_softdirty();
		ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
		if (ret) {
			ksft_test_result_fail("mprotect() failed\n");
			goto unregister_buffers;
		}
	}

	/*
	 * Modify the page and write page content as observed by the fixed
	 * buffer pin to the file so we can verify it.
	 */
	memset(mem, 0xff, size);
	sqe = io_uring_get_sqe(&ring);
	if (!sqe) {
		ksft_test_result_fail("io_uring_get_sqe() failed\n");
		goto quit_child;
	}
	io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0);

	ret = io_uring_submit(&ring);
	if (ret < 0) {
		ksft_test_result_fail("io_uring_submit() failed\n");
		goto quit_child;
	}

	ret = io_uring_wait_cqe(&ring, &cqe);
	if (ret < 0) {
		ksft_test_result_fail("io_uring_wait_cqe() failed\n");
		goto quit_child;
	}

	if (cqe->res != size) {
		ksft_test_result_fail("write_fixed failed\n");
		goto quit_child;
	}
	io_uring_cqe_seen(&ring, cqe);

	/* Read back the file content to the temporary buffer. */
	total = 0;
	while (total < size) {
		cur = pread(fd, tmp + total, size - total, total);
		if (cur < 0) {
			ksft_test_result_fail("pread() failed\n");
			goto quit_child;
		}
		total += cur;
	}

	/* Finally, check if we read what we expected. */
	ksft_test_result(!memcmp(mem, tmp, size),
			 "Longterm R/W pin is reliable\n");

quit_child:
	if (use_fork) {
		write(comm_pipes.parent_ready[1], "0", 1);
		wait(&ret);
	}
unregister_buffers:
	io_uring_unregister_buffers(&ring);
queue_exit:
	io_uring_queue_exit(&ring);
free_tmp:
	free(tmp);
close_file:
	fclose(file);
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
}

static void test_iouring_ro(char *mem, size_t size)
{
	do_test_iouring(mem, size, false);
}

static void test_iouring_fork(char *mem, size_t size)
{
	do_test_iouring(mem, size, true);
}

#endif /* LOCAL_CONFIG_HAVE_LIBURING */

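/*
 * R/O longterm pin tests based on the gup_test debugfs interface; the enum
 * selects how the page is mapped and/or shared before the pin is taken.
 */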
enum ro_pin_test {
	RO_PIN_TEST,
	RO_PIN_TEST_SHARED,
	RO_PIN_TEST_PREVIOUSLY_SHARED,
	RO_PIN_TEST_RO_EXCLUSIVE,
};

static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
			   bool fast)
{
	struct pin_longterm_test args;
	struct comm_pipes comm_pipes;
	char *tmp, buf;
	__u64 tmp_val;
	int ret;

	if (gup_fd < 0) {
		ksft_test_result_skip("gup_test not available\n");
		return;
	}

	tmp = malloc(size);
	if (!tmp) {
		ksft_test_result_fail("malloc() failed\n");
		return;
	}

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		goto free_tmp;
	}

	switch (test) {
	case RO_PIN_TEST:
		break;
	case RO_PIN_TEST_SHARED:
	case RO_PIN_TEST_PREVIOUSLY_SHARED:
		/*
		 * Share the pages with our child. As the pages are not pinned,
		 * this should just work.
		 */
		ret = fork();
		if (ret < 0) {
			ksft_test_result_fail("fork() failed\n");
			goto close_comm_pipes;
		} else if (!ret) {
			write(comm_pipes.child_ready[1], "0", 1);
			while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
				;
			exit(0);
		}

		/* Wait until our child is ready. */
		while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
			;

		if (test == RO_PIN_TEST_PREVIOUSLY_SHARED) {
			/*
			 * Tell the child to quit now and wait until it quit.
			 * The pages should now be mapped R/O into our page
			 * tables, but they are no longer shared.
			 */
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			if (!WIFEXITED(ret))
				ksft_print_msg("[INFO] wait() failed\n");
		}
		break;
	case RO_PIN_TEST_RO_EXCLUSIVE:
		/*
		 * Map the page R/O into the page table. Enable softdirty
		 * tracking to stop the page from getting mapped R/W immediately
		 * again by mprotect() optimizations. Note that we don't have an
		 * easy way to test if that worked (the pagemap does not export
		 * if the page is mapped R/O vs. R/W).
		 */
		ret = mprotect(mem, size, PROT_READ);
		clear_softdirty();
		ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
		if (ret) {
			ksft_test_result_fail("mprotect() failed\n");
			goto close_comm_pipes;
		}
		break;
	default:
		assert(false);
	}

	/* Take a R/O pin. This should trigger unsharing. */
	args.addr = (__u64)(uintptr_t)mem;
	args.size = size;
	args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0;
	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
	if (ret) {
		if (errno == EINVAL)
			ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n");
		else
			ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n");
		goto wait;
	}

	/* Modify the page. */
	memset(mem, 0xff, size);

	/*
	 * Read back the content via the pin to the temporary buffer and
	 * test if we observed the modification.
	 */
	tmp_val = (__u64)(uintptr_t)tmp;
	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val);
	if (ret)
		ksft_test_result_fail("PIN_LONGTERM_TEST_READ failed\n");
	else
		ksft_test_result(!memcmp(mem, tmp, size),
				 "Longterm R/O pin is reliable\n");

	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP);
	if (ret)
		ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n");
wait:
	switch (test) {
	case RO_PIN_TEST_SHARED:
		write(comm_pipes.parent_ready[1], "0", 1);
		wait(&ret);
		if (!WIFEXITED(ret))
			ksft_print_msg("[INFO] wait() failed\n");
		break;
	default:
		break;
	}
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
free_tmp:
	free(tmp);
}

static void test_ro_pin_on_shared(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, false);
}

static void test_ro_fast_pin_on_shared(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, true);
}

static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, false);
}

static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, true);
}

static void test_ro_pin_on_ro_exclusive(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, false);
}

static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, true);
}

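/*
 * Test runners: each variant prepares an anonymous memory area (base page,
 * THP in various mapping states, hugetlb page) and then invokes the given
 * test on it.
 */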
typedef void (*test_fn)(char *mem, size_t size);

static void do_run_with_base_page(test_fn fn, bool swapout)
{
	char *mem;
	int ret;

	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return;
	}

	ret = madvise(mem, pagesize, MADV_NOHUGEPAGE);
	/* Ignore if MADV_NOHUGEPAGE is not supported by this kernel. */
	if (ret && errno != EINVAL) {
		ksft_test_result_fail("MADV_NOHUGEPAGE failed\n");
		goto munmap;
	}

	/* Populate a base page. */
	memset(mem, 0, pagesize);

	if (swapout) {
		madvise(mem, pagesize, MADV_PAGEOUT);
		if (!pagemap_is_swapped(pagemap_fd, mem)) {
			ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
			goto munmap;
		}
	}

	fn(mem, pagesize);
munmap:
	munmap(mem, pagesize);
}

static void run_with_base_page(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with base page\n", desc);
	do_run_with_base_page(fn, false);
}

static void run_with_base_page_swap(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with swapped out base page\n", desc);
	do_run_with_base_page(fn, true);
}

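/*
 * The THP mapping state to establish before running a test: mapped via a
 * PMD, remapped via PTEs, reduced to a single PTE, partially mremap()'ed,
 * partially shared, and/or swapped out.
 */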
enum thp_run {
	THP_RUN_PMD,
	THP_RUN_PMD_SWAPOUT,
	THP_RUN_PTE,
	THP_RUN_PTE_SWAPOUT,
	THP_RUN_SINGLE_PTE,
	THP_RUN_SINGLE_PTE_SWAPOUT,
	THP_RUN_PARTIAL_MREMAP,
	THP_RUN_PARTIAL_SHARED,
};

static void do_run_with_thp(test_fn fn, enum thp_run thp_run)
{
	char *mem, *mmap_mem, *tmp, *mremap_mem = MAP_FAILED;
	size_t size, mmap_size, mremap_size;
	int ret;

	/* For alignment purposes, we need twice the thp size. */
	mmap_size = 2 * thpsize;
	mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mmap_mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return;
	}

	/* We need a THP-aligned memory area. */
	mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1));

	ret = madvise(mem, thpsize, MADV_HUGEPAGE);
	if (ret) {
		ksft_test_result_fail("MADV_HUGEPAGE failed\n");
		goto munmap;
	}

	/*
	 * Try to populate a THP. Touch the first sub-page and test if we get
	 * another sub-page populated automatically.
	 */
	mem[0] = 0;
	if (!pagemap_is_populated(pagemap_fd, mem + pagesize)) {
		ksft_test_result_skip("Did not get a THP populated\n");
		goto munmap;
	}
	memset(mem, 0, thpsize);

	size = thpsize;
	switch (thp_run) {
	case THP_RUN_PMD:
	case THP_RUN_PMD_SWAPOUT:
		break;
	case THP_RUN_PTE:
	case THP_RUN_PTE_SWAPOUT:
		/*
		 * Trigger PTE-mapping the THP by temporarily mapping a single
		 * subpage R/O.
		 */
		ret = mprotect(mem + pagesize, pagesize, PROT_READ);
		if (ret) {
			ksft_test_result_fail("mprotect() failed\n");
			goto munmap;
		}
		ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
		if (ret) {
			ksft_test_result_fail("mprotect() failed\n");
			goto munmap;
		}
		break;
	case THP_RUN_SINGLE_PTE:
	case THP_RUN_SINGLE_PTE_SWAPOUT:
		/*
		 * Discard all but a single subpage of that PTE-mapped THP. What
		 * remains is a single PTE mapping a single subpage.
		 */
		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED);
		if (ret) {
			ksft_test_result_fail("MADV_DONTNEED failed\n");
			goto munmap;
		}
		size = pagesize;
		break;
	case THP_RUN_PARTIAL_MREMAP:
		/*
		 * Remap half of the THP. We need some new memory location
		 * for that.
		 */
		mremap_size = thpsize / 2;
		mremap_mem = mmap(NULL, mremap_size, PROT_NONE,
				  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (mremap_mem == MAP_FAILED) {
			ksft_test_result_fail("mmap() failed\n");
			goto munmap;
		}
		tmp = mremap(mem + mremap_size, mremap_size, mremap_size,
			     MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem);
		if (tmp != mremap_mem) {
			ksft_test_result_fail("mremap() failed\n");
			goto munmap;
		}
		size = mremap_size;
		break;
	case THP_RUN_PARTIAL_SHARED:
		/*
		 * Share the first page of the THP with a child and quit the
		 * child. This will result in some parts of the THP never
		 * having been shared.
		 */
		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK);
		if (ret) {
			ksft_test_result_fail("MADV_DONTFORK failed\n");
			goto munmap;
		}
		ret = fork();
		if (ret < 0) {
			ksft_test_result_fail("fork() failed\n");
			goto munmap;
		} else if (!ret) {
			exit(0);
		}
		wait(&ret);
		/* Allow for sharing all pages again. */
		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK);
		if (ret) {
			ksft_test_result_fail("MADV_DOFORK failed\n");
			goto munmap;
		}
		break;
	default:
		assert(false);
	}

	switch (thp_run) {
	case THP_RUN_PMD_SWAPOUT:
	case THP_RUN_PTE_SWAPOUT:
	case THP_RUN_SINGLE_PTE_SWAPOUT:
		madvise(mem, size, MADV_PAGEOUT);
		if (!range_is_swapped(mem, size)) {
			ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
			goto munmap;
		}
		break;
	default:
		break;
	}

	fn(mem, size);
munmap:
	munmap(mmap_mem, mmap_size);
	if (mremap_mem != MAP_FAILED)
		munmap(mremap_mem, mremap_size);
}

static void run_with_thp(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PMD);
}

static void run_with_thp_swap(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with swapped-out THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT);
}

static void run_with_pte_mapped_thp(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with PTE-mapped THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PTE);
}

static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT);
}

static void run_with_single_pte_of_thp(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with single PTE of THP\n", desc);
	do_run_with_thp(fn, THP_RUN_SINGLE_PTE);
}

static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP\n", desc);
	do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT);
}

static void run_with_partial_mremap_thp(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP);
}

static void run_with_partial_shared_thp(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with partially shared THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED);
}

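/*
 * MAP_HUGETLB with the page size encoded via MAP_HUGE_SHIFT: because
 * hugetlbsize is a power of two, __builtin_ctzll() yields its log2. For
 * example, a 2 MiB page gives __builtin_ctzll(2 * 1024 * 1024) == 21, the
 * value encoded by MAP_HUGE_2MB.
 */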
static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize)
{
	int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
	char *mem, *dummy;

	ksft_print_msg("[RUN] %s ... with hugetlb (%zu kB)\n", desc,
		       hugetlbsize / 1024);

	flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT;

	mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_skip("need more free huge pages\n");
		return;
	}

	/* Populate a huge page. */
	memset(mem, 0, hugetlbsize);

	/*
	 * We need a total of two hugetlb pages to handle COW/unsharing
	 * properly, otherwise we might get zapped by a SIGBUS.
	 */
	dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
	if (dummy == MAP_FAILED) {
		ksft_test_result_skip("need more free huge pages\n");
		goto munmap;
	}
	munmap(dummy, hugetlbsize);

	fn(mem, hugetlbsize);
munmap:
	munmap(mem, hugetlbsize);
}

struct test_case {
	const char *desc;
	test_fn fn;
};

/*
 * Test cases that are specific to anonymous pages: pages in private mappings
 * that may get shared via COW during fork().
 */
static const struct test_case anon_test_cases[] = {
	/*
	 * Basic COW tests for fork() without any GUP. If we fail to break COW,
	 * either the child can observe modifications by the parent or the
	 * other way around.
	 */
	{
		"Basic COW after fork()",
		test_cow_in_parent,
	},
	/*
	 * Basic test, but do an additional mprotect(PROT_READ)+
	 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
	 */
	{
		"Basic COW after fork() with mprotect() optimization",
		test_cow_in_parent_mprotect,
	},
	/*
	 * vmsplice() [R/O GUP] + unmap in the child; modify in the parent. If
	 * we fail to break COW, the child observes modifications by the parent.
	 * This is CVE-2020-29374 reported by Jann Horn.
	 */
	{
		"vmsplice() + unmap in child",
		test_vmsplice_in_child
	},
	/*
	 * vmsplice() test, but do an additional mprotect(PROT_READ)+
	 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
	 */
	{
		"vmsplice() + unmap in child with mprotect() optimization",
		test_vmsplice_in_child_mprotect
	},
	/*
	 * vmsplice() [R/O GUP] in parent before fork(), unmap in parent after
	 * fork(); modify in the child. If we fail to break COW, the parent
	 * observes modifications by the child.
	 */
	{
		"vmsplice() before fork(), unmap in parent after fork()",
		test_vmsplice_before_fork,
	},
	/*
	 * vmsplice() [R/O GUP] + unmap in parent after fork(); modify in the
	 * child. If we fail to break COW, the parent observes modifications by
	 * the child.
	 */
	{
		"vmsplice() + unmap in parent after fork()",
		test_vmsplice_after_fork,
	},
#ifdef LOCAL_CONFIG_HAVE_LIBURING
	/*
	 * Take a R/W longterm pin and then map the page R/O into the page
	 * table to trigger a write fault on next access. When modifying the
	 * page, the page content must be visible via the pin.
	 */
	{
		"R/O-mapping a page registered as iouring fixed buffer",
		test_iouring_ro,
	},
	/*
	 * Take a R/W longterm pin and then fork() a child. When modifying the
	 * page, the page content must be visible via the pin. We expect the
	 * pinned page to not get shared with the child.
	 */
	{
		"fork() with an iouring fixed buffer",
		test_iouring_fork,
	},
#endif /* LOCAL_CONFIG_HAVE_LIBURING */
	/*
	 * Take a R/O longterm pin on a R/O-mapped shared anonymous page.
	 * When modifying the page via the page table, the page content change
	 * must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped shared page",
		test_ro_pin_on_shared,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped shared page",
		test_ro_fast_pin_on_shared,
	},
	/*
	 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page that
	 * was previously shared. When modifying the page via the page table,
	 * the page content change must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped previously-shared page",
		test_ro_pin_on_ro_previously_shared,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped previously-shared page",
		test_ro_fast_pin_on_ro_previously_shared,
	},
	/*
	 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page.
	 * When modifying the page via the page table, the page content change
	 * must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped exclusive page",
		test_ro_pin_on_ro_exclusive,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped exclusive page",
		test_ro_fast_pin_on_ro_exclusive,
	},
};

static void run_anon_test_case(struct test_case const *test_case)
{
	int i;

	run_with_base_page(test_case->fn, test_case->desc);
	run_with_base_page_swap(test_case->fn, test_case->desc);
	if (thpsize) {
		run_with_thp(test_case->fn, test_case->desc);
		run_with_thp_swap(test_case->fn, test_case->desc);
		run_with_pte_mapped_thp(test_case->fn, test_case->desc);
		run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc);
		run_with_single_pte_of_thp(test_case->fn, test_case->desc);
		run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc);
		run_with_partial_mremap_thp(test_case->fn, test_case->desc);
		run_with_partial_shared_thp(test_case->fn, test_case->desc);
	}
	for (i = 0; i < nr_hugetlbsizes; i++)
		run_with_hugetlb(test_case->fn, test_case->desc,
				 hugetlbsizes[i]);
}

static void run_anon_test_cases(void)
{
	int i;

	ksft_print_msg("[INFO] Anonymous memory tests in private mappings\n");

	for (i = 0; i < ARRAY_SIZE(anon_test_cases); i++)
		run_anon_test_case(&anon_test_cases[i]);
}

static int tests_per_anon_test_case(void)
{
	int tests = 2 + nr_hugetlbsizes;

	if (thpsize)
		tests += 8;
	return tests;
}

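/*
 * MADV_COLLAPSE tests: selects which parts of the PTE-mapped THP are
 * COW-shared with the child when the collapse is triggered.
 */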
enum anon_thp_collapse_test {
	ANON_THP_COLLAPSE_UNSHARED,
	ANON_THP_COLLAPSE_FULLY_SHARED,
	ANON_THP_COLLAPSE_LOWER_SHARED,
	ANON_THP_COLLAPSE_UPPER_SHARED,
};

static void do_test_anon_thp_collapse(char *mem, size_t size,
				      enum anon_thp_collapse_test test)
{
	struct comm_pipes comm_pipes;
	char buf;
	int ret;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		return;
	}

	/*
	 * Trigger PTE-mapping the THP by temporarily mapping a single subpage
	 * R/O, such that we can try collapsing it later.
	 */
	ret = mprotect(mem + pagesize, pagesize, PROT_READ);
	if (ret) {
		ksft_test_result_fail("mprotect() failed\n");
		goto close_comm_pipes;
	}
	ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
	if (ret) {
		ksft_test_result_fail("mprotect() failed\n");
		goto close_comm_pipes;
	}

	switch (test) {
	case ANON_THP_COLLAPSE_UNSHARED:
		/* Collapse before actually COW-sharing the page. */
		ret = madvise(mem, size, MADV_COLLAPSE);
		if (ret) {
			ksft_test_result_skip("MADV_COLLAPSE failed: %s\n",
					      strerror(errno));
			goto close_comm_pipes;
		}
		break;
	case ANON_THP_COLLAPSE_FULLY_SHARED:
		/* COW-share the full PTE-mapped THP. */
		break;
	case ANON_THP_COLLAPSE_LOWER_SHARED:
		/* Don't COW-share the upper part of the THP. */
		ret = madvise(mem + size / 2, size / 2, MADV_DONTFORK);
		if (ret) {
			ksft_test_result_fail("MADV_DONTFORK failed\n");
			goto close_comm_pipes;
		}
		break;
	case ANON_THP_COLLAPSE_UPPER_SHARED:
		/* Don't COW-share the lower part of the THP. */
		ret = madvise(mem, size / 2, MADV_DONTFORK);
		if (ret) {
			ksft_test_result_fail("MADV_DONTFORK failed\n");
			goto close_comm_pipes;
		}
		break;
	default:
		assert(false);
	}

	ret = fork();
	if (ret < 0) {
		ksft_test_result_fail("fork() failed\n");
		goto close_comm_pipes;
	} else if (!ret) {
		switch (test) {
		case ANON_THP_COLLAPSE_UNSHARED:
		case ANON_THP_COLLAPSE_FULLY_SHARED:
			exit(child_memcmp_fn(mem, size, &comm_pipes));
			break;
		case ANON_THP_COLLAPSE_LOWER_SHARED:
			exit(child_memcmp_fn(mem, size / 2, &comm_pipes));
			break;
		case ANON_THP_COLLAPSE_UPPER_SHARED:
			exit(child_memcmp_fn(mem + size / 2, size / 2,
					     &comm_pipes));
			break;
		default:
			assert(false);
		}
	}

	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
		;

	switch (test) {
	case ANON_THP_COLLAPSE_UNSHARED:
		break;
	case ANON_THP_COLLAPSE_UPPER_SHARED:
	case ANON_THP_COLLAPSE_LOWER_SHARED:
		/*
		 * Revert MADV_DONTFORK such that we merge the VMAs and are
		 * able to actually collapse.
		 */
		ret = madvise(mem, size, MADV_DOFORK);
		if (ret) {
			ksft_test_result_fail("MADV_DOFORK failed\n");
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}
		/* FALLTHROUGH */
	case ANON_THP_COLLAPSE_FULLY_SHARED:
		/* Collapse before anyone modified the COW-shared page. */
		ret = madvise(mem, size, MADV_COLLAPSE);
		if (ret) {
			ksft_test_result_skip("MADV_COLLAPSE failed: %s\n",
					      strerror(errno));
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}
		break;
	default:
		assert(false);
	}

	/* Modify the page. */
	memset(mem, 0xff, size);
	write(comm_pipes.parent_ready[1], "0", 1);

	wait(&ret);
	if (WIFEXITED(ret))
		ret = WEXITSTATUS(ret);
	else
		ret = -EINVAL;

	ksft_test_result(!ret, "No leak from parent into child\n");
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
}

static void test_anon_thp_collapse_unshared(char *mem, size_t size)
{
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UNSHARED);
}

static void test_anon_thp_collapse_fully_shared(char *mem, size_t size)
{
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_FULLY_SHARED);
}

static void test_anon_thp_collapse_lower_shared(char *mem, size_t size)
{
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_LOWER_SHARED);
}

static void test_anon_thp_collapse_upper_shared(char *mem, size_t size)
{
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UPPER_SHARED);
}

/*
 * Test cases that are specific to anonymous THP: pages in private mappings
 * that may get shared via COW during fork().
 */
static const struct test_case anon_thp_test_cases[] = {
	/*
	 * Basic COW test for fork() without any GUP when collapsing a THP
	 * before fork().
	 *
	 * Re-mapping a PTE-mapped anon THP using a single PMD ("in-place
	 * collapse") might easily get COW handling wrong when not collapsing
	 * exclusivity information properly.
	 */
	{
		"Basic COW after fork() when collapsing before fork()",
		test_anon_thp_collapse_unshared,
	},
	/* Basic COW test, but collapse after COW-sharing a full THP. */
	{
		"Basic COW after fork() when collapsing after fork() (fully shared)",
		test_anon_thp_collapse_fully_shared,
	},
	/*
	 * Basic COW test, but collapse after COW-sharing the lower half of a
	 * THP.
	 */
	{
		"Basic COW after fork() when collapsing after fork() (lower shared)",
		test_anon_thp_collapse_lower_shared,
	},
	/*
	 * Basic COW test, but collapse after COW-sharing the upper half of a
	 * THP.
	 */
	{
		"Basic COW after fork() when collapsing after fork() (upper shared)",
		test_anon_thp_collapse_upper_shared,
	},
};

static void run_anon_thp_test_cases(void)
{
	int i;

	if (!thpsize)
		return;

	ksft_print_msg("[INFO] Anonymous THP tests\n");

	for (i = 0; i < ARRAY_SIZE(anon_thp_test_cases); i++) {
		struct test_case const *test_case = &anon_thp_test_cases[i];

		ksft_print_msg("[RUN] %s\n", test_case->desc);
		do_run_with_thp(test_case->fn, THP_RUN_PMD);
	}
}

static int tests_per_anon_thp_test_case(void)
{
	return thpsize ? 1 : 0;
}

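/*
 * Non-anonymous tests get two mappings of the same backing memory: "mem" is
 * a private (COW) mapping that gets modified, and "smem" is a second mapping
 * used to verify that the original content stays intact.
 */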
typedef void (*non_anon_test_fn)(char *mem, const char *smem, size_t size);

static void test_cow(char *mem, const char *smem, size_t size)
{
	char *old = malloc(size);

	/* Backup the original content. */
	memcpy(old, smem, size);

	/* Modify the page. */
	memset(mem, 0xff, size);

	/* See if we still read the old values via the other mapping. */
	ksft_test_result(!memcmp(smem, old, size),
			 "Other mapping not modified\n");
	free(old);
}

static void test_ro_pin(char *mem, const char *smem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST, false);
}

static void test_ro_fast_pin(char *mem, const char *smem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST, true);
}

static void run_with_zeropage(non_anon_test_fn fn, const char *desc)
{
	char *mem, *smem, tmp;

	ksft_print_msg("[RUN] %s ... with shared zeropage\n", desc);

	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANON, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return;
	}

	smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
	if (smem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto munmap;
	}

	/* Read from the page to populate the shared zeropage. */
	tmp = *mem + *smem;
	asm volatile("" : "+r" (tmp));

	fn(mem, smem, pagesize);
munmap:
	munmap(mem, pagesize);
	if (smem != MAP_FAILED)
		munmap(smem, pagesize);
}

static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
{
	char *mem, *smem, *mmap_mem, *mmap_smem, tmp;
	size_t mmap_size;
	int ret;

	ksft_print_msg("[RUN] %s ... with huge zeropage\n", desc);

	if (!has_huge_zeropage) {
		ksft_test_result_skip("Huge zeropage not enabled\n");
		return;
	}

	/* For alignment purposes, we need twice the thp size. */
	mmap_size = 2 * thpsize;
	mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mmap_mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return;
	}
	mmap_smem = mmap(NULL, mmap_size, PROT_READ,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mmap_smem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto munmap;
	}

	/* We need a THP-aligned memory area. */
	mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1));
	smem = (char *)(((uintptr_t)mmap_smem + thpsize) & ~(thpsize - 1));

	ret = madvise(mem, thpsize, MADV_HUGEPAGE);
	ret |= madvise(smem, thpsize, MADV_HUGEPAGE);
	if (ret) {
		ksft_test_result_fail("MADV_HUGEPAGE failed\n");
		goto munmap;
	}

	/*
	 * Read from the memory to populate the huge shared zeropage. Read from
	 * the first sub-page and test if we get another sub-page populated
	 * automatically.
	 */
	tmp = *mem + *smem;
	asm volatile("" : "+r" (tmp));
	if (!pagemap_is_populated(pagemap_fd, mem + pagesize) ||
	    !pagemap_is_populated(pagemap_fd, smem + pagesize)) {
		ksft_test_result_skip("Did not get THPs populated\n");
		goto munmap;
	}

	fn(mem, smem, thpsize);
munmap:
	munmap(mmap_mem, mmap_size);
	if (mmap_smem != MAP_FAILED)
		munmap(mmap_smem, mmap_size);
}

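/*
 * The pagecache-backed variants below (memfd, tmpfile, memfd hugetlb) all
 * follow the same pattern: allocate a one-page file, map it both privately
 * and shared, fault it in, and run the test on the pair of mappings.
 */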
static void run_with_memfd(non_anon_test_fn fn, const char *desc)
{
	char *mem, *smem, tmp;
	int fd;

	ksft_print_msg("[RUN] %s ... with memfd\n", desc);

	fd = memfd_create("test", 0);
	if (fd < 0) {
		ksft_test_result_fail("memfd_create() failed\n");
		return;
	}

	/* File consists of a single page filled with zeroes. */
	if (fallocate(fd, 0, 0, pagesize)) {
		ksft_test_result_fail("fallocate() failed\n");
		goto close;
	}

	/* Create a private mapping of the memfd. */
	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto close;
	}
	smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
	if (smem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto munmap;
	}

	/* Fault the page in. */
	tmp = *mem + *smem;
	asm volatile("" : "+r" (tmp));

	fn(mem, smem, pagesize);
munmap:
	munmap(mem, pagesize);
	if (smem != MAP_FAILED)
		munmap(smem, pagesize);
close:
	close(fd);
}

static void run_with_tmpfile(non_anon_test_fn fn, const char *desc)
{
	char *mem, *smem, tmp;
	FILE *file;
	int fd;

	ksft_print_msg("[RUN] %s ... with tmpfile\n", desc);

	file = tmpfile();
	if (!file) {
		ksft_test_result_fail("tmpfile() failed\n");
		return;
	}

	fd = fileno(file);
	if (fd < 0) {
		ksft_test_result_skip("fileno() failed\n");
		goto close;
	}

	/* File consists of a single page filled with zeroes. */
	if (fallocate(fd, 0, 0, pagesize)) {
		ksft_test_result_fail("fallocate() failed\n");
		goto close;
	}

	/* Create a private mapping of the tmpfile. */
	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto close;
	}
	smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
	if (smem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto munmap;
	}

	/* Fault the page in. */
	tmp = *mem + *smem;
	asm volatile("" : "+r" (tmp));

	fn(mem, smem, pagesize);
munmap:
	munmap(mem, pagesize);
	if (smem != MAP_FAILED)
		munmap(smem, pagesize);
close:
	fclose(file);
}

static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc,
				   size_t hugetlbsize)
{
	int flags = MFD_HUGETLB;
	char *mem, *smem, tmp;
	int fd;

	ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc,
		       hugetlbsize / 1024);

	flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;

	fd = memfd_create("test", flags);
	if (fd < 0) {
		ksft_test_result_skip("memfd_create() failed\n");
		return;
	}

	/* File consists of a single huge page filled with zeroes. */
	if (fallocate(fd, 0, 0, hugetlbsize)) {
		ksft_test_result_skip("need more free huge pages\n");
		goto close;
	}

	/* Create a private mapping of the memfd. */
	mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd,
		   0);
	if (mem == MAP_FAILED) {
		ksft_test_result_skip("need more free huge pages\n");
		goto close;
	}
	smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0);
	if (smem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto munmap;
	}

	/* Fault the page in. */
	tmp = *mem + *smem;
	asm volatile("" : "+r" (tmp));

	fn(mem, smem, hugetlbsize);
munmap:
	munmap(mem, hugetlbsize);
	if (smem != MAP_FAILED)
		munmap(smem, hugetlbsize);
close:
	close(fd);
}

struct non_anon_test_case {
	const char *desc;
	non_anon_test_fn fn;
};

/*
 * Test cases that target any pages in private mappings that are not anonymous:
 * pages that may get shared via COW independent of fork(). This includes
 * the shared zeropage(s), pagecache pages, ...
 */
static const struct non_anon_test_case non_anon_test_cases[] = {
	/*
	 * Basic COW test without any GUP. If we fail to break COW, changes are
	 * visible via other private/shared mappings.
	 */
	{
		"Basic COW",
		test_cow,
	},
	/*
	 * Take a R/O longterm pin. When modifying the page via the page table,
	 * the page content change must be visible via the pin.
	 */
	{
		"R/O longterm GUP pin",
		test_ro_pin,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O longterm GUP-fast pin",
		test_ro_fast_pin,
	},
};

static void run_non_anon_test_case(struct non_anon_test_case const *test_case)
{
	int i;

	run_with_zeropage(test_case->fn, test_case->desc);
	run_with_memfd(test_case->fn, test_case->desc);
	run_with_tmpfile(test_case->fn, test_case->desc);
	if (thpsize)
		run_with_huge_zeropage(test_case->fn, test_case->desc);
	for (i = 0; i < nr_hugetlbsizes; i++)
		run_with_memfd_hugetlb(test_case->fn, test_case->desc,
				       hugetlbsizes[i]);
}

static void run_non_anon_test_cases(void)
{
	int i;

	ksft_print_msg("[INFO] Non-anonymous memory tests in private mappings\n");

	for (i = 0; i < ARRAY_SIZE(non_anon_test_cases); i++)
		run_non_anon_test_case(&non_anon_test_cases[i]);
}

static int tests_per_non_anon_test_case(void)
{
	int tests = 3 + nr_hugetlbsizes;

	if (thpsize)
		tests += 1;
	return tests;
}

int main(int argc, char **argv)
{
	int err;

	pagesize = getpagesize();
	thpsize = read_pmd_pagesize();
	if (thpsize)
		ksft_print_msg("[INFO] detected THP size: %zu KiB\n",
			       thpsize / 1024);
	nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes,
						    ARRAY_SIZE(hugetlbsizes));
	detect_huge_zeropage();

	ksft_print_header();
	ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() +
		      ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() +
		      ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case());

	gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
	if (pagemap_fd < 0)
		ksft_exit_fail_msg("opening pagemap failed\n");

	run_anon_test_cases();
	run_anon_thp_test_cases();
	run_non_anon_test_cases();

	err = ksft_get_fail_cnt();
	if (err)
		ksft_exit_fail_msg("%d out of %d tests failed\n",
				   err, ksft_test_num());
	return ksft_exit_pass();
}