// SPDX-License-Identifier: GPL-2.0-only
/*
 * COW (Copy On Write) tests.
 *
 * Copyright 2022, Red Hat, Inc.
 *
 * Author(s): David Hildenbrand <david@redhat.com>
 */
#define _GNU_SOURCE
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <stdint.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <dirent.h>
#include <assert.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/wait.h>
#include <linux/memfd.h>

#include "local_config.h"
#ifdef LOCAL_CONFIG_HAVE_LIBURING
#include <liburing.h>
#endif /* LOCAL_CONFIG_HAVE_LIBURING */

#include "../../../../mm/gup_test.h"
#include "../kselftest.h"
#include "vm_util.h"

#ifndef MADV_PAGEOUT
#define MADV_PAGEOUT 21
#endif
#ifndef MADV_COLLAPSE
#define MADV_COLLAPSE 25
#endif

static size_t pagesize;
static int pagemap_fd;
static size_t thpsize;
static int nr_hugetlbsizes;
static size_t hugetlbsizes[10];
static int gup_fd;
static bool has_huge_zeropage;

static void detect_thpsize(void)
{
	int fd = open("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size",
		      O_RDONLY);
	size_t size = 0;
	char buf[15];
	int ret;

	if (fd < 0)
		return;

	ret = pread(fd, buf, sizeof(buf), 0);
	if (ret > 0 && ret < sizeof(buf)) {
		buf[ret] = 0;

		size = strtoul(buf, NULL, 10);
		if (size < pagesize)
			size = 0;
		if (size > 0) {
			thpsize = size;
			ksft_print_msg("[INFO] detected THP size: %zu KiB\n",
				       thpsize / 1024);
		}
	}

	close(fd);
}

static void detect_huge_zeropage(void)
{
	int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page",
		      O_RDONLY);
	size_t enabled = 0;
	char buf[15];
	int ret;

	if (fd < 0)
		return;

	ret = pread(fd, buf, sizeof(buf), 0);
	if (ret > 0 && ret < sizeof(buf)) {
		buf[ret] = 0;

		enabled = strtoul(buf, NULL, 10);
		if (enabled == 1) {
			has_huge_zeropage = true;
			ksft_print_msg("[INFO] huge zeropage is enabled\n");
		}
	}

	close(fd);
}

static void detect_hugetlbsizes(void)
{
	DIR *dir = opendir("/sys/kernel/mm/hugepages/");

	if (!dir)
		return;

	while (nr_hugetlbsizes < ARRAY_SIZE(hugetlbsizes)) {
		struct dirent *entry = readdir(dir);
		size_t kb;

		if (!entry)
			break;
		if (entry->d_type != DT_DIR)
			continue;
		if (sscanf(entry->d_name, "hugepages-%zukB", &kb) != 1)
			continue;
		hugetlbsizes[nr_hugetlbsizes] = kb * 1024;
		nr_hugetlbsizes++;
		ksft_print_msg("[INFO] detected hugetlb size: %zu KiB\n",
			       kb);
	}
	closedir(dir);
}

static bool range_is_swapped(void *addr, size_t size)
{
	for (; size; addr += pagesize, size -= pagesize)
		if (!pagemap_is_swapped(pagemap_fd, addr))
			return false;
	return true;
}

struct comm_pipes {
	int child_ready[2];
	int parent_ready[2];
};

static int setup_comm_pipes(struct comm_pipes *comm_pipes)
{
	if (pipe(comm_pipes->child_ready) < 0)
		return -errno;
	if (pipe(comm_pipes->parent_ready) < 0) {
		close(comm_pipes->child_ready[0]);
		close(comm_pipes->child_ready[1]);
		return -errno;
	}

	return 0;
}

static void close_comm_pipes(struct comm_pipes *comm_pipes)
{
	close(comm_pipes->child_ready[0]);
	close(comm_pipes->child_ready[1]);
	close(comm_pipes->parent_ready[0]);
	close(comm_pipes->parent_ready[1]);
}
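
/*
 * Handshake used by all fork()-based tests below: the child writes a single
 * byte to child_ready[] once it finished preparing, and the parent writes a
 * single byte to parent_ready[] once it performed the access the child is
 * waiting for. A minimal sketch of the parent side:
 *
 *	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
 *		;
 *	... modify the memory ...
 *	write(comm_pipes.parent_ready[1], "0", 1);
 */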

static int child_memcmp_fn(char *mem, size_t size,
			   struct comm_pipes *comm_pipes)
{
	char *old = malloc(size);
	char buf;

	/* Backup the original content. */
	memcpy(old, mem, size);

	/* Wait until the parent modified the page. */
	write(comm_pipes->child_ready[1], "0", 1);
	while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
		;

	/* See if we still read the old values. */
	return memcmp(old, mem, size);
}

static int child_vmsplice_memcmp_fn(char *mem, size_t size,
				    struct comm_pipes *comm_pipes)
{
	struct iovec iov = {
		.iov_base = mem,
		.iov_len = size,
	};
	ssize_t cur, total, transferred;
	char *old, *new;
	int fds[2];
	char buf;

	old = malloc(size);
	new = malloc(size);

	/* Backup the original content. */
	memcpy(old, mem, size);

	if (pipe(fds) < 0)
		return -errno;

	/* Trigger a read-only pin. */
	transferred = vmsplice(fds[1], &iov, 1, 0);
	if (transferred < 0)
		return -errno;
	if (transferred == 0)
		return -EINVAL;

	/* Unmap it from our page tables. */
	if (munmap(mem, size) < 0)
		return -errno;

	/* Wait until the parent modified it. */
	write(comm_pipes->child_ready[1], "0", 1);
	while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
		;

	/* See if we still read the old values via the pipe. */
	for (total = 0; total < transferred; total += cur) {
		cur = read(fds[0], new + total, transferred - total);
		if (cur < 0)
			return -errno;
	}

	return memcmp(old, new, transferred);
}

typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes);
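
/*
 * A child_fn returns 0 if the child still observed the expected (old) memory
 * content and a nonzero value -- or a negative errno on setup failure --
 * otherwise; the parent turns that exit status into the test result.
 */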

static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
				  child_fn fn)
{
	struct comm_pipes comm_pipes;
	char buf;
	int ret;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		return;
	}

	ret = fork();
	if (ret < 0) {
		ksft_test_result_fail("fork() failed\n");
		goto close_comm_pipes;
	} else if (!ret) {
		exit(fn(mem, size, &comm_pipes));
	}

	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
		;

	if (do_mprotect) {
		/*
		 * mprotect() optimizations might try avoiding
		 * write-faults by directly mapping pages writable.
		 */
		ret = mprotect(mem, size, PROT_READ);
		ret |= mprotect(mem, size, PROT_READ|PROT_WRITE);
		if (ret) {
			ksft_test_result_fail("mprotect() failed\n");
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}
	}

	/* Modify the page. */
	memset(mem, 0xff, size);
	write(comm_pipes.parent_ready[1], "0", 1);

	wait(&ret);
	if (WIFEXITED(ret))
		ret = WEXITSTATUS(ret);
	else
		ret = -EINVAL;

	ksft_test_result(!ret, "No leak from parent into child\n");
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
}

static void test_cow_in_parent(char *mem, size_t size)
{
	do_test_cow_in_parent(mem, size, false, child_memcmp_fn);
}

static void test_cow_in_parent_mprotect(char *mem, size_t size)
{
	do_test_cow_in_parent(mem, size, true, child_memcmp_fn);
}

static void test_vmsplice_in_child(char *mem, size_t size)
{
	do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn);
}

static void test_vmsplice_in_child_mprotect(char *mem, size_t size)
{
	do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn);
}
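
/*
 * In do_test_vmsplice_in_parent() below, the parent takes the R/O pin via
 * vmsplice() and then unmaps the range, so the pipe ends up holding the only
 * reference to the pages. If COW page reuse is handled incorrectly, the
 * child's modifications become visible through the pipe.
 */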

static void do_test_vmsplice_in_parent(char *mem, size_t size,
				       bool before_fork)
{
	struct iovec iov = {
		.iov_base = mem,
		.iov_len = size,
	};
	ssize_t cur, total, transferred;
	struct comm_pipes comm_pipes;
	char *old, *new;
	int ret, fds[2];
	char buf;

	old = malloc(size);
	new = malloc(size);

	memcpy(old, mem, size);

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		goto free;
	}

	if (pipe(fds) < 0) {
		ksft_test_result_fail("pipe() failed\n");
		goto close_comm_pipes;
	}

	if (before_fork) {
		transferred = vmsplice(fds[1], &iov, 1, 0);
		if (transferred <= 0) {
			ksft_test_result_fail("vmsplice() failed\n");
			goto close_pipe;
		}
	}

	ret = fork();
	if (ret < 0) {
		ksft_test_result_fail("fork() failed\n");
		goto close_pipe;
	} else if (!ret) {
		write(comm_pipes.child_ready[1], "0", 1);
		while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
			;
		/* Modify page content in the child. */
		memset(mem, 0xff, size);
		exit(0);
	}

	if (!before_fork) {
		transferred = vmsplice(fds[1], &iov, 1, 0);
		if (transferred <= 0) {
			ksft_test_result_fail("vmsplice() failed\n");
			wait(&ret);
			goto close_pipe;
		}
	}

	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
		;
	if (munmap(mem, size) < 0) {
		ksft_test_result_fail("munmap() failed\n");
		goto close_pipe;
	}
	write(comm_pipes.parent_ready[1], "0", 1);

	/* Wait until the child is done writing. */
	wait(&ret);
	if (!WIFEXITED(ret)) {
		ksft_test_result_fail("wait() failed\n");
		goto close_pipe;
	}

	/* See if we still read the old values. */
	for (total = 0; total < transferred; total += cur) {
		cur = read(fds[0], new + total, transferred - total);
		if (cur < 0) {
			ksft_test_result_fail("read() failed\n");
			goto close_pipe;
		}
	}

	ksft_test_result(!memcmp(old, new, transferred),
			 "No leak from child into parent\n");
close_pipe:
	close(fds[0]);
	close(fds[1]);
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
free:
	free(old);
	free(new);
}

static void test_vmsplice_before_fork(char *mem, size_t size)
{
	do_test_vmsplice_in_parent(mem, size, true);
}

static void test_vmsplice_after_fork(char *mem, size_t size)
{
	do_test_vmsplice_in_parent(mem, size, false);
}

#ifdef LOCAL_CONFIG_HAVE_LIBURING
static void do_test_iouring(char *mem, size_t size, bool use_fork)
{
	struct comm_pipes comm_pipes;
	struct io_uring_cqe *cqe;
	struct io_uring_sqe *sqe;
	struct io_uring ring;
	ssize_t cur, total;
	struct iovec iov;
	char *tmp, buf;
	int ret, fd;
	FILE *file;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		return;
	}

	file = tmpfile();
	if (!file) {
		ksft_test_result_fail("tmpfile() failed\n");
		goto close_comm_pipes;
	}
	fd = fileno(file);
	assert(fd >= 0);

	tmp = malloc(size);
	if (!tmp) {
		ksft_test_result_fail("malloc() failed\n");
		goto close_file;
	}

	/* Skip on errors, as we might just lack kernel support. */
	ret = io_uring_queue_init(1, &ring, 0);
	if (ret < 0) {
		ksft_test_result_skip("io_uring_queue_init() failed\n");
		goto free_tmp;
	}

	/*
	 * Register the range as a fixed buffer. This will FOLL_WRITE |
	 * FOLL_PIN | FOLL_LONGTERM the range.
	 *
	 * Skip on errors, as we might just lack kernel support or might not
	 * have sufficient MEMLOCK permissions.
	 */
	iov.iov_base = mem;
	iov.iov_len = size;
	ret = io_uring_register_buffers(&ring, &iov, 1);
	if (ret) {
		ksft_test_result_skip("io_uring_register_buffers() failed\n");
		goto queue_exit;
	}

	if (use_fork) {
		/*
		 * fork() and keep the child alive until we're done. Note that
		 * we expect the pinned page to not get shared with the child.
		 */
		ret = fork();
		if (ret < 0) {
			ksft_test_result_fail("fork() failed\n");
			goto unregister_buffers;
		} else if (!ret) {
			write(comm_pipes.child_ready[1], "0", 1);
			while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
				;
			exit(0);
		}

		while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
			;
	} else {
		/*
		 * Map the page R/O into the page table. Enable softdirty
		 * tracking to stop the page from getting mapped R/W immediately
		 * again by mprotect() optimizations. Note that we don't have an
		 * easy way to test if that worked (the pagemap does not export
		 * if the page is mapped R/O vs. R/W).
		 */
		ret = mprotect(mem, size, PROT_READ);
		clear_softdirty();
		ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
		if (ret) {
			ksft_test_result_fail("mprotect() failed\n");
			goto unregister_buffers;
		}
	}

	/*
	 * Modify the page and write page content as observed by the fixed
	 * buffer pin to the file so we can verify it.
	 */
	memset(mem, 0xff, size);
	sqe = io_uring_get_sqe(&ring);
	if (!sqe) {
		ksft_test_result_fail("io_uring_get_sqe() failed\n");
		goto quit_child;
	}
	io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0);

	ret = io_uring_submit(&ring);
	if (ret < 0) {
		ksft_test_result_fail("io_uring_submit() failed\n");
		goto quit_child;
	}

	ret = io_uring_wait_cqe(&ring, &cqe);
	if (ret < 0) {
		ksft_test_result_fail("io_uring_wait_cqe() failed\n");
		goto quit_child;
	}

	if (cqe->res != size) {
		ksft_test_result_fail("write_fixed failed\n");
		goto quit_child;
	}
	io_uring_cqe_seen(&ring, cqe);

	/* Read back the file content to the temporary buffer. */
	total = 0;
	while (total < size) {
		cur = pread(fd, tmp + total, size - total, total);
		if (cur < 0) {
			ksft_test_result_fail("pread() failed\n");
			goto quit_child;
		}
		total += cur;
	}

	/* Finally, check if we read what we expected. */
	ksft_test_result(!memcmp(mem, tmp, size),
			 "Longterm R/W pin is reliable\n");

quit_child:
	if (use_fork) {
		write(comm_pipes.parent_ready[1], "0", 1);
		wait(&ret);
	}
unregister_buffers:
	io_uring_unregister_buffers(&ring);
queue_exit:
	io_uring_queue_exit(&ring);
free_tmp:
	free(tmp);
close_file:
	fclose(file);
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
}

static void test_iouring_ro(char *mem, size_t size)
{
	do_test_iouring(mem, size, false);
}

static void test_iouring_fork(char *mem, size_t size)
{
	do_test_iouring(mem, size, true);
}

#endif /* LOCAL_CONFIG_HAVE_LIBURING */
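
/*
 * The R/O pin tests cover the page states relevant for unsharing:
 * RO_PIN_TEST pins without any prior fork(), RO_PIN_TEST_SHARED pins while a
 * child still shares the pages, RO_PIN_TEST_PREVIOUSLY_SHARED pins after the
 * child quit (pages mapped R/O but exclusive again) and
 * RO_PIN_TEST_RO_EXCLUSIVE pins exclusive pages that were mapped R/O via
 * mprotect() with softdirty tracking enabled.
 */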

enum ro_pin_test {
	RO_PIN_TEST,
	RO_PIN_TEST_SHARED,
	RO_PIN_TEST_PREVIOUSLY_SHARED,
	RO_PIN_TEST_RO_EXCLUSIVE,
};

static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
			   bool fast)
{
	struct pin_longterm_test args;
	struct comm_pipes comm_pipes;
	char *tmp, buf;
	__u64 tmp_val;
	int ret;

	if (gup_fd < 0) {
		ksft_test_result_skip("gup_test not available\n");
		return;
	}

	tmp = malloc(size);
	if (!tmp) {
		ksft_test_result_fail("malloc() failed\n");
		return;
	}

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		goto free_tmp;
	}

	switch (test) {
	case RO_PIN_TEST:
		break;
	case RO_PIN_TEST_SHARED:
	case RO_PIN_TEST_PREVIOUSLY_SHARED:
		/*
		 * Share the pages with our child. As the pages are not pinned,
		 * this should just work.
		 */
		ret = fork();
		if (ret < 0) {
			ksft_test_result_fail("fork() failed\n");
			goto close_comm_pipes;
		} else if (!ret) {
			write(comm_pipes.child_ready[1], "0", 1);
			while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
				;
			exit(0);
		}

		/* Wait until our child is ready. */
		while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
			;

		if (test == RO_PIN_TEST_PREVIOUSLY_SHARED) {
			/*
			 * Tell the child to quit now and wait until it quit.
			 * The pages should now be mapped R/O into our page
			 * tables, but they are no longer shared.
			 */
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			if (!WIFEXITED(ret))
				ksft_print_msg("[INFO] wait() failed\n");
		}
		break;
	case RO_PIN_TEST_RO_EXCLUSIVE:
		/*
		 * Map the page R/O into the page table. Enable softdirty
		 * tracking to stop the page from getting mapped R/W immediately
		 * again by mprotect() optimizations. Note that we don't have an
		 * easy way to test if that worked (the pagemap does not export
		 * if the page is mapped R/O vs. R/W).
		 */
		ret = mprotect(mem, size, PROT_READ);
		clear_softdirty();
		ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
		if (ret) {
			ksft_test_result_fail("mprotect() failed\n");
			goto close_comm_pipes;
		}
		break;
	default:
		assert(false);
	}

	/* Take a R/O pin. This should trigger unsharing. */
	args.addr = (__u64)(uintptr_t)mem;
	args.size = size;
	args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0;
	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
	if (ret) {
		if (errno == EINVAL)
			ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n");
		else
			ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n");
		goto wait;
	}

	/* Modify the page. */
	memset(mem, 0xff, size);

	/*
	 * Read back the content via the pin to the temporary buffer and
	 * test if we observed the modification.
	 */
	tmp_val = (__u64)(uintptr_t)tmp;
	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val);
	if (ret)
		ksft_test_result_fail("PIN_LONGTERM_TEST_READ failed\n");
	else
		ksft_test_result(!memcmp(mem, tmp, size),
				 "Longterm R/O pin is reliable\n");

	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP);
	if (ret)
		ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n");
wait:
	switch (test) {
	case RO_PIN_TEST_SHARED:
		write(comm_pipes.parent_ready[1], "0", 1);
		wait(&ret);
		if (!WIFEXITED(ret))
			ksft_print_msg("[INFO] wait() failed\n");
		break;
	default:
		break;
	}
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
free_tmp:
	free(tmp);
}

static void test_ro_pin_on_shared(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, false);
}

static void test_ro_fast_pin_on_shared(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, true);
}

static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, false);
}

static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, true);
}

static void test_ro_pin_on_ro_exclusive(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, false);
}

static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, true);
}
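
/*
 * Each test_fn below is invoked for every supported backing: ordinary base
 * pages (optionally swapped out), THPs in various page-table states and each
 * detected hugetlb size; see run_anon_test_case().
 */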

typedef void (*test_fn)(char *mem, size_t size);

static void do_run_with_base_page(test_fn fn, bool swapout)
{
	char *mem;
	int ret;

	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return;
	}

	ret = madvise(mem, pagesize, MADV_NOHUGEPAGE);
	/* Ignore if MADV_NOHUGEPAGE is not available on this kernel. */
	if (ret && errno != EINVAL) {
		ksft_test_result_fail("MADV_NOHUGEPAGE failed\n");
		goto munmap;
	}

	/* Populate a base page. */
	memset(mem, 0, pagesize);

	if (swapout) {
		madvise(mem, pagesize, MADV_PAGEOUT);
		if (!pagemap_is_swapped(pagemap_fd, mem)) {
			ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
			goto munmap;
		}
	}

	fn(mem, pagesize);
munmap:
	munmap(mem, pagesize);
}

static void run_with_base_page(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with base page\n", desc);
	do_run_with_base_page(fn, false);
}

static void run_with_base_page_swap(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with swapped out base page\n", desc);
	do_run_with_base_page(fn, true);
}

enum thp_run {
	THP_RUN_PMD,
	THP_RUN_PMD_SWAPOUT,
	THP_RUN_PTE,
	THP_RUN_PTE_SWAPOUT,
	THP_RUN_SINGLE_PTE,
	THP_RUN_SINGLE_PTE_SWAPOUT,
	THP_RUN_PARTIAL_MREMAP,
	THP_RUN_PARTIAL_SHARED,
};
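
/*
 * do_run_with_thp() first populates a PMD-mapped THP and then converts it
 * into the page-table state named by the given thp_run: for example,
 * THP_RUN_PTE triggers PTE-mapping via a temporary mprotect() of a single
 * subpage, and THP_RUN_SINGLE_PTE additionally discards all but one subpage
 * via MADV_DONTNEED.
 */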

static void do_run_with_thp(test_fn fn, enum thp_run thp_run)
{
	char *mem, *mmap_mem, *tmp, *mremap_mem = MAP_FAILED;
	size_t size, mmap_size, mremap_size;
	int ret;

	/* For alignment purposes, we need twice the thp size. */
	mmap_size = 2 * thpsize;
	mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mmap_mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return;
	}

	/* We need a THP-aligned memory area. */
	mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1));

	ret = madvise(mem, thpsize, MADV_HUGEPAGE);
	if (ret) {
		ksft_test_result_fail("MADV_HUGEPAGE failed\n");
		goto munmap;
	}

	/*
	 * Try to populate a THP. Touch the first sub-page and test if we get
	 * another sub-page populated automatically.
	 */
	mem[0] = 0;
	if (!pagemap_is_populated(pagemap_fd, mem + pagesize)) {
		ksft_test_result_skip("Did not get a THP populated\n");
		goto munmap;
	}
	memset(mem, 0, thpsize);

	size = thpsize;
	switch (thp_run) {
	case THP_RUN_PMD:
	case THP_RUN_PMD_SWAPOUT:
		break;
	case THP_RUN_PTE:
	case THP_RUN_PTE_SWAPOUT:
		/*
		 * Trigger PTE-mapping the THP by temporarily mapping a single
		 * subpage R/O.
		 */
		ret = mprotect(mem + pagesize, pagesize, PROT_READ);
		if (ret) {
			ksft_test_result_fail("mprotect() failed\n");
			goto munmap;
		}
		ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
		if (ret) {
			ksft_test_result_fail("mprotect() failed\n");
			goto munmap;
		}
		break;
	case THP_RUN_SINGLE_PTE:
	case THP_RUN_SINGLE_PTE_SWAPOUT:
		/*
		 * Discard all but a single subpage of that PTE-mapped THP. What
		 * remains is a single PTE mapping a single subpage.
		 */
		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED);
		if (ret) {
			ksft_test_result_fail("MADV_DONTNEED failed\n");
			goto munmap;
		}
		size = pagesize;
		break;
	case THP_RUN_PARTIAL_MREMAP:
		/*
		 * Remap half of the THP. We need some new memory location
		 * for that.
		 */
		mremap_size = thpsize / 2;
		mremap_mem = mmap(NULL, mremap_size, PROT_NONE,
				  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (mremap_mem == MAP_FAILED) {
			ksft_test_result_fail("mmap() failed\n");
			goto munmap;
		}
		tmp = mremap(mem + mremap_size, mremap_size, mremap_size,
			     MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem);
		if (tmp != mremap_mem) {
			ksft_test_result_fail("mremap() failed\n");
			goto munmap;
		}
		size = mremap_size;
		break;
	case THP_RUN_PARTIAL_SHARED:
		/*
		 * Share the first page of the THP with a child and quit the
		 * child. This will result in some parts of the THP never
		 * having been shared.
		 */
		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK);
		if (ret) {
			ksft_test_result_fail("MADV_DONTFORK failed\n");
			goto munmap;
		}
		ret = fork();
		if (ret < 0) {
			ksft_test_result_fail("fork() failed\n");
			goto munmap;
		} else if (!ret) {
			exit(0);
		}
		wait(&ret);
		/* Allow for sharing all pages again. */
		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK);
		if (ret) {
			ksft_test_result_fail("MADV_DOFORK failed\n");
			goto munmap;
		}
		break;
	default:
		assert(false);
	}

	switch (thp_run) {
	case THP_RUN_PMD_SWAPOUT:
	case THP_RUN_PTE_SWAPOUT:
	case THP_RUN_SINGLE_PTE_SWAPOUT:
		madvise(mem, size, MADV_PAGEOUT);
		if (!range_is_swapped(mem, size)) {
			ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
			goto munmap;
		}
		break;
	default:
		break;
	}

	fn(mem, size);
munmap:
	munmap(mmap_mem, mmap_size);
	if (mremap_mem != MAP_FAILED)
		munmap(mremap_mem, mremap_size);
}

static void run_with_thp(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PMD);
}

static void run_with_thp_swap(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with swapped-out THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT);
}

static void run_with_pte_mapped_thp(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with PTE-mapped THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PTE);
}

static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT);
}

static void run_with_single_pte_of_thp(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with single PTE of THP\n", desc);
	do_run_with_thp(fn, THP_RUN_SINGLE_PTE);
}

static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP\n", desc);
	do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT);
}

static void run_with_partial_mremap_thp(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP);
}

static void run_with_partial_shared_thp(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with partially shared THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED);
}
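
/*
 * mmap(MAP_HUGETLB) selects the hugetlb size via its log2, encoded in the
 * flag bits starting at MAP_HUGE_SHIFT; for a power-of-two size,
 * __builtin_ctzll() computes exactly that log2. memfd_create() uses the same
 * encoding with MFD_HUGE_SHIFT further below.
 */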

static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize)
{
	int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
	char *mem, *dummy;

	ksft_print_msg("[RUN] %s ... with hugetlb (%zu kB)\n", desc,
		       hugetlbsize / 1024);

	flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT;

	mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_skip("need more free huge pages\n");
		return;
	}

	/* Populate a huge page. */
	memset(mem, 0, hugetlbsize);

	/*
	 * We need a total of two hugetlb pages to handle COW/unsharing
	 * properly, otherwise we might get zapped by a SIGBUS.
	 */
	dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
	if (dummy == MAP_FAILED) {
		ksft_test_result_skip("need more free huge pages\n");
		goto munmap;
	}
	munmap(dummy, hugetlbsize);

	fn(mem, hugetlbsize);
munmap:
	munmap(mem, hugetlbsize);
}

struct test_case {
	const char *desc;
	test_fn fn;
};

/*
 * Test cases that are specific to anonymous pages: pages in private mappings
 * that may get shared via COW during fork().
 */
static const struct test_case anon_test_cases[] = {
	/*
	 * Basic COW tests for fork() without any GUP. If we fail to break COW,
	 * either the child can observe modifications by the parent or the
	 * other way around.
	 */
	{
		"Basic COW after fork()",
		test_cow_in_parent,
	},
	/*
	 * Basic test, but do an additional mprotect(PROT_READ)+
	 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
	 */
	{
		"Basic COW after fork() with mprotect() optimization",
		test_cow_in_parent_mprotect,
	},
	/*
	 * vmsplice() [R/O GUP] + unmap in the child; modify in the parent. If
	 * we fail to break COW, the child observes modifications by the
	 * parent. This is CVE-2020-29374 reported by Jann Horn.
	 */
	{
		"vmsplice() + unmap in child",
		test_vmsplice_in_child
	},
	/*
	 * vmsplice() test, but do an additional mprotect(PROT_READ)+
	 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
	 */
	{
		"vmsplice() + unmap in child with mprotect() optimization",
		test_vmsplice_in_child_mprotect
	},
	/*
	 * vmsplice() [R/O GUP] in parent before fork(), unmap in parent after
	 * fork(); modify in the child. If we fail to break COW, the parent
	 * observes modifications by the child.
	 */
	{
		"vmsplice() before fork(), unmap in parent after fork()",
		test_vmsplice_before_fork,
	},
	/*
	 * vmsplice() [R/O GUP] + unmap in parent after fork(); modify in the
	 * child. If we fail to break COW, the parent observes modifications
	 * by the child.
	 */
	{
		"vmsplice() + unmap in parent after fork()",
		test_vmsplice_after_fork,
	},
#ifdef LOCAL_CONFIG_HAVE_LIBURING
	/*
	 * Take a R/W longterm pin and then map the page R/O into the page
	 * table to trigger a write fault on next access. When modifying the
	 * page, the page content must be visible via the pin.
	 */
	{
		"R/O-mapping a page registered as iouring fixed buffer",
		test_iouring_ro,
	},
	/*
	 * Take a R/W longterm pin and then fork() a child. When modifying the
	 * page, the page content must be visible via the pin. We expect the
	 * pinned page to not get shared with the child.
	 */
	{
		"fork() with an iouring fixed buffer",
		test_iouring_fork,
	},

#endif /* LOCAL_CONFIG_HAVE_LIBURING */
	/*
	 * Take a R/O longterm pin on a R/O-mapped shared anonymous page.
	 * When modifying the page via the page table, the page content change
	 * must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped shared page",
		test_ro_pin_on_shared,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped shared page",
		test_ro_fast_pin_on_shared,
	},
	/*
	 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page that
	 * was previously shared. When modifying the page via the page table,
	 * the page content change must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped previously-shared page",
		test_ro_pin_on_ro_previously_shared,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped previously-shared page",
		test_ro_fast_pin_on_ro_previously_shared,
	},
	/*
	 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page.
	 * When modifying the page via the page table, the page content change
	 * must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped exclusive page",
		test_ro_pin_on_ro_exclusive,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped exclusive page",
		test_ro_fast_pin_on_ro_exclusive,
	},
};

static void run_anon_test_case(struct test_case const *test_case)
{
	int i;

	run_with_base_page(test_case->fn, test_case->desc);
	run_with_base_page_swap(test_case->fn, test_case->desc);
	if (thpsize) {
		run_with_thp(test_case->fn, test_case->desc);
		run_with_thp_swap(test_case->fn, test_case->desc);
		run_with_pte_mapped_thp(test_case->fn, test_case->desc);
		run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc);
		run_with_single_pte_of_thp(test_case->fn, test_case->desc);
		run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc);
		run_with_partial_mremap_thp(test_case->fn, test_case->desc);
		run_with_partial_shared_thp(test_case->fn, test_case->desc);
	}
	for (i = 0; i < nr_hugetlbsizes; i++)
		run_with_hugetlb(test_case->fn, test_case->desc,
				 hugetlbsizes[i]);
}

static void run_anon_test_cases(void)
{
	int i;

	ksft_print_msg("[INFO] Anonymous memory tests in private mappings\n");

	for (i = 0; i < ARRAY_SIZE(anon_test_cases); i++)
		run_anon_test_case(&anon_test_cases[i]);
}

static int tests_per_anon_test_case(void)
{
	int tests = 2 + nr_hugetlbsizes;

	if (thpsize)
		tests += 8;
	return tests;
}
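
/*
 * The collapse tests PTE-map a THP, optionally COW-share parts of it with a
 * child, and then use MADV_COLLAPSE to re-map it using a single PMD. Such an
 * in-place collapse has to preserve the per-subpage exclusivity information,
 * otherwise subsequent COW handling in the parent can leak modifications
 * into the child.
 */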

enum anon_thp_collapse_test {
	ANON_THP_COLLAPSE_UNSHARED,
	ANON_THP_COLLAPSE_FULLY_SHARED,
	ANON_THP_COLLAPSE_LOWER_SHARED,
	ANON_THP_COLLAPSE_UPPER_SHARED,
};

static void do_test_anon_thp_collapse(char *mem, size_t size,
				      enum anon_thp_collapse_test test)
{
	struct comm_pipes comm_pipes;
	char buf;
	int ret;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		return;
	}

	/*
	 * Trigger PTE-mapping the THP by temporarily mapping a single subpage
	 * R/O, such that we can try collapsing it later.
	 */
	ret = mprotect(mem + pagesize, pagesize, PROT_READ);
	if (ret) {
		ksft_test_result_fail("mprotect() failed\n");
		goto close_comm_pipes;
	}
	ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
	if (ret) {
		ksft_test_result_fail("mprotect() failed\n");
		goto close_comm_pipes;
	}

	switch (test) {
	case ANON_THP_COLLAPSE_UNSHARED:
		/* Collapse before actually COW-sharing the page. */
		ret = madvise(mem, size, MADV_COLLAPSE);
		if (ret) {
			ksft_test_result_skip("MADV_COLLAPSE failed: %s\n",
					      strerror(errno));
			goto close_comm_pipes;
		}
		break;
	case ANON_THP_COLLAPSE_FULLY_SHARED:
		/* COW-share the full PTE-mapped THP. */
		break;
	case ANON_THP_COLLAPSE_LOWER_SHARED:
		/* Don't COW-share the upper part of the THP. */
		ret = madvise(mem + size / 2, size / 2, MADV_DONTFORK);
		if (ret) {
			ksft_test_result_fail("MADV_DONTFORK failed\n");
			goto close_comm_pipes;
		}
		break;
	case ANON_THP_COLLAPSE_UPPER_SHARED:
		/* Don't COW-share the lower part of the THP. */
		ret = madvise(mem, size / 2, MADV_DONTFORK);
		if (ret) {
			ksft_test_result_fail("MADV_DONTFORK failed\n");
			goto close_comm_pipes;
		}
		break;
	default:
		assert(false);
	}

	ret = fork();
	if (ret < 0) {
		ksft_test_result_fail("fork() failed\n");
		goto close_comm_pipes;
	} else if (!ret) {
		switch (test) {
		case ANON_THP_COLLAPSE_UNSHARED:
		case ANON_THP_COLLAPSE_FULLY_SHARED:
			exit(child_memcmp_fn(mem, size, &comm_pipes));
			break;
		case ANON_THP_COLLAPSE_LOWER_SHARED:
			exit(child_memcmp_fn(mem, size / 2, &comm_pipes));
			break;
		case ANON_THP_COLLAPSE_UPPER_SHARED:
			exit(child_memcmp_fn(mem + size / 2, size / 2,
					     &comm_pipes));
			break;
		default:
			assert(false);
		}
	}

	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
		;

	switch (test) {
	case ANON_THP_COLLAPSE_UNSHARED:
		break;
	case ANON_THP_COLLAPSE_UPPER_SHARED:
	case ANON_THP_COLLAPSE_LOWER_SHARED:
		/*
		 * Revert MADV_DONTFORK such that we merge the VMAs and are
		 * able to actually collapse.
		 */
		ret = madvise(mem, size, MADV_DOFORK);
		if (ret) {
			ksft_test_result_fail("MADV_DOFORK failed\n");
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}
		/* FALLTHROUGH */
	case ANON_THP_COLLAPSE_FULLY_SHARED:
		/* Collapse before anyone modified the COW-shared page. */
		ret = madvise(mem, size, MADV_COLLAPSE);
		if (ret) {
			ksft_test_result_skip("MADV_COLLAPSE failed: %s\n",
					      strerror(errno));
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}
		break;
	default:
		assert(false);
	}

	/* Modify the page. */
	memset(mem, 0xff, size);
	write(comm_pipes.parent_ready[1], "0", 1);

	wait(&ret);
	if (WIFEXITED(ret))
		ret = WEXITSTATUS(ret);
	else
		ret = -EINVAL;

	ksft_test_result(!ret, "No leak from parent into child\n");
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
}

static void test_anon_thp_collapse_unshared(char *mem, size_t size)
{
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UNSHARED);
}

static void test_anon_thp_collapse_fully_shared(char *mem, size_t size)
{
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_FULLY_SHARED);
}

static void test_anon_thp_collapse_lower_shared(char *mem, size_t size)
{
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_LOWER_SHARED);
}

static void test_anon_thp_collapse_upper_shared(char *mem, size_t size)
{
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UPPER_SHARED);
}

/*
 * Test cases that are specific to anonymous THP: pages in private mappings
 * that may get shared via COW during fork().
 */
static const struct test_case anon_thp_test_cases[] = {
	/*
	 * Basic COW test for fork() without any GUP when collapsing a THP
	 * before fork().
	 *
	 * Re-mapping a PTE-mapped anon THP using a single PMD ("in-place
	 * collapse") might easily get COW handling wrong when not collapsing
	 * exclusivity information properly.
	 */
	{
		"Basic COW after fork() when collapsing before fork()",
		test_anon_thp_collapse_unshared,
	},
	/* Basic COW test, but collapse after COW-sharing a full THP. */
	{
		"Basic COW after fork() when collapsing after fork() (fully shared)",
		test_anon_thp_collapse_fully_shared,
	},
	/*
	 * Basic COW test, but collapse after COW-sharing the lower half of a
	 * THP.
	 */
	{
		"Basic COW after fork() when collapsing after fork() (lower shared)",
		test_anon_thp_collapse_lower_shared,
	},
	/*
	 * Basic COW test, but collapse after COW-sharing the upper half of a
	 * THP.
	 */
	{
		"Basic COW after fork() when collapsing after fork() (upper shared)",
		test_anon_thp_collapse_upper_shared,
	},
};

static void run_anon_thp_test_cases(void)
{
	int i;

	if (!thpsize)
		return;

	ksft_print_msg("[INFO] Anonymous THP tests\n");

	for (i = 0; i < ARRAY_SIZE(anon_thp_test_cases); i++) {
		struct test_case const *test_case = &anon_thp_test_cases[i];

		ksft_print_msg("[RUN] %s\n", test_case->desc);
		do_run_with_thp(test_case->fn, THP_RUN_PMD);
	}
}

static int tests_per_anon_thp_test_case(void)
{
	return thpsize ? 1 : 0;
}

typedef void (*non_anon_test_fn)(char *mem, const char *smem, size_t size);
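
/*
 * Non-anonymous tests operate on two mappings: "mem" is a private, writable
 * mapping that has to COW on write, and "smem" is a second R/O mapping
 * through which the original page (shared zeropage, pagecache page, ...)
 * stays reachable, so modifications leaking out of "mem" can be detected.
 */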

static void test_cow(char *mem, const char *smem, size_t size)
{
	char *old = malloc(size);

	/* Backup the original content. */
	memcpy(old, smem, size);

	/* Modify the page. */
	memset(mem, 0xff, size);

	/* See if we still read the old values via the other mapping. */
	ksft_test_result(!memcmp(smem, old, size),
			 "Other mapping not modified\n");
	free(old);
}

static void test_ro_pin(char *mem, const char *smem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST, false);
}

static void test_ro_fast_pin(char *mem, const char *smem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST, true);
}

static void run_with_zeropage(non_anon_test_fn fn, const char *desc)
{
	char *mem, *smem, tmp;

	ksft_print_msg("[RUN] %s ... with shared zeropage\n", desc);

	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANON, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return;
	}

	smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
	if (smem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto munmap;
	}

	/* Read from the page to populate the shared zeropage. */
	tmp = *mem + *smem;
	asm volatile("" : "+r" (tmp));

	fn(mem, smem, pagesize);
munmap:
	munmap(mem, pagesize);
	if (smem != MAP_FAILED)
		munmap(smem, pagesize);
}
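
/*
 * The "tmp = *mem + *smem" read combined with the empty asm that pretends to
 * modify tmp ("+r") acts as a compiler barrier: the loads cannot be
 * optimized away, so the read faults genuinely populate the (huge) shared
 * zeropage or pagecache page in run_with_zeropage() above and in the
 * run_with_*() helpers below.
 */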

static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
{
	char *mem, *smem, *mmap_mem, *mmap_smem, tmp;
	size_t mmap_size;
	int ret;

	ksft_print_msg("[RUN] %s ... with huge zeropage\n", desc);

	if (!has_huge_zeropage) {
		ksft_test_result_skip("Huge zeropage not enabled\n");
		return;
	}

	/* For alignment purposes, we need twice the thp size. */
	mmap_size = 2 * thpsize;
	mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mmap_mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return;
	}
	mmap_smem = mmap(NULL, mmap_size, PROT_READ,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mmap_smem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto munmap;
	}

	/* We need a THP-aligned memory area. */
	mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1));
	smem = (char *)(((uintptr_t)mmap_smem + thpsize) & ~(thpsize - 1));

	ret = madvise(mem, thpsize, MADV_HUGEPAGE);
	ret |= madvise(smem, thpsize, MADV_HUGEPAGE);
	if (ret) {
		ksft_test_result_fail("MADV_HUGEPAGE failed\n");
		goto munmap;
	}

	/*
	 * Read from the memory to populate the huge shared zeropage. Read from
	 * the first sub-page and test if we get another sub-page populated
	 * automatically.
	 */
	tmp = *mem + *smem;
	asm volatile("" : "+r" (tmp));
	if (!pagemap_is_populated(pagemap_fd, mem + pagesize) ||
	    !pagemap_is_populated(pagemap_fd, smem + pagesize)) {
		ksft_test_result_skip("Did not get THPs populated\n");
		goto munmap;
	}

	fn(mem, smem, thpsize);
munmap:
	munmap(mmap_mem, mmap_size);
	if (mmap_smem != MAP_FAILED)
		munmap(mmap_smem, mmap_size);
}

static void run_with_memfd(non_anon_test_fn fn, const char *desc)
{
	char *mem, *smem, tmp;
	int fd;

	ksft_print_msg("[RUN] %s ... with memfd\n", desc);

	fd = memfd_create("test", 0);
	if (fd < 0) {
		ksft_test_result_fail("memfd_create() failed\n");
		return;
	}

	/* File consists of a single page filled with zeroes. */
	if (fallocate(fd, 0, 0, pagesize)) {
		ksft_test_result_fail("fallocate() failed\n");
		goto close;
	}

	/* Create a private mapping of the memfd. */
	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto close;
	}
	smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
	if (smem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto munmap;
	}

	/* Fault the page in. */
	tmp = *mem + *smem;
	asm volatile("" : "+r" (tmp));

	fn(mem, smem, pagesize);
munmap:
	munmap(mem, pagesize);
	if (smem != MAP_FAILED)
		munmap(smem, pagesize);
close:
	close(fd);
}

static void run_with_tmpfile(non_anon_test_fn fn, const char *desc)
{
	char *mem, *smem, tmp;
	FILE *file;
	int fd;

	ksft_print_msg("[RUN] %s ... with tmpfile\n", desc);

	file = tmpfile();
	if (!file) {
		ksft_test_result_fail("tmpfile() failed\n");
		return;
	}

	fd = fileno(file);
	if (fd < 0) {
		ksft_test_result_skip("fileno() failed\n");
		goto close;
	}

	/* File consists of a single page filled with zeroes. */
	if (fallocate(fd, 0, 0, pagesize)) {
		ksft_test_result_fail("fallocate() failed\n");
		goto close;
	}

	/* Create a private mapping of the tmpfile. */
	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto close;
	}
	smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
	if (smem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto munmap;
	}

	/* Fault the page in. */
	tmp = *mem + *smem;
	asm volatile("" : "+r" (tmp));

	fn(mem, smem, pagesize);
munmap:
	munmap(mem, pagesize);
	if (smem != MAP_FAILED)
		munmap(smem, pagesize);
close:
	fclose(file);
}

static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc,
				   size_t hugetlbsize)
{
	int flags = MFD_HUGETLB;
	char *mem, *smem, tmp;
	int fd;

	ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc,
		       hugetlbsize / 1024);

	flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;

	fd = memfd_create("test", flags);
	if (fd < 0) {
		ksft_test_result_skip("memfd_create() failed\n");
		return;
	}

	/* File consists of a single page filled with zeroes. */
	if (fallocate(fd, 0, 0, hugetlbsize)) {
		ksft_test_result_skip("need more free huge pages\n");
		goto close;
	}

	/* Create a private mapping of the memfd. */
	mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd,
		   0);
	if (mem == MAP_FAILED) {
		ksft_test_result_skip("need more free huge pages\n");
		goto close;
	}
	smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0);
	if (smem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto munmap;
	}

	/* Fault the page in. */
	tmp = *mem + *smem;
	asm volatile("" : "+r" (tmp));

	fn(mem, smem, hugetlbsize);
munmap:
	munmap(mem, hugetlbsize);
	if (smem != MAP_FAILED)
		munmap(smem, hugetlbsize);
close:
	close(fd);
}

struct non_anon_test_case {
	const char *desc;
	non_anon_test_fn fn;
};

/*
 * Test cases that target any pages in private mappings that are not
 * anonymous: pages that may get shared via COW independent of fork(). This
 * includes the shared zeropage(s), pagecache pages, ...
 */
static const struct non_anon_test_case non_anon_test_cases[] = {
	/*
	 * Basic COW test without any GUP. If we fail to break COW, changes
	 * are visible via other private/shared mappings.
	 */
	{
		"Basic COW",
		test_cow,
	},
	/*
	 * Take a R/O longterm pin. When modifying the page via the page table,
	 * the page content change must be visible via the pin.
	 */
	{
		"R/O longterm GUP pin",
		test_ro_pin,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O longterm GUP-fast pin",
		test_ro_fast_pin,
	},
};

static void run_non_anon_test_case(struct non_anon_test_case const *test_case)
{
	int i;

	run_with_zeropage(test_case->fn, test_case->desc);
	run_with_memfd(test_case->fn, test_case->desc);
	run_with_tmpfile(test_case->fn, test_case->desc);
	if (thpsize)
		run_with_huge_zeropage(test_case->fn, test_case->desc);
	for (i = 0; i < nr_hugetlbsizes; i++)
		run_with_memfd_hugetlb(test_case->fn, test_case->desc,
				       hugetlbsizes[i]);
}

static void run_non_anon_test_cases(void)
{
	int i;

	ksft_print_msg("[INFO] Non-anonymous memory tests in private mappings\n");

	for (i = 0; i < ARRAY_SIZE(non_anon_test_cases); i++)
		run_non_anon_test_case(&non_anon_test_cases[i]);
}

static int tests_per_non_anon_test_case(void)
{
	int tests = 3 + nr_hugetlbsizes;

	if (thpsize)
		tests += 1;
	return tests;
}

int main(int argc, char **argv)
{
	int err;

	pagesize = getpagesize();
	detect_thpsize();
	detect_hugetlbsizes();
	detect_huge_zeropage();

	ksft_print_header();
	ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() +
		      ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() +
		      ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case());

	gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
	if (pagemap_fd < 0)
		ksft_exit_fail_msg("opening pagemap failed\n");

	run_anon_test_cases();
	run_anon_thp_test_cases();
	run_non_anon_test_cases();

	err = ksft_get_fail_cnt();
	if (err)
		ksft_exit_fail_msg("%d out of %d tests failed\n",
				   err, ksft_test_num());
	return ksft_exit_pass();
}