1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Userfaultfd unit tests. 4 * 5 * Copyright (C) 2015-2023 Red Hat, Inc. 6 */ 7 8 #include "uffd-common.h" 9 10 #include "../../../../mm/gup_test.h" 11 12 #ifdef __NR_userfaultfd 13 14 /* The unit test doesn't need a large or random size, make it 32MB for now */ 15 #define UFFD_TEST_MEM_SIZE (32UL << 20) 16 17 #define MEM_ANON BIT_ULL(0) 18 #define MEM_SHMEM BIT_ULL(1) 19 #define MEM_SHMEM_PRIVATE BIT_ULL(2) 20 #define MEM_HUGETLB BIT_ULL(3) 21 #define MEM_HUGETLB_PRIVATE BIT_ULL(4) 22 23 #define MEM_ALL (MEM_ANON | MEM_SHMEM | MEM_SHMEM_PRIVATE | \ 24 MEM_HUGETLB | MEM_HUGETLB_PRIVATE) 25 26 #define ALIGN_UP(x, align_to) \ 27 ((__typeof__(x))((((unsigned long)(x)) + ((align_to)-1)) & ~((align_to)-1))) 28 29 #define MAX(a, b) (((a) > (b)) ? (a) : (b)) 30 31 struct mem_type { 32 const char *name; 33 unsigned int mem_flag; 34 uffd_test_ops_t *mem_ops; 35 bool shared; 36 }; 37 typedef struct mem_type mem_type_t; 38 39 mem_type_t mem_types[] = { 40 { 41 .name = "anon", 42 .mem_flag = MEM_ANON, 43 .mem_ops = &anon_uffd_test_ops, 44 .shared = false, 45 }, 46 { 47 .name = "shmem", 48 .mem_flag = MEM_SHMEM, 49 .mem_ops = &shmem_uffd_test_ops, 50 .shared = true, 51 }, 52 { 53 .name = "shmem-private", 54 .mem_flag = MEM_SHMEM_PRIVATE, 55 .mem_ops = &shmem_uffd_test_ops, 56 .shared = false, 57 }, 58 { 59 .name = "hugetlb", 60 .mem_flag = MEM_HUGETLB, 61 .mem_ops = &hugetlb_uffd_test_ops, 62 .shared = true, 63 }, 64 { 65 .name = "hugetlb-private", 66 .mem_flag = MEM_HUGETLB_PRIVATE, 67 .mem_ops = &hugetlb_uffd_test_ops, 68 .shared = false, 69 }, 70 }; 71 72 /* Arguments to be passed over to each uffd unit test */ 73 struct uffd_test_args { 74 mem_type_t *mem_type; 75 }; 76 typedef struct uffd_test_args uffd_test_args_t; 77 78 /* Returns: UFFD_TEST_* */ 79 typedef void (*uffd_test_fn)(uffd_test_args_t *); 80 81 typedef struct { 82 const char *name; 83 uffd_test_fn uffd_fn; 84 unsigned int mem_targets; 85 uint64_t uffd_feature_required; 86 uffd_test_case_ops_t *test_case_ops; 87 } uffd_test_case_t; 88 89 static void uffd_test_report(void) 90 { 91 printf("Userfaults unit tests: pass=%u, skip=%u, fail=%u (total=%u)\n", 92 ksft_get_pass_cnt(), 93 ksft_get_xskip_cnt(), 94 ksft_get_fail_cnt(), 95 ksft_test_num()); 96 } 97 98 static void uffd_test_pass(void) 99 { 100 printf("done\n"); 101 ksft_inc_pass_cnt(); 102 } 103 104 #define uffd_test_start(...) do { \ 105 printf("Testing "); \ 106 printf(__VA_ARGS__); \ 107 printf("... "); \ 108 fflush(stdout); \ 109 } while (0) 110 111 #define uffd_test_fail(...) do { \ 112 printf("failed [reason: "); \ 113 printf(__VA_ARGS__); \ 114 printf("]\n"); \ 115 ksft_inc_fail_cnt(); \ 116 } while (0) 117 118 static void uffd_test_skip(const char *message) 119 { 120 printf("skipped [reason: %s]\n", message); 121 ksft_inc_xskip_cnt(); 122 } 123 124 /* 125 * Returns 1 if specific userfaultfd supported, 0 otherwise. Note, we'll 126 * return 1 even if some test failed as long as uffd supported, because in 127 * that case we still want to proceed with the rest uffd unit tests. 128 */ 129 static int test_uffd_api(bool use_dev) 130 { 131 struct uffdio_api uffdio_api; 132 int uffd; 133 134 uffd_test_start("UFFDIO_API (with %s)", 135 use_dev ? "/dev/userfaultfd" : "syscall"); 136 137 if (use_dev) 138 uffd = uffd_open_dev(UFFD_FLAGS); 139 else 140 uffd = uffd_open_sys(UFFD_FLAGS); 141 if (uffd < 0) { 142 uffd_test_skip("cannot open userfaultfd handle"); 143 return 0; 144 } 145 146 /* Test wrong UFFD_API */ 147 uffdio_api.api = 0xab; 148 uffdio_api.features = 0; 149 if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) { 150 uffd_test_fail("UFFDIO_API should fail with wrong api but didn't"); 151 goto out; 152 } 153 154 /* Test wrong feature bit */ 155 uffdio_api.api = UFFD_API; 156 uffdio_api.features = BIT_ULL(63); 157 if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) { 158 uffd_test_fail("UFFDIO_API should fail with wrong feature but didn't"); 159 goto out; 160 } 161 162 /* Test normal UFFDIO_API */ 163 uffdio_api.api = UFFD_API; 164 uffdio_api.features = 0; 165 if (ioctl(uffd, UFFDIO_API, &uffdio_api)) { 166 uffd_test_fail("UFFDIO_API should succeed but failed"); 167 goto out; 168 } 169 170 /* Test double requests of UFFDIO_API with a random feature set */ 171 uffdio_api.features = BIT_ULL(0); 172 if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) { 173 uffd_test_fail("UFFDIO_API should reject initialized uffd"); 174 goto out; 175 } 176 177 uffd_test_pass(); 178 out: 179 close(uffd); 180 /* We have a valid uffd handle */ 181 return 1; 182 } 183 184 /* 185 * This function initializes the global variables. TODO: remove global 186 * vars and then remove this. 187 */ 188 static int 189 uffd_setup_environment(uffd_test_args_t *args, uffd_test_case_t *test, 190 mem_type_t *mem_type, const char **errmsg) 191 { 192 map_shared = mem_type->shared; 193 uffd_test_ops = mem_type->mem_ops; 194 uffd_test_case_ops = test->test_case_ops; 195 196 if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB)) 197 page_size = default_huge_page_size(); 198 else 199 page_size = psize(); 200 201 /* Ensure we have at least 2 pages */ 202 nr_pages = MAX(UFFD_TEST_MEM_SIZE, page_size * 2) / page_size; 203 /* TODO: remove this global var.. it's so ugly */ 204 nr_parallel = 1; 205 206 /* Initialize test arguments */ 207 args->mem_type = mem_type; 208 209 return uffd_test_ctx_init(test->uffd_feature_required, errmsg); 210 } 211 212 static bool uffd_feature_supported(uffd_test_case_t *test) 213 { 214 uint64_t features; 215 216 if (uffd_get_features(&features)) 217 return false; 218 219 return (features & test->uffd_feature_required) == 220 test->uffd_feature_required; 221 } 222 223 static int pagemap_open(void) 224 { 225 int fd = open("/proc/self/pagemap", O_RDONLY); 226 227 if (fd < 0) 228 err("open pagemap"); 229 230 return fd; 231 } 232 233 /* This macro let __LINE__ works in err() */ 234 #define pagemap_check_wp(value, wp) do { \ 235 if (!!(value & PM_UFFD_WP) != wp) \ 236 err("pagemap uffd-wp bit error: 0x%"PRIx64, value); \ 237 } while (0) 238 239 typedef struct { 240 int parent_uffd, child_uffd; 241 } fork_event_args; 242 243 static void *fork_event_consumer(void *data) 244 { 245 fork_event_args *args = data; 246 struct uffd_msg msg = { 0 }; 247 248 ready_for_fork = true; 249 250 /* Read until a full msg received */ 251 while (uffd_read_msg(args->parent_uffd, &msg)); 252 253 if (msg.event != UFFD_EVENT_FORK) 254 err("wrong message: %u\n", msg.event); 255 256 /* Just to be properly freed later */ 257 args->child_uffd = msg.arg.fork.ufd; 258 return NULL; 259 } 260 261 typedef struct { 262 int gup_fd; 263 bool pinned; 264 } pin_args; 265 266 /* 267 * Returns 0 if succeed, <0 for errors. pin_pages() needs to be paired 268 * with unpin_pages(). Currently it needs to be RO longterm pin to satisfy 269 * all needs of the test cases (e.g., trigger unshare, trigger fork() early 270 * CoW, etc.). 271 */ 272 static int pin_pages(pin_args *args, void *buffer, size_t size) 273 { 274 struct pin_longterm_test test = { 275 .addr = (uintptr_t)buffer, 276 .size = size, 277 /* Read-only pins */ 278 .flags = 0, 279 }; 280 281 if (args->pinned) 282 err("already pinned"); 283 284 args->gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR); 285 if (args->gup_fd < 0) 286 return -errno; 287 288 if (ioctl(args->gup_fd, PIN_LONGTERM_TEST_START, &test)) { 289 /* Even if gup_test existed, can be an old gup_test / kernel */ 290 close(args->gup_fd); 291 return -errno; 292 } 293 args->pinned = true; 294 return 0; 295 } 296 297 static void unpin_pages(pin_args *args) 298 { 299 if (!args->pinned) 300 err("unpin without pin first"); 301 if (ioctl(args->gup_fd, PIN_LONGTERM_TEST_STOP)) 302 err("PIN_LONGTERM_TEST_STOP"); 303 close(args->gup_fd); 304 args->pinned = false; 305 } 306 307 static int pagemap_test_fork(int uffd, bool with_event, bool test_pin) 308 { 309 fork_event_args args = { .parent_uffd = uffd, .child_uffd = -1 }; 310 pthread_t thread; 311 pid_t child; 312 uint64_t value; 313 int fd, result; 314 315 /* Prepare a thread to resolve EVENT_FORK */ 316 if (with_event) { 317 ready_for_fork = false; 318 if (pthread_create(&thread, NULL, fork_event_consumer, &args)) 319 err("pthread_create()"); 320 while (!ready_for_fork) 321 ; /* Wait for the poll_thread to start executing before forking */ 322 } 323 324 child = fork(); 325 if (!child) { 326 /* Open the pagemap fd of the child itself */ 327 pin_args args = {}; 328 329 fd = pagemap_open(); 330 331 if (test_pin && pin_pages(&args, area_dst, page_size)) 332 /* 333 * Normally when reach here we have pinned in 334 * previous tests, so shouldn't fail anymore 335 */ 336 err("pin page failed in child"); 337 338 value = pagemap_get_entry(fd, area_dst); 339 /* 340 * After fork(), we should handle uffd-wp bit differently: 341 * 342 * (1) when with EVENT_FORK, it should persist 343 * (2) when without EVENT_FORK, it should be dropped 344 */ 345 pagemap_check_wp(value, with_event); 346 if (test_pin) 347 unpin_pages(&args); 348 /* Succeed */ 349 exit(0); 350 } 351 waitpid(child, &result, 0); 352 353 if (with_event) { 354 if (pthread_join(thread, NULL)) 355 err("pthread_join()"); 356 if (args.child_uffd < 0) 357 err("Didn't receive child uffd"); 358 close(args.child_uffd); 359 } 360 361 return result; 362 } 363 364 static void uffd_wp_unpopulated_test(uffd_test_args_t *args) 365 { 366 uint64_t value; 367 int pagemap_fd; 368 369 if (uffd_register(uffd, area_dst, nr_pages * page_size, 370 false, true, false)) 371 err("register failed"); 372 373 pagemap_fd = pagemap_open(); 374 375 /* Test applying pte marker to anon unpopulated */ 376 wp_range(uffd, (uint64_t)area_dst, page_size, true); 377 value = pagemap_get_entry(pagemap_fd, area_dst); 378 pagemap_check_wp(value, true); 379 380 /* Test unprotect on anon pte marker */ 381 wp_range(uffd, (uint64_t)area_dst, page_size, false); 382 value = pagemap_get_entry(pagemap_fd, area_dst); 383 pagemap_check_wp(value, false); 384 385 /* Test zap on anon marker */ 386 wp_range(uffd, (uint64_t)area_dst, page_size, true); 387 if (madvise(area_dst, page_size, MADV_DONTNEED)) 388 err("madvise(MADV_DONTNEED) failed"); 389 value = pagemap_get_entry(pagemap_fd, area_dst); 390 pagemap_check_wp(value, false); 391 392 /* Test fault in after marker removed */ 393 *area_dst = 1; 394 value = pagemap_get_entry(pagemap_fd, area_dst); 395 pagemap_check_wp(value, false); 396 /* Drop it to make pte none again */ 397 if (madvise(area_dst, page_size, MADV_DONTNEED)) 398 err("madvise(MADV_DONTNEED) failed"); 399 400 /* Test read-zero-page upon pte marker */ 401 wp_range(uffd, (uint64_t)area_dst, page_size, true); 402 *(volatile char *)area_dst; 403 /* Drop it to make pte none again */ 404 if (madvise(area_dst, page_size, MADV_DONTNEED)) 405 err("madvise(MADV_DONTNEED) failed"); 406 407 uffd_test_pass(); 408 } 409 410 static void uffd_wp_fork_test_common(uffd_test_args_t *args, 411 bool with_event) 412 { 413 int pagemap_fd; 414 uint64_t value; 415 416 if (uffd_register(uffd, area_dst, nr_pages * page_size, 417 false, true, false)) 418 err("register failed"); 419 420 pagemap_fd = pagemap_open(); 421 422 /* Touch the page */ 423 *area_dst = 1; 424 wp_range(uffd, (uint64_t)area_dst, page_size, true); 425 value = pagemap_get_entry(pagemap_fd, area_dst); 426 pagemap_check_wp(value, true); 427 if (pagemap_test_fork(uffd, with_event, false)) { 428 uffd_test_fail("Detected %s uffd-wp bit in child in present pte", 429 with_event ? "missing" : "stall"); 430 goto out; 431 } 432 433 /* 434 * This is an attempt for zapping the pgtable so as to test the 435 * markers. 436 * 437 * For private mappings, PAGEOUT will only work on exclusive ptes 438 * (PM_MMAP_EXCLUSIVE) which we should satisfy. 439 * 440 * For shared, PAGEOUT may not work. Use DONTNEED instead which 441 * plays a similar role of zapping (rather than freeing the page) 442 * to expose pte markers. 443 */ 444 if (args->mem_type->shared) { 445 if (madvise(area_dst, page_size, MADV_DONTNEED)) 446 err("MADV_DONTNEED"); 447 } else { 448 /* 449 * NOTE: ignore retval because private-hugetlb doesn't yet 450 * support swapping, so it could fail. 451 */ 452 madvise(area_dst, page_size, MADV_PAGEOUT); 453 } 454 455 /* Uffd-wp should persist even swapped out */ 456 value = pagemap_get_entry(pagemap_fd, area_dst); 457 pagemap_check_wp(value, true); 458 if (pagemap_test_fork(uffd, with_event, false)) { 459 uffd_test_fail("Detected %s uffd-wp bit in child in zapped pte", 460 with_event ? "missing" : "stall"); 461 goto out; 462 } 463 464 /* Unprotect; this tests swap pte modifications */ 465 wp_range(uffd, (uint64_t)area_dst, page_size, false); 466 value = pagemap_get_entry(pagemap_fd, area_dst); 467 pagemap_check_wp(value, false); 468 469 /* Fault in the page from disk */ 470 *area_dst = 2; 471 value = pagemap_get_entry(pagemap_fd, area_dst); 472 pagemap_check_wp(value, false); 473 uffd_test_pass(); 474 out: 475 if (uffd_unregister(uffd, area_dst, nr_pages * page_size)) 476 err("unregister failed"); 477 close(pagemap_fd); 478 } 479 480 static void uffd_wp_fork_test(uffd_test_args_t *args) 481 { 482 uffd_wp_fork_test_common(args, false); 483 } 484 485 static void uffd_wp_fork_with_event_test(uffd_test_args_t *args) 486 { 487 uffd_wp_fork_test_common(args, true); 488 } 489 490 static void uffd_wp_fork_pin_test_common(uffd_test_args_t *args, 491 bool with_event) 492 { 493 int pagemap_fd; 494 pin_args pin_args = {}; 495 496 if (uffd_register(uffd, area_dst, page_size, false, true, false)) 497 err("register failed"); 498 499 pagemap_fd = pagemap_open(); 500 501 /* Touch the page */ 502 *area_dst = 1; 503 wp_range(uffd, (uint64_t)area_dst, page_size, true); 504 505 /* 506 * 1. First pin, then fork(). This tests fork() special path when 507 * doing early CoW if the page is private. 508 */ 509 if (pin_pages(&pin_args, area_dst, page_size)) { 510 uffd_test_skip("Possibly CONFIG_GUP_TEST missing " 511 "or unprivileged"); 512 close(pagemap_fd); 513 uffd_unregister(uffd, area_dst, page_size); 514 return; 515 } 516 517 if (pagemap_test_fork(uffd, with_event, false)) { 518 uffd_test_fail("Detected %s uffd-wp bit in early CoW of fork()", 519 with_event ? "missing" : "stall"); 520 unpin_pages(&pin_args); 521 goto out; 522 } 523 524 unpin_pages(&pin_args); 525 526 /* 527 * 2. First fork(), then pin (in the child, where test_pin==true). 528 * This tests COR, aka, page unsharing on private memories. 529 */ 530 if (pagemap_test_fork(uffd, with_event, true)) { 531 uffd_test_fail("Detected %s uffd-wp bit when RO pin", 532 with_event ? "missing" : "stall"); 533 goto out; 534 } 535 uffd_test_pass(); 536 out: 537 if (uffd_unregister(uffd, area_dst, page_size)) 538 err("register failed"); 539 close(pagemap_fd); 540 } 541 542 static void uffd_wp_fork_pin_test(uffd_test_args_t *args) 543 { 544 uffd_wp_fork_pin_test_common(args, false); 545 } 546 547 static void uffd_wp_fork_pin_with_event_test(uffd_test_args_t *args) 548 { 549 uffd_wp_fork_pin_test_common(args, true); 550 } 551 552 static void check_memory_contents(char *p) 553 { 554 unsigned long i, j; 555 uint8_t expected_byte; 556 557 for (i = 0; i < nr_pages; ++i) { 558 expected_byte = ~((uint8_t)(i % ((uint8_t)-1))); 559 for (j = 0; j < page_size; j++) { 560 uint8_t v = *(uint8_t *)(p + (i * page_size) + j); 561 if (v != expected_byte) 562 err("unexpected page contents"); 563 } 564 } 565 } 566 567 static void uffd_minor_test_common(bool test_collapse, bool test_wp) 568 { 569 unsigned long p; 570 pthread_t uffd_mon; 571 char c; 572 struct uffd_args args = { 0 }; 573 574 /* 575 * NOTE: MADV_COLLAPSE is not yet compatible with WP, so testing 576 * both do not make much sense. 577 */ 578 assert(!(test_collapse && test_wp)); 579 580 if (uffd_register(uffd, area_dst_alias, nr_pages * page_size, 581 /* NOTE! MADV_COLLAPSE may not work with uffd-wp */ 582 false, test_wp, true)) 583 err("register failure"); 584 585 /* 586 * After registering with UFFD, populate the non-UFFD-registered side of 587 * the shared mapping. This should *not* trigger any UFFD minor faults. 588 */ 589 for (p = 0; p < nr_pages; ++p) 590 memset(area_dst + (p * page_size), p % ((uint8_t)-1), 591 page_size); 592 593 args.apply_wp = test_wp; 594 if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) 595 err("uffd_poll_thread create"); 596 597 /* 598 * Read each of the pages back using the UFFD-registered mapping. We 599 * expect that the first time we touch a page, it will result in a minor 600 * fault. uffd_poll_thread will resolve the fault by bit-flipping the 601 * page's contents, and then issuing a CONTINUE ioctl. 602 */ 603 check_memory_contents(area_dst_alias); 604 605 if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) 606 err("pipe write"); 607 if (pthread_join(uffd_mon, NULL)) 608 err("join() failed"); 609 610 if (test_collapse) { 611 if (madvise(area_dst_alias, nr_pages * page_size, 612 MADV_COLLAPSE)) { 613 /* It's fine to fail for this one... */ 614 uffd_test_skip("MADV_COLLAPSE failed"); 615 return; 616 } 617 618 uffd_test_ops->check_pmd_mapping(area_dst, 619 nr_pages * page_size / 620 read_pmd_pagesize()); 621 /* 622 * This won't cause uffd-fault - it purely just makes sure there 623 * was no corruption. 624 */ 625 check_memory_contents(area_dst_alias); 626 } 627 628 if (args.missing_faults != 0 || args.minor_faults != nr_pages) 629 uffd_test_fail("stats check error"); 630 else 631 uffd_test_pass(); 632 } 633 634 void uffd_minor_test(uffd_test_args_t *args) 635 { 636 uffd_minor_test_common(false, false); 637 } 638 639 void uffd_minor_wp_test(uffd_test_args_t *args) 640 { 641 uffd_minor_test_common(false, true); 642 } 643 644 void uffd_minor_collapse_test(uffd_test_args_t *args) 645 { 646 uffd_minor_test_common(true, false); 647 } 648 649 static sigjmp_buf jbuf, *sigbuf; 650 651 static void sighndl(int sig, siginfo_t *siginfo, void *ptr) 652 { 653 if (sig == SIGBUS) { 654 if (sigbuf) 655 siglongjmp(*sigbuf, 1); 656 abort(); 657 } 658 } 659 660 /* 661 * For non-cooperative userfaultfd test we fork() a process that will 662 * generate pagefaults, will mremap the area monitored by the 663 * userfaultfd and at last this process will release the monitored 664 * area. 665 * For the anonymous and shared memory the area is divided into two 666 * parts, the first part is accessed before mremap, and the second 667 * part is accessed after mremap. Since hugetlbfs does not support 668 * mremap, the entire monitored area is accessed in a single pass for 669 * HUGETLB_TEST. 670 * The release of the pages currently generates event for shmem and 671 * anonymous memory (UFFD_EVENT_REMOVE), hence it is not checked 672 * for hugetlb. 673 * For signal test(UFFD_FEATURE_SIGBUS), signal_test = 1, we register 674 * monitored area, generate pagefaults and test that signal is delivered. 675 * Use UFFDIO_COPY to allocate missing page and retry. For signal_test = 2 676 * test robustness use case - we release monitored area, fork a process 677 * that will generate pagefaults and verify signal is generated. 678 * This also tests UFFD_FEATURE_EVENT_FORK event along with the signal 679 * feature. Using monitor thread, verify no userfault events are generated. 680 */ 681 static int faulting_process(int signal_test, bool wp) 682 { 683 unsigned long nr, i; 684 unsigned long long count; 685 unsigned long split_nr_pages; 686 unsigned long lastnr; 687 struct sigaction act; 688 volatile unsigned long signalled = 0; 689 690 split_nr_pages = (nr_pages + 1) / 2; 691 692 if (signal_test) { 693 sigbuf = &jbuf; 694 memset(&act, 0, sizeof(act)); 695 act.sa_sigaction = sighndl; 696 act.sa_flags = SA_SIGINFO; 697 if (sigaction(SIGBUS, &act, 0)) 698 err("sigaction"); 699 lastnr = (unsigned long)-1; 700 } 701 702 for (nr = 0; nr < split_nr_pages; nr++) { 703 volatile int steps = 1; 704 unsigned long offset = nr * page_size; 705 706 if (signal_test) { 707 if (sigsetjmp(*sigbuf, 1) != 0) { 708 if (steps == 1 && nr == lastnr) 709 err("Signal repeated"); 710 711 lastnr = nr; 712 if (signal_test == 1) { 713 if (steps == 1) { 714 /* This is a MISSING request */ 715 steps++; 716 if (copy_page(uffd, offset, wp)) 717 signalled++; 718 } else { 719 /* This is a WP request */ 720 assert(steps == 2); 721 wp_range(uffd, 722 (__u64)area_dst + 723 offset, 724 page_size, false); 725 } 726 } else { 727 signalled++; 728 continue; 729 } 730 } 731 } 732 733 count = *area_count(area_dst, nr); 734 if (count != count_verify[nr]) 735 err("nr %lu memory corruption %llu %llu\n", 736 nr, count, count_verify[nr]); 737 /* 738 * Trigger write protection if there is by writing 739 * the same value back. 740 */ 741 *area_count(area_dst, nr) = count; 742 } 743 744 if (signal_test) 745 return signalled != split_nr_pages; 746 747 area_dst = mremap(area_dst, nr_pages * page_size, nr_pages * page_size, 748 MREMAP_MAYMOVE | MREMAP_FIXED, area_src); 749 if (area_dst == MAP_FAILED) 750 err("mremap"); 751 /* Reset area_src since we just clobbered it */ 752 area_src = NULL; 753 754 for (; nr < nr_pages; nr++) { 755 count = *area_count(area_dst, nr); 756 if (count != count_verify[nr]) { 757 err("nr %lu memory corruption %llu %llu\n", 758 nr, count, count_verify[nr]); 759 } 760 /* 761 * Trigger write protection if there is by writing 762 * the same value back. 763 */ 764 *area_count(area_dst, nr) = count; 765 } 766 767 uffd_test_ops->release_pages(area_dst); 768 769 for (nr = 0; nr < nr_pages; nr++) 770 for (i = 0; i < page_size; i++) 771 if (*(area_dst + nr * page_size + i) != 0) 772 err("page %lu offset %lu is not zero", nr, i); 773 774 return 0; 775 } 776 777 static void uffd_sigbus_test_common(bool wp) 778 { 779 unsigned long userfaults; 780 pthread_t uffd_mon; 781 pid_t pid; 782 int err; 783 char c; 784 struct uffd_args args = { 0 }; 785 786 ready_for_fork = false; 787 788 fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); 789 790 if (uffd_register(uffd, area_dst, nr_pages * page_size, 791 true, wp, false)) 792 err("register failure"); 793 794 if (faulting_process(1, wp)) 795 err("faulting process failed"); 796 797 uffd_test_ops->release_pages(area_dst); 798 799 args.apply_wp = wp; 800 if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) 801 err("uffd_poll_thread create"); 802 803 while (!ready_for_fork) 804 ; /* Wait for the poll_thread to start executing before forking */ 805 806 pid = fork(); 807 if (pid < 0) 808 err("fork"); 809 810 if (!pid) 811 exit(faulting_process(2, wp)); 812 813 waitpid(pid, &err, 0); 814 if (err) 815 err("faulting process failed"); 816 if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) 817 err("pipe write"); 818 if (pthread_join(uffd_mon, (void **)&userfaults)) 819 err("pthread_join()"); 820 821 if (userfaults) 822 uffd_test_fail("Signal test failed, userfaults: %ld", userfaults); 823 else 824 uffd_test_pass(); 825 } 826 827 static void uffd_sigbus_test(uffd_test_args_t *args) 828 { 829 uffd_sigbus_test_common(false); 830 } 831 832 static void uffd_sigbus_wp_test(uffd_test_args_t *args) 833 { 834 uffd_sigbus_test_common(true); 835 } 836 837 static void uffd_events_test_common(bool wp) 838 { 839 pthread_t uffd_mon; 840 pid_t pid; 841 int err; 842 char c; 843 struct uffd_args args = { 0 }; 844 845 ready_for_fork = false; 846 847 fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); 848 if (uffd_register(uffd, area_dst, nr_pages * page_size, 849 true, wp, false)) 850 err("register failure"); 851 852 args.apply_wp = wp; 853 if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) 854 err("uffd_poll_thread create"); 855 856 while (!ready_for_fork) 857 ; /* Wait for the poll_thread to start executing before forking */ 858 859 pid = fork(); 860 if (pid < 0) 861 err("fork"); 862 863 if (!pid) 864 exit(faulting_process(0, wp)); 865 866 waitpid(pid, &err, 0); 867 if (err) 868 err("faulting process failed"); 869 if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) 870 err("pipe write"); 871 if (pthread_join(uffd_mon, NULL)) 872 err("pthread_join()"); 873 874 if (args.missing_faults != nr_pages) 875 uffd_test_fail("Fault counts wrong"); 876 else 877 uffd_test_pass(); 878 } 879 880 static void uffd_events_test(uffd_test_args_t *args) 881 { 882 uffd_events_test_common(false); 883 } 884 885 static void uffd_events_wp_test(uffd_test_args_t *args) 886 { 887 uffd_events_test_common(true); 888 } 889 890 static void retry_uffdio_zeropage(int ufd, 891 struct uffdio_zeropage *uffdio_zeropage) 892 { 893 uffd_test_ops->alias_mapping(&uffdio_zeropage->range.start, 894 uffdio_zeropage->range.len, 895 0); 896 if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) { 897 if (uffdio_zeropage->zeropage != -EEXIST) 898 err("UFFDIO_ZEROPAGE error: %"PRId64, 899 (int64_t)uffdio_zeropage->zeropage); 900 } else { 901 err("UFFDIO_ZEROPAGE error: %"PRId64, 902 (int64_t)uffdio_zeropage->zeropage); 903 } 904 } 905 906 static bool do_uffdio_zeropage(int ufd, bool has_zeropage) 907 { 908 struct uffdio_zeropage uffdio_zeropage = { 0 }; 909 int ret; 910 __s64 res; 911 912 uffdio_zeropage.range.start = (unsigned long) area_dst; 913 uffdio_zeropage.range.len = page_size; 914 uffdio_zeropage.mode = 0; 915 ret = ioctl(ufd, UFFDIO_ZEROPAGE, &uffdio_zeropage); 916 res = uffdio_zeropage.zeropage; 917 if (ret) { 918 /* real retval in ufdio_zeropage.zeropage */ 919 if (has_zeropage) 920 err("UFFDIO_ZEROPAGE error: %"PRId64, (int64_t)res); 921 else if (res != -EINVAL) 922 err("UFFDIO_ZEROPAGE not -EINVAL"); 923 } else if (has_zeropage) { 924 if (res != page_size) 925 err("UFFDIO_ZEROPAGE unexpected size"); 926 else 927 retry_uffdio_zeropage(ufd, &uffdio_zeropage); 928 return true; 929 } else 930 err("UFFDIO_ZEROPAGE succeeded"); 931 932 return false; 933 } 934 935 /* 936 * Registers a range with MISSING mode only for zeropage test. Return true 937 * if UFFDIO_ZEROPAGE supported, false otherwise. Can't use uffd_register() 938 * because we want to detect .ioctls along the way. 939 */ 940 static bool 941 uffd_register_detect_zeropage(int uffd, void *addr, uint64_t len) 942 { 943 uint64_t ioctls = 0; 944 945 if (uffd_register_with_ioctls(uffd, addr, len, true, 946 false, false, &ioctls)) 947 err("zeropage register fail"); 948 949 return ioctls & (1 << _UFFDIO_ZEROPAGE); 950 } 951 952 /* exercise UFFDIO_ZEROPAGE */ 953 static void uffd_zeropage_test(uffd_test_args_t *args) 954 { 955 bool has_zeropage; 956 int i; 957 958 has_zeropage = uffd_register_detect_zeropage(uffd, area_dst, page_size); 959 if (area_dst_alias) 960 /* Ignore the retval; we already have it */ 961 uffd_register_detect_zeropage(uffd, area_dst_alias, page_size); 962 963 if (do_uffdio_zeropage(uffd, has_zeropage)) 964 for (i = 0; i < page_size; i++) 965 if (area_dst[i] != 0) 966 err("data non-zero at offset %d\n", i); 967 968 if (uffd_unregister(uffd, area_dst, page_size)) 969 err("unregister"); 970 971 if (area_dst_alias && uffd_unregister(uffd, area_dst_alias, page_size)) 972 err("unregister"); 973 974 uffd_test_pass(); 975 } 976 977 static void uffd_register_poison(int uffd, void *addr, uint64_t len) 978 { 979 uint64_t ioctls = 0; 980 uint64_t expected = (1 << _UFFDIO_COPY) | (1 << _UFFDIO_POISON); 981 982 if (uffd_register_with_ioctls(uffd, addr, len, true, 983 false, false, &ioctls)) 984 err("poison register fail"); 985 986 if ((ioctls & expected) != expected) 987 err("registered area doesn't support COPY and POISON ioctls"); 988 } 989 990 static void do_uffdio_poison(int uffd, unsigned long offset) 991 { 992 struct uffdio_poison uffdio_poison = { 0 }; 993 int ret; 994 __s64 res; 995 996 uffdio_poison.range.start = (unsigned long) area_dst + offset; 997 uffdio_poison.range.len = page_size; 998 uffdio_poison.mode = 0; 999 ret = ioctl(uffd, UFFDIO_POISON, &uffdio_poison); 1000 res = uffdio_poison.updated; 1001 1002 if (ret) 1003 err("UFFDIO_POISON error: %"PRId64, (int64_t)res); 1004 else if (res != page_size) 1005 err("UFFDIO_POISON unexpected size: %"PRId64, (int64_t)res); 1006 } 1007 1008 static void uffd_poison_handle_fault( 1009 struct uffd_msg *msg, struct uffd_args *args) 1010 { 1011 unsigned long offset; 1012 1013 if (msg->event != UFFD_EVENT_PAGEFAULT) 1014 err("unexpected msg event %u", msg->event); 1015 1016 if (msg->arg.pagefault.flags & 1017 (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR)) 1018 err("unexpected fault type %llu", msg->arg.pagefault.flags); 1019 1020 offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; 1021 offset &= ~(page_size-1); 1022 1023 /* Odd pages -> copy zeroed page; even pages -> poison. */ 1024 if (offset & page_size) 1025 copy_page(uffd, offset, false); 1026 else 1027 do_uffdio_poison(uffd, offset); 1028 } 1029 1030 /* Make sure to cover odd/even, and minimum duplications */ 1031 #define UFFD_POISON_TEST_NPAGES 4 1032 1033 static void uffd_poison_test(uffd_test_args_t *targs) 1034 { 1035 pthread_t uffd_mon; 1036 char c; 1037 struct uffd_args args = { 0 }; 1038 struct sigaction act = { 0 }; 1039 unsigned long nr_sigbus = 0; 1040 unsigned long nr, poison_pages = UFFD_POISON_TEST_NPAGES; 1041 1042 if (nr_pages < poison_pages) { 1043 uffd_test_skip("Too few pages for POISON test"); 1044 return; 1045 } 1046 1047 fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); 1048 1049 uffd_register_poison(uffd, area_dst, poison_pages * page_size); 1050 memset(area_src, 0, poison_pages * page_size); 1051 1052 args.handle_fault = uffd_poison_handle_fault; 1053 if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) 1054 err("uffd_poll_thread create"); 1055 1056 sigbuf = &jbuf; 1057 act.sa_sigaction = sighndl; 1058 act.sa_flags = SA_SIGINFO; 1059 if (sigaction(SIGBUS, &act, 0)) 1060 err("sigaction"); 1061 1062 for (nr = 0; nr < poison_pages; ++nr) { 1063 unsigned long offset = nr * page_size; 1064 const char *bytes = (const char *) area_dst + offset; 1065 const char *i; 1066 1067 if (sigsetjmp(*sigbuf, 1)) { 1068 /* 1069 * Access below triggered a SIGBUS, which was caught by 1070 * sighndl, which then jumped here. Count this SIGBUS, 1071 * and move on to next page. 1072 */ 1073 ++nr_sigbus; 1074 continue; 1075 } 1076 1077 for (i = bytes; i < bytes + page_size; ++i) { 1078 if (*i) 1079 err("nonzero byte in area_dst (%p) at %p: %u", 1080 area_dst, i, *i); 1081 } 1082 } 1083 1084 if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) 1085 err("pipe write"); 1086 if (pthread_join(uffd_mon, NULL)) 1087 err("pthread_join()"); 1088 1089 if (nr_sigbus != poison_pages / 2) 1090 err("expected to receive %lu SIGBUS, actually received %lu", 1091 poison_pages / 2, nr_sigbus); 1092 1093 uffd_test_pass(); 1094 } 1095 1096 static void 1097 uffd_move_handle_fault_common(struct uffd_msg *msg, struct uffd_args *args, 1098 unsigned long len) 1099 { 1100 unsigned long offset; 1101 1102 if (msg->event != UFFD_EVENT_PAGEFAULT) 1103 err("unexpected msg event %u", msg->event); 1104 1105 if (msg->arg.pagefault.flags & 1106 (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR | UFFD_PAGEFAULT_FLAG_WRITE)) 1107 err("unexpected fault type %llu", msg->arg.pagefault.flags); 1108 1109 offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; 1110 offset &= ~(len-1); 1111 1112 if (move_page(uffd, offset, len)) 1113 args->missing_faults++; 1114 } 1115 1116 static void uffd_move_handle_fault(struct uffd_msg *msg, 1117 struct uffd_args *args) 1118 { 1119 uffd_move_handle_fault_common(msg, args, page_size); 1120 } 1121 1122 static void uffd_move_pmd_handle_fault(struct uffd_msg *msg, 1123 struct uffd_args *args) 1124 { 1125 uffd_move_handle_fault_common(msg, args, read_pmd_pagesize()); 1126 } 1127 1128 static void 1129 uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size, 1130 void (*handle_fault)(struct uffd_msg *msg, struct uffd_args *args)) 1131 { 1132 unsigned long nr; 1133 pthread_t uffd_mon; 1134 char c; 1135 unsigned long long count; 1136 struct uffd_args args = { 0 }; 1137 char *orig_area_src = NULL, *orig_area_dst = NULL; 1138 unsigned long step_size, step_count; 1139 unsigned long src_offs = 0; 1140 unsigned long dst_offs = 0; 1141 1142 /* Prevent source pages from being mapped more than once */ 1143 if (madvise(area_src, nr_pages * page_size, MADV_DONTFORK)) 1144 err("madvise(MADV_DONTFORK) failure"); 1145 1146 if (uffd_register(uffd, area_dst, nr_pages * page_size, 1147 true, false, false)) 1148 err("register failure"); 1149 1150 args.handle_fault = handle_fault; 1151 if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) 1152 err("uffd_poll_thread create"); 1153 1154 step_size = chunk_size / page_size; 1155 step_count = nr_pages / step_size; 1156 1157 if (chunk_size > page_size) { 1158 char *aligned_src = ALIGN_UP(area_src, chunk_size); 1159 char *aligned_dst = ALIGN_UP(area_dst, chunk_size); 1160 1161 if (aligned_src != area_src || aligned_dst != area_dst) { 1162 src_offs = (aligned_src - area_src) / page_size; 1163 dst_offs = (aligned_dst - area_dst) / page_size; 1164 step_count--; 1165 } 1166 orig_area_src = area_src; 1167 orig_area_dst = area_dst; 1168 area_src = aligned_src; 1169 area_dst = aligned_dst; 1170 } 1171 1172 /* 1173 * Read each of the pages back using the UFFD-registered mapping. We 1174 * expect that the first time we touch a page, it will result in a missing 1175 * fault. uffd_poll_thread will resolve the fault by moving source 1176 * page to destination. 1177 */ 1178 for (nr = 0; nr < step_count * step_size; nr += step_size) { 1179 unsigned long i; 1180 1181 /* Check area_src content */ 1182 for (i = 0; i < step_size; i++) { 1183 count = *area_count(area_src, nr + i); 1184 if (count != count_verify[src_offs + nr + i]) 1185 err("nr %lu source memory invalid %llu %llu\n", 1186 nr + i, count, count_verify[src_offs + nr + i]); 1187 } 1188 1189 /* Faulting into area_dst should move the page or the huge page */ 1190 for (i = 0; i < step_size; i++) { 1191 count = *area_count(area_dst, nr + i); 1192 if (count != count_verify[dst_offs + nr + i]) 1193 err("nr %lu memory corruption %llu %llu\n", 1194 nr, count, count_verify[dst_offs + nr + i]); 1195 } 1196 1197 /* Re-check area_src content which should be empty */ 1198 for (i = 0; i < step_size; i++) { 1199 count = *area_count(area_src, nr + i); 1200 if (count != 0) 1201 err("nr %lu move failed %llu %llu\n", 1202 nr, count, count_verify[src_offs + nr + i]); 1203 } 1204 } 1205 if (chunk_size > page_size) { 1206 area_src = orig_area_src; 1207 area_dst = orig_area_dst; 1208 } 1209 1210 if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) 1211 err("pipe write"); 1212 if (pthread_join(uffd_mon, NULL)) 1213 err("join() failed"); 1214 1215 if (args.missing_faults != step_count || args.minor_faults != 0) 1216 uffd_test_fail("stats check error"); 1217 else 1218 uffd_test_pass(); 1219 } 1220 1221 static void uffd_move_test(uffd_test_args_t *targs) 1222 { 1223 uffd_move_test_common(targs, page_size, uffd_move_handle_fault); 1224 } 1225 1226 static void uffd_move_pmd_test(uffd_test_args_t *targs) 1227 { 1228 if (madvise(area_dst, nr_pages * page_size, MADV_HUGEPAGE)) 1229 err("madvise(MADV_HUGEPAGE) failure"); 1230 uffd_move_test_common(targs, read_pmd_pagesize(), 1231 uffd_move_pmd_handle_fault); 1232 } 1233 1234 static void uffd_move_pmd_split_test(uffd_test_args_t *targs) 1235 { 1236 if (madvise(area_dst, nr_pages * page_size, MADV_NOHUGEPAGE)) 1237 err("madvise(MADV_NOHUGEPAGE) failure"); 1238 uffd_move_test_common(targs, read_pmd_pagesize(), 1239 uffd_move_pmd_handle_fault); 1240 } 1241 1242 static bool 1243 uffdio_verify_results(const char *name, int ret, int error, long result) 1244 { 1245 /* 1246 * Should always return -1 with errno=EAGAIN, with corresponding 1247 * result field updated in ioctl() args to be -EAGAIN too 1248 * (e.g. copy.copy field for UFFDIO_COPY). 1249 */ 1250 if (ret != -1) { 1251 uffd_test_fail("%s should have returned -1", name); 1252 return false; 1253 } 1254 1255 if (error != EAGAIN) { 1256 uffd_test_fail("%s should have errno==EAGAIN", name); 1257 return false; 1258 } 1259 1260 if (result != -EAGAIN) { 1261 uffd_test_fail("%s should have been updated for -EAGAIN", 1262 name); 1263 return false; 1264 } 1265 1266 return true; 1267 } 1268 1269 /* 1270 * This defines a function to test one ioctl. Note that here "field" can 1271 * be 1 or anything not -EAGAIN. With that initial value set, we can 1272 * verify later that it should be updated by kernel (when -EAGAIN 1273 * returned), by checking whether it is also updated to -EAGAIN. 1274 */ 1275 #define DEFINE_MMAP_CHANGING_TEST(name, ioctl_name, field) \ 1276 static bool uffdio_mmap_changing_test_##name(int fd) \ 1277 { \ 1278 int ret; \ 1279 struct uffdio_##name args = { \ 1280 .field = 1, \ 1281 }; \ 1282 ret = ioctl(fd, ioctl_name, &args); \ 1283 return uffdio_verify_results(#ioctl_name, ret, errno, args.field); \ 1284 } 1285 1286 DEFINE_MMAP_CHANGING_TEST(zeropage, UFFDIO_ZEROPAGE, zeropage) 1287 DEFINE_MMAP_CHANGING_TEST(copy, UFFDIO_COPY, copy) 1288 DEFINE_MMAP_CHANGING_TEST(move, UFFDIO_MOVE, move) 1289 DEFINE_MMAP_CHANGING_TEST(poison, UFFDIO_POISON, updated) 1290 DEFINE_MMAP_CHANGING_TEST(continue, UFFDIO_CONTINUE, mapped) 1291 1292 typedef enum { 1293 /* We actually do not care about any state except UNINTERRUPTIBLE.. */ 1294 THR_STATE_UNKNOWN = 0, 1295 THR_STATE_UNINTERRUPTIBLE, 1296 } thread_state; 1297 1298 static void sleep_short(void) 1299 { 1300 usleep(1000); 1301 } 1302 1303 static thread_state thread_state_get(pid_t tid) 1304 { 1305 const char *header = "State:\t"; 1306 char tmp[256], *p, c; 1307 FILE *fp; 1308 1309 snprintf(tmp, sizeof(tmp), "/proc/%d/status", tid); 1310 fp = fopen(tmp, "r"); 1311 1312 if (!fp) 1313 return THR_STATE_UNKNOWN; 1314 1315 while (fgets(tmp, sizeof(tmp), fp)) { 1316 p = strstr(tmp, header); 1317 if (p) { 1318 /* For example, "State:\tD (disk sleep)" */ 1319 c = *(p + sizeof(header) - 1); 1320 return c == 'D' ? 1321 THR_STATE_UNINTERRUPTIBLE : THR_STATE_UNKNOWN; 1322 } 1323 } 1324 1325 return THR_STATE_UNKNOWN; 1326 } 1327 1328 static void thread_state_until(pid_t tid, thread_state state) 1329 { 1330 thread_state s; 1331 1332 do { 1333 s = thread_state_get(tid); 1334 sleep_short(); 1335 } while (s != state); 1336 } 1337 1338 static void *uffd_mmap_changing_thread(void *opaque) 1339 { 1340 volatile pid_t *pid = opaque; 1341 int ret; 1342 1343 /* Unfortunately, it's only fetch-able from the thread itself.. */ 1344 assert(*pid == 0); 1345 *pid = syscall(SYS_gettid); 1346 1347 /* Inject an event, this will hang solid until the event read */ 1348 ret = madvise(area_dst, page_size, MADV_REMOVE); 1349 if (ret) 1350 err("madvise(MADV_REMOVE) failed"); 1351 1352 return NULL; 1353 } 1354 1355 static void uffd_consume_message(int fd) 1356 { 1357 struct uffd_msg msg = { 0 }; 1358 1359 while (uffd_read_msg(fd, &msg)); 1360 } 1361 1362 static void uffd_mmap_changing_test(uffd_test_args_t *targs) 1363 { 1364 /* 1365 * This stores the real PID (which can be different from how tid is 1366 * defined..) for the child thread, 0 means not initialized. 1367 */ 1368 pid_t pid = 0; 1369 pthread_t tid; 1370 int ret; 1371 1372 if (uffd_register(uffd, area_dst, nr_pages * page_size, 1373 true, false, false)) 1374 err("uffd_register() failed"); 1375 1376 /* Create a thread to generate the racy event */ 1377 ret = pthread_create(&tid, NULL, uffd_mmap_changing_thread, &pid); 1378 if (ret) 1379 err("pthread_create() failed"); 1380 1381 /* 1382 * Wait until the thread setup the pid. Use volatile to make sure 1383 * it reads from RAM not regs. 1384 */ 1385 while (!(volatile pid_t)pid) 1386 sleep_short(); 1387 1388 /* Wait until the thread hangs at REMOVE event */ 1389 thread_state_until(pid, THR_STATE_UNINTERRUPTIBLE); 1390 1391 if (!uffdio_mmap_changing_test_copy(uffd)) 1392 return; 1393 1394 if (!uffdio_mmap_changing_test_zeropage(uffd)) 1395 return; 1396 1397 if (!uffdio_mmap_changing_test_move(uffd)) 1398 return; 1399 1400 if (!uffdio_mmap_changing_test_poison(uffd)) 1401 return; 1402 1403 if (!uffdio_mmap_changing_test_continue(uffd)) 1404 return; 1405 1406 /* 1407 * All succeeded above! Recycle everything. Start by reading the 1408 * event so as to kick the thread roll again.. 1409 */ 1410 uffd_consume_message(uffd); 1411 1412 ret = pthread_join(tid, NULL); 1413 assert(ret == 0); 1414 1415 uffd_test_pass(); 1416 } 1417 1418 static int prevent_hugepages(const char **errmsg) 1419 { 1420 /* This should be done before source area is populated */ 1421 if (madvise(area_src, nr_pages * page_size, MADV_NOHUGEPAGE)) { 1422 /* Ignore only if CONFIG_TRANSPARENT_HUGEPAGE=n */ 1423 if (errno != EINVAL) { 1424 if (errmsg) 1425 *errmsg = "madvise(MADV_NOHUGEPAGE) failed"; 1426 return -errno; 1427 } 1428 } 1429 return 0; 1430 } 1431 1432 static int request_hugepages(const char **errmsg) 1433 { 1434 /* This should be done before source area is populated */ 1435 if (madvise(area_src, nr_pages * page_size, MADV_HUGEPAGE)) { 1436 if (errmsg) { 1437 *errmsg = (errno == EINVAL) ? 1438 "CONFIG_TRANSPARENT_HUGEPAGE is not set" : 1439 "madvise(MADV_HUGEPAGE) failed"; 1440 } 1441 return -errno; 1442 } 1443 return 0; 1444 } 1445 1446 struct uffd_test_case_ops uffd_move_test_case_ops = { 1447 .post_alloc = prevent_hugepages, 1448 }; 1449 1450 struct uffd_test_case_ops uffd_move_test_pmd_case_ops = { 1451 .post_alloc = request_hugepages, 1452 }; 1453 1454 /* 1455 * Test the returned uffdio_register.ioctls with different register modes. 1456 * Note that _UFFDIO_ZEROPAGE is tested separately in the zeropage test. 1457 */ 1458 static void 1459 do_register_ioctls_test(uffd_test_args_t *args, bool miss, bool wp, bool minor) 1460 { 1461 uint64_t ioctls = 0, expected = BIT_ULL(_UFFDIO_WAKE); 1462 mem_type_t *mem_type = args->mem_type; 1463 int ret; 1464 1465 ret = uffd_register_with_ioctls(uffd, area_dst, page_size, 1466 miss, wp, minor, &ioctls); 1467 1468 /* 1469 * Handle special cases of UFFDIO_REGISTER here where it should 1470 * just fail with -EINVAL first.. 1471 * 1472 * Case 1: register MINOR on anon 1473 * Case 2: register with no mode selected 1474 */ 1475 if ((minor && (mem_type->mem_flag == MEM_ANON)) || 1476 (!miss && !wp && !minor)) { 1477 if (ret != -EINVAL) 1478 err("register (miss=%d, wp=%d, minor=%d) failed " 1479 "with wrong errno=%d", miss, wp, minor, ret); 1480 return; 1481 } 1482 1483 /* UFFDIO_REGISTER should succeed, then check ioctls returned */ 1484 if (miss) 1485 expected |= BIT_ULL(_UFFDIO_COPY); 1486 if (wp) 1487 expected |= BIT_ULL(_UFFDIO_WRITEPROTECT); 1488 if (minor) 1489 expected |= BIT_ULL(_UFFDIO_CONTINUE); 1490 1491 if ((ioctls & expected) != expected) 1492 err("unexpected uffdio_register.ioctls " 1493 "(miss=%d, wp=%d, minor=%d): expected=0x%"PRIx64", " 1494 "returned=0x%"PRIx64, miss, wp, minor, expected, ioctls); 1495 1496 if (uffd_unregister(uffd, area_dst, page_size)) 1497 err("unregister"); 1498 } 1499 1500 static void uffd_register_ioctls_test(uffd_test_args_t *args) 1501 { 1502 int miss, wp, minor; 1503 1504 for (miss = 0; miss <= 1; miss++) 1505 for (wp = 0; wp <= 1; wp++) 1506 for (minor = 0; minor <= 1; minor++) 1507 do_register_ioctls_test(args, miss, wp, minor); 1508 1509 uffd_test_pass(); 1510 } 1511 1512 uffd_test_case_t uffd_tests[] = { 1513 { 1514 /* Test returned uffdio_register.ioctls. */ 1515 .name = "register-ioctls", 1516 .uffd_fn = uffd_register_ioctls_test, 1517 .mem_targets = MEM_ALL, 1518 .uffd_feature_required = UFFD_FEATURE_MISSING_HUGETLBFS | 1519 UFFD_FEATURE_MISSING_SHMEM | 1520 UFFD_FEATURE_PAGEFAULT_FLAG_WP | 1521 UFFD_FEATURE_WP_HUGETLBFS_SHMEM | 1522 UFFD_FEATURE_MINOR_HUGETLBFS | 1523 UFFD_FEATURE_MINOR_SHMEM, 1524 }, 1525 { 1526 .name = "zeropage", 1527 .uffd_fn = uffd_zeropage_test, 1528 .mem_targets = MEM_ALL, 1529 .uffd_feature_required = 0, 1530 }, 1531 { 1532 .name = "move", 1533 .uffd_fn = uffd_move_test, 1534 .mem_targets = MEM_ANON, 1535 .uffd_feature_required = UFFD_FEATURE_MOVE, 1536 .test_case_ops = &uffd_move_test_case_ops, 1537 }, 1538 { 1539 .name = "move-pmd", 1540 .uffd_fn = uffd_move_pmd_test, 1541 .mem_targets = MEM_ANON, 1542 .uffd_feature_required = UFFD_FEATURE_MOVE, 1543 .test_case_ops = &uffd_move_test_pmd_case_ops, 1544 }, 1545 { 1546 .name = "move-pmd-split", 1547 .uffd_fn = uffd_move_pmd_split_test, 1548 .mem_targets = MEM_ANON, 1549 .uffd_feature_required = UFFD_FEATURE_MOVE, 1550 .test_case_ops = &uffd_move_test_pmd_case_ops, 1551 }, 1552 { 1553 .name = "wp-fork", 1554 .uffd_fn = uffd_wp_fork_test, 1555 .mem_targets = MEM_ALL, 1556 .uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP | 1557 UFFD_FEATURE_WP_HUGETLBFS_SHMEM, 1558 }, 1559 { 1560 .name = "wp-fork-with-event", 1561 .uffd_fn = uffd_wp_fork_with_event_test, 1562 .mem_targets = MEM_ALL, 1563 .uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP | 1564 UFFD_FEATURE_WP_HUGETLBFS_SHMEM | 1565 /* when set, child process should inherit uffd-wp bits */ 1566 UFFD_FEATURE_EVENT_FORK, 1567 }, 1568 { 1569 .name = "wp-fork-pin", 1570 .uffd_fn = uffd_wp_fork_pin_test, 1571 .mem_targets = MEM_ALL, 1572 .uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP | 1573 UFFD_FEATURE_WP_HUGETLBFS_SHMEM, 1574 }, 1575 { 1576 .name = "wp-fork-pin-with-event", 1577 .uffd_fn = uffd_wp_fork_pin_with_event_test, 1578 .mem_targets = MEM_ALL, 1579 .uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP | 1580 UFFD_FEATURE_WP_HUGETLBFS_SHMEM | 1581 /* when set, child process should inherit uffd-wp bits */ 1582 UFFD_FEATURE_EVENT_FORK, 1583 }, 1584 { 1585 .name = "wp-unpopulated", 1586 .uffd_fn = uffd_wp_unpopulated_test, 1587 .mem_targets = MEM_ANON, 1588 .uffd_feature_required = 1589 UFFD_FEATURE_PAGEFAULT_FLAG_WP | UFFD_FEATURE_WP_UNPOPULATED, 1590 }, 1591 { 1592 .name = "minor", 1593 .uffd_fn = uffd_minor_test, 1594 .mem_targets = MEM_SHMEM | MEM_HUGETLB, 1595 .uffd_feature_required = 1596 UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM, 1597 }, 1598 { 1599 .name = "minor-wp", 1600 .uffd_fn = uffd_minor_wp_test, 1601 .mem_targets = MEM_SHMEM | MEM_HUGETLB, 1602 .uffd_feature_required = 1603 UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM | 1604 UFFD_FEATURE_PAGEFAULT_FLAG_WP | 1605 /* 1606 * HACK: here we leveraged WP_UNPOPULATED to detect whether 1607 * minor mode supports wr-protect. There's no feature flag 1608 * for it so this is the best we can test against. 1609 */ 1610 UFFD_FEATURE_WP_UNPOPULATED, 1611 }, 1612 { 1613 .name = "minor-collapse", 1614 .uffd_fn = uffd_minor_collapse_test, 1615 /* MADV_COLLAPSE only works with shmem */ 1616 .mem_targets = MEM_SHMEM, 1617 /* We can't test MADV_COLLAPSE, so try our luck */ 1618 .uffd_feature_required = UFFD_FEATURE_MINOR_SHMEM, 1619 }, 1620 { 1621 .name = "sigbus", 1622 .uffd_fn = uffd_sigbus_test, 1623 .mem_targets = MEM_ALL, 1624 .uffd_feature_required = UFFD_FEATURE_SIGBUS | 1625 UFFD_FEATURE_EVENT_FORK, 1626 }, 1627 { 1628 .name = "sigbus-wp", 1629 .uffd_fn = uffd_sigbus_wp_test, 1630 .mem_targets = MEM_ALL, 1631 .uffd_feature_required = UFFD_FEATURE_SIGBUS | 1632 UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_PAGEFAULT_FLAG_WP | 1633 UFFD_FEATURE_WP_HUGETLBFS_SHMEM, 1634 }, 1635 { 1636 .name = "events", 1637 .uffd_fn = uffd_events_test, 1638 .mem_targets = MEM_ALL, 1639 .uffd_feature_required = UFFD_FEATURE_EVENT_FORK | 1640 UFFD_FEATURE_EVENT_REMAP | UFFD_FEATURE_EVENT_REMOVE, 1641 }, 1642 { 1643 .name = "events-wp", 1644 .uffd_fn = uffd_events_wp_test, 1645 .mem_targets = MEM_ALL, 1646 .uffd_feature_required = UFFD_FEATURE_EVENT_FORK | 1647 UFFD_FEATURE_EVENT_REMAP | UFFD_FEATURE_EVENT_REMOVE | 1648 UFFD_FEATURE_PAGEFAULT_FLAG_WP | 1649 UFFD_FEATURE_WP_HUGETLBFS_SHMEM, 1650 }, 1651 { 1652 .name = "poison", 1653 .uffd_fn = uffd_poison_test, 1654 .mem_targets = MEM_ALL, 1655 .uffd_feature_required = UFFD_FEATURE_POISON, 1656 }, 1657 { 1658 .name = "mmap-changing", 1659 .uffd_fn = uffd_mmap_changing_test, 1660 /* 1661 * There's no point running this test over all mem types as 1662 * they share the same code paths. 1663 * 1664 * Choose shmem for simplicity, because (1) shmem supports 1665 * MINOR mode to cover UFFDIO_CONTINUE, and (2) shmem is 1666 * almost always available (unlike hugetlb). Here we 1667 * abused SHMEM for UFFDIO_MOVE, but the test we want to 1668 * cover doesn't yet need the correct memory type.. 1669 */ 1670 .mem_targets = MEM_SHMEM, 1671 /* 1672 * Any UFFD_FEATURE_EVENT_* should work to trigger the 1673 * race logically, but choose the simplest (REMOVE). 1674 * 1675 * Meanwhile, since we'll cover quite a few new ioctl()s 1676 * (CONTINUE, POISON, MOVE), skip this test for old kernels 1677 * by choosing all of them. 1678 */ 1679 .uffd_feature_required = UFFD_FEATURE_EVENT_REMOVE | 1680 UFFD_FEATURE_MOVE | UFFD_FEATURE_POISON | 1681 UFFD_FEATURE_MINOR_SHMEM, 1682 }, 1683 }; 1684 1685 static void usage(const char *prog) 1686 { 1687 printf("usage: %s [-f TESTNAME]\n", prog); 1688 puts(""); 1689 puts(" -f: test name to filter (e.g., event)"); 1690 puts(" -h: show the help msg"); 1691 puts(" -l: list tests only"); 1692 puts(""); 1693 exit(KSFT_FAIL); 1694 } 1695 1696 int main(int argc, char *argv[]) 1697 { 1698 int n_tests = sizeof(uffd_tests) / sizeof(uffd_test_case_t); 1699 int n_mems = sizeof(mem_types) / sizeof(mem_type_t); 1700 const char *test_filter = NULL; 1701 bool list_only = false; 1702 uffd_test_case_t *test; 1703 mem_type_t *mem_type; 1704 uffd_test_args_t args; 1705 const char *errmsg; 1706 int has_uffd, opt; 1707 int i, j; 1708 1709 while ((opt = getopt(argc, argv, "f:hl")) != -1) { 1710 switch (opt) { 1711 case 'f': 1712 test_filter = optarg; 1713 break; 1714 case 'l': 1715 list_only = true; 1716 break; 1717 case 'h': 1718 default: 1719 /* Unknown */ 1720 usage(argv[0]); 1721 break; 1722 } 1723 } 1724 1725 if (!test_filter && !list_only) { 1726 has_uffd = test_uffd_api(false); 1727 has_uffd |= test_uffd_api(true); 1728 1729 if (!has_uffd) { 1730 printf("Userfaultfd not supported or unprivileged, skip all tests\n"); 1731 exit(KSFT_SKIP); 1732 } 1733 } 1734 1735 for (i = 0; i < n_tests; i++) { 1736 test = &uffd_tests[i]; 1737 if (test_filter && !strstr(test->name, test_filter)) 1738 continue; 1739 if (list_only) { 1740 printf("%s\n", test->name); 1741 continue; 1742 } 1743 for (j = 0; j < n_mems; j++) { 1744 mem_type = &mem_types[j]; 1745 if (!(test->mem_targets & mem_type->mem_flag)) 1746 continue; 1747 1748 uffd_test_start("%s on %s", test->name, mem_type->name); 1749 if ((mem_type->mem_flag == MEM_HUGETLB || 1750 mem_type->mem_flag == MEM_HUGETLB_PRIVATE) && 1751 (default_huge_page_size() == 0)) { 1752 uffd_test_skip("huge page size is 0, feature missing?"); 1753 continue; 1754 } 1755 if (!uffd_feature_supported(test)) { 1756 uffd_test_skip("feature missing"); 1757 continue; 1758 } 1759 if (uffd_setup_environment(&args, test, mem_type, 1760 &errmsg)) { 1761 uffd_test_skip(errmsg); 1762 continue; 1763 } 1764 test->uffd_fn(&args); 1765 uffd_test_ctx_clear(); 1766 } 1767 } 1768 1769 if (!list_only) 1770 uffd_test_report(); 1771 1772 return ksft_get_fail_cnt() ? KSFT_FAIL : KSFT_PASS; 1773 } 1774 1775 #else /* __NR_userfaultfd */ 1776 1777 #warning "missing __NR_userfaultfd definition" 1778 1779 int main(void) 1780 { 1781 printf("Skipping %s (missing __NR_userfaultfd)\n", __file__); 1782 return KSFT_SKIP; 1783 } 1784 1785 #endif /* __NR_userfaultfd */ 1786