1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Userfaultfd unit tests. 4 * 5 * Copyright (C) 2015-2023 Red Hat, Inc. 6 */ 7 8 #include "uffd-common.h" 9 10 #include "../../../../mm/gup_test.h" 11 12 #ifdef __NR_userfaultfd 13 14 /* The unit test doesn't need a large or random size, make it 32MB for now */ 15 #define UFFD_TEST_MEM_SIZE (32UL << 20) 16 17 #define MEM_ANON BIT_ULL(0) 18 #define MEM_SHMEM BIT_ULL(1) 19 #define MEM_SHMEM_PRIVATE BIT_ULL(2) 20 #define MEM_HUGETLB BIT_ULL(3) 21 #define MEM_HUGETLB_PRIVATE BIT_ULL(4) 22 23 #define MEM_ALL (MEM_ANON | MEM_SHMEM | MEM_SHMEM_PRIVATE | \ 24 MEM_HUGETLB | MEM_HUGETLB_PRIVATE) 25 26 #define ALIGN_UP(x, align_to) \ 27 ((__typeof__(x))((((unsigned long)(x)) + ((align_to)-1)) & ~((align_to)-1))) 28 29 #define MAX(a, b) (((a) > (b)) ? (a) : (b)) 30 31 struct mem_type { 32 const char *name; 33 unsigned int mem_flag; 34 uffd_test_ops_t *mem_ops; 35 bool shared; 36 }; 37 typedef struct mem_type mem_type_t; 38 39 mem_type_t mem_types[] = { 40 { 41 .name = "anon", 42 .mem_flag = MEM_ANON, 43 .mem_ops = &anon_uffd_test_ops, 44 .shared = false, 45 }, 46 { 47 .name = "shmem", 48 .mem_flag = MEM_SHMEM, 49 .mem_ops = &shmem_uffd_test_ops, 50 .shared = true, 51 }, 52 { 53 .name = "shmem-private", 54 .mem_flag = MEM_SHMEM_PRIVATE, 55 .mem_ops = &shmem_uffd_test_ops, 56 .shared = false, 57 }, 58 { 59 .name = "hugetlb", 60 .mem_flag = MEM_HUGETLB, 61 .mem_ops = &hugetlb_uffd_test_ops, 62 .shared = true, 63 }, 64 { 65 .name = "hugetlb-private", 66 .mem_flag = MEM_HUGETLB_PRIVATE, 67 .mem_ops = &hugetlb_uffd_test_ops, 68 .shared = false, 69 }, 70 }; 71 72 /* Arguments to be passed over to each uffd unit test */ 73 struct uffd_test_args { 74 mem_type_t *mem_type; 75 }; 76 typedef struct uffd_test_args uffd_test_args_t; 77 78 /* Returns: UFFD_TEST_* */ 79 typedef void (*uffd_test_fn)(uffd_global_test_opts_t *, uffd_test_args_t *); 80 81 typedef struct { 82 const char *name; 83 uffd_test_fn uffd_fn; 84 unsigned int mem_targets; 85 uint64_t uffd_feature_required; 86 uffd_test_case_ops_t *test_case_ops; 87 } uffd_test_case_t; 88 89 static char current_test[256]; 90 91 static void uffd_test_pass(void) 92 { 93 ksft_test_result_pass("%s\n", current_test); 94 } 95 96 #define uffd_test_start(...) do { \ 97 snprintf(current_test, sizeof(current_test), __VA_ARGS__); \ 98 } while (0) 99 100 #define uffd_test_fail(fmt, ...) do { \ 101 ksft_print_msg("failed reason: [" fmt "]\n", ##__VA_ARGS__); \ 102 ksft_test_result_fail("%s\n", current_test); \ 103 } while (0) 104 105 static void uffd_test_skip(const char *message) 106 { 107 ksft_test_result_skip("%s (%s)\n", current_test, message); 108 } 109 110 static void test_uffd_api(bool use_dev) 111 { 112 struct uffdio_api uffdio_api; 113 int uffd; 114 115 uffd_test_start("UFFDIO_API (with %s)", 116 use_dev ? "/dev/userfaultfd" : "syscall"); 117 118 if (use_dev) 119 uffd = uffd_open_dev(UFFD_FLAGS); 120 else 121 uffd = uffd_open_sys(UFFD_FLAGS); 122 if (uffd < 0) { 123 uffd_test_skip("cannot open userfaultfd handle"); 124 return; 125 } 126 127 /* Test wrong UFFD_API */ 128 uffdio_api.api = 0xab; 129 uffdio_api.features = 0; 130 if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) { 131 uffd_test_fail("UFFDIO_API should fail with wrong api but didn't"); 132 goto out; 133 } 134 135 /* Test wrong feature bit */ 136 uffdio_api.api = UFFD_API; 137 uffdio_api.features = BIT_ULL(63); 138 if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) { 139 uffd_test_fail("UFFDIO_API should fail with wrong feature but didn't"); 140 goto out; 141 } 142 143 /* Test normal UFFDIO_API */ 144 uffdio_api.api = UFFD_API; 145 uffdio_api.features = 0; 146 if (ioctl(uffd, UFFDIO_API, &uffdio_api)) { 147 uffd_test_fail("UFFDIO_API should succeed but failed"); 148 goto out; 149 } 150 151 /* Test double requests of UFFDIO_API with a random feature set */ 152 uffdio_api.features = BIT_ULL(0); 153 if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) { 154 uffd_test_fail("UFFDIO_API should reject initialized uffd"); 155 goto out; 156 } 157 158 uffd_test_pass(); 159 out: 160 close(uffd); 161 } 162 163 164 static bool uffd_feature_supported(uffd_test_case_t *test) 165 { 166 uint64_t features; 167 168 if (uffd_get_features(&features)) 169 return false; 170 171 return (features & test->uffd_feature_required) == 172 test->uffd_feature_required; 173 } 174 175 static int pagemap_open(void) 176 { 177 int fd = open("/proc/self/pagemap", O_RDONLY); 178 179 if (fd < 0) 180 err("open pagemap"); 181 182 return fd; 183 } 184 185 /* This macro let __LINE__ works in err() */ 186 #define pagemap_check_wp(value, wp) do { \ 187 if (!!(value & PM_UFFD_WP) != wp) \ 188 err("pagemap uffd-wp bit error: 0x%"PRIx64, value); \ 189 } while (0) 190 191 typedef struct { 192 uffd_global_test_opts_t *gopts; 193 int child_uffd; 194 } fork_event_args; 195 196 static void *fork_event_consumer(void *data) 197 { 198 fork_event_args *args = data; 199 struct uffd_msg msg = { 0 }; 200 201 args->gopts->ready_for_fork = true; 202 203 /* Read until a full msg received */ 204 while (uffd_read_msg(args->gopts, &msg)); 205 206 if (msg.event != UFFD_EVENT_FORK) 207 err("wrong message: %u\n", msg.event); 208 209 /* Just to be properly freed later */ 210 args->child_uffd = msg.arg.fork.ufd; 211 return NULL; 212 } 213 214 typedef struct { 215 int gup_fd; 216 bool pinned; 217 } pin_args; 218 219 /* 220 * Returns 0 if succeed, <0 for errors. pin_pages() needs to be paired 221 * with unpin_pages(). Currently it needs to be RO longterm pin to satisfy 222 * all needs of the test cases (e.g., trigger unshare, trigger fork() early 223 * CoW, etc.). 224 */ 225 static int pin_pages(pin_args *args, void *buffer, size_t size) 226 { 227 struct pin_longterm_test test = { 228 .addr = (uintptr_t)buffer, 229 .size = size, 230 /* Read-only pins */ 231 .flags = 0, 232 }; 233 234 if (args->pinned) 235 err("already pinned"); 236 237 args->gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR); 238 if (args->gup_fd < 0) 239 return -errno; 240 241 if (ioctl(args->gup_fd, PIN_LONGTERM_TEST_START, &test)) { 242 /* Even if gup_test existed, can be an old gup_test / kernel */ 243 close(args->gup_fd); 244 return -errno; 245 } 246 args->pinned = true; 247 return 0; 248 } 249 250 static void unpin_pages(pin_args *args) 251 { 252 if (!args->pinned) 253 err("unpin without pin first"); 254 if (ioctl(args->gup_fd, PIN_LONGTERM_TEST_STOP)) 255 err("PIN_LONGTERM_TEST_STOP"); 256 close(args->gup_fd); 257 args->pinned = false; 258 } 259 260 static int pagemap_test_fork(uffd_global_test_opts_t *gopts, bool with_event, bool test_pin) 261 { 262 fork_event_args args = { .gopts = gopts, .child_uffd = -1 }; 263 pthread_t thread; 264 pid_t child; 265 uint64_t value; 266 int fd, result; 267 268 /* Prepare a thread to resolve EVENT_FORK */ 269 if (with_event) { 270 gopts->ready_for_fork = false; 271 if (pthread_create(&thread, NULL, fork_event_consumer, &args)) 272 err("pthread_create()"); 273 while (!gopts->ready_for_fork) 274 ; /* Wait for the poll_thread to start executing before forking */ 275 } 276 277 child = fork(); 278 if (!child) { 279 /* Open the pagemap fd of the child itself */ 280 pin_args args = {}; 281 282 fd = pagemap_open(); 283 284 if (test_pin && pin_pages(&args, gopts->area_dst, gopts->page_size)) 285 /* 286 * Normally when reach here we have pinned in 287 * previous tests, so shouldn't fail anymore 288 */ 289 err("pin page failed in child"); 290 291 value = pagemap_get_entry(fd, gopts->area_dst); 292 /* 293 * After fork(), we should handle uffd-wp bit differently: 294 * 295 * (1) when with EVENT_FORK, it should persist 296 * (2) when without EVENT_FORK, it should be dropped 297 */ 298 pagemap_check_wp(value, with_event); 299 if (test_pin) 300 unpin_pages(&args); 301 /* Succeed */ 302 _exit(0); 303 } 304 waitpid(child, &result, 0); 305 306 if (with_event) { 307 if (pthread_join(thread, NULL)) 308 err("pthread_join()"); 309 if (args.child_uffd < 0) 310 err("Didn't receive child uffd"); 311 close(args.child_uffd); 312 } 313 314 return result; 315 } 316 317 static void uffd_wp_unpopulated_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) 318 { 319 uint64_t value; 320 int pagemap_fd; 321 322 if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, 323 false, true, false)) 324 err("register failed"); 325 326 pagemap_fd = pagemap_open(); 327 328 /* Test applying pte marker to anon unpopulated */ 329 wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true); 330 value = pagemap_get_entry(pagemap_fd, gopts->area_dst); 331 pagemap_check_wp(value, true); 332 333 /* Test unprotect on anon pte marker */ 334 wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, false); 335 value = pagemap_get_entry(pagemap_fd, gopts->area_dst); 336 pagemap_check_wp(value, false); 337 338 /* Test zap on anon marker */ 339 wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true); 340 if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED)) 341 err("madvise(MADV_DONTNEED) failed"); 342 value = pagemap_get_entry(pagemap_fd, gopts->area_dst); 343 pagemap_check_wp(value, false); 344 345 /* Test fault in after marker removed */ 346 *gopts->area_dst = 1; 347 value = pagemap_get_entry(pagemap_fd, gopts->area_dst); 348 pagemap_check_wp(value, false); 349 /* Drop it to make pte none again */ 350 if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED)) 351 err("madvise(MADV_DONTNEED) failed"); 352 353 /* Test read-zero-page upon pte marker */ 354 wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true); 355 *(volatile char *)gopts->area_dst; 356 /* Drop it to make pte none again */ 357 if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED)) 358 err("madvise(MADV_DONTNEED) failed"); 359 360 uffd_test_pass(); 361 } 362 363 static void uffd_wp_fork_test_common(uffd_global_test_opts_t *gopts, uffd_test_args_t *args, 364 bool with_event) 365 { 366 int pagemap_fd; 367 uint64_t value; 368 369 if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, 370 false, true, false)) 371 err("register failed"); 372 373 pagemap_fd = pagemap_open(); 374 375 /* Touch the page */ 376 *gopts->area_dst = 1; 377 wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true); 378 value = pagemap_get_entry(pagemap_fd, gopts->area_dst); 379 pagemap_check_wp(value, true); 380 if (pagemap_test_fork(gopts, with_event, false)) { 381 uffd_test_fail("Detected %s uffd-wp bit in child in present pte", 382 with_event ? "missing" : "stall"); 383 goto out; 384 } 385 386 /* 387 * This is an attempt for zapping the pgtable so as to test the 388 * markers. 389 * 390 * For private mappings, PAGEOUT will only work on exclusive ptes 391 * (PM_MMAP_EXCLUSIVE) which we should satisfy. 392 * 393 * For shared, PAGEOUT may not work. Use DONTNEED instead which 394 * plays a similar role of zapping (rather than freeing the page) 395 * to expose pte markers. 396 */ 397 if (args->mem_type->shared) { 398 if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED)) 399 err("MADV_DONTNEED"); 400 } else { 401 /* 402 * NOTE: ignore retval because private-hugetlb doesn't yet 403 * support swapping, so it could fail. 404 */ 405 madvise(gopts->area_dst, gopts->page_size, MADV_PAGEOUT); 406 } 407 408 /* Uffd-wp should persist even swapped out */ 409 value = pagemap_get_entry(pagemap_fd, gopts->area_dst); 410 pagemap_check_wp(value, true); 411 if (pagemap_test_fork(gopts, with_event, false)) { 412 uffd_test_fail("Detected %s uffd-wp bit in child in zapped pte", 413 with_event ? "missing" : "stall"); 414 goto out; 415 } 416 417 /* Unprotect; this tests swap pte modifications */ 418 wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, false); 419 value = pagemap_get_entry(pagemap_fd, gopts->area_dst); 420 pagemap_check_wp(value, false); 421 422 /* Fault in the page from disk */ 423 *gopts->area_dst = 2; 424 value = pagemap_get_entry(pagemap_fd, gopts->area_dst); 425 pagemap_check_wp(value, false); 426 uffd_test_pass(); 427 out: 428 if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size)) 429 err("unregister failed"); 430 close(pagemap_fd); 431 } 432 433 static void uffd_wp_fork_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) 434 { 435 uffd_wp_fork_test_common(gopts, args, false); 436 } 437 438 static void uffd_wp_fork_with_event_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) 439 { 440 uffd_wp_fork_test_common(gopts, args, true); 441 } 442 443 static void uffd_wp_fork_pin_test_common(uffd_global_test_opts_t *gopts, 444 uffd_test_args_t *args, 445 bool with_event) 446 { 447 int pagemap_fd; 448 pin_args pin_args = {}; 449 450 if (uffd_register(gopts->uffd, gopts->area_dst, gopts->page_size, false, true, false)) 451 err("register failed"); 452 453 pagemap_fd = pagemap_open(); 454 455 /* Touch the page */ 456 *gopts->area_dst = 1; 457 wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true); 458 459 /* 460 * 1. First pin, then fork(). This tests fork() special path when 461 * doing early CoW if the page is private. 462 */ 463 if (pin_pages(&pin_args, gopts->area_dst, gopts->page_size)) { 464 uffd_test_skip("Possibly CONFIG_GUP_TEST missing " 465 "or unprivileged"); 466 close(pagemap_fd); 467 uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size); 468 return; 469 } 470 471 if (pagemap_test_fork(gopts, with_event, false)) { 472 uffd_test_fail("Detected %s uffd-wp bit in early CoW of fork()", 473 with_event ? "missing" : "stall"); 474 unpin_pages(&pin_args); 475 goto out; 476 } 477 478 unpin_pages(&pin_args); 479 480 /* 481 * 2. First fork(), then pin (in the child, where test_pin==true). 482 * This tests COR, aka, page unsharing on private memories. 483 */ 484 if (pagemap_test_fork(gopts, with_event, true)) { 485 uffd_test_fail("Detected %s uffd-wp bit when RO pin", 486 with_event ? "missing" : "stall"); 487 goto out; 488 } 489 uffd_test_pass(); 490 out: 491 if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size)) 492 err("register failed"); 493 close(pagemap_fd); 494 } 495 496 static void uffd_wp_fork_pin_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) 497 { 498 uffd_wp_fork_pin_test_common(gopts, args, false); 499 } 500 501 static void uffd_wp_fork_pin_with_event_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) 502 { 503 uffd_wp_fork_pin_test_common(gopts, args, true); 504 } 505 506 static void check_memory_contents(uffd_global_test_opts_t *gopts, char *p) 507 { 508 unsigned long i, j; 509 uint8_t expected_byte; 510 511 for (i = 0; i < gopts->nr_pages; ++i) { 512 expected_byte = ~((uint8_t)(i % ((uint8_t)-1))); 513 for (j = 0; j < gopts->page_size; j++) { 514 uint8_t v = *(uint8_t *)(p + (i * gopts->page_size) + j); 515 if (v != expected_byte) 516 err("unexpected page contents"); 517 } 518 } 519 } 520 521 static void uffd_minor_test_common(uffd_global_test_opts_t *gopts, bool test_collapse, bool test_wp) 522 { 523 unsigned long p; 524 pthread_t uffd_mon; 525 char c = '\0'; 526 struct uffd_args args = { 0 }; 527 args.gopts = gopts; 528 529 /* 530 * NOTE: MADV_COLLAPSE is not yet compatible with WP, so testing 531 * both do not make much sense. 532 */ 533 assert(!(test_collapse && test_wp)); 534 535 if (uffd_register(gopts->uffd, gopts->area_dst_alias, gopts->nr_pages * gopts->page_size, 536 /* NOTE! MADV_COLLAPSE may not work with uffd-wp */ 537 false, test_wp, true)) 538 err("register failure"); 539 540 /* 541 * After registering with UFFD, populate the non-UFFD-registered side of 542 * the shared mapping. This should *not* trigger any UFFD minor faults. 543 */ 544 for (p = 0; p < gopts->nr_pages; ++p) 545 memset(gopts->area_dst + (p * gopts->page_size), p % ((uint8_t)-1), 546 gopts->page_size); 547 548 args.apply_wp = test_wp; 549 if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) 550 err("uffd_poll_thread create"); 551 552 /* 553 * Read each of the pages back using the UFFD-registered mapping. We 554 * expect that the first time we touch a page, it will result in a minor 555 * fault. uffd_poll_thread will resolve the fault by bit-flipping the 556 * page's contents, and then issuing a CONTINUE ioctl. 557 */ 558 check_memory_contents(gopts, gopts->area_dst_alias); 559 560 if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c)) 561 err("pipe write"); 562 if (pthread_join(uffd_mon, NULL)) 563 err("join() failed"); 564 565 if (test_collapse) { 566 if (madvise(gopts->area_dst_alias, gopts->nr_pages * gopts->page_size, 567 MADV_COLLAPSE)) { 568 /* It's fine to fail for this one... */ 569 uffd_test_skip("MADV_COLLAPSE failed"); 570 return; 571 } 572 573 uffd_test_ops->check_pmd_mapping(gopts, 574 gopts->area_dst, 575 gopts->nr_pages * gopts->page_size / 576 read_pmd_pagesize()); 577 /* 578 * This won't cause uffd-fault - it purely just makes sure there 579 * was no corruption. 580 */ 581 check_memory_contents(gopts, gopts->area_dst_alias); 582 } 583 584 if (args.missing_faults != 0 || args.minor_faults != gopts->nr_pages) 585 uffd_test_fail("stats check error"); 586 else 587 uffd_test_pass(); 588 } 589 590 void uffd_minor_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) 591 { 592 uffd_minor_test_common(gopts, false, false); 593 } 594 595 void uffd_minor_wp_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) 596 { 597 uffd_minor_test_common(gopts, false, true); 598 } 599 600 void uffd_minor_collapse_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) 601 { 602 uffd_minor_test_common(gopts, true, false); 603 } 604 605 static sigjmp_buf jbuf, *sigbuf; 606 607 static void sighndl(int sig, siginfo_t *siginfo, void *ptr) 608 { 609 if (sig == SIGBUS) { 610 if (sigbuf) 611 siglongjmp(*sigbuf, 1); 612 abort(); 613 } 614 } 615 616 /* 617 * For non-cooperative userfaultfd test we fork() a process that will 618 * generate pagefaults, will mremap the area monitored by the 619 * userfaultfd and at last this process will release the monitored 620 * area. 621 * For the anonymous and shared memory the area is divided into two 622 * parts, the first part is accessed before mremap, and the second 623 * part is accessed after mremap. Since hugetlbfs does not support 624 * mremap, the entire monitored area is accessed in a single pass for 625 * HUGETLB_TEST. 626 * The release of the pages currently generates event for shmem and 627 * anonymous memory (UFFD_EVENT_REMOVE), hence it is not checked 628 * for hugetlb. 629 * For signal test(UFFD_FEATURE_SIGBUS), signal_test = 1, we register 630 * monitored area, generate pagefaults and test that signal is delivered. 631 * Use UFFDIO_COPY to allocate missing page and retry. For signal_test = 2 632 * test robustness use case - we release monitored area, fork a process 633 * that will generate pagefaults and verify signal is generated. 634 * This also tests UFFD_FEATURE_EVENT_FORK event along with the signal 635 * feature. Using monitor thread, verify no userfault events are generated. 636 */ 637 static int faulting_process(uffd_global_test_opts_t *gopts, int signal_test, bool wp) 638 { 639 unsigned long nr, i; 640 unsigned long long count; 641 unsigned long split_nr_pages; 642 unsigned long lastnr; 643 struct sigaction act; 644 volatile unsigned long signalled = 0; 645 646 split_nr_pages = (gopts->nr_pages + 1) / 2; 647 648 if (signal_test) { 649 sigbuf = &jbuf; 650 memset(&act, 0, sizeof(act)); 651 act.sa_sigaction = sighndl; 652 act.sa_flags = SA_SIGINFO; 653 if (sigaction(SIGBUS, &act, 0)) 654 err("sigaction"); 655 lastnr = (unsigned long)-1; 656 } 657 658 for (nr = 0; nr < split_nr_pages; nr++) { 659 volatile int steps = 1; 660 unsigned long offset = nr * gopts->page_size; 661 662 if (signal_test) { 663 if (sigsetjmp(*sigbuf, 1) != 0) { 664 if (steps == 1 && nr == lastnr) 665 err("Signal repeated"); 666 667 lastnr = nr; 668 if (signal_test == 1) { 669 if (steps == 1) { 670 /* This is a MISSING request */ 671 steps++; 672 if (copy_page(gopts, offset, wp)) 673 signalled++; 674 } else { 675 /* This is a WP request */ 676 assert(steps == 2); 677 wp_range(gopts->uffd, 678 (__u64)gopts->area_dst + 679 offset, 680 gopts->page_size, false); 681 } 682 } else { 683 signalled++; 684 continue; 685 } 686 } 687 } 688 689 count = *area_count(gopts->area_dst, nr, gopts); 690 if (count != gopts->count_verify[nr]) 691 err("nr %lu memory corruption %llu %llu\n", 692 nr, count, gopts->count_verify[nr]); 693 /* 694 * Trigger write protection if there is by writing 695 * the same value back. 696 */ 697 *area_count(gopts->area_dst, nr, gopts) = count; 698 } 699 700 if (signal_test) 701 return signalled != split_nr_pages; 702 703 gopts->area_dst = mremap(gopts->area_dst, gopts->nr_pages * gopts->page_size, 704 gopts->nr_pages * gopts->page_size, 705 MREMAP_MAYMOVE | MREMAP_FIXED, 706 gopts->area_src); 707 if (gopts->area_dst == MAP_FAILED) 708 err("mremap"); 709 /* Reset area_src since we just clobbered it */ 710 gopts->area_src = NULL; 711 712 for (; nr < gopts->nr_pages; nr++) { 713 count = *area_count(gopts->area_dst, nr, gopts); 714 if (count != gopts->count_verify[nr]) { 715 err("nr %lu memory corruption %llu %llu\n", 716 nr, count, gopts->count_verify[nr]); 717 } 718 /* 719 * Trigger write protection if there is by writing 720 * the same value back. 721 */ 722 *area_count(gopts->area_dst, nr, gopts) = count; 723 } 724 725 uffd_test_ops->release_pages(gopts, gopts->area_dst); 726 727 for (nr = 0; nr < gopts->nr_pages; nr++) 728 for (i = 0; i < gopts->page_size; i++) 729 if (*(gopts->area_dst + nr * gopts->page_size + i) != 0) 730 err("page %lu offset %lu is not zero", nr, i); 731 732 return 0; 733 } 734 735 static void uffd_sigbus_test_common(uffd_global_test_opts_t *gopts, bool wp) 736 { 737 unsigned long userfaults; 738 pthread_t uffd_mon; 739 pid_t pid; 740 int err; 741 char c = '\0'; 742 struct uffd_args args = { 0 }; 743 args.gopts = gopts; 744 745 gopts->ready_for_fork = false; 746 747 fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK); 748 749 if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, 750 true, wp, false)) 751 err("register failure"); 752 753 if (faulting_process(gopts, 1, wp)) 754 err("faulting process failed"); 755 756 uffd_test_ops->release_pages(gopts, gopts->area_dst); 757 758 args.apply_wp = wp; 759 if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) 760 err("uffd_poll_thread create"); 761 762 while (!gopts->ready_for_fork) 763 ; /* Wait for the poll_thread to start executing before forking */ 764 765 pid = fork(); 766 if (pid < 0) 767 err("fork"); 768 769 if (!pid) 770 _exit(faulting_process(gopts, 2, wp)); 771 772 waitpid(pid, &err, 0); 773 if (err) 774 err("faulting process failed"); 775 if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c)) 776 err("pipe write"); 777 if (pthread_join(uffd_mon, (void **)&userfaults)) 778 err("pthread_join()"); 779 780 if (userfaults) 781 uffd_test_fail("Signal test failed, userfaults: %ld", userfaults); 782 else 783 uffd_test_pass(); 784 } 785 786 static void uffd_sigbus_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) 787 { 788 uffd_sigbus_test_common(gopts, false); 789 } 790 791 static void uffd_sigbus_wp_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) 792 { 793 uffd_sigbus_test_common(gopts, true); 794 } 795 796 static void uffd_events_test_common(uffd_global_test_opts_t *gopts, bool wp) 797 { 798 pthread_t uffd_mon; 799 pid_t pid; 800 int err; 801 char c = '\0'; 802 struct uffd_args args = { 0 }; 803 args.gopts = gopts; 804 805 gopts->ready_for_fork = false; 806 807 fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK); 808 if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, 809 true, wp, false)) 810 err("register failure"); 811 812 args.apply_wp = wp; 813 if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) 814 err("uffd_poll_thread create"); 815 816 while (!gopts->ready_for_fork) 817 ; /* Wait for the poll_thread to start executing before forking */ 818 819 pid = fork(); 820 if (pid < 0) 821 err("fork"); 822 823 if (!pid) 824 _exit(faulting_process(gopts, 0, wp)); 825 826 waitpid(pid, &err, 0); 827 if (err) 828 err("faulting process failed"); 829 if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c)) 830 err("pipe write"); 831 if (pthread_join(uffd_mon, NULL)) 832 err("pthread_join()"); 833 834 if (args.missing_faults != gopts->nr_pages) 835 uffd_test_fail("Fault counts wrong"); 836 else 837 uffd_test_pass(); 838 } 839 840 static void uffd_events_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) 841 { 842 uffd_events_test_common(gopts, false); 843 } 844 845 static void uffd_events_wp_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) 846 { 847 uffd_events_test_common(gopts, true); 848 } 849 850 static void retry_uffdio_zeropage(uffd_global_test_opts_t *gopts, 851 struct uffdio_zeropage *uffdio_zeropage) 852 { 853 uffd_test_ops->alias_mapping(gopts, &uffdio_zeropage->range.start, 854 uffdio_zeropage->range.len, 855 0); 856 if (ioctl(gopts->uffd, UFFDIO_ZEROPAGE, uffdio_zeropage)) { 857 if (uffdio_zeropage->zeropage != -EEXIST) 858 err("UFFDIO_ZEROPAGE error: %"PRId64, 859 (int64_t)uffdio_zeropage->zeropage); 860 } else { 861 err("UFFDIO_ZEROPAGE error: %"PRId64, 862 (int64_t)uffdio_zeropage->zeropage); 863 } 864 } 865 866 static bool do_uffdio_zeropage(uffd_global_test_opts_t *gopts, bool has_zeropage) 867 { 868 struct uffdio_zeropage uffdio_zeropage = { 0 }; 869 int ret; 870 __s64 res; 871 872 uffdio_zeropage.range.start = (unsigned long) gopts->area_dst; 873 uffdio_zeropage.range.len = gopts->page_size; 874 uffdio_zeropage.mode = 0; 875 ret = ioctl(gopts->uffd, UFFDIO_ZEROPAGE, &uffdio_zeropage); 876 res = uffdio_zeropage.zeropage; 877 if (ret) { 878 /* real retval in ufdio_zeropage.zeropage */ 879 if (has_zeropage) 880 err("UFFDIO_ZEROPAGE error: %"PRId64, (int64_t)res); 881 else if (res != -EINVAL) 882 err("UFFDIO_ZEROPAGE not -EINVAL"); 883 } else if (has_zeropage) { 884 if (res != gopts->page_size) 885 err("UFFDIO_ZEROPAGE unexpected size"); 886 else 887 retry_uffdio_zeropage(gopts, &uffdio_zeropage); 888 return true; 889 } else 890 err("UFFDIO_ZEROPAGE succeeded"); 891 892 return false; 893 } 894 895 /* 896 * Registers a range with MISSING mode only for zeropage test. Return true 897 * if UFFDIO_ZEROPAGE supported, false otherwise. Can't use uffd_register() 898 * because we want to detect .ioctls along the way. 899 */ 900 static bool 901 uffd_register_detect_zeropage(int uffd, void *addr, uint64_t len) 902 { 903 uint64_t ioctls = 0; 904 905 if (uffd_register_with_ioctls(uffd, addr, len, true, 906 false, false, &ioctls)) 907 err("zeropage register fail"); 908 909 return ioctls & (1 << _UFFDIO_ZEROPAGE); 910 } 911 912 /* exercise UFFDIO_ZEROPAGE */ 913 static void uffd_zeropage_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) 914 { 915 bool has_zeropage; 916 int i; 917 918 has_zeropage = uffd_register_detect_zeropage(gopts->uffd, 919 gopts->area_dst, 920 gopts->page_size); 921 if (gopts->area_dst_alias) 922 /* Ignore the retval; we already have it */ 923 uffd_register_detect_zeropage(gopts->uffd, gopts->area_dst_alias, gopts->page_size); 924 925 if (do_uffdio_zeropage(gopts, has_zeropage)) 926 for (i = 0; i < gopts->page_size; i++) 927 if (gopts->area_dst[i] != 0) 928 err("data non-zero at offset %d\n", i); 929 930 if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size)) 931 err("unregister"); 932 933 if (gopts->area_dst_alias && uffd_unregister(gopts->uffd, 934 gopts->area_dst_alias, 935 gopts->page_size)) 936 err("unregister"); 937 938 uffd_test_pass(); 939 } 940 941 static void uffd_register_poison(int uffd, void *addr, uint64_t len) 942 { 943 uint64_t ioctls = 0; 944 uint64_t expected = (1 << _UFFDIO_COPY) | (1 << _UFFDIO_POISON); 945 946 if (uffd_register_with_ioctls(uffd, addr, len, true, 947 false, false, &ioctls)) 948 err("poison register fail"); 949 950 if ((ioctls & expected) != expected) 951 err("registered area doesn't support COPY and POISON ioctls"); 952 } 953 954 static void do_uffdio_poison(uffd_global_test_opts_t *gopts, unsigned long offset) 955 { 956 struct uffdio_poison uffdio_poison = { 0 }; 957 int ret; 958 __s64 res; 959 960 uffdio_poison.range.start = (unsigned long) gopts->area_dst + offset; 961 uffdio_poison.range.len = gopts->page_size; 962 uffdio_poison.mode = 0; 963 ret = ioctl(gopts->uffd, UFFDIO_POISON, &uffdio_poison); 964 res = uffdio_poison.updated; 965 966 if (ret) 967 err("UFFDIO_POISON error: %"PRId64, (int64_t)res); 968 else if (res != gopts->page_size) 969 err("UFFDIO_POISON unexpected size: %"PRId64, (int64_t)res); 970 } 971 972 static void uffd_poison_handle_fault(uffd_global_test_opts_t *gopts, 973 struct uffd_msg *msg, 974 struct uffd_args *args) 975 { 976 unsigned long offset; 977 978 if (msg->event != UFFD_EVENT_PAGEFAULT) 979 err("unexpected msg event %u", msg->event); 980 981 if (msg->arg.pagefault.flags & 982 (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR)) 983 err("unexpected fault type %llu", msg->arg.pagefault.flags); 984 985 offset = (char *)(unsigned long)msg->arg.pagefault.address - gopts->area_dst; 986 offset &= ~(gopts->page_size-1); 987 988 /* Odd pages -> copy zeroed page; even pages -> poison. */ 989 if (offset & gopts->page_size) 990 copy_page(gopts, offset, false); 991 else 992 do_uffdio_poison(gopts, offset); 993 } 994 995 /* Make sure to cover odd/even, and minimum duplications */ 996 #define UFFD_POISON_TEST_NPAGES 4 997 998 static void uffd_poison_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs) 999 { 1000 pthread_t uffd_mon; 1001 char c; 1002 struct uffd_args args = { 0 }; 1003 struct sigaction act = { 0 }; 1004 unsigned long nr_sigbus = 0; 1005 unsigned long nr, poison_pages = UFFD_POISON_TEST_NPAGES; 1006 1007 if (gopts->nr_pages < poison_pages) { 1008 uffd_test_skip("Too less pages for POISON test"); 1009 return; 1010 } 1011 1012 args.gopts = gopts; 1013 1014 fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK); 1015 1016 uffd_register_poison(gopts->uffd, gopts->area_dst, poison_pages * gopts->page_size); 1017 memset(gopts->area_src, 0, poison_pages * gopts->page_size); 1018 1019 args.handle_fault = uffd_poison_handle_fault; 1020 if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) 1021 err("uffd_poll_thread create"); 1022 1023 sigbuf = &jbuf; 1024 act.sa_sigaction = sighndl; 1025 act.sa_flags = SA_SIGINFO; 1026 if (sigaction(SIGBUS, &act, 0)) 1027 err("sigaction"); 1028 1029 for (nr = 0; nr < poison_pages; ++nr) { 1030 unsigned long offset = nr * gopts->page_size; 1031 const char *bytes = (const char *) gopts->area_dst + offset; 1032 const char *i; 1033 1034 if (sigsetjmp(*sigbuf, 1)) { 1035 /* 1036 * Access below triggered a SIGBUS, which was caught by 1037 * sighndl, which then jumped here. Count this SIGBUS, 1038 * and move on to next page. 1039 */ 1040 ++nr_sigbus; 1041 continue; 1042 } 1043 1044 for (i = bytes; i < bytes + gopts->page_size; ++i) { 1045 if (*i) 1046 err("nonzero byte in area_dst (%p) at %p: %u", 1047 gopts->area_dst, i, *i); 1048 } 1049 } 1050 1051 if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c)) 1052 err("pipe write"); 1053 if (pthread_join(uffd_mon, NULL)) 1054 err("pthread_join()"); 1055 1056 if (nr_sigbus != poison_pages / 2) 1057 err("expected to receive %lu SIGBUS, actually received %lu", 1058 poison_pages / 2, nr_sigbus); 1059 1060 uffd_test_pass(); 1061 } 1062 1063 static void 1064 uffd_move_handle_fault_common(uffd_global_test_opts_t *gopts, 1065 struct uffd_msg *msg, 1066 struct uffd_args *args, 1067 unsigned long len) 1068 { 1069 unsigned long offset; 1070 1071 if (msg->event != UFFD_EVENT_PAGEFAULT) 1072 err("unexpected msg event %u", msg->event); 1073 1074 if (msg->arg.pagefault.flags & 1075 (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR | UFFD_PAGEFAULT_FLAG_WRITE)) 1076 err("unexpected fault type %llu", msg->arg.pagefault.flags); 1077 1078 offset = (char *)(unsigned long)msg->arg.pagefault.address - gopts->area_dst; 1079 offset &= ~(len-1); 1080 1081 if (move_page(gopts, offset, len)) 1082 args->missing_faults++; 1083 } 1084 1085 static void uffd_move_handle_fault(uffd_global_test_opts_t *gopts, struct uffd_msg *msg, 1086 struct uffd_args *args) 1087 { 1088 uffd_move_handle_fault_common(gopts, msg, args, gopts->page_size); 1089 } 1090 1091 static void uffd_move_pmd_handle_fault(uffd_global_test_opts_t *gopts, struct uffd_msg *msg, 1092 struct uffd_args *args) 1093 { 1094 uffd_move_handle_fault_common(gopts, msg, args, read_pmd_pagesize()); 1095 } 1096 1097 static void 1098 uffd_move_test_common(uffd_global_test_opts_t *gopts, 1099 uffd_test_args_t *targs, 1100 unsigned long chunk_size, 1101 void (*handle_fault)(struct uffd_global_test_opts *gopts, 1102 struct uffd_msg *msg, struct uffd_args *args) 1103 ) 1104 { 1105 unsigned long nr; 1106 pthread_t uffd_mon; 1107 char c = '\0'; 1108 unsigned long long count; 1109 struct uffd_args args = { 0 }; 1110 char *orig_area_src = NULL, *orig_area_dst = NULL; 1111 unsigned long step_size, step_count; 1112 unsigned long src_offs = 0; 1113 unsigned long dst_offs = 0; 1114 1115 args.gopts = gopts; 1116 1117 /* Prevent source pages from being mapped more than once */ 1118 if (madvise(gopts->area_src, gopts->nr_pages * gopts->page_size, MADV_DONTFORK)) 1119 err("madvise(MADV_DONTFORK) failure"); 1120 1121 if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, 1122 true, false, false)) 1123 err("register failure"); 1124 1125 args.handle_fault = handle_fault; 1126 if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) 1127 err("uffd_poll_thread create"); 1128 1129 step_size = chunk_size / gopts->page_size; 1130 step_count = gopts->nr_pages / step_size; 1131 1132 if (chunk_size > gopts->page_size) { 1133 char *aligned_src = ALIGN_UP(gopts->area_src, chunk_size); 1134 char *aligned_dst = ALIGN_UP(gopts->area_dst, chunk_size); 1135 1136 if (aligned_src != gopts->area_src || aligned_dst != gopts->area_dst) { 1137 src_offs = (aligned_src - gopts->area_src) / gopts->page_size; 1138 dst_offs = (aligned_dst - gopts->area_dst) / gopts->page_size; 1139 step_count--; 1140 } 1141 orig_area_src = gopts->area_src; 1142 orig_area_dst = gopts->area_dst; 1143 gopts->area_src = aligned_src; 1144 gopts->area_dst = aligned_dst; 1145 } 1146 1147 /* 1148 * Read each of the pages back using the UFFD-registered mapping. We 1149 * expect that the first time we touch a page, it will result in a missing 1150 * fault. uffd_poll_thread will resolve the fault by moving source 1151 * page to destination. 1152 */ 1153 for (nr = 0; nr < step_count * step_size; nr += step_size) { 1154 unsigned long i; 1155 1156 /* Check area_src content */ 1157 for (i = 0; i < step_size; i++) { 1158 count = *area_count(gopts->area_src, nr + i, gopts); 1159 if (count != gopts->count_verify[src_offs + nr + i]) 1160 err("nr %lu source memory invalid %llu %llu\n", 1161 nr + i, count, gopts->count_verify[src_offs + nr + i]); 1162 } 1163 1164 /* Faulting into area_dst should move the page or the huge page */ 1165 for (i = 0; i < step_size; i++) { 1166 count = *area_count(gopts->area_dst, nr + i, gopts); 1167 if (count != gopts->count_verify[dst_offs + nr + i]) 1168 err("nr %lu memory corruption %llu %llu\n", 1169 nr, count, gopts->count_verify[dst_offs + nr + i]); 1170 } 1171 1172 /* Re-check area_src content which should be empty */ 1173 for (i = 0; i < step_size; i++) { 1174 count = *area_count(gopts->area_src, nr + i, gopts); 1175 if (count != 0) 1176 err("nr %lu move failed %llu %llu\n", 1177 nr, count, gopts->count_verify[src_offs + nr + i]); 1178 } 1179 } 1180 if (chunk_size > gopts->page_size) { 1181 gopts->area_src = orig_area_src; 1182 gopts->area_dst = orig_area_dst; 1183 } 1184 1185 if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c)) 1186 err("pipe write"); 1187 if (pthread_join(uffd_mon, NULL)) 1188 err("join() failed"); 1189 1190 if (args.missing_faults != step_count || args.minor_faults != 0) 1191 uffd_test_fail("stats check error"); 1192 else 1193 uffd_test_pass(); 1194 } 1195 1196 static void uffd_move_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs) 1197 { 1198 uffd_move_test_common(gopts, targs, gopts->page_size, uffd_move_handle_fault); 1199 } 1200 1201 static void uffd_move_pmd_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs) 1202 { 1203 if (madvise(gopts->area_dst, gopts->nr_pages * gopts->page_size, MADV_HUGEPAGE)) 1204 err("madvise(MADV_HUGEPAGE) failure"); 1205 uffd_move_test_common(gopts, targs, read_pmd_pagesize(), 1206 uffd_move_pmd_handle_fault); 1207 } 1208 1209 static void uffd_move_pmd_split_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs) 1210 { 1211 if (madvise(gopts->area_dst, gopts->nr_pages * gopts->page_size, MADV_NOHUGEPAGE)) 1212 err("madvise(MADV_NOHUGEPAGE) failure"); 1213 uffd_move_test_common(gopts, targs, read_pmd_pagesize(), 1214 uffd_move_pmd_handle_fault); 1215 } 1216 1217 static bool 1218 uffdio_verify_results(const char *name, int ret, int error, long result) 1219 { 1220 /* 1221 * Should always return -1 with errno=EAGAIN, with corresponding 1222 * result field updated in ioctl() args to be -EAGAIN too 1223 * (e.g. copy.copy field for UFFDIO_COPY). 1224 */ 1225 if (ret != -1) { 1226 uffd_test_fail("%s should have returned -1", name); 1227 return false; 1228 } 1229 1230 if (error != EAGAIN) { 1231 uffd_test_fail("%s should have errno==EAGAIN", name); 1232 return false; 1233 } 1234 1235 if (result != -EAGAIN) { 1236 uffd_test_fail("%s should have been updated for -EAGAIN", 1237 name); 1238 return false; 1239 } 1240 1241 return true; 1242 } 1243 1244 /* 1245 * This defines a function to test one ioctl. Note that here "field" can 1246 * be 1 or anything not -EAGAIN. With that initial value set, we can 1247 * verify later that it should be updated by kernel (when -EAGAIN 1248 * returned), by checking whether it is also updated to -EAGAIN. 1249 */ 1250 #define DEFINE_MMAP_CHANGING_TEST(name, ioctl_name, field) \ 1251 static bool uffdio_mmap_changing_test_##name(int fd) \ 1252 { \ 1253 int ret; \ 1254 struct uffdio_##name args = { \ 1255 .field = 1, \ 1256 }; \ 1257 ret = ioctl(fd, ioctl_name, &args); \ 1258 return uffdio_verify_results(#ioctl_name, ret, errno, args.field); \ 1259 } 1260 1261 DEFINE_MMAP_CHANGING_TEST(zeropage, UFFDIO_ZEROPAGE, zeropage) 1262 DEFINE_MMAP_CHANGING_TEST(copy, UFFDIO_COPY, copy) 1263 DEFINE_MMAP_CHANGING_TEST(move, UFFDIO_MOVE, move) 1264 DEFINE_MMAP_CHANGING_TEST(poison, UFFDIO_POISON, updated) 1265 DEFINE_MMAP_CHANGING_TEST(continue, UFFDIO_CONTINUE, mapped) 1266 1267 typedef enum { 1268 /* We actually do not care about any state except UNINTERRUPTIBLE.. */ 1269 THR_STATE_UNKNOWN = 0, 1270 THR_STATE_UNINTERRUPTIBLE, 1271 } thread_state; 1272 1273 typedef struct { 1274 uffd_global_test_opts_t *gopts; 1275 volatile pid_t *pid; 1276 } mmap_changing_thread_args; 1277 1278 static void sleep_short(void) 1279 { 1280 usleep(1000); 1281 } 1282 1283 static thread_state thread_state_get(pid_t tid) 1284 { 1285 const char *header = "State:\t"; 1286 char tmp[256], *p, c; 1287 FILE *fp; 1288 1289 snprintf(tmp, sizeof(tmp), "/proc/%d/status", tid); 1290 fp = fopen(tmp, "r"); 1291 1292 if (!fp) 1293 return THR_STATE_UNKNOWN; 1294 1295 while (fgets(tmp, sizeof(tmp), fp)) { 1296 p = strstr(tmp, header); 1297 if (p) { 1298 /* For example, "State:\tD (disk sleep)" */ 1299 c = *(p + strlen(header)); 1300 return c == 'D' ? 1301 THR_STATE_UNINTERRUPTIBLE : THR_STATE_UNKNOWN; 1302 } 1303 } 1304 1305 return THR_STATE_UNKNOWN; 1306 } 1307 1308 static void thread_state_until(pid_t tid, thread_state state) 1309 { 1310 thread_state s; 1311 1312 do { 1313 s = thread_state_get(tid); 1314 sleep_short(); 1315 } while (s != state); 1316 } 1317 1318 static void *uffd_mmap_changing_thread(void *opaque) 1319 { 1320 mmap_changing_thread_args *args = opaque; 1321 uffd_global_test_opts_t *gopts = args->gopts; 1322 volatile pid_t *pid = args->pid; 1323 int ret; 1324 1325 /* Unfortunately, it's only fetch-able from the thread itself.. */ 1326 assert(*pid == 0); 1327 *pid = syscall(SYS_gettid); 1328 1329 /* Inject an event, this will hang solid until the event read */ 1330 ret = madvise(gopts->area_dst, gopts->page_size, MADV_REMOVE); 1331 if (ret) 1332 err("madvise(MADV_REMOVE) failed"); 1333 1334 return NULL; 1335 } 1336 1337 static void uffd_consume_message(uffd_global_test_opts_t *gopts) 1338 { 1339 struct uffd_msg msg = { 0 }; 1340 1341 while (uffd_read_msg(gopts, &msg)); 1342 } 1343 1344 static void uffd_mmap_changing_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs) 1345 { 1346 /* 1347 * This stores the real PID (which can be different from how tid is 1348 * defined..) for the child thread, 0 means not initialized. 1349 */ 1350 pid_t pid = 0; 1351 pthread_t tid; 1352 int ret; 1353 mmap_changing_thread_args args = { gopts, &pid }; 1354 1355 if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, 1356 true, false, false)) 1357 err("uffd_register() failed"); 1358 1359 /* Create a thread to generate the racy event */ 1360 ret = pthread_create(&tid, NULL, uffd_mmap_changing_thread, &args); 1361 if (ret) 1362 err("pthread_create() failed"); 1363 1364 /* 1365 * Wait until the thread setup the pid. Use volatile to make sure 1366 * it reads from RAM not regs. 1367 */ 1368 while (!(volatile pid_t)pid) 1369 sleep_short(); 1370 1371 /* Wait until the thread hangs at REMOVE event */ 1372 thread_state_until(pid, THR_STATE_UNINTERRUPTIBLE); 1373 1374 if (!uffdio_mmap_changing_test_copy(gopts->uffd)) 1375 return; 1376 1377 if (!uffdio_mmap_changing_test_zeropage(gopts->uffd)) 1378 return; 1379 1380 if (!uffdio_mmap_changing_test_move(gopts->uffd)) 1381 return; 1382 1383 if (!uffdio_mmap_changing_test_poison(gopts->uffd)) 1384 return; 1385 1386 if (!uffdio_mmap_changing_test_continue(gopts->uffd)) 1387 return; 1388 1389 /* 1390 * All succeeded above! Recycle everything. Start by reading the 1391 * event so as to kick the thread roll again.. 1392 */ 1393 uffd_consume_message(gopts); 1394 1395 ret = pthread_join(tid, NULL); 1396 assert(ret == 0); 1397 1398 uffd_test_pass(); 1399 } 1400 1401 static int prevent_hugepages(uffd_global_test_opts_t *gopts, const char **errmsg) 1402 { 1403 /* This should be done before source area is populated */ 1404 if (madvise(gopts->area_src, gopts->nr_pages * gopts->page_size, MADV_NOHUGEPAGE)) { 1405 /* Ignore only if CONFIG_TRANSPARENT_HUGEPAGE=n */ 1406 if (errno != EINVAL) { 1407 if (errmsg) 1408 *errmsg = "madvise(MADV_NOHUGEPAGE) failed"; 1409 return -errno; 1410 } 1411 } 1412 return 0; 1413 } 1414 1415 static int request_hugepages(uffd_global_test_opts_t *gopts, const char **errmsg) 1416 { 1417 /* This should be done before source area is populated */ 1418 if (madvise(gopts->area_src, gopts->nr_pages * gopts->page_size, MADV_HUGEPAGE)) { 1419 if (errmsg) { 1420 *errmsg = (errno == EINVAL) ? 1421 "CONFIG_TRANSPARENT_HUGEPAGE is not set" : 1422 "madvise(MADV_HUGEPAGE) failed"; 1423 } 1424 return -errno; 1425 } 1426 return 0; 1427 } 1428 1429 struct uffd_test_case_ops uffd_move_test_case_ops = { 1430 .post_alloc = prevent_hugepages, 1431 }; 1432 1433 struct uffd_test_case_ops uffd_move_test_pmd_case_ops = { 1434 .post_alloc = request_hugepages, 1435 }; 1436 1437 /* 1438 * Test the returned uffdio_register.ioctls with different register modes. 1439 * Note that _UFFDIO_ZEROPAGE is tested separately in the zeropage test. 1440 */ 1441 static void 1442 do_register_ioctls_test(uffd_global_test_opts_t *gopts, 1443 uffd_test_args_t *args, 1444 bool miss, 1445 bool wp, 1446 bool minor) 1447 { 1448 uint64_t ioctls = 0, expected = BIT_ULL(_UFFDIO_WAKE); 1449 mem_type_t *mem_type = args->mem_type; 1450 int ret; 1451 1452 ret = uffd_register_with_ioctls(gopts->uffd, gopts->area_dst, gopts->page_size, 1453 miss, wp, minor, &ioctls); 1454 1455 /* 1456 * Handle special cases of UFFDIO_REGISTER here where it should 1457 * just fail with -EINVAL first.. 1458 * 1459 * Case 1: register MINOR on anon 1460 * Case 2: register with no mode selected 1461 */ 1462 if ((minor && (mem_type->mem_flag == MEM_ANON)) || 1463 (!miss && !wp && !minor)) { 1464 if (ret != -EINVAL) 1465 err("register (miss=%d, wp=%d, minor=%d) failed " 1466 "with wrong errno=%d", miss, wp, minor, ret); 1467 return; 1468 } 1469 1470 /* UFFDIO_REGISTER should succeed, then check ioctls returned */ 1471 if (miss) 1472 expected |= BIT_ULL(_UFFDIO_COPY); 1473 if (wp) 1474 expected |= BIT_ULL(_UFFDIO_WRITEPROTECT); 1475 if (minor) 1476 expected |= BIT_ULL(_UFFDIO_CONTINUE); 1477 1478 if ((ioctls & expected) != expected) 1479 err("unexpected uffdio_register.ioctls " 1480 "(miss=%d, wp=%d, minor=%d): expected=0x%"PRIx64", " 1481 "returned=0x%"PRIx64, miss, wp, minor, expected, ioctls); 1482 1483 if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size)) 1484 err("unregister"); 1485 } 1486 1487 static void uffd_register_ioctls_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) 1488 { 1489 int miss, wp, minor; 1490 1491 for (miss = 0; miss <= 1; miss++) 1492 for (wp = 0; wp <= 1; wp++) 1493 for (minor = 0; minor <= 1; minor++) 1494 do_register_ioctls_test(gopts, args, miss, wp, minor); 1495 1496 uffd_test_pass(); 1497 } 1498 1499 uffd_test_case_t uffd_tests[] = { 1500 { 1501 /* Test returned uffdio_register.ioctls. */ 1502 .name = "register-ioctls", 1503 .uffd_fn = uffd_register_ioctls_test, 1504 .mem_targets = MEM_ALL, 1505 .uffd_feature_required = UFFD_FEATURE_MISSING_HUGETLBFS | 1506 UFFD_FEATURE_MISSING_SHMEM | 1507 UFFD_FEATURE_PAGEFAULT_FLAG_WP | 1508 UFFD_FEATURE_WP_HUGETLBFS_SHMEM | 1509 UFFD_FEATURE_MINOR_HUGETLBFS | 1510 UFFD_FEATURE_MINOR_SHMEM, 1511 }, 1512 { 1513 .name = "zeropage", 1514 .uffd_fn = uffd_zeropage_test, 1515 .mem_targets = MEM_ALL, 1516 .uffd_feature_required = 0, 1517 }, 1518 { 1519 .name = "move", 1520 .uffd_fn = uffd_move_test, 1521 .mem_targets = MEM_ANON, 1522 .uffd_feature_required = UFFD_FEATURE_MOVE, 1523 .test_case_ops = &uffd_move_test_case_ops, 1524 }, 1525 { 1526 .name = "move-pmd", 1527 .uffd_fn = uffd_move_pmd_test, 1528 .mem_targets = MEM_ANON, 1529 .uffd_feature_required = UFFD_FEATURE_MOVE, 1530 .test_case_ops = &uffd_move_test_pmd_case_ops, 1531 }, 1532 { 1533 .name = "move-pmd-split", 1534 .uffd_fn = uffd_move_pmd_split_test, 1535 .mem_targets = MEM_ANON, 1536 .uffd_feature_required = UFFD_FEATURE_MOVE, 1537 .test_case_ops = &uffd_move_test_pmd_case_ops, 1538 }, 1539 { 1540 .name = "wp-fork", 1541 .uffd_fn = uffd_wp_fork_test, 1542 .mem_targets = MEM_ALL, 1543 .uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP | 1544 UFFD_FEATURE_WP_HUGETLBFS_SHMEM, 1545 }, 1546 { 1547 .name = "wp-fork-with-event", 1548 .uffd_fn = uffd_wp_fork_with_event_test, 1549 .mem_targets = MEM_ALL, 1550 .uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP | 1551 UFFD_FEATURE_WP_HUGETLBFS_SHMEM | 1552 /* when set, child process should inherit uffd-wp bits */ 1553 UFFD_FEATURE_EVENT_FORK, 1554 }, 1555 { 1556 .name = "wp-fork-pin", 1557 .uffd_fn = uffd_wp_fork_pin_test, 1558 .mem_targets = MEM_ALL, 1559 .uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP | 1560 UFFD_FEATURE_WP_HUGETLBFS_SHMEM, 1561 }, 1562 { 1563 .name = "wp-fork-pin-with-event", 1564 .uffd_fn = uffd_wp_fork_pin_with_event_test, 1565 .mem_targets = MEM_ALL, 1566 .uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP | 1567 UFFD_FEATURE_WP_HUGETLBFS_SHMEM | 1568 /* when set, child process should inherit uffd-wp bits */ 1569 UFFD_FEATURE_EVENT_FORK, 1570 }, 1571 { 1572 .name = "wp-unpopulated", 1573 .uffd_fn = uffd_wp_unpopulated_test, 1574 .mem_targets = MEM_ANON, 1575 .uffd_feature_required = 1576 UFFD_FEATURE_PAGEFAULT_FLAG_WP | UFFD_FEATURE_WP_UNPOPULATED, 1577 }, 1578 { 1579 .name = "minor", 1580 .uffd_fn = uffd_minor_test, 1581 .mem_targets = MEM_SHMEM | MEM_HUGETLB, 1582 .uffd_feature_required = 1583 UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM, 1584 }, 1585 { 1586 .name = "minor-wp", 1587 .uffd_fn = uffd_minor_wp_test, 1588 .mem_targets = MEM_SHMEM | MEM_HUGETLB, 1589 .uffd_feature_required = 1590 UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM | 1591 UFFD_FEATURE_PAGEFAULT_FLAG_WP | 1592 /* 1593 * HACK: here we leveraged WP_UNPOPULATED to detect whether 1594 * minor mode supports wr-protect. There's no feature flag 1595 * for it so this is the best we can test against. 1596 */ 1597 UFFD_FEATURE_WP_UNPOPULATED, 1598 }, 1599 { 1600 .name = "minor-collapse", 1601 .uffd_fn = uffd_minor_collapse_test, 1602 /* MADV_COLLAPSE only works with shmem */ 1603 .mem_targets = MEM_SHMEM, 1604 /* We can't test MADV_COLLAPSE, so try our luck */ 1605 .uffd_feature_required = UFFD_FEATURE_MINOR_SHMEM, 1606 }, 1607 { 1608 .name = "sigbus", 1609 .uffd_fn = uffd_sigbus_test, 1610 .mem_targets = MEM_ALL, 1611 .uffd_feature_required = UFFD_FEATURE_SIGBUS | 1612 UFFD_FEATURE_EVENT_FORK, 1613 }, 1614 { 1615 .name = "sigbus-wp", 1616 .uffd_fn = uffd_sigbus_wp_test, 1617 .mem_targets = MEM_ALL, 1618 .uffd_feature_required = UFFD_FEATURE_SIGBUS | 1619 UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_PAGEFAULT_FLAG_WP | 1620 UFFD_FEATURE_WP_HUGETLBFS_SHMEM, 1621 }, 1622 { 1623 .name = "events", 1624 .uffd_fn = uffd_events_test, 1625 .mem_targets = MEM_ALL, 1626 .uffd_feature_required = UFFD_FEATURE_EVENT_FORK | 1627 UFFD_FEATURE_EVENT_REMAP | UFFD_FEATURE_EVENT_REMOVE, 1628 }, 1629 { 1630 .name = "events-wp", 1631 .uffd_fn = uffd_events_wp_test, 1632 .mem_targets = MEM_ALL, 1633 .uffd_feature_required = UFFD_FEATURE_EVENT_FORK | 1634 UFFD_FEATURE_EVENT_REMAP | UFFD_FEATURE_EVENT_REMOVE | 1635 UFFD_FEATURE_PAGEFAULT_FLAG_WP | 1636 UFFD_FEATURE_WP_HUGETLBFS_SHMEM, 1637 }, 1638 { 1639 .name = "poison", 1640 .uffd_fn = uffd_poison_test, 1641 .mem_targets = MEM_ALL, 1642 .uffd_feature_required = UFFD_FEATURE_POISON, 1643 }, 1644 { 1645 .name = "mmap-changing", 1646 .uffd_fn = uffd_mmap_changing_test, 1647 /* 1648 * There's no point running this test over all mem types as 1649 * they share the same code paths. 1650 * 1651 * Choose shmem for simplicity, because (1) shmem supports 1652 * MINOR mode to cover UFFDIO_CONTINUE, and (2) shmem is 1653 * almost always available (unlike hugetlb). Here we 1654 * abused SHMEM for UFFDIO_MOVE, but the test we want to 1655 * cover doesn't yet need the correct memory type.. 1656 */ 1657 .mem_targets = MEM_SHMEM, 1658 /* 1659 * Any UFFD_FEATURE_EVENT_* should work to trigger the 1660 * race logically, but choose the simplest (REMOVE). 1661 * 1662 * Meanwhile, since we'll cover quite a few new ioctl()s 1663 * (CONTINUE, POISON, MOVE), skip this test for old kernels 1664 * by choosing all of them. 1665 */ 1666 .uffd_feature_required = UFFD_FEATURE_EVENT_REMOVE | 1667 UFFD_FEATURE_MOVE | UFFD_FEATURE_POISON | 1668 UFFD_FEATURE_MINOR_SHMEM, 1669 }, 1670 }; 1671 1672 static void usage(const char *prog) 1673 { 1674 printf("usage: %s [-f TESTNAME]\n", prog); 1675 puts(""); 1676 puts(" -f: test name to filter (e.g., event)"); 1677 puts(" -h: show the help msg"); 1678 puts(" -l: list tests only"); 1679 puts(""); 1680 exit(KSFT_FAIL); 1681 } 1682 1683 static int uffd_count_tests(int n_tests, int n_mems, const char *test_filter) 1684 { 1685 uffd_test_case_t *test; 1686 int i, j, count = 0; 1687 1688 if (!test_filter) 1689 count += 2; /* test_uffd_api(false) + test_uffd_api(true) */ 1690 1691 for (i = 0; i < n_tests; i++) { 1692 test = &uffd_tests[i]; 1693 if (test_filter && !strstr(test->name, test_filter)) 1694 continue; 1695 for (j = 0; j < n_mems; j++) 1696 if (test->mem_targets & mem_types[j].mem_flag) 1697 count++; 1698 } 1699 1700 return count; 1701 } 1702 1703 static unsigned long uffd_setup_hugetlb(void) 1704 { 1705 unsigned long nr_hugepages, hp_size; 1706 1707 hugetlb_save_settings(); 1708 hp_size = default_huge_page_size(); 1709 1710 if (!hp_size) 1711 return 0; 1712 1713 /* need twice UFFD_TEST_MEM_SIZE, one for src area and one for dst */ 1714 nr_hugepages = 2 * MAX(UFFD_TEST_MEM_SIZE, hp_size * 2) / hp_size; 1715 hugetlb_set_nr_default_pages(nr_hugepages); 1716 1717 if (hugetlb_free_default_pages() < nr_hugepages) 1718 return 0; 1719 1720 return hp_size; 1721 } 1722 1723 int main(int argc, char *argv[]) 1724 { 1725 int n_tests = sizeof(uffd_tests) / sizeof(uffd_test_case_t); 1726 int n_mems = sizeof(mem_types) / sizeof(mem_type_t); 1727 const char *test_filter = NULL; 1728 unsigned long hugepage_size; 1729 bool list_only = false; 1730 uffd_test_case_t *test; 1731 mem_type_t *mem_type; 1732 uffd_test_args_t args; 1733 const char *errmsg; 1734 int i, j, opt; 1735 1736 while ((opt = getopt(argc, argv, "f:hl")) != -1) { 1737 switch (opt) { 1738 case 'f': 1739 test_filter = optarg; 1740 break; 1741 case 'l': 1742 list_only = true; 1743 break; 1744 case 'h': 1745 default: 1746 /* Unknown */ 1747 usage(argv[0]); 1748 break; 1749 } 1750 } 1751 1752 if (list_only) { 1753 for (i = 0; i < n_tests; i++) { 1754 test = &uffd_tests[i]; 1755 if (test_filter && !strstr(test->name, test_filter)) 1756 continue; 1757 printf("%s\n", test->name); 1758 } 1759 return KSFT_PASS; 1760 } 1761 1762 hugepage_size = uffd_setup_hugetlb(); 1763 1764 ksft_print_header(); 1765 ksft_set_plan(uffd_count_tests(n_tests, n_mems, test_filter)); 1766 1767 if (!test_filter) { 1768 test_uffd_api(false); 1769 test_uffd_api(true); 1770 } 1771 1772 for (i = 0; i < n_tests; i++) { 1773 test = &uffd_tests[i]; 1774 if (test_filter && !strstr(test->name, test_filter)) 1775 continue; 1776 for (j = 0; j < n_mems; j++) { 1777 mem_type = &mem_types[j]; 1778 1779 /* Initialize global test options */ 1780 uffd_global_test_opts_t gopts = { 0 }; 1781 1782 gopts.map_shared = mem_type->shared; 1783 uffd_test_ops = mem_type->mem_ops; 1784 uffd_test_case_ops = test->test_case_ops; 1785 1786 if (!(test->mem_targets & mem_type->mem_flag)) 1787 continue; 1788 1789 uffd_test_start("%s on %s", test->name, mem_type->name); 1790 if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB)) { 1791 gopts.page_size = hugepage_size; 1792 if (gopts.page_size == 0) { 1793 uffd_test_skip("not enough HugeTLB pages"); 1794 continue; 1795 } 1796 } else { 1797 gopts.page_size = psize(); 1798 } 1799 1800 /* Ensure we have at least 2 pages */ 1801 gopts.nr_pages = MAX(UFFD_TEST_MEM_SIZE, gopts.page_size * 2) 1802 / gopts.page_size; 1803 1804 gopts.nr_parallel = 1; 1805 1806 /* Initialize test arguments */ 1807 args.mem_type = mem_type; 1808 1809 if (!uffd_feature_supported(test)) { 1810 uffd_test_skip("feature missing"); 1811 continue; 1812 } 1813 if (uffd_test_ctx_init(&gopts, test->uffd_feature_required, &errmsg)) { 1814 uffd_test_skip(errmsg); 1815 continue; 1816 } 1817 test->uffd_fn(&gopts, &args); 1818 uffd_test_ctx_clear(&gopts); 1819 } 1820 } 1821 1822 ksft_finished(); 1823 } 1824 1825 #else /* __NR_userfaultfd */ 1826 1827 #warning "missing __NR_userfaultfd definition" 1828 1829 int main(void) 1830 { 1831 ksft_print_header(); 1832 ksft_exit_skip("missing __NR_userfaultfd definition\n"); 1833 } 1834 1835 #endif /* __NR_userfaultfd */ 1836