1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Userfaultfd tests util functions 4 * 5 * Copyright (C) 2015-2023 Red Hat, Inc. 6 */ 7 8 #include "uffd-common.h" 9 10 uffd_test_ops_t *uffd_test_ops; 11 uffd_test_case_ops_t *uffd_test_case_ops; 12 13 #define BASE_PMD_ADDR ((void *)(1UL << 30)) 14 15 /* pthread_mutex_t starts at page offset 0 */ 16 pthread_mutex_t *area_mutex(char *area, unsigned long nr, uffd_global_test_opts_t *gopts) 17 { 18 return (pthread_mutex_t *) (area + nr * gopts->page_size); 19 } 20 21 /* 22 * count is placed in the page after pthread_mutex_t naturally aligned 23 * to avoid non alignment faults on non-x86 archs. 24 */ 25 volatile unsigned long long *area_count(char *area, unsigned long nr, 26 uffd_global_test_opts_t *gopts) 27 { 28 return (volatile unsigned long long *) 29 ((unsigned long)(area + nr * gopts->page_size + 30 sizeof(pthread_mutex_t) + sizeof(unsigned long long) - 1) & 31 ~(unsigned long)(sizeof(unsigned long long) - 1)); 32 } 33 34 static int uffd_mem_fd_create(off_t mem_size, bool hugetlb) 35 { 36 unsigned int memfd_flags = 0; 37 int mem_fd; 38 39 if (hugetlb) 40 memfd_flags = MFD_HUGETLB; 41 mem_fd = memfd_create("uffd-test", memfd_flags); 42 if (mem_fd < 0) 43 err("memfd_create"); 44 if (ftruncate(mem_fd, mem_size)) 45 err("ftruncate"); 46 if (fallocate(mem_fd, 47 FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, 48 mem_size)) 49 err("fallocate"); 50 51 return mem_fd; 52 } 53 54 static void anon_release_pages(uffd_global_test_opts_t *gopts, char *rel_area) 55 { 56 if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_DONTNEED)) 57 err("madvise(MADV_DONTNEED) failed"); 58 } 59 60 static int anon_allocate_area(uffd_global_test_opts_t *gopts, void **alloc_area, bool is_src) 61 { 62 *alloc_area = mmap(NULL, gopts->nr_pages * gopts->page_size, PROT_READ | PROT_WRITE, 63 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); 64 if (*alloc_area == MAP_FAILED) { 65 *alloc_area = NULL; 66 return -errno; 67 } 68 return 0; 69 } 70 71 static void noop_alias_mapping(uffd_global_test_opts_t *gopts, __u64 *start, 72 size_t len, unsigned long offset) 73 { 74 } 75 76 static void hugetlb_release_pages(uffd_global_test_opts_t *gopts, char *rel_area) 77 { 78 if (!gopts->map_shared) { 79 if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_DONTNEED)) 80 err("madvise(MADV_DONTNEED) failed"); 81 } else { 82 if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_REMOVE)) 83 err("madvise(MADV_REMOVE) failed"); 84 } 85 } 86 87 static int hugetlb_allocate_area(uffd_global_test_opts_t *gopts, void **alloc_area, bool is_src) 88 { 89 off_t size = gopts->nr_pages * gopts->page_size; 90 off_t offset = is_src ? 0 : size; 91 void *area_alias = NULL; 92 char **alloc_area_alias; 93 int mem_fd = uffd_mem_fd_create(size * 2, true); 94 95 *alloc_area = mmap(NULL, size, PROT_READ | PROT_WRITE, 96 (gopts->map_shared ? MAP_SHARED : MAP_PRIVATE) | 97 (is_src ? 0 : MAP_NORESERVE), 98 mem_fd, offset); 99 if (*alloc_area == MAP_FAILED) { 100 *alloc_area = NULL; 101 return -errno; 102 } 103 104 if (gopts->map_shared) { 105 area_alias = mmap(NULL, size, PROT_READ | PROT_WRITE, 106 MAP_SHARED, mem_fd, offset); 107 if (area_alias == MAP_FAILED) 108 return -errno; 109 } 110 111 if (is_src) { 112 alloc_area_alias = &gopts->area_src_alias; 113 } else { 114 alloc_area_alias = &gopts->area_dst_alias; 115 } 116 if (area_alias) 117 *alloc_area_alias = area_alias; 118 119 close(mem_fd); 120 return 0; 121 } 122 123 static void hugetlb_alias_mapping(uffd_global_test_opts_t *gopts, __u64 *start, 124 size_t len, unsigned long offset) 125 { 126 if (!gopts->map_shared) 127 return; 128 129 *start = (unsigned long) gopts->area_dst_alias + offset; 130 } 131 132 static void shmem_release_pages(uffd_global_test_opts_t *gopts, char *rel_area) 133 { 134 if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_REMOVE)) 135 err("madvise(MADV_REMOVE) failed"); 136 } 137 138 static int shmem_allocate_area(uffd_global_test_opts_t *gopts, void **alloc_area, bool is_src) 139 { 140 void *area_alias = NULL; 141 size_t bytes = gopts->nr_pages * gopts->page_size, hpage_size = read_pmd_pagesize(); 142 unsigned long offset = is_src ? 0 : bytes; 143 char *p = NULL, *p_alias = NULL; 144 int mem_fd = uffd_mem_fd_create(bytes * 2, false); 145 146 /* TODO: clean this up. Use a static addr is ugly */ 147 p = BASE_PMD_ADDR; 148 if (!is_src) 149 /* src map + alias + interleaved hpages */ 150 p += 2 * (bytes + hpage_size); 151 p_alias = p; 152 p_alias += bytes; 153 p_alias += hpage_size; /* Prevent src/dst VMA merge */ 154 155 *alloc_area = mmap(p, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, 156 mem_fd, offset); 157 if (*alloc_area == MAP_FAILED) { 158 *alloc_area = NULL; 159 return -errno; 160 } 161 if (*alloc_area != p) 162 err("mmap of memfd failed at %p", p); 163 164 area_alias = mmap(p_alias, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, 165 mem_fd, offset); 166 if (area_alias == MAP_FAILED) { 167 munmap(*alloc_area, bytes); 168 *alloc_area = NULL; 169 return -errno; 170 } 171 if (area_alias != p_alias) 172 err("mmap of anonymous memory failed at %p", p_alias); 173 174 if (is_src) 175 gopts->area_src_alias = area_alias; 176 else 177 gopts->area_dst_alias = area_alias; 178 179 close(mem_fd); 180 return 0; 181 } 182 183 static void shmem_alias_mapping(uffd_global_test_opts_t *gopts, __u64 *start, 184 size_t len, unsigned long offset) 185 { 186 *start = (unsigned long)gopts->area_dst_alias + offset; 187 } 188 189 static void shmem_check_pmd_mapping(uffd_global_test_opts_t *gopts, void *p, int expect_nr_hpages) 190 { 191 if (!check_huge_shmem(gopts->area_dst_alias, expect_nr_hpages, 192 read_pmd_pagesize())) 193 err("Did not find expected %d number of hugepages", 194 expect_nr_hpages); 195 } 196 197 struct uffd_test_ops anon_uffd_test_ops = { 198 .allocate_area = anon_allocate_area, 199 .release_pages = anon_release_pages, 200 .alias_mapping = noop_alias_mapping, 201 .check_pmd_mapping = NULL, 202 }; 203 204 struct uffd_test_ops shmem_uffd_test_ops = { 205 .allocate_area = shmem_allocate_area, 206 .release_pages = shmem_release_pages, 207 .alias_mapping = shmem_alias_mapping, 208 .check_pmd_mapping = shmem_check_pmd_mapping, 209 }; 210 211 struct uffd_test_ops hugetlb_uffd_test_ops = { 212 .allocate_area = hugetlb_allocate_area, 213 .release_pages = hugetlb_release_pages, 214 .alias_mapping = hugetlb_alias_mapping, 215 .check_pmd_mapping = NULL, 216 }; 217 218 void uffd_stats_report(struct uffd_args *args, int n_cpus) 219 { 220 int i; 221 unsigned long long miss_total = 0, wp_total = 0, minor_total = 0; 222 223 for (i = 0; i < n_cpus; i++) { 224 miss_total += args[i].missing_faults; 225 wp_total += args[i].wp_faults; 226 minor_total += args[i].minor_faults; 227 } 228 229 printf("userfaults: "); 230 if (miss_total) { 231 printf("%llu missing (", miss_total); 232 for (i = 0; i < n_cpus; i++) 233 printf("%lu+", args[i].missing_faults); 234 printf("\b) "); 235 } 236 if (wp_total) { 237 printf("%llu wp (", wp_total); 238 for (i = 0; i < n_cpus; i++) 239 printf("%lu+", args[i].wp_faults); 240 printf("\b) "); 241 } 242 if (minor_total) { 243 printf("%llu minor (", minor_total); 244 for (i = 0; i < n_cpus; i++) 245 printf("%lu+", args[i].minor_faults); 246 printf("\b)"); 247 } 248 printf("\n"); 249 } 250 251 int userfaultfd_open(uffd_global_test_opts_t *gopts, uint64_t *features) 252 { 253 struct uffdio_api uffdio_api; 254 255 gopts->uffd = uffd_open(UFFD_FLAGS); 256 if (gopts->uffd < 0) 257 return -1; 258 gopts->uffd_flags = fcntl(gopts->uffd, F_GETFD, NULL); 259 260 uffdio_api.api = UFFD_API; 261 uffdio_api.features = *features; 262 if (ioctl(gopts->uffd, UFFDIO_API, &uffdio_api)) 263 /* Probably lack of CAP_PTRACE? */ 264 return -1; 265 if (uffdio_api.api != UFFD_API) 266 err("UFFDIO_API error: %" PRIu64, (uint64_t)uffdio_api.api); 267 268 *features = uffdio_api.features; 269 return 0; 270 } 271 272 static inline void munmap_area(uffd_global_test_opts_t *gopts, void **area) 273 { 274 if (*area) 275 if (munmap(*area, gopts->nr_pages * gopts->page_size)) 276 err("munmap"); 277 278 *area = NULL; 279 } 280 281 void uffd_test_ctx_clear(uffd_global_test_opts_t *gopts) 282 { 283 size_t i; 284 285 if (gopts->pipefd) { 286 for (i = 0; i < gopts->nr_parallel * 2; ++i) { 287 if (close(gopts->pipefd[i])) 288 err("close pipefd"); 289 } 290 free(gopts->pipefd); 291 gopts->pipefd = NULL; 292 } 293 294 if (gopts->count_verify) { 295 free(gopts->count_verify); 296 gopts->count_verify = NULL; 297 } 298 299 if (gopts->uffd != -1) { 300 if (close(gopts->uffd)) 301 err("close uffd"); 302 gopts->uffd = -1; 303 } 304 305 munmap_area(gopts, (void **)&gopts->area_src); 306 munmap_area(gopts, (void **)&gopts->area_src_alias); 307 munmap_area(gopts, (void **)&gopts->area_dst); 308 munmap_area(gopts, (void **)&gopts->area_dst_alias); 309 munmap_area(gopts, (void **)&gopts->area_remap); 310 } 311 312 int uffd_test_ctx_init(uffd_global_test_opts_t *gopts, uint64_t features, const char **errmsg) 313 { 314 unsigned long nr, cpu; 315 int ret; 316 317 gopts->area_src_alias = NULL; 318 gopts->area_dst_alias = NULL; 319 gopts->area_remap = NULL; 320 321 if (uffd_test_case_ops && uffd_test_case_ops->pre_alloc) { 322 ret = uffd_test_case_ops->pre_alloc(gopts, errmsg); 323 if (ret) 324 return ret; 325 } 326 327 ret = uffd_test_ops->allocate_area(gopts, (void **) &gopts->area_src, true); 328 ret |= uffd_test_ops->allocate_area(gopts, (void **) &gopts->area_dst, false); 329 if (ret) { 330 if (errmsg) 331 *errmsg = "memory allocation failed"; 332 return ret; 333 } 334 335 if (uffd_test_case_ops && uffd_test_case_ops->post_alloc) { 336 ret = uffd_test_case_ops->post_alloc(gopts, errmsg); 337 if (ret) 338 return ret; 339 } 340 341 ret = userfaultfd_open(gopts, &features); 342 if (ret) { 343 if (errmsg) 344 *errmsg = "possible lack of privilege"; 345 return ret; 346 } 347 348 gopts->count_verify = malloc(gopts->nr_pages * sizeof(unsigned long long)); 349 if (!gopts->count_verify) 350 err("count_verify"); 351 352 for (nr = 0; nr < gopts->nr_pages; nr++) { 353 *area_mutex(gopts->area_src, nr, gopts) = 354 (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER; 355 gopts->count_verify[nr] = *area_count(gopts->area_src, nr, gopts) = 1; 356 /* 357 * In the transition between 255 to 256, powerpc will 358 * read out of order in my_bcmp and see both bytes as 359 * zero, so leave a placeholder below always non-zero 360 * after the count, to avoid my_bcmp to trigger false 361 * positives. 362 */ 363 *(area_count(gopts->area_src, nr, gopts) + 1) = 1; 364 } 365 366 /* 367 * After initialization of area_src, we must explicitly release pages 368 * for area_dst to make sure it's fully empty. Otherwise we could have 369 * some area_dst pages be erroneously initialized with zero pages, 370 * hence we could hit memory corruption later in the test. 371 * 372 * One example is when THP is globally enabled, above allocate_area() 373 * calls could have the two areas merged into a single VMA (as they 374 * will have the same VMA flags so they're mergeable). When we 375 * initialize the area_src above, it's possible that some part of 376 * area_dst could have been faulted in via one huge THP that will be 377 * shared between area_src and area_dst. It could cause some of the 378 * area_dst won't be trapped by missing userfaults. 379 * 380 * This release_pages() will guarantee even if that happened, we'll 381 * proactively split the thp and drop any accidentally initialized 382 * pages within area_dst. 383 */ 384 uffd_test_ops->release_pages(gopts, gopts->area_dst); 385 386 gopts->pipefd = malloc(sizeof(int) * gopts->nr_parallel * 2); 387 if (!gopts->pipefd) 388 err("pipefd"); 389 for (cpu = 0; cpu < gopts->nr_parallel; cpu++) 390 if (pipe2(&gopts->pipefd[cpu * 2], O_CLOEXEC | O_NONBLOCK)) 391 err("pipe"); 392 393 return 0; 394 } 395 396 void wp_range(int ufd, __u64 start, __u64 len, bool wp) 397 { 398 struct uffdio_writeprotect prms; 399 400 /* Write protection page faults */ 401 prms.range.start = start; 402 prms.range.len = len; 403 /* Undo write-protect, do wakeup after that */ 404 prms.mode = wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0; 405 406 if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms)) 407 err("clear WP failed: address=0x%"PRIx64, (uint64_t)start); 408 } 409 410 static void continue_range(int ufd, __u64 start, __u64 len, bool wp) 411 { 412 struct uffdio_continue req; 413 int ret; 414 415 req.range.start = start; 416 req.range.len = len; 417 req.mode = 0; 418 if (wp) 419 req.mode |= UFFDIO_CONTINUE_MODE_WP; 420 421 if (ioctl(ufd, UFFDIO_CONTINUE, &req)) 422 err("UFFDIO_CONTINUE failed for address 0x%" PRIx64, 423 (uint64_t)start); 424 425 /* 426 * Error handling within the kernel for continue is subtly different 427 * from copy or zeropage, so it may be a source of bugs. Trigger an 428 * error (-EEXIST) on purpose, to verify doing so doesn't cause a BUG. 429 */ 430 req.mapped = 0; 431 ret = ioctl(ufd, UFFDIO_CONTINUE, &req); 432 if (ret >= 0 || req.mapped != -EEXIST) 433 err("failed to exercise UFFDIO_CONTINUE error handling, ret=%d, mapped=%" PRId64, 434 ret, (int64_t) req.mapped); 435 } 436 437 int uffd_read_msg(uffd_global_test_opts_t *gopts, struct uffd_msg *msg) 438 { 439 int ret = read(gopts->uffd, msg, sizeof(*msg)); 440 441 if (ret != sizeof(*msg)) { 442 if (ret < 0) { 443 if (errno == EAGAIN || errno == EINTR) 444 return 1; 445 err("blocking read error"); 446 } else { 447 err("short read"); 448 } 449 } 450 451 return 0; 452 } 453 454 void uffd_handle_page_fault(uffd_global_test_opts_t *gopts, struct uffd_msg *msg, 455 struct uffd_args *args) 456 { 457 unsigned long offset; 458 459 if (msg->event != UFFD_EVENT_PAGEFAULT) 460 err("unexpected msg event %u", msg->event); 461 462 if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP) { 463 /* Write protect page faults */ 464 wp_range(gopts->uffd, msg->arg.pagefault.address, gopts->page_size, false); 465 args->wp_faults++; 466 } else if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) { 467 uint8_t *area; 468 int b; 469 470 /* 471 * Minor page faults 472 * 473 * To prove we can modify the original range for testing 474 * purposes, we're going to bit flip this range before 475 * continuing. 476 * 477 * Note that this requires all minor page fault tests operate on 478 * area_dst (non-UFFD-registered) and area_dst_alias 479 * (UFFD-registered). 480 */ 481 482 area = (uint8_t *)(gopts->area_dst + 483 ((char *)msg->arg.pagefault.address - 484 gopts->area_dst_alias)); 485 for (b = 0; b < gopts->page_size; ++b) 486 area[b] = ~area[b]; 487 continue_range(gopts->uffd, msg->arg.pagefault.address, gopts->page_size, 488 args->apply_wp); 489 args->minor_faults++; 490 } else { 491 /* 492 * Missing page faults. 493 * 494 * Here we force a write check for each of the missing mode 495 * faults. It's guaranteed because the only threads that 496 * will trigger uffd faults are the locking threads, and 497 * their first instruction to touch the missing page will 498 * always be pthread_mutex_lock(). 499 * 500 * Note that here we relied on an NPTL glibc impl detail to 501 * always read the lock type at the entry of the lock op 502 * (pthread_mutex_t.__data.__type, offset 0x10) before 503 * doing any locking operations to guarantee that. It's 504 * actually not good to rely on this impl detail because 505 * logically a pthread-compatible lib can implement the 506 * locks without types and we can fail when linking with 507 * them. However since we used to find bugs with this 508 * strict check we still keep it around. Hopefully this 509 * could be a good hint when it fails again. If one day 510 * it'll break on some other impl of glibc we'll revisit. 511 */ 512 if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) 513 err("unexpected write fault"); 514 515 offset = (char *)(unsigned long)msg->arg.pagefault.address - gopts->area_dst; 516 offset &= ~(gopts->page_size-1); 517 518 if (copy_page(gopts, offset, args->apply_wp)) 519 args->missing_faults++; 520 } 521 } 522 523 void *uffd_poll_thread(void *arg) 524 { 525 struct uffd_args *args = (struct uffd_args *)arg; 526 uffd_global_test_opts_t *gopts = args->gopts; 527 unsigned long cpu = args->cpu; 528 struct pollfd pollfd[2]; 529 struct uffd_msg msg; 530 struct uffdio_register uffd_reg; 531 int ret; 532 char tmp_chr; 533 534 if (!args->handle_fault) 535 args->handle_fault = uffd_handle_page_fault; 536 537 pollfd[0].fd = gopts->uffd; 538 pollfd[0].events = POLLIN; 539 pollfd[1].fd = gopts->pipefd[cpu*2]; 540 pollfd[1].events = POLLIN; 541 542 gopts->ready_for_fork = true; 543 544 for (;;) { 545 ret = poll(pollfd, 2, -1); 546 if (ret <= 0) { 547 if (errno == EINTR || errno == EAGAIN) 548 continue; 549 err("poll error: %d", ret); 550 } 551 if (pollfd[1].revents) { 552 if (!(pollfd[1].revents & POLLIN)) 553 err("pollfd[1].revents %d", pollfd[1].revents); 554 if (read(pollfd[1].fd, &tmp_chr, 1) != 1) 555 err("read pipefd error"); 556 break; 557 } 558 if (!(pollfd[0].revents & POLLIN)) 559 err("pollfd[0].revents %d", pollfd[0].revents); 560 if (uffd_read_msg(gopts, &msg)) 561 continue; 562 switch (msg.event) { 563 default: 564 err("unexpected msg event %u\n", msg.event); 565 break; 566 case UFFD_EVENT_PAGEFAULT: 567 args->handle_fault(gopts, &msg, args); 568 break; 569 case UFFD_EVENT_FORK: 570 close(gopts->uffd); 571 gopts->uffd = msg.arg.fork.ufd; 572 pollfd[0].fd = gopts->uffd; 573 break; 574 case UFFD_EVENT_REMOVE: 575 uffd_reg.range.start = msg.arg.remove.start; 576 uffd_reg.range.len = msg.arg.remove.end - 577 msg.arg.remove.start; 578 if (ioctl(gopts->uffd, UFFDIO_UNREGISTER, &uffd_reg.range)) 579 err("remove failure"); 580 break; 581 case UFFD_EVENT_REMAP: 582 gopts->area_remap = gopts->area_dst; /* save for later unmap */ 583 gopts->area_dst = (char *)(unsigned long)msg.arg.remap.to; 584 break; 585 } 586 } 587 588 return NULL; 589 } 590 591 static void retry_copy_page(uffd_global_test_opts_t *gopts, struct uffdio_copy *uffdio_copy, 592 unsigned long offset) 593 { 594 uffd_test_ops->alias_mapping(gopts, 595 &uffdio_copy->dst, 596 uffdio_copy->len, 597 offset); 598 if (ioctl(gopts->uffd, UFFDIO_COPY, uffdio_copy)) { 599 /* real retval in ufdio_copy.copy */ 600 if (uffdio_copy->copy != -EEXIST) 601 err("UFFDIO_COPY retry error: %"PRId64, 602 (int64_t)uffdio_copy->copy); 603 } else { 604 err("UFFDIO_COPY retry unexpected: %"PRId64, 605 (int64_t)uffdio_copy->copy); 606 } 607 } 608 609 static void wake_range(int ufd, unsigned long addr, unsigned long len) 610 { 611 struct uffdio_range uffdio_wake; 612 613 uffdio_wake.start = addr; 614 uffdio_wake.len = len; 615 616 if (ioctl(ufd, UFFDIO_WAKE, &uffdio_wake)) 617 fprintf(stderr, "error waking %lu\n", 618 addr), exit(1); 619 } 620 621 int __copy_page(uffd_global_test_opts_t *gopts, unsigned long offset, bool retry, bool wp) 622 { 623 struct uffdio_copy uffdio_copy; 624 625 if (offset >= gopts->nr_pages * gopts->page_size) 626 err("unexpected offset %lu\n", offset); 627 uffdio_copy.dst = (unsigned long) gopts->area_dst + offset; 628 uffdio_copy.src = (unsigned long) gopts->area_src + offset; 629 uffdio_copy.len = gopts->page_size; 630 if (wp) 631 uffdio_copy.mode = UFFDIO_COPY_MODE_WP; 632 else 633 uffdio_copy.mode = 0; 634 uffdio_copy.copy = 0; 635 if (ioctl(gopts->uffd, UFFDIO_COPY, &uffdio_copy)) { 636 /* real retval in ufdio_copy.copy */ 637 if (uffdio_copy.copy != -EEXIST) 638 err("UFFDIO_COPY error: %"PRId64, 639 (int64_t)uffdio_copy.copy); 640 wake_range(gopts->uffd, uffdio_copy.dst, gopts->page_size); 641 } else if (uffdio_copy.copy != gopts->page_size) { 642 err("UFFDIO_COPY error: %"PRId64, (int64_t)uffdio_copy.copy); 643 } else { 644 if (gopts->test_uffdio_copy_eexist && retry) { 645 gopts->test_uffdio_copy_eexist = false; 646 retry_copy_page(gopts, &uffdio_copy, offset); 647 } 648 return 1; 649 } 650 return 0; 651 } 652 653 int copy_page(uffd_global_test_opts_t *gopts, unsigned long offset, bool wp) 654 { 655 return __copy_page(gopts, offset, false, wp); 656 } 657 658 int move_page(uffd_global_test_opts_t *gopts, unsigned long offset, unsigned long len) 659 { 660 struct uffdio_move uffdio_move; 661 662 if (offset + len > gopts->nr_pages * gopts->page_size) 663 err("unexpected offset %lu and length %lu\n", offset, len); 664 uffdio_move.dst = (unsigned long) gopts->area_dst + offset; 665 uffdio_move.src = (unsigned long) gopts->area_src + offset; 666 uffdio_move.len = len; 667 uffdio_move.mode = UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES; 668 uffdio_move.move = 0; 669 if (ioctl(gopts->uffd, UFFDIO_MOVE, &uffdio_move)) { 670 /* real retval in uffdio_move.move */ 671 if (uffdio_move.move != -EEXIST) 672 err("UFFDIO_MOVE error: %"PRId64, 673 (int64_t)uffdio_move.move); 674 wake_range(gopts->uffd, uffdio_move.dst, len); 675 } else if (uffdio_move.move != len) { 676 err("UFFDIO_MOVE error: %"PRId64, (int64_t)uffdio_move.move); 677 } else 678 return 1; 679 return 0; 680 } 681 682 int uffd_open_dev(unsigned int flags) 683 { 684 int fd, uffd; 685 686 fd = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC); 687 if (fd < 0) 688 return fd; 689 uffd = ioctl(fd, USERFAULTFD_IOC_NEW, flags); 690 close(fd); 691 692 return uffd; 693 } 694 695 int uffd_open_sys(unsigned int flags) 696 { 697 #ifdef __NR_userfaultfd 698 return syscall(__NR_userfaultfd, flags); 699 #else 700 return -1; 701 #endif 702 } 703 704 int uffd_open(unsigned int flags) 705 { 706 int uffd = uffd_open_sys(flags); 707 708 if (uffd < 0) 709 uffd = uffd_open_dev(flags); 710 711 return uffd; 712 } 713 714 int uffd_get_features(uint64_t *features) 715 { 716 struct uffdio_api uffdio_api = { .api = UFFD_API, .features = 0 }; 717 /* 718 * This should by default work in most kernels; the feature list 719 * will be the same no matter what we pass in here. 720 */ 721 int fd = uffd_open(UFFD_USER_MODE_ONLY); 722 723 if (fd < 0) 724 /* Maybe the kernel is older than user-only mode? */ 725 fd = uffd_open(0); 726 727 if (fd < 0) 728 return fd; 729 730 if (ioctl(fd, UFFDIO_API, &uffdio_api)) { 731 close(fd); 732 return -errno; 733 } 734 735 *features = uffdio_api.features; 736 close(fd); 737 738 return 0; 739 } 740