1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Userfaultfd tests util functions 4 * 5 * Copyright (C) 2015-2023 Red Hat, Inc. 6 */ 7 8 #include "uffd-common.h" 9 10 uffd_test_ops_t *uffd_test_ops; 11 uffd_test_case_ops_t *uffd_test_case_ops; 12 13 14 /* pthread_mutex_t starts at page offset 0 */ 15 pthread_mutex_t *area_mutex(char *area, unsigned long nr, uffd_global_test_opts_t *gopts) 16 { 17 return (pthread_mutex_t *) (area + nr * gopts->page_size); 18 } 19 20 /* 21 * count is placed in the page after pthread_mutex_t naturally aligned 22 * to avoid non alignment faults on non-x86 archs. 23 */ 24 volatile unsigned long long *area_count(char *area, unsigned long nr, 25 uffd_global_test_opts_t *gopts) 26 { 27 return (volatile unsigned long long *) 28 ((unsigned long)(area + nr * gopts->page_size + 29 sizeof(pthread_mutex_t) + sizeof(unsigned long long) - 1) & 30 ~(unsigned long)(sizeof(unsigned long long) - 1)); 31 } 32 33 static int uffd_mem_fd_create(off_t mem_size, bool hugetlb) 34 { 35 unsigned int memfd_flags = 0; 36 int mem_fd; 37 38 if (hugetlb) 39 memfd_flags = MFD_HUGETLB; 40 mem_fd = memfd_create("uffd-test", memfd_flags); 41 if (mem_fd < 0) 42 err("memfd_create"); 43 if (ftruncate(mem_fd, mem_size)) 44 err("ftruncate"); 45 if (fallocate(mem_fd, 46 FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, 47 mem_size)) 48 err("fallocate"); 49 50 return mem_fd; 51 } 52 53 static void anon_release_pages(uffd_global_test_opts_t *gopts, char *rel_area) 54 { 55 if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_DONTNEED)) 56 err("madvise(MADV_DONTNEED) failed"); 57 } 58 59 static int anon_allocate_area(uffd_global_test_opts_t *gopts, void **alloc_area, bool is_src) 60 { 61 *alloc_area = mmap(NULL, gopts->nr_pages * gopts->page_size, PROT_READ | PROT_WRITE, 62 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); 63 if (*alloc_area == MAP_FAILED) { 64 *alloc_area = NULL; 65 return -errno; 66 } 67 return 0; 68 } 69 70 static void noop_alias_mapping(uffd_global_test_opts_t *gopts, __u64 *start, 71 size_t len, unsigned long offset) 72 { 73 } 74 75 static void hugetlb_release_pages(uffd_global_test_opts_t *gopts, char *rel_area) 76 { 77 if (!gopts->map_shared) { 78 if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_DONTNEED)) 79 err("madvise(MADV_DONTNEED) failed"); 80 } else { 81 if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_REMOVE)) 82 err("madvise(MADV_REMOVE) failed"); 83 } 84 } 85 86 static int hugetlb_allocate_area(uffd_global_test_opts_t *gopts, void **alloc_area, bool is_src) 87 { 88 off_t size = gopts->nr_pages * gopts->page_size; 89 off_t offset = is_src ? 0 : size; 90 void *area_alias = NULL; 91 char **alloc_area_alias; 92 int mem_fd = uffd_mem_fd_create(size * 2, true); 93 94 *alloc_area = mmap(NULL, size, PROT_READ | PROT_WRITE, 95 (gopts->map_shared ? MAP_SHARED : MAP_PRIVATE) | 96 (is_src ? 0 : MAP_NORESERVE), 97 mem_fd, offset); 98 if (*alloc_area == MAP_FAILED) { 99 *alloc_area = NULL; 100 return -errno; 101 } 102 103 if (gopts->map_shared) { 104 area_alias = mmap(NULL, size, PROT_READ | PROT_WRITE, 105 MAP_SHARED, mem_fd, offset); 106 if (area_alias == MAP_FAILED) 107 return -errno; 108 } 109 110 if (is_src) { 111 alloc_area_alias = &gopts->area_src_alias; 112 } else { 113 alloc_area_alias = &gopts->area_dst_alias; 114 } 115 if (area_alias) 116 *alloc_area_alias = area_alias; 117 118 close(mem_fd); 119 return 0; 120 } 121 122 static void hugetlb_alias_mapping(uffd_global_test_opts_t *gopts, __u64 *start, 123 size_t len, unsigned long offset) 124 { 125 if (!gopts->map_shared) 126 return; 127 128 *start = (unsigned long) gopts->area_dst_alias + offset; 129 } 130 131 static void shmem_release_pages(uffd_global_test_opts_t *gopts, char *rel_area) 132 { 133 if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_REMOVE)) 134 err("madvise(MADV_REMOVE) failed"); 135 } 136 137 static int shmem_allocate_area(uffd_global_test_opts_t *gopts, void **alloc_area, bool is_src) 138 { 139 void *area_alias = NULL; 140 size_t bytes = gopts->nr_pages * gopts->page_size, hpage_size = read_pmd_pagesize(); 141 unsigned long offset = is_src ? 0 : bytes; 142 char *p = NULL, *p_alias = NULL; 143 int mem_fd = uffd_mem_fd_create(bytes * 2, false); 144 size_t region_size = bytes * 2 + hpage_size; 145 146 void *reserve = mmap(NULL, region_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, 147 -1, 0); 148 if (reserve == MAP_FAILED) { 149 close(mem_fd); 150 return -errno; 151 } 152 153 p = reserve; 154 p_alias = p; 155 p_alias += bytes; 156 p_alias += hpage_size; /* Prevent src/dst VMA merge */ 157 158 *alloc_area = mmap(p, bytes, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, 159 mem_fd, offset); 160 if (*alloc_area == MAP_FAILED) { 161 *alloc_area = NULL; 162 munmap(reserve, region_size); 163 close(mem_fd); 164 return -errno; 165 } 166 if (*alloc_area != p) 167 err("mmap of memfd failed at %p", p); 168 169 area_alias = mmap(p_alias, bytes, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, 170 mem_fd, offset); 171 if (area_alias == MAP_FAILED) { 172 *alloc_area = NULL; 173 munmap(reserve, region_size); 174 close(mem_fd); 175 return -errno; 176 } 177 if (area_alias != p_alias) 178 err("mmap of anonymous memory failed at %p", p_alias); 179 180 if (is_src) 181 gopts->area_src_alias = area_alias; 182 else 183 gopts->area_dst_alias = area_alias; 184 185 close(mem_fd); 186 return 0; 187 } 188 189 static void shmem_alias_mapping(uffd_global_test_opts_t *gopts, __u64 *start, 190 size_t len, unsigned long offset) 191 { 192 *start = (unsigned long)gopts->area_dst_alias + offset; 193 } 194 195 static void shmem_check_pmd_mapping(uffd_global_test_opts_t *gopts, void *p, int expect_nr_hpages) 196 { 197 if (!check_huge_shmem(gopts->area_dst_alias, expect_nr_hpages, 198 read_pmd_pagesize())) 199 err("Did not find expected %d number of hugepages", 200 expect_nr_hpages); 201 } 202 203 struct uffd_test_ops anon_uffd_test_ops = { 204 .allocate_area = anon_allocate_area, 205 .release_pages = anon_release_pages, 206 .alias_mapping = noop_alias_mapping, 207 .check_pmd_mapping = NULL, 208 }; 209 210 struct uffd_test_ops shmem_uffd_test_ops = { 211 .allocate_area = shmem_allocate_area, 212 .release_pages = shmem_release_pages, 213 .alias_mapping = shmem_alias_mapping, 214 .check_pmd_mapping = shmem_check_pmd_mapping, 215 }; 216 217 struct uffd_test_ops hugetlb_uffd_test_ops = { 218 .allocate_area = hugetlb_allocate_area, 219 .release_pages = hugetlb_release_pages, 220 .alias_mapping = hugetlb_alias_mapping, 221 .check_pmd_mapping = NULL, 222 }; 223 224 void uffd_stats_report(struct uffd_args *args, int n_cpus) 225 { 226 int i; 227 unsigned long long miss_total = 0, wp_total = 0, minor_total = 0; 228 229 for (i = 0; i < n_cpus; i++) { 230 miss_total += args[i].missing_faults; 231 wp_total += args[i].wp_faults; 232 minor_total += args[i].minor_faults; 233 } 234 235 printf("userfaults: "); 236 if (miss_total) { 237 printf("%llu missing (", miss_total); 238 for (i = 0; i < n_cpus; i++) 239 printf("%lu+", args[i].missing_faults); 240 printf("\b) "); 241 } 242 if (wp_total) { 243 printf("%llu wp (", wp_total); 244 for (i = 0; i < n_cpus; i++) 245 printf("%lu+", args[i].wp_faults); 246 printf("\b) "); 247 } 248 if (minor_total) { 249 printf("%llu minor (", minor_total); 250 for (i = 0; i < n_cpus; i++) 251 printf("%lu+", args[i].minor_faults); 252 printf("\b)"); 253 } 254 printf("\n"); 255 } 256 257 int userfaultfd_open(uffd_global_test_opts_t *gopts, uint64_t *features) 258 { 259 struct uffdio_api uffdio_api; 260 261 gopts->uffd = uffd_open(UFFD_FLAGS); 262 if (gopts->uffd < 0) 263 return -1; 264 gopts->uffd_flags = fcntl(gopts->uffd, F_GETFD, NULL); 265 266 uffdio_api.api = UFFD_API; 267 uffdio_api.features = *features; 268 if (ioctl(gopts->uffd, UFFDIO_API, &uffdio_api)) 269 /* Probably lack of CAP_PTRACE? */ 270 return -1; 271 if (uffdio_api.api != UFFD_API) 272 err("UFFDIO_API error: %" PRIu64, (uint64_t)uffdio_api.api); 273 274 *features = uffdio_api.features; 275 return 0; 276 } 277 278 static inline void munmap_area(uffd_global_test_opts_t *gopts, void **area) 279 { 280 if (*area) 281 if (munmap(*area, gopts->nr_pages * gopts->page_size)) 282 err("munmap"); 283 284 *area = NULL; 285 } 286 287 void uffd_test_ctx_clear(uffd_global_test_opts_t *gopts) 288 { 289 size_t i; 290 291 if (gopts->pipefd) { 292 for (i = 0; i < gopts->nr_parallel * 2; ++i) { 293 if (close(gopts->pipefd[i])) 294 err("close pipefd"); 295 } 296 free(gopts->pipefd); 297 gopts->pipefd = NULL; 298 } 299 300 if (gopts->count_verify) { 301 free(gopts->count_verify); 302 gopts->count_verify = NULL; 303 } 304 305 if (gopts->uffd != -1) { 306 if (close(gopts->uffd)) 307 err("close uffd"); 308 gopts->uffd = -1; 309 } 310 311 munmap_area(gopts, (void **)&gopts->area_src); 312 munmap_area(gopts, (void **)&gopts->area_src_alias); 313 munmap_area(gopts, (void **)&gopts->area_dst); 314 munmap_area(gopts, (void **)&gopts->area_dst_alias); 315 munmap_area(gopts, (void **)&gopts->area_remap); 316 } 317 318 int uffd_test_ctx_init(uffd_global_test_opts_t *gopts, uint64_t features, const char **errmsg) 319 { 320 unsigned long nr, cpu; 321 int ret; 322 323 gopts->area_src_alias = NULL; 324 gopts->area_dst_alias = NULL; 325 gopts->area_remap = NULL; 326 327 if (uffd_test_case_ops && uffd_test_case_ops->pre_alloc) { 328 ret = uffd_test_case_ops->pre_alloc(gopts, errmsg); 329 if (ret) 330 return ret; 331 } 332 333 ret = uffd_test_ops->allocate_area(gopts, (void **) &gopts->area_src, true); 334 ret |= uffd_test_ops->allocate_area(gopts, (void **) &gopts->area_dst, false); 335 if (ret) { 336 if (errmsg) 337 *errmsg = "memory allocation failed"; 338 return ret; 339 } 340 341 if (uffd_test_case_ops && uffd_test_case_ops->post_alloc) { 342 ret = uffd_test_case_ops->post_alloc(gopts, errmsg); 343 if (ret) 344 return ret; 345 } 346 347 ret = userfaultfd_open(gopts, &features); 348 if (ret) { 349 if (errmsg) 350 *errmsg = "possible lack of privilege"; 351 return ret; 352 } 353 354 gopts->count_verify = malloc(gopts->nr_pages * sizeof(unsigned long long)); 355 if (!gopts->count_verify) 356 err("count_verify"); 357 358 for (nr = 0; nr < gopts->nr_pages; nr++) { 359 *area_mutex(gopts->area_src, nr, gopts) = 360 (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER; 361 gopts->count_verify[nr] = *area_count(gopts->area_src, nr, gopts) = 1; 362 /* 363 * In the transition between 255 to 256, powerpc will 364 * read out of order in my_bcmp and see both bytes as 365 * zero, so leave a placeholder below always non-zero 366 * after the count, to avoid my_bcmp to trigger false 367 * positives. 368 */ 369 *(area_count(gopts->area_src, nr, gopts) + 1) = 1; 370 } 371 372 /* 373 * After initialization of area_src, we must explicitly release pages 374 * for area_dst to make sure it's fully empty. Otherwise we could have 375 * some area_dst pages be erroneously initialized with zero pages, 376 * hence we could hit memory corruption later in the test. 377 * 378 * One example is when THP is globally enabled, above allocate_area() 379 * calls could have the two areas merged into a single VMA (as they 380 * will have the same VMA flags so they're mergeable). When we 381 * initialize the area_src above, it's possible that some part of 382 * area_dst could have been faulted in via one huge THP that will be 383 * shared between area_src and area_dst. It could cause some of the 384 * area_dst won't be trapped by missing userfaults. 385 * 386 * This release_pages() will guarantee even if that happened, we'll 387 * proactively split the thp and drop any accidentally initialized 388 * pages within area_dst. 389 */ 390 uffd_test_ops->release_pages(gopts, gopts->area_dst); 391 392 gopts->pipefd = malloc(sizeof(int) * gopts->nr_parallel * 2); 393 if (!gopts->pipefd) 394 err("pipefd"); 395 for (cpu = 0; cpu < gopts->nr_parallel; cpu++) 396 if (pipe2(&gopts->pipefd[cpu * 2], O_CLOEXEC | O_NONBLOCK)) 397 err("pipe"); 398 399 return 0; 400 } 401 402 void wp_range(int ufd, __u64 start, __u64 len, bool wp) 403 { 404 struct uffdio_writeprotect prms; 405 406 /* Write protection page faults */ 407 prms.range.start = start; 408 prms.range.len = len; 409 /* Undo write-protect, do wakeup after that */ 410 prms.mode = wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0; 411 412 if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms)) 413 err("clear WP failed: address=0x%"PRIx64, (uint64_t)start); 414 } 415 416 static void continue_range(int ufd, __u64 start, __u64 len, bool wp) 417 { 418 struct uffdio_continue req; 419 int ret; 420 421 req.range.start = start; 422 req.range.len = len; 423 req.mode = 0; 424 if (wp) 425 req.mode |= UFFDIO_CONTINUE_MODE_WP; 426 427 if (ioctl(ufd, UFFDIO_CONTINUE, &req)) 428 err("UFFDIO_CONTINUE failed for address 0x%" PRIx64, 429 (uint64_t)start); 430 431 /* 432 * Error handling within the kernel for continue is subtly different 433 * from copy or zeropage, so it may be a source of bugs. Trigger an 434 * error (-EEXIST) on purpose, to verify doing so doesn't cause a BUG. 435 */ 436 req.mapped = 0; 437 ret = ioctl(ufd, UFFDIO_CONTINUE, &req); 438 if (ret >= 0 || req.mapped != -EEXIST) 439 err("failed to exercise UFFDIO_CONTINUE error handling, ret=%d, mapped=%" PRId64, 440 ret, (int64_t) req.mapped); 441 } 442 443 int uffd_read_msg(uffd_global_test_opts_t *gopts, struct uffd_msg *msg) 444 { 445 int ret = read(gopts->uffd, msg, sizeof(*msg)); 446 447 if (ret != sizeof(*msg)) { 448 if (ret < 0) { 449 if (errno == EAGAIN || errno == EINTR) 450 return 1; 451 err("blocking read error"); 452 } else { 453 err("short read"); 454 } 455 } 456 457 return 0; 458 } 459 460 void uffd_handle_page_fault(uffd_global_test_opts_t *gopts, struct uffd_msg *msg, 461 struct uffd_args *args) 462 { 463 unsigned long offset; 464 465 if (msg->event != UFFD_EVENT_PAGEFAULT) 466 err("unexpected msg event %u", msg->event); 467 468 if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP) { 469 /* Write protect page faults */ 470 wp_range(gopts->uffd, msg->arg.pagefault.address, gopts->page_size, false); 471 args->wp_faults++; 472 } else if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) { 473 uint8_t *area; 474 int b; 475 476 /* 477 * Minor page faults 478 * 479 * To prove we can modify the original range for testing 480 * purposes, we're going to bit flip this range before 481 * continuing. 482 * 483 * Note that this requires all minor page fault tests operate on 484 * area_dst (non-UFFD-registered) and area_dst_alias 485 * (UFFD-registered). 486 */ 487 488 area = (uint8_t *)(gopts->area_dst + 489 ((char *)msg->arg.pagefault.address - 490 gopts->area_dst_alias)); 491 for (b = 0; b < gopts->page_size; ++b) 492 area[b] = ~area[b]; 493 continue_range(gopts->uffd, msg->arg.pagefault.address, gopts->page_size, 494 args->apply_wp); 495 args->minor_faults++; 496 } else { 497 /* 498 * Missing page faults. 499 * 500 * Here we force a write check for each of the missing mode 501 * faults. It's guaranteed because the only threads that 502 * will trigger uffd faults are the locking threads, and 503 * their first instruction to touch the missing page will 504 * always be pthread_mutex_lock(). 505 * 506 * Note that here we relied on an NPTL glibc impl detail to 507 * always read the lock type at the entry of the lock op 508 * (pthread_mutex_t.__data.__type, offset 0x10) before 509 * doing any locking operations to guarantee that. It's 510 * actually not good to rely on this impl detail because 511 * logically a pthread-compatible lib can implement the 512 * locks without types and we can fail when linking with 513 * them. However since we used to find bugs with this 514 * strict check we still keep it around. Hopefully this 515 * could be a good hint when it fails again. If one day 516 * it'll break on some other impl of glibc we'll revisit. 517 */ 518 if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) 519 err("unexpected write fault"); 520 521 offset = (char *)(unsigned long)msg->arg.pagefault.address - gopts->area_dst; 522 offset &= ~(gopts->page_size-1); 523 524 if (copy_page(gopts, offset, args->apply_wp)) 525 args->missing_faults++; 526 } 527 } 528 529 void *uffd_poll_thread(void *arg) 530 { 531 struct uffd_args *args = (struct uffd_args *)arg; 532 uffd_global_test_opts_t *gopts = args->gopts; 533 unsigned long cpu = args->cpu; 534 struct pollfd pollfd[2]; 535 struct uffd_msg msg; 536 struct uffdio_register uffd_reg; 537 int ret; 538 char tmp_chr; 539 540 if (!args->handle_fault) 541 args->handle_fault = uffd_handle_page_fault; 542 543 pollfd[0].fd = gopts->uffd; 544 pollfd[0].events = POLLIN; 545 pollfd[1].fd = gopts->pipefd[cpu*2]; 546 pollfd[1].events = POLLIN; 547 548 gopts->ready_for_fork = true; 549 550 for (;;) { 551 ret = poll(pollfd, 2, -1); 552 if (ret <= 0) { 553 if (errno == EINTR || errno == EAGAIN) 554 continue; 555 err("poll error: %d", ret); 556 } 557 if (pollfd[1].revents) { 558 if (!(pollfd[1].revents & POLLIN)) 559 err("pollfd[1].revents %d", pollfd[1].revents); 560 if (read(pollfd[1].fd, &tmp_chr, 1) != 1) 561 err("read pipefd error"); 562 break; 563 } 564 if (!(pollfd[0].revents & POLLIN)) 565 err("pollfd[0].revents %d", pollfd[0].revents); 566 if (uffd_read_msg(gopts, &msg)) 567 continue; 568 switch (msg.event) { 569 default: 570 err("unexpected msg event %u\n", msg.event); 571 break; 572 case UFFD_EVENT_PAGEFAULT: 573 args->handle_fault(gopts, &msg, args); 574 break; 575 case UFFD_EVENT_FORK: 576 close(gopts->uffd); 577 gopts->uffd = msg.arg.fork.ufd; 578 pollfd[0].fd = gopts->uffd; 579 break; 580 case UFFD_EVENT_REMOVE: 581 uffd_reg.range.start = msg.arg.remove.start; 582 uffd_reg.range.len = msg.arg.remove.end - 583 msg.arg.remove.start; 584 if (ioctl(gopts->uffd, UFFDIO_UNREGISTER, &uffd_reg.range)) 585 err("remove failure"); 586 break; 587 case UFFD_EVENT_REMAP: 588 gopts->area_remap = gopts->area_dst; /* save for later unmap */ 589 gopts->area_dst = (char *)(unsigned long)msg.arg.remap.to; 590 break; 591 } 592 } 593 594 return NULL; 595 } 596 597 static void retry_copy_page(uffd_global_test_opts_t *gopts, struct uffdio_copy *uffdio_copy, 598 unsigned long offset) 599 { 600 uffd_test_ops->alias_mapping(gopts, 601 &uffdio_copy->dst, 602 uffdio_copy->len, 603 offset); 604 if (ioctl(gopts->uffd, UFFDIO_COPY, uffdio_copy)) { 605 /* real retval in ufdio_copy.copy */ 606 if (uffdio_copy->copy != -EEXIST) 607 err("UFFDIO_COPY retry error: %"PRId64, 608 (int64_t)uffdio_copy->copy); 609 } else { 610 err("UFFDIO_COPY retry unexpected: %"PRId64, 611 (int64_t)uffdio_copy->copy); 612 } 613 } 614 615 static void wake_range(int ufd, unsigned long addr, unsigned long len) 616 { 617 struct uffdio_range uffdio_wake; 618 619 uffdio_wake.start = addr; 620 uffdio_wake.len = len; 621 622 if (ioctl(ufd, UFFDIO_WAKE, &uffdio_wake)) 623 fprintf(stderr, "error waking %lu\n", 624 addr), exit(1); 625 } 626 627 int __copy_page(uffd_global_test_opts_t *gopts, unsigned long offset, bool retry, bool wp) 628 { 629 struct uffdio_copy uffdio_copy; 630 631 if (offset >= gopts->nr_pages * gopts->page_size) 632 err("unexpected offset %lu\n", offset); 633 uffdio_copy.dst = (unsigned long) gopts->area_dst + offset; 634 uffdio_copy.src = (unsigned long) gopts->area_src + offset; 635 uffdio_copy.len = gopts->page_size; 636 if (wp) 637 uffdio_copy.mode = UFFDIO_COPY_MODE_WP; 638 else 639 uffdio_copy.mode = 0; 640 uffdio_copy.copy = 0; 641 if (ioctl(gopts->uffd, UFFDIO_COPY, &uffdio_copy)) { 642 /* real retval in ufdio_copy.copy */ 643 if (uffdio_copy.copy != -EEXIST) 644 err("UFFDIO_COPY error: %"PRId64, 645 (int64_t)uffdio_copy.copy); 646 wake_range(gopts->uffd, uffdio_copy.dst, gopts->page_size); 647 } else if (uffdio_copy.copy != gopts->page_size) { 648 err("UFFDIO_COPY error: %"PRId64, (int64_t)uffdio_copy.copy); 649 } else { 650 if (gopts->test_uffdio_copy_eexist && retry) { 651 gopts->test_uffdio_copy_eexist = false; 652 retry_copy_page(gopts, &uffdio_copy, offset); 653 } 654 return 1; 655 } 656 return 0; 657 } 658 659 int copy_page(uffd_global_test_opts_t *gopts, unsigned long offset, bool wp) 660 { 661 return __copy_page(gopts, offset, false, wp); 662 } 663 664 int move_page(uffd_global_test_opts_t *gopts, unsigned long offset, unsigned long len) 665 { 666 struct uffdio_move uffdio_move; 667 668 if (offset + len > gopts->nr_pages * gopts->page_size) 669 err("unexpected offset %lu and length %lu\n", offset, len); 670 uffdio_move.dst = (unsigned long) gopts->area_dst + offset; 671 uffdio_move.src = (unsigned long) gopts->area_src + offset; 672 uffdio_move.len = len; 673 uffdio_move.mode = UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES; 674 uffdio_move.move = 0; 675 if (ioctl(gopts->uffd, UFFDIO_MOVE, &uffdio_move)) { 676 /* real retval in uffdio_move.move */ 677 if (uffdio_move.move != -EEXIST) 678 err("UFFDIO_MOVE error: %"PRId64, 679 (int64_t)uffdio_move.move); 680 wake_range(gopts->uffd, uffdio_move.dst, len); 681 } else if (uffdio_move.move != len) { 682 err("UFFDIO_MOVE error: %"PRId64, (int64_t)uffdio_move.move); 683 } else 684 return 1; 685 return 0; 686 } 687 688 int uffd_open_dev(unsigned int flags) 689 { 690 int fd, uffd; 691 692 fd = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC); 693 if (fd < 0) 694 return fd; 695 uffd = ioctl(fd, USERFAULTFD_IOC_NEW, flags); 696 close(fd); 697 698 return uffd; 699 } 700 701 int uffd_open_sys(unsigned int flags) 702 { 703 #ifdef __NR_userfaultfd 704 return syscall(__NR_userfaultfd, flags); 705 #else 706 return -1; 707 #endif 708 } 709 710 int uffd_open(unsigned int flags) 711 { 712 int uffd = uffd_open_sys(flags); 713 714 if (uffd < 0) 715 uffd = uffd_open_dev(flags); 716 717 return uffd; 718 } 719 720 int uffd_get_features(uint64_t *features) 721 { 722 struct uffdio_api uffdio_api = { .api = UFFD_API, .features = 0 }; 723 /* 724 * This should by default work in most kernels; the feature list 725 * will be the same no matter what we pass in here. 726 */ 727 int fd = uffd_open(UFFD_USER_MODE_ONLY); 728 729 if (fd < 0) 730 /* Maybe the kernel is older than user-only mode? */ 731 fd = uffd_open(0); 732 733 if (fd < 0) 734 return fd; 735 736 if (ioctl(fd, UFFDIO_API, &uffdio_api)) { 737 close(fd); 738 return -errno; 739 } 740 741 *features = uffdio_api.features; 742 close(fd); 743 744 return 0; 745 } 746