1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Userfaultfd tests util functions 4 * 5 * Copyright (C) 2015-2023 Red Hat, Inc. 6 */ 7 8 #include "uffd-common.h" 9 10 #define BASE_PMD_ADDR ((void *)(1UL << 30)) 11 12 volatile bool test_uffdio_copy_eexist = true; 13 unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size; 14 char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap; 15 int uffd = -1, uffd_flags, finished, *pipefd, test_type; 16 bool map_shared; 17 bool test_uffdio_wp = true; 18 unsigned long long *count_verify; 19 uffd_test_ops_t *uffd_test_ops; 20 uffd_test_case_ops_t *uffd_test_case_ops; 21 pthread_barrier_t ready_for_fork; 22 23 static int uffd_mem_fd_create(off_t mem_size, bool hugetlb) 24 { 25 unsigned int memfd_flags = 0; 26 int mem_fd; 27 28 if (hugetlb) 29 memfd_flags = MFD_HUGETLB; 30 mem_fd = memfd_create("uffd-test", memfd_flags); 31 if (mem_fd < 0) 32 err("memfd_create"); 33 if (ftruncate(mem_fd, mem_size)) 34 err("ftruncate"); 35 if (fallocate(mem_fd, 36 FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, 37 mem_size)) 38 err("fallocate"); 39 40 return mem_fd; 41 } 42 43 static void anon_release_pages(char *rel_area) 44 { 45 if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED)) 46 err("madvise(MADV_DONTNEED) failed"); 47 } 48 49 static int anon_allocate_area(void **alloc_area, bool is_src) 50 { 51 *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, 52 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); 53 if (*alloc_area == MAP_FAILED) { 54 *alloc_area = NULL; 55 return -errno; 56 } 57 return 0; 58 } 59 60 static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset) 61 { 62 } 63 64 static void hugetlb_release_pages(char *rel_area) 65 { 66 if (!map_shared) { 67 if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED)) 68 err("madvise(MADV_DONTNEED) failed"); 69 } else { 70 if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE)) 71 err("madvise(MADV_REMOVE) failed"); 72 } 73 } 74 75 static int hugetlb_allocate_area(void **alloc_area, bool is_src) 76 { 77 off_t size = nr_pages * page_size; 78 off_t offset = is_src ? 0 : size; 79 void *area_alias = NULL; 80 char **alloc_area_alias; 81 int mem_fd = uffd_mem_fd_create(size * 2, true); 82 83 *alloc_area = mmap(NULL, size, PROT_READ | PROT_WRITE, 84 (map_shared ? MAP_SHARED : MAP_PRIVATE) | 85 (is_src ? 0 : MAP_NORESERVE), 86 mem_fd, offset); 87 if (*alloc_area == MAP_FAILED) { 88 *alloc_area = NULL; 89 return -errno; 90 } 91 92 if (map_shared) { 93 area_alias = mmap(NULL, size, PROT_READ | PROT_WRITE, 94 MAP_SHARED, mem_fd, offset); 95 if (area_alias == MAP_FAILED) 96 return -errno; 97 } 98 99 if (is_src) { 100 alloc_area_alias = &area_src_alias; 101 } else { 102 alloc_area_alias = &area_dst_alias; 103 } 104 if (area_alias) 105 *alloc_area_alias = area_alias; 106 107 close(mem_fd); 108 return 0; 109 } 110 111 static void hugetlb_alias_mapping(__u64 *start, size_t len, unsigned long offset) 112 { 113 if (!map_shared) 114 return; 115 116 *start = (unsigned long) area_dst_alias + offset; 117 } 118 119 static void shmem_release_pages(char *rel_area) 120 { 121 if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE)) 122 err("madvise(MADV_REMOVE) failed"); 123 } 124 125 static int shmem_allocate_area(void **alloc_area, bool is_src) 126 { 127 void *area_alias = NULL; 128 size_t bytes = nr_pages * page_size, hpage_size = read_pmd_pagesize(); 129 unsigned long offset = is_src ? 0 : bytes; 130 char *p = NULL, *p_alias = NULL; 131 int mem_fd = uffd_mem_fd_create(bytes * 2, false); 132 133 /* TODO: clean this up. Use a static addr is ugly */ 134 p = BASE_PMD_ADDR; 135 if (!is_src) 136 /* src map + alias + interleaved hpages */ 137 p += 2 * (bytes + hpage_size); 138 p_alias = p; 139 p_alias += bytes; 140 p_alias += hpage_size; /* Prevent src/dst VMA merge */ 141 142 *alloc_area = mmap(p, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, 143 mem_fd, offset); 144 if (*alloc_area == MAP_FAILED) { 145 *alloc_area = NULL; 146 return -errno; 147 } 148 if (*alloc_area != p) 149 err("mmap of memfd failed at %p", p); 150 151 area_alias = mmap(p_alias, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, 152 mem_fd, offset); 153 if (area_alias == MAP_FAILED) { 154 munmap(*alloc_area, bytes); 155 *alloc_area = NULL; 156 return -errno; 157 } 158 if (area_alias != p_alias) 159 err("mmap of anonymous memory failed at %p", p_alias); 160 161 if (is_src) 162 area_src_alias = area_alias; 163 else 164 area_dst_alias = area_alias; 165 166 close(mem_fd); 167 return 0; 168 } 169 170 static void shmem_alias_mapping(__u64 *start, size_t len, unsigned long offset) 171 { 172 *start = (unsigned long)area_dst_alias + offset; 173 } 174 175 static void shmem_check_pmd_mapping(void *p, int expect_nr_hpages) 176 { 177 if (!check_huge_shmem(area_dst_alias, expect_nr_hpages, 178 read_pmd_pagesize())) 179 err("Did not find expected %d number of hugepages", 180 expect_nr_hpages); 181 } 182 183 struct uffd_test_ops anon_uffd_test_ops = { 184 .allocate_area = anon_allocate_area, 185 .release_pages = anon_release_pages, 186 .alias_mapping = noop_alias_mapping, 187 .check_pmd_mapping = NULL, 188 }; 189 190 struct uffd_test_ops shmem_uffd_test_ops = { 191 .allocate_area = shmem_allocate_area, 192 .release_pages = shmem_release_pages, 193 .alias_mapping = shmem_alias_mapping, 194 .check_pmd_mapping = shmem_check_pmd_mapping, 195 }; 196 197 struct uffd_test_ops hugetlb_uffd_test_ops = { 198 .allocate_area = hugetlb_allocate_area, 199 .release_pages = hugetlb_release_pages, 200 .alias_mapping = hugetlb_alias_mapping, 201 .check_pmd_mapping = NULL, 202 }; 203 204 void uffd_stats_report(struct uffd_args *args, int n_cpus) 205 { 206 int i; 207 unsigned long long miss_total = 0, wp_total = 0, minor_total = 0; 208 209 for (i = 0; i < n_cpus; i++) { 210 miss_total += args[i].missing_faults; 211 wp_total += args[i].wp_faults; 212 minor_total += args[i].minor_faults; 213 } 214 215 printf("userfaults: "); 216 if (miss_total) { 217 printf("%llu missing (", miss_total); 218 for (i = 0; i < n_cpus; i++) 219 printf("%lu+", args[i].missing_faults); 220 printf("\b) "); 221 } 222 if (wp_total) { 223 printf("%llu wp (", wp_total); 224 for (i = 0; i < n_cpus; i++) 225 printf("%lu+", args[i].wp_faults); 226 printf("\b) "); 227 } 228 if (minor_total) { 229 printf("%llu minor (", minor_total); 230 for (i = 0; i < n_cpus; i++) 231 printf("%lu+", args[i].minor_faults); 232 printf("\b)"); 233 } 234 printf("\n"); 235 } 236 237 int userfaultfd_open(uint64_t *features) 238 { 239 struct uffdio_api uffdio_api; 240 241 uffd = uffd_open(UFFD_FLAGS); 242 if (uffd < 0) 243 return -1; 244 uffd_flags = fcntl(uffd, F_GETFD, NULL); 245 246 uffdio_api.api = UFFD_API; 247 uffdio_api.features = *features; 248 if (ioctl(uffd, UFFDIO_API, &uffdio_api)) 249 /* Probably lack of CAP_PTRACE? */ 250 return -1; 251 if (uffdio_api.api != UFFD_API) 252 err("UFFDIO_API error: %" PRIu64, (uint64_t)uffdio_api.api); 253 254 *features = uffdio_api.features; 255 return 0; 256 } 257 258 static inline void munmap_area(void **area) 259 { 260 if (*area) 261 if (munmap(*area, nr_pages * page_size)) 262 err("munmap"); 263 264 *area = NULL; 265 } 266 267 void uffd_test_ctx_clear(void) 268 { 269 size_t i; 270 271 if (pipefd) { 272 for (i = 0; i < nr_cpus * 2; ++i) { 273 if (close(pipefd[i])) 274 err("close pipefd"); 275 } 276 free(pipefd); 277 pipefd = NULL; 278 } 279 280 if (count_verify) { 281 free(count_verify); 282 count_verify = NULL; 283 } 284 285 if (uffd != -1) { 286 if (close(uffd)) 287 err("close uffd"); 288 uffd = -1; 289 } 290 291 munmap_area((void **)&area_src); 292 munmap_area((void **)&area_src_alias); 293 munmap_area((void **)&area_dst); 294 munmap_area((void **)&area_dst_alias); 295 munmap_area((void **)&area_remap); 296 } 297 298 int uffd_test_ctx_init(uint64_t features, const char **errmsg) 299 { 300 unsigned long nr, cpu; 301 int ret; 302 303 if (uffd_test_case_ops && uffd_test_case_ops->pre_alloc) { 304 ret = uffd_test_case_ops->pre_alloc(errmsg); 305 if (ret) 306 return ret; 307 } 308 309 ret = uffd_test_ops->allocate_area((void **)&area_src, true); 310 ret |= uffd_test_ops->allocate_area((void **)&area_dst, false); 311 if (ret) { 312 if (errmsg) 313 *errmsg = "memory allocation failed"; 314 return ret; 315 } 316 317 if (uffd_test_case_ops && uffd_test_case_ops->post_alloc) { 318 ret = uffd_test_case_ops->post_alloc(errmsg); 319 if (ret) 320 return ret; 321 } 322 323 ret = userfaultfd_open(&features); 324 if (ret) { 325 if (errmsg) 326 *errmsg = "possible lack of priviledge"; 327 return ret; 328 } 329 330 count_verify = malloc(nr_pages * sizeof(unsigned long long)); 331 if (!count_verify) 332 err("count_verify"); 333 334 for (nr = 0; nr < nr_pages; nr++) { 335 *area_mutex(area_src, nr) = 336 (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER; 337 count_verify[nr] = *area_count(area_src, nr) = 1; 338 /* 339 * In the transition between 255 to 256, powerpc will 340 * read out of order in my_bcmp and see both bytes as 341 * zero, so leave a placeholder below always non-zero 342 * after the count, to avoid my_bcmp to trigger false 343 * positives. 344 */ 345 *(area_count(area_src, nr) + 1) = 1; 346 } 347 348 /* 349 * After initialization of area_src, we must explicitly release pages 350 * for area_dst to make sure it's fully empty. Otherwise we could have 351 * some area_dst pages be errornously initialized with zero pages, 352 * hence we could hit memory corruption later in the test. 353 * 354 * One example is when THP is globally enabled, above allocate_area() 355 * calls could have the two areas merged into a single VMA (as they 356 * will have the same VMA flags so they're mergeable). When we 357 * initialize the area_src above, it's possible that some part of 358 * area_dst could have been faulted in via one huge THP that will be 359 * shared between area_src and area_dst. It could cause some of the 360 * area_dst won't be trapped by missing userfaults. 361 * 362 * This release_pages() will guarantee even if that happened, we'll 363 * proactively split the thp and drop any accidentally initialized 364 * pages within area_dst. 365 */ 366 uffd_test_ops->release_pages(area_dst); 367 368 pipefd = malloc(sizeof(int) * nr_cpus * 2); 369 if (!pipefd) 370 err("pipefd"); 371 for (cpu = 0; cpu < nr_cpus; cpu++) 372 if (pipe2(&pipefd[cpu * 2], O_CLOEXEC | O_NONBLOCK)) 373 err("pipe"); 374 375 return 0; 376 } 377 378 void wp_range(int ufd, __u64 start, __u64 len, bool wp) 379 { 380 struct uffdio_writeprotect prms; 381 382 /* Write protection page faults */ 383 prms.range.start = start; 384 prms.range.len = len; 385 /* Undo write-protect, do wakeup after that */ 386 prms.mode = wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0; 387 388 if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms)) 389 err("clear WP failed: address=0x%"PRIx64, (uint64_t)start); 390 } 391 392 static void continue_range(int ufd, __u64 start, __u64 len, bool wp) 393 { 394 struct uffdio_continue req; 395 int ret; 396 397 req.range.start = start; 398 req.range.len = len; 399 req.mode = 0; 400 if (wp) 401 req.mode |= UFFDIO_CONTINUE_MODE_WP; 402 403 if (ioctl(ufd, UFFDIO_CONTINUE, &req)) 404 err("UFFDIO_CONTINUE failed for address 0x%" PRIx64, 405 (uint64_t)start); 406 407 /* 408 * Error handling within the kernel for continue is subtly different 409 * from copy or zeropage, so it may be a source of bugs. Trigger an 410 * error (-EEXIST) on purpose, to verify doing so doesn't cause a BUG. 411 */ 412 req.mapped = 0; 413 ret = ioctl(ufd, UFFDIO_CONTINUE, &req); 414 if (ret >= 0 || req.mapped != -EEXIST) 415 err("failed to exercise UFFDIO_CONTINUE error handling, ret=%d, mapped=%" PRId64, 416 ret, (int64_t) req.mapped); 417 } 418 419 int uffd_read_msg(int ufd, struct uffd_msg *msg) 420 { 421 int ret = read(uffd, msg, sizeof(*msg)); 422 423 if (ret != sizeof(*msg)) { 424 if (ret < 0) { 425 if (errno == EAGAIN || errno == EINTR) 426 return 1; 427 err("blocking read error"); 428 } else { 429 err("short read"); 430 } 431 } 432 433 return 0; 434 } 435 436 void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args) 437 { 438 unsigned long offset; 439 440 if (msg->event != UFFD_EVENT_PAGEFAULT) 441 err("unexpected msg event %u", msg->event); 442 443 if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP) { 444 /* Write protect page faults */ 445 wp_range(uffd, msg->arg.pagefault.address, page_size, false); 446 args->wp_faults++; 447 } else if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) { 448 uint8_t *area; 449 int b; 450 451 /* 452 * Minor page faults 453 * 454 * To prove we can modify the original range for testing 455 * purposes, we're going to bit flip this range before 456 * continuing. 457 * 458 * Note that this requires all minor page fault tests operate on 459 * area_dst (non-UFFD-registered) and area_dst_alias 460 * (UFFD-registered). 461 */ 462 463 area = (uint8_t *)(area_dst + 464 ((char *)msg->arg.pagefault.address - 465 area_dst_alias)); 466 for (b = 0; b < page_size; ++b) 467 area[b] = ~area[b]; 468 continue_range(uffd, msg->arg.pagefault.address, page_size, 469 args->apply_wp); 470 args->minor_faults++; 471 } else { 472 /* 473 * Missing page faults. 474 * 475 * Here we force a write check for each of the missing mode 476 * faults. It's guaranteed because the only threads that 477 * will trigger uffd faults are the locking threads, and 478 * their first instruction to touch the missing page will 479 * always be pthread_mutex_lock(). 480 * 481 * Note that here we relied on an NPTL glibc impl detail to 482 * always read the lock type at the entry of the lock op 483 * (pthread_mutex_t.__data.__type, offset 0x10) before 484 * doing any locking operations to guarantee that. It's 485 * actually not good to rely on this impl detail because 486 * logically a pthread-compatible lib can implement the 487 * locks without types and we can fail when linking with 488 * them. However since we used to find bugs with this 489 * strict check we still keep it around. Hopefully this 490 * could be a good hint when it fails again. If one day 491 * it'll break on some other impl of glibc we'll revisit. 492 */ 493 if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) 494 err("unexpected write fault"); 495 496 offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; 497 offset &= ~(page_size-1); 498 499 if (copy_page(uffd, offset, args->apply_wp)) 500 args->missing_faults++; 501 } 502 } 503 504 void *uffd_poll_thread(void *arg) 505 { 506 struct uffd_args *args = (struct uffd_args *)arg; 507 unsigned long cpu = args->cpu; 508 struct pollfd pollfd[2]; 509 struct uffd_msg msg; 510 struct uffdio_register uffd_reg; 511 int ret; 512 char tmp_chr; 513 514 if (!args->handle_fault) 515 args->handle_fault = uffd_handle_page_fault; 516 517 pollfd[0].fd = uffd; 518 pollfd[0].events = POLLIN; 519 pollfd[1].fd = pipefd[cpu*2]; 520 pollfd[1].events = POLLIN; 521 522 /* Ready for parent thread to fork */ 523 pthread_barrier_wait(&ready_for_fork); 524 525 for (;;) { 526 ret = poll(pollfd, 2, -1); 527 if (ret <= 0) { 528 if (errno == EINTR || errno == EAGAIN) 529 continue; 530 err("poll error: %d", ret); 531 } 532 if (pollfd[1].revents) { 533 if (!(pollfd[1].revents & POLLIN)) 534 err("pollfd[1].revents %d", pollfd[1].revents); 535 if (read(pollfd[1].fd, &tmp_chr, 1) != 1) 536 err("read pipefd error"); 537 break; 538 } 539 if (!(pollfd[0].revents & POLLIN)) 540 err("pollfd[0].revents %d", pollfd[0].revents); 541 if (uffd_read_msg(uffd, &msg)) 542 continue; 543 switch (msg.event) { 544 default: 545 err("unexpected msg event %u\n", msg.event); 546 break; 547 case UFFD_EVENT_PAGEFAULT: 548 args->handle_fault(&msg, args); 549 break; 550 case UFFD_EVENT_FORK: 551 close(uffd); 552 uffd = msg.arg.fork.ufd; 553 pollfd[0].fd = uffd; 554 break; 555 case UFFD_EVENT_REMOVE: 556 uffd_reg.range.start = msg.arg.remove.start; 557 uffd_reg.range.len = msg.arg.remove.end - 558 msg.arg.remove.start; 559 if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range)) 560 err("remove failure"); 561 break; 562 case UFFD_EVENT_REMAP: 563 area_remap = area_dst; /* save for later unmap */ 564 area_dst = (char *)(unsigned long)msg.arg.remap.to; 565 break; 566 } 567 } 568 569 return NULL; 570 } 571 572 static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy, 573 unsigned long offset) 574 { 575 uffd_test_ops->alias_mapping(&uffdio_copy->dst, 576 uffdio_copy->len, 577 offset); 578 if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) { 579 /* real retval in ufdio_copy.copy */ 580 if (uffdio_copy->copy != -EEXIST) 581 err("UFFDIO_COPY retry error: %"PRId64, 582 (int64_t)uffdio_copy->copy); 583 } else { 584 err("UFFDIO_COPY retry unexpected: %"PRId64, 585 (int64_t)uffdio_copy->copy); 586 } 587 } 588 589 static void wake_range(int ufd, unsigned long addr, unsigned long len) 590 { 591 struct uffdio_range uffdio_wake; 592 593 uffdio_wake.start = addr; 594 uffdio_wake.len = len; 595 596 if (ioctl(ufd, UFFDIO_WAKE, &uffdio_wake)) 597 fprintf(stderr, "error waking %lu\n", 598 addr), exit(1); 599 } 600 601 int __copy_page(int ufd, unsigned long offset, bool retry, bool wp) 602 { 603 struct uffdio_copy uffdio_copy; 604 605 if (offset >= nr_pages * page_size) 606 err("unexpected offset %lu\n", offset); 607 uffdio_copy.dst = (unsigned long) area_dst + offset; 608 uffdio_copy.src = (unsigned long) area_src + offset; 609 uffdio_copy.len = page_size; 610 if (wp) 611 uffdio_copy.mode = UFFDIO_COPY_MODE_WP; 612 else 613 uffdio_copy.mode = 0; 614 uffdio_copy.copy = 0; 615 if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) { 616 /* real retval in ufdio_copy.copy */ 617 if (uffdio_copy.copy != -EEXIST) 618 err("UFFDIO_COPY error: %"PRId64, 619 (int64_t)uffdio_copy.copy); 620 wake_range(ufd, uffdio_copy.dst, page_size); 621 } else if (uffdio_copy.copy != page_size) { 622 err("UFFDIO_COPY error: %"PRId64, (int64_t)uffdio_copy.copy); 623 } else { 624 if (test_uffdio_copy_eexist && retry) { 625 test_uffdio_copy_eexist = false; 626 retry_copy_page(ufd, &uffdio_copy, offset); 627 } 628 return 1; 629 } 630 return 0; 631 } 632 633 int copy_page(int ufd, unsigned long offset, bool wp) 634 { 635 return __copy_page(ufd, offset, false, wp); 636 } 637 638 int move_page(int ufd, unsigned long offset, unsigned long len) 639 { 640 struct uffdio_move uffdio_move; 641 642 if (offset + len > nr_pages * page_size) 643 err("unexpected offset %lu and length %lu\n", offset, len); 644 uffdio_move.dst = (unsigned long) area_dst + offset; 645 uffdio_move.src = (unsigned long) area_src + offset; 646 uffdio_move.len = len; 647 uffdio_move.mode = UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES; 648 uffdio_move.move = 0; 649 if (ioctl(ufd, UFFDIO_MOVE, &uffdio_move)) { 650 /* real retval in uffdio_move.move */ 651 if (uffdio_move.move != -EEXIST) 652 err("UFFDIO_MOVE error: %"PRId64, 653 (int64_t)uffdio_move.move); 654 wake_range(ufd, uffdio_move.dst, len); 655 } else if (uffdio_move.move != len) { 656 err("UFFDIO_MOVE error: %"PRId64, (int64_t)uffdio_move.move); 657 } else 658 return 1; 659 return 0; 660 } 661 662 int uffd_open_dev(unsigned int flags) 663 { 664 int fd, uffd; 665 666 fd = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC); 667 if (fd < 0) 668 return fd; 669 uffd = ioctl(fd, USERFAULTFD_IOC_NEW, flags); 670 close(fd); 671 672 return uffd; 673 } 674 675 int uffd_open_sys(unsigned int flags) 676 { 677 return syscall(__NR_userfaultfd, flags); 678 } 679 680 int uffd_open(unsigned int flags) 681 { 682 int uffd = uffd_open_sys(flags); 683 684 if (uffd < 0) 685 uffd = uffd_open_dev(flags); 686 687 return uffd; 688 } 689 690 int uffd_get_features(uint64_t *features) 691 { 692 struct uffdio_api uffdio_api = { .api = UFFD_API, .features = 0 }; 693 /* 694 * This should by default work in most kernels; the feature list 695 * will be the same no matter what we pass in here. 696 */ 697 int fd = uffd_open(UFFD_USER_MODE_ONLY); 698 699 if (fd < 0) 700 /* Maybe the kernel is older than user-only mode? */ 701 fd = uffd_open(0); 702 703 if (fd < 0) 704 return fd; 705 706 if (ioctl(fd, UFFDIO_API, &uffdio_api)) { 707 close(fd); 708 return -errno; 709 } 710 711 *features = uffdio_api.features; 712 close(fd); 713 714 return 0; 715 } 716