1 // SPDX-License-Identifier: GPL-2.0 2 #include <string.h> 3 #include <errno.h> 4 #include <fcntl.h> 5 #include <dirent.h> 6 #include <inttypes.h> 7 #include <sys/ioctl.h> 8 #include <linux/userfaultfd.h> 9 #include <linux/fs.h> 10 #include <sys/syscall.h> 11 #include <unistd.h> 12 #include "kselftest.h" 13 #include "vm_util.h" 14 15 #define PMD_SIZE_FILE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size" 16 #define SMAP_FILE_PATH "/proc/self/smaps" 17 #define STATUS_FILE_PATH "/proc/self/status" 18 #define MAX_LINE_LENGTH 500 19 20 unsigned int __page_size; 21 unsigned int __page_shift; 22 23 uint64_t pagemap_get_entry(int fd, char *start) 24 { 25 const unsigned long pfn = (unsigned long)start / getpagesize(); 26 uint64_t entry; 27 int ret; 28 29 ret = pread(fd, &entry, sizeof(entry), pfn * sizeof(entry)); 30 if (ret != sizeof(entry)) 31 ksft_exit_fail_msg("reading pagemap failed\n"); 32 return entry; 33 } 34 35 static uint64_t __pagemap_scan_get_categories(int fd, char *start, struct page_region *r) 36 { 37 struct pm_scan_arg arg; 38 39 arg.start = (uintptr_t)start; 40 arg.end = (uintptr_t)(start + psize()); 41 arg.vec = (uintptr_t)r; 42 arg.vec_len = 1; 43 arg.flags = 0; 44 arg.size = sizeof(struct pm_scan_arg); 45 arg.max_pages = 0; 46 arg.category_inverted = 0; 47 arg.category_mask = 0; 48 arg.category_anyof_mask = PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | PAGE_IS_FILE | 49 PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_PFNZERO | 50 PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY; 51 arg.return_mask = arg.category_anyof_mask; 52 53 return ioctl(fd, PAGEMAP_SCAN, &arg); 54 } 55 56 static uint64_t pagemap_scan_get_categories(int fd, char *start) 57 { 58 struct page_region r; 59 long ret; 60 61 ret = __pagemap_scan_get_categories(fd, start, &r); 62 if (ret < 0) 63 ksft_exit_fail_msg("PAGEMAP_SCAN failed: %s\n", strerror(errno)); 64 if (ret == 0) 65 return 0; 66 return r.categories; 67 } 68 69 /* `start` is any valid address. */ 70 static bool pagemap_scan_supported(int fd, char *start) 71 { 72 static int supported = -1; 73 int ret; 74 75 if (supported != -1) 76 return supported; 77 78 /* Provide an invalid address in order to trigger EFAULT. */ 79 ret = __pagemap_scan_get_categories(fd, start, (struct page_region *) ~0UL); 80 if (ret == 0) 81 ksft_exit_fail_msg("PAGEMAP_SCAN succeeded unexpectedly\n"); 82 83 supported = errno == EFAULT; 84 85 return supported; 86 } 87 88 static bool page_entry_is(int fd, char *start, char *desc, 89 uint64_t pagemap_flags, uint64_t pagescan_flags) 90 { 91 bool m = pagemap_get_entry(fd, start) & pagemap_flags; 92 93 if (pagemap_scan_supported(fd, start)) { 94 bool s = pagemap_scan_get_categories(fd, start) & pagescan_flags; 95 96 if (m == s) 97 return m; 98 99 ksft_exit_fail_msg( 100 "read and ioctl return unmatched results for %s: %d %d", desc, m, s); 101 } 102 return m; 103 } 104 105 bool pagemap_is_softdirty(int fd, char *start) 106 { 107 return page_entry_is(fd, start, "soft-dirty", 108 PM_SOFT_DIRTY, PAGE_IS_SOFT_DIRTY); 109 } 110 111 bool pagemap_is_swapped(int fd, char *start) 112 { 113 return page_entry_is(fd, start, "swap", PM_SWAP, PAGE_IS_SWAPPED); 114 } 115 116 bool pagemap_is_populated(int fd, char *start) 117 { 118 return page_entry_is(fd, start, "populated", 119 PM_PRESENT | PM_SWAP, 120 PAGE_IS_PRESENT | PAGE_IS_SWAPPED); 121 } 122 123 unsigned long pagemap_get_pfn(int fd, char *start) 124 { 125 uint64_t entry = pagemap_get_entry(fd, start); 126 127 /* If present (63th bit), PFN is at bit 0 -- 54. */ 128 if (entry & PM_PRESENT) 129 return entry & 0x007fffffffffffffull; 130 return -1ul; 131 } 132 133 void clear_softdirty(void) 134 { 135 int ret; 136 const char *ctrl = "4"; 137 int fd = open("/proc/self/clear_refs", O_WRONLY); 138 139 if (fd < 0) 140 ksft_exit_fail_msg("opening clear_refs failed\n"); 141 ret = write(fd, ctrl, strlen(ctrl)); 142 close(fd); 143 if (ret != (signed int)strlen(ctrl)) 144 ksft_exit_fail_msg("writing clear_refs failed\n"); 145 } 146 147 bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len) 148 { 149 while (fgets(buf, len, fp)) { 150 if (!strncmp(buf, pattern, strlen(pattern))) 151 return true; 152 } 153 return false; 154 } 155 156 uint64_t read_pmd_pagesize(void) 157 { 158 int fd; 159 char buf[20]; 160 ssize_t num_read; 161 162 fd = open(PMD_SIZE_FILE_PATH, O_RDONLY); 163 if (fd == -1) 164 return 0; 165 166 num_read = read(fd, buf, 19); 167 if (num_read < 1) { 168 close(fd); 169 return 0; 170 } 171 buf[num_read] = '\0'; 172 close(fd); 173 174 return strtoul(buf, NULL, 10); 175 } 176 177 unsigned long rss_anon(void) 178 { 179 unsigned long rss_anon = 0; 180 FILE *fp; 181 char buffer[MAX_LINE_LENGTH]; 182 183 fp = fopen(STATUS_FILE_PATH, "r"); 184 if (!fp) 185 ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, STATUS_FILE_PATH); 186 187 if (!check_for_pattern(fp, "RssAnon:", buffer, sizeof(buffer))) 188 goto err_out; 189 190 if (sscanf(buffer, "RssAnon:%10lu kB", &rss_anon) != 1) 191 ksft_exit_fail_msg("Reading status error\n"); 192 193 err_out: 194 fclose(fp); 195 return rss_anon; 196 } 197 198 char *__get_smap_entry(void *addr, const char *pattern, char *buf, size_t len) 199 { 200 int ret; 201 FILE *fp; 202 char *entry = NULL; 203 char addr_pattern[MAX_LINE_LENGTH]; 204 205 ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-", 206 (unsigned long) addr); 207 if (ret >= MAX_LINE_LENGTH) 208 ksft_exit_fail_msg("%s: Pattern is too long\n", __func__); 209 210 fp = fopen(SMAP_FILE_PATH, "r"); 211 if (!fp) 212 ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, SMAP_FILE_PATH); 213 214 if (!check_for_pattern(fp, addr_pattern, buf, len)) 215 goto err_out; 216 217 /* Fetch the pattern in the same block */ 218 if (!check_for_pattern(fp, pattern, buf, len)) 219 goto err_out; 220 221 /* Trim trailing newline */ 222 entry = strchr(buf, '\n'); 223 if (entry) 224 *entry = '\0'; 225 226 entry = buf + strlen(pattern); 227 228 err_out: 229 fclose(fp); 230 return entry; 231 } 232 233 bool __check_huge(void *addr, char *pattern, int nr_hpages, 234 uint64_t hpage_size) 235 { 236 char buffer[MAX_LINE_LENGTH]; 237 uint64_t thp = -1; 238 char *entry; 239 240 entry = __get_smap_entry(addr, pattern, buffer, sizeof(buffer)); 241 if (!entry) 242 goto err_out; 243 244 if (sscanf(entry, "%9" SCNu64 " kB", &thp) != 1) 245 ksft_exit_fail_msg("Reading smap error\n"); 246 247 err_out: 248 return thp == (nr_hpages * (hpage_size >> 10)); 249 } 250 251 bool check_huge_anon(void *addr, int nr_hpages, uint64_t hpage_size) 252 { 253 return __check_huge(addr, "AnonHugePages: ", nr_hpages, hpage_size); 254 } 255 256 bool check_huge_file(void *addr, int nr_hpages, uint64_t hpage_size) 257 { 258 return __check_huge(addr, "FilePmdMapped:", nr_hpages, hpage_size); 259 } 260 261 bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size) 262 { 263 return __check_huge(addr, "ShmemPmdMapped:", nr_hpages, hpage_size); 264 } 265 266 int64_t allocate_transhuge(void *ptr, int pagemap_fd) 267 { 268 uint64_t ent[2]; 269 270 /* drop pmd */ 271 if (mmap(ptr, HPAGE_SIZE, PROT_READ | PROT_WRITE, 272 MAP_FIXED | MAP_ANONYMOUS | 273 MAP_NORESERVE | MAP_PRIVATE, -1, 0) != ptr) 274 ksft_exit_fail_msg("mmap transhuge\n"); 275 276 if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE)) 277 ksft_exit_fail_msg("MADV_HUGEPAGE\n"); 278 279 /* allocate transparent huge page */ 280 *(volatile void **)ptr = ptr; 281 282 if (pread(pagemap_fd, ent, sizeof(ent), 283 (uintptr_t)ptr >> (pshift() - 3)) != sizeof(ent)) 284 ksft_exit_fail_msg("read pagemap\n"); 285 286 if (PAGEMAP_PRESENT(ent[0]) && PAGEMAP_PRESENT(ent[1]) && 287 PAGEMAP_PFN(ent[0]) + 1 == PAGEMAP_PFN(ent[1]) && 288 !(PAGEMAP_PFN(ent[0]) & ((1 << (HPAGE_SHIFT - pshift())) - 1))) 289 return PAGEMAP_PFN(ent[0]); 290 291 return -1; 292 } 293 294 unsigned long default_huge_page_size(void) 295 { 296 unsigned long hps = 0; 297 char *line = NULL; 298 size_t linelen = 0; 299 FILE *f = fopen("/proc/meminfo", "r"); 300 301 if (!f) 302 return 0; 303 while (getline(&line, &linelen, f) > 0) { 304 if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) { 305 hps <<= 10; 306 break; 307 } 308 } 309 310 free(line); 311 fclose(f); 312 return hps; 313 } 314 315 int detect_hugetlb_page_sizes(size_t sizes[], int max) 316 { 317 DIR *dir = opendir("/sys/kernel/mm/hugepages/"); 318 int count = 0; 319 320 if (!dir) 321 return 0; 322 323 while (count < max) { 324 struct dirent *entry = readdir(dir); 325 size_t kb; 326 327 if (!entry) 328 break; 329 if (entry->d_type != DT_DIR) 330 continue; 331 if (sscanf(entry->d_name, "hugepages-%zukB", &kb) != 1) 332 continue; 333 sizes[count++] = kb * 1024; 334 ksft_print_msg("[INFO] detected hugetlb page size: %zu KiB\n", 335 kb); 336 } 337 closedir(dir); 338 return count; 339 } 340 341 int pageflags_get(unsigned long pfn, int kpageflags_fd, uint64_t *flags) 342 { 343 size_t count; 344 345 count = pread(kpageflags_fd, flags, sizeof(*flags), 346 pfn * sizeof(*flags)); 347 348 if (count != sizeof(*flags)) 349 return -1; 350 351 return 0; 352 } 353 354 /* If `ioctls' non-NULL, the allowed ioctls will be returned into the var */ 355 int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len, 356 bool miss, bool wp, bool minor, uint64_t *ioctls) 357 { 358 struct uffdio_register uffdio_register = { 0 }; 359 uint64_t mode = 0; 360 int ret = 0; 361 362 if (miss) 363 mode |= UFFDIO_REGISTER_MODE_MISSING; 364 if (wp) 365 mode |= UFFDIO_REGISTER_MODE_WP; 366 if (minor) 367 mode |= UFFDIO_REGISTER_MODE_MINOR; 368 369 uffdio_register.range.start = (unsigned long)addr; 370 uffdio_register.range.len = len; 371 uffdio_register.mode = mode; 372 373 if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) 374 ret = -errno; 375 else if (ioctls) 376 *ioctls = uffdio_register.ioctls; 377 378 return ret; 379 } 380 381 int uffd_register(int uffd, void *addr, uint64_t len, 382 bool miss, bool wp, bool minor) 383 { 384 return uffd_register_with_ioctls(uffd, addr, len, 385 miss, wp, minor, NULL); 386 } 387 388 int uffd_unregister(int uffd, void *addr, uint64_t len) 389 { 390 struct uffdio_range range = { .start = (uintptr_t)addr, .len = len }; 391 int ret = 0; 392 393 if (ioctl(uffd, UFFDIO_UNREGISTER, &range) == -1) 394 ret = -errno; 395 396 return ret; 397 } 398 399 unsigned long get_free_hugepages(void) 400 { 401 unsigned long fhp = 0; 402 char *line = NULL; 403 size_t linelen = 0; 404 FILE *f = fopen("/proc/meminfo", "r"); 405 406 if (!f) 407 return fhp; 408 while (getline(&line, &linelen, f) > 0) { 409 if (sscanf(line, "HugePages_Free: %lu", &fhp) == 1) 410 break; 411 } 412 413 free(line); 414 fclose(f); 415 return fhp; 416 } 417 418 static bool check_vmflag(void *addr, const char *flag) 419 { 420 char buffer[MAX_LINE_LENGTH]; 421 const char *flags; 422 size_t flaglen; 423 424 flags = __get_smap_entry(addr, "VmFlags:", buffer, sizeof(buffer)); 425 if (!flags) 426 ksft_exit_fail_msg("%s: No VmFlags for %p\n", __func__, addr); 427 428 while (true) { 429 flags += strspn(flags, " "); 430 431 flaglen = strcspn(flags, " "); 432 if (!flaglen) 433 return false; 434 435 if (flaglen == strlen(flag) && !memcmp(flags, flag, flaglen)) 436 return true; 437 438 flags += flaglen; 439 } 440 } 441 442 bool check_vmflag_io(void *addr) 443 { 444 return check_vmflag(addr, "io"); 445 } 446 447 bool check_vmflag_pfnmap(void *addr) 448 { 449 return check_vmflag(addr, "pf"); 450 } 451 452 bool check_vmflag_guard(void *addr) 453 { 454 return check_vmflag(addr, "gu"); 455 } 456 457 bool softdirty_supported(void) 458 { 459 char *addr; 460 bool supported = false; 461 const size_t pagesize = getpagesize(); 462 463 /* New mappings are expected to be marked with VM_SOFTDIRTY (sd). */ 464 addr = mmap(0, pagesize, PROT_READ | PROT_WRITE, 465 MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); 466 if (!addr) 467 ksft_exit_fail_msg("mmap failed\n"); 468 469 supported = check_vmflag(addr, "sd"); 470 munmap(addr, pagesize); 471 return supported; 472 } 473 474 /* 475 * Open an fd at /proc/$pid/maps and configure procmap_out ready for 476 * PROCMAP_QUERY query. Returns 0 on success, or an error code otherwise. 477 */ 478 int open_procmap(pid_t pid, struct procmap_fd *procmap_out) 479 { 480 char path[256]; 481 int ret = 0; 482 483 memset(procmap_out, '\0', sizeof(*procmap_out)); 484 sprintf(path, "/proc/%d/maps", pid); 485 procmap_out->query.size = sizeof(procmap_out->query); 486 procmap_out->fd = open(path, O_RDONLY); 487 if (procmap_out->fd < 0) 488 ret = -errno; 489 490 return ret; 491 } 492 493 /* Perform PROCMAP_QUERY. Returns 0 on success, or an error code otherwise. */ 494 int query_procmap(struct procmap_fd *procmap) 495 { 496 int ret = 0; 497 498 if (ioctl(procmap->fd, PROCMAP_QUERY, &procmap->query) == -1) 499 ret = -errno; 500 501 return ret; 502 } 503 504 /* 505 * Try to find the VMA at specified address, returns true if found, false if not 506 * found, and the test is failed if any other error occurs. 507 * 508 * On success, procmap->query is populated with the results. 509 */ 510 bool find_vma_procmap(struct procmap_fd *procmap, void *address) 511 { 512 int err; 513 514 procmap->query.query_flags = 0; 515 procmap->query.query_addr = (unsigned long)address; 516 err = query_procmap(procmap); 517 if (!err) 518 return true; 519 520 if (err != -ENOENT) 521 ksft_exit_fail_msg("%s: Error %d on ioctl(PROCMAP_QUERY)\n", 522 __func__, err); 523 return false; 524 } 525 526 /* 527 * Close fd used by PROCMAP_QUERY mechanism. Returns 0 on success, or an error 528 * code otherwise. 529 */ 530 int close_procmap(struct procmap_fd *procmap) 531 { 532 return close(procmap->fd); 533 } 534 535 int write_sysfs(const char *file_path, unsigned long val) 536 { 537 FILE *f = fopen(file_path, "w"); 538 539 if (!f) { 540 fprintf(stderr, "f %s\n", file_path); 541 perror("fopen"); 542 return 1; 543 } 544 if (fprintf(f, "%lu", val) < 0) { 545 perror("fprintf"); 546 fclose(f); 547 return 1; 548 } 549 fclose(f); 550 551 return 0; 552 } 553 554 int read_sysfs(const char *file_path, unsigned long *val) 555 { 556 FILE *f = fopen(file_path, "r"); 557 558 if (!f) { 559 fprintf(stderr, "f %s\n", file_path); 560 perror("fopen"); 561 return 1; 562 } 563 if (fscanf(f, "%lu", val) != 1) { 564 perror("fscanf"); 565 fclose(f); 566 return 1; 567 } 568 fclose(f); 569 570 return 0; 571 } 572 573 void *sys_mremap(void *old_address, unsigned long old_size, 574 unsigned long new_size, int flags, void *new_address) 575 { 576 return (void *)syscall(__NR_mremap, (unsigned long)old_address, 577 old_size, new_size, flags, 578 (unsigned long)new_address); 579 } 580 581 bool detect_huge_zeropage(void) 582 { 583 int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page", 584 O_RDONLY); 585 bool enabled = 0; 586 char buf[15]; 587 int ret; 588 589 if (fd < 0) 590 return 0; 591 592 ret = pread(fd, buf, sizeof(buf), 0); 593 if (ret > 0 && ret < sizeof(buf)) { 594 buf[ret] = 0; 595 596 if (strtoul(buf, NULL, 10) == 1) 597 enabled = 1; 598 } 599 600 close(fd); 601 return enabled; 602 } 603 604 long ksm_get_self_zero_pages(void) 605 { 606 int proc_self_ksm_stat_fd; 607 char buf[200]; 608 char *substr_ksm_zero; 609 size_t value_pos; 610 ssize_t read_size; 611 612 proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY); 613 if (proc_self_ksm_stat_fd < 0) 614 return -errno; 615 616 read_size = pread(proc_self_ksm_stat_fd, buf, sizeof(buf) - 1, 0); 617 close(proc_self_ksm_stat_fd); 618 if (read_size < 0) 619 return -errno; 620 621 buf[read_size] = 0; 622 623 substr_ksm_zero = strstr(buf, "ksm_zero_pages"); 624 if (!substr_ksm_zero) 625 return 0; 626 627 value_pos = strcspn(substr_ksm_zero, "0123456789"); 628 return strtol(substr_ksm_zero + value_pos, NULL, 10); 629 } 630 631 long ksm_get_self_merging_pages(void) 632 { 633 int proc_self_ksm_merging_pages_fd; 634 char buf[10]; 635 ssize_t ret; 636 637 proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages", 638 O_RDONLY); 639 if (proc_self_ksm_merging_pages_fd < 0) 640 return -errno; 641 642 ret = pread(proc_self_ksm_merging_pages_fd, buf, sizeof(buf) - 1, 0); 643 close(proc_self_ksm_merging_pages_fd); 644 if (ret <= 0) 645 return -errno; 646 buf[ret] = 0; 647 648 return strtol(buf, NULL, 10); 649 } 650 651 long ksm_get_full_scans(void) 652 { 653 int ksm_full_scans_fd; 654 char buf[10]; 655 ssize_t ret; 656 657 ksm_full_scans_fd = open("/sys/kernel/mm/ksm/full_scans", O_RDONLY); 658 if (ksm_full_scans_fd < 0) 659 return -errno; 660 661 ret = pread(ksm_full_scans_fd, buf, sizeof(buf) - 1, 0); 662 close(ksm_full_scans_fd); 663 if (ret <= 0) 664 return -errno; 665 buf[ret] = 0; 666 667 return strtol(buf, NULL, 10); 668 } 669 670 int ksm_use_zero_pages(void) 671 { 672 int ksm_use_zero_pages_fd; 673 ssize_t ret; 674 675 ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR); 676 if (ksm_use_zero_pages_fd < 0) 677 return -errno; 678 679 ret = write(ksm_use_zero_pages_fd, "1", 1); 680 close(ksm_use_zero_pages_fd); 681 return ret == 1 ? 0 : -errno; 682 } 683 684 int ksm_start(void) 685 { 686 int ksm_fd; 687 ssize_t ret; 688 long start_scans, end_scans; 689 690 ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR); 691 if (ksm_fd < 0) 692 return -errno; 693 694 /* Wait for two full scans such that any possible merging happened. */ 695 start_scans = ksm_get_full_scans(); 696 if (start_scans < 0) { 697 close(ksm_fd); 698 return start_scans; 699 } 700 ret = write(ksm_fd, "1", 1); 701 close(ksm_fd); 702 if (ret != 1) 703 return -errno; 704 do { 705 end_scans = ksm_get_full_scans(); 706 if (end_scans < 0) 707 return end_scans; 708 } while (end_scans < start_scans + 2); 709 710 return 0; 711 } 712 713 int ksm_stop(void) 714 { 715 int ksm_fd; 716 ssize_t ret; 717 718 ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR); 719 if (ksm_fd < 0) 720 return -errno; 721 722 ret = write(ksm_fd, "2", 1); 723 close(ksm_fd); 724 return ret == 1 ? 0 : -errno; 725 } 726