1baa489faSSeongJae Park // SPDX-License-Identifier: GPL-2.0 2baa489faSSeongJae Park #include <string.h> 3baa489faSSeongJae Park #include <fcntl.h> 481b1e3f9SDavid Hildenbrand #include <dirent.h> 5c4277cb6SPeter Xu #include <sys/ioctl.h> 6c4277cb6SPeter Xu #include <linux/userfaultfd.h> 778391f64SPeter Xu #include <sys/syscall.h> 878391f64SPeter Xu #include <unistd.h> 9baa489faSSeongJae Park #include "../kselftest.h" 10baa489faSSeongJae Park #include "vm_util.h" 11baa489faSSeongJae Park 12baa489faSSeongJae Park #define PMD_SIZE_FILE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size" 13baa489faSSeongJae Park #define SMAP_FILE_PATH "/proc/self/smaps" 14baa489faSSeongJae Park #define MAX_LINE_LENGTH 500 15baa489faSSeongJae Park 16af605d26SPeter Xu unsigned int __page_size; 17af605d26SPeter Xu unsigned int __page_shift; 18af605d26SPeter Xu 19baa489faSSeongJae Park uint64_t pagemap_get_entry(int fd, char *start) 20baa489faSSeongJae Park { 21baa489faSSeongJae Park const unsigned long pfn = (unsigned long)start / getpagesize(); 22baa489faSSeongJae Park uint64_t entry; 23baa489faSSeongJae Park int ret; 24baa489faSSeongJae Park 25baa489faSSeongJae Park ret = pread(fd, &entry, sizeof(entry), pfn * sizeof(entry)); 26baa489faSSeongJae Park if (ret != sizeof(entry)) 27baa489faSSeongJae Park ksft_exit_fail_msg("reading pagemap failed\n"); 28baa489faSSeongJae Park return entry; 29baa489faSSeongJae Park } 30baa489faSSeongJae Park 31baa489faSSeongJae Park bool pagemap_is_softdirty(int fd, char *start) 32baa489faSSeongJae Park { 339f74696bSPeter Xu return pagemap_get_entry(fd, start) & PM_SOFT_DIRTY; 34baa489faSSeongJae Park } 35baa489faSSeongJae Park 36baa489faSSeongJae Park bool pagemap_is_swapped(int fd, char *start) 37baa489faSSeongJae Park { 389f74696bSPeter Xu return pagemap_get_entry(fd, start) & PM_SWAP; 39baa489faSSeongJae Park } 40baa489faSSeongJae Park 41baa489faSSeongJae Park bool pagemap_is_populated(int fd, char *start) 42baa489faSSeongJae Park { 439f74696bSPeter 
Xu return pagemap_get_entry(fd, start) & (PM_PRESENT | PM_SWAP); 44baa489faSSeongJae Park } 45baa489faSSeongJae Park 46baa489faSSeongJae Park unsigned long pagemap_get_pfn(int fd, char *start) 47baa489faSSeongJae Park { 48baa489faSSeongJae Park uint64_t entry = pagemap_get_entry(fd, start); 49baa489faSSeongJae Park 50baa489faSSeongJae Park /* If present (63th bit), PFN is at bit 0 -- 54. */ 519f74696bSPeter Xu if (entry & PM_PRESENT) 52baa489faSSeongJae Park return entry & 0x007fffffffffffffull; 53baa489faSSeongJae Park return -1ul; 54baa489faSSeongJae Park } 55baa489faSSeongJae Park 56baa489faSSeongJae Park void clear_softdirty(void) 57baa489faSSeongJae Park { 58baa489faSSeongJae Park int ret; 59baa489faSSeongJae Park const char *ctrl = "4"; 60baa489faSSeongJae Park int fd = open("/proc/self/clear_refs", O_WRONLY); 61baa489faSSeongJae Park 62baa489faSSeongJae Park if (fd < 0) 63baa489faSSeongJae Park ksft_exit_fail_msg("opening clear_refs failed\n"); 64baa489faSSeongJae Park ret = write(fd, ctrl, strlen(ctrl)); 65baa489faSSeongJae Park close(fd); 66baa489faSSeongJae Park if (ret != strlen(ctrl)) 67baa489faSSeongJae Park ksft_exit_fail_msg("writing clear_refs failed\n"); 68baa489faSSeongJae Park } 69baa489faSSeongJae Park 70baa489faSSeongJae Park bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len) 71baa489faSSeongJae Park { 72baa489faSSeongJae Park while (fgets(buf, len, fp)) { 73baa489faSSeongJae Park if (!strncmp(buf, pattern, strlen(pattern))) 74baa489faSSeongJae Park return true; 75baa489faSSeongJae Park } 76baa489faSSeongJae Park return false; 77baa489faSSeongJae Park } 78baa489faSSeongJae Park 79baa489faSSeongJae Park uint64_t read_pmd_pagesize(void) 80baa489faSSeongJae Park { 81baa489faSSeongJae Park int fd; 82baa489faSSeongJae Park char buf[20]; 83baa489faSSeongJae Park ssize_t num_read; 84baa489faSSeongJae Park 85baa489faSSeongJae Park fd = open(PMD_SIZE_FILE_PATH, O_RDONLY); 86baa489faSSeongJae Park if (fd == -1) 87d6e61afbSDavid 
Hildenbrand return 0; 88baa489faSSeongJae Park 89baa489faSSeongJae Park num_read = read(fd, buf, 19); 90baa489faSSeongJae Park if (num_read < 1) { 91baa489faSSeongJae Park close(fd); 92d6e61afbSDavid Hildenbrand return 0; 93baa489faSSeongJae Park } 94baa489faSSeongJae Park buf[num_read] = '\0'; 95baa489faSSeongJae Park close(fd); 96baa489faSSeongJae Park 97baa489faSSeongJae Park return strtoul(buf, NULL, 10); 98baa489faSSeongJae Park } 99baa489faSSeongJae Park 100baa489faSSeongJae Park bool __check_huge(void *addr, char *pattern, int nr_hpages, 101baa489faSSeongJae Park uint64_t hpage_size) 102baa489faSSeongJae Park { 103baa489faSSeongJae Park uint64_t thp = -1; 104baa489faSSeongJae Park int ret; 105baa489faSSeongJae Park FILE *fp; 106baa489faSSeongJae Park char buffer[MAX_LINE_LENGTH]; 107baa489faSSeongJae Park char addr_pattern[MAX_LINE_LENGTH]; 108baa489faSSeongJae Park 109baa489faSSeongJae Park ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-", 110baa489faSSeongJae Park (unsigned long) addr); 111baa489faSSeongJae Park if (ret >= MAX_LINE_LENGTH) 112baa489faSSeongJae Park ksft_exit_fail_msg("%s: Pattern is too long\n", __func__); 113baa489faSSeongJae Park 114baa489faSSeongJae Park fp = fopen(SMAP_FILE_PATH, "r"); 115baa489faSSeongJae Park if (!fp) 116baa489faSSeongJae Park ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, SMAP_FILE_PATH); 117baa489faSSeongJae Park 118baa489faSSeongJae Park if (!check_for_pattern(fp, addr_pattern, buffer, sizeof(buffer))) 119baa489faSSeongJae Park goto err_out; 120baa489faSSeongJae Park 121baa489faSSeongJae Park /* 122baa489faSSeongJae Park * Fetch the pattern in the same block and check the number of 123baa489faSSeongJae Park * hugepages. 
124baa489faSSeongJae Park */ 125baa489faSSeongJae Park if (!check_for_pattern(fp, pattern, buffer, sizeof(buffer))) 126baa489faSSeongJae Park goto err_out; 127baa489faSSeongJae Park 128baa489faSSeongJae Park snprintf(addr_pattern, MAX_LINE_LENGTH, "%s%%9ld kB", pattern); 129baa489faSSeongJae Park 130baa489faSSeongJae Park if (sscanf(buffer, addr_pattern, &thp) != 1) 131baa489faSSeongJae Park ksft_exit_fail_msg("Reading smap error\n"); 132baa489faSSeongJae Park 133baa489faSSeongJae Park err_out: 134baa489faSSeongJae Park fclose(fp); 135baa489faSSeongJae Park return thp == (nr_hpages * (hpage_size >> 10)); 136baa489faSSeongJae Park } 137baa489faSSeongJae Park 138baa489faSSeongJae Park bool check_huge_anon(void *addr, int nr_hpages, uint64_t hpage_size) 139baa489faSSeongJae Park { 140baa489faSSeongJae Park return __check_huge(addr, "AnonHugePages: ", nr_hpages, hpage_size); 141baa489faSSeongJae Park } 142baa489faSSeongJae Park 143baa489faSSeongJae Park bool check_huge_file(void *addr, int nr_hpages, uint64_t hpage_size) 144baa489faSSeongJae Park { 145baa489faSSeongJae Park return __check_huge(addr, "FilePmdMapped:", nr_hpages, hpage_size); 146baa489faSSeongJae Park } 147baa489faSSeongJae Park 148baa489faSSeongJae Park bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size) 149baa489faSSeongJae Park { 150baa489faSSeongJae Park return __check_huge(addr, "ShmemPmdMapped:", nr_hpages, hpage_size); 151baa489faSSeongJae Park } 152af605d26SPeter Xu 153af605d26SPeter Xu int64_t allocate_transhuge(void *ptr, int pagemap_fd) 154af605d26SPeter Xu { 155af605d26SPeter Xu uint64_t ent[2]; 156af605d26SPeter Xu 157af605d26SPeter Xu /* drop pmd */ 158af605d26SPeter Xu if (mmap(ptr, HPAGE_SIZE, PROT_READ | PROT_WRITE, 159af605d26SPeter Xu MAP_FIXED | MAP_ANONYMOUS | 160af605d26SPeter Xu MAP_NORESERVE | MAP_PRIVATE, -1, 0) != ptr) 161af605d26SPeter Xu errx(2, "mmap transhuge"); 162af605d26SPeter Xu 163af605d26SPeter Xu if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE)) 
164af605d26SPeter Xu err(2, "MADV_HUGEPAGE"); 165af605d26SPeter Xu 166af605d26SPeter Xu /* allocate transparent huge page */ 167af605d26SPeter Xu *(volatile void **)ptr = ptr; 168af605d26SPeter Xu 169af605d26SPeter Xu if (pread(pagemap_fd, ent, sizeof(ent), 170af605d26SPeter Xu (uintptr_t)ptr >> (pshift() - 3)) != sizeof(ent)) 171af605d26SPeter Xu err(2, "read pagemap"); 172af605d26SPeter Xu 173af605d26SPeter Xu if (PAGEMAP_PRESENT(ent[0]) && PAGEMAP_PRESENT(ent[1]) && 174af605d26SPeter Xu PAGEMAP_PFN(ent[0]) + 1 == PAGEMAP_PFN(ent[1]) && 175af605d26SPeter Xu !(PAGEMAP_PFN(ent[0]) & ((1 << (HPAGE_SHIFT - pshift())) - 1))) 176af605d26SPeter Xu return PAGEMAP_PFN(ent[0]); 177af605d26SPeter Xu 178af605d26SPeter Xu return -1; 179af605d26SPeter Xu } 180bd4d67e7SPeter Xu 181bd4d67e7SPeter Xu unsigned long default_huge_page_size(void) 182bd4d67e7SPeter Xu { 183bd4d67e7SPeter Xu unsigned long hps = 0; 184bd4d67e7SPeter Xu char *line = NULL; 185bd4d67e7SPeter Xu size_t linelen = 0; 186bd4d67e7SPeter Xu FILE *f = fopen("/proc/meminfo", "r"); 187bd4d67e7SPeter Xu 188bd4d67e7SPeter Xu if (!f) 189bd4d67e7SPeter Xu return 0; 190bd4d67e7SPeter Xu while (getline(&line, &linelen, f) > 0) { 191bd4d67e7SPeter Xu if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) { 192bd4d67e7SPeter Xu hps <<= 10; 193bd4d67e7SPeter Xu break; 194bd4d67e7SPeter Xu } 195bd4d67e7SPeter Xu } 196bd4d67e7SPeter Xu 197bd4d67e7SPeter Xu free(line); 198bd4d67e7SPeter Xu fclose(f); 199bd4d67e7SPeter Xu return hps; 200bd4d67e7SPeter Xu } 201c4277cb6SPeter Xu 20281b1e3f9SDavid Hildenbrand int detect_hugetlb_page_sizes(size_t sizes[], int max) 20381b1e3f9SDavid Hildenbrand { 20481b1e3f9SDavid Hildenbrand DIR *dir = opendir("/sys/kernel/mm/hugepages/"); 20581b1e3f9SDavid Hildenbrand int count = 0; 20681b1e3f9SDavid Hildenbrand 20781b1e3f9SDavid Hildenbrand if (!dir) 20881b1e3f9SDavid Hildenbrand return 0; 20981b1e3f9SDavid Hildenbrand 21081b1e3f9SDavid Hildenbrand while (count < max) { 21181b1e3f9SDavid 
Hildenbrand struct dirent *entry = readdir(dir); 21281b1e3f9SDavid Hildenbrand size_t kb; 21381b1e3f9SDavid Hildenbrand 21481b1e3f9SDavid Hildenbrand if (!entry) 21581b1e3f9SDavid Hildenbrand break; 21681b1e3f9SDavid Hildenbrand if (entry->d_type != DT_DIR) 21781b1e3f9SDavid Hildenbrand continue; 21881b1e3f9SDavid Hildenbrand if (sscanf(entry->d_name, "hugepages-%zukB", &kb) != 1) 21981b1e3f9SDavid Hildenbrand continue; 22081b1e3f9SDavid Hildenbrand sizes[count++] = kb * 1024; 22181b1e3f9SDavid Hildenbrand ksft_print_msg("[INFO] detected hugetlb page size: %zu KiB\n", 22281b1e3f9SDavid Hildenbrand kb); 22381b1e3f9SDavid Hildenbrand } 22481b1e3f9SDavid Hildenbrand closedir(dir); 22581b1e3f9SDavid Hildenbrand return count; 22681b1e3f9SDavid Hildenbrand } 22781b1e3f9SDavid Hildenbrand 228c3315502SPeter Xu /* If `ioctls' non-NULL, the allowed ioctls will be returned into the var */ 229c3315502SPeter Xu int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len, 230c3315502SPeter Xu bool miss, bool wp, bool minor, uint64_t *ioctls) 231c4277cb6SPeter Xu { 232c4277cb6SPeter Xu struct uffdio_register uffdio_register = { 0 }; 233c4277cb6SPeter Xu uint64_t mode = 0; 234c4277cb6SPeter Xu int ret = 0; 235c4277cb6SPeter Xu 236c4277cb6SPeter Xu if (miss) 237c4277cb6SPeter Xu mode |= UFFDIO_REGISTER_MODE_MISSING; 238c4277cb6SPeter Xu if (wp) 239c4277cb6SPeter Xu mode |= UFFDIO_REGISTER_MODE_WP; 240c4277cb6SPeter Xu if (minor) 241c4277cb6SPeter Xu mode |= UFFDIO_REGISTER_MODE_MINOR; 242c4277cb6SPeter Xu 243c4277cb6SPeter Xu uffdio_register.range.start = (unsigned long)addr; 244c4277cb6SPeter Xu uffdio_register.range.len = len; 245c4277cb6SPeter Xu uffdio_register.mode = mode; 246c4277cb6SPeter Xu 247c4277cb6SPeter Xu if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) 248c4277cb6SPeter Xu ret = -errno; 249c3315502SPeter Xu else if (ioctls) 250c3315502SPeter Xu *ioctls = uffdio_register.ioctls; 251c4277cb6SPeter Xu 252c4277cb6SPeter Xu return ret; 253c4277cb6SPeter Xu } 
/*
 * Register [@addr, @addr + @len) with the userfaultfd @uffd without
 * asking for the set of allowed ioctls.
 *
 * Thin wrapper around uffd_register_with_ioctls() that passes NULL for
 * the ioctls output; returns whatever that call returns.
 */
int uffd_register(int uffd, void *addr, uint64_t len,
		  bool miss, bool wp, bool minor)
{
	return uffd_register_with_ioctls(uffd, addr, len,
					 miss, wp, minor, NULL);
}

/*
 * Unregister [@addr, @addr + @len) from the userfaultfd @uffd.
 *
 * Returns 0 on success or -errno if the UFFDIO_UNREGISTER ioctl fails.
 */
int uffd_unregister(int uffd, void *addr, uint64_t len)
{
	struct uffdio_range range = { .start = (uintptr_t)addr, .len = len };
	int ret = 0;

	if (ioctl(uffd, UFFDIO_UNREGISTER, &range) == -1)
		ret = -errno;

	return ret;
}

/*
 * Return the "HugePages_Free:" value from /proc/meminfo, or 0 if the file
 * cannot be opened or the line is not found.
 */
unsigned long get_free_hugepages(void)
{
	unsigned long fhp = 0;
	char *line = NULL;
	size_t linelen = 0;
	FILE *f = fopen("/proc/meminfo", "r");

	if (!f)
		return fhp;
	while (getline(&line, &linelen, f) > 0) {
		if (sscanf(line, "HugePages_Free:      %lu", &fhp) == 1)
			break;
	}

	/* getline() allocates the line buffer; release it here. */
	free(line);
	fclose(f);
	return fhp;
}