1 // SPDX-License-Identifier: GPL-2.0 2 #include <string.h> 3 #include <fcntl.h> 4 #include <dirent.h> 5 #include <sys/ioctl.h> 6 #include <linux/userfaultfd.h> 7 #include <linux/fs.h> 8 #include <sys/syscall.h> 9 #include <unistd.h> 10 #include "../kselftest.h" 11 #include "vm_util.h" 12 13 #define PMD_SIZE_FILE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size" 14 #define SMAP_FILE_PATH "/proc/self/smaps" 15 #define MAX_LINE_LENGTH 500 16 17 unsigned int __page_size; 18 unsigned int __page_shift; 19 20 uint64_t pagemap_get_entry(int fd, char *start) 21 { 22 const unsigned long pfn = (unsigned long)start / getpagesize(); 23 uint64_t entry; 24 int ret; 25 26 ret = pread(fd, &entry, sizeof(entry), pfn * sizeof(entry)); 27 if (ret != sizeof(entry)) 28 ksft_exit_fail_msg("reading pagemap failed\n"); 29 return entry; 30 } 31 32 static uint64_t __pagemap_scan_get_categories(int fd, char *start, struct page_region *r) 33 { 34 struct pm_scan_arg arg; 35 36 arg.start = (uintptr_t)start; 37 arg.end = (uintptr_t)(start + psize()); 38 arg.vec = (uintptr_t)r; 39 arg.vec_len = 1; 40 arg.flags = 0; 41 arg.size = sizeof(struct pm_scan_arg); 42 arg.max_pages = 0; 43 arg.category_inverted = 0; 44 arg.category_mask = 0; 45 arg.category_anyof_mask = PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | PAGE_IS_FILE | 46 PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_PFNZERO | 47 PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY; 48 arg.return_mask = arg.category_anyof_mask; 49 50 return ioctl(fd, PAGEMAP_SCAN, &arg); 51 } 52 53 static uint64_t pagemap_scan_get_categories(int fd, char *start) 54 { 55 struct page_region r; 56 long ret; 57 58 ret = __pagemap_scan_get_categories(fd, start, &r); 59 if (ret < 0) 60 ksft_exit_fail_msg("PAGEMAP_SCAN failed: %s\n", strerror(errno)); 61 if (ret == 0) 62 return 0; 63 return r.categories; 64 } 65 66 /* `start` is any valid address. */ 67 static bool pagemap_scan_supported(int fd, char *start) 68 { 69 static int supported = -1; 70 int ret; 71 72 if (supported != -1) 73 return supported; 74 75 /* Provide an invalid address in order to trigger EFAULT. */ 76 ret = __pagemap_scan_get_categories(fd, start, (struct page_region *) ~0UL); 77 if (ret == 0) 78 ksft_exit_fail_msg("PAGEMAP_SCAN succeeded unexpectedly\n"); 79 80 supported = errno == EFAULT; 81 82 return supported; 83 } 84 85 static bool page_entry_is(int fd, char *start, char *desc, 86 uint64_t pagemap_flags, uint64_t pagescan_flags) 87 { 88 bool m = pagemap_get_entry(fd, start) & pagemap_flags; 89 90 if (pagemap_scan_supported(fd, start)) { 91 bool s = pagemap_scan_get_categories(fd, start) & pagescan_flags; 92 93 if (m == s) 94 return m; 95 96 ksft_exit_fail_msg( 97 "read and ioctl return unmatched results for %s: %d %d", desc, m, s); 98 } 99 return m; 100 } 101 102 bool pagemap_is_softdirty(int fd, char *start) 103 { 104 return page_entry_is(fd, start, "soft-dirty", 105 PM_SOFT_DIRTY, PAGE_IS_SOFT_DIRTY); 106 } 107 108 bool pagemap_is_swapped(int fd, char *start) 109 { 110 return page_entry_is(fd, start, "swap", PM_SWAP, PAGE_IS_SWAPPED); 111 } 112 113 bool pagemap_is_populated(int fd, char *start) 114 { 115 return page_entry_is(fd, start, "populated", 116 PM_PRESENT | PM_SWAP, 117 PAGE_IS_PRESENT | PAGE_IS_SWAPPED); 118 } 119 120 unsigned long pagemap_get_pfn(int fd, char *start) 121 { 122 uint64_t entry = pagemap_get_entry(fd, start); 123 124 /* If present (63th bit), PFN is at bit 0 -- 54. */ 125 if (entry & PM_PRESENT) 126 return entry & 0x007fffffffffffffull; 127 return -1ul; 128 } 129 130 void clear_softdirty(void) 131 { 132 int ret; 133 const char *ctrl = "4"; 134 int fd = open("/proc/self/clear_refs", O_WRONLY); 135 136 if (fd < 0) 137 ksft_exit_fail_msg("opening clear_refs failed\n"); 138 ret = write(fd, ctrl, strlen(ctrl)); 139 close(fd); 140 if (ret != strlen(ctrl)) 141 ksft_exit_fail_msg("writing clear_refs failed\n"); 142 } 143 144 bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len) 145 { 146 while (fgets(buf, len, fp)) { 147 if (!strncmp(buf, pattern, strlen(pattern))) 148 return true; 149 } 150 return false; 151 } 152 153 uint64_t read_pmd_pagesize(void) 154 { 155 int fd; 156 char buf[20]; 157 ssize_t num_read; 158 159 fd = open(PMD_SIZE_FILE_PATH, O_RDONLY); 160 if (fd == -1) 161 return 0; 162 163 num_read = read(fd, buf, 19); 164 if (num_read < 1) { 165 close(fd); 166 return 0; 167 } 168 buf[num_read] = '\0'; 169 close(fd); 170 171 return strtoul(buf, NULL, 10); 172 } 173 174 bool __check_huge(void *addr, char *pattern, int nr_hpages, 175 uint64_t hpage_size) 176 { 177 uint64_t thp = -1; 178 int ret; 179 FILE *fp; 180 char buffer[MAX_LINE_LENGTH]; 181 char addr_pattern[MAX_LINE_LENGTH]; 182 183 ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-", 184 (unsigned long) addr); 185 if (ret >= MAX_LINE_LENGTH) 186 ksft_exit_fail_msg("%s: Pattern is too long\n", __func__); 187 188 fp = fopen(SMAP_FILE_PATH, "r"); 189 if (!fp) 190 ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, SMAP_FILE_PATH); 191 192 if (!check_for_pattern(fp, addr_pattern, buffer, sizeof(buffer))) 193 goto err_out; 194 195 /* 196 * Fetch the pattern in the same block and check the number of 197 * hugepages. 198 */ 199 if (!check_for_pattern(fp, pattern, buffer, sizeof(buffer))) 200 goto err_out; 201 202 snprintf(addr_pattern, MAX_LINE_LENGTH, "%s%%9ld kB", pattern); 203 204 if (sscanf(buffer, addr_pattern, &thp) != 1) 205 ksft_exit_fail_msg("Reading smap error\n"); 206 207 err_out: 208 fclose(fp); 209 return thp == (nr_hpages * (hpage_size >> 10)); 210 } 211 212 bool check_huge_anon(void *addr, int nr_hpages, uint64_t hpage_size) 213 { 214 return __check_huge(addr, "AnonHugePages: ", nr_hpages, hpage_size); 215 } 216 217 bool check_huge_file(void *addr, int nr_hpages, uint64_t hpage_size) 218 { 219 return __check_huge(addr, "FilePmdMapped:", nr_hpages, hpage_size); 220 } 221 222 bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size) 223 { 224 return __check_huge(addr, "ShmemPmdMapped:", nr_hpages, hpage_size); 225 } 226 227 int64_t allocate_transhuge(void *ptr, int pagemap_fd) 228 { 229 uint64_t ent[2]; 230 231 /* drop pmd */ 232 if (mmap(ptr, HPAGE_SIZE, PROT_READ | PROT_WRITE, 233 MAP_FIXED | MAP_ANONYMOUS | 234 MAP_NORESERVE | MAP_PRIVATE, -1, 0) != ptr) 235 errx(2, "mmap transhuge"); 236 237 if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE)) 238 err(2, "MADV_HUGEPAGE"); 239 240 /* allocate transparent huge page */ 241 *(volatile void **)ptr = ptr; 242 243 if (pread(pagemap_fd, ent, sizeof(ent), 244 (uintptr_t)ptr >> (pshift() - 3)) != sizeof(ent)) 245 err(2, "read pagemap"); 246 247 if (PAGEMAP_PRESENT(ent[0]) && PAGEMAP_PRESENT(ent[1]) && 248 PAGEMAP_PFN(ent[0]) + 1 == PAGEMAP_PFN(ent[1]) && 249 !(PAGEMAP_PFN(ent[0]) & ((1 << (HPAGE_SHIFT - pshift())) - 1))) 250 return PAGEMAP_PFN(ent[0]); 251 252 return -1; 253 } 254 255 unsigned long default_huge_page_size(void) 256 { 257 unsigned long hps = 0; 258 char *line = NULL; 259 size_t linelen = 0; 260 FILE *f = fopen("/proc/meminfo", "r"); 261 262 if (!f) 263 return 0; 264 while (getline(&line, &linelen, f) > 0) { 265 if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) { 266 hps <<= 10; 267 break; 268 } 269 } 270 271 free(line); 272 fclose(f); 273 return hps; 274 } 275 276 int detect_hugetlb_page_sizes(size_t sizes[], int max) 277 { 278 DIR *dir = opendir("/sys/kernel/mm/hugepages/"); 279 int count = 0; 280 281 if (!dir) 282 return 0; 283 284 while (count < max) { 285 struct dirent *entry = readdir(dir); 286 size_t kb; 287 288 if (!entry) 289 break; 290 if (entry->d_type != DT_DIR) 291 continue; 292 if (sscanf(entry->d_name, "hugepages-%zukB", &kb) != 1) 293 continue; 294 sizes[count++] = kb * 1024; 295 ksft_print_msg("[INFO] detected hugetlb page size: %zu KiB\n", 296 kb); 297 } 298 closedir(dir); 299 return count; 300 } 301 302 /* If `ioctls' non-NULL, the allowed ioctls will be returned into the var */ 303 int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len, 304 bool miss, bool wp, bool minor, uint64_t *ioctls) 305 { 306 struct uffdio_register uffdio_register = { 0 }; 307 uint64_t mode = 0; 308 int ret = 0; 309 310 if (miss) 311 mode |= UFFDIO_REGISTER_MODE_MISSING; 312 if (wp) 313 mode |= UFFDIO_REGISTER_MODE_WP; 314 if (minor) 315 mode |= UFFDIO_REGISTER_MODE_MINOR; 316 317 uffdio_register.range.start = (unsigned long)addr; 318 uffdio_register.range.len = len; 319 uffdio_register.mode = mode; 320 321 if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) 322 ret = -errno; 323 else if (ioctls) 324 *ioctls = uffdio_register.ioctls; 325 326 return ret; 327 } 328 329 int uffd_register(int uffd, void *addr, uint64_t len, 330 bool miss, bool wp, bool minor) 331 { 332 return uffd_register_with_ioctls(uffd, addr, len, 333 miss, wp, minor, NULL); 334 } 335 336 int uffd_unregister(int uffd, void *addr, uint64_t len) 337 { 338 struct uffdio_range range = { .start = (uintptr_t)addr, .len = len }; 339 int ret = 0; 340 341 if (ioctl(uffd, UFFDIO_UNREGISTER, &range) == -1) 342 ret = -errno; 343 344 return ret; 345 } 346 347 unsigned long get_free_hugepages(void) 348 { 349 unsigned long fhp = 0; 350 char *line = NULL; 351 size_t linelen = 0; 352 FILE *f = fopen("/proc/meminfo", "r"); 353 354 if (!f) 355 return fhp; 356 while (getline(&line, &linelen, f) > 0) { 357 if (sscanf(line, "HugePages_Free: %lu", &fhp) == 1) 358 break; 359 } 360 361 free(line); 362 fclose(f); 363 return fhp; 364 } 365