1 // SPDX-License-Identifier: GPL-2.0
2 #include <string.h>
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <dirent.h>
6 #include <inttypes.h>
7 #include <sys/ioctl.h>
8 #include <linux/userfaultfd.h>
9 #include <linux/fs.h>
10 #include <sys/syscall.h>
11 #include <unistd.h>
12 #include "../kselftest.h"
13 #include "vm_util.h"
14
/* sysfs file read by read_pmd_pagesize() to learn the PMD-level THP size. */
#define PMD_SIZE_FILE_PATH	"/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
/* Per-mapping accounting of the calling process, scanned by __get_smap_entry(). */
#define SMAP_FILE_PATH		"/proc/self/smaps"
/* Summary counters of the calling process, scanned by rss_anon(). */
#define STATUS_FILE_PATH	"/proc/self/status"
/* Size of the scratch buffers used when scanning the files above line by line. */
#define MAX_LINE_LENGTH		500

/*
 * NOTE(review): presumably the cached values behind psize()/pshift(); nothing
 * in this file assigns them -- confirm against vm_util.h.
 */
unsigned int __page_size;
unsigned int __page_shift;
22
/*
 * Read the raw 64-bit pagemap entry for the page containing `start`.
 *
 * @fd:    open file descriptor the entry is read from with pread()
 * @start: virtual address inside the page of interest
 *
 * The file is treated as an array of 8-byte entries indexed by virtual page
 * number. The test run is aborted if a full entry cannot be read.
 */
uint64_t pagemap_get_entry(int fd, char *start)
{
	const unsigned long vpn = (unsigned long)start / getpagesize();
	const off_t offset = vpn * sizeof(uint64_t);
	uint64_t entry;

	if (pread(fd, &entry, sizeof(entry), offset) != sizeof(entry))
		ksft_exit_fail_msg("reading pagemap failed\n");

	return entry;
}
34
__pagemap_scan_get_categories(int fd,char * start,struct page_region * r)35 static uint64_t __pagemap_scan_get_categories(int fd, char *start, struct page_region *r)
36 {
37 struct pm_scan_arg arg;
38
39 arg.start = (uintptr_t)start;
40 arg.end = (uintptr_t)(start + psize());
41 arg.vec = (uintptr_t)r;
42 arg.vec_len = 1;
43 arg.flags = 0;
44 arg.size = sizeof(struct pm_scan_arg);
45 arg.max_pages = 0;
46 arg.category_inverted = 0;
47 arg.category_mask = 0;
48 arg.category_anyof_mask = PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | PAGE_IS_FILE |
49 PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_PFNZERO |
50 PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY;
51 arg.return_mask = arg.category_anyof_mask;
52
53 return ioctl(fd, PAGEMAP_SCAN, &arg);
54 }
55
pagemap_scan_get_categories(int fd,char * start)56 static uint64_t pagemap_scan_get_categories(int fd, char *start)
57 {
58 struct page_region r;
59 long ret;
60
61 ret = __pagemap_scan_get_categories(fd, start, &r);
62 if (ret < 0)
63 ksft_exit_fail_msg("PAGEMAP_SCAN failed: %s\n", strerror(errno));
64 if (ret == 0)
65 return 0;
66 return r.categories;
67 }
68
/*
 * Probe whether PAGEMAP_SCAN is usable on `fd`; the answer is computed on the
 * first call and cached for all later calls.
 *
 * `start` is any valid address.
 */
static bool pagemap_scan_supported(int fd, char *start)
{
	static int cached = -1;	/* -1: not probed yet */

	if (cached == -1) {
		/* Provide an invalid address in order to trigger EFAULT. */
		int ret = __pagemap_scan_get_categories(fd, start,
						(struct page_region *) ~0UL);

		if (ret == 0)
			ksft_exit_fail_msg("PAGEMAP_SCAN succeeded unexpectedly\n");

		/* Only EFAULT counts as "supported"; any other errno does not. */
		cached = errno == EFAULT;
	}

	return cached;
}
87
/*
 * Test a page property through the pagemap read() interface and, when
 * available, cross-check it against PAGEMAP_SCAN.
 *
 * @fd:             pagemap file descriptor
 * @start:          address inside the page being queried
 * @desc:           human-readable property name, used only in the failure message
 * @pagemap_flags:  bits to test in the raw pagemap entry
 * @pagescan_flags: equivalent bits to test in the PAGEMAP_SCAN categories
 *
 * Returns true if any of @pagemap_flags is set in the entry. If PAGEMAP_SCAN
 * is supported and disagrees with the pagemap entry, the test run is aborted.
 */
static bool page_entry_is(int fd, char *start, char *desc,
			  uint64_t pagemap_flags, uint64_t pagescan_flags)
{
	bool m = pagemap_get_entry(fd, start) & pagemap_flags;
	bool s;

	if (!pagemap_scan_supported(fd, start))
		return m;

	s = pagemap_scan_get_categories(fd, start) & pagescan_flags;
	if (m != s)
		/* Newline-terminated like every other ksft message in this file. */
		ksft_exit_fail_msg(
			"read and ioctl return unmatched results for %s: %d %d\n",
			desc, m, s);

	return m;
}
104
pagemap_is_softdirty(int fd,char * start)105 bool pagemap_is_softdirty(int fd, char *start)
106 {
107 return page_entry_is(fd, start, "soft-dirty",
108 PM_SOFT_DIRTY, PAGE_IS_SOFT_DIRTY);
109 }
110
pagemap_is_swapped(int fd,char * start)111 bool pagemap_is_swapped(int fd, char *start)
112 {
113 return page_entry_is(fd, start, "swap", PM_SWAP, PAGE_IS_SWAPPED);
114 }
115
pagemap_is_populated(int fd,char * start)116 bool pagemap_is_populated(int fd, char *start)
117 {
118 return page_entry_is(fd, start, "populated",
119 PM_PRESENT | PM_SWAP,
120 PAGE_IS_PRESENT | PAGE_IS_SWAPPED);
121 }
122
pagemap_get_pfn(int fd,char * start)123 unsigned long pagemap_get_pfn(int fd, char *start)
124 {
125 uint64_t entry = pagemap_get_entry(fd, start);
126
127 /* If present (63th bit), PFN is at bit 0 -- 54. */
128 if (entry & PM_PRESENT)
129 return entry & 0x007fffffffffffffull;
130 return -1ul;
131 }
132
/*
 * Write "4" to /proc/self/clear_refs (the value that, per the kernel's
 * soft-dirty documentation, resets the soft-dirty bits of the calling
 * process). Aborts the test run if the file cannot be opened or the write
 * is short.
 */
void clear_softdirty(void)
{
	const char *cmd = "4";
	const size_t cmd_len = strlen(cmd);
	int fd, written;

	fd = open("/proc/self/clear_refs", O_WRONLY);
	if (fd < 0)
		ksft_exit_fail_msg("opening clear_refs failed\n");

	written = write(fd, cmd, cmd_len);
	close(fd);

	if (written != (signed int)cmd_len)
		ksft_exit_fail_msg("writing clear_refs failed\n");
}
146
/*
 * Advance `fp` line by line until a line starting with `pattern` is found.
 *
 * @fp:      stream to read from; left positioned just after the matching line
 * @pattern: prefix to look for at the start of each line
 * @buf:     scratch buffer; holds the matching line (newline included) on success
 * @len:     size of @buf
 *
 * Returns true on a match, false once fgets() stops returning data. Lines
 * longer than @len - 1 characters are delivered by fgets() in several chunks,
 * and each chunk is compared as if it started a line.
 */
bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len)
{
	/* Loop-invariant: measure the prefix once rather than once per line. */
	const size_t pattern_len = strlen(pattern);

	while (fgets(buf, len, fp)) {
		if (!strncmp(buf, pattern, pattern_len))
			return true;
	}
	return false;
}
155
read_pmd_pagesize(void)156 uint64_t read_pmd_pagesize(void)
157 {
158 int fd;
159 char buf[20];
160 ssize_t num_read;
161
162 fd = open(PMD_SIZE_FILE_PATH, O_RDONLY);
163 if (fd == -1)
164 return 0;
165
166 num_read = read(fd, buf, 19);
167 if (num_read < 1) {
168 close(fd);
169 return 0;
170 }
171 buf[num_read] = '\0';
172 close(fd);
173
174 return strtoul(buf, NULL, 10);
175 }
176
rss_anon(void)177 unsigned long rss_anon(void)
178 {
179 unsigned long rss_anon = 0;
180 FILE *fp;
181 char buffer[MAX_LINE_LENGTH];
182
183 fp = fopen(STATUS_FILE_PATH, "r");
184 if (!fp)
185 ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, STATUS_FILE_PATH);
186
187 if (!check_for_pattern(fp, "RssAnon:", buffer, sizeof(buffer)))
188 goto err_out;
189
190 if (sscanf(buffer, "RssAnon:%10lu kB", &rss_anon) != 1)
191 ksft_exit_fail_msg("Reading status error\n");
192
193 err_out:
194 fclose(fp);
195 return rss_anon;
196 }
197
__get_smap_entry(void * addr,const char * pattern,char * buf,size_t len)198 char *__get_smap_entry(void *addr, const char *pattern, char *buf, size_t len)
199 {
200 int ret;
201 FILE *fp;
202 char *entry = NULL;
203 char addr_pattern[MAX_LINE_LENGTH];
204
205 ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
206 (unsigned long) addr);
207 if (ret >= MAX_LINE_LENGTH)
208 ksft_exit_fail_msg("%s: Pattern is too long\n", __func__);
209
210 fp = fopen(SMAP_FILE_PATH, "r");
211 if (!fp)
212 ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, SMAP_FILE_PATH);
213
214 if (!check_for_pattern(fp, addr_pattern, buf, len))
215 goto err_out;
216
217 /* Fetch the pattern in the same block */
218 if (!check_for_pattern(fp, pattern, buf, len))
219 goto err_out;
220
221 /* Trim trailing newline */
222 entry = strchr(buf, '\n');
223 if (entry)
224 *entry = '\0';
225
226 entry = buf + strlen(pattern);
227
228 err_out:
229 fclose(fp);
230 return entry;
231 }
232
__check_huge(void * addr,char * pattern,int nr_hpages,uint64_t hpage_size)233 bool __check_huge(void *addr, char *pattern, int nr_hpages,
234 uint64_t hpage_size)
235 {
236 char buffer[MAX_LINE_LENGTH];
237 uint64_t thp = -1;
238 char *entry;
239
240 entry = __get_smap_entry(addr, pattern, buffer, sizeof(buffer));
241 if (!entry)
242 goto err_out;
243
244 if (sscanf(entry, "%9" SCNu64 " kB", &thp) != 1)
245 ksft_exit_fail_msg("Reading smap error\n");
246
247 err_out:
248 return thp == (nr_hpages * (hpage_size >> 10));
249 }
250
/* Check the "AnonHugePages" smaps field of the mapping at `addr`. */
bool check_huge_anon(void *addr, int nr_hpages, uint64_t hpage_size)
{
	const bool matches = __check_huge(addr, "AnonHugePages: ",
					  nr_hpages, hpage_size);

	return matches;
}
255
/* Check the "FilePmdMapped" smaps field of the mapping at `addr`. */
bool check_huge_file(void *addr, int nr_hpages, uint64_t hpage_size)
{
	const bool matches = __check_huge(addr, "FilePmdMapped:",
					  nr_hpages, hpage_size);

	return matches;
}
260
/* Check the "ShmemPmdMapped" smaps field of the mapping at `addr`. */
bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size)
{
	const bool matches = __check_huge(addr, "ShmemPmdMapped:",
					  nr_hpages, hpage_size);

	return matches;
}
265
allocate_transhuge(void * ptr,int pagemap_fd)266 int64_t allocate_transhuge(void *ptr, int pagemap_fd)
267 {
268 uint64_t ent[2];
269
270 /* drop pmd */
271 if (mmap(ptr, HPAGE_SIZE, PROT_READ | PROT_WRITE,
272 MAP_FIXED | MAP_ANONYMOUS |
273 MAP_NORESERVE | MAP_PRIVATE, -1, 0) != ptr)
274 ksft_exit_fail_msg("mmap transhuge\n");
275
276 if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE))
277 ksft_exit_fail_msg("MADV_HUGEPAGE\n");
278
279 /* allocate transparent huge page */
280 *(volatile void **)ptr = ptr;
281
282 if (pread(pagemap_fd, ent, sizeof(ent),
283 (uintptr_t)ptr >> (pshift() - 3)) != sizeof(ent))
284 ksft_exit_fail_msg("read pagemap\n");
285
286 if (PAGEMAP_PRESENT(ent[0]) && PAGEMAP_PRESENT(ent[1]) &&
287 PAGEMAP_PFN(ent[0]) + 1 == PAGEMAP_PFN(ent[1]) &&
288 !(PAGEMAP_PFN(ent[0]) & ((1 << (HPAGE_SHIFT - pshift())) - 1)))
289 return PAGEMAP_PFN(ent[0]);
290
291 return -1;
292 }
293
/*
 * Return the value of the "Hugepagesize:" line of /proc/meminfo multiplied
 * by 1024, or 0 if the file cannot be opened or has no such line.
 */
unsigned long default_huge_page_size(void)
{
	unsigned long kb, bytes = 0;
	char *line = NULL;
	size_t cap = 0;
	FILE *meminfo = fopen("/proc/meminfo", "r");

	if (!meminfo)
		return 0;

	while (getline(&line, &cap, meminfo) > 0) {
		if (sscanf(line, "Hugepagesize: %lu kB", &kb) != 1)
			continue;

		/* kB -> bytes */
		bytes = kb << 10;
		break;
	}

	free(line);
	fclose(meminfo);
	return bytes;
}
314
detect_hugetlb_page_sizes(size_t sizes[],int max)315 int detect_hugetlb_page_sizes(size_t sizes[], int max)
316 {
317 DIR *dir = opendir("/sys/kernel/mm/hugepages/");
318 int count = 0;
319
320 if (!dir)
321 return 0;
322
323 while (count < max) {
324 struct dirent *entry = readdir(dir);
325 size_t kb;
326
327 if (!entry)
328 break;
329 if (entry->d_type != DT_DIR)
330 continue;
331 if (sscanf(entry->d_name, "hugepages-%zukB", &kb) != 1)
332 continue;
333 sizes[count++] = kb * 1024;
334 ksft_print_msg("[INFO] detected hugetlb page size: %zu KiB\n",
335 kb);
336 }
337 closedir(dir);
338 return count;
339 }
340
/*
 * Register [addr, addr + len) with userfaultfd `uffd` in the requested modes.
 *
 * @miss, @wp, @minor: select UFFDIO_REGISTER_MODE_MISSING / _WP / _MINOR
 * @ioctls:            if non-NULL, receives the allowed ioctls on success;
 *                     left untouched on failure
 *
 * Returns 0 on success or -errno if the UFFDIO_REGISTER ioctl fails.
 */
int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len,
			      bool miss, bool wp, bool minor, uint64_t *ioctls)
{
	struct uffdio_register reg = { 0 };

	reg.range.start = (unsigned long)addr;
	reg.range.len = len;
	reg.mode = (miss ? UFFDIO_REGISTER_MODE_MISSING : 0) |
		   (wp ? UFFDIO_REGISTER_MODE_WP : 0) |
		   (minor ? UFFDIO_REGISTER_MODE_MINOR : 0);

	if (ioctl(uffd, UFFDIO_REGISTER, &reg) == -1)
		return -errno;

	if (ioctls)
		*ioctls = reg.ioctls;

	return 0;
}
367
/*
 * Same as uffd_register_with_ioctls(), for callers that do not need the set
 * of allowed ioctls. Returns 0 on success or -errno on failure.
 */
int uffd_register(int uffd, void *addr, uint64_t len,
		  bool miss, bool wp, bool minor)
{
	uint64_t *no_ioctls = NULL;

	return uffd_register_with_ioctls(uffd, addr, len,
					 miss, wp, minor, no_ioctls);
}
374
/*
 * Unregister [addr, addr + len) from userfaultfd `uffd`.
 * Returns 0 on success or -errno if the UFFDIO_UNREGISTER ioctl fails.
 */
int uffd_unregister(int uffd, void *addr, uint64_t len)
{
	struct uffdio_range range = { 0 };

	range.start = (uintptr_t)addr;
	range.len = len;

	return ioctl(uffd, UFFDIO_UNREGISTER, &range) == -1 ? -errno : 0;
}
385
/*
 * Return the value of the "HugePages_Free:" line of /proc/meminfo, or 0 if
 * the file cannot be opened or has no such line.
 */
unsigned long get_free_hugepages(void)
{
	unsigned long free_pages = 0;
	char *line = NULL;
	size_t cap = 0;
	FILE *meminfo = fopen("/proc/meminfo", "r");

	if (!meminfo)
		return free_pages;

	for (;;) {
		if (getline(&line, &cap, meminfo) <= 0)
			break;
		/* Stop at the first line that parses. */
		if (sscanf(line, "HugePages_Free: %lu", &free_pages) == 1)
			break;
	}

	free(line);
	fclose(meminfo);
	return free_pages;
}
404
check_vmflag_io(void * addr)405 bool check_vmflag_io(void *addr)
406 {
407 char buffer[MAX_LINE_LENGTH];
408 const char *flags;
409 size_t flaglen;
410
411 flags = __get_smap_entry(addr, "VmFlags:", buffer, sizeof(buffer));
412 if (!flags)
413 ksft_exit_fail_msg("%s: No VmFlags for %p\n", __func__, addr);
414
415 while (true) {
416 flags += strspn(flags, " ");
417
418 flaglen = strcspn(flags, " ");
419 if (!flaglen)
420 return false;
421
422 if (flaglen == strlen("io") && !memcmp(flags, "io", flaglen))
423 return true;
424
425 flags += flaglen;
426 }
427 }
428
429 /*
430 * Open an fd at /proc/$pid/maps and configure procmap_out ready for
431 * PROCMAP_QUERY query. Returns 0 on success, or an error code otherwise.
432 */
open_procmap(pid_t pid,struct procmap_fd * procmap_out)433 int open_procmap(pid_t pid, struct procmap_fd *procmap_out)
434 {
435 char path[256];
436 int ret = 0;
437
438 memset(procmap_out, '\0', sizeof(*procmap_out));
439 sprintf(path, "/proc/%d/maps", pid);
440 procmap_out->query.size = sizeof(procmap_out->query);
441 procmap_out->fd = open(path, O_RDONLY);
442 if (procmap_out->fd < 0)
443 ret = -errno;
444
445 return ret;
446 }
447
448 /* Perform PROCMAP_QUERY. Returns 0 on success, or an error code otherwise. */
query_procmap(struct procmap_fd * procmap)449 int query_procmap(struct procmap_fd *procmap)
450 {
451 int ret = 0;
452
453 if (ioctl(procmap->fd, PROCMAP_QUERY, &procmap->query) == -1)
454 ret = -errno;
455
456 return ret;
457 }
458
459 /*
460 * Try to find the VMA at specified address, returns true if found, false if not
461 * found, and the test is failed if any other error occurs.
462 *
463 * On success, procmap->query is populated with the results.
464 */
find_vma_procmap(struct procmap_fd * procmap,void * address)465 bool find_vma_procmap(struct procmap_fd *procmap, void *address)
466 {
467 int err;
468
469 procmap->query.query_flags = 0;
470 procmap->query.query_addr = (unsigned long)address;
471 err = query_procmap(procmap);
472 if (!err)
473 return true;
474
475 if (err != -ENOENT)
476 ksft_exit_fail_msg("%s: Error %d on ioctl(PROCMAP_QUERY)\n",
477 __func__, err);
478 return false;
479 }
480
481 /*
482 * Close fd used by PROCMAP_QUERY mechanism. Returns 0 on success, or an error
483 * code otherwise.
484 */
close_procmap(struct procmap_fd * procmap)485 int close_procmap(struct procmap_fd *procmap)
486 {
487 return close(procmap->fd);
488 }
489
/*
 * Write `val` as a decimal number to `file_path`.
 *
 * Returns 0 on success and 1 on any failure (open, formatting, or
 * flush/close), after printing a diagnostic to stderr.
 */
int write_sysfs(const char *file_path, unsigned long val)
{
	FILE *f = fopen(file_path, "w");

	if (!f) {
		fprintf(stderr, "f %s\n", file_path);
		perror("fopen");
		return 1;
	}
	if (fprintf(f, "%lu", val) < 0) {
		perror("fprintf");
		fclose(f);
		return 1;
	}
	/*
	 * stdio buffers the value, so the data normally reaches the file only
	 * when the stream is flushed by fclose(). A write the target rejects
	 * therefore shows up here, not in fprintf() above.
	 */
	if (fclose(f) != 0) {
		perror("fclose");
		return 1;
	}

	return 0;
}
508
/*
 * Read one unsigned decimal number from `file_path` into `*val`.
 *
 * Returns 0 on success and 1 if the file cannot be opened or does not start
 * with a number, after printing a diagnostic to stderr.
 */
int read_sysfs(const char *file_path, unsigned long *val)
{
	FILE *in = fopen(file_path, "r");
	int parsed;

	if (!in) {
		fprintf(stderr, "f %s\n", file_path);
		perror("fopen");
		return 1;
	}

	parsed = fscanf(in, "%lu", val);
	if (parsed != 1)
		perror("fscanf");
	fclose(in);

	return parsed != 1;
}
527