1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * A test of splitting PMD THPs and PTE-mapped THPs from a specified virtual
4 * address range in a process via <debugfs>/split_huge_pages interface.
5 */
6
7 #define _GNU_SOURCE
8 #include <assert.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <stdarg.h>
12 #include <unistd.h>
13 #include <inttypes.h>
14 #include <string.h>
15 #include <fcntl.h>
16 #include <sys/mman.h>
17 #include <sys/mount.h>
18 #include <sys/param.h>
19 #include <malloc.h>
20 #include <stdbool.h>
21 #include <time.h>
22 #include "vm_util.h"
23 #include "../kselftest.h"
24
/*
 * Page geometry shared by all tests; main() fills these in before any test
 * runs: base page size and its shift, PMD THP size and its order.
 */
uint64_t pagesize, pmd_pagesize;
unsigned int pageshift, pmd_order;

/* Per-order folio counts expected after a split; main() sizes it pmd_order + 1. */
int *expected_orders;

#define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages"
#define SMAP_PATH "/proc/self/smaps"
/* Size of the on-stack buffers used for debugfs input and test file paths. */
#define INPUT_MAX 80

/* Input formats written to SPLIT_DEBUGFS via write_debugfs(). */
#define PID_FMT "%d,0x%lx,0x%lx,%d"
#define PID_FMT_OFFSET "%d,0x%lx,0x%lx,%d,%d"
#define PATH_FMT "%s,0x%lx,0x%lx,%d"

const char *pagemap_proc = "/proc/self/pagemap";
const char *kpageflags_proc = "/proc/kpageflags";

/* Descriptors for the two proc files above; opened and closed by main(). */
int pagemap_fd, kpageflags_fd;
43
is_backed_by_folio(char * vaddr,int order,int pagemap_fd,int kpageflags_fd)44 static bool is_backed_by_folio(char *vaddr, int order, int pagemap_fd,
45 int kpageflags_fd)
46 {
47 const uint64_t folio_head_flags = KPF_THP | KPF_COMPOUND_HEAD;
48 const uint64_t folio_tail_flags = KPF_THP | KPF_COMPOUND_TAIL;
49 const unsigned long nr_pages = 1UL << order;
50 unsigned long pfn_head;
51 uint64_t pfn_flags;
52 unsigned long pfn;
53 unsigned long i;
54
55 pfn = pagemap_get_pfn(pagemap_fd, vaddr);
56
57 /* non present page */
58 if (pfn == -1UL)
59 return false;
60
61 if (pageflags_get(pfn, kpageflags_fd, &pfn_flags))
62 goto fail;
63
64 /* check for order-0 pages */
65 if (!order) {
66 if (pfn_flags & (folio_head_flags | folio_tail_flags))
67 return false;
68 return true;
69 }
70
71 /* non THP folio */
72 if (!(pfn_flags & KPF_THP))
73 return false;
74
75 pfn_head = pfn & ~(nr_pages - 1);
76
77 if (pageflags_get(pfn_head, kpageflags_fd, &pfn_flags))
78 goto fail;
79
80 /* head PFN has no compound_head flag set */
81 if ((pfn_flags & folio_head_flags) != folio_head_flags)
82 return false;
83
84 /* check all tail PFN flags */
85 for (i = 1; i < nr_pages; i++) {
86 if (pageflags_get(pfn_head + i, kpageflags_fd, &pfn_flags))
87 goto fail;
88 if ((pfn_flags & folio_tail_flags) != folio_tail_flags)
89 return false;
90 }
91
92 /*
93 * check the PFN after this folio, but if its flags cannot be obtained,
94 * assume this folio has the expected order
95 */
96 if (pageflags_get(pfn_head + nr_pages, kpageflags_fd, &pfn_flags))
97 return true;
98
99 /* If we find another tail page, then the folio is larger. */
100 return (pfn_flags & folio_tail_flags) != folio_tail_flags;
101 fail:
102 ksft_exit_fail_msg("Failed to get folio info\n");
103 return false;
104 }
105
/*
 * vaddr_pageflags_get - fetch the kpageflags word of the page mapped at @vaddr
 *
 * @vaddr: virtual address to look up
 * @pagemap_fd: descriptor handed to pagemap_get_pfn()
 * @kpageflags_fd: descriptor handed to pageflags_get()
 * @flags: output, passed through to pageflags_get()
 *
 * Return: 0 on success, 1 if @vaddr is not present, -1 if pageflags_get()
 * reports a failure.
 */
static int vaddr_pageflags_get(char *vaddr, int pagemap_fd, int kpageflags_fd,
		uint64_t *flags)
{
	const unsigned long pfn = pagemap_get_pfn(pagemap_fd, vaddr);

	/* non-present PFN */
	if (pfn == -1UL)
		return 1;

	return pageflags_get(pfn, kpageflags_fd, flags) ? -1 : 0;
}
122
123 /*
124 * gather_after_split_folio_orders - scan through [vaddr_start, len) and record
125 * folio orders
126 *
127 * @vaddr_start: start vaddr
128 * @len: range length
129 * @pagemap_fd: file descriptor to /proc/<pid>/pagemap
130 * @kpageflags_fd: file descriptor to /proc/kpageflags
131 * @orders: output folio order array
132 * @nr_orders: folio order array size
133 *
134 * gather_after_split_folio_orders() scan through [vaddr_start, len) and check
135 * all folios within the range and record their orders. All order-0 pages will
136 * be recorded. Non-present vaddr is skipped.
137 *
138 * NOTE: the function is used to check folio orders after a split is performed,
139 * so it assumes [vaddr_start, len) fully maps to after-split folios within that
140 * range.
141 *
142 * Return: 0 - no error, -1 - unhandled cases
143 */
gather_after_split_folio_orders(char * vaddr_start,size_t len,int pagemap_fd,int kpageflags_fd,int orders[],int nr_orders)144 static int gather_after_split_folio_orders(char *vaddr_start, size_t len,
145 int pagemap_fd, int kpageflags_fd, int orders[], int nr_orders)
146 {
147 uint64_t page_flags = 0;
148 int cur_order = -1;
149 char *vaddr;
150
151 if (pagemap_fd == -1 || kpageflags_fd == -1)
152 return -1;
153 if (!orders)
154 return -1;
155 if (nr_orders <= 0)
156 return -1;
157
158 for (vaddr = vaddr_start; vaddr < vaddr_start + len;) {
159 char *next_folio_vaddr;
160 int status;
161
162 status = vaddr_pageflags_get(vaddr, pagemap_fd, kpageflags_fd,
163 &page_flags);
164 if (status < 0)
165 return -1;
166
167 /* skip non present vaddr */
168 if (status == 1) {
169 vaddr += psize();
170 continue;
171 }
172
173 /* all order-0 pages with possible false postive (non folio) */
174 if (!(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) {
175 orders[0]++;
176 vaddr += psize();
177 continue;
178 }
179
180 /* skip non thp compound pages */
181 if (!(page_flags & KPF_THP)) {
182 vaddr += psize();
183 continue;
184 }
185
186 /* vpn points to part of a THP at this point */
187 if (page_flags & KPF_COMPOUND_HEAD)
188 cur_order = 1;
189 else {
190 vaddr += psize();
191 continue;
192 }
193
194 next_folio_vaddr = vaddr + (1UL << (cur_order + pshift()));
195
196 if (next_folio_vaddr >= vaddr_start + len)
197 break;
198
199 while ((status = vaddr_pageflags_get(next_folio_vaddr,
200 pagemap_fd, kpageflags_fd,
201 &page_flags)) >= 0) {
202 /*
203 * non present vaddr, next compound head page, or
204 * order-0 page
205 */
206 if (status == 1 ||
207 (page_flags & KPF_COMPOUND_HEAD) ||
208 !(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) {
209 if (cur_order < nr_orders) {
210 orders[cur_order]++;
211 cur_order = -1;
212 vaddr = next_folio_vaddr;
213 }
214 break;
215 }
216
217 cur_order++;
218 next_folio_vaddr = vaddr + (1UL << (cur_order + pshift()));
219 }
220
221 if (status < 0)
222 return status;
223 }
224 if (cur_order > 0 && cur_order < nr_orders)
225 orders[cur_order]++;
226 return 0;
227 }
228
/*
 * check_after_split_folio_orders - compare the folio orders found in
 * [vaddr_start, vaddr_start + len) against the expected per-order counts
 *
 * @vaddr_start: start vaddr
 * @len: range length
 * @pagemap_fd: passed to gather_after_split_folio_orders()
 * @kpageflags_fd: passed to gather_after_split_folio_orders()
 * @orders: expected number of folios per order
 * @nr_orders: number of entries in @orders
 *
 * Every mismatching order is printed. Allocation or gathering failures end
 * the run through ksft_exit_fail_msg().
 *
 * Return: 0 if all counts match, -1 otherwise
 */
static int check_after_split_folio_orders(char *vaddr_start, size_t len,
		int pagemap_fd, int kpageflags_fd, int orders[], int nr_orders)
{
	/* zero-initialised table of the orders actually observed */
	int *found = calloc(nr_orders, sizeof(int));
	int status;
	int ord;

	if (found == NULL)
		ksft_exit_fail_msg("Cannot allocate memory for vaddr_orders");

	status = gather_after_split_folio_orders(vaddr_start, len, pagemap_fd,
						 kpageflags_fd, found,
						 nr_orders);
	if (status != 0)
		ksft_exit_fail_msg("gather folio info failed\n");

	for (ord = 0; ord < nr_orders; ord++) {
		if (found[ord] == orders[ord])
			continue;

		ksft_print_msg("order %d: expected: %d got %d\n", ord,
			       orders[ord], found[ord]);
		status = -1;
	}

	free(found);
	return status;
}
257
/*
 * write_file - write a buffer to an existing file
 *
 * @path: file to open write-only
 * @buf: data to write
 * @buflen: buffer length; only the first @buflen - 1 bytes are written
 *
 * Ends the run through ksft_exit_fail_msg() if the file cannot be opened or
 * if fewer than one byte was written.
 */
static void write_file(const char *path, const char *buf, size_t buflen)
{
	ssize_t written;
	int fd = open(path, O_WRONLY);

	if (fd == -1)
		ksft_exit_fail_msg("%s open failed: %s\n", path, strerror(errno));

	written = write(fd, buf, buflen - 1);
	close(fd);

	if (written <= 0)
		ksft_exit_fail_msg("Write failed\n");
}
272
write_debugfs(const char * fmt,...)273 static void write_debugfs(const char *fmt, ...)
274 {
275 char input[INPUT_MAX];
276 int ret;
277 va_list argp;
278
279 va_start(argp, fmt);
280 ret = vsnprintf(input, INPUT_MAX, fmt, argp);
281 va_end(argp);
282
283 if (ret >= INPUT_MAX)
284 ksft_exit_fail_msg("%s: Debugfs input is too long\n", __func__);
285
286 write_file(SPLIT_DEBUGFS, input, ret + 1);
287 }
288
allocate_zero_filled_hugepage(size_t len)289 static char *allocate_zero_filled_hugepage(size_t len)
290 {
291 char *result;
292 size_t i;
293
294 result = memalign(pmd_pagesize, len);
295 if (!result) {
296 printf("Fail to allocate memory\n");
297 exit(EXIT_FAILURE);
298 }
299
300 madvise(result, len, MADV_HUGEPAGE);
301
302 for (i = 0; i < len; i++)
303 result[i] = (char)0;
304
305 return result;
306 }
307
verify_rss_anon_split_huge_page_all_zeroes(char * one_page,int nr_hpages,size_t len)308 static void verify_rss_anon_split_huge_page_all_zeroes(char *one_page, int nr_hpages, size_t len)
309 {
310 unsigned long rss_anon_before, rss_anon_after;
311 size_t i;
312
313 if (!check_huge_anon(one_page, nr_hpages, pmd_pagesize))
314 ksft_exit_fail_msg("No THP is allocated\n");
315
316 rss_anon_before = rss_anon();
317 if (!rss_anon_before)
318 ksft_exit_fail_msg("No RssAnon is allocated before split\n");
319
320 /* split all THPs */
321 write_debugfs(PID_FMT, getpid(), (uint64_t)one_page,
322 (uint64_t)one_page + len, 0);
323
324 for (i = 0; i < len; i++)
325 if (one_page[i] != (char)0)
326 ksft_exit_fail_msg("%ld byte corrupted\n", i);
327
328 if (!check_huge_anon(one_page, 0, pmd_pagesize))
329 ksft_exit_fail_msg("Still AnonHugePages not split\n");
330
331 rss_anon_after = rss_anon();
332 if (rss_anon_after >= rss_anon_before)
333 ksft_exit_fail_msg("Incorrect RssAnon value. Before: %ld After: %ld\n",
334 rss_anon_before, rss_anon_after);
335 }
336
split_pmd_zero_pages(void)337 static void split_pmd_zero_pages(void)
338 {
339 char *one_page;
340 int nr_hpages = 4;
341 size_t len = nr_hpages * pmd_pagesize;
342
343 one_page = allocate_zero_filled_hugepage(len);
344 verify_rss_anon_split_huge_page_all_zeroes(one_page, nr_hpages, len);
345 ksft_test_result_pass("Split zero filled huge pages successful\n");
346 free(one_page);
347 }
348
split_pmd_thp_to_order(int order)349 static void split_pmd_thp_to_order(int order)
350 {
351 char *one_page;
352 size_t len = 4 * pmd_pagesize;
353 size_t i;
354
355 one_page = memalign(pmd_pagesize, len);
356 if (!one_page)
357 ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno));
358
359 madvise(one_page, len, MADV_HUGEPAGE);
360
361 for (i = 0; i < len; i++)
362 one_page[i] = (char)i;
363
364 if (!check_huge_anon(one_page, 4, pmd_pagesize))
365 ksft_exit_fail_msg("No THP is allocated\n");
366
367 /* split all THPs */
368 write_debugfs(PID_FMT, getpid(), (uint64_t)one_page,
369 (uint64_t)one_page + len, order);
370
371 for (i = 0; i < len; i++)
372 if (one_page[i] != (char)i)
373 ksft_exit_fail_msg("%ld byte corrupted\n", i);
374
375 memset(expected_orders, 0, sizeof(int) * (pmd_order + 1));
376 expected_orders[order] = 4 << (pmd_order - order);
377
378 if (check_after_split_folio_orders(one_page, len, pagemap_fd,
379 kpageflags_fd, expected_orders,
380 (pmd_order + 1)))
381 ksft_exit_fail_msg("Unexpected THP split\n");
382
383 if (!check_huge_anon(one_page, 0, pmd_pagesize))
384 ksft_exit_fail_msg("Still AnonHugePages not split\n");
385
386 ksft_test_result_pass("Split huge pages to order %d successful\n", order);
387 free(one_page);
388 }
389
split_pte_mapped_thp(void)390 static void split_pte_mapped_thp(void)
391 {
392 const size_t nr_thps = 4;
393 const size_t thp_area_size = nr_thps * pmd_pagesize;
394 const size_t page_area_size = nr_thps * pagesize;
395 char *thp_area, *tmp, *page_area = MAP_FAILED;
396 size_t i;
397
398 thp_area = mmap((void *)(1UL << 30), thp_area_size, PROT_READ | PROT_WRITE,
399 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
400 if (thp_area == MAP_FAILED) {
401 ksft_test_result_fail("Fail to allocate memory: %s\n", strerror(errno));
402 return;
403 }
404
405 madvise(thp_area, thp_area_size, MADV_HUGEPAGE);
406
407 for (i = 0; i < thp_area_size; i++)
408 thp_area[i] = (char)i;
409
410 if (!check_huge_anon(thp_area, nr_thps, pmd_pagesize)) {
411 ksft_test_result_skip("Not all THPs allocated\n");
412 goto out;
413 }
414
415 /*
416 * To challenge spitting code, we will mremap a single page of each
417 * THP (page[i] of thp[i]) in the thp_area into page_area. This will
418 * replace the PMD mappings in the thp_area by PTE mappings first,
419 * but leaving the THP unsplit, to then create a page-sized hole in
420 * the thp_area.
421 * We will then manually trigger splitting of all THPs through the
422 * single mremap'ed pages of each THP in the page_area.
423 */
424 page_area = mmap(NULL, page_area_size, PROT_READ | PROT_WRITE,
425 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
426 if (page_area == MAP_FAILED) {
427 ksft_test_result_fail("Fail to allocate memory: %s\n", strerror(errno));
428 goto out;
429 }
430
431 for (i = 0; i < nr_thps; i++) {
432 tmp = mremap(thp_area + pmd_pagesize * i + pagesize * i,
433 pagesize, pagesize, MREMAP_MAYMOVE|MREMAP_FIXED,
434 page_area + pagesize * i);
435 if (tmp != MAP_FAILED)
436 continue;
437 ksft_test_result_fail("mremap failed: %s\n", strerror(errno));
438 goto out;
439 }
440
441 /*
442 * Verify that our THPs were not split yet. Note that
443 * check_huge_anon() cannot be used as it checks for PMD mappings.
444 */
445 for (i = 0; i < nr_thps; i++) {
446 if (is_backed_by_folio(page_area + i * pagesize, pmd_order,
447 pagemap_fd, kpageflags_fd))
448 continue;
449 ksft_test_result_fail("THP %zu missing after mremap\n", i);
450 goto out;
451 }
452
453 /* Split all THPs through the remapped pages. */
454 write_debugfs(PID_FMT, getpid(), (uint64_t)page_area,
455 (uint64_t)page_area + page_area_size, 0);
456
457 /* Corruption during mremap or split? */
458 for (i = 0; i < page_area_size; i++) {
459 if (page_area[i] == (char)i)
460 continue;
461 ksft_test_result_fail("%zu byte corrupted\n", i);
462 goto out;
463 }
464
465 /* Split failed? */
466 for (i = 0; i < nr_thps; i++) {
467 if (is_backed_by_folio(page_area + i * pagesize, 0,
468 pagemap_fd, kpageflags_fd))
469 continue;
470 ksft_test_result_fail("THP %zu not split\n", i);
471 }
472
473 ksft_test_result_pass("Split PTE-mapped huge pages successful\n");
474 out:
475 munmap(thp_area, thp_area_size);
476 if (page_area != MAP_FAILED)
477 munmap(page_area, page_area_size);
478 }
479
split_file_backed_thp(int order)480 static void split_file_backed_thp(int order)
481 {
482 int status;
483 int fd;
484 char tmpfs_template[] = "/tmp/thp_split_XXXXXX";
485 const char *tmpfs_loc = mkdtemp(tmpfs_template);
486 char testfile[INPUT_MAX];
487 ssize_t num_written, num_read;
488 char *file_buf1, *file_buf2;
489 uint64_t pgoff_start = 0, pgoff_end = 1024;
490 int i;
491
492 ksft_print_msg("Please enable pr_debug in split_huge_pages_in_file() for more info.\n");
493
494 file_buf1 = (char *)malloc(pmd_pagesize);
495 file_buf2 = (char *)malloc(pmd_pagesize);
496
497 if (!file_buf1 || !file_buf2) {
498 ksft_print_msg("cannot allocate file buffers\n");
499 goto out;
500 }
501
502 for (i = 0; i < pmd_pagesize; i++)
503 file_buf1[i] = (char)i;
504 memset(file_buf2, 0, pmd_pagesize);
505
506 status = mount("tmpfs", tmpfs_loc, "tmpfs", 0, "huge=always,size=4m");
507
508 if (status)
509 ksft_exit_fail_msg("Unable to create a tmpfs for testing\n");
510
511 status = snprintf(testfile, INPUT_MAX, "%s/thp_file", tmpfs_loc);
512 if (status >= INPUT_MAX) {
513 ksft_print_msg("Fail to create file-backed THP split testing file\n");
514 goto cleanup;
515 }
516
517 fd = open(testfile, O_CREAT|O_RDWR, 0664);
518 if (fd == -1) {
519 ksft_perror("Cannot open testing file");
520 goto cleanup;
521 }
522
523 /* write pmd size data to the file, so a file-backed THP can be allocated */
524 num_written = write(fd, file_buf1, pmd_pagesize);
525
526 if (num_written == -1 || num_written != pmd_pagesize) {
527 ksft_perror("Failed to write data to testing file");
528 goto close_file;
529 }
530
531 /* split the file-backed THP */
532 write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end, order);
533
534 /* check file content after split */
535 status = lseek(fd, 0, SEEK_SET);
536 if (status == -1) {
537 ksft_perror("Cannot lseek file");
538 goto close_file;
539 }
540
541 num_read = read(fd, file_buf2, num_written);
542 if (num_read == -1 || num_read != num_written) {
543 ksft_perror("Cannot read file content back");
544 goto close_file;
545 }
546
547 if (strncmp(file_buf1, file_buf2, pmd_pagesize) != 0) {
548 ksft_print_msg("File content changed\n");
549 goto close_file;
550 }
551
552 close(fd);
553 status = unlink(testfile);
554 if (status) {
555 ksft_perror("Cannot remove testing file");
556 goto cleanup;
557 }
558
559 status = umount(tmpfs_loc);
560 if (status) {
561 rmdir(tmpfs_loc);
562 ksft_exit_fail_msg("Unable to umount %s\n", tmpfs_loc);
563 }
564
565 status = rmdir(tmpfs_loc);
566 if (status)
567 ksft_exit_fail_msg("cannot remove tmp dir: %s\n", strerror(errno));
568
569 ksft_print_msg("Please check dmesg for more information\n");
570 ksft_test_result_pass("File-backed THP split to order %d test done\n", order);
571 return;
572
573 close_file:
574 close(fd);
575 cleanup:
576 umount(tmpfs_loc);
577 rmdir(tmpfs_loc);
578 out:
579 ksft_exit_fail_msg("Error occurred\n");
580 }
581
/*
 * prepare_thp_fs - choose the directory used by the pagecache split tests
 *
 * @xfs_path: caller-supplied directory, or NULL
 * @thp_fs_template: mkdtemp() template, used only when @xfs_path is NULL
 * @thp_fs_loc: output, the directory to use
 *
 * Return: true if a temporary directory was created (and has to be removed
 * by the caller later), false if @xfs_path is used as is.
 */
static bool prepare_thp_fs(const char *xfs_path, char *thp_fs_template,
		const char **thp_fs_loc)
{
	if (!xfs_path) {
		*thp_fs_loc = mkdtemp(thp_fs_template);
		if (*thp_fs_loc == NULL)
			ksft_exit_fail_msg("cannot create temp folder\n");

		return true;
	}

	*thp_fs_loc = xfs_path;
	return false;
}
597
/*
 * cleanup_thp_fs - remove the directory set up by prepare_thp_fs()
 *
 * @thp_fs_loc: directory that was used for the tests
 * @created_tmp: true if the directory is a temporary one created for the
 *               tests; a caller-supplied directory is left untouched
 */
static void cleanup_thp_fs(const char *thp_fs_loc, bool created_tmp)
{
	if (created_tmp && rmdir(thp_fs_loc) != 0)
		ksft_exit_fail_msg("cannot remove tmp dir: %s\n",
				   strerror(errno));
}
610
create_pagecache_thp_and_fd(const char * testfile,size_t fd_size,int * fd,char ** addr)611 static int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size,
612 int *fd, char **addr)
613 {
614 size_t i;
615 unsigned char buf[1024];
616
617 srand(time(NULL));
618
619 *fd = open(testfile, O_CREAT | O_RDWR, 0664);
620 if (*fd == -1)
621 ksft_exit_fail_msg("Failed to create a file at %s\n", testfile);
622
623 assert(fd_size % sizeof(buf) == 0);
624 for (i = 0; i < sizeof(buf); i++)
625 buf[i] = (unsigned char)i;
626 for (i = 0; i < fd_size; i += sizeof(buf))
627 write(*fd, buf, sizeof(buf));
628
629 close(*fd);
630 sync();
631 *fd = open("/proc/sys/vm/drop_caches", O_WRONLY);
632 if (*fd == -1) {
633 ksft_perror("open drop_caches");
634 goto err_out_unlink;
635 }
636 if (write(*fd, "3", 1) != 1) {
637 ksft_perror("write to drop_caches");
638 goto err_out_unlink;
639 }
640 close(*fd);
641
642 *fd = open(testfile, O_RDWR);
643 if (*fd == -1) {
644 ksft_perror("Failed to open testfile\n");
645 goto err_out_unlink;
646 }
647
648 *addr = mmap(NULL, fd_size, PROT_READ|PROT_WRITE, MAP_SHARED, *fd, 0);
649 if (*addr == (char *)-1) {
650 ksft_perror("cannot mmap");
651 goto err_out_close;
652 }
653 madvise(*addr, fd_size, MADV_HUGEPAGE);
654
655 for (size_t i = 0; i < fd_size; i++) {
656 char *addr2 = *addr + i;
657
658 FORCE_READ(*addr2);
659 }
660
661 if (!check_huge_file(*addr, fd_size / pmd_pagesize, pmd_pagesize)) {
662 ksft_print_msg("No large pagecache folio generated, please provide a filesystem supporting large folio\n");
663 munmap(*addr, fd_size);
664 close(*fd);
665 unlink(testfile);
666 ksft_test_result_skip("Pagecache folio split skipped\n");
667 return -2;
668 }
669 return 0;
670 err_out_close:
671 close(*fd);
672 err_out_unlink:
673 unlink(testfile);
674 ksft_exit_fail_msg("Failed to create large pagecache folios\n");
675 return -1;
676 }
677
split_thp_in_pagecache_to_order_at(size_t fd_size,const char * fs_loc,int order,int offset)678 static void split_thp_in_pagecache_to_order_at(size_t fd_size,
679 const char *fs_loc, int order, int offset)
680 {
681 int fd;
682 char *split_addr;
683 char *addr;
684 size_t i;
685 char testfile[INPUT_MAX];
686 int err = 0;
687
688 err = snprintf(testfile, INPUT_MAX, "%s/test", fs_loc);
689
690 if (err < 0)
691 ksft_exit_fail_msg("cannot generate right test file name\n");
692
693 err = create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr);
694 if (err)
695 return;
696
697 err = 0;
698
699 memset(expected_orders, 0, sizeof(int) * (pmd_order + 1));
700 /*
701 * use [split_addr, split_addr + pagesize) range to split THPs, since
702 * the debugfs function always split a range with pagesize step and
703 * providing a full [addr, addr + fd_size) range can trigger multiple
704 * splits, complicating after-split result checking.
705 */
706 if (offset == -1) {
707 for (split_addr = addr; split_addr < addr + fd_size; split_addr += pmd_pagesize)
708 write_debugfs(PID_FMT, getpid(), (uint64_t)split_addr,
709 (uint64_t)split_addr + pagesize, order);
710
711 expected_orders[order] = fd_size / (pagesize << order);
712 } else {
713 int times = fd_size / pmd_pagesize;
714
715 for (split_addr = addr; split_addr < addr + fd_size; split_addr += pmd_pagesize)
716 write_debugfs(PID_FMT_OFFSET, getpid(), (uint64_t)split_addr,
717 (uint64_t)split_addr + pagesize, order, offset);
718
719 for (i = order + 1; i < pmd_order; i++)
720 expected_orders[i] = times;
721 expected_orders[order] = 2 * times;
722 }
723
724 for (i = 0; i < fd_size; i++)
725 if (*(addr + i) != (char)i) {
726 ksft_print_msg("%lu byte corrupted in the file\n", i);
727 err = EXIT_FAILURE;
728 goto out;
729 }
730
731 if (check_after_split_folio_orders(addr, fd_size, pagemap_fd,
732 kpageflags_fd, expected_orders,
733 (pmd_order + 1))) {
734 ksft_print_msg("Unexpected THP split\n");
735 err = 1;
736 goto out;
737 }
738
739 if (!check_huge_file(addr, 0, pmd_pagesize)) {
740 ksft_print_msg("Still FilePmdMapped not split\n");
741 err = EXIT_FAILURE;
742 goto out;
743 }
744
745 out:
746 munmap(addr, fd_size);
747 close(fd);
748 unlink(testfile);
749 if (offset == -1) {
750 if (err)
751 ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d failed\n", order);
752 ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d passed\n", order);
753 } else {
754 if (err)
755 ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d at in-folio offset %d failed\n", order, offset);
756 ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d at in-folio offset %d passed\n", order, offset);
757 }
758 }
759
main(int argc,char ** argv)760 int main(int argc, char **argv)
761 {
762 int i;
763 size_t fd_size;
764 char *optional_xfs_path = NULL;
765 char fs_loc_template[] = "/tmp/thp_fs_XXXXXX";
766 const char *fs_loc;
767 bool created_tmp;
768 int offset;
769 unsigned int nr_pages;
770 unsigned int tests;
771
772 ksft_print_header();
773
774 if (geteuid() != 0) {
775 ksft_print_msg("Please run the benchmark as root\n");
776 ksft_finished();
777 }
778
779 if (argc > 1)
780 optional_xfs_path = argv[1];
781
782 pagesize = getpagesize();
783 pageshift = ffs(pagesize) - 1;
784 pmd_pagesize = read_pmd_pagesize();
785 if (!pmd_pagesize)
786 ksft_exit_fail_msg("Reading PMD pagesize failed\n");
787
788 nr_pages = pmd_pagesize / pagesize;
789 pmd_order = sz2ord(pmd_pagesize, pagesize);
790
791 expected_orders = (int *)malloc(sizeof(int) * (pmd_order + 1));
792 if (!expected_orders)
793 ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno));
794
795 tests = 2 + (pmd_order - 1) + (2 * pmd_order) + (pmd_order - 1) * 4 + 2;
796 ksft_set_plan(tests);
797
798 pagemap_fd = open(pagemap_proc, O_RDONLY);
799 if (pagemap_fd == -1)
800 ksft_exit_fail_msg("read pagemap: %s\n", strerror(errno));
801
802 kpageflags_fd = open(kpageflags_proc, O_RDONLY);
803 if (kpageflags_fd == -1)
804 ksft_exit_fail_msg("read kpageflags: %s\n", strerror(errno));
805
806 fd_size = 2 * pmd_pagesize;
807
808 split_pmd_zero_pages();
809
810 for (i = 0; i < pmd_order; i++)
811 if (i != 1)
812 split_pmd_thp_to_order(i);
813
814 split_pte_mapped_thp();
815 for (i = 0; i < pmd_order; i++)
816 split_file_backed_thp(i);
817
818 created_tmp = prepare_thp_fs(optional_xfs_path, fs_loc_template,
819 &fs_loc);
820 for (i = pmd_order - 1; i >= 0; i--)
821 split_thp_in_pagecache_to_order_at(fd_size, fs_loc, i, -1);
822
823 for (i = 0; i < pmd_order; i++)
824 for (offset = 0;
825 offset < nr_pages;
826 offset += MAX(nr_pages / 4, 1 << i))
827 split_thp_in_pagecache_to_order_at(fd_size, fs_loc, i, offset);
828 cleanup_thp_fs(fs_loc, created_tmp);
829
830 close(pagemap_fd);
831 close(kpageflags_fd);
832 free(expected_orders);
833
834 ksft_finished();
835
836 return 0;
837 }
838