xref: /linux/tools/testing/selftests/mm/split_huge_page_test.c (revision 8804d970fab45726b3c7cd7f240b31122aa94219)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * A test of splitting PMD THPs and PTE-mapped THPs from a specified virtual
4  * address range in a process via <debugfs>/split_huge_pages interface.
5  */
6 
7 #define _GNU_SOURCE
8 #include <assert.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <stdarg.h>
12 #include <unistd.h>
13 #include <inttypes.h>
14 #include <string.h>
15 #include <fcntl.h>
16 #include <sys/mman.h>
17 #include <sys/mount.h>
18 #include <sys/param.h>
19 #include <malloc.h>
20 #include <stdbool.h>
21 #include <time.h>
22 #include "vm_util.h"
23 #include "../kselftest.h"
24 
25 uint64_t pagesize;
26 unsigned int pageshift;
27 uint64_t pmd_pagesize;
28 unsigned int pmd_order;
29 int *expected_orders;
30 
31 #define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages"
32 #define SMAP_PATH "/proc/self/smaps"
33 #define INPUT_MAX 80
34 
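/*
 * Input formats written to <debugfs>/split_huge_pages below:
 *   PID_FMT:        <pid>,<vaddr_start>,<vaddr_end>,<new_order>
 *   PID_FMT_OFFSET: <pid>,<vaddr_start>,<vaddr_end>,<new_order>,<in_folio_offset>
 *   PATH_FMT:       <file_path>,<pgoff_start>,<pgoff_end>,<new_order>
 * e.g. "1234,0x700000000000,0x700000400000,0" (illustrative values) asks the
 * kernel to split every THP mapped in that range of process 1234 to order 0.
 */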
35 #define PID_FMT "%d,0x%lx,0x%lx,%d"
36 #define PID_FMT_OFFSET "%d,0x%lx,0x%lx,%d,%d"
37 #define PATH_FMT "%s,0x%lx,0x%lx,%d"
38 
39 const char *pagemap_proc = "/proc/self/pagemap";
40 const char *kpageflags_proc = "/proc/kpageflags";
41 int pagemap_fd;
42 int kpageflags_fd;
43 
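/*
 * Return true if @vaddr is backed by a folio of exactly @order: for order 0
 * the page must not be part of a compound page; otherwise the head page must
 * have KPF_THP | KPF_COMPOUND_HEAD set, every tail page KPF_THP |
 * KPF_COMPOUND_TAIL, and the page right after the folio must not be another
 * tail page (which would mean the folio is larger than @order).
 */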
44 static bool is_backed_by_folio(char *vaddr, int order, int pagemap_fd,
45 		int kpageflags_fd)
46 {
47 	const uint64_t folio_head_flags = KPF_THP | KPF_COMPOUND_HEAD;
48 	const uint64_t folio_tail_flags = KPF_THP | KPF_COMPOUND_TAIL;
49 	const unsigned long nr_pages = 1UL << order;
50 	unsigned long pfn_head;
51 	uint64_t pfn_flags;
52 	unsigned long pfn;
53 	unsigned long i;
54 
55 	pfn = pagemap_get_pfn(pagemap_fd, vaddr);
56 
57 	/* non-present page */
58 	if (pfn == -1UL)
59 		return false;
60 
61 	if (pageflags_get(pfn, kpageflags_fd, &pfn_flags))
62 		goto fail;
63 
64 	/* check for order-0 pages */
65 	if (!order) {
66 		if (pfn_flags & (folio_head_flags | folio_tail_flags))
67 			return false;
68 		return true;
69 	}
70 
71 	/* non THP folio */
72 	if (!(pfn_flags & KPF_THP))
73 		return false;
74 
75 	pfn_head = pfn & ~(nr_pages - 1);
76 
77 	if (pageflags_get(pfn_head, kpageflags_fd, &pfn_flags))
78 		goto fail;
79 
80 	/* head PFN has no compound_head flag set */
81 	if ((pfn_flags & folio_head_flags) != folio_head_flags)
82 		return false;
83 
84 	/* check all tail PFN flags */
85 	for (i = 1; i < nr_pages; i++) {
86 		if (pageflags_get(pfn_head + i, kpageflags_fd, &pfn_flags))
87 			goto fail;
88 		if ((pfn_flags & folio_tail_flags) != folio_tail_flags)
89 			return false;
90 	}
91 
92 	/*
93 	 * check the PFN after this folio, but if its flags cannot be obtained,
94 	 * assume this folio has the expected order
95 	 */
96 	if (pageflags_get(pfn_head + nr_pages, kpageflags_fd, &pfn_flags))
97 		return true;
98 
99 	/* If we find another tail page, then the folio is larger. */
100 	return (pfn_flags & folio_tail_flags) != folio_tail_flags;
101 fail:
102 	ksft_exit_fail_msg("Failed to get folio info\n");
103 	return false;
104 }
105 
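/*
 * Read the kpageflags word of the page backing @vaddr into @flags.
 * Return: 0 on success, 1 if @vaddr is not present, -1 on error.
 */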
106 static int vaddr_pageflags_get(char *vaddr, int pagemap_fd, int kpageflags_fd,
107 		uint64_t *flags)
108 {
109 	unsigned long pfn;
110 
111 	pfn = pagemap_get_pfn(pagemap_fd, vaddr);
112 
113 	/* non-present PFN */
114 	if (pfn == -1UL)
115 		return 1;
116 
117 	if (pageflags_get(pfn, kpageflags_fd, flags))
118 		return -1;
119 
120 	return 0;
121 }
122 
123 /*
124  * gather_after_split_folio_orders - scan through [vaddr_start, vaddr_start + len)
125  * and record folio orders
126  *
127  * @vaddr_start: start vaddr
128  * @len: range length
129  * @pagemap_fd: file descriptor to /proc/<pid>/pagemap
130  * @kpageflags_fd: file descriptor to /proc/kpageflags
131  * @orders: output folio order array
132  * @nr_orders: folio order array size
133  *
134  * gather_after_split_folio_orders() scans through [vaddr_start, vaddr_start + len),
135  * checks all folios within the range, and records their orders. All order-0 pages
136  * are recorded. Non-present vaddrs are skipped.
137  *
138  * NOTE: the function is used to check folio orders after a split is performed,
139  * so it assumes [vaddr_start, vaddr_start + len) fully maps to after-split folios
140  * within that range.
141  *
142  * Return: 0 - no error, -1 - unhandled cases
143  */
144 static int gather_after_split_folio_orders(char *vaddr_start, size_t len,
145 		int pagemap_fd, int kpageflags_fd, int orders[], int nr_orders)
146 {
147 	uint64_t page_flags = 0;
148 	int cur_order = -1;
149 	char *vaddr;
150 
151 	if (pagemap_fd == -1 || kpageflags_fd == -1)
152 		return -1;
153 	if (!orders)
154 		return -1;
155 	if (nr_orders <= 0)
156 		return -1;
157 
158 	for (vaddr = vaddr_start; vaddr < vaddr_start + len;) {
159 		char *next_folio_vaddr;
160 		int status;
161 
162 		status = vaddr_pageflags_get(vaddr, pagemap_fd, kpageflags_fd,
163 					&page_flags);
164 		if (status < 0)
165 			return -1;
166 
167 		/* skip non-present vaddrs */
168 		if (status == 1) {
169 			vaddr += psize();
170 			continue;
171 		}
172 
173 		/* all order-0 pages, with possible false positives (non-folio pages) */
174 		if (!(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) {
175 			orders[0]++;
176 			vaddr += psize();
177 			continue;
178 		}
179 
180 		/* skip non-THP compound pages */
181 		if (!(page_flags & KPF_THP)) {
182 			vaddr += psize();
183 			continue;
184 		}
185 
186 		/* vaddr points to part of a THP at this point */
187 		if (page_flags & KPF_COMPOUND_HEAD)
188 			cur_order = 1;
189 		else {
190 			vaddr += psize();
191 			continue;
192 		}
193 
194 		next_folio_vaddr = vaddr + (1UL << (cur_order + pshift()));
195 
196 		if (next_folio_vaddr >= vaddr_start + len)
197 			break;
198 
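		/*
		 * Probe forward one order at a time: as long as the vaddr one
		 * folio-size (at cur_order) away is still a tail page, the
		 * folio is larger, so bump cur_order; the first non-tail vaddr
		 * starts the next folio and cur_order is this folio's order.
		 */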
199 		while ((status = vaddr_pageflags_get(next_folio_vaddr,
200 						     pagemap_fd, kpageflags_fd,
201 						     &page_flags)) >= 0) {
202 			/*
203 			 * non present vaddr, next compound head page, or
204 			 * order-0 page
205 			 */
206 			if (status == 1 ||
207 			    (page_flags & KPF_COMPOUND_HEAD) ||
208 			    !(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) {
209 				if (cur_order < nr_orders) {
210 					orders[cur_order]++;
211 					cur_order = -1;
212 					vaddr = next_folio_vaddr;
213 				}
214 				break;
215 			}
216 
217 			cur_order++;
218 			next_folio_vaddr = vaddr + (1UL << (cur_order + pshift()));
219 		}
220 
221 		if (status < 0)
222 			return status;
223 	}
224 	if (cur_order > 0 && cur_order < nr_orders)
225 		orders[cur_order]++;
226 	return 0;
227 }
228 
229 static int check_after_split_folio_orders(char *vaddr_start, size_t len,
230 		int pagemap_fd, int kpageflags_fd, int orders[], int nr_orders)
231 {
232 	int *vaddr_orders;
233 	int status;
234 	int i;
235 
236 	vaddr_orders = (int *)malloc(sizeof(int) * nr_orders);
237 
238 	if (!vaddr_orders)
239 		ksft_exit_fail_msg("Cannot allocate memory for vaddr_orders\n");
240 
241 	memset(vaddr_orders, 0, sizeof(int) * nr_orders);
242 	status = gather_after_split_folio_orders(vaddr_start, len, pagemap_fd,
243 				     kpageflags_fd, vaddr_orders, nr_orders);
244 	if (status)
245 		ksft_exit_fail_msg("gather folio info failed\n");
246 
247 	for (i = 0; i < nr_orders; i++)
248 		if (vaddr_orders[i] != orders[i]) {
249 			ksft_print_msg("order %d: expected: %d got %d\n", i,
250 				       orders[i], vaddr_orders[i]);
251 			status = -1;
252 		}
253 
254 	free(vaddr_orders);
255 	return status;
256 }
257 
258 static void write_file(const char *path, const char *buf, size_t buflen)
259 {
260 	int fd;
261 	ssize_t numwritten;
262 
263 	fd = open(path, O_WRONLY);
264 	if (fd == -1)
265 		ksft_exit_fail_msg("%s open failed: %s\n", path, strerror(errno));
266 
267 	numwritten = write(fd, buf, buflen - 1);
268 	close(fd);
269 	if (numwritten < 1)
270 		ksft_exit_fail_msg("Write failed\n");
271 }
272 
273 static void write_debugfs(const char *fmt, ...)
274 {
275 	char input[INPUT_MAX];
276 	int ret;
277 	va_list argp;
278 
279 	va_start(argp, fmt);
280 	ret = vsnprintf(input, INPUT_MAX, fmt, argp);
281 	va_end(argp);
282 
283 	if (ret >= INPUT_MAX)
284 		ksft_exit_fail_msg("%s: Debugfs input is too long\n", __func__);
285 
286 	write_file(SPLIT_DEBUGFS, input, ret + 1);
287 }
288 
289 static char *allocate_zero_filled_hugepage(size_t len)
290 {
291 	char *result;
292 	size_t i;
293 
294 	result = memalign(pmd_pagesize, len);
295 	if (!result) {
296 		ksft_exit_fail_msg("Fail to allocate memory: %s\n",
297 				   strerror(errno));
298 	}
299 
300 	madvise(result, len, MADV_HUGEPAGE);
301 
302 	for (i = 0; i < len; i++)
303 		result[i] = (char)0;
304 
305 	return result;
306 }
307 
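/*
 * Split fully zero-filled THPs and verify that RssAnon drops: when a THP
 * containing only zeroes is split, its subpages are expected to be freed or
 * remapped to the shared zeropage, so anonymous RSS should be lower after
 * the split than before it.
 */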
308 static void verify_rss_anon_split_huge_page_all_zeroes(char *one_page, int nr_hpages, size_t len)
309 {
310 	unsigned long rss_anon_before, rss_anon_after;
311 	size_t i;
312 
313 	if (!check_huge_anon(one_page, nr_hpages, pmd_pagesize))
314 		ksft_exit_fail_msg("No THP is allocated\n");
315 
316 	rss_anon_before = rss_anon();
317 	if (!rss_anon_before)
318 		ksft_exit_fail_msg("No RssAnon is allocated before split\n");
319 
320 	/* split all THPs */
321 	write_debugfs(PID_FMT, getpid(), (uint64_t)one_page,
322 		      (uint64_t)one_page + len, 0);
323 
324 	for (i = 0; i < len; i++)
325 		if (one_page[i] != (char)0)
326 			ksft_exit_fail_msg("byte %zu corrupted\n", i);
327 
328 	if (!check_huge_anon(one_page, 0, pmd_pagesize))
329 		ksft_exit_fail_msg("AnonHugePages still not split\n");
330 
331 	rss_anon_after = rss_anon();
332 	if (rss_anon_after >= rss_anon_before)
333 		ksft_exit_fail_msg("Incorrect RssAnon value. Before: %ld After: %ld\n",
334 		       rss_anon_before, rss_anon_after);
335 }
336 
337 static void split_pmd_zero_pages(void)
338 {
339 	char *one_page;
340 	int nr_hpages = 4;
341 	size_t len = nr_hpages * pmd_pagesize;
342 
343 	one_page = allocate_zero_filled_hugepage(len);
344 	verify_rss_anon_split_huge_page_all_zeroes(one_page, nr_hpages, len);
345 	ksft_test_result_pass("Split zero filled huge pages successful\n");
346 	free(one_page);
347 }
348 
349 static void split_pmd_thp_to_order(int order)
350 {
351 	char *one_page;
352 	size_t len = 4 * pmd_pagesize;
353 	size_t i;
354 
355 	one_page = memalign(pmd_pagesize, len);
356 	if (!one_page)
357 		ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno));
358 
359 	madvise(one_page, len, MADV_HUGEPAGE);
360 
361 	for (i = 0; i < len; i++)
362 		one_page[i] = (char)i;
363 
364 	if (!check_huge_anon(one_page, 4, pmd_pagesize))
365 		ksft_exit_fail_msg("No THP is allocated\n");
366 
367 	/* split all THPs */
368 	write_debugfs(PID_FMT, getpid(), (uint64_t)one_page,
369 		(uint64_t)one_page + len, order);
370 
371 	for (i = 0; i < len; i++)
372 		if (one_page[i] != (char)i)
373 			ksft_exit_fail_msg("byte %zu corrupted\n", i);
374 
375 	memset(expected_orders, 0, sizeof(int) * (pmd_order + 1));
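	/*
	 * 4 PMD THPs were allocated; splitting each one to @order yields
	 * 2^(pmd_order - order) folios, so 4 << (pmd_order - order) in total.
	 */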
376 	expected_orders[order] = 4 << (pmd_order - order);
377 
378 	if (check_after_split_folio_orders(one_page, len, pagemap_fd,
379 					   kpageflags_fd, expected_orders,
380 					   (pmd_order + 1)))
381 		ksft_exit_fail_msg("Unexpected THP split\n");
382 
383 	if (!check_huge_anon(one_page, 0, pmd_pagesize))
384 		ksft_exit_fail_msg("AnonHugePages still not split\n");
385 
386 	ksft_test_result_pass("Split huge pages to order %d successful\n", order);
387 	free(one_page);
388 }
389 
390 static void split_pte_mapped_thp(void)
391 {
392 	const size_t nr_thps = 4;
393 	const size_t thp_area_size = nr_thps * pmd_pagesize;
394 	const size_t page_area_size = nr_thps * pagesize;
395 	char *thp_area, *tmp, *page_area = MAP_FAILED;
396 	size_t i;
397 
398 	thp_area = mmap((void *)(1UL << 30), thp_area_size, PROT_READ | PROT_WRITE,
399 			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
400 	if (thp_area == MAP_FAILED) {
401 		ksft_test_result_fail("Fail to allocate memory: %s\n", strerror(errno));
402 		return;
403 	}
404 
405 	madvise(thp_area, thp_area_size, MADV_HUGEPAGE);
406 
407 	for (i = 0; i < thp_area_size; i++)
408 		thp_area[i] = (char)i;
409 
410 	if (!check_huge_anon(thp_area, nr_thps, pmd_pagesize)) {
411 		ksft_test_result_skip("Not all THPs allocated\n");
412 		goto out;
413 	}
414 
415 	/*
416 	 * To challenge the splitting code, we will mremap a single page of each
417 	 * THP (page[i] of thp[i]) in the thp_area into page_area. This will
418 	 * replace the PMD mappings in the thp_area with PTE mappings first,
419 	 * while leaving the THPs unsplit, and then create a page-sized hole in
420 	 * the thp_area.
421 	 * We will then manually trigger splitting of all THPs through the
422 	 * single mremap'ed pages of each THP in the page_area.
423 	 */
424 	page_area = mmap(NULL, page_area_size, PROT_READ | PROT_WRITE,
425 			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
426 	if (page_area == MAP_FAILED) {
427 		ksft_test_result_fail("Fail to allocate memory: %s\n", strerror(errno));
428 		goto out;
429 	}
430 
431 	for (i = 0; i < nr_thps; i++) {
432 		tmp = mremap(thp_area + pmd_pagesize * i + pagesize * i,
433 			     pagesize, pagesize, MREMAP_MAYMOVE|MREMAP_FIXED,
434 			     page_area + pagesize * i);
435 		if (tmp != MAP_FAILED)
436 			continue;
437 		ksft_test_result_fail("mremap failed: %s\n", strerror(errno));
438 		goto out;
439 	}
440 
441 	/*
442 	 * Verify that our THPs were not split yet. Note that
443 	 * check_huge_anon() cannot be used as it checks for PMD mappings.
444 	 */
445 	for (i = 0; i < nr_thps; i++) {
446 		if (is_backed_by_folio(page_area + i * pagesize, pmd_order,
447 				       pagemap_fd, kpageflags_fd))
448 			continue;
449 		ksft_test_result_fail("THP %zu missing after mremap\n", i);
450 		goto out;
451 	}
452 
453 	/* Split all THPs through the remapped pages. */
454 	write_debugfs(PID_FMT, getpid(), (uint64_t)page_area,
455 		      (uint64_t)page_area + page_area_size, 0);
456 
457 	/* Corruption during mremap or split? */
458 	for (i = 0; i < page_area_size; i++) {
459 		if (page_area[i] == (char)i)
460 			continue;
461 		ksft_test_result_fail("byte %zu corrupted\n", i);
462 		goto out;
463 	}
464 
465 	/* Split failed? */
466 	for (i = 0; i < nr_thps; i++) {
467 		if (is_backed_by_folio(page_area + i * pagesize, 0,
468 				       pagemap_fd, kpageflags_fd))
469 			continue;
470 		ksft_test_result_fail("THP %zu not split\n", i);
471 	}
472 
473 	ksft_test_result_pass("Split PTE-mapped huge pages successful\n");
474 out:
475 	munmap(thp_area, thp_area_size);
476 	if (page_area != MAP_FAILED)
477 		munmap(page_area, page_area_size);
478 }
479 
480 static void split_file_backed_thp(int order)
481 {
482 	int status;
483 	int fd;
484 	char tmpfs_template[] = "/tmp/thp_split_XXXXXX";
485 	const char *tmpfs_loc = mkdtemp(tmpfs_template);
486 	char testfile[INPUT_MAX];
487 	ssize_t num_written, num_read;
488 	char *file_buf1, *file_buf2;
489 	uint64_t pgoff_start = 0, pgoff_end = 1024;
490 	int i;
491 
492 	ksft_print_msg("Please enable pr_debug in split_huge_pages_in_file() for more info.\n");
493 
494 	file_buf1 = (char *)malloc(pmd_pagesize);
495 	file_buf2 = (char *)malloc(pmd_pagesize);
496 
497 	if (!file_buf1 || !file_buf2) {
498 		ksft_print_msg("cannot allocate file buffers\n");
499 		goto out;
500 	}
501 
502 	for (i = 0; i < pmd_pagesize; i++)
503 		file_buf1[i] = (char)i;
504 	memset(file_buf2, 0, pmd_pagesize);
505 
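	/* mount a small tmpfs with huge=always so the test file can be backed by THPs */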
506 	status = mount("tmpfs", tmpfs_loc, "tmpfs", 0, "huge=always,size=4m");
507 
508 	if (status)
509 		ksft_exit_fail_msg("Unable to create a tmpfs for testing\n");
510 
511 	status = snprintf(testfile, INPUT_MAX, "%s/thp_file", tmpfs_loc);
512 	if (status >= INPUT_MAX) {
513 		ksft_print_msg("Fail to create file-backed THP split testing file\n");
514 		goto cleanup;
515 	}
516 
517 	fd = open(testfile, O_CREAT|O_RDWR, 0664);
518 	if (fd == -1) {
519 		ksft_perror("Cannot open testing file");
520 		goto cleanup;
521 	}
522 
523 	/* write PMD-sized data to the file so a file-backed THP can be allocated */
524 	num_written = write(fd, file_buf1, pmd_pagesize);
525 
526 	if (num_written == -1 || num_written != pmd_pagesize) {
527 		ksft_perror("Failed to write data to testing file");
528 		goto close_file;
529 	}
530 
531 	/* split the file-backed THP */
532 	write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end, order);
533 
534 	/* check file content after split */
535 	status = lseek(fd, 0, SEEK_SET);
536 	if (status == -1) {
537 		ksft_perror("Cannot lseek file");
538 		goto close_file;
539 	}
540 
541 	num_read = read(fd, file_buf2, num_written);
542 	if (num_read == -1 || num_read != num_written) {
543 		ksft_perror("Cannot read file content back");
544 		goto close_file;
545 	}
546 
547 	if (memcmp(file_buf1, file_buf2, pmd_pagesize) != 0) {
548 		ksft_print_msg("File content changed\n");
549 		goto close_file;
550 	}
551 
552 	close(fd);
553 	status = unlink(testfile);
554 	if (status) {
555 		ksft_perror("Cannot remove testing file");
556 		goto cleanup;
557 	}
558 
559 	status = umount(tmpfs_loc);
560 	if (status) {
561 		rmdir(tmpfs_loc);
562 		ksft_exit_fail_msg("Unable to umount %s\n", tmpfs_loc);
563 	}
564 
565 	status = rmdir(tmpfs_loc);
566 	if (status)
567 		ksft_exit_fail_msg("cannot remove tmp dir: %s\n", strerror(errno));
568 
569 	ksft_print_msg("Please check dmesg for more information\n");
570 	ksft_test_result_pass("File-backed THP split to order %d test done\n", order);
571 	return;
572 
573 close_file:
574 	close(fd);
575 cleanup:
576 	umount(tmpfs_loc);
577 	rmdir(tmpfs_loc);
578 out:
579 	ksft_exit_fail_msg("Error occurred\n");
580 }
581 
582 static bool prepare_thp_fs(const char *xfs_path, char *thp_fs_template,
583 		const char **thp_fs_loc)
584 {
585 	if (xfs_path) {
586 		*thp_fs_loc = xfs_path;
587 		return false;
588 	}
589 
590 	*thp_fs_loc = mkdtemp(thp_fs_template);
591 
592 	if (!*thp_fs_loc)
593 		ksft_exit_fail_msg("cannot create temp folder\n");
594 
595 	return true;
596 }
597 
598 static void cleanup_thp_fs(const char *thp_fs_loc, bool created_tmp)
599 {
600 	int status;
601 
602 	if (!created_tmp)
603 		return;
604 
605 	status = rmdir(thp_fs_loc);
606 	if (status)
607 		ksft_exit_fail_msg("cannot remove tmp dir: %s\n",
608 				   strerror(errno));
609 }
610 
611 static int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size,
612 		int *fd, char **addr)
613 {
614 	size_t i;
615 	unsigned char buf[1024];
616 
617 	srand(time(NULL));
618 
619 	*fd = open(testfile, O_CREAT | O_RDWR, 0664);
620 	if (*fd == -1)
621 		ksft_exit_fail_msg("Failed to create a file at %s\n", testfile);
622 
623 	assert(fd_size % sizeof(buf) == 0);
624 	for (i = 0; i < sizeof(buf); i++)
625 		buf[i] = (unsigned char)i;
626 	for (i = 0; i < fd_size; i += sizeof(buf))
627 		write(*fd, buf, sizeof(buf));
628 
629 	close(*fd);
630 	sync();
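	/*
	 * Write "3" to drop_caches to drop pagecache and reclaimable slab, so
	 * the following open + mmap + reads repopulate the pagecache, ideally
	 * with large folios.
	 */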
631 	*fd = open("/proc/sys/vm/drop_caches", O_WRONLY);
632 	if (*fd == -1) {
633 		ksft_perror("open drop_caches");
634 		goto err_out_unlink;
635 	}
636 	if (write(*fd, "3", 1) != 1) {
637 		ksft_perror("write to drop_caches");
638 		goto err_out_unlink;
639 	}
640 	close(*fd);
641 
642 	*fd = open(testfile, O_RDWR);
643 	if (*fd == -1) {
644 		ksft_perror("Failed to open testfile");
645 		goto err_out_unlink;
646 	}
647 
648 	*addr = mmap(NULL, fd_size, PROT_READ|PROT_WRITE, MAP_SHARED, *fd, 0);
649 	if (*addr == MAP_FAILED) {
650 		ksft_perror("cannot mmap");
651 		goto err_out_close;
652 	}
653 	madvise(*addr, fd_size, MADV_HUGEPAGE);
654 
655 	for (size_t i = 0; i < fd_size; i++) {
656 		char *addr2 = *addr + i;
657 
658 		FORCE_READ(*addr2);
659 	}
660 
661 	if (!check_huge_file(*addr, fd_size / pmd_pagesize, pmd_pagesize)) {
662 		ksft_print_msg("No large pagecache folio generated, please provide a filesystem supporting large folios\n");
663 		munmap(*addr, fd_size);
664 		close(*fd);
665 		unlink(testfile);
666 		ksft_test_result_skip("Pagecache folio split skipped\n");
667 		return -2;
668 	}
669 	return 0;
670 err_out_close:
671 	close(*fd);
672 err_out_unlink:
673 	unlink(testfile);
674 	ksft_exit_fail_msg("Failed to create large pagecache folios\n");
675 	return -1;
676 }
677 
678 static void split_thp_in_pagecache_to_order_at(size_t fd_size,
679 		const char *fs_loc, int order, int offset)
680 {
681 	int fd;
682 	char *split_addr;
683 	char *addr;
684 	size_t i;
685 	char testfile[INPUT_MAX];
686 	int err = 0;
687 
688 	err = snprintf(testfile, INPUT_MAX, "%s/test", fs_loc);
689 
690 	if (err < 0 || err >= INPUT_MAX)
691 		ksft_exit_fail_msg("cannot generate test file name\n");
692 
693 	err = create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr);
694 	if (err)
695 		return;
696 
697 	err = 0;
698 
699 	memset(expected_orders, 0, sizeof(int) * (pmd_order + 1));
700 	/*
701 	 * use [split_addr, split_addr + pagesize) range to split THPs, since
702 	 * the debugfs function always splits a range in pagesize steps and
703 	 * providing a full [addr, addr + fd_size) range can trigger multiple
704 	 * splits, complicating after-split result checking.
705 	 */
706 	if (offset == -1) {
707 		for (split_addr = addr; split_addr < addr + fd_size; split_addr += pmd_pagesize)
708 			write_debugfs(PID_FMT, getpid(), (uint64_t)split_addr,
709 				      (uint64_t)split_addr + pagesize, order);
710 
711 		expected_orders[order] = fd_size / (pagesize << order);
712 	} else {
713 		int times = fd_size / pmd_pagesize;
714 
715 		for (split_addr = addr; split_addr < addr + fd_size; split_addr += pmd_pagesize)
716 			write_debugfs(PID_FMT_OFFSET, getpid(), (uint64_t)split_addr,
717 				      (uint64_t)split_addr + pagesize, order, offset);
718 
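		/*
		 * A split at an in-folio offset is non-uniform: each PMD-sized
		 * folio is expected to end up as one folio of every order from
		 * pmd_order - 1 down to order + 1, plus two folios of order.
		 */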
719 		for (i = order + 1; i < pmd_order; i++)
720 			expected_orders[i] = times;
721 		expected_orders[order] = 2 * times;
722 	}
723 
724 	for (i = 0; i < fd_size; i++)
725 		if (*(addr + i) != (char)i) {
726 			ksft_print_msg("byte %zu corrupted in the file\n", i);
727 			err = EXIT_FAILURE;
728 			goto out;
729 		}
730 
731 	if (check_after_split_folio_orders(addr, fd_size, pagemap_fd,
732 					   kpageflags_fd, expected_orders,
733 					   (pmd_order + 1))) {
734 		ksft_print_msg("Unexpected THP split\n");
735 		err = 1;
736 		goto out;
737 	}
738 
739 	if (!check_huge_file(addr, 0, pmd_pagesize)) {
740 		ksft_print_msg("FilePmdMapped still not split\n");
741 		err = EXIT_FAILURE;
742 		goto out;
743 	}
744 
745 out:
746 	munmap(addr, fd_size);
747 	close(fd);
748 	unlink(testfile);
749 	if (offset == -1) {
750 		if (err)
751 			ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d failed\n", order);
752 		ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d passed\n", order);
753 	} else {
754 		if (err)
755 			ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d at in-folio offset %d failed\n", order, offset);
756 		ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d at in-folio offset %d passed\n", order, offset);
757 	}
758 }
759 
760 int main(int argc, char **argv)
761 {
762 	int i;
763 	size_t fd_size;
764 	char *optional_xfs_path = NULL;
765 	char fs_loc_template[] = "/tmp/thp_fs_XXXXXX";
766 	const char *fs_loc;
767 	bool created_tmp;
768 	int offset;
769 	unsigned int nr_pages;
770 	unsigned int tests;
771 
772 	ksft_print_header();
773 
774 	if (geteuid() != 0) {
775 		ksft_print_msg("Please run the test as root\n");
776 		ksft_finished();
777 	}
778 
779 	if (argc > 1)
780 		optional_xfs_path = argv[1];
781 
782 	pagesize = getpagesize();
783 	pageshift = ffs(pagesize) - 1;
784 	pmd_pagesize = read_pmd_pagesize();
785 	if (!pmd_pagesize)
786 		ksft_exit_fail_msg("Reading PMD pagesize failed\n");
787 
788 	nr_pages = pmd_pagesize / pagesize;
789 	pmd_order = sz2ord(pmd_pagesize, pagesize);
790 
791 	expected_orders = (int *)malloc(sizeof(int) * (pmd_order + 1));
792 	if (!expected_orders)
793 		ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno));
794 
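	/*
	 * Test plan: split_pmd_zero_pages() and split_pte_mapped_thp() (2),
	 * split_pmd_thp_to_order() for every order except 1 (pmd_order - 1),
	 * split_file_backed_thp() plus the full-range pagecache splits
	 * (2 * pmd_order), and the at-offset pagecache splits: 4 offsets per
	 * order below pmd_order - 1 and 2 for pmd_order - 1
	 * ((pmd_order - 1) * 4 + 2).
	 */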
795 	tests = 2 + (pmd_order - 1) + (2 * pmd_order) + (pmd_order - 1) * 4 + 2;
796 	ksft_set_plan(tests);
797 
798 	pagemap_fd = open(pagemap_proc, O_RDONLY);
799 	if (pagemap_fd == -1)
800 		ksft_exit_fail_msg("read pagemap: %s\n", strerror(errno));
801 
802 	kpageflags_fd = open(kpageflags_proc, O_RDONLY);
803 	if (kpageflags_fd == -1)
804 		ksft_exit_fail_msg("read kpageflags: %s\n", strerror(errno));
805 
806 	fd_size = 2 * pmd_pagesize;
807 
808 	split_pmd_zero_pages();
809 
810 	for (i = 0; i < pmd_order; i++)
811 		if (i != 1)
812 			split_pmd_thp_to_order(i);
813 
814 	split_pte_mapped_thp();
815 	for (i = 0; i < pmd_order; i++)
816 		split_file_backed_thp(i);
817 
818 	created_tmp = prepare_thp_fs(optional_xfs_path, fs_loc_template,
819 			&fs_loc);
820 	for (i = pmd_order - 1; i >= 0; i--)
821 		split_thp_in_pagecache_to_order_at(fd_size, fs_loc, i, -1);
822 
823 	for (i = 0; i < pmd_order; i++)
824 		for (offset = 0;
825 		     offset < nr_pages;
826 		     offset += MAX(nr_pages / 4, 1 << i))
827 			split_thp_in_pagecache_to_order_at(fd_size, fs_loc, i, offset);
828 	cleanup_thp_fs(fs_loc, created_tmp);
829 
830 	close(pagemap_fd);
831 	close(kpageflags_fd);
832 	free(expected_orders);
833 
834 	ksft_finished();
835 
836 	return 0;
837 }
838