1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * A test of splitting PMD THPs and PTE-mapped THPs from a specified virtual 4 * address range in a process via <debugfs>/split_huge_pages interface. 5 */ 6 7 #define _GNU_SOURCE 8 #include <stdio.h> 9 #include <stdlib.h> 10 #include <stdarg.h> 11 #include <unistd.h> 12 #include <inttypes.h> 13 #include <string.h> 14 #include <fcntl.h> 15 #include <sys/mman.h> 16 #include <sys/mount.h> 17 #include <malloc.h> 18 #include <stdbool.h> 19 #include <time.h> 20 #include "vm_util.h" 21 #include "../kselftest.h" 22 23 uint64_t pagesize; 24 unsigned int pageshift; 25 uint64_t pmd_pagesize; 26 27 #define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages" 28 #define SMAP_PATH "/proc/self/smaps" 29 #define INPUT_MAX 80 30 31 #define PID_FMT "%d,0x%lx,0x%lx,%d" 32 #define PATH_FMT "%s,0x%lx,0x%lx,%d" 33 34 #define PFN_MASK ((1UL<<55)-1) 35 #define KPF_THP (1UL<<22) 36 37 int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file) 38 { 39 uint64_t paddr; 40 uint64_t page_flags; 41 42 if (pagemap_file) { 43 pread(pagemap_file, &paddr, sizeof(paddr), 44 ((long)vaddr >> pageshift) * sizeof(paddr)); 45 46 if (kpageflags_file) { 47 pread(kpageflags_file, &page_flags, sizeof(page_flags), 48 (paddr & PFN_MASK) * sizeof(page_flags)); 49 50 return !!(page_flags & KPF_THP); 51 } 52 } 53 return 0; 54 } 55 56 static void write_file(const char *path, const char *buf, size_t buflen) 57 { 58 int fd; 59 ssize_t numwritten; 60 61 fd = open(path, O_WRONLY); 62 if (fd == -1) 63 ksft_exit_fail_msg("%s open failed: %s\n", path, strerror(errno)); 64 65 numwritten = write(fd, buf, buflen - 1); 66 close(fd); 67 if (numwritten < 1) 68 ksft_exit_fail_msg("Write failed\n"); 69 } 70 71 static void write_debugfs(const char *fmt, ...) 72 { 73 char input[INPUT_MAX]; 74 int ret; 75 va_list argp; 76 77 va_start(argp, fmt); 78 ret = vsnprintf(input, INPUT_MAX, fmt, argp); 79 va_end(argp); 80 81 if (ret >= INPUT_MAX) 82 ksft_exit_fail_msg("%s: Debugfs input is too long\n", __func__); 83 84 write_file(SPLIT_DEBUGFS, input, ret + 1); 85 } 86 87 static char *allocate_zero_filled_hugepage(size_t len) 88 { 89 char *result; 90 size_t i; 91 92 result = memalign(pmd_pagesize, len); 93 if (!result) { 94 printf("Fail to allocate memory\n"); 95 exit(EXIT_FAILURE); 96 } 97 98 madvise(result, len, MADV_HUGEPAGE); 99 100 for (i = 0; i < len; i++) 101 result[i] = (char)0; 102 103 return result; 104 } 105 106 static void verify_rss_anon_split_huge_page_all_zeroes(char *one_page, int nr_hpages, size_t len) 107 { 108 unsigned long rss_anon_before, rss_anon_after; 109 size_t i; 110 111 if (!check_huge_anon(one_page, 4, pmd_pagesize)) 112 ksft_exit_fail_msg("No THP is allocated\n"); 113 114 rss_anon_before = rss_anon(); 115 if (!rss_anon_before) 116 ksft_exit_fail_msg("No RssAnon is allocated before split\n"); 117 118 /* split all THPs */ 119 write_debugfs(PID_FMT, getpid(), (uint64_t)one_page, 120 (uint64_t)one_page + len, 0); 121 122 for (i = 0; i < len; i++) 123 if (one_page[i] != (char)0) 124 ksft_exit_fail_msg("%ld byte corrupted\n", i); 125 126 if (!check_huge_anon(one_page, 0, pmd_pagesize)) 127 ksft_exit_fail_msg("Still AnonHugePages not split\n"); 128 129 rss_anon_after = rss_anon(); 130 if (rss_anon_after >= rss_anon_before) 131 ksft_exit_fail_msg("Incorrect RssAnon value. Before: %ld After: %ld\n", 132 rss_anon_before, rss_anon_after); 133 } 134 135 void split_pmd_zero_pages(void) 136 { 137 char *one_page; 138 int nr_hpages = 4; 139 size_t len = nr_hpages * pmd_pagesize; 140 141 one_page = allocate_zero_filled_hugepage(len); 142 verify_rss_anon_split_huge_page_all_zeroes(one_page, nr_hpages, len); 143 ksft_test_result_pass("Split zero filled huge pages successful\n"); 144 free(one_page); 145 } 146 147 void split_pmd_thp_to_order(int order) 148 { 149 char *one_page; 150 size_t len = 4 * pmd_pagesize; 151 size_t i; 152 153 one_page = memalign(pmd_pagesize, len); 154 if (!one_page) 155 ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno)); 156 157 madvise(one_page, len, MADV_HUGEPAGE); 158 159 for (i = 0; i < len; i++) 160 one_page[i] = (char)i; 161 162 if (!check_huge_anon(one_page, 4, pmd_pagesize)) 163 ksft_exit_fail_msg("No THP is allocated\n"); 164 165 /* split all THPs */ 166 write_debugfs(PID_FMT, getpid(), (uint64_t)one_page, 167 (uint64_t)one_page + len, order); 168 169 for (i = 0; i < len; i++) 170 if (one_page[i] != (char)i) 171 ksft_exit_fail_msg("%ld byte corrupted\n", i); 172 173 174 if (!check_huge_anon(one_page, 0, pmd_pagesize)) 175 ksft_exit_fail_msg("Still AnonHugePages not split\n"); 176 177 ksft_test_result_pass("Split huge pages to order %d successful\n", order); 178 free(one_page); 179 } 180 181 void split_pte_mapped_thp(void) 182 { 183 char *one_page, *pte_mapped, *pte_mapped2; 184 size_t len = 4 * pmd_pagesize; 185 uint64_t thp_size; 186 size_t i; 187 const char *pagemap_template = "/proc/%d/pagemap"; 188 const char *kpageflags_proc = "/proc/kpageflags"; 189 char pagemap_proc[255]; 190 int pagemap_fd; 191 int kpageflags_fd; 192 193 if (snprintf(pagemap_proc, 255, pagemap_template, getpid()) < 0) 194 ksft_exit_fail_msg("get pagemap proc error: %s\n", strerror(errno)); 195 196 pagemap_fd = open(pagemap_proc, O_RDONLY); 197 if (pagemap_fd == -1) 198 ksft_exit_fail_msg("read pagemap: %s\n", strerror(errno)); 199 200 kpageflags_fd = open(kpageflags_proc, O_RDONLY); 201 if (kpageflags_fd == -1) 202 ksft_exit_fail_msg("read kpageflags: %s\n", strerror(errno)); 203 204 one_page = mmap((void *)(1UL << 30), len, PROT_READ | PROT_WRITE, 205 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); 206 if (one_page == MAP_FAILED) 207 ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno)); 208 209 madvise(one_page, len, MADV_HUGEPAGE); 210 211 for (i = 0; i < len; i++) 212 one_page[i] = (char)i; 213 214 if (!check_huge_anon(one_page, 4, pmd_pagesize)) 215 ksft_exit_fail_msg("No THP is allocated\n"); 216 217 /* remap the first pagesize of first THP */ 218 pte_mapped = mremap(one_page, pagesize, pagesize, MREMAP_MAYMOVE); 219 220 /* remap the Nth pagesize of Nth THP */ 221 for (i = 1; i < 4; i++) { 222 pte_mapped2 = mremap(one_page + pmd_pagesize * i + pagesize * i, 223 pagesize, pagesize, 224 MREMAP_MAYMOVE|MREMAP_FIXED, 225 pte_mapped + pagesize * i); 226 if (pte_mapped2 == MAP_FAILED) 227 ksft_exit_fail_msg("mremap failed: %s\n", strerror(errno)); 228 } 229 230 /* smap does not show THPs after mremap, use kpageflags instead */ 231 thp_size = 0; 232 for (i = 0; i < pagesize * 4; i++) 233 if (i % pagesize == 0 && 234 is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) 235 thp_size++; 236 237 if (thp_size != 4) 238 ksft_exit_fail_msg("Some THPs are missing during mremap\n"); 239 240 /* split all remapped THPs */ 241 write_debugfs(PID_FMT, getpid(), (uint64_t)pte_mapped, 242 (uint64_t)pte_mapped + pagesize * 4, 0); 243 244 /* smap does not show THPs after mremap, use kpageflags instead */ 245 thp_size = 0; 246 for (i = 0; i < pagesize * 4; i++) { 247 if (pte_mapped[i] != (char)i) 248 ksft_exit_fail_msg("%ld byte corrupted\n", i); 249 250 if (i % pagesize == 0 && 251 is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) 252 thp_size++; 253 } 254 255 if (thp_size) 256 ksft_exit_fail_msg("Still %ld THPs not split\n", thp_size); 257 258 ksft_test_result_pass("Split PTE-mapped huge pages successful\n"); 259 munmap(one_page, len); 260 close(pagemap_fd); 261 close(kpageflags_fd); 262 } 263 264 void split_file_backed_thp(int order) 265 { 266 int status; 267 int fd; 268 char tmpfs_template[] = "/tmp/thp_split_XXXXXX"; 269 const char *tmpfs_loc = mkdtemp(tmpfs_template); 270 char testfile[INPUT_MAX]; 271 ssize_t num_written, num_read; 272 char *file_buf1, *file_buf2; 273 uint64_t pgoff_start = 0, pgoff_end = 1024; 274 int i; 275 276 ksft_print_msg("Please enable pr_debug in split_huge_pages_in_file() for more info.\n"); 277 278 file_buf1 = (char *)malloc(pmd_pagesize); 279 file_buf2 = (char *)malloc(pmd_pagesize); 280 281 if (!file_buf1 || !file_buf2) { 282 ksft_print_msg("cannot allocate file buffers\n"); 283 goto out; 284 } 285 286 for (i = 0; i < pmd_pagesize; i++) 287 file_buf1[i] = (char)i; 288 memset(file_buf2, 0, pmd_pagesize); 289 290 status = mount("tmpfs", tmpfs_loc, "tmpfs", 0, "huge=always,size=4m"); 291 292 if (status) 293 ksft_exit_fail_msg("Unable to create a tmpfs for testing\n"); 294 295 status = snprintf(testfile, INPUT_MAX, "%s/thp_file", tmpfs_loc); 296 if (status >= INPUT_MAX) { 297 ksft_exit_fail_msg("Fail to create file-backed THP split testing file\n"); 298 goto cleanup; 299 } 300 301 fd = open(testfile, O_CREAT|O_RDWR, 0664); 302 if (fd == -1) { 303 ksft_perror("Cannot open testing file"); 304 goto cleanup; 305 } 306 307 /* write pmd size data to the file, so a file-backed THP can be allocated */ 308 num_written = write(fd, file_buf1, pmd_pagesize); 309 310 if (num_written == -1 || num_written != pmd_pagesize) { 311 ksft_perror("Failed to write data to testing file"); 312 goto close_file; 313 } 314 315 /* split the file-backed THP */ 316 write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end, order); 317 318 /* check file content after split */ 319 status = lseek(fd, 0, SEEK_SET); 320 if (status == -1) { 321 ksft_perror("Cannot lseek file"); 322 goto close_file; 323 } 324 325 num_read = read(fd, file_buf2, num_written); 326 if (num_read == -1 || num_read != num_written) { 327 ksft_perror("Cannot read file content back"); 328 goto close_file; 329 } 330 331 if (strncmp(file_buf1, file_buf2, pmd_pagesize) != 0) { 332 ksft_print_msg("File content changed\n"); 333 goto close_file; 334 } 335 336 close(fd); 337 status = unlink(testfile); 338 if (status) { 339 ksft_perror("Cannot remove testing file"); 340 goto cleanup; 341 } 342 343 status = umount(tmpfs_loc); 344 if (status) { 345 rmdir(tmpfs_loc); 346 ksft_exit_fail_msg("Unable to umount %s\n", tmpfs_loc); 347 } 348 349 status = rmdir(tmpfs_loc); 350 if (status) 351 ksft_exit_fail_msg("cannot remove tmp dir: %s\n", strerror(errno)); 352 353 ksft_print_msg("Please check dmesg for more information\n"); 354 ksft_test_result_pass("File-backed THP split to order %d test done\n", order); 355 return; 356 357 close_file: 358 close(fd); 359 cleanup: 360 umount(tmpfs_loc); 361 rmdir(tmpfs_loc); 362 out: 363 ksft_exit_fail_msg("Error occurred\n"); 364 } 365 366 bool prepare_thp_fs(const char *xfs_path, char *thp_fs_template, 367 const char **thp_fs_loc) 368 { 369 if (xfs_path) { 370 *thp_fs_loc = xfs_path; 371 return false; 372 } 373 374 *thp_fs_loc = mkdtemp(thp_fs_template); 375 376 if (!*thp_fs_loc) 377 ksft_exit_fail_msg("cannot create temp folder\n"); 378 379 return true; 380 } 381 382 void cleanup_thp_fs(const char *thp_fs_loc, bool created_tmp) 383 { 384 int status; 385 386 if (!created_tmp) 387 return; 388 389 status = rmdir(thp_fs_loc); 390 if (status) 391 ksft_exit_fail_msg("cannot remove tmp dir: %s\n", 392 strerror(errno)); 393 } 394 395 int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, 396 char **addr) 397 { 398 size_t i; 399 int dummy = 0; 400 401 srand(time(NULL)); 402 403 *fd = open(testfile, O_CREAT | O_RDWR, 0664); 404 if (*fd == -1) 405 ksft_exit_fail_msg("Failed to create a file at %s\n", testfile); 406 407 for (i = 0; i < fd_size; i++) { 408 unsigned char byte = (unsigned char)i; 409 410 write(*fd, &byte, sizeof(byte)); 411 } 412 close(*fd); 413 sync(); 414 *fd = open("/proc/sys/vm/drop_caches", O_WRONLY); 415 if (*fd == -1) { 416 ksft_perror("open drop_caches"); 417 goto err_out_unlink; 418 } 419 if (write(*fd, "3", 1) != 1) { 420 ksft_perror("write to drop_caches"); 421 goto err_out_unlink; 422 } 423 close(*fd); 424 425 *fd = open(testfile, O_RDWR); 426 if (*fd == -1) { 427 ksft_perror("Failed to open testfile\n"); 428 goto err_out_unlink; 429 } 430 431 *addr = mmap(NULL, fd_size, PROT_READ|PROT_WRITE, MAP_SHARED, *fd, 0); 432 if (*addr == (char *)-1) { 433 ksft_perror("cannot mmap"); 434 goto err_out_close; 435 } 436 madvise(*addr, fd_size, MADV_HUGEPAGE); 437 438 for (size_t i = 0; i < fd_size; i++) 439 dummy += *(*addr + i); 440 asm volatile("" : "+r" (dummy)); 441 442 if (!check_huge_file(*addr, fd_size / pmd_pagesize, pmd_pagesize)) { 443 ksft_print_msg("No large pagecache folio generated, please provide a filesystem supporting large folio\n"); 444 munmap(*addr, fd_size); 445 close(*fd); 446 unlink(testfile); 447 ksft_test_result_skip("Pagecache folio split skipped\n"); 448 return -2; 449 } 450 return 0; 451 err_out_close: 452 close(*fd); 453 err_out_unlink: 454 unlink(testfile); 455 ksft_exit_fail_msg("Failed to create large pagecache folios\n"); 456 return -1; 457 } 458 459 void split_thp_in_pagecache_to_order(size_t fd_size, int order, const char *fs_loc) 460 { 461 int fd; 462 char *addr; 463 size_t i; 464 char testfile[INPUT_MAX]; 465 int err = 0; 466 467 err = snprintf(testfile, INPUT_MAX, "%s/test", fs_loc); 468 469 if (err < 0) 470 ksft_exit_fail_msg("cannot generate right test file name\n"); 471 472 err = create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr); 473 if (err) 474 return; 475 err = 0; 476 477 write_debugfs(PID_FMT, getpid(), (uint64_t)addr, (uint64_t)addr + fd_size, order); 478 479 for (i = 0; i < fd_size; i++) 480 if (*(addr + i) != (char)i) { 481 ksft_print_msg("%lu byte corrupted in the file\n", i); 482 err = EXIT_FAILURE; 483 goto out; 484 } 485 486 if (!check_huge_file(addr, 0, pmd_pagesize)) { 487 ksft_print_msg("Still FilePmdMapped not split\n"); 488 err = EXIT_FAILURE; 489 goto out; 490 } 491 492 out: 493 munmap(addr, fd_size); 494 close(fd); 495 unlink(testfile); 496 if (err) 497 ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d failed\n", order); 498 ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d passed\n", order); 499 } 500 501 int main(int argc, char **argv) 502 { 503 int i; 504 size_t fd_size; 505 char *optional_xfs_path = NULL; 506 char fs_loc_template[] = "/tmp/thp_fs_XXXXXX"; 507 const char *fs_loc; 508 bool created_tmp; 509 510 ksft_print_header(); 511 512 if (geteuid() != 0) { 513 ksft_print_msg("Please run the benchmark as root\n"); 514 ksft_finished(); 515 } 516 517 if (argc > 1) 518 optional_xfs_path = argv[1]; 519 520 ksft_set_plan(1+8+1+9+9); 521 522 pagesize = getpagesize(); 523 pageshift = ffs(pagesize) - 1; 524 pmd_pagesize = read_pmd_pagesize(); 525 if (!pmd_pagesize) 526 ksft_exit_fail_msg("Reading PMD pagesize failed\n"); 527 528 fd_size = 2 * pmd_pagesize; 529 530 split_pmd_zero_pages(); 531 532 for (i = 0; i < 9; i++) 533 if (i != 1) 534 split_pmd_thp_to_order(i); 535 536 split_pte_mapped_thp(); 537 for (i = 0; i < 9; i++) 538 split_file_backed_thp(i); 539 540 created_tmp = prepare_thp_fs(optional_xfs_path, fs_loc_template, 541 &fs_loc); 542 for (i = 8; i >= 0; i--) 543 split_thp_in_pagecache_to_order(fd_size, i, fs_loc); 544 cleanup_thp_fs(fs_loc, created_tmp); 545 546 ksft_finished(); 547 548 return 0; 549 } 550