1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * A test of splitting PMD THPs and PTE-mapped THPs from a specified virtual 4 * address range in a process via <debugfs>/split_huge_pages interface. 5 */ 6 7 #define _GNU_SOURCE 8 #include <assert.h> 9 #include <stdio.h> 10 #include <stdlib.h> 11 #include <stdarg.h> 12 #include <unistd.h> 13 #include <inttypes.h> 14 #include <string.h> 15 #include <fcntl.h> 16 #include <sys/mman.h> 17 #include <sys/mount.h> 18 #include <sys/param.h> 19 #include <malloc.h> 20 #include <stdbool.h> 21 #include <time.h> 22 #include "vm_util.h" 23 #include "../kselftest.h" 24 25 uint64_t pagesize; 26 unsigned int pageshift; 27 uint64_t pmd_pagesize; 28 29 #define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages" 30 #define SMAP_PATH "/proc/self/smaps" 31 #define INPUT_MAX 80 32 33 #define PID_FMT "%d,0x%lx,0x%lx,%d" 34 #define PATH_FMT "%s,0x%lx,0x%lx,%d" 35 36 #define PFN_MASK ((1UL<<55)-1) 37 #define KPF_THP (1UL<<22) 38 39 int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file) 40 { 41 uint64_t paddr; 42 uint64_t page_flags; 43 44 if (pagemap_file) { 45 pread(pagemap_file, &paddr, sizeof(paddr), 46 ((long)vaddr >> pageshift) * sizeof(paddr)); 47 48 if (kpageflags_file) { 49 pread(kpageflags_file, &page_flags, sizeof(page_flags), 50 (paddr & PFN_MASK) * sizeof(page_flags)); 51 52 return !!(page_flags & KPF_THP); 53 } 54 } 55 return 0; 56 } 57 58 static void write_file(const char *path, const char *buf, size_t buflen) 59 { 60 int fd; 61 ssize_t numwritten; 62 63 fd = open(path, O_WRONLY); 64 if (fd == -1) 65 ksft_exit_fail_msg("%s open failed: %s\n", path, strerror(errno)); 66 67 numwritten = write(fd, buf, buflen - 1); 68 close(fd); 69 if (numwritten < 1) 70 ksft_exit_fail_msg("Write failed\n"); 71 } 72 73 static void write_debugfs(const char *fmt, ...) 74 { 75 char input[INPUT_MAX]; 76 int ret; 77 va_list argp; 78 79 va_start(argp, fmt); 80 ret = vsnprintf(input, INPUT_MAX, fmt, argp); 81 va_end(argp); 82 83 if (ret >= INPUT_MAX) 84 ksft_exit_fail_msg("%s: Debugfs input is too long\n", __func__); 85 86 write_file(SPLIT_DEBUGFS, input, ret + 1); 87 } 88 89 static char *allocate_zero_filled_hugepage(size_t len) 90 { 91 char *result; 92 size_t i; 93 94 result = memalign(pmd_pagesize, len); 95 if (!result) { 96 printf("Fail to allocate memory\n"); 97 exit(EXIT_FAILURE); 98 } 99 100 madvise(result, len, MADV_HUGEPAGE); 101 102 for (i = 0; i < len; i++) 103 result[i] = (char)0; 104 105 return result; 106 } 107 108 static void verify_rss_anon_split_huge_page_all_zeroes(char *one_page, int nr_hpages, size_t len) 109 { 110 unsigned long rss_anon_before, rss_anon_after; 111 size_t i; 112 113 if (!check_huge_anon(one_page, 4, pmd_pagesize)) 114 ksft_exit_fail_msg("No THP is allocated\n"); 115 116 rss_anon_before = rss_anon(); 117 if (!rss_anon_before) 118 ksft_exit_fail_msg("No RssAnon is allocated before split\n"); 119 120 /* split all THPs */ 121 write_debugfs(PID_FMT, getpid(), (uint64_t)one_page, 122 (uint64_t)one_page + len, 0); 123 124 for (i = 0; i < len; i++) 125 if (one_page[i] != (char)0) 126 ksft_exit_fail_msg("%ld byte corrupted\n", i); 127 128 if (!check_huge_anon(one_page, 0, pmd_pagesize)) 129 ksft_exit_fail_msg("Still AnonHugePages not split\n"); 130 131 rss_anon_after = rss_anon(); 132 if (rss_anon_after >= rss_anon_before) 133 ksft_exit_fail_msg("Incorrect RssAnon value. Before: %ld After: %ld\n", 134 rss_anon_before, rss_anon_after); 135 } 136 137 void split_pmd_zero_pages(void) 138 { 139 char *one_page; 140 int nr_hpages = 4; 141 size_t len = nr_hpages * pmd_pagesize; 142 143 one_page = allocate_zero_filled_hugepage(len); 144 verify_rss_anon_split_huge_page_all_zeroes(one_page, nr_hpages, len); 145 ksft_test_result_pass("Split zero filled huge pages successful\n"); 146 free(one_page); 147 } 148 149 void split_pmd_thp_to_order(int order) 150 { 151 char *one_page; 152 size_t len = 4 * pmd_pagesize; 153 size_t i; 154 155 one_page = memalign(pmd_pagesize, len); 156 if (!one_page) 157 ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno)); 158 159 madvise(one_page, len, MADV_HUGEPAGE); 160 161 for (i = 0; i < len; i++) 162 one_page[i] = (char)i; 163 164 if (!check_huge_anon(one_page, 4, pmd_pagesize)) 165 ksft_exit_fail_msg("No THP is allocated\n"); 166 167 /* split all THPs */ 168 write_debugfs(PID_FMT, getpid(), (uint64_t)one_page, 169 (uint64_t)one_page + len, order); 170 171 for (i = 0; i < len; i++) 172 if (one_page[i] != (char)i) 173 ksft_exit_fail_msg("%ld byte corrupted\n", i); 174 175 176 if (!check_huge_anon(one_page, 0, pmd_pagesize)) 177 ksft_exit_fail_msg("Still AnonHugePages not split\n"); 178 179 ksft_test_result_pass("Split huge pages to order %d successful\n", order); 180 free(one_page); 181 } 182 183 void split_pte_mapped_thp(void) 184 { 185 char *one_page, *pte_mapped, *pte_mapped2; 186 size_t len = 4 * pmd_pagesize; 187 uint64_t thp_size; 188 size_t i; 189 const char *pagemap_template = "/proc/%d/pagemap"; 190 const char *kpageflags_proc = "/proc/kpageflags"; 191 char pagemap_proc[255]; 192 int pagemap_fd; 193 int kpageflags_fd; 194 195 if (snprintf(pagemap_proc, 255, pagemap_template, getpid()) < 0) 196 ksft_exit_fail_msg("get pagemap proc error: %s\n", strerror(errno)); 197 198 pagemap_fd = open(pagemap_proc, O_RDONLY); 199 if (pagemap_fd == -1) 200 ksft_exit_fail_msg("read pagemap: %s\n", strerror(errno)); 201 202 kpageflags_fd = open(kpageflags_proc, O_RDONLY); 203 if (kpageflags_fd == -1) 204 ksft_exit_fail_msg("read kpageflags: %s\n", strerror(errno)); 205 206 one_page = mmap((void *)(1UL << 30), len, PROT_READ | PROT_WRITE, 207 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); 208 if (one_page == MAP_FAILED) 209 ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno)); 210 211 madvise(one_page, len, MADV_HUGEPAGE); 212 213 for (i = 0; i < len; i++) 214 one_page[i] = (char)i; 215 216 if (!check_huge_anon(one_page, 4, pmd_pagesize)) 217 ksft_exit_fail_msg("No THP is allocated\n"); 218 219 /* remap the first pagesize of first THP */ 220 pte_mapped = mremap(one_page, pagesize, pagesize, MREMAP_MAYMOVE); 221 222 /* remap the Nth pagesize of Nth THP */ 223 for (i = 1; i < 4; i++) { 224 pte_mapped2 = mremap(one_page + pmd_pagesize * i + pagesize * i, 225 pagesize, pagesize, 226 MREMAP_MAYMOVE|MREMAP_FIXED, 227 pte_mapped + pagesize * i); 228 if (pte_mapped2 == MAP_FAILED) 229 ksft_exit_fail_msg("mremap failed: %s\n", strerror(errno)); 230 } 231 232 /* smap does not show THPs after mremap, use kpageflags instead */ 233 thp_size = 0; 234 for (i = 0; i < pagesize * 4; i++) 235 if (i % pagesize == 0 && 236 is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) 237 thp_size++; 238 239 if (thp_size != 4) 240 ksft_exit_fail_msg("Some THPs are missing during mremap\n"); 241 242 /* split all remapped THPs */ 243 write_debugfs(PID_FMT, getpid(), (uint64_t)pte_mapped, 244 (uint64_t)pte_mapped + pagesize * 4, 0); 245 246 /* smap does not show THPs after mremap, use kpageflags instead */ 247 thp_size = 0; 248 for (i = 0; i < pagesize * 4; i++) { 249 if (pte_mapped[i] != (char)i) 250 ksft_exit_fail_msg("%ld byte corrupted\n", i); 251 252 if (i % pagesize == 0 && 253 is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) 254 thp_size++; 255 } 256 257 if (thp_size) 258 ksft_exit_fail_msg("Still %ld THPs not split\n", thp_size); 259 260 ksft_test_result_pass("Split PTE-mapped huge pages successful\n"); 261 munmap(one_page, len); 262 close(pagemap_fd); 263 close(kpageflags_fd); 264 } 265 266 void split_file_backed_thp(int order) 267 { 268 int status; 269 int fd; 270 char tmpfs_template[] = "/tmp/thp_split_XXXXXX"; 271 const char *tmpfs_loc = mkdtemp(tmpfs_template); 272 char testfile[INPUT_MAX]; 273 ssize_t num_written, num_read; 274 char *file_buf1, *file_buf2; 275 uint64_t pgoff_start = 0, pgoff_end = 1024; 276 int i; 277 278 ksft_print_msg("Please enable pr_debug in split_huge_pages_in_file() for more info.\n"); 279 280 file_buf1 = (char *)malloc(pmd_pagesize); 281 file_buf2 = (char *)malloc(pmd_pagesize); 282 283 if (!file_buf1 || !file_buf2) { 284 ksft_print_msg("cannot allocate file buffers\n"); 285 goto out; 286 } 287 288 for (i = 0; i < pmd_pagesize; i++) 289 file_buf1[i] = (char)i; 290 memset(file_buf2, 0, pmd_pagesize); 291 292 status = mount("tmpfs", tmpfs_loc, "tmpfs", 0, "huge=always,size=4m"); 293 294 if (status) 295 ksft_exit_fail_msg("Unable to create a tmpfs for testing\n"); 296 297 status = snprintf(testfile, INPUT_MAX, "%s/thp_file", tmpfs_loc); 298 if (status >= INPUT_MAX) { 299 ksft_exit_fail_msg("Fail to create file-backed THP split testing file\n"); 300 goto cleanup; 301 } 302 303 fd = open(testfile, O_CREAT|O_RDWR, 0664); 304 if (fd == -1) { 305 ksft_perror("Cannot open testing file"); 306 goto cleanup; 307 } 308 309 /* write pmd size data to the file, so a file-backed THP can be allocated */ 310 num_written = write(fd, file_buf1, pmd_pagesize); 311 312 if (num_written == -1 || num_written != pmd_pagesize) { 313 ksft_perror("Failed to write data to testing file"); 314 goto close_file; 315 } 316 317 /* split the file-backed THP */ 318 write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end, order); 319 320 /* check file content after split */ 321 status = lseek(fd, 0, SEEK_SET); 322 if (status == -1) { 323 ksft_perror("Cannot lseek file"); 324 goto close_file; 325 } 326 327 num_read = read(fd, file_buf2, num_written); 328 if (num_read == -1 || num_read != num_written) { 329 ksft_perror("Cannot read file content back"); 330 goto close_file; 331 } 332 333 if (strncmp(file_buf1, file_buf2, pmd_pagesize) != 0) { 334 ksft_print_msg("File content changed\n"); 335 goto close_file; 336 } 337 338 close(fd); 339 status = unlink(testfile); 340 if (status) { 341 ksft_perror("Cannot remove testing file"); 342 goto cleanup; 343 } 344 345 status = umount(tmpfs_loc); 346 if (status) { 347 rmdir(tmpfs_loc); 348 ksft_exit_fail_msg("Unable to umount %s\n", tmpfs_loc); 349 } 350 351 status = rmdir(tmpfs_loc); 352 if (status) 353 ksft_exit_fail_msg("cannot remove tmp dir: %s\n", strerror(errno)); 354 355 ksft_print_msg("Please check dmesg for more information\n"); 356 ksft_test_result_pass("File-backed THP split to order %d test done\n", order); 357 return; 358 359 close_file: 360 close(fd); 361 cleanup: 362 umount(tmpfs_loc); 363 rmdir(tmpfs_loc); 364 out: 365 ksft_exit_fail_msg("Error occurred\n"); 366 } 367 368 bool prepare_thp_fs(const char *xfs_path, char *thp_fs_template, 369 const char **thp_fs_loc) 370 { 371 if (xfs_path) { 372 *thp_fs_loc = xfs_path; 373 return false; 374 } 375 376 *thp_fs_loc = mkdtemp(thp_fs_template); 377 378 if (!*thp_fs_loc) 379 ksft_exit_fail_msg("cannot create temp folder\n"); 380 381 return true; 382 } 383 384 void cleanup_thp_fs(const char *thp_fs_loc, bool created_tmp) 385 { 386 int status; 387 388 if (!created_tmp) 389 return; 390 391 status = rmdir(thp_fs_loc); 392 if (status) 393 ksft_exit_fail_msg("cannot remove tmp dir: %s\n", 394 strerror(errno)); 395 } 396 397 int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, 398 char **addr) 399 { 400 size_t i; 401 int dummy = 0; 402 unsigned char buf[1024]; 403 404 srand(time(NULL)); 405 406 *fd = open(testfile, O_CREAT | O_RDWR, 0664); 407 if (*fd == -1) 408 ksft_exit_fail_msg("Failed to create a file at %s\n", testfile); 409 410 assert(fd_size % sizeof(buf) == 0); 411 for (i = 0; i < sizeof(buf); i++) 412 buf[i] = (unsigned char)i; 413 for (i = 0; i < fd_size; i += sizeof(buf)) 414 write(*fd, buf, sizeof(buf)); 415 416 close(*fd); 417 sync(); 418 *fd = open("/proc/sys/vm/drop_caches", O_WRONLY); 419 if (*fd == -1) { 420 ksft_perror("open drop_caches"); 421 goto err_out_unlink; 422 } 423 if (write(*fd, "3", 1) != 1) { 424 ksft_perror("write to drop_caches"); 425 goto err_out_unlink; 426 } 427 close(*fd); 428 429 *fd = open(testfile, O_RDWR); 430 if (*fd == -1) { 431 ksft_perror("Failed to open testfile\n"); 432 goto err_out_unlink; 433 } 434 435 *addr = mmap(NULL, fd_size, PROT_READ|PROT_WRITE, MAP_SHARED, *fd, 0); 436 if (*addr == (char *)-1) { 437 ksft_perror("cannot mmap"); 438 goto err_out_close; 439 } 440 madvise(*addr, fd_size, MADV_HUGEPAGE); 441 442 for (size_t i = 0; i < fd_size; i++) 443 dummy += *(*addr + i); 444 asm volatile("" : "+r" (dummy)); 445 446 if (!check_huge_file(*addr, fd_size / pmd_pagesize, pmd_pagesize)) { 447 ksft_print_msg("No large pagecache folio generated, please provide a filesystem supporting large folio\n"); 448 munmap(*addr, fd_size); 449 close(*fd); 450 unlink(testfile); 451 ksft_test_result_skip("Pagecache folio split skipped\n"); 452 return -2; 453 } 454 return 0; 455 err_out_close: 456 close(*fd); 457 err_out_unlink: 458 unlink(testfile); 459 ksft_exit_fail_msg("Failed to create large pagecache folios\n"); 460 return -1; 461 } 462 463 void split_thp_in_pagecache_to_order_at(size_t fd_size, const char *fs_loc, 464 int order, int offset) 465 { 466 int fd; 467 char *addr; 468 size_t i; 469 char testfile[INPUT_MAX]; 470 int err = 0; 471 472 err = snprintf(testfile, INPUT_MAX, "%s/test", fs_loc); 473 474 if (err < 0) 475 ksft_exit_fail_msg("cannot generate right test file name\n"); 476 477 err = create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr); 478 if (err) 479 return; 480 err = 0; 481 482 if (offset == -1) 483 write_debugfs(PID_FMT, getpid(), (uint64_t)addr, 484 (uint64_t)addr + fd_size, order); 485 else 486 write_debugfs(PID_FMT, getpid(), (uint64_t)addr, 487 (uint64_t)addr + fd_size, order, offset); 488 489 for (i = 0; i < fd_size; i++) 490 if (*(addr + i) != (char)i) { 491 ksft_print_msg("%lu byte corrupted in the file\n", i); 492 err = EXIT_FAILURE; 493 goto out; 494 } 495 496 if (!check_huge_file(addr, 0, pmd_pagesize)) { 497 ksft_print_msg("Still FilePmdMapped not split\n"); 498 err = EXIT_FAILURE; 499 goto out; 500 } 501 502 out: 503 munmap(addr, fd_size); 504 close(fd); 505 unlink(testfile); 506 if (offset == -1) { 507 if (err) 508 ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d failed\n", order); 509 ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d passed\n", order); 510 } else { 511 if (err) 512 ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d at in-folio offset %d failed\n", order, offset); 513 ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d at in-folio offset %d passed\n", order, offset); 514 } 515 } 516 517 int main(int argc, char **argv) 518 { 519 int i; 520 size_t fd_size; 521 char *optional_xfs_path = NULL; 522 char fs_loc_template[] = "/tmp/thp_fs_XXXXXX"; 523 const char *fs_loc; 524 bool created_tmp; 525 int offset; 526 527 ksft_print_header(); 528 529 if (geteuid() != 0) { 530 ksft_print_msg("Please run the benchmark as root\n"); 531 ksft_finished(); 532 } 533 534 if (argc > 1) 535 optional_xfs_path = argv[1]; 536 537 ksft_set_plan(1+8+1+9+9+8*4+2); 538 539 pagesize = getpagesize(); 540 pageshift = ffs(pagesize) - 1; 541 pmd_pagesize = read_pmd_pagesize(); 542 if (!pmd_pagesize) 543 ksft_exit_fail_msg("Reading PMD pagesize failed\n"); 544 545 fd_size = 2 * pmd_pagesize; 546 547 split_pmd_zero_pages(); 548 549 for (i = 0; i < 9; i++) 550 if (i != 1) 551 split_pmd_thp_to_order(i); 552 553 split_pte_mapped_thp(); 554 for (i = 0; i < 9; i++) 555 split_file_backed_thp(i); 556 557 created_tmp = prepare_thp_fs(optional_xfs_path, fs_loc_template, 558 &fs_loc); 559 for (i = 8; i >= 0; i--) 560 split_thp_in_pagecache_to_order_at(fd_size, fs_loc, i, -1); 561 562 for (i = 0; i < 9; i++) 563 for (offset = 0; 564 offset < pmd_pagesize / pagesize; 565 offset += MAX(pmd_pagesize / pagesize / 4, 1 << i)) 566 split_thp_in_pagecache_to_order_at(fd_size, fs_loc, i, offset); 567 cleanup_thp_fs(fs_loc, created_tmp); 568 569 ksft_finished(); 570 571 return 0; 572 } 573