1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * A test of splitting PMD THPs and PTE-mapped THPs from a specified virtual 4 * address range in a process via <debugfs>/split_huge_pages interface. 5 */ 6 7 #define _GNU_SOURCE 8 #include <assert.h> 9 #include <stdio.h> 10 #include <stdlib.h> 11 #include <stdarg.h> 12 #include <unistd.h> 13 #include <inttypes.h> 14 #include <string.h> 15 #include <fcntl.h> 16 #include <sys/mman.h> 17 #include <sys/mount.h> 18 #include <sys/param.h> 19 #include <malloc.h> 20 #include <stdbool.h> 21 #include <time.h> 22 #include "vm_util.h" 23 #include "../kselftest.h" 24 25 uint64_t pagesize; 26 unsigned int pageshift; 27 uint64_t pmd_pagesize; 28 29 #define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages" 30 #define SMAP_PATH "/proc/self/smaps" 31 #define INPUT_MAX 80 32 33 #define PID_FMT "%d,0x%lx,0x%lx,%d" 34 #define PID_FMT_OFFSET "%d,0x%lx,0x%lx,%d,%d" 35 #define PATH_FMT "%s,0x%lx,0x%lx,%d" 36 37 #define PFN_MASK ((1UL<<55)-1) 38 #define KPF_THP (1UL<<22) 39 40 int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file) 41 { 42 uint64_t paddr; 43 uint64_t page_flags; 44 45 if (pagemap_file) { 46 pread(pagemap_file, &paddr, sizeof(paddr), 47 ((long)vaddr >> pageshift) * sizeof(paddr)); 48 49 if (kpageflags_file) { 50 pread(kpageflags_file, &page_flags, sizeof(page_flags), 51 (paddr & PFN_MASK) * sizeof(page_flags)); 52 53 return !!(page_flags & KPF_THP); 54 } 55 } 56 return 0; 57 } 58 59 static void write_file(const char *path, const char *buf, size_t buflen) 60 { 61 int fd; 62 ssize_t numwritten; 63 64 fd = open(path, O_WRONLY); 65 if (fd == -1) 66 ksft_exit_fail_msg("%s open failed: %s\n", path, strerror(errno)); 67 68 numwritten = write(fd, buf, buflen - 1); 69 close(fd); 70 if (numwritten < 1) 71 ksft_exit_fail_msg("Write failed\n"); 72 } 73 74 static void write_debugfs(const char *fmt, ...) 75 { 76 char input[INPUT_MAX]; 77 int ret; 78 va_list argp; 79 80 va_start(argp, fmt); 81 ret = vsnprintf(input, INPUT_MAX, fmt, argp); 82 va_end(argp); 83 84 if (ret >= INPUT_MAX) 85 ksft_exit_fail_msg("%s: Debugfs input is too long\n", __func__); 86 87 write_file(SPLIT_DEBUGFS, input, ret + 1); 88 } 89 90 static char *allocate_zero_filled_hugepage(size_t len) 91 { 92 char *result; 93 size_t i; 94 95 result = memalign(pmd_pagesize, len); 96 if (!result) { 97 printf("Fail to allocate memory\n"); 98 exit(EXIT_FAILURE); 99 } 100 101 madvise(result, len, MADV_HUGEPAGE); 102 103 for (i = 0; i < len; i++) 104 result[i] = (char)0; 105 106 return result; 107 } 108 109 static void verify_rss_anon_split_huge_page_all_zeroes(char *one_page, int nr_hpages, size_t len) 110 { 111 unsigned long rss_anon_before, rss_anon_after; 112 size_t i; 113 114 if (!check_huge_anon(one_page, 4, pmd_pagesize)) 115 ksft_exit_fail_msg("No THP is allocated\n"); 116 117 rss_anon_before = rss_anon(); 118 if (!rss_anon_before) 119 ksft_exit_fail_msg("No RssAnon is allocated before split\n"); 120 121 /* split all THPs */ 122 write_debugfs(PID_FMT, getpid(), (uint64_t)one_page, 123 (uint64_t)one_page + len, 0); 124 125 for (i = 0; i < len; i++) 126 if (one_page[i] != (char)0) 127 ksft_exit_fail_msg("%ld byte corrupted\n", i); 128 129 if (!check_huge_anon(one_page, 0, pmd_pagesize)) 130 ksft_exit_fail_msg("Still AnonHugePages not split\n"); 131 132 rss_anon_after = rss_anon(); 133 if (rss_anon_after >= rss_anon_before) 134 ksft_exit_fail_msg("Incorrect RssAnon value. Before: %ld After: %ld\n", 135 rss_anon_before, rss_anon_after); 136 } 137 138 void split_pmd_zero_pages(void) 139 { 140 char *one_page; 141 int nr_hpages = 4; 142 size_t len = nr_hpages * pmd_pagesize; 143 144 one_page = allocate_zero_filled_hugepage(len); 145 verify_rss_anon_split_huge_page_all_zeroes(one_page, nr_hpages, len); 146 ksft_test_result_pass("Split zero filled huge pages successful\n"); 147 free(one_page); 148 } 149 150 void split_pmd_thp_to_order(int order) 151 { 152 char *one_page; 153 size_t len = 4 * pmd_pagesize; 154 size_t i; 155 156 one_page = memalign(pmd_pagesize, len); 157 if (!one_page) 158 ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno)); 159 160 madvise(one_page, len, MADV_HUGEPAGE); 161 162 for (i = 0; i < len; i++) 163 one_page[i] = (char)i; 164 165 if (!check_huge_anon(one_page, 4, pmd_pagesize)) 166 ksft_exit_fail_msg("No THP is allocated\n"); 167 168 /* split all THPs */ 169 write_debugfs(PID_FMT, getpid(), (uint64_t)one_page, 170 (uint64_t)one_page + len, order); 171 172 for (i = 0; i < len; i++) 173 if (one_page[i] != (char)i) 174 ksft_exit_fail_msg("%ld byte corrupted\n", i); 175 176 177 if (!check_huge_anon(one_page, 0, pmd_pagesize)) 178 ksft_exit_fail_msg("Still AnonHugePages not split\n"); 179 180 ksft_test_result_pass("Split huge pages to order %d successful\n", order); 181 free(one_page); 182 } 183 184 void split_pte_mapped_thp(void) 185 { 186 char *one_page, *pte_mapped, *pte_mapped2; 187 size_t len = 4 * pmd_pagesize; 188 uint64_t thp_size; 189 size_t i; 190 const char *pagemap_template = "/proc/%d/pagemap"; 191 const char *kpageflags_proc = "/proc/kpageflags"; 192 char pagemap_proc[255]; 193 int pagemap_fd; 194 int kpageflags_fd; 195 196 if (snprintf(pagemap_proc, 255, pagemap_template, getpid()) < 0) 197 ksft_exit_fail_msg("get pagemap proc error: %s\n", strerror(errno)); 198 199 pagemap_fd = open(pagemap_proc, O_RDONLY); 200 if (pagemap_fd == -1) 201 ksft_exit_fail_msg("read pagemap: %s\n", strerror(errno)); 202 203 kpageflags_fd = open(kpageflags_proc, O_RDONLY); 204 if (kpageflags_fd == -1) 205 ksft_exit_fail_msg("read kpageflags: %s\n", strerror(errno)); 206 207 one_page = mmap((void *)(1UL << 30), len, PROT_READ | PROT_WRITE, 208 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); 209 if (one_page == MAP_FAILED) 210 ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno)); 211 212 madvise(one_page, len, MADV_HUGEPAGE); 213 214 for (i = 0; i < len; i++) 215 one_page[i] = (char)i; 216 217 if (!check_huge_anon(one_page, 4, pmd_pagesize)) 218 ksft_exit_fail_msg("No THP is allocated\n"); 219 220 /* remap the first pagesize of first THP */ 221 pte_mapped = mremap(one_page, pagesize, pagesize, MREMAP_MAYMOVE); 222 223 /* remap the Nth pagesize of Nth THP */ 224 for (i = 1; i < 4; i++) { 225 pte_mapped2 = mremap(one_page + pmd_pagesize * i + pagesize * i, 226 pagesize, pagesize, 227 MREMAP_MAYMOVE|MREMAP_FIXED, 228 pte_mapped + pagesize * i); 229 if (pte_mapped2 == MAP_FAILED) 230 ksft_exit_fail_msg("mremap failed: %s\n", strerror(errno)); 231 } 232 233 /* smap does not show THPs after mremap, use kpageflags instead */ 234 thp_size = 0; 235 for (i = 0; i < pagesize * 4; i++) 236 if (i % pagesize == 0 && 237 is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) 238 thp_size++; 239 240 if (thp_size != 4) 241 ksft_exit_fail_msg("Some THPs are missing during mremap\n"); 242 243 /* split all remapped THPs */ 244 write_debugfs(PID_FMT, getpid(), (uint64_t)pte_mapped, 245 (uint64_t)pte_mapped + pagesize * 4, 0); 246 247 /* smap does not show THPs after mremap, use kpageflags instead */ 248 thp_size = 0; 249 for (i = 0; i < pagesize * 4; i++) { 250 if (pte_mapped[i] != (char)i) 251 ksft_exit_fail_msg("%ld byte corrupted\n", i); 252 253 if (i % pagesize == 0 && 254 is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) 255 thp_size++; 256 } 257 258 if (thp_size) 259 ksft_exit_fail_msg("Still %ld THPs not split\n", thp_size); 260 261 ksft_test_result_pass("Split PTE-mapped huge pages successful\n"); 262 munmap(one_page, len); 263 close(pagemap_fd); 264 close(kpageflags_fd); 265 } 266 267 void split_file_backed_thp(int order) 268 { 269 int status; 270 int fd; 271 char tmpfs_template[] = "/tmp/thp_split_XXXXXX"; 272 const char *tmpfs_loc = mkdtemp(tmpfs_template); 273 char testfile[INPUT_MAX]; 274 ssize_t num_written, num_read; 275 char *file_buf1, *file_buf2; 276 uint64_t pgoff_start = 0, pgoff_end = 1024; 277 int i; 278 279 ksft_print_msg("Please enable pr_debug in split_huge_pages_in_file() for more info.\n"); 280 281 file_buf1 = (char *)malloc(pmd_pagesize); 282 file_buf2 = (char *)malloc(pmd_pagesize); 283 284 if (!file_buf1 || !file_buf2) { 285 ksft_print_msg("cannot allocate file buffers\n"); 286 goto out; 287 } 288 289 for (i = 0; i < pmd_pagesize; i++) 290 file_buf1[i] = (char)i; 291 memset(file_buf2, 0, pmd_pagesize); 292 293 status = mount("tmpfs", tmpfs_loc, "tmpfs", 0, "huge=always,size=4m"); 294 295 if (status) 296 ksft_exit_fail_msg("Unable to create a tmpfs for testing\n"); 297 298 status = snprintf(testfile, INPUT_MAX, "%s/thp_file", tmpfs_loc); 299 if (status >= INPUT_MAX) { 300 ksft_exit_fail_msg("Fail to create file-backed THP split testing file\n"); 301 goto cleanup; 302 } 303 304 fd = open(testfile, O_CREAT|O_RDWR, 0664); 305 if (fd == -1) { 306 ksft_perror("Cannot open testing file"); 307 goto cleanup; 308 } 309 310 /* write pmd size data to the file, so a file-backed THP can be allocated */ 311 num_written = write(fd, file_buf1, pmd_pagesize); 312 313 if (num_written == -1 || num_written != pmd_pagesize) { 314 ksft_perror("Failed to write data to testing file"); 315 goto close_file; 316 } 317 318 /* split the file-backed THP */ 319 write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end, order); 320 321 /* check file content after split */ 322 status = lseek(fd, 0, SEEK_SET); 323 if (status == -1) { 324 ksft_perror("Cannot lseek file"); 325 goto close_file; 326 } 327 328 num_read = read(fd, file_buf2, num_written); 329 if (num_read == -1 || num_read != num_written) { 330 ksft_perror("Cannot read file content back"); 331 goto close_file; 332 } 333 334 if (strncmp(file_buf1, file_buf2, pmd_pagesize) != 0) { 335 ksft_print_msg("File content changed\n"); 336 goto close_file; 337 } 338 339 close(fd); 340 status = unlink(testfile); 341 if (status) { 342 ksft_perror("Cannot remove testing file"); 343 goto cleanup; 344 } 345 346 status = umount(tmpfs_loc); 347 if (status) { 348 rmdir(tmpfs_loc); 349 ksft_exit_fail_msg("Unable to umount %s\n", tmpfs_loc); 350 } 351 352 status = rmdir(tmpfs_loc); 353 if (status) 354 ksft_exit_fail_msg("cannot remove tmp dir: %s\n", strerror(errno)); 355 356 ksft_print_msg("Please check dmesg for more information\n"); 357 ksft_test_result_pass("File-backed THP split to order %d test done\n", order); 358 return; 359 360 close_file: 361 close(fd); 362 cleanup: 363 umount(tmpfs_loc); 364 rmdir(tmpfs_loc); 365 out: 366 ksft_exit_fail_msg("Error occurred\n"); 367 } 368 369 bool prepare_thp_fs(const char *xfs_path, char *thp_fs_template, 370 const char **thp_fs_loc) 371 { 372 if (xfs_path) { 373 *thp_fs_loc = xfs_path; 374 return false; 375 } 376 377 *thp_fs_loc = mkdtemp(thp_fs_template); 378 379 if (!*thp_fs_loc) 380 ksft_exit_fail_msg("cannot create temp folder\n"); 381 382 return true; 383 } 384 385 void cleanup_thp_fs(const char *thp_fs_loc, bool created_tmp) 386 { 387 int status; 388 389 if (!created_tmp) 390 return; 391 392 status = rmdir(thp_fs_loc); 393 if (status) 394 ksft_exit_fail_msg("cannot remove tmp dir: %s\n", 395 strerror(errno)); 396 } 397 398 int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, 399 char **addr) 400 { 401 size_t i; 402 unsigned char buf[1024]; 403 404 srand(time(NULL)); 405 406 *fd = open(testfile, O_CREAT | O_RDWR, 0664); 407 if (*fd == -1) 408 ksft_exit_fail_msg("Failed to create a file at %s\n", testfile); 409 410 assert(fd_size % sizeof(buf) == 0); 411 for (i = 0; i < sizeof(buf); i++) 412 buf[i] = (unsigned char)i; 413 for (i = 0; i < fd_size; i += sizeof(buf)) 414 write(*fd, buf, sizeof(buf)); 415 416 close(*fd); 417 sync(); 418 *fd = open("/proc/sys/vm/drop_caches", O_WRONLY); 419 if (*fd == -1) { 420 ksft_perror("open drop_caches"); 421 goto err_out_unlink; 422 } 423 if (write(*fd, "3", 1) != 1) { 424 ksft_perror("write to drop_caches"); 425 goto err_out_unlink; 426 } 427 close(*fd); 428 429 *fd = open(testfile, O_RDWR); 430 if (*fd == -1) { 431 ksft_perror("Failed to open testfile\n"); 432 goto err_out_unlink; 433 } 434 435 *addr = mmap(NULL, fd_size, PROT_READ|PROT_WRITE, MAP_SHARED, *fd, 0); 436 if (*addr == (char *)-1) { 437 ksft_perror("cannot mmap"); 438 goto err_out_close; 439 } 440 madvise(*addr, fd_size, MADV_HUGEPAGE); 441 442 for (size_t i = 0; i < fd_size; i++) 443 FORCE_READ((*addr + i)); 444 445 if (!check_huge_file(*addr, fd_size / pmd_pagesize, pmd_pagesize)) { 446 ksft_print_msg("No large pagecache folio generated, please provide a filesystem supporting large folio\n"); 447 munmap(*addr, fd_size); 448 close(*fd); 449 unlink(testfile); 450 ksft_test_result_skip("Pagecache folio split skipped\n"); 451 return -2; 452 } 453 return 0; 454 err_out_close: 455 close(*fd); 456 err_out_unlink: 457 unlink(testfile); 458 ksft_exit_fail_msg("Failed to create large pagecache folios\n"); 459 return -1; 460 } 461 462 void split_thp_in_pagecache_to_order_at(size_t fd_size, const char *fs_loc, 463 int order, int offset) 464 { 465 int fd; 466 char *addr; 467 size_t i; 468 char testfile[INPUT_MAX]; 469 int err = 0; 470 471 err = snprintf(testfile, INPUT_MAX, "%s/test", fs_loc); 472 473 if (err < 0) 474 ksft_exit_fail_msg("cannot generate right test file name\n"); 475 476 err = create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr); 477 if (err) 478 return; 479 err = 0; 480 481 if (offset == -1) 482 write_debugfs(PID_FMT, getpid(), (uint64_t)addr, 483 (uint64_t)addr + fd_size, order); 484 else 485 write_debugfs(PID_FMT_OFFSET, getpid(), (uint64_t)addr, 486 (uint64_t)addr + fd_size, order, offset); 487 488 for (i = 0; i < fd_size; i++) 489 if (*(addr + i) != (char)i) { 490 ksft_print_msg("%lu byte corrupted in the file\n", i); 491 err = EXIT_FAILURE; 492 goto out; 493 } 494 495 if (!check_huge_file(addr, 0, pmd_pagesize)) { 496 ksft_print_msg("Still FilePmdMapped not split\n"); 497 err = EXIT_FAILURE; 498 goto out; 499 } 500 501 out: 502 munmap(addr, fd_size); 503 close(fd); 504 unlink(testfile); 505 if (offset == -1) { 506 if (err) 507 ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d failed\n", order); 508 ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d passed\n", order); 509 } else { 510 if (err) 511 ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d at in-folio offset %d failed\n", order, offset); 512 ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d at in-folio offset %d passed\n", order, offset); 513 } 514 } 515 516 int main(int argc, char **argv) 517 { 518 int i; 519 size_t fd_size; 520 char *optional_xfs_path = NULL; 521 char fs_loc_template[] = "/tmp/thp_fs_XXXXXX"; 522 const char *fs_loc; 523 bool created_tmp; 524 int offset; 525 526 ksft_print_header(); 527 528 if (geteuid() != 0) { 529 ksft_print_msg("Please run the benchmark as root\n"); 530 ksft_finished(); 531 } 532 533 if (argc > 1) 534 optional_xfs_path = argv[1]; 535 536 ksft_set_plan(1+8+1+9+9+8*4+2); 537 538 pagesize = getpagesize(); 539 pageshift = ffs(pagesize) - 1; 540 pmd_pagesize = read_pmd_pagesize(); 541 if (!pmd_pagesize) 542 ksft_exit_fail_msg("Reading PMD pagesize failed\n"); 543 544 fd_size = 2 * pmd_pagesize; 545 546 split_pmd_zero_pages(); 547 548 for (i = 0; i < 9; i++) 549 if (i != 1) 550 split_pmd_thp_to_order(i); 551 552 split_pte_mapped_thp(); 553 for (i = 0; i < 9; i++) 554 split_file_backed_thp(i); 555 556 created_tmp = prepare_thp_fs(optional_xfs_path, fs_loc_template, 557 &fs_loc); 558 for (i = 8; i >= 0; i--) 559 split_thp_in_pagecache_to_order_at(fd_size, fs_loc, i, -1); 560 561 for (i = 0; i < 9; i++) 562 for (offset = 0; 563 offset < pmd_pagesize / pagesize; 564 offset += MAX(pmd_pagesize / pagesize / 4, 1 << i)) 565 split_thp_in_pagecache_to_order_at(fd_size, fs_loc, i, offset); 566 cleanup_thp_fs(fs_loc, created_tmp); 567 568 ksft_finished(); 569 570 return 0; 571 } 572