1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * A test of splitting PMD THPs and PTE-mapped THPs from a specified virtual 4 * address range in a process via <debugfs>/split_huge_pages interface. 5 */ 6 7 #define _GNU_SOURCE 8 #include <stdio.h> 9 #include <stdlib.h> 10 #include <stdarg.h> 11 #include <unistd.h> 12 #include <inttypes.h> 13 #include <string.h> 14 #include <fcntl.h> 15 #include <sys/mman.h> 16 #include <sys/mount.h> 17 #include <malloc.h> 18 #include <stdbool.h> 19 #include <time.h> 20 #include "vm_util.h" 21 #include "../kselftest.h" 22 23 uint64_t pagesize; 24 unsigned int pageshift; 25 uint64_t pmd_pagesize; 26 27 #define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages" 28 #define SMAP_PATH "/proc/self/smaps" 29 #define INPUT_MAX 80 30 31 #define PID_FMT "%d,0x%lx,0x%lx,%d" 32 #define PATH_FMT "%s,0x%lx,0x%lx,%d" 33 34 #define PFN_MASK ((1UL<<55)-1) 35 #define KPF_THP (1UL<<22) 36 37 int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file) 38 { 39 uint64_t paddr; 40 uint64_t page_flags; 41 42 if (pagemap_file) { 43 pread(pagemap_file, &paddr, sizeof(paddr), 44 ((long)vaddr >> pageshift) * sizeof(paddr)); 45 46 if (kpageflags_file) { 47 pread(kpageflags_file, &page_flags, sizeof(page_flags), 48 (paddr & PFN_MASK) * sizeof(page_flags)); 49 50 return !!(page_flags & KPF_THP); 51 } 52 } 53 return 0; 54 } 55 56 static void write_file(const char *path, const char *buf, size_t buflen) 57 { 58 int fd; 59 ssize_t numwritten; 60 61 fd = open(path, O_WRONLY); 62 if (fd == -1) 63 ksft_exit_fail_msg("%s open failed: %s\n", path, strerror(errno)); 64 65 numwritten = write(fd, buf, buflen - 1); 66 close(fd); 67 if (numwritten < 1) 68 ksft_exit_fail_msg("Write failed\n"); 69 } 70 71 static void write_debugfs(const char *fmt, ...) 72 { 73 char input[INPUT_MAX]; 74 int ret; 75 va_list argp; 76 77 va_start(argp, fmt); 78 ret = vsnprintf(input, INPUT_MAX, fmt, argp); 79 va_end(argp); 80 81 if (ret >= INPUT_MAX) 82 ksft_exit_fail_msg("%s: Debugfs input is too long\n", __func__); 83 84 write_file(SPLIT_DEBUGFS, input, ret + 1); 85 } 86 87 static char *allocate_zero_filled_hugepage(size_t len) 88 { 89 char *result; 90 size_t i; 91 92 result = memalign(pmd_pagesize, len); 93 if (!result) { 94 printf("Fail to allocate memory\n"); 95 exit(EXIT_FAILURE); 96 } 97 98 madvise(result, len, MADV_HUGEPAGE); 99 100 for (i = 0; i < len; i++) 101 result[i] = (char)0; 102 103 return result; 104 } 105 106 static void verify_rss_anon_split_huge_page_all_zeroes(char *one_page, int nr_hpages, size_t len) 107 { 108 unsigned long rss_anon_before, rss_anon_after; 109 size_t i; 110 111 if (!check_huge_anon(one_page, 4, pmd_pagesize)) { 112 printf("No THP is allocated\n"); 113 exit(EXIT_FAILURE); 114 } 115 116 rss_anon_before = rss_anon(); 117 if (!rss_anon_before) { 118 printf("No RssAnon is allocated before split\n"); 119 exit(EXIT_FAILURE); 120 } 121 122 /* split all THPs */ 123 write_debugfs(PID_FMT, getpid(), (uint64_t)one_page, 124 (uint64_t)one_page + len, 0); 125 126 for (i = 0; i < len; i++) 127 if (one_page[i] != (char)0) { 128 printf("%ld byte corrupted\n", i); 129 exit(EXIT_FAILURE); 130 } 131 132 if (!check_huge_anon(one_page, 0, pmd_pagesize)) { 133 printf("Still AnonHugePages not split\n"); 134 exit(EXIT_FAILURE); 135 } 136 137 rss_anon_after = rss_anon(); 138 if (rss_anon_after >= rss_anon_before) { 139 printf("Incorrect RssAnon value. Before: %ld After: %ld\n", 140 rss_anon_before, rss_anon_after); 141 exit(EXIT_FAILURE); 142 } 143 } 144 145 void split_pmd_zero_pages(void) 146 { 147 char *one_page; 148 int nr_hpages = 4; 149 size_t len = nr_hpages * pmd_pagesize; 150 151 one_page = allocate_zero_filled_hugepage(len); 152 verify_rss_anon_split_huge_page_all_zeroes(one_page, nr_hpages, len); 153 printf("Split zero filled huge pages successful\n"); 154 free(one_page); 155 } 156 157 void split_pmd_thp(void) 158 { 159 char *one_page; 160 size_t len = 4 * pmd_pagesize; 161 size_t i; 162 163 one_page = memalign(pmd_pagesize, len); 164 if (!one_page) 165 ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno)); 166 167 madvise(one_page, len, MADV_HUGEPAGE); 168 169 for (i = 0; i < len; i++) 170 one_page[i] = (char)i; 171 172 if (!check_huge_anon(one_page, 4, pmd_pagesize)) 173 ksft_exit_fail_msg("No THP is allocated\n"); 174 175 /* split all THPs */ 176 write_debugfs(PID_FMT, getpid(), (uint64_t)one_page, 177 (uint64_t)one_page + len, 0); 178 179 for (i = 0; i < len; i++) 180 if (one_page[i] != (char)i) 181 ksft_exit_fail_msg("%ld byte corrupted\n", i); 182 183 184 if (!check_huge_anon(one_page, 0, pmd_pagesize)) 185 ksft_exit_fail_msg("Still AnonHugePages not split\n"); 186 187 ksft_test_result_pass("Split huge pages successful\n"); 188 free(one_page); 189 } 190 191 void split_pte_mapped_thp(void) 192 { 193 char *one_page, *pte_mapped, *pte_mapped2; 194 size_t len = 4 * pmd_pagesize; 195 uint64_t thp_size; 196 size_t i; 197 const char *pagemap_template = "/proc/%d/pagemap"; 198 const char *kpageflags_proc = "/proc/kpageflags"; 199 char pagemap_proc[255]; 200 int pagemap_fd; 201 int kpageflags_fd; 202 203 if (snprintf(pagemap_proc, 255, pagemap_template, getpid()) < 0) 204 ksft_exit_fail_msg("get pagemap proc error: %s\n", strerror(errno)); 205 206 pagemap_fd = open(pagemap_proc, O_RDONLY); 207 if (pagemap_fd == -1) 208 ksft_exit_fail_msg("read pagemap: %s\n", strerror(errno)); 209 210 kpageflags_fd = open(kpageflags_proc, O_RDONLY); 211 if (kpageflags_fd == -1) 212 ksft_exit_fail_msg("read kpageflags: %s\n", strerror(errno)); 213 214 one_page = mmap((void *)(1UL << 30), len, PROT_READ | PROT_WRITE, 215 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); 216 if (one_page == MAP_FAILED) 217 ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno)); 218 219 madvise(one_page, len, MADV_HUGEPAGE); 220 221 for (i = 0; i < len; i++) 222 one_page[i] = (char)i; 223 224 if (!check_huge_anon(one_page, 4, pmd_pagesize)) 225 ksft_exit_fail_msg("No THP is allocated\n"); 226 227 /* remap the first pagesize of first THP */ 228 pte_mapped = mremap(one_page, pagesize, pagesize, MREMAP_MAYMOVE); 229 230 /* remap the Nth pagesize of Nth THP */ 231 for (i = 1; i < 4; i++) { 232 pte_mapped2 = mremap(one_page + pmd_pagesize * i + pagesize * i, 233 pagesize, pagesize, 234 MREMAP_MAYMOVE|MREMAP_FIXED, 235 pte_mapped + pagesize * i); 236 if (pte_mapped2 == MAP_FAILED) 237 ksft_exit_fail_msg("mremap failed: %s\n", strerror(errno)); 238 } 239 240 /* smap does not show THPs after mremap, use kpageflags instead */ 241 thp_size = 0; 242 for (i = 0; i < pagesize * 4; i++) 243 if (i % pagesize == 0 && 244 is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) 245 thp_size++; 246 247 if (thp_size != 4) 248 ksft_exit_fail_msg("Some THPs are missing during mremap\n"); 249 250 /* split all remapped THPs */ 251 write_debugfs(PID_FMT, getpid(), (uint64_t)pte_mapped, 252 (uint64_t)pte_mapped + pagesize * 4, 0); 253 254 /* smap does not show THPs after mremap, use kpageflags instead */ 255 thp_size = 0; 256 for (i = 0; i < pagesize * 4; i++) { 257 if (pte_mapped[i] != (char)i) 258 ksft_exit_fail_msg("%ld byte corrupted\n", i); 259 260 if (i % pagesize == 0 && 261 is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) 262 thp_size++; 263 } 264 265 if (thp_size) 266 ksft_exit_fail_msg("Still %ld THPs not split\n", thp_size); 267 268 ksft_test_result_pass("Split PTE-mapped huge pages successful\n"); 269 munmap(one_page, len); 270 close(pagemap_fd); 271 close(kpageflags_fd); 272 } 273 274 void split_file_backed_thp(void) 275 { 276 int status; 277 int fd; 278 ssize_t num_written; 279 char tmpfs_template[] = "/tmp/thp_split_XXXXXX"; 280 const char *tmpfs_loc = mkdtemp(tmpfs_template); 281 char testfile[INPUT_MAX]; 282 uint64_t pgoff_start = 0, pgoff_end = 1024; 283 284 ksft_print_msg("Please enable pr_debug in split_huge_pages_in_file() for more info.\n"); 285 286 status = mount("tmpfs", tmpfs_loc, "tmpfs", 0, "huge=always,size=4m"); 287 288 if (status) 289 ksft_exit_fail_msg("Unable to create a tmpfs for testing\n"); 290 291 status = snprintf(testfile, INPUT_MAX, "%s/thp_file", tmpfs_loc); 292 if (status >= INPUT_MAX) { 293 ksft_exit_fail_msg("Fail to create file-backed THP split testing file\n"); 294 } 295 296 fd = open(testfile, O_CREAT|O_WRONLY, 0664); 297 if (fd == -1) { 298 ksft_perror("Cannot open testing file"); 299 goto cleanup; 300 } 301 302 /* write something to the file, so a file-backed THP can be allocated */ 303 num_written = write(fd, tmpfs_loc, strlen(tmpfs_loc) + 1); 304 close(fd); 305 306 if (num_written < 1) { 307 ksft_perror("Fail to write data to testing file"); 308 goto cleanup; 309 } 310 311 /* split the file-backed THP */ 312 write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end, 0); 313 314 status = unlink(testfile); 315 if (status) { 316 ksft_perror("Cannot remove testing file"); 317 goto cleanup; 318 } 319 320 status = umount(tmpfs_loc); 321 if (status) { 322 rmdir(tmpfs_loc); 323 ksft_exit_fail_msg("Unable to umount %s\n", tmpfs_loc); 324 } 325 326 status = rmdir(tmpfs_loc); 327 if (status) 328 ksft_exit_fail_msg("cannot remove tmp dir: %s\n", strerror(errno)); 329 330 ksft_print_msg("Please check dmesg for more information\n"); 331 ksft_test_result_pass("File-backed THP split test done\n"); 332 return; 333 334 cleanup: 335 umount(tmpfs_loc); 336 rmdir(tmpfs_loc); 337 ksft_exit_fail_msg("Error occurred\n"); 338 } 339 340 bool prepare_thp_fs(const char *xfs_path, char *thp_fs_template, 341 const char **thp_fs_loc) 342 { 343 if (xfs_path) { 344 *thp_fs_loc = xfs_path; 345 return false; 346 } 347 348 *thp_fs_loc = mkdtemp(thp_fs_template); 349 350 if (!*thp_fs_loc) 351 ksft_exit_fail_msg("cannot create temp folder\n"); 352 353 return true; 354 } 355 356 void cleanup_thp_fs(const char *thp_fs_loc, bool created_tmp) 357 { 358 int status; 359 360 if (!created_tmp) 361 return; 362 363 status = rmdir(thp_fs_loc); 364 if (status) 365 ksft_exit_fail_msg("cannot remove tmp dir: %s\n", 366 strerror(errno)); 367 } 368 369 int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, 370 char **addr) 371 { 372 size_t i; 373 int dummy = 0; 374 375 srand(time(NULL)); 376 377 *fd = open(testfile, O_CREAT | O_RDWR, 0664); 378 if (*fd == -1) 379 ksft_exit_fail_msg("Failed to create a file at %s\n", testfile); 380 381 for (i = 0; i < fd_size; i++) { 382 unsigned char byte = (unsigned char)i; 383 384 write(*fd, &byte, sizeof(byte)); 385 } 386 close(*fd); 387 sync(); 388 *fd = open("/proc/sys/vm/drop_caches", O_WRONLY); 389 if (*fd == -1) { 390 ksft_perror("open drop_caches"); 391 goto err_out_unlink; 392 } 393 if (write(*fd, "3", 1) != 1) { 394 ksft_perror("write to drop_caches"); 395 goto err_out_unlink; 396 } 397 close(*fd); 398 399 *fd = open(testfile, O_RDWR); 400 if (*fd == -1) { 401 ksft_perror("Failed to open testfile\n"); 402 goto err_out_unlink; 403 } 404 405 *addr = mmap(NULL, fd_size, PROT_READ|PROT_WRITE, MAP_SHARED, *fd, 0); 406 if (*addr == (char *)-1) { 407 ksft_perror("cannot mmap"); 408 goto err_out_close; 409 } 410 madvise(*addr, fd_size, MADV_HUGEPAGE); 411 412 for (size_t i = 0; i < fd_size; i++) 413 dummy += *(*addr + i); 414 asm volatile("" : "+r" (dummy)); 415 416 if (!check_huge_file(*addr, fd_size / pmd_pagesize, pmd_pagesize)) { 417 ksft_print_msg("No large pagecache folio generated, please provide a filesystem supporting large folio\n"); 418 munmap(*addr, fd_size); 419 close(*fd); 420 unlink(testfile); 421 ksft_test_result_skip("Pagecache folio split skipped\n"); 422 return -2; 423 } 424 return 0; 425 err_out_close: 426 close(*fd); 427 err_out_unlink: 428 unlink(testfile); 429 ksft_exit_fail_msg("Failed to create large pagecache folios\n"); 430 return -1; 431 } 432 433 void split_thp_in_pagecache_to_order(size_t fd_size, int order, const char *fs_loc) 434 { 435 int fd; 436 char *addr; 437 size_t i; 438 char testfile[INPUT_MAX]; 439 int err = 0; 440 441 err = snprintf(testfile, INPUT_MAX, "%s/test", fs_loc); 442 443 if (err < 0) 444 ksft_exit_fail_msg("cannot generate right test file name\n"); 445 446 err = create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr); 447 if (err) 448 return; 449 err = 0; 450 451 write_debugfs(PID_FMT, getpid(), (uint64_t)addr, (uint64_t)addr + fd_size, order); 452 453 for (i = 0; i < fd_size; i++) 454 if (*(addr + i) != (char)i) { 455 ksft_print_msg("%lu byte corrupted in the file\n", i); 456 err = EXIT_FAILURE; 457 goto out; 458 } 459 460 if (!check_huge_file(addr, 0, pmd_pagesize)) { 461 ksft_print_msg("Still FilePmdMapped not split\n"); 462 err = EXIT_FAILURE; 463 goto out; 464 } 465 466 out: 467 munmap(addr, fd_size); 468 close(fd); 469 unlink(testfile); 470 if (err) 471 ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d failed\n", order); 472 ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d passed\n", order); 473 } 474 475 int main(int argc, char **argv) 476 { 477 int i; 478 size_t fd_size; 479 char *optional_xfs_path = NULL; 480 char fs_loc_template[] = "/tmp/thp_fs_XXXXXX"; 481 const char *fs_loc; 482 bool created_tmp; 483 484 ksft_print_header(); 485 486 if (geteuid() != 0) { 487 ksft_print_msg("Please run the benchmark as root\n"); 488 ksft_finished(); 489 } 490 491 if (argc > 1) 492 optional_xfs_path = argv[1]; 493 494 ksft_set_plan(3+9); 495 496 pagesize = getpagesize(); 497 pageshift = ffs(pagesize) - 1; 498 pmd_pagesize = read_pmd_pagesize(); 499 if (!pmd_pagesize) 500 ksft_exit_fail_msg("Reading PMD pagesize failed\n"); 501 502 fd_size = 2 * pmd_pagesize; 503 504 split_pmd_zero_pages(); 505 split_pmd_thp(); 506 split_pte_mapped_thp(); 507 split_file_backed_thp(); 508 509 created_tmp = prepare_thp_fs(optional_xfs_path, fs_loc_template, 510 &fs_loc); 511 for (i = 8; i >= 0; i--) 512 split_thp_in_pagecache_to_order(fd_size, i, fs_loc); 513 cleanup_thp_fs(fs_loc, created_tmp); 514 515 ksft_finished(); 516 517 return 0; 518 } 519