1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * A test of splitting PMD THPs and PTE-mapped THPs from a specified virtual 4 * address range in a process via <debugfs>/split_huge_pages interface. 5 */ 6 7 #define _GNU_SOURCE 8 #include <stdio.h> 9 #include <stdlib.h> 10 #include <stdarg.h> 11 #include <unistd.h> 12 #include <inttypes.h> 13 #include <string.h> 14 #include <fcntl.h> 15 #include <sys/mman.h> 16 #include <sys/mount.h> 17 #include <malloc.h> 18 #include <stdbool.h> 19 #include <time.h> 20 #include "vm_util.h" 21 #include "../kselftest.h" 22 23 uint64_t pagesize; 24 unsigned int pageshift; 25 uint64_t pmd_pagesize; 26 27 #define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages" 28 #define SMAP_PATH "/proc/self/smaps" 29 #define INPUT_MAX 80 30 31 #define PID_FMT "%d,0x%lx,0x%lx,%d" 32 #define PATH_FMT "%s,0x%lx,0x%lx,%d" 33 34 #define PFN_MASK ((1UL<<55)-1) 35 #define KPF_THP (1UL<<22) 36 37 int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file) 38 { 39 uint64_t paddr; 40 uint64_t page_flags; 41 42 if (pagemap_file) { 43 pread(pagemap_file, &paddr, sizeof(paddr), 44 ((long)vaddr >> pageshift) * sizeof(paddr)); 45 46 if (kpageflags_file) { 47 pread(kpageflags_file, &page_flags, sizeof(page_flags), 48 (paddr & PFN_MASK) * sizeof(page_flags)); 49 50 return !!(page_flags & KPF_THP); 51 } 52 } 53 return 0; 54 } 55 56 static void write_file(const char *path, const char *buf, size_t buflen) 57 { 58 int fd; 59 ssize_t numwritten; 60 61 fd = open(path, O_WRONLY); 62 if (fd == -1) 63 ksft_exit_fail_msg("%s open failed: %s\n", path, strerror(errno)); 64 65 numwritten = write(fd, buf, buflen - 1); 66 close(fd); 67 if (numwritten < 1) 68 ksft_exit_fail_msg("Write failed\n"); 69 } 70 71 static void write_debugfs(const char *fmt, ...) 72 { 73 char input[INPUT_MAX]; 74 int ret; 75 va_list argp; 76 77 va_start(argp, fmt); 78 ret = vsnprintf(input, INPUT_MAX, fmt, argp); 79 va_end(argp); 80 81 if (ret >= INPUT_MAX) 82 ksft_exit_fail_msg("%s: Debugfs input is too long\n", __func__); 83 84 write_file(SPLIT_DEBUGFS, input, ret + 1); 85 } 86 87 static char *allocate_zero_filled_hugepage(size_t len) 88 { 89 char *result; 90 size_t i; 91 92 result = memalign(pmd_pagesize, len); 93 if (!result) { 94 printf("Fail to allocate memory\n"); 95 exit(EXIT_FAILURE); 96 } 97 98 madvise(result, len, MADV_HUGEPAGE); 99 100 for (i = 0; i < len; i++) 101 result[i] = (char)0; 102 103 return result; 104 } 105 106 static void verify_rss_anon_split_huge_page_all_zeroes(char *one_page, int nr_hpages, size_t len) 107 { 108 unsigned long rss_anon_before, rss_anon_after; 109 size_t i; 110 111 if (!check_huge_anon(one_page, 4, pmd_pagesize)) 112 ksft_exit_fail_msg("No THP is allocated\n"); 113 114 rss_anon_before = rss_anon(); 115 if (!rss_anon_before) 116 ksft_exit_fail_msg("No RssAnon is allocated before split\n"); 117 118 /* split all THPs */ 119 write_debugfs(PID_FMT, getpid(), (uint64_t)one_page, 120 (uint64_t)one_page + len, 0); 121 122 for (i = 0; i < len; i++) 123 if (one_page[i] != (char)0) 124 ksft_exit_fail_msg("%ld byte corrupted\n", i); 125 126 if (!check_huge_anon(one_page, 0, pmd_pagesize)) 127 ksft_exit_fail_msg("Still AnonHugePages not split\n"); 128 129 rss_anon_after = rss_anon(); 130 if (rss_anon_after >= rss_anon_before) 131 ksft_exit_fail_msg("Incorrect RssAnon value. Before: %ld After: %ld\n", 132 rss_anon_before, rss_anon_after); 133 } 134 135 void split_pmd_zero_pages(void) 136 { 137 char *one_page; 138 int nr_hpages = 4; 139 size_t len = nr_hpages * pmd_pagesize; 140 141 one_page = allocate_zero_filled_hugepage(len); 142 verify_rss_anon_split_huge_page_all_zeroes(one_page, nr_hpages, len); 143 ksft_test_result_pass("Split zero filled huge pages successful\n"); 144 free(one_page); 145 } 146 147 void split_pmd_thp_to_order(int order) 148 { 149 char *one_page; 150 size_t len = 4 * pmd_pagesize; 151 size_t i; 152 153 one_page = memalign(pmd_pagesize, len); 154 if (!one_page) 155 ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno)); 156 157 madvise(one_page, len, MADV_HUGEPAGE); 158 159 for (i = 0; i < len; i++) 160 one_page[i] = (char)i; 161 162 if (!check_huge_anon(one_page, 4, pmd_pagesize)) 163 ksft_exit_fail_msg("No THP is allocated\n"); 164 165 /* split all THPs */ 166 write_debugfs(PID_FMT, getpid(), (uint64_t)one_page, 167 (uint64_t)one_page + len, order); 168 169 for (i = 0; i < len; i++) 170 if (one_page[i] != (char)i) 171 ksft_exit_fail_msg("%ld byte corrupted\n", i); 172 173 174 if (!check_huge_anon(one_page, 0, pmd_pagesize)) 175 ksft_exit_fail_msg("Still AnonHugePages not split\n"); 176 177 ksft_test_result_pass("Split huge pages to order %d successful\n", order); 178 free(one_page); 179 } 180 181 void split_pte_mapped_thp(void) 182 { 183 char *one_page, *pte_mapped, *pte_mapped2; 184 size_t len = 4 * pmd_pagesize; 185 uint64_t thp_size; 186 size_t i; 187 const char *pagemap_template = "/proc/%d/pagemap"; 188 const char *kpageflags_proc = "/proc/kpageflags"; 189 char pagemap_proc[255]; 190 int pagemap_fd; 191 int kpageflags_fd; 192 193 if (snprintf(pagemap_proc, 255, pagemap_template, getpid()) < 0) 194 ksft_exit_fail_msg("get pagemap proc error: %s\n", strerror(errno)); 195 196 pagemap_fd = open(pagemap_proc, O_RDONLY); 197 if (pagemap_fd == -1) 198 ksft_exit_fail_msg("read pagemap: %s\n", strerror(errno)); 199 200 kpageflags_fd = open(kpageflags_proc, O_RDONLY); 201 if (kpageflags_fd == -1) 202 ksft_exit_fail_msg("read kpageflags: %s\n", strerror(errno)); 203 204 one_page = mmap((void *)(1UL << 30), len, PROT_READ | PROT_WRITE, 205 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); 206 if (one_page == MAP_FAILED) 207 ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno)); 208 209 madvise(one_page, len, MADV_HUGEPAGE); 210 211 for (i = 0; i < len; i++) 212 one_page[i] = (char)i; 213 214 if (!check_huge_anon(one_page, 4, pmd_pagesize)) 215 ksft_exit_fail_msg("No THP is allocated\n"); 216 217 /* remap the first pagesize of first THP */ 218 pte_mapped = mremap(one_page, pagesize, pagesize, MREMAP_MAYMOVE); 219 220 /* remap the Nth pagesize of Nth THP */ 221 for (i = 1; i < 4; i++) { 222 pte_mapped2 = mremap(one_page + pmd_pagesize * i + pagesize * i, 223 pagesize, pagesize, 224 MREMAP_MAYMOVE|MREMAP_FIXED, 225 pte_mapped + pagesize * i); 226 if (pte_mapped2 == MAP_FAILED) 227 ksft_exit_fail_msg("mremap failed: %s\n", strerror(errno)); 228 } 229 230 /* smap does not show THPs after mremap, use kpageflags instead */ 231 thp_size = 0; 232 for (i = 0; i < pagesize * 4; i++) 233 if (i % pagesize == 0 && 234 is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) 235 thp_size++; 236 237 if (thp_size != 4) 238 ksft_exit_fail_msg("Some THPs are missing during mremap\n"); 239 240 /* split all remapped THPs */ 241 write_debugfs(PID_FMT, getpid(), (uint64_t)pte_mapped, 242 (uint64_t)pte_mapped + pagesize * 4, 0); 243 244 /* smap does not show THPs after mremap, use kpageflags instead */ 245 thp_size = 0; 246 for (i = 0; i < pagesize * 4; i++) { 247 if (pte_mapped[i] != (char)i) 248 ksft_exit_fail_msg("%ld byte corrupted\n", i); 249 250 if (i % pagesize == 0 && 251 is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) 252 thp_size++; 253 } 254 255 if (thp_size) 256 ksft_exit_fail_msg("Still %ld THPs not split\n", thp_size); 257 258 ksft_test_result_pass("Split PTE-mapped huge pages successful\n"); 259 munmap(one_page, len); 260 close(pagemap_fd); 261 close(kpageflags_fd); 262 } 263 264 void split_file_backed_thp(void) 265 { 266 int status; 267 int fd; 268 ssize_t num_written; 269 char tmpfs_template[] = "/tmp/thp_split_XXXXXX"; 270 const char *tmpfs_loc = mkdtemp(tmpfs_template); 271 char testfile[INPUT_MAX]; 272 uint64_t pgoff_start = 0, pgoff_end = 1024; 273 274 ksft_print_msg("Please enable pr_debug in split_huge_pages_in_file() for more info.\n"); 275 276 status = mount("tmpfs", tmpfs_loc, "tmpfs", 0, "huge=always,size=4m"); 277 278 if (status) 279 ksft_exit_fail_msg("Unable to create a tmpfs for testing\n"); 280 281 status = snprintf(testfile, INPUT_MAX, "%s/thp_file", tmpfs_loc); 282 if (status >= INPUT_MAX) { 283 ksft_exit_fail_msg("Fail to create file-backed THP split testing file\n"); 284 } 285 286 fd = open(testfile, O_CREAT|O_WRONLY, 0664); 287 if (fd == -1) { 288 ksft_perror("Cannot open testing file"); 289 goto cleanup; 290 } 291 292 /* write something to the file, so a file-backed THP can be allocated */ 293 num_written = write(fd, tmpfs_loc, strlen(tmpfs_loc) + 1); 294 close(fd); 295 296 if (num_written < 1) { 297 ksft_perror("Fail to write data to testing file"); 298 goto cleanup; 299 } 300 301 /* split the file-backed THP */ 302 write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end, 0); 303 304 status = unlink(testfile); 305 if (status) { 306 ksft_perror("Cannot remove testing file"); 307 goto cleanup; 308 } 309 310 status = umount(tmpfs_loc); 311 if (status) { 312 rmdir(tmpfs_loc); 313 ksft_exit_fail_msg("Unable to umount %s\n", tmpfs_loc); 314 } 315 316 status = rmdir(tmpfs_loc); 317 if (status) 318 ksft_exit_fail_msg("cannot remove tmp dir: %s\n", strerror(errno)); 319 320 ksft_print_msg("Please check dmesg for more information\n"); 321 ksft_test_result_pass("File-backed THP split test done\n"); 322 return; 323 324 cleanup: 325 umount(tmpfs_loc); 326 rmdir(tmpfs_loc); 327 ksft_exit_fail_msg("Error occurred\n"); 328 } 329 330 bool prepare_thp_fs(const char *xfs_path, char *thp_fs_template, 331 const char **thp_fs_loc) 332 { 333 if (xfs_path) { 334 *thp_fs_loc = xfs_path; 335 return false; 336 } 337 338 *thp_fs_loc = mkdtemp(thp_fs_template); 339 340 if (!*thp_fs_loc) 341 ksft_exit_fail_msg("cannot create temp folder\n"); 342 343 return true; 344 } 345 346 void cleanup_thp_fs(const char *thp_fs_loc, bool created_tmp) 347 { 348 int status; 349 350 if (!created_tmp) 351 return; 352 353 status = rmdir(thp_fs_loc); 354 if (status) 355 ksft_exit_fail_msg("cannot remove tmp dir: %s\n", 356 strerror(errno)); 357 } 358 359 int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, 360 char **addr) 361 { 362 size_t i; 363 int dummy = 0; 364 365 srand(time(NULL)); 366 367 *fd = open(testfile, O_CREAT | O_RDWR, 0664); 368 if (*fd == -1) 369 ksft_exit_fail_msg("Failed to create a file at %s\n", testfile); 370 371 for (i = 0; i < fd_size; i++) { 372 unsigned char byte = (unsigned char)i; 373 374 write(*fd, &byte, sizeof(byte)); 375 } 376 close(*fd); 377 sync(); 378 *fd = open("/proc/sys/vm/drop_caches", O_WRONLY); 379 if (*fd == -1) { 380 ksft_perror("open drop_caches"); 381 goto err_out_unlink; 382 } 383 if (write(*fd, "3", 1) != 1) { 384 ksft_perror("write to drop_caches"); 385 goto err_out_unlink; 386 } 387 close(*fd); 388 389 *fd = open(testfile, O_RDWR); 390 if (*fd == -1) { 391 ksft_perror("Failed to open testfile\n"); 392 goto err_out_unlink; 393 } 394 395 *addr = mmap(NULL, fd_size, PROT_READ|PROT_WRITE, MAP_SHARED, *fd, 0); 396 if (*addr == (char *)-1) { 397 ksft_perror("cannot mmap"); 398 goto err_out_close; 399 } 400 madvise(*addr, fd_size, MADV_HUGEPAGE); 401 402 for (size_t i = 0; i < fd_size; i++) 403 dummy += *(*addr + i); 404 asm volatile("" : "+r" (dummy)); 405 406 if (!check_huge_file(*addr, fd_size / pmd_pagesize, pmd_pagesize)) { 407 ksft_print_msg("No large pagecache folio generated, please provide a filesystem supporting large folio\n"); 408 munmap(*addr, fd_size); 409 close(*fd); 410 unlink(testfile); 411 ksft_test_result_skip("Pagecache folio split skipped\n"); 412 return -2; 413 } 414 return 0; 415 err_out_close: 416 close(*fd); 417 err_out_unlink: 418 unlink(testfile); 419 ksft_exit_fail_msg("Failed to create large pagecache folios\n"); 420 return -1; 421 } 422 423 void split_thp_in_pagecache_to_order(size_t fd_size, int order, const char *fs_loc) 424 { 425 int fd; 426 char *addr; 427 size_t i; 428 char testfile[INPUT_MAX]; 429 int err = 0; 430 431 err = snprintf(testfile, INPUT_MAX, "%s/test", fs_loc); 432 433 if (err < 0) 434 ksft_exit_fail_msg("cannot generate right test file name\n"); 435 436 err = create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr); 437 if (err) 438 return; 439 err = 0; 440 441 write_debugfs(PID_FMT, getpid(), (uint64_t)addr, (uint64_t)addr + fd_size, order); 442 443 for (i = 0; i < fd_size; i++) 444 if (*(addr + i) != (char)i) { 445 ksft_print_msg("%lu byte corrupted in the file\n", i); 446 err = EXIT_FAILURE; 447 goto out; 448 } 449 450 if (!check_huge_file(addr, 0, pmd_pagesize)) { 451 ksft_print_msg("Still FilePmdMapped not split\n"); 452 err = EXIT_FAILURE; 453 goto out; 454 } 455 456 out: 457 munmap(addr, fd_size); 458 close(fd); 459 unlink(testfile); 460 if (err) 461 ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d failed\n", order); 462 ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d passed\n", order); 463 } 464 465 int main(int argc, char **argv) 466 { 467 int i; 468 size_t fd_size; 469 char *optional_xfs_path = NULL; 470 char fs_loc_template[] = "/tmp/thp_fs_XXXXXX"; 471 const char *fs_loc; 472 bool created_tmp; 473 474 ksft_print_header(); 475 476 if (geteuid() != 0) { 477 ksft_print_msg("Please run the benchmark as root\n"); 478 ksft_finished(); 479 } 480 481 if (argc > 1) 482 optional_xfs_path = argv[1]; 483 484 ksft_set_plan(1+8+2+9); 485 486 pagesize = getpagesize(); 487 pageshift = ffs(pagesize) - 1; 488 pmd_pagesize = read_pmd_pagesize(); 489 if (!pmd_pagesize) 490 ksft_exit_fail_msg("Reading PMD pagesize failed\n"); 491 492 fd_size = 2 * pmd_pagesize; 493 494 split_pmd_zero_pages(); 495 496 for (i = 0; i < 9; i++) 497 if (i != 1) 498 split_pmd_thp_to_order(i); 499 500 split_pte_mapped_thp(); 501 split_file_backed_thp(); 502 503 created_tmp = prepare_thp_fs(optional_xfs_path, fs_loc_template, 504 &fs_loc); 505 for (i = 8; i >= 0; i--) 506 split_thp_in_pagecache_to_order(fd_size, i, fs_loc); 507 cleanup_thp_fs(fs_loc, created_tmp); 508 509 ksft_finished(); 510 511 return 0; 512 } 513