1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * A test of splitting PMD THPs and PTE-mapped THPs from a specified virtual 4 * address range in a process via <debugfs>/split_huge_pages interface. 5 */ 6 7 #define _GNU_SOURCE 8 #include <stdio.h> 9 #include <stdlib.h> 10 #include <stdarg.h> 11 #include <unistd.h> 12 #include <inttypes.h> 13 #include <string.h> 14 #include <fcntl.h> 15 #include <sys/mman.h> 16 #include <sys/mount.h> 17 #include <malloc.h> 18 #include <stdbool.h> 19 #include <time.h> 20 #include "vm_util.h" 21 #include "../kselftest.h" 22 23 uint64_t pagesize; 24 unsigned int pageshift; 25 uint64_t pmd_pagesize; 26 27 #define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages" 28 #define SMAP_PATH "/proc/self/smaps" 29 #define INPUT_MAX 80 30 31 #define PID_FMT "%d,0x%lx,0x%lx,%d" 32 #define PATH_FMT "%s,0x%lx,0x%lx,%d" 33 34 #define PFN_MASK ((1UL<<55)-1) 35 #define KPF_THP (1UL<<22) 36 37 int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file) 38 { 39 uint64_t paddr; 40 uint64_t page_flags; 41 42 if (pagemap_file) { 43 pread(pagemap_file, &paddr, sizeof(paddr), 44 ((long)vaddr >> pageshift) * sizeof(paddr)); 45 46 if (kpageflags_file) { 47 pread(kpageflags_file, &page_flags, sizeof(page_flags), 48 (paddr & PFN_MASK) * sizeof(page_flags)); 49 50 return !!(page_flags & KPF_THP); 51 } 52 } 53 return 0; 54 } 55 56 static void write_file(const char *path, const char *buf, size_t buflen) 57 { 58 int fd; 59 ssize_t numwritten; 60 61 fd = open(path, O_WRONLY); 62 if (fd == -1) 63 ksft_exit_fail_msg("%s open failed: %s\n", path, strerror(errno)); 64 65 numwritten = write(fd, buf, buflen - 1); 66 close(fd); 67 if (numwritten < 1) 68 ksft_exit_fail_msg("Write failed\n"); 69 } 70 71 static void write_debugfs(const char *fmt, ...) 72 { 73 char input[INPUT_MAX]; 74 int ret; 75 va_list argp; 76 77 va_start(argp, fmt); 78 ret = vsnprintf(input, INPUT_MAX, fmt, argp); 79 va_end(argp); 80 81 if (ret >= INPUT_MAX) 82 ksft_exit_fail_msg("%s: Debugfs input is too long\n", __func__); 83 84 write_file(SPLIT_DEBUGFS, input, ret + 1); 85 } 86 87 void split_pmd_thp(void) 88 { 89 char *one_page; 90 size_t len = 4 * pmd_pagesize; 91 size_t i; 92 93 one_page = memalign(pmd_pagesize, len); 94 if (!one_page) 95 ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno)); 96 97 madvise(one_page, len, MADV_HUGEPAGE); 98 99 for (i = 0; i < len; i++) 100 one_page[i] = (char)i; 101 102 if (!check_huge_anon(one_page, 4, pmd_pagesize)) 103 ksft_exit_fail_msg("No THP is allocated\n"); 104 105 /* split all THPs */ 106 write_debugfs(PID_FMT, getpid(), (uint64_t)one_page, 107 (uint64_t)one_page + len, 0); 108 109 for (i = 0; i < len; i++) 110 if (one_page[i] != (char)i) 111 ksft_exit_fail_msg("%ld byte corrupted\n", i); 112 113 114 if (!check_huge_anon(one_page, 0, pmd_pagesize)) 115 ksft_exit_fail_msg("Still AnonHugePages not split\n"); 116 117 ksft_test_result_pass("Split huge pages successful\n"); 118 free(one_page); 119 } 120 121 void split_pte_mapped_thp(void) 122 { 123 char *one_page, *pte_mapped, *pte_mapped2; 124 size_t len = 4 * pmd_pagesize; 125 uint64_t thp_size; 126 size_t i; 127 const char *pagemap_template = "/proc/%d/pagemap"; 128 const char *kpageflags_proc = "/proc/kpageflags"; 129 char pagemap_proc[255]; 130 int pagemap_fd; 131 int kpageflags_fd; 132 133 if (snprintf(pagemap_proc, 255, pagemap_template, getpid()) < 0) 134 ksft_exit_fail_msg("get pagemap proc error: %s\n", strerror(errno)); 135 136 pagemap_fd = open(pagemap_proc, O_RDONLY); 137 if (pagemap_fd == -1) 138 ksft_exit_fail_msg("read pagemap: %s\n", strerror(errno)); 139 140 kpageflags_fd = open(kpageflags_proc, O_RDONLY); 141 if (kpageflags_fd == -1) 142 ksft_exit_fail_msg("read kpageflags: %s\n", strerror(errno)); 143 144 one_page = mmap((void *)(1UL << 30), len, PROT_READ | PROT_WRITE, 145 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); 146 if (one_page == MAP_FAILED) 147 ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno)); 148 149 madvise(one_page, len, MADV_HUGEPAGE); 150 151 for (i = 0; i < len; i++) 152 one_page[i] = (char)i; 153 154 if (!check_huge_anon(one_page, 4, pmd_pagesize)) 155 ksft_exit_fail_msg("No THP is allocated\n"); 156 157 /* remap the first pagesize of first THP */ 158 pte_mapped = mremap(one_page, pagesize, pagesize, MREMAP_MAYMOVE); 159 160 /* remap the Nth pagesize of Nth THP */ 161 for (i = 1; i < 4; i++) { 162 pte_mapped2 = mremap(one_page + pmd_pagesize * i + pagesize * i, 163 pagesize, pagesize, 164 MREMAP_MAYMOVE|MREMAP_FIXED, 165 pte_mapped + pagesize * i); 166 if (pte_mapped2 == MAP_FAILED) 167 ksft_exit_fail_msg("mremap failed: %s\n", strerror(errno)); 168 } 169 170 /* smap does not show THPs after mremap, use kpageflags instead */ 171 thp_size = 0; 172 for (i = 0; i < pagesize * 4; i++) 173 if (i % pagesize == 0 && 174 is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) 175 thp_size++; 176 177 if (thp_size != 4) 178 ksft_exit_fail_msg("Some THPs are missing during mremap\n"); 179 180 /* split all remapped THPs */ 181 write_debugfs(PID_FMT, getpid(), (uint64_t)pte_mapped, 182 (uint64_t)pte_mapped + pagesize * 4, 0); 183 184 /* smap does not show THPs after mremap, use kpageflags instead */ 185 thp_size = 0; 186 for (i = 0; i < pagesize * 4; i++) { 187 if (pte_mapped[i] != (char)i) 188 ksft_exit_fail_msg("%ld byte corrupted\n", i); 189 190 if (i % pagesize == 0 && 191 is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) 192 thp_size++; 193 } 194 195 if (thp_size) 196 ksft_exit_fail_msg("Still %ld THPs not split\n", thp_size); 197 198 ksft_test_result_pass("Split PTE-mapped huge pages successful\n"); 199 munmap(one_page, len); 200 close(pagemap_fd); 201 close(kpageflags_fd); 202 } 203 204 void split_file_backed_thp(void) 205 { 206 int status; 207 int fd; 208 ssize_t num_written; 209 char tmpfs_template[] = "/tmp/thp_split_XXXXXX"; 210 const char *tmpfs_loc = mkdtemp(tmpfs_template); 211 char testfile[INPUT_MAX]; 212 uint64_t pgoff_start = 0, pgoff_end = 1024; 213 214 ksft_print_msg("Please enable pr_debug in split_huge_pages_in_file() for more info.\n"); 215 216 status = mount("tmpfs", tmpfs_loc, "tmpfs", 0, "huge=always,size=4m"); 217 218 if (status) 219 ksft_exit_fail_msg("Unable to create a tmpfs for testing\n"); 220 221 status = snprintf(testfile, INPUT_MAX, "%s/thp_file", tmpfs_loc); 222 if (status >= INPUT_MAX) { 223 ksft_exit_fail_msg("Fail to create file-backed THP split testing file\n"); 224 } 225 226 fd = open(testfile, O_CREAT|O_WRONLY, 0664); 227 if (fd == -1) { 228 ksft_perror("Cannot open testing file"); 229 goto cleanup; 230 } 231 232 /* write something to the file, so a file-backed THP can be allocated */ 233 num_written = write(fd, tmpfs_loc, strlen(tmpfs_loc) + 1); 234 close(fd); 235 236 if (num_written < 1) { 237 ksft_perror("Fail to write data to testing file"); 238 goto cleanup; 239 } 240 241 /* split the file-backed THP */ 242 write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end, 0); 243 244 status = unlink(testfile); 245 if (status) { 246 ksft_perror("Cannot remove testing file"); 247 goto cleanup; 248 } 249 250 status = umount(tmpfs_loc); 251 if (status) { 252 rmdir(tmpfs_loc); 253 ksft_exit_fail_msg("Unable to umount %s\n", tmpfs_loc); 254 } 255 256 status = rmdir(tmpfs_loc); 257 if (status) 258 ksft_exit_fail_msg("cannot remove tmp dir: %s\n", strerror(errno)); 259 260 ksft_print_msg("Please check dmesg for more information\n"); 261 ksft_test_result_pass("File-backed THP split test done\n"); 262 return; 263 264 cleanup: 265 umount(tmpfs_loc); 266 rmdir(tmpfs_loc); 267 ksft_exit_fail_msg("Error occurred\n"); 268 } 269 270 bool prepare_thp_fs(const char *xfs_path, char *thp_fs_template, 271 const char **thp_fs_loc) 272 { 273 if (xfs_path) { 274 *thp_fs_loc = xfs_path; 275 return false; 276 } 277 278 *thp_fs_loc = mkdtemp(thp_fs_template); 279 280 if (!*thp_fs_loc) 281 ksft_exit_fail_msg("cannot create temp folder\n"); 282 283 return true; 284 } 285 286 void cleanup_thp_fs(const char *thp_fs_loc, bool created_tmp) 287 { 288 int status; 289 290 if (!created_tmp) 291 return; 292 293 status = rmdir(thp_fs_loc); 294 if (status) 295 ksft_exit_fail_msg("cannot remove tmp dir: %s\n", 296 strerror(errno)); 297 } 298 299 int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, 300 char **addr) 301 { 302 size_t i; 303 int dummy; 304 305 srand(time(NULL)); 306 307 *fd = open(testfile, O_CREAT | O_RDWR, 0664); 308 if (*fd == -1) 309 ksft_exit_fail_msg("Failed to create a file at %s\n", testfile); 310 311 for (i = 0; i < fd_size; i++) { 312 unsigned char byte = (unsigned char)i; 313 314 write(*fd, &byte, sizeof(byte)); 315 } 316 close(*fd); 317 sync(); 318 *fd = open("/proc/sys/vm/drop_caches", O_WRONLY); 319 if (*fd == -1) { 320 ksft_perror("open drop_caches"); 321 goto err_out_unlink; 322 } 323 if (write(*fd, "3", 1) != 1) { 324 ksft_perror("write to drop_caches"); 325 goto err_out_unlink; 326 } 327 close(*fd); 328 329 *fd = open(testfile, O_RDWR); 330 if (*fd == -1) { 331 ksft_perror("Failed to open testfile\n"); 332 goto err_out_unlink; 333 } 334 335 *addr = mmap(NULL, fd_size, PROT_READ|PROT_WRITE, MAP_SHARED, *fd, 0); 336 if (*addr == (char *)-1) { 337 ksft_perror("cannot mmap"); 338 goto err_out_close; 339 } 340 madvise(*addr, fd_size, MADV_HUGEPAGE); 341 342 for (size_t i = 0; i < fd_size; i++) 343 dummy += *(*addr + i); 344 345 if (!check_huge_file(*addr, fd_size / pmd_pagesize, pmd_pagesize)) { 346 ksft_print_msg("No large pagecache folio generated, please provide a filesystem supporting large folio\n"); 347 munmap(*addr, fd_size); 348 close(*fd); 349 unlink(testfile); 350 ksft_test_result_skip("Pagecache folio split skipped\n"); 351 return -2; 352 } 353 return 0; 354 err_out_close: 355 close(*fd); 356 err_out_unlink: 357 unlink(testfile); 358 ksft_exit_fail_msg("Failed to create large pagecache folios\n"); 359 return -1; 360 } 361 362 void split_thp_in_pagecache_to_order(size_t fd_size, int order, const char *fs_loc) 363 { 364 int fd; 365 char *addr; 366 size_t i; 367 char testfile[INPUT_MAX]; 368 int err = 0; 369 370 err = snprintf(testfile, INPUT_MAX, "%s/test", fs_loc); 371 372 if (err < 0) 373 ksft_exit_fail_msg("cannot generate right test file name\n"); 374 375 err = create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr); 376 if (err) 377 return; 378 err = 0; 379 380 write_debugfs(PID_FMT, getpid(), (uint64_t)addr, (uint64_t)addr + fd_size, order); 381 382 for (i = 0; i < fd_size; i++) 383 if (*(addr + i) != (char)i) { 384 ksft_print_msg("%lu byte corrupted in the file\n", i); 385 err = EXIT_FAILURE; 386 goto out; 387 } 388 389 if (!check_huge_file(addr, 0, pmd_pagesize)) { 390 ksft_print_msg("Still FilePmdMapped not split\n"); 391 err = EXIT_FAILURE; 392 goto out; 393 } 394 395 out: 396 munmap(addr, fd_size); 397 close(fd); 398 unlink(testfile); 399 if (err) 400 ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d failed\n", order); 401 ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d passed\n", order); 402 } 403 404 int main(int argc, char **argv) 405 { 406 int i; 407 size_t fd_size; 408 char *optional_xfs_path = NULL; 409 char fs_loc_template[] = "/tmp/thp_fs_XXXXXX"; 410 const char *fs_loc; 411 bool created_tmp; 412 413 ksft_print_header(); 414 415 if (geteuid() != 0) { 416 ksft_print_msg("Please run the benchmark as root\n"); 417 ksft_finished(); 418 } 419 420 if (argc > 1) 421 optional_xfs_path = argv[1]; 422 423 ksft_set_plan(3+9); 424 425 pagesize = getpagesize(); 426 pageshift = ffs(pagesize) - 1; 427 pmd_pagesize = read_pmd_pagesize(); 428 if (!pmd_pagesize) 429 ksft_exit_fail_msg("Reading PMD pagesize failed\n"); 430 431 fd_size = 2 * pmd_pagesize; 432 433 split_pmd_thp(); 434 split_pte_mapped_thp(); 435 split_file_backed_thp(); 436 437 created_tmp = prepare_thp_fs(optional_xfs_path, fs_loc_template, 438 &fs_loc); 439 for (i = 8; i >= 0; i--) 440 split_thp_in_pagecache_to_order(fd_size, i, fs_loc); 441 cleanup_thp_fs(fs_loc, created_tmp); 442 443 ksft_finished(); 444 445 return 0; 446 } 447