1*224f1292SZi Yan // SPDX-License-Identifier: GPL-2.0 2*224f1292SZi Yan /* 3*224f1292SZi Yan * The test creates shmem PMD huge pages, fills all pages with known patterns, 4*224f1292SZi Yan * then continuously verifies non-punched pages with 16 threads. Meanwhile, the 5*224f1292SZi Yan * main thread punches holes via MADV_REMOVE on the shmem. 6*224f1292SZi Yan * 7*224f1292SZi Yan * It tests the race condition between folio_split() and filemap_get_entry(), 8*224f1292SZi Yan * where the hole punches on shmem lead to folio_split() and reading the shmem 9*224f1292SZi Yan * lead to filemap_get_entry(). 10*224f1292SZi Yan */ 11*224f1292SZi Yan 12*224f1292SZi Yan #define _GNU_SOURCE 13*224f1292SZi Yan #include <errno.h> 14*224f1292SZi Yan #include <inttypes.h> 15*224f1292SZi Yan #include <linux/mman.h> 16*224f1292SZi Yan #include <pthread.h> 17*224f1292SZi Yan #include <stdatomic.h> 18*224f1292SZi Yan #include <stdbool.h> 19*224f1292SZi Yan #include <stdint.h> 20*224f1292SZi Yan #include <stdio.h> 21*224f1292SZi Yan #include <stdlib.h> 22*224f1292SZi Yan #include <string.h> 23*224f1292SZi Yan #include <sys/mman.h> 24*224f1292SZi Yan #include <signal.h> 25*224f1292SZi Yan #include <unistd.h> 26*224f1292SZi Yan #include "vm_util.h" 27*224f1292SZi Yan #include "kselftest.h" 28*224f1292SZi Yan #include "thp_settings.h" 29*224f1292SZi Yan 30*224f1292SZi Yan uint64_t page_size; 31*224f1292SZi Yan uint64_t pmd_pagesize; 32*224f1292SZi Yan #define NR_PMD_PAGE 5 33*224f1292SZi Yan #define FILE_SIZE (pmd_pagesize * NR_PMD_PAGE) 34*224f1292SZi Yan #define TOTAL_PAGES (FILE_SIZE / page_size) 35*224f1292SZi Yan 36*224f1292SZi Yan /* Every N-th to N+M-th pages are punched; not aligned with huge page boundaries. */ 37*224f1292SZi Yan #define PUNCH_INTERVAL 50 /* N */ 38*224f1292SZi Yan #define PUNCH_SIZE_FACTOR 3 /* M */ 39*224f1292SZi Yan 40*224f1292SZi Yan #define NUM_READER_THREADS 16 41*224f1292SZi Yan #define FILL_BYTE 0xAF 42*224f1292SZi Yan #define NUM_ITERATIONS 100 43*224f1292SZi Yan 44*224f1292SZi Yan /* Shared control block: control reading threads and record stats */ 45*224f1292SZi Yan struct shared_ctl { 46*224f1292SZi Yan atomic_uint_fast32_t stop; 47*224f1292SZi Yan atomic_uint_fast64_t reader_failures; 48*224f1292SZi Yan atomic_uint_fast64_t reader_verified; 49*224f1292SZi Yan pthread_barrier_t barrier; 50*224f1292SZi Yan }; 51*224f1292SZi Yan 52*224f1292SZi Yan static void fill_page(unsigned char *base, size_t page_idx) 53*224f1292SZi Yan { 54*224f1292SZi Yan unsigned char *page_ptr = base + page_idx * page_size; 55*224f1292SZi Yan uint64_t idx = (uint64_t)page_idx; 56*224f1292SZi Yan 57*224f1292SZi Yan memset(page_ptr, FILL_BYTE, page_size); 58*224f1292SZi Yan memcpy(page_ptr, &idx, sizeof(idx)); 59*224f1292SZi Yan } 60*224f1292SZi Yan 61*224f1292SZi Yan /* Returns true if valid, false if corrupted. */ 62*224f1292SZi Yan static bool check_page(unsigned char *base, uint64_t page_idx) 63*224f1292SZi Yan { 64*224f1292SZi Yan unsigned char *page_ptr = base + page_idx * page_size; 65*224f1292SZi Yan uint64_t expected_idx = (uint64_t)page_idx; 66*224f1292SZi Yan uint64_t got_idx; 67*224f1292SZi Yan 68*224f1292SZi Yan memcpy(&got_idx, page_ptr, 8); 69*224f1292SZi Yan 70*224f1292SZi Yan if (got_idx != expected_idx) { 71*224f1292SZi Yan uint64_t off; 72*224f1292SZi Yan int all_zero = 1; 73*224f1292SZi Yan 74*224f1292SZi Yan for (off = 0; off < page_size; off++) { 75*224f1292SZi Yan if (page_ptr[off] != 0) { 76*224f1292SZi Yan all_zero = 0; 77*224f1292SZi Yan break; 78*224f1292SZi Yan } 79*224f1292SZi Yan } 80*224f1292SZi Yan if (all_zero) { 81*224f1292SZi Yan ksft_print_msg("CORRUPTED: page %" PRIu64 82*224f1292SZi Yan " (huge page %" PRIu64 83*224f1292SZi Yan ") is ALL ZEROS\n", 84*224f1292SZi Yan page_idx, 85*224f1292SZi Yan (page_idx * page_size) / pmd_pagesize); 86*224f1292SZi Yan } else { 87*224f1292SZi Yan ksft_print_msg("CORRUPTED: page %" PRIu64 88*224f1292SZi Yan " (huge page %" PRIu64 89*224f1292SZi Yan "): expected idx %" PRIu64 90*224f1292SZi Yan ", got %" PRIu64 "\n", 91*224f1292SZi Yan page_idx, 92*224f1292SZi Yan (page_idx * page_size) / pmd_pagesize, 93*224f1292SZi Yan page_idx, got_idx); 94*224f1292SZi Yan } 95*224f1292SZi Yan return false; 96*224f1292SZi Yan } 97*224f1292SZi Yan return true; 98*224f1292SZi Yan } 99*224f1292SZi Yan 100*224f1292SZi Yan struct reader_arg { 101*224f1292SZi Yan unsigned char *base; 102*224f1292SZi Yan struct shared_ctl *ctl; 103*224f1292SZi Yan int tid; 104*224f1292SZi Yan atomic_uint_fast64_t *failures; 105*224f1292SZi Yan atomic_uint_fast64_t *verified; 106*224f1292SZi Yan }; 107*224f1292SZi Yan 108*224f1292SZi Yan static void *reader_thread(void *arg) 109*224f1292SZi Yan { 110*224f1292SZi Yan struct reader_arg *ra = (struct reader_arg *)arg; 111*224f1292SZi Yan unsigned char *base = ra->base; 112*224f1292SZi Yan struct shared_ctl *ctl = ra->ctl; 113*224f1292SZi Yan int tid = ra->tid; 114*224f1292SZi Yan atomic_uint_fast64_t *failures = ra->failures; 115*224f1292SZi Yan atomic_uint_fast64_t *verified = ra->verified; 116*224f1292SZi Yan uint64_t page_idx; 117*224f1292SZi Yan 118*224f1292SZi Yan pthread_barrier_wait(&ctl->barrier); 119*224f1292SZi Yan 120*224f1292SZi Yan while (atomic_load_explicit(&ctl->stop, memory_order_acquire) == 0) { 121*224f1292SZi Yan for (page_idx = (size_t)tid; page_idx < TOTAL_PAGES; 122*224f1292SZi Yan page_idx += NUM_READER_THREADS) { 123*224f1292SZi Yan /* 124*224f1292SZi Yan * page_idx % PUNCH_INTERVAL is in [0, PUNCH_INTERVAL), 125*224f1292SZi Yan * skip [0, PUNCH_SIZE_FACTOR) 126*224f1292SZi Yan */ 127*224f1292SZi Yan if (page_idx % PUNCH_INTERVAL < PUNCH_SIZE_FACTOR) 128*224f1292SZi Yan continue; 129*224f1292SZi Yan if (check_page(base, page_idx)) 130*224f1292SZi Yan atomic_fetch_add_explicit(verified, 1, 131*224f1292SZi Yan memory_order_relaxed); 132*224f1292SZi Yan else 133*224f1292SZi Yan atomic_fetch_add_explicit(failures, 1, 134*224f1292SZi Yan memory_order_relaxed); 135*224f1292SZi Yan } 136*224f1292SZi Yan if (atomic_load_explicit(failures, memory_order_relaxed) > 0) 137*224f1292SZi Yan break; 138*224f1292SZi Yan } 139*224f1292SZi Yan 140*224f1292SZi Yan return NULL; 141*224f1292SZi Yan } 142*224f1292SZi Yan 143*224f1292SZi Yan static void create_readers(pthread_t *threads, struct reader_arg *args, 144*224f1292SZi Yan unsigned char *base, struct shared_ctl *ctl) 145*224f1292SZi Yan { 146*224f1292SZi Yan int i; 147*224f1292SZi Yan 148*224f1292SZi Yan for (i = 0; i < NUM_READER_THREADS; i++) { 149*224f1292SZi Yan args[i].base = base; 150*224f1292SZi Yan args[i].ctl = ctl; 151*224f1292SZi Yan args[i].tid = i; 152*224f1292SZi Yan args[i].failures = &ctl->reader_failures; 153*224f1292SZi Yan args[i].verified = &ctl->reader_verified; 154*224f1292SZi Yan if (pthread_create(&threads[i], NULL, reader_thread, 155*224f1292SZi Yan &args[i]) != 0) 156*224f1292SZi Yan ksft_exit_fail_msg("pthread_create failed\n"); 157*224f1292SZi Yan } 158*224f1292SZi Yan } 159*224f1292SZi Yan 160*224f1292SZi Yan /* Run a single iteration. Returns total number of corrupted pages. */ 161*224f1292SZi Yan static uint64_t run_iteration(void) 162*224f1292SZi Yan { 163*224f1292SZi Yan uint64_t reader_failures, reader_verified; 164*224f1292SZi Yan struct reader_arg args[NUM_READER_THREADS]; 165*224f1292SZi Yan pthread_t threads[NUM_READER_THREADS]; 166*224f1292SZi Yan unsigned char *mmap_base; 167*224f1292SZi Yan struct shared_ctl ctl; 168*224f1292SZi Yan uint64_t i; 169*224f1292SZi Yan 170*224f1292SZi Yan memset(&ctl, 0, sizeof(struct shared_ctl)); 171*224f1292SZi Yan 172*224f1292SZi Yan mmap_base = mmap(NULL, FILE_SIZE, PROT_READ | PROT_WRITE, 173*224f1292SZi Yan MAP_SHARED | MAP_ANONYMOUS, -1, 0); 174*224f1292SZi Yan 175*224f1292SZi Yan if (mmap_base == MAP_FAILED) 176*224f1292SZi Yan ksft_exit_fail_msg("mmap failed: %d\n", errno); 177*224f1292SZi Yan 178*224f1292SZi Yan if (madvise(mmap_base, FILE_SIZE, MADV_HUGEPAGE) != 0) 179*224f1292SZi Yan ksft_exit_fail_msg("madvise(MADV_HUGEPAGE) failed: %d\n", 180*224f1292SZi Yan errno); 181*224f1292SZi Yan 182*224f1292SZi Yan for (i = 0; i < TOTAL_PAGES; i++) 183*224f1292SZi Yan fill_page(mmap_base, i); 184*224f1292SZi Yan 185*224f1292SZi Yan if (!check_huge_shmem(mmap_base, NR_PMD_PAGE, pmd_pagesize)) 186*224f1292SZi Yan ksft_exit_fail_msg("No shmem THP is allocated\n"); 187*224f1292SZi Yan 188*224f1292SZi Yan if (pthread_barrier_init(&ctl.barrier, NULL, NUM_READER_THREADS + 1) != 0) 189*224f1292SZi Yan ksft_exit_fail_msg("pthread_barrier_init failed\n"); 190*224f1292SZi Yan 191*224f1292SZi Yan create_readers(threads, args, mmap_base, &ctl); 192*224f1292SZi Yan 193*224f1292SZi Yan /* Wait for all reader threads to be ready before punching holes. */ 194*224f1292SZi Yan pthread_barrier_wait(&ctl.barrier); 195*224f1292SZi Yan 196*224f1292SZi Yan for (i = 0; i < TOTAL_PAGES; i++) { 197*224f1292SZi Yan if (i % PUNCH_INTERVAL != 0) 198*224f1292SZi Yan continue; 199*224f1292SZi Yan if (madvise(mmap_base + i * page_size, 200*224f1292SZi Yan PUNCH_SIZE_FACTOR * page_size, MADV_REMOVE) != 0) { 201*224f1292SZi Yan ksft_exit_fail_msg( 202*224f1292SZi Yan "madvise(MADV_REMOVE) failed on page %" PRIu64 ": %d\n", 203*224f1292SZi Yan i, errno); 204*224f1292SZi Yan } 205*224f1292SZi Yan 206*224f1292SZi Yan i += PUNCH_SIZE_FACTOR - 1; 207*224f1292SZi Yan } 208*224f1292SZi Yan 209*224f1292SZi Yan atomic_store_explicit(&ctl.stop, 1, memory_order_release); 210*224f1292SZi Yan 211*224f1292SZi Yan for (i = 0; i < NUM_READER_THREADS; i++) 212*224f1292SZi Yan pthread_join(threads[i], NULL); 213*224f1292SZi Yan 214*224f1292SZi Yan pthread_barrier_destroy(&ctl.barrier); 215*224f1292SZi Yan 216*224f1292SZi Yan reader_failures = atomic_load_explicit(&ctl.reader_failures, 217*224f1292SZi Yan memory_order_acquire); 218*224f1292SZi Yan reader_verified = atomic_load_explicit(&ctl.reader_verified, 219*224f1292SZi Yan memory_order_acquire); 220*224f1292SZi Yan if (reader_failures) 221*224f1292SZi Yan ksft_print_msg("Child: %" PRIu64 " pages verified, %" PRIu64 " failures\n", 222*224f1292SZi Yan reader_verified, reader_failures); 223*224f1292SZi Yan 224*224f1292SZi Yan munmap(mmap_base, FILE_SIZE); 225*224f1292SZi Yan 226*224f1292SZi Yan return reader_failures; 227*224f1292SZi Yan } 228*224f1292SZi Yan 229*224f1292SZi Yan static void thp_cleanup_handler(int signum) 230*224f1292SZi Yan { 231*224f1292SZi Yan thp_restore_settings(); 232*224f1292SZi Yan /* 233*224f1292SZi Yan * Restore default handler and re-raise the signal to exit. 234*224f1292SZi Yan * This is to ensure the test process exits with the correct 235*224f1292SZi Yan * status code corresponding to the signal. 236*224f1292SZi Yan */ 237*224f1292SZi Yan signal(signum, SIG_DFL); 238*224f1292SZi Yan raise(signum); 239*224f1292SZi Yan } 240*224f1292SZi Yan 241*224f1292SZi Yan static void thp_settings_cleanup(void) 242*224f1292SZi Yan { 243*224f1292SZi Yan thp_restore_settings(); 244*224f1292SZi Yan } 245*224f1292SZi Yan 246*224f1292SZi Yan int main(void) 247*224f1292SZi Yan { 248*224f1292SZi Yan struct thp_settings current_settings; 249*224f1292SZi Yan uint64_t corrupted_pages; 250*224f1292SZi Yan uint64_t iter; 251*224f1292SZi Yan 252*224f1292SZi Yan ksft_print_header(); 253*224f1292SZi Yan 254*224f1292SZi Yan page_size = getpagesize(); 255*224f1292SZi Yan pmd_pagesize = read_pmd_pagesize(); 256*224f1292SZi Yan 257*224f1292SZi Yan if (!thp_available() || !pmd_pagesize) 258*224f1292SZi Yan ksft_exit_skip("Transparent Hugepages not available\n"); 259*224f1292SZi Yan 260*224f1292SZi Yan if (geteuid() != 0) 261*224f1292SZi Yan ksft_exit_skip("Please run the test as root\n"); 262*224f1292SZi Yan 263*224f1292SZi Yan thp_save_settings(); 264*224f1292SZi Yan /* make sure thp settings are restored */ 265*224f1292SZi Yan if (atexit(thp_settings_cleanup) != 0) 266*224f1292SZi Yan ksft_exit_fail_msg("atexit failed\n"); 267*224f1292SZi Yan 268*224f1292SZi Yan signal(SIGINT, thp_cleanup_handler); 269*224f1292SZi Yan signal(SIGTERM, thp_cleanup_handler); 270*224f1292SZi Yan 271*224f1292SZi Yan thp_read_settings(¤t_settings); 272*224f1292SZi Yan current_settings.shmem_enabled = SHMEM_ADVISE; 273*224f1292SZi Yan thp_write_settings(¤t_settings); 274*224f1292SZi Yan 275*224f1292SZi Yan ksft_set_plan(1); 276*224f1292SZi Yan 277*224f1292SZi Yan ksft_print_msg("folio split race test\n"); 278*224f1292SZi Yan 279*224f1292SZi Yan for (iter = 0; iter < NUM_ITERATIONS; iter++) { 280*224f1292SZi Yan corrupted_pages = run_iteration(); 281*224f1292SZi Yan if (corrupted_pages > 0) 282*224f1292SZi Yan break; 283*224f1292SZi Yan } 284*224f1292SZi Yan 285*224f1292SZi Yan if (iter < NUM_ITERATIONS) 286*224f1292SZi Yan ksft_test_result_fail("FAILED on iteration %" PRIu64 287*224f1292SZi Yan ": %" PRIu64 288*224f1292SZi Yan " pages corrupted by MADV_REMOVE!\n", 289*224f1292SZi Yan iter, corrupted_pages); 290*224f1292SZi Yan else 291*224f1292SZi Yan ksft_test_result_pass("All %d iterations passed\n", 292*224f1292SZi Yan NUM_ITERATIONS); 293*224f1292SZi Yan 294*224f1292SZi Yan ksft_exit(iter == NUM_ITERATIONS); 295*224f1292SZi Yan 296*224f1292SZi Yan return 0; 297*224f1292SZi Yan } 298