1baa489faSSeongJae Park // SPDX-License-Identifier: GPL-2.0-only 2baa489faSSeongJae Park /* 3baa489faSSeongJae Park * COW (Copy On Write) tests. 4baa489faSSeongJae Park * 5baa489faSSeongJae Park * Copyright 2022, Red Hat, Inc. 6baa489faSSeongJae Park * 7baa489faSSeongJae Park * Author(s): David Hildenbrand <david@redhat.com> 8baa489faSSeongJae Park */ 9baa489faSSeongJae Park #define _GNU_SOURCE 10baa489faSSeongJae Park #include <stdlib.h> 11baa489faSSeongJae Park #include <string.h> 12baa489faSSeongJae Park #include <stdbool.h> 13baa489faSSeongJae Park #include <stdint.h> 14baa489faSSeongJae Park #include <unistd.h> 15baa489faSSeongJae Park #include <errno.h> 16baa489faSSeongJae Park #include <fcntl.h> 17baa489faSSeongJae Park #include <assert.h> 180183d777SMuhammad Usama Anjum #include <linux/mman.h> 19baa489faSSeongJae Park #include <sys/mman.h> 20baa489faSSeongJae Park #include <sys/ioctl.h> 21baa489faSSeongJae Park #include <sys/wait.h> 22baa489faSSeongJae Park #include <linux/memfd.h> 23baa489faSSeongJae Park 24baa489faSSeongJae Park #include "local_config.h" 25baa489faSSeongJae Park #ifdef LOCAL_CONFIG_HAVE_LIBURING 26baa489faSSeongJae Park #include <liburing.h> 27baa489faSSeongJae Park #endif /* LOCAL_CONFIG_HAVE_LIBURING */ 28baa489faSSeongJae Park 29baa489faSSeongJae Park #include "../../../../mm/gup_test.h" 30baa489faSSeongJae Park #include "../kselftest.h" 31baa489faSSeongJae Park #include "vm_util.h" 32c0f79103SRyan Roberts #include "thp_settings.h" 33baa489faSSeongJae Park 34baa489faSSeongJae Park static size_t pagesize; 35baa489faSSeongJae Park static int pagemap_fd; 3612dc16b3SRyan Roberts static size_t pmdsize; 37c0f79103SRyan Roberts static int nr_thpsizes; 38c0f79103SRyan Roberts static size_t thpsizes[20]; 39baa489faSSeongJae Park static int nr_hugetlbsizes; 40baa489faSSeongJae Park static size_t hugetlbsizes[10]; 41baa489faSSeongJae Park static int gup_fd; 42baa489faSSeongJae Park static bool has_huge_zeropage; 43baa489faSSeongJae Park 44c0f79103SRyan Roberts static int sz2ord(size_t size) 45c0f79103SRyan Roberts { 46c0f79103SRyan Roberts return __builtin_ctzll(size / pagesize); 47c0f79103SRyan Roberts } 48c0f79103SRyan Roberts 49c0f79103SRyan Roberts static int detect_thp_sizes(size_t sizes[], int max) 50c0f79103SRyan Roberts { 51c0f79103SRyan Roberts int count = 0; 52c0f79103SRyan Roberts unsigned long orders; 53c0f79103SRyan Roberts size_t kb; 54c0f79103SRyan Roberts int i; 55c0f79103SRyan Roberts 56c0f79103SRyan Roberts /* thp not supported at all. */ 57c0f79103SRyan Roberts if (!pmdsize) 58c0f79103SRyan Roberts return 0; 59c0f79103SRyan Roberts 60c0f79103SRyan Roberts orders = 1UL << sz2ord(pmdsize); 61c0f79103SRyan Roberts orders |= thp_supported_orders(); 62c0f79103SRyan Roberts 63c0f79103SRyan Roberts for (i = 0; orders && count < max; i++) { 64c0f79103SRyan Roberts if (!(orders & (1UL << i))) 65c0f79103SRyan Roberts continue; 66c0f79103SRyan Roberts orders &= ~(1UL << i); 67c0f79103SRyan Roberts kb = (pagesize >> 10) << i; 68c0f79103SRyan Roberts sizes[count++] = kb * 1024; 69c0f79103SRyan Roberts ksft_print_msg("[INFO] detected THP size: %zu KiB\n", kb); 70c0f79103SRyan Roberts } 71c0f79103SRyan Roberts 72c0f79103SRyan Roberts return count; 73c0f79103SRyan Roberts } 74c0f79103SRyan Roberts 75baa489faSSeongJae Park static void detect_huge_zeropage(void) 76baa489faSSeongJae Park { 77baa489faSSeongJae Park int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page", 78baa489faSSeongJae Park O_RDONLY); 79baa489faSSeongJae Park size_t enabled = 0; 80baa489faSSeongJae Park char buf[15]; 81baa489faSSeongJae Park int ret; 82baa489faSSeongJae Park 83baa489faSSeongJae Park if (fd < 0) 84baa489faSSeongJae Park return; 85baa489faSSeongJae Park 86baa489faSSeongJae Park ret = pread(fd, buf, sizeof(buf), 0); 87baa489faSSeongJae Park if (ret > 0 && ret < sizeof(buf)) { 88baa489faSSeongJae Park buf[ret] = 0; 89baa489faSSeongJae Park 90baa489faSSeongJae Park enabled = strtoul(buf, NULL, 10); 91baa489faSSeongJae Park if (enabled == 1) { 92baa489faSSeongJae Park has_huge_zeropage = true; 93baa489faSSeongJae Park ksft_print_msg("[INFO] huge zeropage is enabled\n"); 94baa489faSSeongJae Park } 95baa489faSSeongJae Park } 96baa489faSSeongJae Park 97baa489faSSeongJae Park close(fd); 98baa489faSSeongJae Park } 99baa489faSSeongJae Park 100baa489faSSeongJae Park static bool range_is_swapped(void *addr, size_t size) 101baa489faSSeongJae Park { 102baa489faSSeongJae Park for (; size; addr += pagesize, size -= pagesize) 103baa489faSSeongJae Park if (!pagemap_is_swapped(pagemap_fd, addr)) 104baa489faSSeongJae Park return false; 105baa489faSSeongJae Park return true; 106baa489faSSeongJae Park } 107baa489faSSeongJae Park 108baa489faSSeongJae Park struct comm_pipes { 109baa489faSSeongJae Park int child_ready[2]; 110baa489faSSeongJae Park int parent_ready[2]; 111baa489faSSeongJae Park }; 112baa489faSSeongJae Park 113baa489faSSeongJae Park static int setup_comm_pipes(struct comm_pipes *comm_pipes) 114baa489faSSeongJae Park { 115baa489faSSeongJae Park if (pipe(comm_pipes->child_ready) < 0) 116baa489faSSeongJae Park return -errno; 117baa489faSSeongJae Park if (pipe(comm_pipes->parent_ready) < 0) { 118baa489faSSeongJae Park close(comm_pipes->child_ready[0]); 119baa489faSSeongJae Park close(comm_pipes->child_ready[1]); 120baa489faSSeongJae Park return -errno; 121baa489faSSeongJae Park } 122baa489faSSeongJae Park 123baa489faSSeongJae Park return 0; 124baa489faSSeongJae Park } 125baa489faSSeongJae Park 126baa489faSSeongJae Park static void close_comm_pipes(struct comm_pipes *comm_pipes) 127baa489faSSeongJae Park { 128baa489faSSeongJae Park close(comm_pipes->child_ready[0]); 129baa489faSSeongJae Park close(comm_pipes->child_ready[1]); 130baa489faSSeongJae Park close(comm_pipes->parent_ready[0]); 131baa489faSSeongJae Park close(comm_pipes->parent_ready[1]); 132baa489faSSeongJae Park } 133baa489faSSeongJae Park 134baa489faSSeongJae Park static int child_memcmp_fn(char *mem, size_t size, 135baa489faSSeongJae Park struct comm_pipes *comm_pipes) 136baa489faSSeongJae Park { 137baa489faSSeongJae Park char *old = malloc(size); 138baa489faSSeongJae Park char buf; 139baa489faSSeongJae Park 140baa489faSSeongJae Park /* Backup the original content. */ 141baa489faSSeongJae Park memcpy(old, mem, size); 142baa489faSSeongJae Park 143baa489faSSeongJae Park /* Wait until the parent modified the page. */ 144baa489faSSeongJae Park write(comm_pipes->child_ready[1], "0", 1); 145baa489faSSeongJae Park while (read(comm_pipes->parent_ready[0], &buf, 1) != 1) 146baa489faSSeongJae Park ; 147baa489faSSeongJae Park 148baa489faSSeongJae Park /* See if we still read the old values. */ 149baa489faSSeongJae Park return memcmp(old, mem, size); 150baa489faSSeongJae Park } 151baa489faSSeongJae Park 152baa489faSSeongJae Park static int child_vmsplice_memcmp_fn(char *mem, size_t size, 153baa489faSSeongJae Park struct comm_pipes *comm_pipes) 154baa489faSSeongJae Park { 155baa489faSSeongJae Park struct iovec iov = { 156baa489faSSeongJae Park .iov_base = mem, 157baa489faSSeongJae Park .iov_len = size, 158baa489faSSeongJae Park }; 159baa489faSSeongJae Park ssize_t cur, total, transferred; 160baa489faSSeongJae Park char *old, *new; 161baa489faSSeongJae Park int fds[2]; 162baa489faSSeongJae Park char buf; 163baa489faSSeongJae Park 164baa489faSSeongJae Park old = malloc(size); 165baa489faSSeongJae Park new = malloc(size); 166baa489faSSeongJae Park 167baa489faSSeongJae Park /* Backup the original content. */ 168baa489faSSeongJae Park memcpy(old, mem, size); 169baa489faSSeongJae Park 170baa489faSSeongJae Park if (pipe(fds) < 0) 171baa489faSSeongJae Park return -errno; 172baa489faSSeongJae Park 173baa489faSSeongJae Park /* Trigger a read-only pin. */ 174baa489faSSeongJae Park transferred = vmsplice(fds[1], &iov, 1, 0); 175baa489faSSeongJae Park if (transferred < 0) 176baa489faSSeongJae Park return -errno; 177baa489faSSeongJae Park if (transferred == 0) 178baa489faSSeongJae Park return -EINVAL; 179baa489faSSeongJae Park 180baa489faSSeongJae Park /* Unmap it from our page tables. */ 181baa489faSSeongJae Park if (munmap(mem, size) < 0) 182baa489faSSeongJae Park return -errno; 183baa489faSSeongJae Park 184baa489faSSeongJae Park /* Wait until the parent modified it. */ 185baa489faSSeongJae Park write(comm_pipes->child_ready[1], "0", 1); 186baa489faSSeongJae Park while (read(comm_pipes->parent_ready[0], &buf, 1) != 1) 187baa489faSSeongJae Park ; 188baa489faSSeongJae Park 189baa489faSSeongJae Park /* See if we still read the old values via the pipe. */ 190baa489faSSeongJae Park for (total = 0; total < transferred; total += cur) { 191baa489faSSeongJae Park cur = read(fds[0], new + total, transferred - total); 192baa489faSSeongJae Park if (cur < 0) 193baa489faSSeongJae Park return -errno; 194baa489faSSeongJae Park } 195baa489faSSeongJae Park 196baa489faSSeongJae Park return memcmp(old, new, transferred); 197baa489faSSeongJae Park } 198baa489faSSeongJae Park 199baa489faSSeongJae Park typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes); 200baa489faSSeongJae Park 201baa489faSSeongJae Park static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect, 202*4bf6a4ebSDavid Hildenbrand child_fn fn, bool xfail) 203baa489faSSeongJae Park { 204baa489faSSeongJae Park struct comm_pipes comm_pipes; 205baa489faSSeongJae Park char buf; 206baa489faSSeongJae Park int ret; 207baa489faSSeongJae Park 208baa489faSSeongJae Park ret = setup_comm_pipes(&comm_pipes); 209baa489faSSeongJae Park if (ret) { 210baa489faSSeongJae Park ksft_test_result_fail("pipe() failed\n"); 211baa489faSSeongJae Park return; 212baa489faSSeongJae Park } 213baa489faSSeongJae Park 214baa489faSSeongJae Park ret = fork(); 215baa489faSSeongJae Park if (ret < 0) { 216baa489faSSeongJae Park ksft_test_result_fail("fork() failed\n"); 217baa489faSSeongJae Park goto close_comm_pipes; 218baa489faSSeongJae Park } else if (!ret) { 219baa489faSSeongJae Park exit(fn(mem, size, &comm_pipes)); 220baa489faSSeongJae Park } 221baa489faSSeongJae Park 222baa489faSSeongJae Park while (read(comm_pipes.child_ready[0], &buf, 1) != 1) 223baa489faSSeongJae Park ; 224baa489faSSeongJae Park 225baa489faSSeongJae Park if (do_mprotect) { 226baa489faSSeongJae Park /* 227baa489faSSeongJae Park * mprotect() optimizations might try avoiding 228baa489faSSeongJae Park * write-faults by directly mapping pages writable. 229baa489faSSeongJae Park */ 230baa489faSSeongJae Park ret = mprotect(mem, size, PROT_READ); 231baa489faSSeongJae Park ret |= mprotect(mem, size, PROT_READ|PROT_WRITE); 232baa489faSSeongJae Park if (ret) { 233baa489faSSeongJae Park ksft_test_result_fail("mprotect() failed\n"); 234baa489faSSeongJae Park write(comm_pipes.parent_ready[1], "0", 1); 235baa489faSSeongJae Park wait(&ret); 236baa489faSSeongJae Park goto close_comm_pipes; 237baa489faSSeongJae Park } 238baa489faSSeongJae Park } 239baa489faSSeongJae Park 240baa489faSSeongJae Park /* Modify the page. */ 241baa489faSSeongJae Park memset(mem, 0xff, size); 242baa489faSSeongJae Park write(comm_pipes.parent_ready[1], "0", 1); 243baa489faSSeongJae Park 244baa489faSSeongJae Park wait(&ret); 245baa489faSSeongJae Park if (WIFEXITED(ret)) 246baa489faSSeongJae Park ret = WEXITSTATUS(ret); 247baa489faSSeongJae Park else 248baa489faSSeongJae Park ret = -EINVAL; 249baa489faSSeongJae Park 250*4bf6a4ebSDavid Hildenbrand if (!ret) { 251*4bf6a4ebSDavid Hildenbrand ksft_test_result_pass("No leak from parent into child\n"); 252*4bf6a4ebSDavid Hildenbrand } else if (xfail) { 253*4bf6a4ebSDavid Hildenbrand /* 254*4bf6a4ebSDavid Hildenbrand * With hugetlb, some vmsplice() tests are currently expected to 255*4bf6a4ebSDavid Hildenbrand * fail because (a) harder to fix and (b) nobody really cares. 256*4bf6a4ebSDavid Hildenbrand * Flag them as expected failure for now. 257*4bf6a4ebSDavid Hildenbrand */ 258*4bf6a4ebSDavid Hildenbrand ksft_test_result_xfail("Leak from parent into child\n"); 259*4bf6a4ebSDavid Hildenbrand } else { 260*4bf6a4ebSDavid Hildenbrand ksft_test_result_fail("Leak from parent into child\n"); 261*4bf6a4ebSDavid Hildenbrand } 262baa489faSSeongJae Park close_comm_pipes: 263baa489faSSeongJae Park close_comm_pipes(&comm_pipes); 264baa489faSSeongJae Park } 265baa489faSSeongJae Park 266*4bf6a4ebSDavid Hildenbrand static void test_cow_in_parent(char *mem, size_t size, bool is_hugetlb) 267baa489faSSeongJae Park { 268*4bf6a4ebSDavid Hildenbrand do_test_cow_in_parent(mem, size, false, child_memcmp_fn, false); 269baa489faSSeongJae Park } 270baa489faSSeongJae Park 271*4bf6a4ebSDavid Hildenbrand static void test_cow_in_parent_mprotect(char *mem, size_t size, bool is_hugetlb) 272baa489faSSeongJae Park { 273*4bf6a4ebSDavid Hildenbrand do_test_cow_in_parent(mem, size, true, child_memcmp_fn, false); 274baa489faSSeongJae Park } 275baa489faSSeongJae Park 276*4bf6a4ebSDavid Hildenbrand static void test_vmsplice_in_child(char *mem, size_t size, bool is_hugetlb) 277baa489faSSeongJae Park { 278*4bf6a4ebSDavid Hildenbrand do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn, 279*4bf6a4ebSDavid Hildenbrand is_hugetlb); 280baa489faSSeongJae Park } 281baa489faSSeongJae Park 282*4bf6a4ebSDavid Hildenbrand static void test_vmsplice_in_child_mprotect(char *mem, size_t size, 283*4bf6a4ebSDavid Hildenbrand bool is_hugetlb) 284baa489faSSeongJae Park { 285*4bf6a4ebSDavid Hildenbrand do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn, 286*4bf6a4ebSDavid Hildenbrand is_hugetlb); 287baa489faSSeongJae Park } 288baa489faSSeongJae Park 289baa489faSSeongJae Park static void do_test_vmsplice_in_parent(char *mem, size_t size, 290*4bf6a4ebSDavid Hildenbrand bool before_fork, bool xfail) 291baa489faSSeongJae Park { 292baa489faSSeongJae Park struct iovec iov = { 293baa489faSSeongJae Park .iov_base = mem, 294baa489faSSeongJae Park .iov_len = size, 295baa489faSSeongJae Park }; 296baa489faSSeongJae Park ssize_t cur, total, transferred; 297baa489faSSeongJae Park struct comm_pipes comm_pipes; 298baa489faSSeongJae Park char *old, *new; 299baa489faSSeongJae Park int ret, fds[2]; 300baa489faSSeongJae Park char buf; 301baa489faSSeongJae Park 302baa489faSSeongJae Park old = malloc(size); 303baa489faSSeongJae Park new = malloc(size); 304baa489faSSeongJae Park 305baa489faSSeongJae Park memcpy(old, mem, size); 306baa489faSSeongJae Park 307baa489faSSeongJae Park ret = setup_comm_pipes(&comm_pipes); 308baa489faSSeongJae Park if (ret) { 309baa489faSSeongJae Park ksft_test_result_fail("pipe() failed\n"); 310baa489faSSeongJae Park goto free; 311baa489faSSeongJae Park } 312baa489faSSeongJae Park 313baa489faSSeongJae Park if (pipe(fds) < 0) { 314baa489faSSeongJae Park ksft_test_result_fail("pipe() failed\n"); 315baa489faSSeongJae Park goto close_comm_pipes; 316baa489faSSeongJae Park } 317baa489faSSeongJae Park 318baa489faSSeongJae Park if (before_fork) { 319baa489faSSeongJae Park transferred = vmsplice(fds[1], &iov, 1, 0); 320baa489faSSeongJae Park if (transferred <= 0) { 321baa489faSSeongJae Park ksft_test_result_fail("vmsplice() failed\n"); 322baa489faSSeongJae Park goto close_pipe; 323baa489faSSeongJae Park } 324baa489faSSeongJae Park } 325baa489faSSeongJae Park 326baa489faSSeongJae Park ret = fork(); 327baa489faSSeongJae Park if (ret < 0) { 328baa489faSSeongJae Park ksft_test_result_fail("fork() failed\n"); 329baa489faSSeongJae Park goto close_pipe; 330baa489faSSeongJae Park } else if (!ret) { 331baa489faSSeongJae Park write(comm_pipes.child_ready[1], "0", 1); 332baa489faSSeongJae Park while (read(comm_pipes.parent_ready[0], &buf, 1) != 1) 333baa489faSSeongJae Park ; 334baa489faSSeongJae Park /* Modify page content in the child. */ 335baa489faSSeongJae Park memset(mem, 0xff, size); 336baa489faSSeongJae Park exit(0); 337baa489faSSeongJae Park } 338baa489faSSeongJae Park 339baa489faSSeongJae Park if (!before_fork) { 340baa489faSSeongJae Park transferred = vmsplice(fds[1], &iov, 1, 0); 341baa489faSSeongJae Park if (transferred <= 0) { 342baa489faSSeongJae Park ksft_test_result_fail("vmsplice() failed\n"); 343baa489faSSeongJae Park wait(&ret); 344baa489faSSeongJae Park goto close_pipe; 345baa489faSSeongJae Park } 346baa489faSSeongJae Park } 347baa489faSSeongJae Park 348baa489faSSeongJae Park while (read(comm_pipes.child_ready[0], &buf, 1) != 1) 349baa489faSSeongJae Park ; 350baa489faSSeongJae Park if (munmap(mem, size) < 0) { 351baa489faSSeongJae Park ksft_test_result_fail("munmap() failed\n"); 352baa489faSSeongJae Park goto close_pipe; 353baa489faSSeongJae Park } 354baa489faSSeongJae Park write(comm_pipes.parent_ready[1], "0", 1); 355baa489faSSeongJae Park 356baa489faSSeongJae Park /* Wait until the child is done writing. */ 357baa489faSSeongJae Park wait(&ret); 358baa489faSSeongJae Park if (!WIFEXITED(ret)) { 359baa489faSSeongJae Park ksft_test_result_fail("wait() failed\n"); 360baa489faSSeongJae Park goto close_pipe; 361baa489faSSeongJae Park } 362baa489faSSeongJae Park 363baa489faSSeongJae Park /* See if we still read the old values. */ 364baa489faSSeongJae Park for (total = 0; total < transferred; total += cur) { 365baa489faSSeongJae Park cur = read(fds[0], new + total, transferred - total); 366baa489faSSeongJae Park if (cur < 0) { 367baa489faSSeongJae Park ksft_test_result_fail("read() failed\n"); 368baa489faSSeongJae Park goto close_pipe; 369baa489faSSeongJae Park } 370baa489faSSeongJae Park } 371baa489faSSeongJae Park 372*4bf6a4ebSDavid Hildenbrand if (!memcmp(old, new, transferred)) { 373*4bf6a4ebSDavid Hildenbrand ksft_test_result_pass("No leak from child into parent\n"); 374*4bf6a4ebSDavid Hildenbrand } else if (xfail) { 375*4bf6a4ebSDavid Hildenbrand /* 376*4bf6a4ebSDavid Hildenbrand * With hugetlb, some vmsplice() tests are currently expected to 377*4bf6a4ebSDavid Hildenbrand * fail because (a) harder to fix and (b) nobody really cares. 378*4bf6a4ebSDavid Hildenbrand * Flag them as expected failure for now. 379*4bf6a4ebSDavid Hildenbrand */ 380*4bf6a4ebSDavid Hildenbrand ksft_test_result_xfail("Leak from child into parent\n"); 381*4bf6a4ebSDavid Hildenbrand } else { 382*4bf6a4ebSDavid Hildenbrand ksft_test_result_fail("Leak from child into parent\n"); 383*4bf6a4ebSDavid Hildenbrand } 384baa489faSSeongJae Park close_pipe: 385baa489faSSeongJae Park close(fds[0]); 386baa489faSSeongJae Park close(fds[1]); 387baa489faSSeongJae Park close_comm_pipes: 388baa489faSSeongJae Park close_comm_pipes(&comm_pipes); 389baa489faSSeongJae Park free: 390baa489faSSeongJae Park free(old); 391baa489faSSeongJae Park free(new); 392baa489faSSeongJae Park } 393baa489faSSeongJae Park 394*4bf6a4ebSDavid Hildenbrand static void test_vmsplice_before_fork(char *mem, size_t size, bool is_hugetlb) 395baa489faSSeongJae Park { 396*4bf6a4ebSDavid Hildenbrand do_test_vmsplice_in_parent(mem, size, true, is_hugetlb); 397baa489faSSeongJae Park } 398baa489faSSeongJae Park 399*4bf6a4ebSDavid Hildenbrand static void test_vmsplice_after_fork(char *mem, size_t size, bool is_hugetlb) 400baa489faSSeongJae Park { 401*4bf6a4ebSDavid Hildenbrand do_test_vmsplice_in_parent(mem, size, false, is_hugetlb); 402baa489faSSeongJae Park } 403baa489faSSeongJae Park 404baa489faSSeongJae Park #ifdef LOCAL_CONFIG_HAVE_LIBURING 405baa489faSSeongJae Park static void do_test_iouring(char *mem, size_t size, bool use_fork) 406baa489faSSeongJae Park { 407baa489faSSeongJae Park struct comm_pipes comm_pipes; 408baa489faSSeongJae Park struct io_uring_cqe *cqe; 409baa489faSSeongJae Park struct io_uring_sqe *sqe; 410baa489faSSeongJae Park struct io_uring ring; 411baa489faSSeongJae Park ssize_t cur, total; 412baa489faSSeongJae Park struct iovec iov; 413baa489faSSeongJae Park char *buf, *tmp; 414baa489faSSeongJae Park int ret, fd; 415baa489faSSeongJae Park FILE *file; 416baa489faSSeongJae Park 417baa489faSSeongJae Park ret = setup_comm_pipes(&comm_pipes); 418baa489faSSeongJae Park if (ret) { 419baa489faSSeongJae Park ksft_test_result_fail("pipe() failed\n"); 420baa489faSSeongJae Park return; 421baa489faSSeongJae Park } 422baa489faSSeongJae Park 423baa489faSSeongJae Park file = tmpfile(); 424baa489faSSeongJae Park if (!file) { 425baa489faSSeongJae Park ksft_test_result_fail("tmpfile() failed\n"); 426baa489faSSeongJae Park goto close_comm_pipes; 427baa489faSSeongJae Park } 428baa489faSSeongJae Park fd = fileno(file); 429baa489faSSeongJae Park assert(fd); 430baa489faSSeongJae Park 431baa489faSSeongJae Park tmp = malloc(size); 432baa489faSSeongJae Park if (!tmp) { 433baa489faSSeongJae Park ksft_test_result_fail("malloc() failed\n"); 434baa489faSSeongJae Park goto close_file; 435baa489faSSeongJae Park } 436baa489faSSeongJae Park 437baa489faSSeongJae Park /* Skip on errors, as we might just lack kernel support. */ 438baa489faSSeongJae Park ret = io_uring_queue_init(1, &ring, 0); 439baa489faSSeongJae Park if (ret < 0) { 440baa489faSSeongJae Park ksft_test_result_skip("io_uring_queue_init() failed\n"); 441baa489faSSeongJae Park goto free_tmp; 442baa489faSSeongJae Park } 443baa489faSSeongJae Park 444baa489faSSeongJae Park /* 445baa489faSSeongJae Park * Register the range as a fixed buffer. This will FOLL_WRITE | FOLL_PIN 446baa489faSSeongJae Park * | FOLL_LONGTERM the range. 447baa489faSSeongJae Park * 448baa489faSSeongJae Park * Skip on errors, as we might just lack kernel support or might not 449baa489faSSeongJae Park * have sufficient MEMLOCK permissions. 450baa489faSSeongJae Park */ 451baa489faSSeongJae Park iov.iov_base = mem; 452baa489faSSeongJae Park iov.iov_len = size; 453baa489faSSeongJae Park ret = io_uring_register_buffers(&ring, &iov, 1); 454baa489faSSeongJae Park if (ret) { 455baa489faSSeongJae Park ksft_test_result_skip("io_uring_register_buffers() failed\n"); 456baa489faSSeongJae Park goto queue_exit; 457baa489faSSeongJae Park } 458baa489faSSeongJae Park 459baa489faSSeongJae Park if (use_fork) { 460baa489faSSeongJae Park /* 461baa489faSSeongJae Park * fork() and keep the child alive until we're done. Note that 462baa489faSSeongJae Park * we expect the pinned page to not get shared with the child. 463baa489faSSeongJae Park */ 464baa489faSSeongJae Park ret = fork(); 465baa489faSSeongJae Park if (ret < 0) { 466baa489faSSeongJae Park ksft_test_result_fail("fork() failed\n"); 467baa489faSSeongJae Park goto unregister_buffers; 468baa489faSSeongJae Park } else if (!ret) { 469baa489faSSeongJae Park write(comm_pipes.child_ready[1], "0", 1); 470baa489faSSeongJae Park while (read(comm_pipes.parent_ready[0], &buf, 1) != 1) 471baa489faSSeongJae Park ; 472baa489faSSeongJae Park exit(0); 473baa489faSSeongJae Park } 474baa489faSSeongJae Park 475baa489faSSeongJae Park while (read(comm_pipes.child_ready[0], &buf, 1) != 1) 476baa489faSSeongJae Park ; 477baa489faSSeongJae Park } else { 478baa489faSSeongJae Park /* 479baa489faSSeongJae Park * Map the page R/O into the page table. Enable softdirty 480baa489faSSeongJae Park * tracking to stop the page from getting mapped R/W immediately 481baa489faSSeongJae Park * again by mprotect() optimizations. Note that we don't have an 482baa489faSSeongJae Park * easy way to test if that worked (the pagemap does not export 483baa489faSSeongJae Park * if the page is mapped R/O vs. R/W). 484baa489faSSeongJae Park */ 485baa489faSSeongJae Park ret = mprotect(mem, size, PROT_READ); 486baa489faSSeongJae Park clear_softdirty(); 487baa489faSSeongJae Park ret |= mprotect(mem, size, PROT_READ | PROT_WRITE); 488baa489faSSeongJae Park if (ret) { 489baa489faSSeongJae Park ksft_test_result_fail("mprotect() failed\n"); 490baa489faSSeongJae Park goto unregister_buffers; 491baa489faSSeongJae Park } 492baa489faSSeongJae Park } 493baa489faSSeongJae Park 494baa489faSSeongJae Park /* 495baa489faSSeongJae Park * Modify the page and write page content as observed by the fixed 496baa489faSSeongJae Park * buffer pin to the file so we can verify it. 497baa489faSSeongJae Park */ 498baa489faSSeongJae Park memset(mem, 0xff, size); 499baa489faSSeongJae Park sqe = io_uring_get_sqe(&ring); 500baa489faSSeongJae Park if (!sqe) { 501baa489faSSeongJae Park ksft_test_result_fail("io_uring_get_sqe() failed\n"); 502baa489faSSeongJae Park goto quit_child; 503baa489faSSeongJae Park } 504baa489faSSeongJae Park io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0); 505baa489faSSeongJae Park 506baa489faSSeongJae Park ret = io_uring_submit(&ring); 507baa489faSSeongJae Park if (ret < 0) { 508baa489faSSeongJae Park ksft_test_result_fail("io_uring_submit() failed\n"); 509baa489faSSeongJae Park goto quit_child; 510baa489faSSeongJae Park } 511baa489faSSeongJae Park 512baa489faSSeongJae Park ret = io_uring_wait_cqe(&ring, &cqe); 513baa489faSSeongJae Park if (ret < 0) { 514baa489faSSeongJae Park ksft_test_result_fail("io_uring_wait_cqe() failed\n"); 515baa489faSSeongJae Park goto quit_child; 516baa489faSSeongJae Park } 517baa489faSSeongJae Park 518baa489faSSeongJae Park if (cqe->res != size) { 519baa489faSSeongJae Park ksft_test_result_fail("write_fixed failed\n"); 520baa489faSSeongJae Park goto quit_child; 521baa489faSSeongJae Park } 522baa489faSSeongJae Park io_uring_cqe_seen(&ring, cqe); 523baa489faSSeongJae Park 524baa489faSSeongJae Park /* Read back the file content to the temporary buffer. */ 525baa489faSSeongJae Park total = 0; 526baa489faSSeongJae Park while (total < size) { 527baa489faSSeongJae Park cur = pread(fd, tmp + total, size - total, total); 528baa489faSSeongJae Park if (cur < 0) { 529baa489faSSeongJae Park ksft_test_result_fail("pread() failed\n"); 530baa489faSSeongJae Park goto quit_child; 531baa489faSSeongJae Park } 532baa489faSSeongJae Park total += cur; 533baa489faSSeongJae Park } 534baa489faSSeongJae Park 535baa489faSSeongJae Park /* Finally, check if we read what we expected. */ 536baa489faSSeongJae Park ksft_test_result(!memcmp(mem, tmp, size), 537baa489faSSeongJae Park "Longterm R/W pin is reliable\n"); 538baa489faSSeongJae Park 539baa489faSSeongJae Park quit_child: 540baa489faSSeongJae Park if (use_fork) { 541baa489faSSeongJae Park write(comm_pipes.parent_ready[1], "0", 1); 542baa489faSSeongJae Park wait(&ret); 543baa489faSSeongJae Park } 544baa489faSSeongJae Park unregister_buffers: 545baa489faSSeongJae Park io_uring_unregister_buffers(&ring); 546baa489faSSeongJae Park queue_exit: 547baa489faSSeongJae Park io_uring_queue_exit(&ring); 548baa489faSSeongJae Park free_tmp: 549baa489faSSeongJae Park free(tmp); 550baa489faSSeongJae Park close_file: 551baa489faSSeongJae Park fclose(file); 552baa489faSSeongJae Park close_comm_pipes: 553baa489faSSeongJae Park close_comm_pipes(&comm_pipes); 554baa489faSSeongJae Park } 555baa489faSSeongJae Park 556*4bf6a4ebSDavid Hildenbrand static void test_iouring_ro(char *mem, size_t size, bool is_hugetlb) 557baa489faSSeongJae Park { 558baa489faSSeongJae Park do_test_iouring(mem, size, false); 559baa489faSSeongJae Park } 560baa489faSSeongJae Park 561*4bf6a4ebSDavid Hildenbrand static void test_iouring_fork(char *mem, size_t size, bool is_hugetlb) 562baa489faSSeongJae Park { 563baa489faSSeongJae Park do_test_iouring(mem, size, true); 564baa489faSSeongJae Park } 565baa489faSSeongJae Park 566baa489faSSeongJae Park #endif /* LOCAL_CONFIG_HAVE_LIBURING */ 567baa489faSSeongJae Park 568baa489faSSeongJae Park enum ro_pin_test { 569baa489faSSeongJae Park RO_PIN_TEST, 570baa489faSSeongJae Park RO_PIN_TEST_SHARED, 571baa489faSSeongJae Park RO_PIN_TEST_PREVIOUSLY_SHARED, 572baa489faSSeongJae Park RO_PIN_TEST_RO_EXCLUSIVE, 573baa489faSSeongJae Park }; 574baa489faSSeongJae Park 575baa489faSSeongJae Park static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, 576baa489faSSeongJae Park bool fast) 577baa489faSSeongJae Park { 578baa489faSSeongJae Park struct pin_longterm_test args; 579baa489faSSeongJae Park struct comm_pipes comm_pipes; 580baa489faSSeongJae Park char *tmp, buf; 581baa489faSSeongJae Park __u64 tmp_val; 582baa489faSSeongJae Park int ret; 583baa489faSSeongJae Park 584baa489faSSeongJae Park if (gup_fd < 0) { 585baa489faSSeongJae Park ksft_test_result_skip("gup_test not available\n"); 586baa489faSSeongJae Park return; 587baa489faSSeongJae Park } 588baa489faSSeongJae Park 589baa489faSSeongJae Park tmp = malloc(size); 590baa489faSSeongJae Park if (!tmp) { 591baa489faSSeongJae Park ksft_test_result_fail("malloc() failed\n"); 592baa489faSSeongJae Park return; 593baa489faSSeongJae Park } 594baa489faSSeongJae Park 595baa489faSSeongJae Park ret = setup_comm_pipes(&comm_pipes); 596baa489faSSeongJae Park if (ret) { 597baa489faSSeongJae Park ksft_test_result_fail("pipe() failed\n"); 598baa489faSSeongJae Park goto free_tmp; 599baa489faSSeongJae Park } 600baa489faSSeongJae Park 601baa489faSSeongJae Park switch (test) { 602baa489faSSeongJae Park case RO_PIN_TEST: 603baa489faSSeongJae Park break; 604baa489faSSeongJae Park case RO_PIN_TEST_SHARED: 605baa489faSSeongJae Park case RO_PIN_TEST_PREVIOUSLY_SHARED: 606baa489faSSeongJae Park /* 607baa489faSSeongJae Park * Share the pages with our child. As the pages are not pinned, 608baa489faSSeongJae Park * this should just work. 609baa489faSSeongJae Park */ 610baa489faSSeongJae Park ret = fork(); 611baa489faSSeongJae Park if (ret < 0) { 612baa489faSSeongJae Park ksft_test_result_fail("fork() failed\n"); 613baa489faSSeongJae Park goto close_comm_pipes; 614baa489faSSeongJae Park } else if (!ret) { 615baa489faSSeongJae Park write(comm_pipes.child_ready[1], "0", 1); 616baa489faSSeongJae Park while (read(comm_pipes.parent_ready[0], &buf, 1) != 1) 617baa489faSSeongJae Park ; 618baa489faSSeongJae Park exit(0); 619baa489faSSeongJae Park } 620baa489faSSeongJae Park 621baa489faSSeongJae Park /* Wait until our child is ready. */ 622baa489faSSeongJae Park while (read(comm_pipes.child_ready[0], &buf, 1) != 1) 623baa489faSSeongJae Park ; 624baa489faSSeongJae Park 625baa489faSSeongJae Park if (test == RO_PIN_TEST_PREVIOUSLY_SHARED) { 626baa489faSSeongJae Park /* 627baa489faSSeongJae Park * Tell the child to quit now and wait until it quit. 628baa489faSSeongJae Park * The pages should now be mapped R/O into our page 629baa489faSSeongJae Park * tables, but they are no longer shared. 630baa489faSSeongJae Park */ 631baa489faSSeongJae Park write(comm_pipes.parent_ready[1], "0", 1); 632baa489faSSeongJae Park wait(&ret); 633baa489faSSeongJae Park if (!WIFEXITED(ret)) 634baa489faSSeongJae Park ksft_print_msg("[INFO] wait() failed\n"); 635baa489faSSeongJae Park } 636baa489faSSeongJae Park break; 637baa489faSSeongJae Park case RO_PIN_TEST_RO_EXCLUSIVE: 638baa489faSSeongJae Park /* 639baa489faSSeongJae Park * Map the page R/O into the page table. Enable softdirty 640baa489faSSeongJae Park * tracking to stop the page from getting mapped R/W immediately 641baa489faSSeongJae Park * again by mprotect() optimizations. Note that we don't have an 642baa489faSSeongJae Park * easy way to test if that worked (the pagemap does not export 643baa489faSSeongJae Park * if the page is mapped R/O vs. R/W). 644baa489faSSeongJae Park */ 645baa489faSSeongJae Park ret = mprotect(mem, size, PROT_READ); 646baa489faSSeongJae Park clear_softdirty(); 647baa489faSSeongJae Park ret |= mprotect(mem, size, PROT_READ | PROT_WRITE); 648baa489faSSeongJae Park if (ret) { 649baa489faSSeongJae Park ksft_test_result_fail("mprotect() failed\n"); 650baa489faSSeongJae Park goto close_comm_pipes; 651baa489faSSeongJae Park } 652baa489faSSeongJae Park break; 653baa489faSSeongJae Park default: 654baa489faSSeongJae Park assert(false); 655baa489faSSeongJae Park } 656baa489faSSeongJae Park 657baa489faSSeongJae Park /* Take a R/O pin. This should trigger unsharing. */ 658baa489faSSeongJae Park args.addr = (__u64)(uintptr_t)mem; 659baa489faSSeongJae Park args.size = size; 660baa489faSSeongJae Park args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0; 661baa489faSSeongJae Park ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args); 662baa489faSSeongJae Park if (ret) { 663baa489faSSeongJae Park if (errno == EINVAL) 664baa489faSSeongJae Park ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n"); 665baa489faSSeongJae Park else 666baa489faSSeongJae Park ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n"); 667baa489faSSeongJae Park goto wait; 668baa489faSSeongJae Park } 669baa489faSSeongJae Park 670baa489faSSeongJae Park /* Modify the page. */ 671baa489faSSeongJae Park memset(mem, 0xff, size); 672baa489faSSeongJae Park 673baa489faSSeongJae Park /* 674baa489faSSeongJae Park * Read back the content via the pin to the temporary buffer and 675baa489faSSeongJae Park * test if we observed the modification. 676baa489faSSeongJae Park */ 677baa489faSSeongJae Park tmp_val = (__u64)(uintptr_t)tmp; 678baa489faSSeongJae Park ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val); 679baa489faSSeongJae Park if (ret) 680baa489faSSeongJae Park ksft_test_result_fail("PIN_LONGTERM_TEST_READ failed\n"); 681baa489faSSeongJae Park else 682baa489faSSeongJae Park ksft_test_result(!memcmp(mem, tmp, size), 683baa489faSSeongJae Park "Longterm R/O pin is reliable\n"); 684baa489faSSeongJae Park 685baa489faSSeongJae Park ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP); 686baa489faSSeongJae Park if (ret) 687baa489faSSeongJae Park ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n"); 688baa489faSSeongJae Park wait: 689baa489faSSeongJae Park switch (test) { 690baa489faSSeongJae Park case RO_PIN_TEST_SHARED: 691baa489faSSeongJae Park write(comm_pipes.parent_ready[1], "0", 1); 692baa489faSSeongJae Park wait(&ret); 693baa489faSSeongJae Park if (!WIFEXITED(ret)) 694baa489faSSeongJae Park ksft_print_msg("[INFO] wait() failed\n"); 695baa489faSSeongJae Park break; 696baa489faSSeongJae Park default: 697baa489faSSeongJae Park break; 698baa489faSSeongJae Park } 699baa489faSSeongJae Park close_comm_pipes: 700baa489faSSeongJae Park close_comm_pipes(&comm_pipes); 701baa489faSSeongJae Park free_tmp: 702baa489faSSeongJae Park free(tmp); 703baa489faSSeongJae Park } 704baa489faSSeongJae Park 705*4bf6a4ebSDavid Hildenbrand static void test_ro_pin_on_shared(char *mem, size_t size, bool is_hugetlb) 706baa489faSSeongJae Park { 707baa489faSSeongJae Park do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, false); 708baa489faSSeongJae Park } 709baa489faSSeongJae Park 710*4bf6a4ebSDavid Hildenbrand static void test_ro_fast_pin_on_shared(char *mem, size_t size, bool is_hugetlb) 711baa489faSSeongJae Park { 712baa489faSSeongJae Park do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, true); 713baa489faSSeongJae Park } 714baa489faSSeongJae Park 715*4bf6a4ebSDavid Hildenbrand static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size, 716*4bf6a4ebSDavid Hildenbrand bool is_hugetlb) 717baa489faSSeongJae Park { 718baa489faSSeongJae Park do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, false); 719baa489faSSeongJae Park } 720baa489faSSeongJae Park 721*4bf6a4ebSDavid Hildenbrand static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size, 722*4bf6a4ebSDavid Hildenbrand bool is_hugetlb) 723baa489faSSeongJae Park { 724baa489faSSeongJae Park do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, true); 725baa489faSSeongJae Park } 726baa489faSSeongJae Park 727*4bf6a4ebSDavid Hildenbrand static void test_ro_pin_on_ro_exclusive(char *mem, size_t size, 728*4bf6a4ebSDavid Hildenbrand bool is_hugetlb) 729baa489faSSeongJae Park { 730baa489faSSeongJae Park do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, false); 731baa489faSSeongJae Park } 732baa489faSSeongJae Park 733*4bf6a4ebSDavid Hildenbrand static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size, 734*4bf6a4ebSDavid Hildenbrand bool is_hugetlb) 735baa489faSSeongJae Park { 736baa489faSSeongJae Park do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, true); 737baa489faSSeongJae Park } 738baa489faSSeongJae Park 739*4bf6a4ebSDavid Hildenbrand typedef void (*test_fn)(char *mem, size_t size, bool hugetlb); 740baa489faSSeongJae Park 741baa489faSSeongJae Park static void do_run_with_base_page(test_fn fn, bool swapout) 742baa489faSSeongJae Park { 743baa489faSSeongJae Park char *mem; 744baa489faSSeongJae Park int ret; 745baa489faSSeongJae Park 746baa489faSSeongJae Park mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, 747baa489faSSeongJae Park MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 748baa489faSSeongJae Park if (mem == MAP_FAILED) { 749baa489faSSeongJae Park ksft_test_result_fail("mmap() failed\n"); 750baa489faSSeongJae Park return; 751baa489faSSeongJae Park } 752baa489faSSeongJae Park 753baa489faSSeongJae Park ret = madvise(mem, pagesize, MADV_NOHUGEPAGE); 754baa489faSSeongJae Park /* Ignore if not around on a kernel. */ 755baa489faSSeongJae Park if (ret && errno != EINVAL) { 756baa489faSSeongJae Park ksft_test_result_fail("MADV_NOHUGEPAGE failed\n"); 757baa489faSSeongJae Park goto munmap; 758baa489faSSeongJae Park } 759baa489faSSeongJae Park 760baa489faSSeongJae Park /* Populate a base page. */ 761baa489faSSeongJae Park memset(mem, 0, pagesize); 762baa489faSSeongJae Park 763baa489faSSeongJae Park if (swapout) { 764baa489faSSeongJae Park madvise(mem, pagesize, MADV_PAGEOUT); 765baa489faSSeongJae Park if (!pagemap_is_swapped(pagemap_fd, mem)) { 766baa489faSSeongJae Park ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n"); 767baa489faSSeongJae Park goto munmap; 768baa489faSSeongJae Park } 769baa489faSSeongJae Park } 770baa489faSSeongJae Park 771*4bf6a4ebSDavid Hildenbrand fn(mem, pagesize, false); 772baa489faSSeongJae Park munmap: 773baa489faSSeongJae Park munmap(mem, pagesize); 774baa489faSSeongJae Park } 775baa489faSSeongJae Park 776baa489faSSeongJae Park static void run_with_base_page(test_fn fn, const char *desc) 777baa489faSSeongJae Park { 778baa489faSSeongJae Park ksft_print_msg("[RUN] %s ... with base page\n", desc); 779baa489faSSeongJae Park do_run_with_base_page(fn, false); 780baa489faSSeongJae Park } 781baa489faSSeongJae Park 782baa489faSSeongJae Park static void run_with_base_page_swap(test_fn fn, const char *desc) 783baa489faSSeongJae Park { 784baa489faSSeongJae Park ksft_print_msg("[RUN] %s ... with swapped out base page\n", desc); 785baa489faSSeongJae Park do_run_with_base_page(fn, true); 786baa489faSSeongJae Park } 787baa489faSSeongJae Park 788baa489faSSeongJae Park enum thp_run { 789baa489faSSeongJae Park THP_RUN_PMD, 790baa489faSSeongJae Park THP_RUN_PMD_SWAPOUT, 791baa489faSSeongJae Park THP_RUN_PTE, 792baa489faSSeongJae Park THP_RUN_PTE_SWAPOUT, 793baa489faSSeongJae Park THP_RUN_SINGLE_PTE, 794baa489faSSeongJae Park THP_RUN_SINGLE_PTE_SWAPOUT, 795baa489faSSeongJae Park THP_RUN_PARTIAL_MREMAP, 796baa489faSSeongJae Park THP_RUN_PARTIAL_SHARED, 797baa489faSSeongJae Park }; 798baa489faSSeongJae Park 79912dc16b3SRyan Roberts static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) 800baa489faSSeongJae Park { 801baa489faSSeongJae Park char *mem, *mmap_mem, *tmp, *mremap_mem = MAP_FAILED; 802baa489faSSeongJae Park size_t size, mmap_size, mremap_size; 803baa489faSSeongJae Park int ret; 804baa489faSSeongJae Park 805baa489faSSeongJae Park /* For alignment purposes, we need twice the thp size. */ 806baa489faSSeongJae Park mmap_size = 2 * thpsize; 807baa489faSSeongJae Park mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, 808baa489faSSeongJae Park MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 809baa489faSSeongJae Park if (mmap_mem == MAP_FAILED) { 810baa489faSSeongJae Park ksft_test_result_fail("mmap() failed\n"); 811baa489faSSeongJae Park return; 812baa489faSSeongJae Park } 813baa489faSSeongJae Park 814baa489faSSeongJae Park /* We need a THP-aligned memory area. */ 815baa489faSSeongJae Park mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1)); 816baa489faSSeongJae Park 817baa489faSSeongJae Park ret = madvise(mem, thpsize, MADV_HUGEPAGE); 818baa489faSSeongJae Park if (ret) { 819baa489faSSeongJae Park ksft_test_result_fail("MADV_HUGEPAGE failed\n"); 820baa489faSSeongJae Park goto munmap; 821baa489faSSeongJae Park } 822baa489faSSeongJae Park 823baa489faSSeongJae Park /* 82412dc16b3SRyan Roberts * Try to populate a THP. Touch the first sub-page and test if 82512dc16b3SRyan Roberts * we get the last sub-page populated automatically. 826baa489faSSeongJae Park */ 827baa489faSSeongJae Park mem[0] = 0; 82812dc16b3SRyan Roberts if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) { 829baa489faSSeongJae Park ksft_test_result_skip("Did not get a THP populated\n"); 830baa489faSSeongJae Park goto munmap; 831baa489faSSeongJae Park } 832baa489faSSeongJae Park memset(mem, 0, thpsize); 833baa489faSSeongJae Park 834baa489faSSeongJae Park size = thpsize; 835baa489faSSeongJae Park switch (thp_run) { 836baa489faSSeongJae Park case THP_RUN_PMD: 837baa489faSSeongJae Park case THP_RUN_PMD_SWAPOUT: 83812dc16b3SRyan Roberts assert(thpsize == pmdsize); 839baa489faSSeongJae Park break; 840baa489faSSeongJae Park case THP_RUN_PTE: 841baa489faSSeongJae Park case THP_RUN_PTE_SWAPOUT: 842baa489faSSeongJae Park /* 843baa489faSSeongJae Park * Trigger PTE-mapping the THP by temporarily mapping a single 84412dc16b3SRyan Roberts * subpage R/O. This is a noop if the THP is not pmdsize (and 84512dc16b3SRyan Roberts * therefore already PTE-mapped). 846baa489faSSeongJae Park */ 847baa489faSSeongJae Park ret = mprotect(mem + pagesize, pagesize, PROT_READ); 848baa489faSSeongJae Park if (ret) { 849baa489faSSeongJae Park ksft_test_result_fail("mprotect() failed\n"); 850baa489faSSeongJae Park goto munmap; 851baa489faSSeongJae Park } 852baa489faSSeongJae Park ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE); 853baa489faSSeongJae Park if (ret) { 854baa489faSSeongJae Park ksft_test_result_fail("mprotect() failed\n"); 855baa489faSSeongJae Park goto munmap; 856baa489faSSeongJae Park } 857baa489faSSeongJae Park break; 858baa489faSSeongJae Park case THP_RUN_SINGLE_PTE: 859baa489faSSeongJae Park case THP_RUN_SINGLE_PTE_SWAPOUT: 860baa489faSSeongJae Park /* 861baa489faSSeongJae Park * Discard all but a single subpage of that PTE-mapped THP. What 862baa489faSSeongJae Park * remains is a single PTE mapping a single subpage. 863baa489faSSeongJae Park */ 864baa489faSSeongJae Park ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED); 865baa489faSSeongJae Park if (ret) { 866baa489faSSeongJae Park ksft_test_result_fail("MADV_DONTNEED failed\n"); 867baa489faSSeongJae Park goto munmap; 868baa489faSSeongJae Park } 869baa489faSSeongJae Park size = pagesize; 870baa489faSSeongJae Park break; 871baa489faSSeongJae Park case THP_RUN_PARTIAL_MREMAP: 872baa489faSSeongJae Park /* 873baa489faSSeongJae Park * Remap half of the THP. We need some new memory location 874baa489faSSeongJae Park * for that. 875baa489faSSeongJae Park */ 876baa489faSSeongJae Park mremap_size = thpsize / 2; 877baa489faSSeongJae Park mremap_mem = mmap(NULL, mremap_size, PROT_NONE, 878baa489faSSeongJae Park MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 879baa489faSSeongJae Park if (mem == MAP_FAILED) { 880baa489faSSeongJae Park ksft_test_result_fail("mmap() failed\n"); 881baa489faSSeongJae Park goto munmap; 882baa489faSSeongJae Park } 883baa489faSSeongJae Park tmp = mremap(mem + mremap_size, mremap_size, mremap_size, 884baa489faSSeongJae Park MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem); 885baa489faSSeongJae Park if (tmp != mremap_mem) { 886baa489faSSeongJae Park ksft_test_result_fail("mremap() failed\n"); 887baa489faSSeongJae Park goto munmap; 888baa489faSSeongJae Park } 889baa489faSSeongJae Park size = mremap_size; 890baa489faSSeongJae Park break; 891baa489faSSeongJae Park case THP_RUN_PARTIAL_SHARED: 892baa489faSSeongJae Park /* 893baa489faSSeongJae Park * Share the first page of the THP with a child and quit the 894baa489faSSeongJae Park * child. This will result in some parts of the THP never 895baa489faSSeongJae Park * have been shared. 896baa489faSSeongJae Park */ 897baa489faSSeongJae Park ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK); 898baa489faSSeongJae Park if (ret) { 899baa489faSSeongJae Park ksft_test_result_fail("MADV_DONTFORK failed\n"); 900baa489faSSeongJae Park goto munmap; 901baa489faSSeongJae Park } 902baa489faSSeongJae Park ret = fork(); 903baa489faSSeongJae Park if (ret < 0) { 904baa489faSSeongJae Park ksft_test_result_fail("fork() failed\n"); 905baa489faSSeongJae Park goto munmap; 906baa489faSSeongJae Park } else if (!ret) { 907baa489faSSeongJae Park exit(0); 908baa489faSSeongJae Park } 909baa489faSSeongJae Park wait(&ret); 910baa489faSSeongJae Park /* Allow for sharing all pages again. */ 911baa489faSSeongJae Park ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK); 912baa489faSSeongJae Park if (ret) { 913baa489faSSeongJae Park ksft_test_result_fail("MADV_DOFORK failed\n"); 914baa489faSSeongJae Park goto munmap; 915baa489faSSeongJae Park } 916baa489faSSeongJae Park break; 917baa489faSSeongJae Park default: 918baa489faSSeongJae Park assert(false); 919baa489faSSeongJae Park } 920baa489faSSeongJae Park 921baa489faSSeongJae Park switch (thp_run) { 922baa489faSSeongJae Park case THP_RUN_PMD_SWAPOUT: 923baa489faSSeongJae Park case THP_RUN_PTE_SWAPOUT: 924baa489faSSeongJae Park case THP_RUN_SINGLE_PTE_SWAPOUT: 925baa489faSSeongJae Park madvise(mem, size, MADV_PAGEOUT); 926baa489faSSeongJae Park if (!range_is_swapped(mem, size)) { 927baa489faSSeongJae Park ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n"); 928baa489faSSeongJae Park goto munmap; 929baa489faSSeongJae Park } 930baa489faSSeongJae Park break; 931baa489faSSeongJae Park default: 932baa489faSSeongJae Park break; 933baa489faSSeongJae Park } 934baa489faSSeongJae Park 935*4bf6a4ebSDavid Hildenbrand fn(mem, size, false); 936baa489faSSeongJae Park munmap: 937baa489faSSeongJae Park munmap(mmap_mem, mmap_size); 938baa489faSSeongJae Park if (mremap_mem != MAP_FAILED) 939baa489faSSeongJae Park munmap(mremap_mem, mremap_size); 940baa489faSSeongJae Park } 941baa489faSSeongJae Park 94212dc16b3SRyan Roberts static void run_with_thp(test_fn fn, const char *desc, size_t size) 943baa489faSSeongJae Park { 94412dc16b3SRyan Roberts ksft_print_msg("[RUN] %s ... with THP (%zu kB)\n", 94512dc16b3SRyan Roberts desc, size / 1024); 94612dc16b3SRyan Roberts do_run_with_thp(fn, THP_RUN_PMD, size); 947baa489faSSeongJae Park } 948baa489faSSeongJae Park 94912dc16b3SRyan Roberts static void run_with_thp_swap(test_fn fn, const char *desc, size_t size) 950baa489faSSeongJae Park { 95112dc16b3SRyan Roberts ksft_print_msg("[RUN] %s ... with swapped-out THP (%zu kB)\n", 95212dc16b3SRyan Roberts desc, size / 1024); 95312dc16b3SRyan Roberts do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT, size); 954baa489faSSeongJae Park } 955baa489faSSeongJae Park 95612dc16b3SRyan Roberts static void run_with_pte_mapped_thp(test_fn fn, const char *desc, size_t size) 957baa489faSSeongJae Park { 95812dc16b3SRyan Roberts ksft_print_msg("[RUN] %s ... with PTE-mapped THP (%zu kB)\n", 95912dc16b3SRyan Roberts desc, size / 1024); 96012dc16b3SRyan Roberts do_run_with_thp(fn, THP_RUN_PTE, size); 961baa489faSSeongJae Park } 962baa489faSSeongJae Park 96312dc16b3SRyan Roberts static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc, size_t size) 964baa489faSSeongJae Park { 96512dc16b3SRyan Roberts ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP (%zu kB)\n", 96612dc16b3SRyan Roberts desc, size / 1024); 96712dc16b3SRyan Roberts do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT, size); 968baa489faSSeongJae Park } 969baa489faSSeongJae Park 97012dc16b3SRyan Roberts static void run_with_single_pte_of_thp(test_fn fn, const char *desc, size_t size) 971baa489faSSeongJae Park { 97212dc16b3SRyan Roberts ksft_print_msg("[RUN] %s ... with single PTE of THP (%zu kB)\n", 97312dc16b3SRyan Roberts desc, size / 1024); 97412dc16b3SRyan Roberts do_run_with_thp(fn, THP_RUN_SINGLE_PTE, size); 975baa489faSSeongJae Park } 976baa489faSSeongJae Park 97712dc16b3SRyan Roberts static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc, size_t size) 978baa489faSSeongJae Park { 97912dc16b3SRyan Roberts ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP (%zu kB)\n", 98012dc16b3SRyan Roberts desc, size / 1024); 98112dc16b3SRyan Roberts do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT, size); 982baa489faSSeongJae Park } 983baa489faSSeongJae Park 98412dc16b3SRyan Roberts static void run_with_partial_mremap_thp(test_fn fn, const char *desc, size_t size) 985baa489faSSeongJae Park { 98612dc16b3SRyan Roberts ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP (%zu kB)\n", 98712dc16b3SRyan Roberts desc, size / 1024); 98812dc16b3SRyan Roberts do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP, size); 989baa489faSSeongJae Park } 990baa489faSSeongJae Park 99112dc16b3SRyan Roberts static void run_with_partial_shared_thp(test_fn fn, const char *desc, size_t size) 992baa489faSSeongJae Park { 99312dc16b3SRyan Roberts ksft_print_msg("[RUN] %s ... with partially shared THP (%zu kB)\n", 99412dc16b3SRyan Roberts desc, size / 1024); 99512dc16b3SRyan Roberts do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED, size); 996baa489faSSeongJae Park } 997baa489faSSeongJae Park 998baa489faSSeongJae Park static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize) 999baa489faSSeongJae Park { 1000baa489faSSeongJae Park int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB; 1001baa489faSSeongJae Park char *mem, *dummy; 1002baa489faSSeongJae Park 1003baa489faSSeongJae Park ksft_print_msg("[RUN] %s ... with hugetlb (%zu kB)\n", desc, 1004baa489faSSeongJae Park hugetlbsize / 1024); 1005baa489faSSeongJae Park 1006baa489faSSeongJae Park flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT; 1007baa489faSSeongJae Park 1008baa489faSSeongJae Park mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0); 1009baa489faSSeongJae Park if (mem == MAP_FAILED) { 1010baa489faSSeongJae Park ksft_test_result_skip("need more free huge pages\n"); 1011baa489faSSeongJae Park return; 1012baa489faSSeongJae Park } 1013baa489faSSeongJae Park 1014baa489faSSeongJae Park /* Populate an huge page. */ 1015baa489faSSeongJae Park memset(mem, 0, hugetlbsize); 1016baa489faSSeongJae Park 1017baa489faSSeongJae Park /* 1018baa489faSSeongJae Park * We need a total of two hugetlb pages to handle COW/unsharing 1019baa489faSSeongJae Park * properly, otherwise we might get zapped by a SIGBUS. 1020baa489faSSeongJae Park */ 1021baa489faSSeongJae Park dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0); 1022baa489faSSeongJae Park if (dummy == MAP_FAILED) { 1023baa489faSSeongJae Park ksft_test_result_skip("need more free huge pages\n"); 1024baa489faSSeongJae Park goto munmap; 1025baa489faSSeongJae Park } 1026baa489faSSeongJae Park munmap(dummy, hugetlbsize); 1027baa489faSSeongJae Park 1028*4bf6a4ebSDavid Hildenbrand fn(mem, hugetlbsize, true); 1029baa489faSSeongJae Park munmap: 1030baa489faSSeongJae Park munmap(mem, hugetlbsize); 1031baa489faSSeongJae Park } 1032baa489faSSeongJae Park 1033baa489faSSeongJae Park struct test_case { 1034baa489faSSeongJae Park const char *desc; 1035baa489faSSeongJae Park test_fn fn; 1036baa489faSSeongJae Park }; 1037baa489faSSeongJae Park 1038baa489faSSeongJae Park /* 1039baa489faSSeongJae Park * Test cases that are specific to anonymous pages: pages in private mappings 1040baa489faSSeongJae Park * that may get shared via COW during fork(). 1041baa489faSSeongJae Park */ 1042baa489faSSeongJae Park static const struct test_case anon_test_cases[] = { 1043baa489faSSeongJae Park /* 1044baa489faSSeongJae Park * Basic COW tests for fork() without any GUP. If we miss to break COW, 1045baa489faSSeongJae Park * either the child can observe modifications by the parent or the 1046baa489faSSeongJae Park * other way around. 1047baa489faSSeongJae Park */ 1048baa489faSSeongJae Park { 1049baa489faSSeongJae Park "Basic COW after fork()", 1050baa489faSSeongJae Park test_cow_in_parent, 1051baa489faSSeongJae Park }, 1052baa489faSSeongJae Park /* 1053baa489faSSeongJae Park * Basic test, but do an additional mprotect(PROT_READ)+ 1054baa489faSSeongJae Park * mprotect(PROT_READ|PROT_WRITE) in the parent before write access. 1055baa489faSSeongJae Park */ 1056baa489faSSeongJae Park { 1057baa489faSSeongJae Park "Basic COW after fork() with mprotect() optimization", 1058baa489faSSeongJae Park test_cow_in_parent_mprotect, 1059baa489faSSeongJae Park }, 1060baa489faSSeongJae Park /* 1061baa489faSSeongJae Park * vmsplice() [R/O GUP] + unmap in the child; modify in the parent. If 1062baa489faSSeongJae Park * we miss to break COW, the child observes modifications by the parent. 1063baa489faSSeongJae Park * This is CVE-2020-29374 reported by Jann Horn. 1064baa489faSSeongJae Park */ 1065baa489faSSeongJae Park { 1066baa489faSSeongJae Park "vmsplice() + unmap in child", 1067*4bf6a4ebSDavid Hildenbrand test_vmsplice_in_child, 1068baa489faSSeongJae Park }, 1069baa489faSSeongJae Park /* 1070baa489faSSeongJae Park * vmsplice() test, but do an additional mprotect(PROT_READ)+ 1071baa489faSSeongJae Park * mprotect(PROT_READ|PROT_WRITE) in the parent before write access. 1072baa489faSSeongJae Park */ 1073baa489faSSeongJae Park { 1074baa489faSSeongJae Park "vmsplice() + unmap in child with mprotect() optimization", 1075*4bf6a4ebSDavid Hildenbrand test_vmsplice_in_child_mprotect, 1076baa489faSSeongJae Park }, 1077baa489faSSeongJae Park /* 1078baa489faSSeongJae Park * vmsplice() [R/O GUP] in parent before fork(), unmap in parent after 1079baa489faSSeongJae Park * fork(); modify in the child. If we miss to break COW, the parent 1080baa489faSSeongJae Park * observes modifications by the child. 1081baa489faSSeongJae Park */ 1082baa489faSSeongJae Park { 1083baa489faSSeongJae Park "vmsplice() before fork(), unmap in parent after fork()", 1084baa489faSSeongJae Park test_vmsplice_before_fork, 1085baa489faSSeongJae Park }, 1086baa489faSSeongJae Park /* 1087baa489faSSeongJae Park * vmsplice() [R/O GUP] + unmap in parent after fork(); modify in the 1088baa489faSSeongJae Park * child. If we miss to break COW, the parent observes modifications by 1089baa489faSSeongJae Park * the child. 1090baa489faSSeongJae Park */ 1091baa489faSSeongJae Park { 1092baa489faSSeongJae Park "vmsplice() + unmap in parent after fork()", 1093baa489faSSeongJae Park test_vmsplice_after_fork, 1094baa489faSSeongJae Park }, 1095baa489faSSeongJae Park #ifdef LOCAL_CONFIG_HAVE_LIBURING 1096baa489faSSeongJae Park /* 1097baa489faSSeongJae Park * Take a R/W longterm pin and then map the page R/O into the page 1098baa489faSSeongJae Park * table to trigger a write fault on next access. When modifying the 1099baa489faSSeongJae Park * page, the page content must be visible via the pin. 1100baa489faSSeongJae Park */ 1101baa489faSSeongJae Park { 1102baa489faSSeongJae Park "R/O-mapping a page registered as iouring fixed buffer", 1103baa489faSSeongJae Park test_iouring_ro, 1104baa489faSSeongJae Park }, 1105baa489faSSeongJae Park /* 1106baa489faSSeongJae Park * Take a R/W longterm pin and then fork() a child. When modifying the 1107baa489faSSeongJae Park * page, the page content must be visible via the pin. We expect the 1108baa489faSSeongJae Park * pinned page to not get shared with the child. 1109baa489faSSeongJae Park */ 1110baa489faSSeongJae Park { 1111baa489faSSeongJae Park "fork() with an iouring fixed buffer", 1112baa489faSSeongJae Park test_iouring_fork, 1113baa489faSSeongJae Park }, 1114baa489faSSeongJae Park 1115baa489faSSeongJae Park #endif /* LOCAL_CONFIG_HAVE_LIBURING */ 1116baa489faSSeongJae Park /* 1117baa489faSSeongJae Park * Take a R/O longterm pin on a R/O-mapped shared anonymous page. 1118baa489faSSeongJae Park * When modifying the page via the page table, the page content change 1119baa489faSSeongJae Park * must be visible via the pin. 1120baa489faSSeongJae Park */ 1121baa489faSSeongJae Park { 1122baa489faSSeongJae Park "R/O GUP pin on R/O-mapped shared page", 1123baa489faSSeongJae Park test_ro_pin_on_shared, 1124baa489faSSeongJae Park }, 1125baa489faSSeongJae Park /* Same as above, but using GUP-fast. */ 1126baa489faSSeongJae Park { 1127baa489faSSeongJae Park "R/O GUP-fast pin on R/O-mapped shared page", 1128baa489faSSeongJae Park test_ro_fast_pin_on_shared, 1129baa489faSSeongJae Park }, 1130baa489faSSeongJae Park /* 1131baa489faSSeongJae Park * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page that 1132baa489faSSeongJae Park * was previously shared. When modifying the page via the page table, 1133baa489faSSeongJae Park * the page content change must be visible via the pin. 1134baa489faSSeongJae Park */ 1135baa489faSSeongJae Park { 1136baa489faSSeongJae Park "R/O GUP pin on R/O-mapped previously-shared page", 1137baa489faSSeongJae Park test_ro_pin_on_ro_previously_shared, 1138baa489faSSeongJae Park }, 1139baa489faSSeongJae Park /* Same as above, but using GUP-fast. */ 1140baa489faSSeongJae Park { 1141baa489faSSeongJae Park "R/O GUP-fast pin on R/O-mapped previously-shared page", 1142baa489faSSeongJae Park test_ro_fast_pin_on_ro_previously_shared, 1143baa489faSSeongJae Park }, 1144baa489faSSeongJae Park /* 1145baa489faSSeongJae Park * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page. 1146baa489faSSeongJae Park * When modifying the page via the page table, the page content change 1147baa489faSSeongJae Park * must be visible via the pin. 1148baa489faSSeongJae Park */ 1149baa489faSSeongJae Park { 1150baa489faSSeongJae Park "R/O GUP pin on R/O-mapped exclusive page", 1151baa489faSSeongJae Park test_ro_pin_on_ro_exclusive, 1152baa489faSSeongJae Park }, 1153baa489faSSeongJae Park /* Same as above, but using GUP-fast. */ 1154baa489faSSeongJae Park { 1155baa489faSSeongJae Park "R/O GUP-fast pin on R/O-mapped exclusive page", 1156baa489faSSeongJae Park test_ro_fast_pin_on_ro_exclusive, 1157baa489faSSeongJae Park }, 1158baa489faSSeongJae Park }; 1159baa489faSSeongJae Park 1160baa489faSSeongJae Park static void run_anon_test_case(struct test_case const *test_case) 1161baa489faSSeongJae Park { 1162baa489faSSeongJae Park int i; 1163baa489faSSeongJae Park 1164baa489faSSeongJae Park run_with_base_page(test_case->fn, test_case->desc); 1165baa489faSSeongJae Park run_with_base_page_swap(test_case->fn, test_case->desc); 1166c0f79103SRyan Roberts for (i = 0; i < nr_thpsizes; i++) { 1167c0f79103SRyan Roberts size_t size = thpsizes[i]; 1168c0f79103SRyan Roberts struct thp_settings settings = *thp_current_settings(); 1169c0f79103SRyan Roberts 1170c0f79103SRyan Roberts settings.hugepages[sz2ord(pmdsize)].enabled = THP_NEVER; 1171c0f79103SRyan Roberts settings.hugepages[sz2ord(size)].enabled = THP_ALWAYS; 1172c0f79103SRyan Roberts thp_push_settings(&settings); 1173c0f79103SRyan Roberts 1174c0f79103SRyan Roberts if (size == pmdsize) { 1175c0f79103SRyan Roberts run_with_thp(test_case->fn, test_case->desc, size); 1176c0f79103SRyan Roberts run_with_thp_swap(test_case->fn, test_case->desc, size); 1177c0f79103SRyan Roberts } 1178c0f79103SRyan Roberts 1179c0f79103SRyan Roberts run_with_pte_mapped_thp(test_case->fn, test_case->desc, size); 1180c0f79103SRyan Roberts run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc, size); 1181c0f79103SRyan Roberts run_with_single_pte_of_thp(test_case->fn, test_case->desc, size); 1182c0f79103SRyan Roberts run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc, size); 1183c0f79103SRyan Roberts run_with_partial_mremap_thp(test_case->fn, test_case->desc, size); 1184c0f79103SRyan Roberts run_with_partial_shared_thp(test_case->fn, test_case->desc, size); 1185c0f79103SRyan Roberts 1186c0f79103SRyan Roberts thp_pop_settings(); 1187baa489faSSeongJae Park } 1188baa489faSSeongJae Park for (i = 0; i < nr_hugetlbsizes; i++) 1189baa489faSSeongJae Park run_with_hugetlb(test_case->fn, test_case->desc, 1190baa489faSSeongJae Park hugetlbsizes[i]); 1191baa489faSSeongJae Park } 1192baa489faSSeongJae Park 1193baa489faSSeongJae Park static void run_anon_test_cases(void) 1194baa489faSSeongJae Park { 1195baa489faSSeongJae Park int i; 1196baa489faSSeongJae Park 1197baa489faSSeongJae Park ksft_print_msg("[INFO] Anonymous memory tests in private mappings\n"); 1198baa489faSSeongJae Park 1199baa489faSSeongJae Park for (i = 0; i < ARRAY_SIZE(anon_test_cases); i++) 1200baa489faSSeongJae Park run_anon_test_case(&anon_test_cases[i]); 1201baa489faSSeongJae Park } 1202baa489faSSeongJae Park 1203baa489faSSeongJae Park static int tests_per_anon_test_case(void) 1204baa489faSSeongJae Park { 1205baa489faSSeongJae Park int tests = 2 + nr_hugetlbsizes; 1206baa489faSSeongJae Park 1207c0f79103SRyan Roberts tests += 6 * nr_thpsizes; 120812dc16b3SRyan Roberts if (pmdsize) 1209c0f79103SRyan Roberts tests += 2; 1210baa489faSSeongJae Park return tests; 1211baa489faSSeongJae Park } 1212baa489faSSeongJae Park 1213baa489faSSeongJae Park enum anon_thp_collapse_test { 1214baa489faSSeongJae Park ANON_THP_COLLAPSE_UNSHARED, 1215baa489faSSeongJae Park ANON_THP_COLLAPSE_FULLY_SHARED, 1216baa489faSSeongJae Park ANON_THP_COLLAPSE_LOWER_SHARED, 1217baa489faSSeongJae Park ANON_THP_COLLAPSE_UPPER_SHARED, 1218baa489faSSeongJae Park }; 1219baa489faSSeongJae Park 1220baa489faSSeongJae Park static void do_test_anon_thp_collapse(char *mem, size_t size, 1221baa489faSSeongJae Park enum anon_thp_collapse_test test) 1222baa489faSSeongJae Park { 1223baa489faSSeongJae Park struct comm_pipes comm_pipes; 1224baa489faSSeongJae Park char buf; 1225baa489faSSeongJae Park int ret; 1226baa489faSSeongJae Park 1227baa489faSSeongJae Park ret = setup_comm_pipes(&comm_pipes); 1228baa489faSSeongJae Park if (ret) { 1229baa489faSSeongJae Park ksft_test_result_fail("pipe() failed\n"); 1230baa489faSSeongJae Park return; 1231baa489faSSeongJae Park } 1232baa489faSSeongJae Park 1233baa489faSSeongJae Park /* 1234baa489faSSeongJae Park * Trigger PTE-mapping the THP by temporarily mapping a single subpage 1235baa489faSSeongJae Park * R/O, such that we can try collapsing it later. 1236baa489faSSeongJae Park */ 1237baa489faSSeongJae Park ret = mprotect(mem + pagesize, pagesize, PROT_READ); 1238baa489faSSeongJae Park if (ret) { 1239baa489faSSeongJae Park ksft_test_result_fail("mprotect() failed\n"); 1240baa489faSSeongJae Park goto close_comm_pipes; 1241baa489faSSeongJae Park } 1242baa489faSSeongJae Park ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE); 1243baa489faSSeongJae Park if (ret) { 1244baa489faSSeongJae Park ksft_test_result_fail("mprotect() failed\n"); 1245baa489faSSeongJae Park goto close_comm_pipes; 1246baa489faSSeongJae Park } 1247baa489faSSeongJae Park 1248baa489faSSeongJae Park switch (test) { 1249baa489faSSeongJae Park case ANON_THP_COLLAPSE_UNSHARED: 1250baa489faSSeongJae Park /* Collapse before actually COW-sharing the page. */ 1251baa489faSSeongJae Park ret = madvise(mem, size, MADV_COLLAPSE); 1252baa489faSSeongJae Park if (ret) { 1253baa489faSSeongJae Park ksft_test_result_skip("MADV_COLLAPSE failed: %s\n", 1254baa489faSSeongJae Park strerror(errno)); 1255baa489faSSeongJae Park goto close_comm_pipes; 1256baa489faSSeongJae Park } 1257baa489faSSeongJae Park break; 1258baa489faSSeongJae Park case ANON_THP_COLLAPSE_FULLY_SHARED: 1259baa489faSSeongJae Park /* COW-share the full PTE-mapped THP. */ 1260baa489faSSeongJae Park break; 1261baa489faSSeongJae Park case ANON_THP_COLLAPSE_LOWER_SHARED: 1262baa489faSSeongJae Park /* Don't COW-share the upper part of the THP. */ 1263baa489faSSeongJae Park ret = madvise(mem + size / 2, size / 2, MADV_DONTFORK); 1264baa489faSSeongJae Park if (ret) { 1265baa489faSSeongJae Park ksft_test_result_fail("MADV_DONTFORK failed\n"); 1266baa489faSSeongJae Park goto close_comm_pipes; 1267baa489faSSeongJae Park } 1268baa489faSSeongJae Park break; 1269baa489faSSeongJae Park case ANON_THP_COLLAPSE_UPPER_SHARED: 1270baa489faSSeongJae Park /* Don't COW-share the lower part of the THP. */ 1271baa489faSSeongJae Park ret = madvise(mem, size / 2, MADV_DONTFORK); 1272baa489faSSeongJae Park if (ret) { 1273baa489faSSeongJae Park ksft_test_result_fail("MADV_DONTFORK failed\n"); 1274baa489faSSeongJae Park goto close_comm_pipes; 1275baa489faSSeongJae Park } 1276baa489faSSeongJae Park break; 1277baa489faSSeongJae Park default: 1278baa489faSSeongJae Park assert(false); 1279baa489faSSeongJae Park } 1280baa489faSSeongJae Park 1281baa489faSSeongJae Park ret = fork(); 1282baa489faSSeongJae Park if (ret < 0) { 1283baa489faSSeongJae Park ksft_test_result_fail("fork() failed\n"); 1284baa489faSSeongJae Park goto close_comm_pipes; 1285baa489faSSeongJae Park } else if (!ret) { 1286baa489faSSeongJae Park switch (test) { 1287baa489faSSeongJae Park case ANON_THP_COLLAPSE_UNSHARED: 1288baa489faSSeongJae Park case ANON_THP_COLLAPSE_FULLY_SHARED: 1289baa489faSSeongJae Park exit(child_memcmp_fn(mem, size, &comm_pipes)); 1290baa489faSSeongJae Park break; 1291baa489faSSeongJae Park case ANON_THP_COLLAPSE_LOWER_SHARED: 1292baa489faSSeongJae Park exit(child_memcmp_fn(mem, size / 2, &comm_pipes)); 1293baa489faSSeongJae Park break; 1294baa489faSSeongJae Park case ANON_THP_COLLAPSE_UPPER_SHARED: 1295baa489faSSeongJae Park exit(child_memcmp_fn(mem + size / 2, size / 2, 1296baa489faSSeongJae Park &comm_pipes)); 1297baa489faSSeongJae Park break; 1298baa489faSSeongJae Park default: 1299baa489faSSeongJae Park assert(false); 1300baa489faSSeongJae Park } 1301baa489faSSeongJae Park } 1302baa489faSSeongJae Park 1303baa489faSSeongJae Park while (read(comm_pipes.child_ready[0], &buf, 1) != 1) 1304baa489faSSeongJae Park ; 1305baa489faSSeongJae Park 1306baa489faSSeongJae Park switch (test) { 1307baa489faSSeongJae Park case ANON_THP_COLLAPSE_UNSHARED: 1308baa489faSSeongJae Park break; 1309baa489faSSeongJae Park case ANON_THP_COLLAPSE_UPPER_SHARED: 1310baa489faSSeongJae Park case ANON_THP_COLLAPSE_LOWER_SHARED: 1311baa489faSSeongJae Park /* 1312baa489faSSeongJae Park * Revert MADV_DONTFORK such that we merge the VMAs and are 1313baa489faSSeongJae Park * able to actually collapse. 1314baa489faSSeongJae Park */ 1315baa489faSSeongJae Park ret = madvise(mem, size, MADV_DOFORK); 1316baa489faSSeongJae Park if (ret) { 1317baa489faSSeongJae Park ksft_test_result_fail("MADV_DOFORK failed\n"); 1318baa489faSSeongJae Park write(comm_pipes.parent_ready[1], "0", 1); 1319baa489faSSeongJae Park wait(&ret); 1320baa489faSSeongJae Park goto close_comm_pipes; 1321baa489faSSeongJae Park } 1322baa489faSSeongJae Park /* FALLTHROUGH */ 1323baa489faSSeongJae Park case ANON_THP_COLLAPSE_FULLY_SHARED: 1324baa489faSSeongJae Park /* Collapse before anyone modified the COW-shared page. */ 1325baa489faSSeongJae Park ret = madvise(mem, size, MADV_COLLAPSE); 1326baa489faSSeongJae Park if (ret) { 1327baa489faSSeongJae Park ksft_test_result_skip("MADV_COLLAPSE failed: %s\n", 1328baa489faSSeongJae Park strerror(errno)); 1329baa489faSSeongJae Park write(comm_pipes.parent_ready[1], "0", 1); 1330baa489faSSeongJae Park wait(&ret); 1331baa489faSSeongJae Park goto close_comm_pipes; 1332baa489faSSeongJae Park } 1333baa489faSSeongJae Park break; 1334baa489faSSeongJae Park default: 1335baa489faSSeongJae Park assert(false); 1336baa489faSSeongJae Park } 1337baa489faSSeongJae Park 1338baa489faSSeongJae Park /* Modify the page. */ 1339baa489faSSeongJae Park memset(mem, 0xff, size); 1340baa489faSSeongJae Park write(comm_pipes.parent_ready[1], "0", 1); 1341baa489faSSeongJae Park 1342baa489faSSeongJae Park wait(&ret); 1343baa489faSSeongJae Park if (WIFEXITED(ret)) 1344baa489faSSeongJae Park ret = WEXITSTATUS(ret); 1345baa489faSSeongJae Park else 1346baa489faSSeongJae Park ret = -EINVAL; 1347baa489faSSeongJae Park 1348baa489faSSeongJae Park ksft_test_result(!ret, "No leak from parent into child\n"); 1349baa489faSSeongJae Park close_comm_pipes: 1350baa489faSSeongJae Park close_comm_pipes(&comm_pipes); 1351baa489faSSeongJae Park } 1352baa489faSSeongJae Park 1353*4bf6a4ebSDavid Hildenbrand static void test_anon_thp_collapse_unshared(char *mem, size_t size, 1354*4bf6a4ebSDavid Hildenbrand bool is_hugetlb) 1355baa489faSSeongJae Park { 1356*4bf6a4ebSDavid Hildenbrand assert(!is_hugetlb); 1357baa489faSSeongJae Park do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UNSHARED); 1358baa489faSSeongJae Park } 1359baa489faSSeongJae Park 1360*4bf6a4ebSDavid Hildenbrand static void test_anon_thp_collapse_fully_shared(char *mem, size_t size, 1361*4bf6a4ebSDavid Hildenbrand bool is_hugetlb) 1362baa489faSSeongJae Park { 1363*4bf6a4ebSDavid Hildenbrand assert(!is_hugetlb); 1364baa489faSSeongJae Park do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_FULLY_SHARED); 1365baa489faSSeongJae Park } 1366baa489faSSeongJae Park 1367*4bf6a4ebSDavid Hildenbrand static void test_anon_thp_collapse_lower_shared(char *mem, size_t size, 1368*4bf6a4ebSDavid Hildenbrand bool is_hugetlb) 1369baa489faSSeongJae Park { 1370*4bf6a4ebSDavid Hildenbrand assert(!is_hugetlb); 1371baa489faSSeongJae Park do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_LOWER_SHARED); 1372baa489faSSeongJae Park } 1373baa489faSSeongJae Park 1374*4bf6a4ebSDavid Hildenbrand static void test_anon_thp_collapse_upper_shared(char *mem, size_t size, 1375*4bf6a4ebSDavid Hildenbrand bool is_hugetlb) 1376baa489faSSeongJae Park { 1377*4bf6a4ebSDavid Hildenbrand assert(!is_hugetlb); 1378baa489faSSeongJae Park do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UPPER_SHARED); 1379baa489faSSeongJae Park } 1380baa489faSSeongJae Park 1381baa489faSSeongJae Park /* 1382baa489faSSeongJae Park * Test cases that are specific to anonymous THP: pages in private mappings 1383baa489faSSeongJae Park * that may get shared via COW during fork(). 1384baa489faSSeongJae Park */ 1385baa489faSSeongJae Park static const struct test_case anon_thp_test_cases[] = { 1386baa489faSSeongJae Park /* 1387baa489faSSeongJae Park * Basic COW test for fork() without any GUP when collapsing a THP 1388baa489faSSeongJae Park * before fork(). 1389baa489faSSeongJae Park * 1390baa489faSSeongJae Park * Re-mapping a PTE-mapped anon THP using a single PMD ("in-place 1391baa489faSSeongJae Park * collapse") might easily get COW handling wrong when not collapsing 1392baa489faSSeongJae Park * exclusivity information properly. 1393baa489faSSeongJae Park */ 1394baa489faSSeongJae Park { 1395baa489faSSeongJae Park "Basic COW after fork() when collapsing before fork()", 1396baa489faSSeongJae Park test_anon_thp_collapse_unshared, 1397baa489faSSeongJae Park }, 1398baa489faSSeongJae Park /* Basic COW test, but collapse after COW-sharing a full THP. */ 1399baa489faSSeongJae Park { 1400baa489faSSeongJae Park "Basic COW after fork() when collapsing after fork() (fully shared)", 1401baa489faSSeongJae Park test_anon_thp_collapse_fully_shared, 1402baa489faSSeongJae Park }, 1403baa489faSSeongJae Park /* 1404baa489faSSeongJae Park * Basic COW test, but collapse after COW-sharing the lower half of a 1405baa489faSSeongJae Park * THP. 1406baa489faSSeongJae Park */ 1407baa489faSSeongJae Park { 1408baa489faSSeongJae Park "Basic COW after fork() when collapsing after fork() (lower shared)", 1409baa489faSSeongJae Park test_anon_thp_collapse_lower_shared, 1410baa489faSSeongJae Park }, 1411baa489faSSeongJae Park /* 1412baa489faSSeongJae Park * Basic COW test, but collapse after COW-sharing the upper half of a 1413baa489faSSeongJae Park * THP. 1414baa489faSSeongJae Park */ 1415baa489faSSeongJae Park { 1416baa489faSSeongJae Park "Basic COW after fork() when collapsing after fork() (upper shared)", 1417baa489faSSeongJae Park test_anon_thp_collapse_upper_shared, 1418baa489faSSeongJae Park }, 1419baa489faSSeongJae Park }; 1420baa489faSSeongJae Park 1421baa489faSSeongJae Park static void run_anon_thp_test_cases(void) 1422baa489faSSeongJae Park { 1423baa489faSSeongJae Park int i; 1424baa489faSSeongJae Park 142512dc16b3SRyan Roberts if (!pmdsize) 1426baa489faSSeongJae Park return; 1427baa489faSSeongJae Park 1428baa489faSSeongJae Park ksft_print_msg("[INFO] Anonymous THP tests\n"); 1429baa489faSSeongJae Park 1430baa489faSSeongJae Park for (i = 0; i < ARRAY_SIZE(anon_thp_test_cases); i++) { 1431baa489faSSeongJae Park struct test_case const *test_case = &anon_thp_test_cases[i]; 1432baa489faSSeongJae Park 1433baa489faSSeongJae Park ksft_print_msg("[RUN] %s\n", test_case->desc); 143412dc16b3SRyan Roberts do_run_with_thp(test_case->fn, THP_RUN_PMD, pmdsize); 1435baa489faSSeongJae Park } 1436baa489faSSeongJae Park } 1437baa489faSSeongJae Park 1438baa489faSSeongJae Park static int tests_per_anon_thp_test_case(void) 1439baa489faSSeongJae Park { 144012dc16b3SRyan Roberts return pmdsize ? 1 : 0; 1441baa489faSSeongJae Park } 1442baa489faSSeongJae Park 1443baa489faSSeongJae Park typedef void (*non_anon_test_fn)(char *mem, const char *smem, size_t size); 1444baa489faSSeongJae Park 1445baa489faSSeongJae Park static void test_cow(char *mem, const char *smem, size_t size) 1446baa489faSSeongJae Park { 1447baa489faSSeongJae Park char *old = malloc(size); 1448baa489faSSeongJae Park 1449baa489faSSeongJae Park /* Backup the original content. */ 1450baa489faSSeongJae Park memcpy(old, smem, size); 1451baa489faSSeongJae Park 1452baa489faSSeongJae Park /* Modify the page. */ 1453baa489faSSeongJae Park memset(mem, 0xff, size); 1454baa489faSSeongJae Park 1455baa489faSSeongJae Park /* See if we still read the old values via the other mapping. */ 1456baa489faSSeongJae Park ksft_test_result(!memcmp(smem, old, size), 1457baa489faSSeongJae Park "Other mapping not modified\n"); 1458baa489faSSeongJae Park free(old); 1459baa489faSSeongJae Park } 1460baa489faSSeongJae Park 1461baa489faSSeongJae Park static void test_ro_pin(char *mem, const char *smem, size_t size) 1462baa489faSSeongJae Park { 1463baa489faSSeongJae Park do_test_ro_pin(mem, size, RO_PIN_TEST, false); 1464baa489faSSeongJae Park } 1465baa489faSSeongJae Park 1466baa489faSSeongJae Park static void test_ro_fast_pin(char *mem, const char *smem, size_t size) 1467baa489faSSeongJae Park { 1468baa489faSSeongJae Park do_test_ro_pin(mem, size, RO_PIN_TEST, true); 1469baa489faSSeongJae Park } 1470baa489faSSeongJae Park 1471baa489faSSeongJae Park static void run_with_zeropage(non_anon_test_fn fn, const char *desc) 1472baa489faSSeongJae Park { 1473baa489faSSeongJae Park char *mem, *smem, tmp; 1474baa489faSSeongJae Park 1475baa489faSSeongJae Park ksft_print_msg("[RUN] %s ... with shared zeropage\n", desc); 1476baa489faSSeongJae Park 1477baa489faSSeongJae Park mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, 1478baa489faSSeongJae Park MAP_PRIVATE | MAP_ANON, -1, 0); 1479baa489faSSeongJae Park if (mem == MAP_FAILED) { 1480baa489faSSeongJae Park ksft_test_result_fail("mmap() failed\n"); 1481baa489faSSeongJae Park return; 1482baa489faSSeongJae Park } 1483baa489faSSeongJae Park 1484baa489faSSeongJae Park smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0); 1485baa489faSSeongJae Park if (mem == MAP_FAILED) { 1486baa489faSSeongJae Park ksft_test_result_fail("mmap() failed\n"); 1487baa489faSSeongJae Park goto munmap; 1488baa489faSSeongJae Park } 1489baa489faSSeongJae Park 1490baa489faSSeongJae Park /* Read from the page to populate the shared zeropage. */ 1491baa489faSSeongJae Park tmp = *mem + *smem; 1492baa489faSSeongJae Park asm volatile("" : "+r" (tmp)); 1493baa489faSSeongJae Park 1494baa489faSSeongJae Park fn(mem, smem, pagesize); 1495baa489faSSeongJae Park munmap: 1496baa489faSSeongJae Park munmap(mem, pagesize); 1497baa489faSSeongJae Park if (smem != MAP_FAILED) 1498baa489faSSeongJae Park munmap(smem, pagesize); 1499baa489faSSeongJae Park } 1500baa489faSSeongJae Park 1501baa489faSSeongJae Park static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) 1502baa489faSSeongJae Park { 1503baa489faSSeongJae Park char *mem, *smem, *mmap_mem, *mmap_smem, tmp; 1504baa489faSSeongJae Park size_t mmap_size; 1505baa489faSSeongJae Park int ret; 1506baa489faSSeongJae Park 1507baa489faSSeongJae Park ksft_print_msg("[RUN] %s ... with huge zeropage\n", desc); 1508baa489faSSeongJae Park 1509baa489faSSeongJae Park if (!has_huge_zeropage) { 1510baa489faSSeongJae Park ksft_test_result_skip("Huge zeropage not enabled\n"); 1511baa489faSSeongJae Park return; 1512baa489faSSeongJae Park } 1513baa489faSSeongJae Park 1514baa489faSSeongJae Park /* For alignment purposes, we need twice the thp size. */ 151512dc16b3SRyan Roberts mmap_size = 2 * pmdsize; 1516baa489faSSeongJae Park mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, 1517baa489faSSeongJae Park MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 1518baa489faSSeongJae Park if (mmap_mem == MAP_FAILED) { 1519baa489faSSeongJae Park ksft_test_result_fail("mmap() failed\n"); 1520baa489faSSeongJae Park return; 1521baa489faSSeongJae Park } 1522baa489faSSeongJae Park mmap_smem = mmap(NULL, mmap_size, PROT_READ, 1523baa489faSSeongJae Park MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 1524baa489faSSeongJae Park if (mmap_smem == MAP_FAILED) { 1525baa489faSSeongJae Park ksft_test_result_fail("mmap() failed\n"); 1526baa489faSSeongJae Park goto munmap; 1527baa489faSSeongJae Park } 1528baa489faSSeongJae Park 1529baa489faSSeongJae Park /* We need a THP-aligned memory area. */ 153012dc16b3SRyan Roberts mem = (char *)(((uintptr_t)mmap_mem + pmdsize) & ~(pmdsize - 1)); 153112dc16b3SRyan Roberts smem = (char *)(((uintptr_t)mmap_smem + pmdsize) & ~(pmdsize - 1)); 1532baa489faSSeongJae Park 153312dc16b3SRyan Roberts ret = madvise(mem, pmdsize, MADV_HUGEPAGE); 153412dc16b3SRyan Roberts ret |= madvise(smem, pmdsize, MADV_HUGEPAGE); 1535baa489faSSeongJae Park if (ret) { 1536baa489faSSeongJae Park ksft_test_result_fail("MADV_HUGEPAGE failed\n"); 1537baa489faSSeongJae Park goto munmap; 1538baa489faSSeongJae Park } 1539baa489faSSeongJae Park 1540baa489faSSeongJae Park /* 1541baa489faSSeongJae Park * Read from the memory to populate the huge shared zeropage. Read from 1542baa489faSSeongJae Park * the first sub-page and test if we get another sub-page populated 1543baa489faSSeongJae Park * automatically. 1544baa489faSSeongJae Park */ 1545baa489faSSeongJae Park tmp = *mem + *smem; 1546baa489faSSeongJae Park asm volatile("" : "+r" (tmp)); 1547baa489faSSeongJae Park if (!pagemap_is_populated(pagemap_fd, mem + pagesize) || 1548baa489faSSeongJae Park !pagemap_is_populated(pagemap_fd, smem + pagesize)) { 1549baa489faSSeongJae Park ksft_test_result_skip("Did not get THPs populated\n"); 1550baa489faSSeongJae Park goto munmap; 1551baa489faSSeongJae Park } 1552baa489faSSeongJae Park 155312dc16b3SRyan Roberts fn(mem, smem, pmdsize); 1554baa489faSSeongJae Park munmap: 1555baa489faSSeongJae Park munmap(mmap_mem, mmap_size); 1556baa489faSSeongJae Park if (mmap_smem != MAP_FAILED) 1557baa489faSSeongJae Park munmap(mmap_smem, mmap_size); 1558baa489faSSeongJae Park } 1559baa489faSSeongJae Park 1560baa489faSSeongJae Park static void run_with_memfd(non_anon_test_fn fn, const char *desc) 1561baa489faSSeongJae Park { 1562baa489faSSeongJae Park char *mem, *smem, tmp; 1563baa489faSSeongJae Park int fd; 1564baa489faSSeongJae Park 1565baa489faSSeongJae Park ksft_print_msg("[RUN] %s ... with memfd\n", desc); 1566baa489faSSeongJae Park 1567baa489faSSeongJae Park fd = memfd_create("test", 0); 1568baa489faSSeongJae Park if (fd < 0) { 1569baa489faSSeongJae Park ksft_test_result_fail("memfd_create() failed\n"); 1570baa489faSSeongJae Park return; 1571baa489faSSeongJae Park } 1572baa489faSSeongJae Park 1573baa489faSSeongJae Park /* File consists of a single page filled with zeroes. */ 1574baa489faSSeongJae Park if (fallocate(fd, 0, 0, pagesize)) { 1575baa489faSSeongJae Park ksft_test_result_fail("fallocate() failed\n"); 1576baa489faSSeongJae Park goto close; 1577baa489faSSeongJae Park } 1578baa489faSSeongJae Park 1579baa489faSSeongJae Park /* Create a private mapping of the memfd. */ 1580baa489faSSeongJae Park mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); 1581baa489faSSeongJae Park if (mem == MAP_FAILED) { 1582baa489faSSeongJae Park ksft_test_result_fail("mmap() failed\n"); 1583baa489faSSeongJae Park goto close; 1584baa489faSSeongJae Park } 1585baa489faSSeongJae Park smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); 1586baa489faSSeongJae Park if (mem == MAP_FAILED) { 1587baa489faSSeongJae Park ksft_test_result_fail("mmap() failed\n"); 1588baa489faSSeongJae Park goto munmap; 1589baa489faSSeongJae Park } 1590baa489faSSeongJae Park 1591baa489faSSeongJae Park /* Fault the page in. */ 1592baa489faSSeongJae Park tmp = *mem + *smem; 1593baa489faSSeongJae Park asm volatile("" : "+r" (tmp)); 1594baa489faSSeongJae Park 1595baa489faSSeongJae Park fn(mem, smem, pagesize); 1596baa489faSSeongJae Park munmap: 1597baa489faSSeongJae Park munmap(mem, pagesize); 1598baa489faSSeongJae Park if (smem != MAP_FAILED) 1599baa489faSSeongJae Park munmap(smem, pagesize); 1600baa489faSSeongJae Park close: 1601baa489faSSeongJae Park close(fd); 1602baa489faSSeongJae Park } 1603baa489faSSeongJae Park 1604baa489faSSeongJae Park static void run_with_tmpfile(non_anon_test_fn fn, const char *desc) 1605baa489faSSeongJae Park { 1606baa489faSSeongJae Park char *mem, *smem, tmp; 1607baa489faSSeongJae Park FILE *file; 1608baa489faSSeongJae Park int fd; 1609baa489faSSeongJae Park 1610baa489faSSeongJae Park ksft_print_msg("[RUN] %s ... with tmpfile\n", desc); 1611baa489faSSeongJae Park 1612baa489faSSeongJae Park file = tmpfile(); 1613baa489faSSeongJae Park if (!file) { 1614baa489faSSeongJae Park ksft_test_result_fail("tmpfile() failed\n"); 1615baa489faSSeongJae Park return; 1616baa489faSSeongJae Park } 1617baa489faSSeongJae Park 1618baa489faSSeongJae Park fd = fileno(file); 1619baa489faSSeongJae Park if (fd < 0) { 1620baa489faSSeongJae Park ksft_test_result_skip("fileno() failed\n"); 1621baa489faSSeongJae Park return; 1622baa489faSSeongJae Park } 1623baa489faSSeongJae Park 1624baa489faSSeongJae Park /* File consists of a single page filled with zeroes. */ 1625baa489faSSeongJae Park if (fallocate(fd, 0, 0, pagesize)) { 1626baa489faSSeongJae Park ksft_test_result_fail("fallocate() failed\n"); 1627baa489faSSeongJae Park goto close; 1628baa489faSSeongJae Park } 1629baa489faSSeongJae Park 1630baa489faSSeongJae Park /* Create a private mapping of the memfd. */ 1631baa489faSSeongJae Park mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); 1632baa489faSSeongJae Park if (mem == MAP_FAILED) { 1633baa489faSSeongJae Park ksft_test_result_fail("mmap() failed\n"); 1634baa489faSSeongJae Park goto close; 1635baa489faSSeongJae Park } 1636baa489faSSeongJae Park smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); 1637baa489faSSeongJae Park if (mem == MAP_FAILED) { 1638baa489faSSeongJae Park ksft_test_result_fail("mmap() failed\n"); 1639baa489faSSeongJae Park goto munmap; 1640baa489faSSeongJae Park } 1641baa489faSSeongJae Park 1642baa489faSSeongJae Park /* Fault the page in. */ 1643baa489faSSeongJae Park tmp = *mem + *smem; 1644baa489faSSeongJae Park asm volatile("" : "+r" (tmp)); 1645baa489faSSeongJae Park 1646baa489faSSeongJae Park fn(mem, smem, pagesize); 1647baa489faSSeongJae Park munmap: 1648baa489faSSeongJae Park munmap(mem, pagesize); 1649baa489faSSeongJae Park if (smem != MAP_FAILED) 1650baa489faSSeongJae Park munmap(smem, pagesize); 1651baa489faSSeongJae Park close: 1652baa489faSSeongJae Park fclose(file); 1653baa489faSSeongJae Park } 1654baa489faSSeongJae Park 1655baa489faSSeongJae Park static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc, 1656baa489faSSeongJae Park size_t hugetlbsize) 1657baa489faSSeongJae Park { 1658baa489faSSeongJae Park int flags = MFD_HUGETLB; 1659baa489faSSeongJae Park char *mem, *smem, tmp; 1660baa489faSSeongJae Park int fd; 1661baa489faSSeongJae Park 1662baa489faSSeongJae Park ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc, 1663baa489faSSeongJae Park hugetlbsize / 1024); 1664baa489faSSeongJae Park 1665baa489faSSeongJae Park flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT; 1666baa489faSSeongJae Park 1667baa489faSSeongJae Park fd = memfd_create("test", flags); 1668baa489faSSeongJae Park if (fd < 0) { 1669baa489faSSeongJae Park ksft_test_result_skip("memfd_create() failed\n"); 1670baa489faSSeongJae Park return; 1671baa489faSSeongJae Park } 1672baa489faSSeongJae Park 1673baa489faSSeongJae Park /* File consists of a single page filled with zeroes. */ 1674baa489faSSeongJae Park if (fallocate(fd, 0, 0, hugetlbsize)) { 1675baa489faSSeongJae Park ksft_test_result_skip("need more free huge pages\n"); 1676baa489faSSeongJae Park goto close; 1677baa489faSSeongJae Park } 1678baa489faSSeongJae Park 1679baa489faSSeongJae Park /* Create a private mapping of the memfd. */ 1680baa489faSSeongJae Park mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 1681baa489faSSeongJae Park 0); 1682baa489faSSeongJae Park if (mem == MAP_FAILED) { 1683baa489faSSeongJae Park ksft_test_result_skip("need more free huge pages\n"); 1684baa489faSSeongJae Park goto close; 1685baa489faSSeongJae Park } 1686baa489faSSeongJae Park smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0); 1687baa489faSSeongJae Park if (mem == MAP_FAILED) { 1688baa489faSSeongJae Park ksft_test_result_fail("mmap() failed\n"); 1689baa489faSSeongJae Park goto munmap; 1690baa489faSSeongJae Park } 1691baa489faSSeongJae Park 1692baa489faSSeongJae Park /* Fault the page in. */ 1693baa489faSSeongJae Park tmp = *mem + *smem; 1694baa489faSSeongJae Park asm volatile("" : "+r" (tmp)); 1695baa489faSSeongJae Park 1696baa489faSSeongJae Park fn(mem, smem, hugetlbsize); 1697baa489faSSeongJae Park munmap: 1698baa489faSSeongJae Park munmap(mem, hugetlbsize); 1699baa489faSSeongJae Park if (mem != MAP_FAILED) 1700baa489faSSeongJae Park munmap(smem, hugetlbsize); 1701baa489faSSeongJae Park close: 1702baa489faSSeongJae Park close(fd); 1703baa489faSSeongJae Park } 1704baa489faSSeongJae Park 1705baa489faSSeongJae Park struct non_anon_test_case { 1706baa489faSSeongJae Park const char *desc; 1707baa489faSSeongJae Park non_anon_test_fn fn; 1708baa489faSSeongJae Park }; 1709baa489faSSeongJae Park 1710baa489faSSeongJae Park /* 1711baa489faSSeongJae Park * Test cases that target any pages in private mappings that are not anonymous: 1712baa489faSSeongJae Park * pages that may get shared via COW ndependent of fork(). This includes 1713baa489faSSeongJae Park * the shared zeropage(s), pagecache pages, ... 1714baa489faSSeongJae Park */ 1715baa489faSSeongJae Park static const struct non_anon_test_case non_anon_test_cases[] = { 1716baa489faSSeongJae Park /* 1717baa489faSSeongJae Park * Basic COW test without any GUP. If we miss to break COW, changes are 1718baa489faSSeongJae Park * visible via other private/shared mappings. 1719baa489faSSeongJae Park */ 1720baa489faSSeongJae Park { 1721baa489faSSeongJae Park "Basic COW", 1722baa489faSSeongJae Park test_cow, 1723baa489faSSeongJae Park }, 1724baa489faSSeongJae Park /* 1725baa489faSSeongJae Park * Take a R/O longterm pin. When modifying the page via the page table, 1726baa489faSSeongJae Park * the page content change must be visible via the pin. 1727baa489faSSeongJae Park */ 1728baa489faSSeongJae Park { 1729baa489faSSeongJae Park "R/O longterm GUP pin", 1730baa489faSSeongJae Park test_ro_pin, 1731baa489faSSeongJae Park }, 1732baa489faSSeongJae Park /* Same as above, but using GUP-fast. */ 1733baa489faSSeongJae Park { 1734baa489faSSeongJae Park "R/O longterm GUP-fast pin", 1735baa489faSSeongJae Park test_ro_fast_pin, 1736baa489faSSeongJae Park }, 1737baa489faSSeongJae Park }; 1738baa489faSSeongJae Park 1739baa489faSSeongJae Park static void run_non_anon_test_case(struct non_anon_test_case const *test_case) 1740baa489faSSeongJae Park { 1741baa489faSSeongJae Park int i; 1742baa489faSSeongJae Park 1743baa489faSSeongJae Park run_with_zeropage(test_case->fn, test_case->desc); 1744baa489faSSeongJae Park run_with_memfd(test_case->fn, test_case->desc); 1745baa489faSSeongJae Park run_with_tmpfile(test_case->fn, test_case->desc); 174612dc16b3SRyan Roberts if (pmdsize) 1747baa489faSSeongJae Park run_with_huge_zeropage(test_case->fn, test_case->desc); 1748baa489faSSeongJae Park for (i = 0; i < nr_hugetlbsizes; i++) 1749baa489faSSeongJae Park run_with_memfd_hugetlb(test_case->fn, test_case->desc, 1750baa489faSSeongJae Park hugetlbsizes[i]); 1751baa489faSSeongJae Park } 1752baa489faSSeongJae Park 1753baa489faSSeongJae Park static void run_non_anon_test_cases(void) 1754baa489faSSeongJae Park { 1755baa489faSSeongJae Park int i; 1756baa489faSSeongJae Park 1757baa489faSSeongJae Park ksft_print_msg("[RUN] Non-anonymous memory tests in private mappings\n"); 1758baa489faSSeongJae Park 1759baa489faSSeongJae Park for (i = 0; i < ARRAY_SIZE(non_anon_test_cases); i++) 1760baa489faSSeongJae Park run_non_anon_test_case(&non_anon_test_cases[i]); 1761baa489faSSeongJae Park } 1762baa489faSSeongJae Park 1763baa489faSSeongJae Park static int tests_per_non_anon_test_case(void) 1764baa489faSSeongJae Park { 1765baa489faSSeongJae Park int tests = 3 + nr_hugetlbsizes; 1766baa489faSSeongJae Park 176712dc16b3SRyan Roberts if (pmdsize) 1768baa489faSSeongJae Park tests += 1; 1769baa489faSSeongJae Park return tests; 1770baa489faSSeongJae Park } 1771baa489faSSeongJae Park 1772baa489faSSeongJae Park int main(int argc, char **argv) 1773baa489faSSeongJae Park { 1774baa489faSSeongJae Park int err; 1775c0f79103SRyan Roberts struct thp_settings default_settings; 1776baa489faSSeongJae Park 1777a6fcd57cSDavid Hildenbrand ksft_print_header(); 1778a6fcd57cSDavid Hildenbrand 1779baa489faSSeongJae Park pagesize = getpagesize(); 178012dc16b3SRyan Roberts pmdsize = read_pmd_pagesize(); 178112dc16b3SRyan Roberts if (pmdsize) { 1782c0f79103SRyan Roberts /* Only if THP is supported. */ 1783c0f79103SRyan Roberts thp_read_settings(&default_settings); 1784c0f79103SRyan Roberts default_settings.hugepages[sz2ord(pmdsize)].enabled = THP_INHERIT; 1785c0f79103SRyan Roberts thp_save_settings(); 1786c0f79103SRyan Roberts thp_push_settings(&default_settings); 1787c0f79103SRyan Roberts 178812dc16b3SRyan Roberts ksft_print_msg("[INFO] detected PMD size: %zu KiB\n", 178912dc16b3SRyan Roberts pmdsize / 1024); 1790c0f79103SRyan Roberts nr_thpsizes = detect_thp_sizes(thpsizes, ARRAY_SIZE(thpsizes)); 179112dc16b3SRyan Roberts } 179281b1e3f9SDavid Hildenbrand nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes, 179381b1e3f9SDavid Hildenbrand ARRAY_SIZE(hugetlbsizes)); 1794baa489faSSeongJae Park detect_huge_zeropage(); 1795baa489faSSeongJae Park 1796baa489faSSeongJae Park ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() + 1797baa489faSSeongJae Park ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() + 1798baa489faSSeongJae Park ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case()); 1799baa489faSSeongJae Park 1800baa489faSSeongJae Park gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR); 1801baa489faSSeongJae Park pagemap_fd = open("/proc/self/pagemap", O_RDONLY); 1802baa489faSSeongJae Park if (pagemap_fd < 0) 1803baa489faSSeongJae Park ksft_exit_fail_msg("opening pagemap failed\n"); 1804baa489faSSeongJae Park 1805baa489faSSeongJae Park run_anon_test_cases(); 1806baa489faSSeongJae Park run_anon_thp_test_cases(); 1807baa489faSSeongJae Park run_non_anon_test_cases(); 1808baa489faSSeongJae Park 1809c0f79103SRyan Roberts if (pmdsize) { 1810c0f79103SRyan Roberts /* Only if THP is supported. */ 1811c0f79103SRyan Roberts thp_restore_settings(); 1812c0f79103SRyan Roberts } 1813c0f79103SRyan Roberts 1814baa489faSSeongJae Park err = ksft_get_fail_cnt(); 1815baa489faSSeongJae Park if (err) 1816baa489faSSeongJae Park ksft_exit_fail_msg("%d out of %d tests failed\n", 1817baa489faSSeongJae Park err, ksft_test_num()); 181869e545edSNathan Chancellor ksft_exit_pass(); 1819baa489faSSeongJae Park } 1820