1baa489faSSeongJae Park // SPDX-License-Identifier: GPL-2.0-only 2baa489faSSeongJae Park /* 3baa489faSSeongJae Park * COW (Copy On Write) tests. 4baa489faSSeongJae Park * 5baa489faSSeongJae Park * Copyright 2022, Red Hat, Inc. 6baa489faSSeongJae Park * 7baa489faSSeongJae Park * Author(s): David Hildenbrand <david@redhat.com> 8baa489faSSeongJae Park */ 9baa489faSSeongJae Park #define _GNU_SOURCE 10baa489faSSeongJae Park #include <stdlib.h> 11baa489faSSeongJae Park #include <string.h> 12baa489faSSeongJae Park #include <stdbool.h> 13baa489faSSeongJae Park #include <stdint.h> 14baa489faSSeongJae Park #include <unistd.h> 15baa489faSSeongJae Park #include <errno.h> 16baa489faSSeongJae Park #include <fcntl.h> 17baa489faSSeongJae Park #include <assert.h> 18baa489faSSeongJae Park #include <sys/mman.h> 19baa489faSSeongJae Park #include <sys/ioctl.h> 20baa489faSSeongJae Park #include <sys/wait.h> 21baa489faSSeongJae Park #include <linux/memfd.h> 22baa489faSSeongJae Park 23baa489faSSeongJae Park #include "local_config.h" 24baa489faSSeongJae Park #ifdef LOCAL_CONFIG_HAVE_LIBURING 25baa489faSSeongJae Park #include <liburing.h> 26baa489faSSeongJae Park #endif /* LOCAL_CONFIG_HAVE_LIBURING */ 27baa489faSSeongJae Park 28baa489faSSeongJae Park #include "../../../../mm/gup_test.h" 29baa489faSSeongJae Park #include "../kselftest.h" 30baa489faSSeongJae Park #include "vm_util.h" 31baa489faSSeongJae Park 32f4d9139fSDavid Hildenbrand #ifndef MADV_PAGEOUT 33f4d9139fSDavid Hildenbrand #define MADV_PAGEOUT 21 34f4d9139fSDavid Hildenbrand #endif 35baa489faSSeongJae Park #ifndef MADV_COLLAPSE 36baa489faSSeongJae Park #define MADV_COLLAPSE 25 37baa489faSSeongJae Park #endif 38baa489faSSeongJae Park 39baa489faSSeongJae Park static size_t pagesize; 40baa489faSSeongJae Park static int pagemap_fd; 41baa489faSSeongJae Park static size_t thpsize; 42baa489faSSeongJae Park static int nr_hugetlbsizes; 43baa489faSSeongJae Park static size_t hugetlbsizes[10]; 44baa489faSSeongJae Park static int gup_fd; 45baa489faSSeongJae Park static bool has_huge_zeropage; 46baa489faSSeongJae Park 47baa489faSSeongJae Park static void detect_huge_zeropage(void) 48baa489faSSeongJae Park { 49baa489faSSeongJae Park int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page", 50baa489faSSeongJae Park O_RDONLY); 51baa489faSSeongJae Park size_t enabled = 0; 52baa489faSSeongJae Park char buf[15]; 53baa489faSSeongJae Park int ret; 54baa489faSSeongJae Park 55baa489faSSeongJae Park if (fd < 0) 56baa489faSSeongJae Park return; 57baa489faSSeongJae Park 58baa489faSSeongJae Park ret = pread(fd, buf, sizeof(buf), 0); 59baa489faSSeongJae Park if (ret > 0 && ret < sizeof(buf)) { 60baa489faSSeongJae Park buf[ret] = 0; 61baa489faSSeongJae Park 62baa489faSSeongJae Park enabled = strtoul(buf, NULL, 10); 63baa489faSSeongJae Park if (enabled == 1) { 64baa489faSSeongJae Park has_huge_zeropage = true; 65baa489faSSeongJae Park ksft_print_msg("[INFO] huge zeropage is enabled\n"); 66baa489faSSeongJae Park } 67baa489faSSeongJae Park } 68baa489faSSeongJae Park 69baa489faSSeongJae Park close(fd); 70baa489faSSeongJae Park } 71baa489faSSeongJae Park 72baa489faSSeongJae Park static bool range_is_swapped(void *addr, size_t size) 73baa489faSSeongJae Park { 74baa489faSSeongJae Park for (; size; addr += pagesize, size -= pagesize) 75baa489faSSeongJae Park if (!pagemap_is_swapped(pagemap_fd, addr)) 76baa489faSSeongJae Park return false; 77baa489faSSeongJae Park return true; 78baa489faSSeongJae Park } 79baa489faSSeongJae Park 80baa489faSSeongJae Park struct comm_pipes { 81baa489faSSeongJae Park int child_ready[2]; 82baa489faSSeongJae Park int parent_ready[2]; 83baa489faSSeongJae Park }; 84baa489faSSeongJae Park 85baa489faSSeongJae Park static int setup_comm_pipes(struct comm_pipes *comm_pipes) 86baa489faSSeongJae Park { 87baa489faSSeongJae Park if (pipe(comm_pipes->child_ready) < 0) 88baa489faSSeongJae Park return -errno; 89baa489faSSeongJae Park if (pipe(comm_pipes->parent_ready) < 0) { 90baa489faSSeongJae Park close(comm_pipes->child_ready[0]); 91baa489faSSeongJae Park close(comm_pipes->child_ready[1]); 92baa489faSSeongJae Park return -errno; 93baa489faSSeongJae Park } 94baa489faSSeongJae Park 95baa489faSSeongJae Park return 0; 96baa489faSSeongJae Park } 97baa489faSSeongJae Park 98baa489faSSeongJae Park static void close_comm_pipes(struct comm_pipes *comm_pipes) 99baa489faSSeongJae Park { 100baa489faSSeongJae Park close(comm_pipes->child_ready[0]); 101baa489faSSeongJae Park close(comm_pipes->child_ready[1]); 102baa489faSSeongJae Park close(comm_pipes->parent_ready[0]); 103baa489faSSeongJae Park close(comm_pipes->parent_ready[1]); 104baa489faSSeongJae Park } 105baa489faSSeongJae Park 106baa489faSSeongJae Park static int child_memcmp_fn(char *mem, size_t size, 107baa489faSSeongJae Park struct comm_pipes *comm_pipes) 108baa489faSSeongJae Park { 109baa489faSSeongJae Park char *old = malloc(size); 110baa489faSSeongJae Park char buf; 111baa489faSSeongJae Park 112baa489faSSeongJae Park /* Backup the original content. */ 113baa489faSSeongJae Park memcpy(old, mem, size); 114baa489faSSeongJae Park 115baa489faSSeongJae Park /* Wait until the parent modified the page. */ 116baa489faSSeongJae Park write(comm_pipes->child_ready[1], "0", 1); 117baa489faSSeongJae Park while (read(comm_pipes->parent_ready[0], &buf, 1) != 1) 118baa489faSSeongJae Park ; 119baa489faSSeongJae Park 120baa489faSSeongJae Park /* See if we still read the old values. */ 121baa489faSSeongJae Park return memcmp(old, mem, size); 122baa489faSSeongJae Park } 123baa489faSSeongJae Park 124baa489faSSeongJae Park static int child_vmsplice_memcmp_fn(char *mem, size_t size, 125baa489faSSeongJae Park struct comm_pipes *comm_pipes) 126baa489faSSeongJae Park { 127baa489faSSeongJae Park struct iovec iov = { 128baa489faSSeongJae Park .iov_base = mem, 129baa489faSSeongJae Park .iov_len = size, 130baa489faSSeongJae Park }; 131baa489faSSeongJae Park ssize_t cur, total, transferred; 132baa489faSSeongJae Park char *old, *new; 133baa489faSSeongJae Park int fds[2]; 134baa489faSSeongJae Park char buf; 135baa489faSSeongJae Park 136baa489faSSeongJae Park old = malloc(size); 137baa489faSSeongJae Park new = malloc(size); 138baa489faSSeongJae Park 139baa489faSSeongJae Park /* Backup the original content. */ 140baa489faSSeongJae Park memcpy(old, mem, size); 141baa489faSSeongJae Park 142baa489faSSeongJae Park if (pipe(fds) < 0) 143baa489faSSeongJae Park return -errno; 144baa489faSSeongJae Park 145baa489faSSeongJae Park /* Trigger a read-only pin. */ 146baa489faSSeongJae Park transferred = vmsplice(fds[1], &iov, 1, 0); 147baa489faSSeongJae Park if (transferred < 0) 148baa489faSSeongJae Park return -errno; 149baa489faSSeongJae Park if (transferred == 0) 150baa489faSSeongJae Park return -EINVAL; 151baa489faSSeongJae Park 152baa489faSSeongJae Park /* Unmap it from our page tables. */ 153baa489faSSeongJae Park if (munmap(mem, size) < 0) 154baa489faSSeongJae Park return -errno; 155baa489faSSeongJae Park 156baa489faSSeongJae Park /* Wait until the parent modified it. */ 157baa489faSSeongJae Park write(comm_pipes->child_ready[1], "0", 1); 158baa489faSSeongJae Park while (read(comm_pipes->parent_ready[0], &buf, 1) != 1) 159baa489faSSeongJae Park ; 160baa489faSSeongJae Park 161baa489faSSeongJae Park /* See if we still read the old values via the pipe. */ 162baa489faSSeongJae Park for (total = 0; total < transferred; total += cur) { 163baa489faSSeongJae Park cur = read(fds[0], new + total, transferred - total); 164baa489faSSeongJae Park if (cur < 0) 165baa489faSSeongJae Park return -errno; 166baa489faSSeongJae Park } 167baa489faSSeongJae Park 168baa489faSSeongJae Park return memcmp(old, new, transferred); 169baa489faSSeongJae Park } 170baa489faSSeongJae Park 171baa489faSSeongJae Park typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes); 172baa489faSSeongJae Park 173baa489faSSeongJae Park static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect, 174baa489faSSeongJae Park child_fn fn) 175baa489faSSeongJae Park { 176baa489faSSeongJae Park struct comm_pipes comm_pipes; 177baa489faSSeongJae Park char buf; 178baa489faSSeongJae Park int ret; 179baa489faSSeongJae Park 180baa489faSSeongJae Park ret = setup_comm_pipes(&comm_pipes); 181baa489faSSeongJae Park if (ret) { 182baa489faSSeongJae Park ksft_test_result_fail("pipe() failed\n"); 183baa489faSSeongJae Park return; 184baa489faSSeongJae Park } 185baa489faSSeongJae Park 186baa489faSSeongJae Park ret = fork(); 187baa489faSSeongJae Park if (ret < 0) { 188baa489faSSeongJae Park ksft_test_result_fail("fork() failed\n"); 189baa489faSSeongJae Park goto close_comm_pipes; 190baa489faSSeongJae Park } else if (!ret) { 191baa489faSSeongJae Park exit(fn(mem, size, &comm_pipes)); 192baa489faSSeongJae Park } 193baa489faSSeongJae Park 194baa489faSSeongJae Park while (read(comm_pipes.child_ready[0], &buf, 1) != 1) 195baa489faSSeongJae Park ; 196baa489faSSeongJae Park 197baa489faSSeongJae Park if (do_mprotect) { 198baa489faSSeongJae Park /* 199baa489faSSeongJae Park * mprotect() optimizations might try avoiding 200baa489faSSeongJae Park * write-faults by directly mapping pages writable. 201baa489faSSeongJae Park */ 202baa489faSSeongJae Park ret = mprotect(mem, size, PROT_READ); 203baa489faSSeongJae Park ret |= mprotect(mem, size, PROT_READ|PROT_WRITE); 204baa489faSSeongJae Park if (ret) { 205baa489faSSeongJae Park ksft_test_result_fail("mprotect() failed\n"); 206baa489faSSeongJae Park write(comm_pipes.parent_ready[1], "0", 1); 207baa489faSSeongJae Park wait(&ret); 208baa489faSSeongJae Park goto close_comm_pipes; 209baa489faSSeongJae Park } 210baa489faSSeongJae Park } 211baa489faSSeongJae Park 212baa489faSSeongJae Park /* Modify the page. */ 213baa489faSSeongJae Park memset(mem, 0xff, size); 214baa489faSSeongJae Park write(comm_pipes.parent_ready[1], "0", 1); 215baa489faSSeongJae Park 216baa489faSSeongJae Park wait(&ret); 217baa489faSSeongJae Park if (WIFEXITED(ret)) 218baa489faSSeongJae Park ret = WEXITSTATUS(ret); 219baa489faSSeongJae Park else 220baa489faSSeongJae Park ret = -EINVAL; 221baa489faSSeongJae Park 222baa489faSSeongJae Park ksft_test_result(!ret, "No leak from parent into child\n"); 223baa489faSSeongJae Park close_comm_pipes: 224baa489faSSeongJae Park close_comm_pipes(&comm_pipes); 225baa489faSSeongJae Park } 226baa489faSSeongJae Park 227baa489faSSeongJae Park static void test_cow_in_parent(char *mem, size_t size) 228baa489faSSeongJae Park { 229baa489faSSeongJae Park do_test_cow_in_parent(mem, size, false, child_memcmp_fn); 230baa489faSSeongJae Park } 231baa489faSSeongJae Park 232baa489faSSeongJae Park static void test_cow_in_parent_mprotect(char *mem, size_t size) 233baa489faSSeongJae Park { 234baa489faSSeongJae Park do_test_cow_in_parent(mem, size, true, child_memcmp_fn); 235baa489faSSeongJae Park } 236baa489faSSeongJae Park 237baa489faSSeongJae Park static void test_vmsplice_in_child(char *mem, size_t size) 238baa489faSSeongJae Park { 239baa489faSSeongJae Park do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn); 240baa489faSSeongJae Park } 241baa489faSSeongJae Park 242baa489faSSeongJae Park static void test_vmsplice_in_child_mprotect(char *mem, size_t size) 243baa489faSSeongJae Park { 244baa489faSSeongJae Park do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn); 245baa489faSSeongJae Park } 246baa489faSSeongJae Park 247baa489faSSeongJae Park static void do_test_vmsplice_in_parent(char *mem, size_t size, 248baa489faSSeongJae Park bool before_fork) 249baa489faSSeongJae Park { 250baa489faSSeongJae Park struct iovec iov = { 251baa489faSSeongJae Park .iov_base = mem, 252baa489faSSeongJae Park .iov_len = size, 253baa489faSSeongJae Park }; 254baa489faSSeongJae Park ssize_t cur, total, transferred; 255baa489faSSeongJae Park struct comm_pipes comm_pipes; 256baa489faSSeongJae Park char *old, *new; 257baa489faSSeongJae Park int ret, fds[2]; 258baa489faSSeongJae Park char buf; 259baa489faSSeongJae Park 260baa489faSSeongJae Park old = malloc(size); 261baa489faSSeongJae Park new = malloc(size); 262baa489faSSeongJae Park 263baa489faSSeongJae Park memcpy(old, mem, size); 264baa489faSSeongJae Park 265baa489faSSeongJae Park ret = setup_comm_pipes(&comm_pipes); 266baa489faSSeongJae Park if (ret) { 267baa489faSSeongJae Park ksft_test_result_fail("pipe() failed\n"); 268baa489faSSeongJae Park goto free; 269baa489faSSeongJae Park } 270baa489faSSeongJae Park 271baa489faSSeongJae Park if (pipe(fds) < 0) { 272baa489faSSeongJae Park ksft_test_result_fail("pipe() failed\n"); 273baa489faSSeongJae Park goto close_comm_pipes; 274baa489faSSeongJae Park } 275baa489faSSeongJae Park 276baa489faSSeongJae Park if (before_fork) { 277baa489faSSeongJae Park transferred = vmsplice(fds[1], &iov, 1, 0); 278baa489faSSeongJae Park if (transferred <= 0) { 279baa489faSSeongJae Park ksft_test_result_fail("vmsplice() failed\n"); 280baa489faSSeongJae Park goto close_pipe; 281baa489faSSeongJae Park } 282baa489faSSeongJae Park } 283baa489faSSeongJae Park 284baa489faSSeongJae Park ret = fork(); 285baa489faSSeongJae Park if (ret < 0) { 286baa489faSSeongJae Park ksft_test_result_fail("fork() failed\n"); 287baa489faSSeongJae Park goto close_pipe; 288baa489faSSeongJae Park } else if (!ret) { 289baa489faSSeongJae Park write(comm_pipes.child_ready[1], "0", 1); 290baa489faSSeongJae Park while (read(comm_pipes.parent_ready[0], &buf, 1) != 1) 291baa489faSSeongJae Park ; 292baa489faSSeongJae Park /* Modify page content in the child. */ 293baa489faSSeongJae Park memset(mem, 0xff, size); 294baa489faSSeongJae Park exit(0); 295baa489faSSeongJae Park } 296baa489faSSeongJae Park 297baa489faSSeongJae Park if (!before_fork) { 298baa489faSSeongJae Park transferred = vmsplice(fds[1], &iov, 1, 0); 299baa489faSSeongJae Park if (transferred <= 0) { 300baa489faSSeongJae Park ksft_test_result_fail("vmsplice() failed\n"); 301baa489faSSeongJae Park wait(&ret); 302baa489faSSeongJae Park goto close_pipe; 303baa489faSSeongJae Park } 304baa489faSSeongJae Park } 305baa489faSSeongJae Park 306baa489faSSeongJae Park while (read(comm_pipes.child_ready[0], &buf, 1) != 1) 307baa489faSSeongJae Park ; 308baa489faSSeongJae Park if (munmap(mem, size) < 0) { 309baa489faSSeongJae Park ksft_test_result_fail("munmap() failed\n"); 310baa489faSSeongJae Park goto close_pipe; 311baa489faSSeongJae Park } 312baa489faSSeongJae Park write(comm_pipes.parent_ready[1], "0", 1); 313baa489faSSeongJae Park 314baa489faSSeongJae Park /* Wait until the child is done writing. */ 315baa489faSSeongJae Park wait(&ret); 316baa489faSSeongJae Park if (!WIFEXITED(ret)) { 317baa489faSSeongJae Park ksft_test_result_fail("wait() failed\n"); 318baa489faSSeongJae Park goto close_pipe; 319baa489faSSeongJae Park } 320baa489faSSeongJae Park 321baa489faSSeongJae Park /* See if we still read the old values. */ 322baa489faSSeongJae Park for (total = 0; total < transferred; total += cur) { 323baa489faSSeongJae Park cur = read(fds[0], new + total, transferred - total); 324baa489faSSeongJae Park if (cur < 0) { 325baa489faSSeongJae Park ksft_test_result_fail("read() failed\n"); 326baa489faSSeongJae Park goto close_pipe; 327baa489faSSeongJae Park } 328baa489faSSeongJae Park } 329baa489faSSeongJae Park 330baa489faSSeongJae Park ksft_test_result(!memcmp(old, new, transferred), 331baa489faSSeongJae Park "No leak from child into parent\n"); 332baa489faSSeongJae Park close_pipe: 333baa489faSSeongJae Park close(fds[0]); 334baa489faSSeongJae Park close(fds[1]); 335baa489faSSeongJae Park close_comm_pipes: 336baa489faSSeongJae Park close_comm_pipes(&comm_pipes); 337baa489faSSeongJae Park free: 338baa489faSSeongJae Park free(old); 339baa489faSSeongJae Park free(new); 340baa489faSSeongJae Park } 341baa489faSSeongJae Park 342baa489faSSeongJae Park static void test_vmsplice_before_fork(char *mem, size_t size) 343baa489faSSeongJae Park { 344baa489faSSeongJae Park do_test_vmsplice_in_parent(mem, size, true); 345baa489faSSeongJae Park } 346baa489faSSeongJae Park 347baa489faSSeongJae Park static void test_vmsplice_after_fork(char *mem, size_t size) 348baa489faSSeongJae Park { 349baa489faSSeongJae Park do_test_vmsplice_in_parent(mem, size, false); 350baa489faSSeongJae Park } 351baa489faSSeongJae Park 352baa489faSSeongJae Park #ifdef LOCAL_CONFIG_HAVE_LIBURING 353baa489faSSeongJae Park static void do_test_iouring(char *mem, size_t size, bool use_fork) 354baa489faSSeongJae Park { 355baa489faSSeongJae Park struct comm_pipes comm_pipes; 356baa489faSSeongJae Park struct io_uring_cqe *cqe; 357baa489faSSeongJae Park struct io_uring_sqe *sqe; 358baa489faSSeongJae Park struct io_uring ring; 359baa489faSSeongJae Park ssize_t cur, total; 360baa489faSSeongJae Park struct iovec iov; 361baa489faSSeongJae Park char *buf, *tmp; 362baa489faSSeongJae Park int ret, fd; 363baa489faSSeongJae Park FILE *file; 364baa489faSSeongJae Park 365baa489faSSeongJae Park ret = setup_comm_pipes(&comm_pipes); 366baa489faSSeongJae Park if (ret) { 367baa489faSSeongJae Park ksft_test_result_fail("pipe() failed\n"); 368baa489faSSeongJae Park return; 369baa489faSSeongJae Park } 370baa489faSSeongJae Park 371baa489faSSeongJae Park file = tmpfile(); 372baa489faSSeongJae Park if (!file) { 373baa489faSSeongJae Park ksft_test_result_fail("tmpfile() failed\n"); 374baa489faSSeongJae Park goto close_comm_pipes; 375baa489faSSeongJae Park } 376baa489faSSeongJae Park fd = fileno(file); 377baa489faSSeongJae Park assert(fd); 378baa489faSSeongJae Park 379baa489faSSeongJae Park tmp = malloc(size); 380baa489faSSeongJae Park if (!tmp) { 381baa489faSSeongJae Park ksft_test_result_fail("malloc() failed\n"); 382baa489faSSeongJae Park goto close_file; 383baa489faSSeongJae Park } 384baa489faSSeongJae Park 385baa489faSSeongJae Park /* Skip on errors, as we might just lack kernel support. */ 386baa489faSSeongJae Park ret = io_uring_queue_init(1, &ring, 0); 387baa489faSSeongJae Park if (ret < 0) { 388baa489faSSeongJae Park ksft_test_result_skip("io_uring_queue_init() failed\n"); 389baa489faSSeongJae Park goto free_tmp; 390baa489faSSeongJae Park } 391baa489faSSeongJae Park 392baa489faSSeongJae Park /* 393baa489faSSeongJae Park * Register the range as a fixed buffer. This will FOLL_WRITE | FOLL_PIN 394baa489faSSeongJae Park * | FOLL_LONGTERM the range. 395baa489faSSeongJae Park * 396baa489faSSeongJae Park * Skip on errors, as we might just lack kernel support or might not 397baa489faSSeongJae Park * have sufficient MEMLOCK permissions. 398baa489faSSeongJae Park */ 399baa489faSSeongJae Park iov.iov_base = mem; 400baa489faSSeongJae Park iov.iov_len = size; 401baa489faSSeongJae Park ret = io_uring_register_buffers(&ring, &iov, 1); 402baa489faSSeongJae Park if (ret) { 403baa489faSSeongJae Park ksft_test_result_skip("io_uring_register_buffers() failed\n"); 404baa489faSSeongJae Park goto queue_exit; 405baa489faSSeongJae Park } 406baa489faSSeongJae Park 407baa489faSSeongJae Park if (use_fork) { 408baa489faSSeongJae Park /* 409baa489faSSeongJae Park * fork() and keep the child alive until we're done. Note that 410baa489faSSeongJae Park * we expect the pinned page to not get shared with the child. 411baa489faSSeongJae Park */ 412baa489faSSeongJae Park ret = fork(); 413baa489faSSeongJae Park if (ret < 0) { 414baa489faSSeongJae Park ksft_test_result_fail("fork() failed\n"); 415baa489faSSeongJae Park goto unregister_buffers; 416baa489faSSeongJae Park } else if (!ret) { 417baa489faSSeongJae Park write(comm_pipes.child_ready[1], "0", 1); 418baa489faSSeongJae Park while (read(comm_pipes.parent_ready[0], &buf, 1) != 1) 419baa489faSSeongJae Park ; 420baa489faSSeongJae Park exit(0); 421baa489faSSeongJae Park } 422baa489faSSeongJae Park 423baa489faSSeongJae Park while (read(comm_pipes.child_ready[0], &buf, 1) != 1) 424baa489faSSeongJae Park ; 425baa489faSSeongJae Park } else { 426baa489faSSeongJae Park /* 427baa489faSSeongJae Park * Map the page R/O into the page table. Enable softdirty 428baa489faSSeongJae Park * tracking to stop the page from getting mapped R/W immediately 429baa489faSSeongJae Park * again by mprotect() optimizations. Note that we don't have an 430baa489faSSeongJae Park * easy way to test if that worked (the pagemap does not export 431baa489faSSeongJae Park * if the page is mapped R/O vs. R/W). 432baa489faSSeongJae Park */ 433baa489faSSeongJae Park ret = mprotect(mem, size, PROT_READ); 434baa489faSSeongJae Park clear_softdirty(); 435baa489faSSeongJae Park ret |= mprotect(mem, size, PROT_READ | PROT_WRITE); 436baa489faSSeongJae Park if (ret) { 437baa489faSSeongJae Park ksft_test_result_fail("mprotect() failed\n"); 438baa489faSSeongJae Park goto unregister_buffers; 439baa489faSSeongJae Park } 440baa489faSSeongJae Park } 441baa489faSSeongJae Park 442baa489faSSeongJae Park /* 443baa489faSSeongJae Park * Modify the page and write page content as observed by the fixed 444baa489faSSeongJae Park * buffer pin to the file so we can verify it. 445baa489faSSeongJae Park */ 446baa489faSSeongJae Park memset(mem, 0xff, size); 447baa489faSSeongJae Park sqe = io_uring_get_sqe(&ring); 448baa489faSSeongJae Park if (!sqe) { 449baa489faSSeongJae Park ksft_test_result_fail("io_uring_get_sqe() failed\n"); 450baa489faSSeongJae Park goto quit_child; 451baa489faSSeongJae Park } 452baa489faSSeongJae Park io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0); 453baa489faSSeongJae Park 454baa489faSSeongJae Park ret = io_uring_submit(&ring); 455baa489faSSeongJae Park if (ret < 0) { 456baa489faSSeongJae Park ksft_test_result_fail("io_uring_submit() failed\n"); 457baa489faSSeongJae Park goto quit_child; 458baa489faSSeongJae Park } 459baa489faSSeongJae Park 460baa489faSSeongJae Park ret = io_uring_wait_cqe(&ring, &cqe); 461baa489faSSeongJae Park if (ret < 0) { 462baa489faSSeongJae Park ksft_test_result_fail("io_uring_wait_cqe() failed\n"); 463baa489faSSeongJae Park goto quit_child; 464baa489faSSeongJae Park } 465baa489faSSeongJae Park 466baa489faSSeongJae Park if (cqe->res != size) { 467baa489faSSeongJae Park ksft_test_result_fail("write_fixed failed\n"); 468baa489faSSeongJae Park goto quit_child; 469baa489faSSeongJae Park } 470baa489faSSeongJae Park io_uring_cqe_seen(&ring, cqe); 471baa489faSSeongJae Park 472baa489faSSeongJae Park /* Read back the file content to the temporary buffer. */ 473baa489faSSeongJae Park total = 0; 474baa489faSSeongJae Park while (total < size) { 475baa489faSSeongJae Park cur = pread(fd, tmp + total, size - total, total); 476baa489faSSeongJae Park if (cur < 0) { 477baa489faSSeongJae Park ksft_test_result_fail("pread() failed\n"); 478baa489faSSeongJae Park goto quit_child; 479baa489faSSeongJae Park } 480baa489faSSeongJae Park total += cur; 481baa489faSSeongJae Park } 482baa489faSSeongJae Park 483baa489faSSeongJae Park /* Finally, check if we read what we expected. */ 484baa489faSSeongJae Park ksft_test_result(!memcmp(mem, tmp, size), 485baa489faSSeongJae Park "Longterm R/W pin is reliable\n"); 486baa489faSSeongJae Park 487baa489faSSeongJae Park quit_child: 488baa489faSSeongJae Park if (use_fork) { 489baa489faSSeongJae Park write(comm_pipes.parent_ready[1], "0", 1); 490baa489faSSeongJae Park wait(&ret); 491baa489faSSeongJae Park } 492baa489faSSeongJae Park unregister_buffers: 493baa489faSSeongJae Park io_uring_unregister_buffers(&ring); 494baa489faSSeongJae Park queue_exit: 495baa489faSSeongJae Park io_uring_queue_exit(&ring); 496baa489faSSeongJae Park free_tmp: 497baa489faSSeongJae Park free(tmp); 498baa489faSSeongJae Park close_file: 499baa489faSSeongJae Park fclose(file); 500baa489faSSeongJae Park close_comm_pipes: 501baa489faSSeongJae Park close_comm_pipes(&comm_pipes); 502baa489faSSeongJae Park } 503baa489faSSeongJae Park 504baa489faSSeongJae Park static void test_iouring_ro(char *mem, size_t size) 505baa489faSSeongJae Park { 506baa489faSSeongJae Park do_test_iouring(mem, size, false); 507baa489faSSeongJae Park } 508baa489faSSeongJae Park 509baa489faSSeongJae Park static void test_iouring_fork(char *mem, size_t size) 510baa489faSSeongJae Park { 511baa489faSSeongJae Park do_test_iouring(mem, size, true); 512baa489faSSeongJae Park } 513baa489faSSeongJae Park 514baa489faSSeongJae Park #endif /* LOCAL_CONFIG_HAVE_LIBURING */ 515baa489faSSeongJae Park 516baa489faSSeongJae Park enum ro_pin_test { 517baa489faSSeongJae Park RO_PIN_TEST, 518baa489faSSeongJae Park RO_PIN_TEST_SHARED, 519baa489faSSeongJae Park RO_PIN_TEST_PREVIOUSLY_SHARED, 520baa489faSSeongJae Park RO_PIN_TEST_RO_EXCLUSIVE, 521baa489faSSeongJae Park }; 522baa489faSSeongJae Park 523baa489faSSeongJae Park static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, 524baa489faSSeongJae Park bool fast) 525baa489faSSeongJae Park { 526baa489faSSeongJae Park struct pin_longterm_test args; 527baa489faSSeongJae Park struct comm_pipes comm_pipes; 528baa489faSSeongJae Park char *tmp, buf; 529baa489faSSeongJae Park __u64 tmp_val; 530baa489faSSeongJae Park int ret; 531baa489faSSeongJae Park 532baa489faSSeongJae Park if (gup_fd < 0) { 533baa489faSSeongJae Park ksft_test_result_skip("gup_test not available\n"); 534baa489faSSeongJae Park return; 535baa489faSSeongJae Park } 536baa489faSSeongJae Park 537baa489faSSeongJae Park tmp = malloc(size); 538baa489faSSeongJae Park if (!tmp) { 539baa489faSSeongJae Park ksft_test_result_fail("malloc() failed\n"); 540baa489faSSeongJae Park return; 541baa489faSSeongJae Park } 542baa489faSSeongJae Park 543baa489faSSeongJae Park ret = setup_comm_pipes(&comm_pipes); 544baa489faSSeongJae Park if (ret) { 545baa489faSSeongJae Park ksft_test_result_fail("pipe() failed\n"); 546baa489faSSeongJae Park goto free_tmp; 547baa489faSSeongJae Park } 548baa489faSSeongJae Park 549baa489faSSeongJae Park switch (test) { 550baa489faSSeongJae Park case RO_PIN_TEST: 551baa489faSSeongJae Park break; 552baa489faSSeongJae Park case RO_PIN_TEST_SHARED: 553baa489faSSeongJae Park case RO_PIN_TEST_PREVIOUSLY_SHARED: 554baa489faSSeongJae Park /* 555baa489faSSeongJae Park * Share the pages with our child. As the pages are not pinned, 556baa489faSSeongJae Park * this should just work. 557baa489faSSeongJae Park */ 558baa489faSSeongJae Park ret = fork(); 559baa489faSSeongJae Park if (ret < 0) { 560baa489faSSeongJae Park ksft_test_result_fail("fork() failed\n"); 561baa489faSSeongJae Park goto close_comm_pipes; 562baa489faSSeongJae Park } else if (!ret) { 563baa489faSSeongJae Park write(comm_pipes.child_ready[1], "0", 1); 564baa489faSSeongJae Park while (read(comm_pipes.parent_ready[0], &buf, 1) != 1) 565baa489faSSeongJae Park ; 566baa489faSSeongJae Park exit(0); 567baa489faSSeongJae Park } 568baa489faSSeongJae Park 569baa489faSSeongJae Park /* Wait until our child is ready. */ 570baa489faSSeongJae Park while (read(comm_pipes.child_ready[0], &buf, 1) != 1) 571baa489faSSeongJae Park ; 572baa489faSSeongJae Park 573baa489faSSeongJae Park if (test == RO_PIN_TEST_PREVIOUSLY_SHARED) { 574baa489faSSeongJae Park /* 575baa489faSSeongJae Park * Tell the child to quit now and wait until it quit. 576baa489faSSeongJae Park * The pages should now be mapped R/O into our page 577baa489faSSeongJae Park * tables, but they are no longer shared. 578baa489faSSeongJae Park */ 579baa489faSSeongJae Park write(comm_pipes.parent_ready[1], "0", 1); 580baa489faSSeongJae Park wait(&ret); 581baa489faSSeongJae Park if (!WIFEXITED(ret)) 582baa489faSSeongJae Park ksft_print_msg("[INFO] wait() failed\n"); 583baa489faSSeongJae Park } 584baa489faSSeongJae Park break; 585baa489faSSeongJae Park case RO_PIN_TEST_RO_EXCLUSIVE: 586baa489faSSeongJae Park /* 587baa489faSSeongJae Park * Map the page R/O into the page table. Enable softdirty 588baa489faSSeongJae Park * tracking to stop the page from getting mapped R/W immediately 589baa489faSSeongJae Park * again by mprotect() optimizations. Note that we don't have an 590baa489faSSeongJae Park * easy way to test if that worked (the pagemap does not export 591baa489faSSeongJae Park * if the page is mapped R/O vs. R/W). 592baa489faSSeongJae Park */ 593baa489faSSeongJae Park ret = mprotect(mem, size, PROT_READ); 594baa489faSSeongJae Park clear_softdirty(); 595baa489faSSeongJae Park ret |= mprotect(mem, size, PROT_READ | PROT_WRITE); 596baa489faSSeongJae Park if (ret) { 597baa489faSSeongJae Park ksft_test_result_fail("mprotect() failed\n"); 598baa489faSSeongJae Park goto close_comm_pipes; 599baa489faSSeongJae Park } 600baa489faSSeongJae Park break; 601baa489faSSeongJae Park default: 602baa489faSSeongJae Park assert(false); 603baa489faSSeongJae Park } 604baa489faSSeongJae Park 605baa489faSSeongJae Park /* Take a R/O pin. This should trigger unsharing. */ 606baa489faSSeongJae Park args.addr = (__u64)(uintptr_t)mem; 607baa489faSSeongJae Park args.size = size; 608baa489faSSeongJae Park args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0; 609baa489faSSeongJae Park ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args); 610baa489faSSeongJae Park if (ret) { 611baa489faSSeongJae Park if (errno == EINVAL) 612baa489faSSeongJae Park ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n"); 613baa489faSSeongJae Park else 614baa489faSSeongJae Park ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n"); 615baa489faSSeongJae Park goto wait; 616baa489faSSeongJae Park } 617baa489faSSeongJae Park 618baa489faSSeongJae Park /* Modify the page. */ 619baa489faSSeongJae Park memset(mem, 0xff, size); 620baa489faSSeongJae Park 621baa489faSSeongJae Park /* 622baa489faSSeongJae Park * Read back the content via the pin to the temporary buffer and 623baa489faSSeongJae Park * test if we observed the modification. 624baa489faSSeongJae Park */ 625baa489faSSeongJae Park tmp_val = (__u64)(uintptr_t)tmp; 626baa489faSSeongJae Park ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val); 627baa489faSSeongJae Park if (ret) 628baa489faSSeongJae Park ksft_test_result_fail("PIN_LONGTERM_TEST_READ failed\n"); 629baa489faSSeongJae Park else 630baa489faSSeongJae Park ksft_test_result(!memcmp(mem, tmp, size), 631baa489faSSeongJae Park "Longterm R/O pin is reliable\n"); 632baa489faSSeongJae Park 633baa489faSSeongJae Park ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP); 634baa489faSSeongJae Park if (ret) 635baa489faSSeongJae Park ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n"); 636baa489faSSeongJae Park wait: 637baa489faSSeongJae Park switch (test) { 638baa489faSSeongJae Park case RO_PIN_TEST_SHARED: 639baa489faSSeongJae Park write(comm_pipes.parent_ready[1], "0", 1); 640baa489faSSeongJae Park wait(&ret); 641baa489faSSeongJae Park if (!WIFEXITED(ret)) 642baa489faSSeongJae Park ksft_print_msg("[INFO] wait() failed\n"); 643baa489faSSeongJae Park break; 644baa489faSSeongJae Park default: 645baa489faSSeongJae Park break; 646baa489faSSeongJae Park } 647baa489faSSeongJae Park close_comm_pipes: 648baa489faSSeongJae Park close_comm_pipes(&comm_pipes); 649baa489faSSeongJae Park free_tmp: 650baa489faSSeongJae Park free(tmp); 651baa489faSSeongJae Park } 652baa489faSSeongJae Park 653baa489faSSeongJae Park static void test_ro_pin_on_shared(char *mem, size_t size) 654baa489faSSeongJae Park { 655baa489faSSeongJae Park do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, false); 656baa489faSSeongJae Park } 657baa489faSSeongJae Park 658baa489faSSeongJae Park static void test_ro_fast_pin_on_shared(char *mem, size_t size) 659baa489faSSeongJae Park { 660baa489faSSeongJae Park do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, true); 661baa489faSSeongJae Park } 662baa489faSSeongJae Park 663baa489faSSeongJae Park static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size) 664baa489faSSeongJae Park { 665baa489faSSeongJae Park do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, false); 666baa489faSSeongJae Park } 667baa489faSSeongJae Park 668baa489faSSeongJae Park static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size) 669baa489faSSeongJae Park { 670baa489faSSeongJae Park do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, true); 671baa489faSSeongJae Park } 672baa489faSSeongJae Park 673baa489faSSeongJae Park static void test_ro_pin_on_ro_exclusive(char *mem, size_t size) 674baa489faSSeongJae Park { 675baa489faSSeongJae Park do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, false); 676baa489faSSeongJae Park } 677baa489faSSeongJae Park 678baa489faSSeongJae Park static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size) 679baa489faSSeongJae Park { 680baa489faSSeongJae Park do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, true); 681baa489faSSeongJae Park } 682baa489faSSeongJae Park 683baa489faSSeongJae Park typedef void (*test_fn)(char *mem, size_t size); 684baa489faSSeongJae Park 685baa489faSSeongJae Park static void do_run_with_base_page(test_fn fn, bool swapout) 686baa489faSSeongJae Park { 687baa489faSSeongJae Park char *mem; 688baa489faSSeongJae Park int ret; 689baa489faSSeongJae Park 690baa489faSSeongJae Park mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, 691baa489faSSeongJae Park MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 692baa489faSSeongJae Park if (mem == MAP_FAILED) { 693baa489faSSeongJae Park ksft_test_result_fail("mmap() failed\n"); 694baa489faSSeongJae Park return; 695baa489faSSeongJae Park } 696baa489faSSeongJae Park 697baa489faSSeongJae Park ret = madvise(mem, pagesize, MADV_NOHUGEPAGE); 698baa489faSSeongJae Park /* Ignore if not around on a kernel. */ 699baa489faSSeongJae Park if (ret && errno != EINVAL) { 700baa489faSSeongJae Park ksft_test_result_fail("MADV_NOHUGEPAGE failed\n"); 701baa489faSSeongJae Park goto munmap; 702baa489faSSeongJae Park } 703baa489faSSeongJae Park 704baa489faSSeongJae Park /* Populate a base page. */ 705baa489faSSeongJae Park memset(mem, 0, pagesize); 706baa489faSSeongJae Park 707baa489faSSeongJae Park if (swapout) { 708baa489faSSeongJae Park madvise(mem, pagesize, MADV_PAGEOUT); 709baa489faSSeongJae Park if (!pagemap_is_swapped(pagemap_fd, mem)) { 710baa489faSSeongJae Park ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n"); 711baa489faSSeongJae Park goto munmap; 712baa489faSSeongJae Park } 713baa489faSSeongJae Park } 714baa489faSSeongJae Park 715baa489faSSeongJae Park fn(mem, pagesize); 716baa489faSSeongJae Park munmap: 717baa489faSSeongJae Park munmap(mem, pagesize); 718baa489faSSeongJae Park } 719baa489faSSeongJae Park 720baa489faSSeongJae Park static void run_with_base_page(test_fn fn, const char *desc) 721baa489faSSeongJae Park { 722baa489faSSeongJae Park ksft_print_msg("[RUN] %s ... with base page\n", desc); 723baa489faSSeongJae Park do_run_with_base_page(fn, false); 724baa489faSSeongJae Park } 725baa489faSSeongJae Park 726baa489faSSeongJae Park static void run_with_base_page_swap(test_fn fn, const char *desc) 727baa489faSSeongJae Park { 728baa489faSSeongJae Park ksft_print_msg("[RUN] %s ... with swapped out base page\n", desc); 729baa489faSSeongJae Park do_run_with_base_page(fn, true); 730baa489faSSeongJae Park } 731baa489faSSeongJae Park 732baa489faSSeongJae Park enum thp_run { 733baa489faSSeongJae Park THP_RUN_PMD, 734baa489faSSeongJae Park THP_RUN_PMD_SWAPOUT, 735baa489faSSeongJae Park THP_RUN_PTE, 736baa489faSSeongJae Park THP_RUN_PTE_SWAPOUT, 737baa489faSSeongJae Park THP_RUN_SINGLE_PTE, 738baa489faSSeongJae Park THP_RUN_SINGLE_PTE_SWAPOUT, 739baa489faSSeongJae Park THP_RUN_PARTIAL_MREMAP, 740baa489faSSeongJae Park THP_RUN_PARTIAL_SHARED, 741baa489faSSeongJae Park }; 742baa489faSSeongJae Park 743baa489faSSeongJae Park static void do_run_with_thp(test_fn fn, enum thp_run thp_run) 744baa489faSSeongJae Park { 745baa489faSSeongJae Park char *mem, *mmap_mem, *tmp, *mremap_mem = MAP_FAILED; 746baa489faSSeongJae Park size_t size, mmap_size, mremap_size; 747baa489faSSeongJae Park int ret; 748baa489faSSeongJae Park 749baa489faSSeongJae Park /* For alignment purposes, we need twice the thp size. */ 750baa489faSSeongJae Park mmap_size = 2 * thpsize; 751baa489faSSeongJae Park mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, 752baa489faSSeongJae Park MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 753baa489faSSeongJae Park if (mmap_mem == MAP_FAILED) { 754baa489faSSeongJae Park ksft_test_result_fail("mmap() failed\n"); 755baa489faSSeongJae Park return; 756baa489faSSeongJae Park } 757baa489faSSeongJae Park 758baa489faSSeongJae Park /* We need a THP-aligned memory area. */ 759baa489faSSeongJae Park mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1)); 760baa489faSSeongJae Park 761baa489faSSeongJae Park ret = madvise(mem, thpsize, MADV_HUGEPAGE); 762baa489faSSeongJae Park if (ret) { 763baa489faSSeongJae Park ksft_test_result_fail("MADV_HUGEPAGE failed\n"); 764baa489faSSeongJae Park goto munmap; 765baa489faSSeongJae Park } 766baa489faSSeongJae Park 767baa489faSSeongJae Park /* 768baa489faSSeongJae Park * Try to populate a THP. Touch the first sub-page and test if we get 769baa489faSSeongJae Park * another sub-page populated automatically. 770baa489faSSeongJae Park */ 771baa489faSSeongJae Park mem[0] = 0; 772baa489faSSeongJae Park if (!pagemap_is_populated(pagemap_fd, mem + pagesize)) { 773baa489faSSeongJae Park ksft_test_result_skip("Did not get a THP populated\n"); 774baa489faSSeongJae Park goto munmap; 775baa489faSSeongJae Park } 776baa489faSSeongJae Park memset(mem, 0, thpsize); 777baa489faSSeongJae Park 778baa489faSSeongJae Park size = thpsize; 779baa489faSSeongJae Park switch (thp_run) { 780baa489faSSeongJae Park case THP_RUN_PMD: 781baa489faSSeongJae Park case THP_RUN_PMD_SWAPOUT: 782baa489faSSeongJae Park break; 783baa489faSSeongJae Park case THP_RUN_PTE: 784baa489faSSeongJae Park case THP_RUN_PTE_SWAPOUT: 785baa489faSSeongJae Park /* 786baa489faSSeongJae Park * Trigger PTE-mapping the THP by temporarily mapping a single 787baa489faSSeongJae Park * subpage R/O. 788baa489faSSeongJae Park */ 789baa489faSSeongJae Park ret = mprotect(mem + pagesize, pagesize, PROT_READ); 790baa489faSSeongJae Park if (ret) { 791baa489faSSeongJae Park ksft_test_result_fail("mprotect() failed\n"); 792baa489faSSeongJae Park goto munmap; 793baa489faSSeongJae Park } 794baa489faSSeongJae Park ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE); 795baa489faSSeongJae Park if (ret) { 796baa489faSSeongJae Park ksft_test_result_fail("mprotect() failed\n"); 797baa489faSSeongJae Park goto munmap; 798baa489faSSeongJae Park } 799baa489faSSeongJae Park break; 800baa489faSSeongJae Park case THP_RUN_SINGLE_PTE: 801baa489faSSeongJae Park case THP_RUN_SINGLE_PTE_SWAPOUT: 802baa489faSSeongJae Park /* 803baa489faSSeongJae Park * Discard all but a single subpage of that PTE-mapped THP. What 804baa489faSSeongJae Park * remains is a single PTE mapping a single subpage. 805baa489faSSeongJae Park */ 806baa489faSSeongJae Park ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED); 807baa489faSSeongJae Park if (ret) { 808baa489faSSeongJae Park ksft_test_result_fail("MADV_DONTNEED failed\n"); 809baa489faSSeongJae Park goto munmap; 810baa489faSSeongJae Park } 811baa489faSSeongJae Park size = pagesize; 812baa489faSSeongJae Park break; 813baa489faSSeongJae Park case THP_RUN_PARTIAL_MREMAP: 814baa489faSSeongJae Park /* 815baa489faSSeongJae Park * Remap half of the THP. We need some new memory location 816baa489faSSeongJae Park * for that. 817baa489faSSeongJae Park */ 818baa489faSSeongJae Park mremap_size = thpsize / 2; 819baa489faSSeongJae Park mremap_mem = mmap(NULL, mremap_size, PROT_NONE, 820baa489faSSeongJae Park MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 821baa489faSSeongJae Park if (mem == MAP_FAILED) { 822baa489faSSeongJae Park ksft_test_result_fail("mmap() failed\n"); 823baa489faSSeongJae Park goto munmap; 824baa489faSSeongJae Park } 825baa489faSSeongJae Park tmp = mremap(mem + mremap_size, mremap_size, mremap_size, 826baa489faSSeongJae Park MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem); 827baa489faSSeongJae Park if (tmp != mremap_mem) { 828baa489faSSeongJae Park ksft_test_result_fail("mremap() failed\n"); 829baa489faSSeongJae Park goto munmap; 830baa489faSSeongJae Park } 831baa489faSSeongJae Park size = mremap_size; 832baa489faSSeongJae Park break; 833baa489faSSeongJae Park case THP_RUN_PARTIAL_SHARED: 834baa489faSSeongJae Park /* 835baa489faSSeongJae Park * Share the first page of the THP with a child and quit the 836baa489faSSeongJae Park * child. This will result in some parts of the THP never 837baa489faSSeongJae Park * have been shared. 838baa489faSSeongJae Park */ 839baa489faSSeongJae Park ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK); 840baa489faSSeongJae Park if (ret) { 841baa489faSSeongJae Park ksft_test_result_fail("MADV_DONTFORK failed\n"); 842baa489faSSeongJae Park goto munmap; 843baa489faSSeongJae Park } 844baa489faSSeongJae Park ret = fork(); 845baa489faSSeongJae Park if (ret < 0) { 846baa489faSSeongJae Park ksft_test_result_fail("fork() failed\n"); 847baa489faSSeongJae Park goto munmap; 848baa489faSSeongJae Park } else if (!ret) { 849baa489faSSeongJae Park exit(0); 850baa489faSSeongJae Park } 851baa489faSSeongJae Park wait(&ret); 852baa489faSSeongJae Park /* Allow for sharing all pages again. */ 853baa489faSSeongJae Park ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK); 854baa489faSSeongJae Park if (ret) { 855baa489faSSeongJae Park ksft_test_result_fail("MADV_DOFORK failed\n"); 856baa489faSSeongJae Park goto munmap; 857baa489faSSeongJae Park } 858baa489faSSeongJae Park break; 859baa489faSSeongJae Park default: 860baa489faSSeongJae Park assert(false); 861baa489faSSeongJae Park } 862baa489faSSeongJae Park 863baa489faSSeongJae Park switch (thp_run) { 864baa489faSSeongJae Park case THP_RUN_PMD_SWAPOUT: 865baa489faSSeongJae Park case THP_RUN_PTE_SWAPOUT: 866baa489faSSeongJae Park case THP_RUN_SINGLE_PTE_SWAPOUT: 867baa489faSSeongJae Park madvise(mem, size, MADV_PAGEOUT); 868baa489faSSeongJae Park if (!range_is_swapped(mem, size)) { 869baa489faSSeongJae Park ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n"); 870baa489faSSeongJae Park goto munmap; 871baa489faSSeongJae Park } 872baa489faSSeongJae Park break; 873baa489faSSeongJae Park default: 874baa489faSSeongJae Park break; 875baa489faSSeongJae Park } 876baa489faSSeongJae Park 877baa489faSSeongJae Park fn(mem, size); 878baa489faSSeongJae Park munmap: 879baa489faSSeongJae Park munmap(mmap_mem, mmap_size); 880baa489faSSeongJae Park if (mremap_mem != MAP_FAILED) 881baa489faSSeongJae Park munmap(mremap_mem, mremap_size); 882baa489faSSeongJae Park } 883baa489faSSeongJae Park 884baa489faSSeongJae Park static void run_with_thp(test_fn fn, const char *desc) 885baa489faSSeongJae Park { 886baa489faSSeongJae Park ksft_print_msg("[RUN] %s ... with THP\n", desc); 887baa489faSSeongJae Park do_run_with_thp(fn, THP_RUN_PMD); 888baa489faSSeongJae Park } 889baa489faSSeongJae Park 890baa489faSSeongJae Park static void run_with_thp_swap(test_fn fn, const char *desc) 891baa489faSSeongJae Park { 892baa489faSSeongJae Park ksft_print_msg("[RUN] %s ... with swapped-out THP\n", desc); 893baa489faSSeongJae Park do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT); 894baa489faSSeongJae Park } 895baa489faSSeongJae Park 896baa489faSSeongJae Park static void run_with_pte_mapped_thp(test_fn fn, const char *desc) 897baa489faSSeongJae Park { 898baa489faSSeongJae Park ksft_print_msg("[RUN] %s ... with PTE-mapped THP\n", desc); 899baa489faSSeongJae Park do_run_with_thp(fn, THP_RUN_PTE); 900baa489faSSeongJae Park } 901baa489faSSeongJae Park 902baa489faSSeongJae Park static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc) 903baa489faSSeongJae Park { 904baa489faSSeongJae Park ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP\n", desc); 905baa489faSSeongJae Park do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT); 906baa489faSSeongJae Park } 907baa489faSSeongJae Park 908baa489faSSeongJae Park static void run_with_single_pte_of_thp(test_fn fn, const char *desc) 909baa489faSSeongJae Park { 910baa489faSSeongJae Park ksft_print_msg("[RUN] %s ... with single PTE of THP\n", desc); 911baa489faSSeongJae Park do_run_with_thp(fn, THP_RUN_SINGLE_PTE); 912baa489faSSeongJae Park } 913baa489faSSeongJae Park 914baa489faSSeongJae Park static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc) 915baa489faSSeongJae Park { 916baa489faSSeongJae Park ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP\n", desc); 917baa489faSSeongJae Park do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT); 918baa489faSSeongJae Park } 919baa489faSSeongJae Park 920baa489faSSeongJae Park static void run_with_partial_mremap_thp(test_fn fn, const char *desc) 921baa489faSSeongJae Park { 922baa489faSSeongJae Park ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP\n", desc); 923baa489faSSeongJae Park do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP); 924baa489faSSeongJae Park } 925baa489faSSeongJae Park 926baa489faSSeongJae Park static void run_with_partial_shared_thp(test_fn fn, const char *desc) 927baa489faSSeongJae Park { 928baa489faSSeongJae Park ksft_print_msg("[RUN] %s ... with partially shared THP\n", desc); 929baa489faSSeongJae Park do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED); 930baa489faSSeongJae Park } 931baa489faSSeongJae Park 932baa489faSSeongJae Park static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize) 933baa489faSSeongJae Park { 934baa489faSSeongJae Park int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB; 935baa489faSSeongJae Park char *mem, *dummy; 936baa489faSSeongJae Park 937baa489faSSeongJae Park ksft_print_msg("[RUN] %s ... with hugetlb (%zu kB)\n", desc, 938baa489faSSeongJae Park hugetlbsize / 1024); 939baa489faSSeongJae Park 940baa489faSSeongJae Park flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT; 941baa489faSSeongJae Park 942baa489faSSeongJae Park mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0); 943baa489faSSeongJae Park if (mem == MAP_FAILED) { 944baa489faSSeongJae Park ksft_test_result_skip("need more free huge pages\n"); 945baa489faSSeongJae Park return; 946baa489faSSeongJae Park } 947baa489faSSeongJae Park 948baa489faSSeongJae Park /* Populate an huge page. */ 949baa489faSSeongJae Park memset(mem, 0, hugetlbsize); 950baa489faSSeongJae Park 951baa489faSSeongJae Park /* 952baa489faSSeongJae Park * We need a total of two hugetlb pages to handle COW/unsharing 953baa489faSSeongJae Park * properly, otherwise we might get zapped by a SIGBUS. 954baa489faSSeongJae Park */ 955baa489faSSeongJae Park dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0); 956baa489faSSeongJae Park if (dummy == MAP_FAILED) { 957baa489faSSeongJae Park ksft_test_result_skip("need more free huge pages\n"); 958baa489faSSeongJae Park goto munmap; 959baa489faSSeongJae Park } 960baa489faSSeongJae Park munmap(dummy, hugetlbsize); 961baa489faSSeongJae Park 962baa489faSSeongJae Park fn(mem, hugetlbsize); 963baa489faSSeongJae Park munmap: 964baa489faSSeongJae Park munmap(mem, hugetlbsize); 965baa489faSSeongJae Park } 966baa489faSSeongJae Park 967baa489faSSeongJae Park struct test_case { 968baa489faSSeongJae Park const char *desc; 969baa489faSSeongJae Park test_fn fn; 970baa489faSSeongJae Park }; 971baa489faSSeongJae Park 972baa489faSSeongJae Park /* 973baa489faSSeongJae Park * Test cases that are specific to anonymous pages: pages in private mappings 974baa489faSSeongJae Park * that may get shared via COW during fork(). 975baa489faSSeongJae Park */ 976baa489faSSeongJae Park static const struct test_case anon_test_cases[] = { 977baa489faSSeongJae Park /* 978baa489faSSeongJae Park * Basic COW tests for fork() without any GUP. If we miss to break COW, 979baa489faSSeongJae Park * either the child can observe modifications by the parent or the 980baa489faSSeongJae Park * other way around. 981baa489faSSeongJae Park */ 982baa489faSSeongJae Park { 983baa489faSSeongJae Park "Basic COW after fork()", 984baa489faSSeongJae Park test_cow_in_parent, 985baa489faSSeongJae Park }, 986baa489faSSeongJae Park /* 987baa489faSSeongJae Park * Basic test, but do an additional mprotect(PROT_READ)+ 988baa489faSSeongJae Park * mprotect(PROT_READ|PROT_WRITE) in the parent before write access. 989baa489faSSeongJae Park */ 990baa489faSSeongJae Park { 991baa489faSSeongJae Park "Basic COW after fork() with mprotect() optimization", 992baa489faSSeongJae Park test_cow_in_parent_mprotect, 993baa489faSSeongJae Park }, 994baa489faSSeongJae Park /* 995baa489faSSeongJae Park * vmsplice() [R/O GUP] + unmap in the child; modify in the parent. If 996baa489faSSeongJae Park * we miss to break COW, the child observes modifications by the parent. 997baa489faSSeongJae Park * This is CVE-2020-29374 reported by Jann Horn. 998baa489faSSeongJae Park */ 999baa489faSSeongJae Park { 1000baa489faSSeongJae Park "vmsplice() + unmap in child", 1001baa489faSSeongJae Park test_vmsplice_in_child 1002baa489faSSeongJae Park }, 1003baa489faSSeongJae Park /* 1004baa489faSSeongJae Park * vmsplice() test, but do an additional mprotect(PROT_READ)+ 1005baa489faSSeongJae Park * mprotect(PROT_READ|PROT_WRITE) in the parent before write access. 1006baa489faSSeongJae Park */ 1007baa489faSSeongJae Park { 1008baa489faSSeongJae Park "vmsplice() + unmap in child with mprotect() optimization", 1009baa489faSSeongJae Park test_vmsplice_in_child_mprotect 1010baa489faSSeongJae Park }, 1011baa489faSSeongJae Park /* 1012baa489faSSeongJae Park * vmsplice() [R/O GUP] in parent before fork(), unmap in parent after 1013baa489faSSeongJae Park * fork(); modify in the child. If we miss to break COW, the parent 1014baa489faSSeongJae Park * observes modifications by the child. 1015baa489faSSeongJae Park */ 1016baa489faSSeongJae Park { 1017baa489faSSeongJae Park "vmsplice() before fork(), unmap in parent after fork()", 1018baa489faSSeongJae Park test_vmsplice_before_fork, 1019baa489faSSeongJae Park }, 1020baa489faSSeongJae Park /* 1021baa489faSSeongJae Park * vmsplice() [R/O GUP] + unmap in parent after fork(); modify in the 1022baa489faSSeongJae Park * child. If we miss to break COW, the parent observes modifications by 1023baa489faSSeongJae Park * the child. 1024baa489faSSeongJae Park */ 1025baa489faSSeongJae Park { 1026baa489faSSeongJae Park "vmsplice() + unmap in parent after fork()", 1027baa489faSSeongJae Park test_vmsplice_after_fork, 1028baa489faSSeongJae Park }, 1029baa489faSSeongJae Park #ifdef LOCAL_CONFIG_HAVE_LIBURING 1030baa489faSSeongJae Park /* 1031baa489faSSeongJae Park * Take a R/W longterm pin and then map the page R/O into the page 1032baa489faSSeongJae Park * table to trigger a write fault on next access. When modifying the 1033baa489faSSeongJae Park * page, the page content must be visible via the pin. 1034baa489faSSeongJae Park */ 1035baa489faSSeongJae Park { 1036baa489faSSeongJae Park "R/O-mapping a page registered as iouring fixed buffer", 1037baa489faSSeongJae Park test_iouring_ro, 1038baa489faSSeongJae Park }, 1039baa489faSSeongJae Park /* 1040baa489faSSeongJae Park * Take a R/W longterm pin and then fork() a child. When modifying the 1041baa489faSSeongJae Park * page, the page content must be visible via the pin. We expect the 1042baa489faSSeongJae Park * pinned page to not get shared with the child. 1043baa489faSSeongJae Park */ 1044baa489faSSeongJae Park { 1045baa489faSSeongJae Park "fork() with an iouring fixed buffer", 1046baa489faSSeongJae Park test_iouring_fork, 1047baa489faSSeongJae Park }, 1048baa489faSSeongJae Park 1049baa489faSSeongJae Park #endif /* LOCAL_CONFIG_HAVE_LIBURING */ 1050baa489faSSeongJae Park /* 1051baa489faSSeongJae Park * Take a R/O longterm pin on a R/O-mapped shared anonymous page. 1052baa489faSSeongJae Park * When modifying the page via the page table, the page content change 1053baa489faSSeongJae Park * must be visible via the pin. 1054baa489faSSeongJae Park */ 1055baa489faSSeongJae Park { 1056baa489faSSeongJae Park "R/O GUP pin on R/O-mapped shared page", 1057baa489faSSeongJae Park test_ro_pin_on_shared, 1058baa489faSSeongJae Park }, 1059baa489faSSeongJae Park /* Same as above, but using GUP-fast. */ 1060baa489faSSeongJae Park { 1061baa489faSSeongJae Park "R/O GUP-fast pin on R/O-mapped shared page", 1062baa489faSSeongJae Park test_ro_fast_pin_on_shared, 1063baa489faSSeongJae Park }, 1064baa489faSSeongJae Park /* 1065baa489faSSeongJae Park * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page that 1066baa489faSSeongJae Park * was previously shared. When modifying the page via the page table, 1067baa489faSSeongJae Park * the page content change must be visible via the pin. 1068baa489faSSeongJae Park */ 1069baa489faSSeongJae Park { 1070baa489faSSeongJae Park "R/O GUP pin on R/O-mapped previously-shared page", 1071baa489faSSeongJae Park test_ro_pin_on_ro_previously_shared, 1072baa489faSSeongJae Park }, 1073baa489faSSeongJae Park /* Same as above, but using GUP-fast. */ 1074baa489faSSeongJae Park { 1075baa489faSSeongJae Park "R/O GUP-fast pin on R/O-mapped previously-shared page", 1076baa489faSSeongJae Park test_ro_fast_pin_on_ro_previously_shared, 1077baa489faSSeongJae Park }, 1078baa489faSSeongJae Park /* 1079baa489faSSeongJae Park * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page. 1080baa489faSSeongJae Park * When modifying the page via the page table, the page content change 1081baa489faSSeongJae Park * must be visible via the pin. 1082baa489faSSeongJae Park */ 1083baa489faSSeongJae Park { 1084baa489faSSeongJae Park "R/O GUP pin on R/O-mapped exclusive page", 1085baa489faSSeongJae Park test_ro_pin_on_ro_exclusive, 1086baa489faSSeongJae Park }, 1087baa489faSSeongJae Park /* Same as above, but using GUP-fast. */ 1088baa489faSSeongJae Park { 1089baa489faSSeongJae Park "R/O GUP-fast pin on R/O-mapped exclusive page", 1090baa489faSSeongJae Park test_ro_fast_pin_on_ro_exclusive, 1091baa489faSSeongJae Park }, 1092baa489faSSeongJae Park }; 1093baa489faSSeongJae Park 1094baa489faSSeongJae Park static void run_anon_test_case(struct test_case const *test_case) 1095baa489faSSeongJae Park { 1096baa489faSSeongJae Park int i; 1097baa489faSSeongJae Park 1098baa489faSSeongJae Park run_with_base_page(test_case->fn, test_case->desc); 1099baa489faSSeongJae Park run_with_base_page_swap(test_case->fn, test_case->desc); 1100baa489faSSeongJae Park if (thpsize) { 1101baa489faSSeongJae Park run_with_thp(test_case->fn, test_case->desc); 1102baa489faSSeongJae Park run_with_thp_swap(test_case->fn, test_case->desc); 1103baa489faSSeongJae Park run_with_pte_mapped_thp(test_case->fn, test_case->desc); 1104baa489faSSeongJae Park run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc); 1105baa489faSSeongJae Park run_with_single_pte_of_thp(test_case->fn, test_case->desc); 1106baa489faSSeongJae Park run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc); 1107baa489faSSeongJae Park run_with_partial_mremap_thp(test_case->fn, test_case->desc); 1108baa489faSSeongJae Park run_with_partial_shared_thp(test_case->fn, test_case->desc); 1109baa489faSSeongJae Park } 1110baa489faSSeongJae Park for (i = 0; i < nr_hugetlbsizes; i++) 1111baa489faSSeongJae Park run_with_hugetlb(test_case->fn, test_case->desc, 1112baa489faSSeongJae Park hugetlbsizes[i]); 1113baa489faSSeongJae Park } 1114baa489faSSeongJae Park 1115baa489faSSeongJae Park static void run_anon_test_cases(void) 1116baa489faSSeongJae Park { 1117baa489faSSeongJae Park int i; 1118baa489faSSeongJae Park 1119baa489faSSeongJae Park ksft_print_msg("[INFO] Anonymous memory tests in private mappings\n"); 1120baa489faSSeongJae Park 1121baa489faSSeongJae Park for (i = 0; i < ARRAY_SIZE(anon_test_cases); i++) 1122baa489faSSeongJae Park run_anon_test_case(&anon_test_cases[i]); 1123baa489faSSeongJae Park } 1124baa489faSSeongJae Park 1125baa489faSSeongJae Park static int tests_per_anon_test_case(void) 1126baa489faSSeongJae Park { 1127baa489faSSeongJae Park int tests = 2 + nr_hugetlbsizes; 1128baa489faSSeongJae Park 1129baa489faSSeongJae Park if (thpsize) 1130baa489faSSeongJae Park tests += 8; 1131baa489faSSeongJae Park return tests; 1132baa489faSSeongJae Park } 1133baa489faSSeongJae Park 1134baa489faSSeongJae Park enum anon_thp_collapse_test { 1135baa489faSSeongJae Park ANON_THP_COLLAPSE_UNSHARED, 1136baa489faSSeongJae Park ANON_THP_COLLAPSE_FULLY_SHARED, 1137baa489faSSeongJae Park ANON_THP_COLLAPSE_LOWER_SHARED, 1138baa489faSSeongJae Park ANON_THP_COLLAPSE_UPPER_SHARED, 1139baa489faSSeongJae Park }; 1140baa489faSSeongJae Park 1141baa489faSSeongJae Park static void do_test_anon_thp_collapse(char *mem, size_t size, 1142baa489faSSeongJae Park enum anon_thp_collapse_test test) 1143baa489faSSeongJae Park { 1144baa489faSSeongJae Park struct comm_pipes comm_pipes; 1145baa489faSSeongJae Park char buf; 1146baa489faSSeongJae Park int ret; 1147baa489faSSeongJae Park 1148baa489faSSeongJae Park ret = setup_comm_pipes(&comm_pipes); 1149baa489faSSeongJae Park if (ret) { 1150baa489faSSeongJae Park ksft_test_result_fail("pipe() failed\n"); 1151baa489faSSeongJae Park return; 1152baa489faSSeongJae Park } 1153baa489faSSeongJae Park 1154baa489faSSeongJae Park /* 1155baa489faSSeongJae Park * Trigger PTE-mapping the THP by temporarily mapping a single subpage 1156baa489faSSeongJae Park * R/O, such that we can try collapsing it later. 1157baa489faSSeongJae Park */ 1158baa489faSSeongJae Park ret = mprotect(mem + pagesize, pagesize, PROT_READ); 1159baa489faSSeongJae Park if (ret) { 1160baa489faSSeongJae Park ksft_test_result_fail("mprotect() failed\n"); 1161baa489faSSeongJae Park goto close_comm_pipes; 1162baa489faSSeongJae Park } 1163baa489faSSeongJae Park ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE); 1164baa489faSSeongJae Park if (ret) { 1165baa489faSSeongJae Park ksft_test_result_fail("mprotect() failed\n"); 1166baa489faSSeongJae Park goto close_comm_pipes; 1167baa489faSSeongJae Park } 1168baa489faSSeongJae Park 1169baa489faSSeongJae Park switch (test) { 1170baa489faSSeongJae Park case ANON_THP_COLLAPSE_UNSHARED: 1171baa489faSSeongJae Park /* Collapse before actually COW-sharing the page. */ 1172baa489faSSeongJae Park ret = madvise(mem, size, MADV_COLLAPSE); 1173baa489faSSeongJae Park if (ret) { 1174baa489faSSeongJae Park ksft_test_result_skip("MADV_COLLAPSE failed: %s\n", 1175baa489faSSeongJae Park strerror(errno)); 1176baa489faSSeongJae Park goto close_comm_pipes; 1177baa489faSSeongJae Park } 1178baa489faSSeongJae Park break; 1179baa489faSSeongJae Park case ANON_THP_COLLAPSE_FULLY_SHARED: 1180baa489faSSeongJae Park /* COW-share the full PTE-mapped THP. */ 1181baa489faSSeongJae Park break; 1182baa489faSSeongJae Park case ANON_THP_COLLAPSE_LOWER_SHARED: 1183baa489faSSeongJae Park /* Don't COW-share the upper part of the THP. */ 1184baa489faSSeongJae Park ret = madvise(mem + size / 2, size / 2, MADV_DONTFORK); 1185baa489faSSeongJae Park if (ret) { 1186baa489faSSeongJae Park ksft_test_result_fail("MADV_DONTFORK failed\n"); 1187baa489faSSeongJae Park goto close_comm_pipes; 1188baa489faSSeongJae Park } 1189baa489faSSeongJae Park break; 1190baa489faSSeongJae Park case ANON_THP_COLLAPSE_UPPER_SHARED: 1191baa489faSSeongJae Park /* Don't COW-share the lower part of the THP. */ 1192baa489faSSeongJae Park ret = madvise(mem, size / 2, MADV_DONTFORK); 1193baa489faSSeongJae Park if (ret) { 1194baa489faSSeongJae Park ksft_test_result_fail("MADV_DONTFORK failed\n"); 1195baa489faSSeongJae Park goto close_comm_pipes; 1196baa489faSSeongJae Park } 1197baa489faSSeongJae Park break; 1198baa489faSSeongJae Park default: 1199baa489faSSeongJae Park assert(false); 1200baa489faSSeongJae Park } 1201baa489faSSeongJae Park 1202baa489faSSeongJae Park ret = fork(); 1203baa489faSSeongJae Park if (ret < 0) { 1204baa489faSSeongJae Park ksft_test_result_fail("fork() failed\n"); 1205baa489faSSeongJae Park goto close_comm_pipes; 1206baa489faSSeongJae Park } else if (!ret) { 1207baa489faSSeongJae Park switch (test) { 1208baa489faSSeongJae Park case ANON_THP_COLLAPSE_UNSHARED: 1209baa489faSSeongJae Park case ANON_THP_COLLAPSE_FULLY_SHARED: 1210baa489faSSeongJae Park exit(child_memcmp_fn(mem, size, &comm_pipes)); 1211baa489faSSeongJae Park break; 1212baa489faSSeongJae Park case ANON_THP_COLLAPSE_LOWER_SHARED: 1213baa489faSSeongJae Park exit(child_memcmp_fn(mem, size / 2, &comm_pipes)); 1214baa489faSSeongJae Park break; 1215baa489faSSeongJae Park case ANON_THP_COLLAPSE_UPPER_SHARED: 1216baa489faSSeongJae Park exit(child_memcmp_fn(mem + size / 2, size / 2, 1217baa489faSSeongJae Park &comm_pipes)); 1218baa489faSSeongJae Park break; 1219baa489faSSeongJae Park default: 1220baa489faSSeongJae Park assert(false); 1221baa489faSSeongJae Park } 1222baa489faSSeongJae Park } 1223baa489faSSeongJae Park 1224baa489faSSeongJae Park while (read(comm_pipes.child_ready[0], &buf, 1) != 1) 1225baa489faSSeongJae Park ; 1226baa489faSSeongJae Park 1227baa489faSSeongJae Park switch (test) { 1228baa489faSSeongJae Park case ANON_THP_COLLAPSE_UNSHARED: 1229baa489faSSeongJae Park break; 1230baa489faSSeongJae Park case ANON_THP_COLLAPSE_UPPER_SHARED: 1231baa489faSSeongJae Park case ANON_THP_COLLAPSE_LOWER_SHARED: 1232baa489faSSeongJae Park /* 1233baa489faSSeongJae Park * Revert MADV_DONTFORK such that we merge the VMAs and are 1234baa489faSSeongJae Park * able to actually collapse. 1235baa489faSSeongJae Park */ 1236baa489faSSeongJae Park ret = madvise(mem, size, MADV_DOFORK); 1237baa489faSSeongJae Park if (ret) { 1238baa489faSSeongJae Park ksft_test_result_fail("MADV_DOFORK failed\n"); 1239baa489faSSeongJae Park write(comm_pipes.parent_ready[1], "0", 1); 1240baa489faSSeongJae Park wait(&ret); 1241baa489faSSeongJae Park goto close_comm_pipes; 1242baa489faSSeongJae Park } 1243baa489faSSeongJae Park /* FALLTHROUGH */ 1244baa489faSSeongJae Park case ANON_THP_COLLAPSE_FULLY_SHARED: 1245baa489faSSeongJae Park /* Collapse before anyone modified the COW-shared page. */ 1246baa489faSSeongJae Park ret = madvise(mem, size, MADV_COLLAPSE); 1247baa489faSSeongJae Park if (ret) { 1248baa489faSSeongJae Park ksft_test_result_skip("MADV_COLLAPSE failed: %s\n", 1249baa489faSSeongJae Park strerror(errno)); 1250baa489faSSeongJae Park write(comm_pipes.parent_ready[1], "0", 1); 1251baa489faSSeongJae Park wait(&ret); 1252baa489faSSeongJae Park goto close_comm_pipes; 1253baa489faSSeongJae Park } 1254baa489faSSeongJae Park break; 1255baa489faSSeongJae Park default: 1256baa489faSSeongJae Park assert(false); 1257baa489faSSeongJae Park } 1258baa489faSSeongJae Park 1259baa489faSSeongJae Park /* Modify the page. */ 1260baa489faSSeongJae Park memset(mem, 0xff, size); 1261baa489faSSeongJae Park write(comm_pipes.parent_ready[1], "0", 1); 1262baa489faSSeongJae Park 1263baa489faSSeongJae Park wait(&ret); 1264baa489faSSeongJae Park if (WIFEXITED(ret)) 1265baa489faSSeongJae Park ret = WEXITSTATUS(ret); 1266baa489faSSeongJae Park else 1267baa489faSSeongJae Park ret = -EINVAL; 1268baa489faSSeongJae Park 1269baa489faSSeongJae Park ksft_test_result(!ret, "No leak from parent into child\n"); 1270baa489faSSeongJae Park close_comm_pipes: 1271baa489faSSeongJae Park close_comm_pipes(&comm_pipes); 1272baa489faSSeongJae Park } 1273baa489faSSeongJae Park 1274baa489faSSeongJae Park static void test_anon_thp_collapse_unshared(char *mem, size_t size) 1275baa489faSSeongJae Park { 1276baa489faSSeongJae Park do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UNSHARED); 1277baa489faSSeongJae Park } 1278baa489faSSeongJae Park 1279baa489faSSeongJae Park static void test_anon_thp_collapse_fully_shared(char *mem, size_t size) 1280baa489faSSeongJae Park { 1281baa489faSSeongJae Park do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_FULLY_SHARED); 1282baa489faSSeongJae Park } 1283baa489faSSeongJae Park 1284baa489faSSeongJae Park static void test_anon_thp_collapse_lower_shared(char *mem, size_t size) 1285baa489faSSeongJae Park { 1286baa489faSSeongJae Park do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_LOWER_SHARED); 1287baa489faSSeongJae Park } 1288baa489faSSeongJae Park 1289baa489faSSeongJae Park static void test_anon_thp_collapse_upper_shared(char *mem, size_t size) 1290baa489faSSeongJae Park { 1291baa489faSSeongJae Park do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UPPER_SHARED); 1292baa489faSSeongJae Park } 1293baa489faSSeongJae Park 1294baa489faSSeongJae Park /* 1295baa489faSSeongJae Park * Test cases that are specific to anonymous THP: pages in private mappings 1296baa489faSSeongJae Park * that may get shared via COW during fork(). 1297baa489faSSeongJae Park */ 1298baa489faSSeongJae Park static const struct test_case anon_thp_test_cases[] = { 1299baa489faSSeongJae Park /* 1300baa489faSSeongJae Park * Basic COW test for fork() without any GUP when collapsing a THP 1301baa489faSSeongJae Park * before fork(). 1302baa489faSSeongJae Park * 1303baa489faSSeongJae Park * Re-mapping a PTE-mapped anon THP using a single PMD ("in-place 1304baa489faSSeongJae Park * collapse") might easily get COW handling wrong when not collapsing 1305baa489faSSeongJae Park * exclusivity information properly. 1306baa489faSSeongJae Park */ 1307baa489faSSeongJae Park { 1308baa489faSSeongJae Park "Basic COW after fork() when collapsing before fork()", 1309baa489faSSeongJae Park test_anon_thp_collapse_unshared, 1310baa489faSSeongJae Park }, 1311baa489faSSeongJae Park /* Basic COW test, but collapse after COW-sharing a full THP. */ 1312baa489faSSeongJae Park { 1313baa489faSSeongJae Park "Basic COW after fork() when collapsing after fork() (fully shared)", 1314baa489faSSeongJae Park test_anon_thp_collapse_fully_shared, 1315baa489faSSeongJae Park }, 1316baa489faSSeongJae Park /* 1317baa489faSSeongJae Park * Basic COW test, but collapse after COW-sharing the lower half of a 1318baa489faSSeongJae Park * THP. 1319baa489faSSeongJae Park */ 1320baa489faSSeongJae Park { 1321baa489faSSeongJae Park "Basic COW after fork() when collapsing after fork() (lower shared)", 1322baa489faSSeongJae Park test_anon_thp_collapse_lower_shared, 1323baa489faSSeongJae Park }, 1324baa489faSSeongJae Park /* 1325baa489faSSeongJae Park * Basic COW test, but collapse after COW-sharing the upper half of a 1326baa489faSSeongJae Park * THP. 1327baa489faSSeongJae Park */ 1328baa489faSSeongJae Park { 1329baa489faSSeongJae Park "Basic COW after fork() when collapsing after fork() (upper shared)", 1330baa489faSSeongJae Park test_anon_thp_collapse_upper_shared, 1331baa489faSSeongJae Park }, 1332baa489faSSeongJae Park }; 1333baa489faSSeongJae Park 1334baa489faSSeongJae Park static void run_anon_thp_test_cases(void) 1335baa489faSSeongJae Park { 1336baa489faSSeongJae Park int i; 1337baa489faSSeongJae Park 1338baa489faSSeongJae Park if (!thpsize) 1339baa489faSSeongJae Park return; 1340baa489faSSeongJae Park 1341baa489faSSeongJae Park ksft_print_msg("[INFO] Anonymous THP tests\n"); 1342baa489faSSeongJae Park 1343baa489faSSeongJae Park for (i = 0; i < ARRAY_SIZE(anon_thp_test_cases); i++) { 1344baa489faSSeongJae Park struct test_case const *test_case = &anon_thp_test_cases[i]; 1345baa489faSSeongJae Park 1346baa489faSSeongJae Park ksft_print_msg("[RUN] %s\n", test_case->desc); 1347baa489faSSeongJae Park do_run_with_thp(test_case->fn, THP_RUN_PMD); 1348baa489faSSeongJae Park } 1349baa489faSSeongJae Park } 1350baa489faSSeongJae Park 1351baa489faSSeongJae Park static int tests_per_anon_thp_test_case(void) 1352baa489faSSeongJae Park { 1353baa489faSSeongJae Park return thpsize ? 1 : 0; 1354baa489faSSeongJae Park } 1355baa489faSSeongJae Park 1356baa489faSSeongJae Park typedef void (*non_anon_test_fn)(char *mem, const char *smem, size_t size); 1357baa489faSSeongJae Park 1358baa489faSSeongJae Park static void test_cow(char *mem, const char *smem, size_t size) 1359baa489faSSeongJae Park { 1360baa489faSSeongJae Park char *old = malloc(size); 1361baa489faSSeongJae Park 1362baa489faSSeongJae Park /* Backup the original content. */ 1363baa489faSSeongJae Park memcpy(old, smem, size); 1364baa489faSSeongJae Park 1365baa489faSSeongJae Park /* Modify the page. */ 1366baa489faSSeongJae Park memset(mem, 0xff, size); 1367baa489faSSeongJae Park 1368baa489faSSeongJae Park /* See if we still read the old values via the other mapping. */ 1369baa489faSSeongJae Park ksft_test_result(!memcmp(smem, old, size), 1370baa489faSSeongJae Park "Other mapping not modified\n"); 1371baa489faSSeongJae Park free(old); 1372baa489faSSeongJae Park } 1373baa489faSSeongJae Park 1374baa489faSSeongJae Park static void test_ro_pin(char *mem, const char *smem, size_t size) 1375baa489faSSeongJae Park { 1376baa489faSSeongJae Park do_test_ro_pin(mem, size, RO_PIN_TEST, false); 1377baa489faSSeongJae Park } 1378baa489faSSeongJae Park 1379baa489faSSeongJae Park static void test_ro_fast_pin(char *mem, const char *smem, size_t size) 1380baa489faSSeongJae Park { 1381baa489faSSeongJae Park do_test_ro_pin(mem, size, RO_PIN_TEST, true); 1382baa489faSSeongJae Park } 1383baa489faSSeongJae Park 1384baa489faSSeongJae Park static void run_with_zeropage(non_anon_test_fn fn, const char *desc) 1385baa489faSSeongJae Park { 1386baa489faSSeongJae Park char *mem, *smem, tmp; 1387baa489faSSeongJae Park 1388baa489faSSeongJae Park ksft_print_msg("[RUN] %s ... with shared zeropage\n", desc); 1389baa489faSSeongJae Park 1390baa489faSSeongJae Park mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, 1391baa489faSSeongJae Park MAP_PRIVATE | MAP_ANON, -1, 0); 1392baa489faSSeongJae Park if (mem == MAP_FAILED) { 1393baa489faSSeongJae Park ksft_test_result_fail("mmap() failed\n"); 1394baa489faSSeongJae Park return; 1395baa489faSSeongJae Park } 1396baa489faSSeongJae Park 1397baa489faSSeongJae Park smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0); 1398baa489faSSeongJae Park if (mem == MAP_FAILED) { 1399baa489faSSeongJae Park ksft_test_result_fail("mmap() failed\n"); 1400baa489faSSeongJae Park goto munmap; 1401baa489faSSeongJae Park } 1402baa489faSSeongJae Park 1403baa489faSSeongJae Park /* Read from the page to populate the shared zeropage. */ 1404baa489faSSeongJae Park tmp = *mem + *smem; 1405baa489faSSeongJae Park asm volatile("" : "+r" (tmp)); 1406baa489faSSeongJae Park 1407baa489faSSeongJae Park fn(mem, smem, pagesize); 1408baa489faSSeongJae Park munmap: 1409baa489faSSeongJae Park munmap(mem, pagesize); 1410baa489faSSeongJae Park if (smem != MAP_FAILED) 1411baa489faSSeongJae Park munmap(smem, pagesize); 1412baa489faSSeongJae Park } 1413baa489faSSeongJae Park 1414baa489faSSeongJae Park static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) 1415baa489faSSeongJae Park { 1416baa489faSSeongJae Park char *mem, *smem, *mmap_mem, *mmap_smem, tmp; 1417baa489faSSeongJae Park size_t mmap_size; 1418baa489faSSeongJae Park int ret; 1419baa489faSSeongJae Park 1420baa489faSSeongJae Park ksft_print_msg("[RUN] %s ... with huge zeropage\n", desc); 1421baa489faSSeongJae Park 1422baa489faSSeongJae Park if (!has_huge_zeropage) { 1423baa489faSSeongJae Park ksft_test_result_skip("Huge zeropage not enabled\n"); 1424baa489faSSeongJae Park return; 1425baa489faSSeongJae Park } 1426baa489faSSeongJae Park 1427baa489faSSeongJae Park /* For alignment purposes, we need twice the thp size. */ 1428baa489faSSeongJae Park mmap_size = 2 * thpsize; 1429baa489faSSeongJae Park mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, 1430baa489faSSeongJae Park MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 1431baa489faSSeongJae Park if (mmap_mem == MAP_FAILED) { 1432baa489faSSeongJae Park ksft_test_result_fail("mmap() failed\n"); 1433baa489faSSeongJae Park return; 1434baa489faSSeongJae Park } 1435baa489faSSeongJae Park mmap_smem = mmap(NULL, mmap_size, PROT_READ, 1436baa489faSSeongJae Park MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 1437baa489faSSeongJae Park if (mmap_smem == MAP_FAILED) { 1438baa489faSSeongJae Park ksft_test_result_fail("mmap() failed\n"); 1439baa489faSSeongJae Park goto munmap; 1440baa489faSSeongJae Park } 1441baa489faSSeongJae Park 1442baa489faSSeongJae Park /* We need a THP-aligned memory area. */ 1443baa489faSSeongJae Park mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1)); 1444baa489faSSeongJae Park smem = (char *)(((uintptr_t)mmap_smem + thpsize) & ~(thpsize - 1)); 1445baa489faSSeongJae Park 1446baa489faSSeongJae Park ret = madvise(mem, thpsize, MADV_HUGEPAGE); 1447baa489faSSeongJae Park ret |= madvise(smem, thpsize, MADV_HUGEPAGE); 1448baa489faSSeongJae Park if (ret) { 1449baa489faSSeongJae Park ksft_test_result_fail("MADV_HUGEPAGE failed\n"); 1450baa489faSSeongJae Park goto munmap; 1451baa489faSSeongJae Park } 1452baa489faSSeongJae Park 1453baa489faSSeongJae Park /* 1454baa489faSSeongJae Park * Read from the memory to populate the huge shared zeropage. Read from 1455baa489faSSeongJae Park * the first sub-page and test if we get another sub-page populated 1456baa489faSSeongJae Park * automatically. 1457baa489faSSeongJae Park */ 1458baa489faSSeongJae Park tmp = *mem + *smem; 1459baa489faSSeongJae Park asm volatile("" : "+r" (tmp)); 1460baa489faSSeongJae Park if (!pagemap_is_populated(pagemap_fd, mem + pagesize) || 1461baa489faSSeongJae Park !pagemap_is_populated(pagemap_fd, smem + pagesize)) { 1462baa489faSSeongJae Park ksft_test_result_skip("Did not get THPs populated\n"); 1463baa489faSSeongJae Park goto munmap; 1464baa489faSSeongJae Park } 1465baa489faSSeongJae Park 1466baa489faSSeongJae Park fn(mem, smem, thpsize); 1467baa489faSSeongJae Park munmap: 1468baa489faSSeongJae Park munmap(mmap_mem, mmap_size); 1469baa489faSSeongJae Park if (mmap_smem != MAP_FAILED) 1470baa489faSSeongJae Park munmap(mmap_smem, mmap_size); 1471baa489faSSeongJae Park } 1472baa489faSSeongJae Park 1473baa489faSSeongJae Park static void run_with_memfd(non_anon_test_fn fn, const char *desc) 1474baa489faSSeongJae Park { 1475baa489faSSeongJae Park char *mem, *smem, tmp; 1476baa489faSSeongJae Park int fd; 1477baa489faSSeongJae Park 1478baa489faSSeongJae Park ksft_print_msg("[RUN] %s ... with memfd\n", desc); 1479baa489faSSeongJae Park 1480baa489faSSeongJae Park fd = memfd_create("test", 0); 1481baa489faSSeongJae Park if (fd < 0) { 1482baa489faSSeongJae Park ksft_test_result_fail("memfd_create() failed\n"); 1483baa489faSSeongJae Park return; 1484baa489faSSeongJae Park } 1485baa489faSSeongJae Park 1486baa489faSSeongJae Park /* File consists of a single page filled with zeroes. */ 1487baa489faSSeongJae Park if (fallocate(fd, 0, 0, pagesize)) { 1488baa489faSSeongJae Park ksft_test_result_fail("fallocate() failed\n"); 1489baa489faSSeongJae Park goto close; 1490baa489faSSeongJae Park } 1491baa489faSSeongJae Park 1492baa489faSSeongJae Park /* Create a private mapping of the memfd. */ 1493baa489faSSeongJae Park mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); 1494baa489faSSeongJae Park if (mem == MAP_FAILED) { 1495baa489faSSeongJae Park ksft_test_result_fail("mmap() failed\n"); 1496baa489faSSeongJae Park goto close; 1497baa489faSSeongJae Park } 1498baa489faSSeongJae Park smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); 1499baa489faSSeongJae Park if (mem == MAP_FAILED) { 1500baa489faSSeongJae Park ksft_test_result_fail("mmap() failed\n"); 1501baa489faSSeongJae Park goto munmap; 1502baa489faSSeongJae Park } 1503baa489faSSeongJae Park 1504baa489faSSeongJae Park /* Fault the page in. */ 1505baa489faSSeongJae Park tmp = *mem + *smem; 1506baa489faSSeongJae Park asm volatile("" : "+r" (tmp)); 1507baa489faSSeongJae Park 1508baa489faSSeongJae Park fn(mem, smem, pagesize); 1509baa489faSSeongJae Park munmap: 1510baa489faSSeongJae Park munmap(mem, pagesize); 1511baa489faSSeongJae Park if (smem != MAP_FAILED) 1512baa489faSSeongJae Park munmap(smem, pagesize); 1513baa489faSSeongJae Park close: 1514baa489faSSeongJae Park close(fd); 1515baa489faSSeongJae Park } 1516baa489faSSeongJae Park 1517baa489faSSeongJae Park static void run_with_tmpfile(non_anon_test_fn fn, const char *desc) 1518baa489faSSeongJae Park { 1519baa489faSSeongJae Park char *mem, *smem, tmp; 1520baa489faSSeongJae Park FILE *file; 1521baa489faSSeongJae Park int fd; 1522baa489faSSeongJae Park 1523baa489faSSeongJae Park ksft_print_msg("[RUN] %s ... with tmpfile\n", desc); 1524baa489faSSeongJae Park 1525baa489faSSeongJae Park file = tmpfile(); 1526baa489faSSeongJae Park if (!file) { 1527baa489faSSeongJae Park ksft_test_result_fail("tmpfile() failed\n"); 1528baa489faSSeongJae Park return; 1529baa489faSSeongJae Park } 1530baa489faSSeongJae Park 1531baa489faSSeongJae Park fd = fileno(file); 1532baa489faSSeongJae Park if (fd < 0) { 1533baa489faSSeongJae Park ksft_test_result_skip("fileno() failed\n"); 1534baa489faSSeongJae Park return; 1535baa489faSSeongJae Park } 1536baa489faSSeongJae Park 1537baa489faSSeongJae Park /* File consists of a single page filled with zeroes. */ 1538baa489faSSeongJae Park if (fallocate(fd, 0, 0, pagesize)) { 1539baa489faSSeongJae Park ksft_test_result_fail("fallocate() failed\n"); 1540baa489faSSeongJae Park goto close; 1541baa489faSSeongJae Park } 1542baa489faSSeongJae Park 1543baa489faSSeongJae Park /* Create a private mapping of the memfd. */ 1544baa489faSSeongJae Park mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); 1545baa489faSSeongJae Park if (mem == MAP_FAILED) { 1546baa489faSSeongJae Park ksft_test_result_fail("mmap() failed\n"); 1547baa489faSSeongJae Park goto close; 1548baa489faSSeongJae Park } 1549baa489faSSeongJae Park smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); 1550baa489faSSeongJae Park if (mem == MAP_FAILED) { 1551baa489faSSeongJae Park ksft_test_result_fail("mmap() failed\n"); 1552baa489faSSeongJae Park goto munmap; 1553baa489faSSeongJae Park } 1554baa489faSSeongJae Park 1555baa489faSSeongJae Park /* Fault the page in. */ 1556baa489faSSeongJae Park tmp = *mem + *smem; 1557baa489faSSeongJae Park asm volatile("" : "+r" (tmp)); 1558baa489faSSeongJae Park 1559baa489faSSeongJae Park fn(mem, smem, pagesize); 1560baa489faSSeongJae Park munmap: 1561baa489faSSeongJae Park munmap(mem, pagesize); 1562baa489faSSeongJae Park if (smem != MAP_FAILED) 1563baa489faSSeongJae Park munmap(smem, pagesize); 1564baa489faSSeongJae Park close: 1565baa489faSSeongJae Park fclose(file); 1566baa489faSSeongJae Park } 1567baa489faSSeongJae Park 1568baa489faSSeongJae Park static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc, 1569baa489faSSeongJae Park size_t hugetlbsize) 1570baa489faSSeongJae Park { 1571baa489faSSeongJae Park int flags = MFD_HUGETLB; 1572baa489faSSeongJae Park char *mem, *smem, tmp; 1573baa489faSSeongJae Park int fd; 1574baa489faSSeongJae Park 1575baa489faSSeongJae Park ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc, 1576baa489faSSeongJae Park hugetlbsize / 1024); 1577baa489faSSeongJae Park 1578baa489faSSeongJae Park flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT; 1579baa489faSSeongJae Park 1580baa489faSSeongJae Park fd = memfd_create("test", flags); 1581baa489faSSeongJae Park if (fd < 0) { 1582baa489faSSeongJae Park ksft_test_result_skip("memfd_create() failed\n"); 1583baa489faSSeongJae Park return; 1584baa489faSSeongJae Park } 1585baa489faSSeongJae Park 1586baa489faSSeongJae Park /* File consists of a single page filled with zeroes. */ 1587baa489faSSeongJae Park if (fallocate(fd, 0, 0, hugetlbsize)) { 1588baa489faSSeongJae Park ksft_test_result_skip("need more free huge pages\n"); 1589baa489faSSeongJae Park goto close; 1590baa489faSSeongJae Park } 1591baa489faSSeongJae Park 1592baa489faSSeongJae Park /* Create a private mapping of the memfd. */ 1593baa489faSSeongJae Park mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 1594baa489faSSeongJae Park 0); 1595baa489faSSeongJae Park if (mem == MAP_FAILED) { 1596baa489faSSeongJae Park ksft_test_result_skip("need more free huge pages\n"); 1597baa489faSSeongJae Park goto close; 1598baa489faSSeongJae Park } 1599baa489faSSeongJae Park smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0); 1600baa489faSSeongJae Park if (mem == MAP_FAILED) { 1601baa489faSSeongJae Park ksft_test_result_fail("mmap() failed\n"); 1602baa489faSSeongJae Park goto munmap; 1603baa489faSSeongJae Park } 1604baa489faSSeongJae Park 1605baa489faSSeongJae Park /* Fault the page in. */ 1606baa489faSSeongJae Park tmp = *mem + *smem; 1607baa489faSSeongJae Park asm volatile("" : "+r" (tmp)); 1608baa489faSSeongJae Park 1609baa489faSSeongJae Park fn(mem, smem, hugetlbsize); 1610baa489faSSeongJae Park munmap: 1611baa489faSSeongJae Park munmap(mem, hugetlbsize); 1612baa489faSSeongJae Park if (mem != MAP_FAILED) 1613baa489faSSeongJae Park munmap(smem, hugetlbsize); 1614baa489faSSeongJae Park close: 1615baa489faSSeongJae Park close(fd); 1616baa489faSSeongJae Park } 1617baa489faSSeongJae Park 1618baa489faSSeongJae Park struct non_anon_test_case { 1619baa489faSSeongJae Park const char *desc; 1620baa489faSSeongJae Park non_anon_test_fn fn; 1621baa489faSSeongJae Park }; 1622baa489faSSeongJae Park 1623baa489faSSeongJae Park /* 1624baa489faSSeongJae Park * Test cases that target any pages in private mappings that are not anonymous: 1625baa489faSSeongJae Park * pages that may get shared via COW ndependent of fork(). This includes 1626baa489faSSeongJae Park * the shared zeropage(s), pagecache pages, ... 1627baa489faSSeongJae Park */ 1628baa489faSSeongJae Park static const struct non_anon_test_case non_anon_test_cases[] = { 1629baa489faSSeongJae Park /* 1630baa489faSSeongJae Park * Basic COW test without any GUP. If we miss to break COW, changes are 1631baa489faSSeongJae Park * visible via other private/shared mappings. 1632baa489faSSeongJae Park */ 1633baa489faSSeongJae Park { 1634baa489faSSeongJae Park "Basic COW", 1635baa489faSSeongJae Park test_cow, 1636baa489faSSeongJae Park }, 1637baa489faSSeongJae Park /* 1638baa489faSSeongJae Park * Take a R/O longterm pin. When modifying the page via the page table, 1639baa489faSSeongJae Park * the page content change must be visible via the pin. 1640baa489faSSeongJae Park */ 1641baa489faSSeongJae Park { 1642baa489faSSeongJae Park "R/O longterm GUP pin", 1643baa489faSSeongJae Park test_ro_pin, 1644baa489faSSeongJae Park }, 1645baa489faSSeongJae Park /* Same as above, but using GUP-fast. */ 1646baa489faSSeongJae Park { 1647baa489faSSeongJae Park "R/O longterm GUP-fast pin", 1648baa489faSSeongJae Park test_ro_fast_pin, 1649baa489faSSeongJae Park }, 1650baa489faSSeongJae Park }; 1651baa489faSSeongJae Park 1652baa489faSSeongJae Park static void run_non_anon_test_case(struct non_anon_test_case const *test_case) 1653baa489faSSeongJae Park { 1654baa489faSSeongJae Park int i; 1655baa489faSSeongJae Park 1656baa489faSSeongJae Park run_with_zeropage(test_case->fn, test_case->desc); 1657baa489faSSeongJae Park run_with_memfd(test_case->fn, test_case->desc); 1658baa489faSSeongJae Park run_with_tmpfile(test_case->fn, test_case->desc); 1659baa489faSSeongJae Park if (thpsize) 1660baa489faSSeongJae Park run_with_huge_zeropage(test_case->fn, test_case->desc); 1661baa489faSSeongJae Park for (i = 0; i < nr_hugetlbsizes; i++) 1662baa489faSSeongJae Park run_with_memfd_hugetlb(test_case->fn, test_case->desc, 1663baa489faSSeongJae Park hugetlbsizes[i]); 1664baa489faSSeongJae Park } 1665baa489faSSeongJae Park 1666baa489faSSeongJae Park static void run_non_anon_test_cases(void) 1667baa489faSSeongJae Park { 1668baa489faSSeongJae Park int i; 1669baa489faSSeongJae Park 1670baa489faSSeongJae Park ksft_print_msg("[RUN] Non-anonymous memory tests in private mappings\n"); 1671baa489faSSeongJae Park 1672baa489faSSeongJae Park for (i = 0; i < ARRAY_SIZE(non_anon_test_cases); i++) 1673baa489faSSeongJae Park run_non_anon_test_case(&non_anon_test_cases[i]); 1674baa489faSSeongJae Park } 1675baa489faSSeongJae Park 1676baa489faSSeongJae Park static int tests_per_non_anon_test_case(void) 1677baa489faSSeongJae Park { 1678baa489faSSeongJae Park int tests = 3 + nr_hugetlbsizes; 1679baa489faSSeongJae Park 1680baa489faSSeongJae Park if (thpsize) 1681baa489faSSeongJae Park tests += 1; 1682baa489faSSeongJae Park return tests; 1683baa489faSSeongJae Park } 1684baa489faSSeongJae Park 1685baa489faSSeongJae Park int main(int argc, char **argv) 1686baa489faSSeongJae Park { 1687baa489faSSeongJae Park int err; 1688baa489faSSeongJae Park 1689baa489faSSeongJae Park pagesize = getpagesize(); 1690d6e61afbSDavid Hildenbrand thpsize = read_pmd_pagesize(); 1691d6e61afbSDavid Hildenbrand if (thpsize) 1692d6e61afbSDavid Hildenbrand ksft_print_msg("[INFO] detected THP size: %zu KiB\n", 1693d6e61afbSDavid Hildenbrand thpsize / 1024); 1694*81b1e3f9SDavid Hildenbrand nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes, 1695*81b1e3f9SDavid Hildenbrand ARRAY_SIZE(hugetlbsizes)); 1696baa489faSSeongJae Park detect_huge_zeropage(); 1697baa489faSSeongJae Park 1698baa489faSSeongJae Park ksft_print_header(); 1699baa489faSSeongJae Park ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() + 1700baa489faSSeongJae Park ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() + 1701baa489faSSeongJae Park ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case()); 1702baa489faSSeongJae Park 1703baa489faSSeongJae Park gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR); 1704baa489faSSeongJae Park pagemap_fd = open("/proc/self/pagemap", O_RDONLY); 1705baa489faSSeongJae Park if (pagemap_fd < 0) 1706baa489faSSeongJae Park ksft_exit_fail_msg("opening pagemap failed\n"); 1707baa489faSSeongJae Park 1708baa489faSSeongJae Park run_anon_test_cases(); 1709baa489faSSeongJae Park run_anon_thp_test_cases(); 1710baa489faSSeongJae Park run_non_anon_test_cases(); 1711baa489faSSeongJae Park 1712baa489faSSeongJae Park err = ksft_get_fail_cnt(); 1713baa489faSSeongJae Park if (err) 1714baa489faSSeongJae Park ksft_exit_fail_msg("%d out of %d tests failed\n", 1715baa489faSSeongJae Park err, ksft_test_num()); 1716baa489faSSeongJae Park return ksft_exit_pass(); 1717baa489faSSeongJae Park } 1718