1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net> 4 */ 5 6 #include <sysdep/stub.h> 7 8 #include <linux/futex.h> 9 #include <sys/socket.h> 10 #include <errno.h> 11 12 /* 13 * Known security issues 14 * 15 * Userspace can jump to this address to execute *any* syscall that is 16 * permitted by the stub. As we will return afterwards, it can do 17 * whatever it likes, including: 18 * - Tricking the kernel into handing out the memory FD 19 * - Using this memory FD to read/write all physical memory 20 * - Running in parallel to the kernel processing a syscall 21 * (possibly creating data races?) 22 * - Blocking e.g. SIGALRM to avoid time based scheduling 23 * 24 * To avoid this, the permitted location for each syscall needs to be 25 * checked for in the SECCOMP filter (which is reasonably simple). Also, 26 * more care will need to go into considerations how the code might be 27 * tricked by using a prepared stack (or even modifying the stack from 28 * another thread in case SMP support is added). 29 * 30 * As for the SIGALRM, the best counter measure will be to check in the 31 * kernel that the process is reporting back the SIGALRM in a timely 32 * fashion. 33 */ 34 static __always_inline int syscall_handler(int fd_map[STUB_MAX_FDS]) 35 { 36 struct stub_data *d = get_stub_data(); 37 int i; 38 unsigned long res; 39 int fd; 40 41 for (i = 0; i < d->syscall_data_len; i++) { 42 struct stub_syscall *sc = &d->syscall_data[i]; 43 44 switch (sc->syscall) { 45 case STUB_SYSCALL_MMAP: 46 if (fd_map) 47 fd = fd_map[sc->mem.fd]; 48 else 49 fd = sc->mem.fd; 50 51 res = stub_syscall6(STUB_MMAP_NR, 52 sc->mem.addr, sc->mem.length, 53 sc->mem.prot, 54 MAP_SHARED | MAP_FIXED, 55 fd, sc->mem.offset); 56 if (res != sc->mem.addr) { 57 d->err = res; 58 d->syscall_data_len = i; 59 return -1; 60 } 61 break; 62 case STUB_SYSCALL_MUNMAP: 63 res = stub_syscall2(__NR_munmap, 64 sc->mem.addr, sc->mem.length); 65 if (res) { 66 d->err = res; 67 d->syscall_data_len = i; 68 return -1; 69 } 70 break; 71 default: 72 d->err = -95; /* EOPNOTSUPP */ 73 d->syscall_data_len = i; 74 return -1; 75 } 76 } 77 78 d->err = 0; 79 d->syscall_data_len = 0; 80 81 return 0; 82 } 83 84 void __section(".__syscall_stub") 85 stub_syscall_handler(void) 86 { 87 syscall_handler(NULL); 88 89 trap_myself(); 90 } 91 92 void __section(".__syscall_stub") 93 stub_signal_interrupt(int sig, siginfo_t *info, void *p) 94 { 95 struct stub_data *d = get_stub_data(); 96 char rcv_data; 97 union { 98 char data[CMSG_SPACE(sizeof(int) * STUB_MAX_FDS)]; 99 struct cmsghdr align; 100 } ctrl = {}; 101 struct iovec iov = { 102 .iov_base = &rcv_data, 103 .iov_len = 1, 104 }; 105 struct msghdr msghdr = { 106 .msg_iov = &iov, 107 .msg_iovlen = 1, 108 .msg_control = &ctrl, 109 .msg_controllen = sizeof(ctrl), 110 }; 111 ucontext_t *uc = p; 112 struct cmsghdr *fd_msg; 113 int *fd_map; 114 int num_fds; 115 long res; 116 117 d->signal = sig; 118 d->si_offset = (unsigned long)info - (unsigned long)&d->sigstack[0]; 119 d->mctx_offset = (unsigned long)&uc->uc_mcontext - (unsigned long)&d->sigstack[0]; 120 121 restart_wait: 122 d->futex = FUTEX_IN_KERN; 123 do { 124 res = stub_syscall3(__NR_futex, (unsigned long)&d->futex, 125 FUTEX_WAKE, 1); 126 } while (res == -EINTR); 127 128 do { 129 res = stub_syscall4(__NR_futex, (unsigned long)&d->futex, 130 FUTEX_WAIT, FUTEX_IN_KERN, 0); 131 } while (res == -EINTR || d->futex == FUTEX_IN_KERN); 132 133 if (res < 0 && res != -EAGAIN) 134 stub_syscall1(__NR_exit_group, 1); 135 136 if (d->syscall_data_len) { 137 /* Read passed FDs (if any) */ 138 do { 139 res = stub_syscall3(__NR_recvmsg, 0, (unsigned long)&msghdr, 0); 140 } while (res == -EINTR); 141 142 /* We should never have a receive error (other than -EAGAIN) */ 143 if (res < 0 && res != -EAGAIN) 144 stub_syscall1(__NR_exit_group, 1); 145 146 /* Receive the FDs */ 147 num_fds = 0; 148 fd_msg = msghdr.msg_control; 149 fd_map = (void *)&CMSG_DATA(fd_msg); 150 if (res == iov.iov_len && msghdr.msg_controllen > sizeof(struct cmsghdr)) 151 num_fds = (fd_msg->cmsg_len - CMSG_LEN(0)) / sizeof(int); 152 153 /* Try running queued syscalls. */ 154 res = syscall_handler(fd_map); 155 156 while (num_fds) 157 stub_syscall2(__NR_close, fd_map[--num_fds], 0); 158 } else { 159 res = 0; 160 } 161 162 if (res < 0 || d->restart_wait) { 163 /* Report SIGSYS if we restart. */ 164 d->signal = SIGSYS; 165 d->restart_wait = 0; 166 167 goto restart_wait; 168 } 169 170 /* Restore arch dependent state that is not part of the mcontext */ 171 stub_seccomp_restore_state(&d->arch_data); 172 173 /* Return so that the host modified mcontext is restored. */ 174 } 175 176 void __section(".__syscall_stub") 177 stub_signal_restorer(void) 178 { 179 /* We must not have anything on the stack when doing rt_sigreturn */ 180 stub_syscall0(__NR_rt_sigreturn); 181 } 182