1 /* 2 * Copyright (c) 2012 Will Drewry <wad@dataspill.org> 3 * 4 * Permission to use, copy, modify, and distribute this software for any 5 * purpose with or without fee is hereby granted, provided that the above 6 * copyright notice and this permission notice appear in all copies. 7 * 8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 */ 16 17 /* 18 * Uncomment the SANDBOX_SECCOMP_FILTER_DEBUG macro below to help diagnose 19 * filter breakage during development. *Do not* use this in production, 20 * as it relies on making library calls that are unsafe in signal context. 21 * 22 * Instead, live systems the auditctl(8) may be used to monitor failures. 23 * E.g. 24 * auditctl -a task,always -F uid=<privsep uid> 25 */ 26 /* #define SANDBOX_SECCOMP_FILTER_DEBUG 1 */ 27 28 /* XXX it should be possible to do logging via the log socket safely */ 29 30 #ifdef SANDBOX_SECCOMP_FILTER_DEBUG 31 /* Use the kernel headers in case of an older toolchain. */ 32 # include <asm/siginfo.h> 33 # define __have_siginfo_t 1 34 # define __have_sigval_t 1 35 # define __have_sigevent_t 1 36 #endif /* SANDBOX_SECCOMP_FILTER_DEBUG */ 37 38 #include "includes.h" 39 40 #ifdef SANDBOX_SECCOMP_FILTER 41 42 #include <sys/types.h> 43 #include <sys/resource.h> 44 #include <sys/prctl.h> 45 46 #include <linux/net.h> 47 #include <linux/audit.h> 48 #include <linux/filter.h> 49 #include <linux/seccomp.h> 50 #include <elf.h> 51 52 #include <asm/unistd.h> 53 #ifdef __s390__ 54 #include <asm/zcrypt.h> 55 #endif 56 57 #include <errno.h> 58 #include <signal.h> 59 #include <stdarg.h> 60 #include <stddef.h> /* for offsetof */ 61 #include <stdio.h> 62 #include <stdlib.h> 63 #include <string.h> 64 #include <unistd.h> 65 66 #include "log.h" 67 #include "ssh-sandbox.h" 68 #include "xmalloc.h" 69 70 /* Linux seccomp_filter sandbox */ 71 #define SECCOMP_FILTER_FAIL SECCOMP_RET_KILL 72 73 /* Use a signal handler to emit violations when debugging */ 74 #ifdef SANDBOX_SECCOMP_FILTER_DEBUG 75 # undef SECCOMP_FILTER_FAIL 76 # define SECCOMP_FILTER_FAIL SECCOMP_RET_TRAP 77 #endif /* SANDBOX_SECCOMP_FILTER_DEBUG */ 78 79 #if __BYTE_ORDER == __LITTLE_ENDIAN 80 # define ARG_LO_OFFSET 0 81 # define ARG_HI_OFFSET sizeof(uint32_t) 82 #elif __BYTE_ORDER == __BIG_ENDIAN 83 # define ARG_LO_OFFSET sizeof(uint32_t) 84 # define ARG_HI_OFFSET 0 85 #else 86 #error "Unknown endianness" 87 #endif 88 89 /* Simple helpers to avoid manual errors (but larger BPF programs). */ 90 #define SC_DENY(_nr, _errno) \ 91 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (_nr), 0, 1), \ 92 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO|(_errno)) 93 #define SC_ALLOW(_nr) \ 94 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (_nr), 0, 1), \ 95 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW) 96 #define SC_ALLOW_ARG(_nr, _arg_nr, _arg_val) \ 97 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (_nr), 0, 6), \ 98 /* load and test first syscall argument, low word */ \ 99 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ 100 offsetof(struct seccomp_data, args[(_arg_nr)]) + ARG_LO_OFFSET), \ 101 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, \ 102 ((_arg_val) & 0xFFFFFFFF), 0, 3), \ 103 /* load and test first syscall argument, high word */ \ 104 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ 105 offsetof(struct seccomp_data, args[(_arg_nr)]) + ARG_HI_OFFSET), \ 106 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, \ 107 (((uint32_t)((uint64_t)(_arg_val) >> 32)) & 0xFFFFFFFF), 0, 1), \ 108 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), \ 109 /* reload syscall number; all rules expect it in accumulator */ \ 110 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ 111 offsetof(struct seccomp_data, nr)) 112 113 /* Syscall filtering set for preauth. */ 114 static const struct sock_filter preauth_insns[] = { 115 /* Ensure the syscall arch convention is as expected. */ 116 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, 117 offsetof(struct seccomp_data, arch)), 118 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_AUDIT_ARCH, 1, 0), 119 BPF_STMT(BPF_RET+BPF_K, SECCOMP_FILTER_FAIL), 120 /* Load the syscall number for checking. */ 121 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, 122 offsetof(struct seccomp_data, nr)), 123 124 /* Syscalls to non-fatally deny */ 125 #ifdef __NR_lstat 126 SC_DENY(__NR_lstat, EACCES), 127 #endif 128 #ifdef __NR_lstat64 129 SC_DENY(__NR_lstat64, EACCES), 130 #endif 131 #ifdef __NR_fstat 132 SC_DENY(__NR_fstat, EACCES), 133 #endif 134 #ifdef __NR_fstat64 135 SC_DENY(__NR_fstat64, EACCES), 136 #endif 137 #ifdef __NR_open 138 SC_DENY(__NR_open, EACCES), 139 #endif 140 #ifdef __NR_openat 141 SC_DENY(__NR_openat, EACCES), 142 #endif 143 #ifdef __NR_newfstatat 144 SC_DENY(__NR_newfstatat, EACCES), 145 #endif 146 #ifdef __NR_stat 147 SC_DENY(__NR_stat, EACCES), 148 #endif 149 #ifdef __NR_stat64 150 SC_DENY(__NR_stat64, EACCES), 151 #endif 152 153 /* Syscalls to permit */ 154 #ifdef __NR_brk 155 SC_ALLOW(__NR_brk), 156 #endif 157 #ifdef __NR_clock_gettime 158 SC_ALLOW(__NR_clock_gettime), 159 #endif 160 #ifdef __NR_close 161 SC_ALLOW(__NR_close), 162 #endif 163 #ifdef __NR_exit 164 SC_ALLOW(__NR_exit), 165 #endif 166 #ifdef __NR_exit_group 167 SC_ALLOW(__NR_exit_group), 168 #endif 169 #ifdef __NR_futex 170 SC_ALLOW(__NR_futex), 171 #endif 172 #ifdef __NR_geteuid 173 SC_ALLOW(__NR_geteuid), 174 #endif 175 #ifdef __NR_geteuid32 176 SC_ALLOW(__NR_geteuid32), 177 #endif 178 #ifdef __NR_getpgid 179 SC_ALLOW(__NR_getpgid), 180 #endif 181 #ifdef __NR_getpid 182 SC_ALLOW(__NR_getpid), 183 #endif 184 #ifdef __NR_getrandom 185 SC_ALLOW(__NR_getrandom), 186 #endif 187 #ifdef __NR_gettimeofday 188 SC_ALLOW(__NR_gettimeofday), 189 #endif 190 #ifdef __NR_getuid 191 SC_ALLOW(__NR_getuid), 192 #endif 193 #ifdef __NR_getuid32 194 SC_ALLOW(__NR_getuid32), 195 #endif 196 #ifdef __NR_madvise 197 SC_ALLOW(__NR_madvise), 198 #endif 199 #ifdef __NR_mmap 200 SC_ALLOW(__NR_mmap), 201 #endif 202 #ifdef __NR_mmap2 203 SC_ALLOW(__NR_mmap2), 204 #endif 205 #ifdef __NR_mremap 206 SC_ALLOW(__NR_mremap), 207 #endif 208 #ifdef __NR_munmap 209 SC_ALLOW(__NR_munmap), 210 #endif 211 #ifdef __NR_nanosleep 212 SC_ALLOW(__NR_nanosleep), 213 #endif 214 #ifdef __NR__newselect 215 SC_ALLOW(__NR__newselect), 216 #endif 217 #ifdef __NR_poll 218 SC_ALLOW(__NR_poll), 219 #endif 220 #ifdef __NR_pselect6 221 SC_ALLOW(__NR_pselect6), 222 #endif 223 #ifdef __NR_read 224 SC_ALLOW(__NR_read), 225 #endif 226 #ifdef __NR_rt_sigprocmask 227 SC_ALLOW(__NR_rt_sigprocmask), 228 #endif 229 #ifdef __NR_select 230 SC_ALLOW(__NR_select), 231 #endif 232 #ifdef __NR_shutdown 233 SC_ALLOW(__NR_shutdown), 234 #endif 235 #ifdef __NR_sigprocmask 236 SC_ALLOW(__NR_sigprocmask), 237 #endif 238 #ifdef __NR_time 239 SC_ALLOW(__NR_time), 240 #endif 241 #ifdef __NR_write 242 SC_ALLOW(__NR_write), 243 #endif 244 #ifdef __NR_socketcall 245 SC_ALLOW_ARG(__NR_socketcall, 0, SYS_SHUTDOWN), 246 SC_DENY(__NR_socketcall, EACCES), 247 #endif 248 #if defined(__NR_ioctl) && defined(__s390__) 249 /* Allow ioctls for ICA crypto card on s390 */ 250 SC_ALLOW_ARG(__NR_ioctl, 1, Z90STAT_STATUS_MASK), 251 SC_ALLOW_ARG(__NR_ioctl, 1, ICARSAMODEXPO), 252 SC_ALLOW_ARG(__NR_ioctl, 1, ICARSACRT), 253 #endif 254 #if defined(__x86_64__) && defined(__ILP32__) && defined(__X32_SYSCALL_BIT) 255 /* 256 * On Linux x32, the clock_gettime VDSO falls back to the 257 * x86-64 syscall under some circumstances, e.g. 258 * https://bugs.debian.org/849923 259 */ 260 SC_ALLOW(__NR_clock_gettime & ~__X32_SYSCALL_BIT), 261 #endif 262 263 /* Default deny */ 264 BPF_STMT(BPF_RET+BPF_K, SECCOMP_FILTER_FAIL), 265 }; 266 267 static const struct sock_fprog preauth_program = { 268 .len = (unsigned short)(sizeof(preauth_insns)/sizeof(preauth_insns[0])), 269 .filter = (struct sock_filter *)preauth_insns, 270 }; 271 272 struct ssh_sandbox { 273 pid_t child_pid; 274 }; 275 276 struct ssh_sandbox * 277 ssh_sandbox_init(struct monitor *monitor) 278 { 279 struct ssh_sandbox *box; 280 281 /* 282 * Strictly, we don't need to maintain any state here but we need 283 * to return non-NULL to satisfy the API. 284 */ 285 debug3("%s: preparing seccomp filter sandbox", __func__); 286 box = xcalloc(1, sizeof(*box)); 287 box->child_pid = 0; 288 289 return box; 290 } 291 292 #ifdef SANDBOX_SECCOMP_FILTER_DEBUG 293 extern struct monitor *pmonitor; 294 void mm_log_handler(LogLevel level, const char *msg, void *ctx); 295 296 static void 297 ssh_sandbox_violation(int signum, siginfo_t *info, void *void_context) 298 { 299 char msg[256]; 300 301 snprintf(msg, sizeof(msg), 302 "%s: unexpected system call (arch:0x%x,syscall:%d @ %p)", 303 __func__, info->si_arch, info->si_syscall, info->si_call_addr); 304 mm_log_handler(SYSLOG_LEVEL_FATAL, msg, pmonitor); 305 _exit(1); 306 } 307 308 static void 309 ssh_sandbox_child_debugging(void) 310 { 311 struct sigaction act; 312 sigset_t mask; 313 314 debug3("%s: installing SIGSYS handler", __func__); 315 memset(&act, 0, sizeof(act)); 316 sigemptyset(&mask); 317 sigaddset(&mask, SIGSYS); 318 319 act.sa_sigaction = &ssh_sandbox_violation; 320 act.sa_flags = SA_SIGINFO; 321 if (sigaction(SIGSYS, &act, NULL) == -1) 322 fatal("%s: sigaction(SIGSYS): %s", __func__, strerror(errno)); 323 if (sigprocmask(SIG_UNBLOCK, &mask, NULL) == -1) 324 fatal("%s: sigprocmask(SIGSYS): %s", 325 __func__, strerror(errno)); 326 } 327 #endif /* SANDBOX_SECCOMP_FILTER_DEBUG */ 328 329 void 330 ssh_sandbox_child(struct ssh_sandbox *box) 331 { 332 struct rlimit rl_zero; 333 int nnp_failed = 0; 334 335 /* Set rlimits for completeness if possible. */ 336 rl_zero.rlim_cur = rl_zero.rlim_max = 0; 337 if (setrlimit(RLIMIT_FSIZE, &rl_zero) == -1) 338 fatal("%s: setrlimit(RLIMIT_FSIZE, { 0, 0 }): %s", 339 __func__, strerror(errno)); 340 if (setrlimit(RLIMIT_NOFILE, &rl_zero) == -1) 341 fatal("%s: setrlimit(RLIMIT_NOFILE, { 0, 0 }): %s", 342 __func__, strerror(errno)); 343 if (setrlimit(RLIMIT_NPROC, &rl_zero) == -1) 344 fatal("%s: setrlimit(RLIMIT_NPROC, { 0, 0 }): %s", 345 __func__, strerror(errno)); 346 347 #ifdef SANDBOX_SECCOMP_FILTER_DEBUG 348 ssh_sandbox_child_debugging(); 349 #endif /* SANDBOX_SECCOMP_FILTER_DEBUG */ 350 351 debug3("%s: setting PR_SET_NO_NEW_PRIVS", __func__); 352 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1) { 353 debug("%s: prctl(PR_SET_NO_NEW_PRIVS): %s", 354 __func__, strerror(errno)); 355 nnp_failed = 1; 356 } 357 debug3("%s: attaching seccomp filter program", __func__); 358 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &preauth_program) == -1) 359 debug("%s: prctl(PR_SET_SECCOMP): %s", 360 __func__, strerror(errno)); 361 else if (nnp_failed) 362 fatal("%s: SECCOMP_MODE_FILTER activated but " 363 "PR_SET_NO_NEW_PRIVS failed", __func__); 364 } 365 366 void 367 ssh_sandbox_parent_finish(struct ssh_sandbox *box) 368 { 369 free(box); 370 debug3("%s: finished", __func__); 371 } 372 373 void 374 ssh_sandbox_parent_preauth(struct ssh_sandbox *box, pid_t child_pid) 375 { 376 box->child_pid = child_pid; 377 } 378 379 #endif /* SANDBOX_SECCOMP_FILTER */ 380