1 /* 2 * Copyright (c) 2012 Will Drewry <wad@dataspill.org> 3 * 4 * Permission to use, copy, modify, and distribute this software for any 5 * purpose with or without fee is hereby granted, provided that the above 6 * copyright notice and this permission notice appear in all copies. 7 * 8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 */ 16 17 /* 18 * Uncomment the SANDBOX_SECCOMP_FILTER_DEBUG macro below to help diagnose 19 * filter breakage during development. *Do not* use this in production, 20 * as it relies on making library calls that are unsafe in signal context. 21 * 22 * Instead, live systems the auditctl(8) may be used to monitor failures. 23 * E.g. 24 * auditctl -a task,always -F uid=<privsep uid> 25 */ 26 /* #define SANDBOX_SECCOMP_FILTER_DEBUG 1 */ 27 28 #if 0 29 /* 30 * For older toolchains, it may be necessary to use the kernel 31 * headers directly. 32 */ 33 #ifdef SANDBOX_SECCOMP_FILTER_DEBUG 34 # include <asm/siginfo.h> 35 # define __have_siginfo_t 1 36 # define __have_sigval_t 1 37 # define __have_sigevent_t 1 38 #endif /* SANDBOX_SECCOMP_FILTER_DEBUG */ 39 #endif 40 41 #include "includes.h" 42 43 #ifdef SANDBOX_SECCOMP_FILTER 44 45 #include <sys/types.h> 46 #include <sys/resource.h> 47 #include <sys/prctl.h> 48 #include <sys/mman.h> 49 #include <sys/syscall.h> 50 51 #include <linux/net.h> 52 #include <linux/audit.h> 53 #include <linux/filter.h> 54 #include <linux/seccomp.h> 55 #include <elf.h> 56 57 #include <asm/unistd.h> 58 #ifdef __s390__ 59 #include <asm/zcrypt.h> 60 #endif 61 62 #include <errno.h> 63 #include <signal.h> 64 #include <stdarg.h> 65 #include <stddef.h> /* for offsetof */ 66 #include <stdio.h> 67 #include <stdlib.h> 68 #include <string.h> 69 #include <unistd.h> 70 71 #include "log.h" 72 #include "ssh-sandbox.h" 73 #include "xmalloc.h" 74 75 /* Linux seccomp_filter sandbox */ 76 #define SECCOMP_FILTER_FAIL SECCOMP_RET_KILL 77 78 /* Use a signal handler to emit violations when debugging */ 79 #ifdef SANDBOX_SECCOMP_FILTER_DEBUG 80 # undef SECCOMP_FILTER_FAIL 81 # define SECCOMP_FILTER_FAIL SECCOMP_RET_TRAP 82 #endif /* SANDBOX_SECCOMP_FILTER_DEBUG */ 83 84 #if __BYTE_ORDER == __LITTLE_ENDIAN 85 # define ARG_LO_OFFSET 0 86 # define ARG_HI_OFFSET sizeof(uint32_t) 87 #elif __BYTE_ORDER == __BIG_ENDIAN 88 # define ARG_LO_OFFSET sizeof(uint32_t) 89 # define ARG_HI_OFFSET 0 90 #else 91 #error "Unknown endianness" 92 #endif 93 94 /* Simple helpers to avoid manual errors (but larger BPF programs). */ 95 #define SC_DENY(_nr, _errno) \ 96 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (_nr), 0, 1), \ 97 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO|(_errno)) 98 #define SC_ALLOW(_nr) \ 99 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (_nr), 0, 1), \ 100 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW) 101 #define SC_ALLOW_ARG(_nr, _arg_nr, _arg_val) \ 102 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (_nr), 0, 6), \ 103 /* load and test syscall argument, low word */ \ 104 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ 105 offsetof(struct seccomp_data, args[(_arg_nr)]) + ARG_LO_OFFSET), \ 106 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, \ 107 ((_arg_val) & 0xFFFFFFFF), 0, 3), \ 108 /* load and test syscall argument, high word */ \ 109 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ 110 offsetof(struct seccomp_data, args[(_arg_nr)]) + ARG_HI_OFFSET), \ 111 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, \ 112 (((uint32_t)((uint64_t)(_arg_val) >> 32)) & 0xFFFFFFFF), 0, 1), \ 113 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), \ 114 /* reload syscall number; all rules expect it in accumulator */ \ 115 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ 116 offsetof(struct seccomp_data, nr)) 117 /* Allow if syscall argument contains only values in mask */ 118 #define SC_ALLOW_ARG_MASK(_nr, _arg_nr, _arg_mask) \ 119 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (_nr), 0, 8), \ 120 /* load, mask and test syscall argument, low word */ \ 121 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ 122 offsetof(struct seccomp_data, args[(_arg_nr)]) + ARG_LO_OFFSET), \ 123 BPF_STMT(BPF_ALU+BPF_AND+BPF_K, ~((_arg_mask) & 0xFFFFFFFF)), \ 124 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0, 0, 4), \ 125 /* load, mask and test syscall argument, high word */ \ 126 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ 127 offsetof(struct seccomp_data, args[(_arg_nr)]) + ARG_HI_OFFSET), \ 128 BPF_STMT(BPF_ALU+BPF_AND+BPF_K, \ 129 ~(((uint32_t)((uint64_t)(_arg_mask) >> 32)) & 0xFFFFFFFF)), \ 130 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0, 0, 1), \ 131 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), \ 132 /* reload syscall number; all rules expect it in accumulator */ \ 133 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ 134 offsetof(struct seccomp_data, nr)) 135 136 /* Syscall filtering set for preauth. */ 137 static const struct sock_filter preauth_insns[] = { 138 /* Ensure the syscall arch convention is as expected. */ 139 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, 140 offsetof(struct seccomp_data, arch)), 141 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_AUDIT_ARCH, 1, 0), 142 BPF_STMT(BPF_RET+BPF_K, SECCOMP_FILTER_FAIL), 143 /* Load the syscall number for checking. */ 144 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, 145 offsetof(struct seccomp_data, nr)), 146 147 /* Syscalls to non-fatally deny */ 148 #ifdef __NR_lstat 149 SC_DENY(__NR_lstat, EACCES), 150 #endif 151 #ifdef __NR_lstat64 152 SC_DENY(__NR_lstat64, EACCES), 153 #endif 154 #ifdef __NR_fstat 155 SC_DENY(__NR_fstat, EACCES), 156 #endif 157 #ifdef __NR_fstat64 158 SC_DENY(__NR_fstat64, EACCES), 159 #endif 160 #ifdef __NR_fstatat64 161 SC_DENY(__NR_fstatat64, EACCES), 162 #endif 163 #ifdef __NR_open 164 SC_DENY(__NR_open, EACCES), 165 #endif 166 #ifdef __NR_openat 167 SC_DENY(__NR_openat, EACCES), 168 #endif 169 #ifdef __NR_newfstatat 170 SC_DENY(__NR_newfstatat, EACCES), 171 #endif 172 #ifdef __NR_stat 173 SC_DENY(__NR_stat, EACCES), 174 #endif 175 #ifdef __NR_stat64 176 SC_DENY(__NR_stat64, EACCES), 177 #endif 178 #ifdef __NR_shmget 179 SC_DENY(__NR_shmget, EACCES), 180 #endif 181 #ifdef __NR_shmat 182 SC_DENY(__NR_shmat, EACCES), 183 #endif 184 #ifdef __NR_shmdt 185 SC_DENY(__NR_shmdt, EACCES), 186 #endif 187 #ifdef __NR_ipc 188 SC_DENY(__NR_ipc, EACCES), 189 #endif 190 #ifdef __NR_statx 191 SC_DENY(__NR_statx, EACCES), 192 #endif 193 194 /* Syscalls to permit */ 195 #ifdef __NR_brk 196 SC_ALLOW(__NR_brk), 197 #endif 198 #ifdef __NR_clock_gettime 199 SC_ALLOW(__NR_clock_gettime), 200 #endif 201 #ifdef __NR_clock_gettime64 202 SC_ALLOW(__NR_clock_gettime64), 203 #endif 204 #ifdef __NR_close 205 SC_ALLOW(__NR_close), 206 #endif 207 #ifdef __NR_exit 208 SC_ALLOW(__NR_exit), 209 #endif 210 #ifdef __NR_exit_group 211 SC_ALLOW(__NR_exit_group), 212 #endif 213 #ifdef __NR_futex 214 SC_ALLOW(__NR_futex), 215 #endif 216 #ifdef __NR_futex_time64 217 SC_ALLOW(__NR_futex_time64), 218 #endif 219 #ifdef __NR_geteuid 220 SC_ALLOW(__NR_geteuid), 221 #endif 222 #ifdef __NR_geteuid32 223 SC_ALLOW(__NR_geteuid32), 224 #endif 225 #ifdef __NR_getpgid 226 SC_ALLOW(__NR_getpgid), 227 #endif 228 #ifdef __NR_getpid 229 SC_ALLOW(__NR_getpid), 230 #endif 231 #ifdef __NR_getrandom 232 SC_ALLOW(__NR_getrandom), 233 #endif 234 #ifdef __NR_gettid 235 SC_ALLOW(__NR_gettid), 236 #endif 237 #ifdef __NR_gettimeofday 238 SC_ALLOW(__NR_gettimeofday), 239 #endif 240 #ifdef __NR_getuid 241 SC_ALLOW(__NR_getuid), 242 #endif 243 #ifdef __NR_getuid32 244 SC_ALLOW(__NR_getuid32), 245 #endif 246 #ifdef __NR_madvise 247 SC_ALLOW(__NR_madvise), 248 #endif 249 #ifdef __NR_mmap 250 SC_ALLOW_ARG_MASK(__NR_mmap, 2, PROT_READ|PROT_WRITE|PROT_NONE), 251 #endif 252 #ifdef __NR_mmap2 253 SC_ALLOW_ARG_MASK(__NR_mmap2, 2, PROT_READ|PROT_WRITE|PROT_NONE), 254 #endif 255 #ifdef __NR_mprotect 256 SC_ALLOW_ARG_MASK(__NR_mprotect, 2, PROT_READ|PROT_WRITE|PROT_NONE), 257 #endif 258 #ifdef __NR_mremap 259 SC_ALLOW(__NR_mremap), 260 #endif 261 #ifdef __NR_munmap 262 SC_ALLOW(__NR_munmap), 263 #endif 264 #ifdef __NR_nanosleep 265 SC_ALLOW(__NR_nanosleep), 266 #endif 267 #ifdef __NR_clock_nanosleep 268 SC_ALLOW(__NR_clock_nanosleep), 269 #endif 270 #ifdef __NR_clock_nanosleep_time64 271 SC_ALLOW(__NR_clock_nanosleep_time64), 272 #endif 273 #ifdef __NR_clock_gettime64 274 SC_ALLOW(__NR_clock_gettime64), 275 #endif 276 #ifdef __NR__newselect 277 SC_ALLOW(__NR__newselect), 278 #endif 279 #ifdef __NR_ppoll 280 SC_ALLOW(__NR_ppoll), 281 #endif 282 #ifdef __NR_ppoll_time64 283 SC_ALLOW(__NR_ppoll_time64), 284 #endif 285 #ifdef __NR_poll 286 SC_ALLOW(__NR_poll), 287 #endif 288 #ifdef __NR_pselect6 289 SC_ALLOW(__NR_pselect6), 290 #endif 291 #ifdef __NR_pselect6_time64 292 SC_ALLOW(__NR_pselect6_time64), 293 #endif 294 #ifdef __NR_read 295 SC_ALLOW(__NR_read), 296 #endif 297 #ifdef __NR_rt_sigprocmask 298 SC_ALLOW(__NR_rt_sigprocmask), 299 #endif 300 #ifdef __NR_select 301 SC_ALLOW(__NR_select), 302 #endif 303 #ifdef __NR_shutdown 304 SC_ALLOW(__NR_shutdown), 305 #endif 306 #ifdef __NR_sigprocmask 307 SC_ALLOW(__NR_sigprocmask), 308 #endif 309 #ifdef __NR_time 310 SC_ALLOW(__NR_time), 311 #endif 312 #ifdef __NR_write 313 SC_ALLOW(__NR_write), 314 #endif 315 #ifdef __NR_writev 316 SC_ALLOW(__NR_writev), 317 #endif 318 #ifdef __NR_socketcall 319 SC_ALLOW_ARG(__NR_socketcall, 0, SYS_SHUTDOWN), 320 SC_DENY(__NR_socketcall, EACCES), 321 #endif 322 #if defined(__NR_ioctl) && defined(__s390__) 323 /* Allow ioctls for ICA crypto card on s390 */ 324 SC_ALLOW_ARG(__NR_ioctl, 1, Z90STAT_STATUS_MASK), 325 SC_ALLOW_ARG(__NR_ioctl, 1, ICARSAMODEXPO), 326 SC_ALLOW_ARG(__NR_ioctl, 1, ICARSACRT), 327 SC_ALLOW_ARG(__NR_ioctl, 1, ZSECSENDCPRB), 328 /* Allow ioctls for EP11 crypto card on s390 */ 329 SC_ALLOW_ARG(__NR_ioctl, 1, ZSENDEP11CPRB), 330 #endif 331 #if defined(__x86_64__) && defined(__ILP32__) && defined(__X32_SYSCALL_BIT) 332 /* 333 * On Linux x32, the clock_gettime VDSO falls back to the 334 * x86-64 syscall under some circumstances, e.g. 335 * https://bugs.debian.org/849923 336 */ 337 SC_ALLOW(__NR_clock_gettime & ~__X32_SYSCALL_BIT), 338 #endif 339 340 /* Default deny */ 341 BPF_STMT(BPF_RET+BPF_K, SECCOMP_FILTER_FAIL), 342 }; 343 344 static const struct sock_fprog preauth_program = { 345 .len = (unsigned short)(sizeof(preauth_insns)/sizeof(preauth_insns[0])), 346 .filter = (struct sock_filter *)preauth_insns, 347 }; 348 349 struct ssh_sandbox { 350 pid_t child_pid; 351 }; 352 353 struct ssh_sandbox * 354 ssh_sandbox_init(struct monitor *monitor) 355 { 356 struct ssh_sandbox *box; 357 358 /* 359 * Strictly, we don't need to maintain any state here but we need 360 * to return non-NULL to satisfy the API. 361 */ 362 debug3("%s: preparing seccomp filter sandbox", __func__); 363 box = xcalloc(1, sizeof(*box)); 364 box->child_pid = 0; 365 366 return box; 367 } 368 369 #ifdef SANDBOX_SECCOMP_FILTER_DEBUG 370 extern struct monitor *pmonitor; 371 void mm_log_handler(LogLevel level, int forced, const char *msg, void *ctx); 372 373 static void 374 ssh_sandbox_violation(int signum, siginfo_t *info, void *void_context) 375 { 376 char msg[256]; 377 378 snprintf(msg, sizeof(msg), 379 "%s: unexpected system call (arch:0x%x,syscall:%d @ %p)", 380 __func__, info->si_arch, info->si_syscall, info->si_call_addr); 381 mm_log_handler(SYSLOG_LEVEL_FATAL, 0, msg, pmonitor); 382 _exit(1); 383 } 384 385 static void 386 ssh_sandbox_child_debugging(void) 387 { 388 struct sigaction act; 389 sigset_t mask; 390 391 debug3("%s: installing SIGSYS handler", __func__); 392 memset(&act, 0, sizeof(act)); 393 sigemptyset(&mask); 394 sigaddset(&mask, SIGSYS); 395 396 act.sa_sigaction = &ssh_sandbox_violation; 397 act.sa_flags = SA_SIGINFO; 398 if (sigaction(SIGSYS, &act, NULL) == -1) 399 fatal("%s: sigaction(SIGSYS): %s", __func__, strerror(errno)); 400 if (sigprocmask(SIG_UNBLOCK, &mask, NULL) == -1) 401 fatal("%s: sigprocmask(SIGSYS): %s", 402 __func__, strerror(errno)); 403 } 404 #endif /* SANDBOX_SECCOMP_FILTER_DEBUG */ 405 406 void 407 ssh_sandbox_child(struct ssh_sandbox *box) 408 { 409 struct rlimit rl_zero, rl_one = {.rlim_cur = 1, .rlim_max = 1}; 410 int nnp_failed = 0; 411 412 /* Set rlimits for completeness if possible. */ 413 rl_zero.rlim_cur = rl_zero.rlim_max = 0; 414 if (setrlimit(RLIMIT_FSIZE, &rl_zero) == -1) 415 fatal("%s: setrlimit(RLIMIT_FSIZE, { 0, 0 }): %s", 416 __func__, strerror(errno)); 417 /* 418 * Cannot use zero for nfds, because poll(2) will fail with 419 * errno=EINVAL if npfds>RLIMIT_NOFILE. 420 */ 421 if (setrlimit(RLIMIT_NOFILE, &rl_one) == -1) 422 fatal("%s: setrlimit(RLIMIT_NOFILE, { 0, 0 }): %s", 423 __func__, strerror(errno)); 424 if (setrlimit(RLIMIT_NPROC, &rl_zero) == -1) 425 fatal("%s: setrlimit(RLIMIT_NPROC, { 0, 0 }): %s", 426 __func__, strerror(errno)); 427 428 #ifdef SANDBOX_SECCOMP_FILTER_DEBUG 429 ssh_sandbox_child_debugging(); 430 #endif /* SANDBOX_SECCOMP_FILTER_DEBUG */ 431 432 debug3("%s: setting PR_SET_NO_NEW_PRIVS", __func__); 433 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1) { 434 debug("%s: prctl(PR_SET_NO_NEW_PRIVS): %s", 435 __func__, strerror(errno)); 436 nnp_failed = 1; 437 } 438 debug3("%s: attaching seccomp filter program", __func__); 439 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &preauth_program) == -1) 440 debug("%s: prctl(PR_SET_SECCOMP): %s", 441 __func__, strerror(errno)); 442 else if (nnp_failed) 443 fatal("%s: SECCOMP_MODE_FILTER activated but " 444 "PR_SET_NO_NEW_PRIVS failed", __func__); 445 } 446 447 void 448 ssh_sandbox_parent_finish(struct ssh_sandbox *box) 449 { 450 free(box); 451 debug3("%s: finished", __func__); 452 } 453 454 void 455 ssh_sandbox_parent_preauth(struct ssh_sandbox *box, pid_t child_pid) 456 { 457 box->child_pid = child_pid; 458 } 459 460 #endif /* SANDBOX_SECCOMP_FILTER */ 461