1 /*
2 * Copyright (c) 2012 Will Drewry <wad@dataspill.org>
3 * Copyright (c) 2015,2017,2019,2020,2023 Damien Miller <djm@mindrot.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
/*
 * Uncomment the SANDBOX_SECCOMP_FILTER_DEBUG macro below to help diagnose
 * filter breakage during development. *Do not* use this in production,
 * as it relies on making library calls that are unsafe in signal context.
 *
 * Instead, on live systems auditctl(8) may be used to monitor failures.
 * E.g.
 *   auditctl -a task,always -F uid=<privsep uid>
 */
27 /* #define SANDBOX_SECCOMP_FILTER_DEBUG 1 */
28
29 #if 0
30 /*
31 * For older toolchains, it may be necessary to use the kernel
32 * headers directly.
33 */
34 #ifdef SANDBOX_SECCOMP_FILTER_DEBUG
35 # include <asm/siginfo.h>
36 # define __have_siginfo_t 1
37 # define __have_sigval_t 1
38 # define __have_sigevent_t 1
39 #endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
40 #endif
41
42 #include "includes.h"
43
44 #ifdef SANDBOX_SECCOMP_FILTER
45
46 #include <sys/types.h>
47 #include <sys/resource.h>
48 #include <sys/prctl.h>
49 #include <sys/mman.h>
50 #include <sys/syscall.h>
51
52 #include <netinet/ip.h>
53
54 #include <linux/futex.h>
55 #include <linux/net.h>
56 #include <linux/audit.h>
57 #include <linux/filter.h>
58 #include <linux/seccomp.h>
59 #include <elf.h>
60
61 #include <asm/unistd.h>
62 #ifdef __s390__
63 #include <asm/zcrypt.h>
64 #endif
65
66 #include <errno.h>
67 #include <signal.h>
68 #include <stdarg.h>
69 #include <stddef.h> /* for offsetof */
70 #include <stdio.h>
71 #include <stdlib.h>
72 #include <string.h>
73 #include <unistd.h>
74
75 #include "log.h"
76 #include "ssh-sandbox.h"
77 #include "xmalloc.h"
78
/* Linux seccomp_filter sandbox */

/*
 * Filter return action used both for an unexpected syscall architecture
 * and for the default-deny rule at the end of the program. Normally the
 * offender is killed; in debug builds it is trapped instead so that a
 * signal handler can emit the violation.
 */
#ifdef SANDBOX_SECCOMP_FILTER_DEBUG
# define SECCOMP_FILTER_FAIL	SECCOMP_RET_TRAP
#else
# define SECCOMP_FILTER_FAIL	SECCOMP_RET_KILL
#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
87
/*
 * Byte offsets, within one 64-bit seccomp_data argument slot, of the
 * 32-bit low and high words. Which half comes first depends on the host
 * byte order.
 */
#if __BYTE_ORDER != __LITTLE_ENDIAN && __BYTE_ORDER != __BIG_ENDIAN
# error "Unknown endianness"
#endif

#if __BYTE_ORDER == __LITTLE_ENDIAN
# define ARG_LO_OFFSET	0
# define ARG_HI_OFFSET	sizeof(uint32_t)
#else /* __BIG_ENDIAN */
# define ARG_LO_OFFSET	sizeof(uint32_t)
# define ARG_HI_OFFSET	0
#endif
97
/*
 * Simple helpers to avoid manual errors (but larger BPF programs).
 * Each helper expands to a comma-separated run of BPF instructions meant
 * to be placed inside a struct sock_filter array initializer.
 */

/*
 * If the accumulator equals _sysno, fail the syscall with errno _err;
 * otherwise skip to the instruction after this rule.
 */
#define SC_DENY(_sysno, _err) \
	BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, (_sysno), 0, 1), \
	BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ERRNO | (_err))
/*
 * If the accumulator equals _sysno, allow the syscall unconditionally;
 * otherwise skip to the instruction after this rule.
 */
#define SC_ALLOW(_sysno) \
	BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, (_sysno), 0, 1), \
	BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ALLOW)
/*
 * Allow syscall _sysno only when its argument number _argno is exactly
 * _want. The 64-bit argument is compared as two 32-bit words, low word
 * first. When the syscall matches but the argument does not, the syscall
 * number is reloaded so that the next rule finds it in the accumulator.
 */
#define SC_ALLOW_ARG(_sysno, _argno, _want) \
	/* different syscall: step over the six instructions below */ \
	BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, (_sysno), 0, 6), \
	/* low word of the argument must equal the low word of _want */ \
	BPF_STMT(BPF_LD + BPF_W + BPF_ABS, \
	    offsetof(struct seccomp_data, args[(_argno)]) + ARG_LO_OFFSET), \
	BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, \
	    ((_want) & 0xFFFFFFFF), 0, 3), \
	/* high word of the argument must equal the high word of _want */ \
	BPF_STMT(BPF_LD + BPF_W + BPF_ABS, \
	    offsetof(struct seccomp_data, args[(_argno)]) + ARG_HI_OFFSET), \
	BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, \
	    (((uint32_t)((uint64_t)(_want) >> 32)) & 0xFFFFFFFF), 0, 1), \
	BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ALLOW), \
	/* reload syscall number; all rules expect it in accumulator */ \
	BPF_STMT(BPF_LD + BPF_W + BPF_ABS, \
	    offsetof(struct seccomp_data, nr))
/*
 * Allow if syscall argument contains only values in mask.
 *
 * For syscall _sysno, argument number _argno is ANDed with the complement
 * of _bits, one 32-bit word at a time. The syscall is allowed only when
 * both results are zero, i.e. no bit outside _bits is set. Otherwise the
 * syscall number is reloaded and evaluation continues with the next rule.
 */
#define SC_ALLOW_ARG_MASK(_sysno, _argno, _bits) \
	/* different syscall: step over the eight instructions below */ \
	BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, (_sysno), 0, 8), \
	/* low word: no bits outside the mask may be set */ \
	BPF_STMT(BPF_LD + BPF_W + BPF_ABS, \
	    offsetof(struct seccomp_data, args[(_argno)]) + ARG_LO_OFFSET), \
	BPF_STMT(BPF_ALU + BPF_AND + BPF_K, ~((_bits) & 0xFFFFFFFF)), \
	BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 0, 4), \
	/* high word: no bits outside the mask may be set */ \
	BPF_STMT(BPF_LD + BPF_W + BPF_ABS, \
	    offsetof(struct seccomp_data, args[(_argno)]) + ARG_HI_OFFSET), \
	BPF_STMT(BPF_ALU + BPF_AND + BPF_K, \
	    ~(((uint32_t)((uint64_t)(_bits) >> 32)) & 0xFFFFFFFF)), \
	BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 0, 1), \
	BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ALLOW), \
	/* reload syscall number; all rules expect it in accumulator */ \
	BPF_STMT(BPF_LD + BPF_W + BPF_ABS, \
	    offsetof(struct seccomp_data, nr))
/*
 * Deny unless syscall argument contains only values in mask.
 *
 * For syscall _sysno, argument number _argno is ANDed with the complement
 * of _bits, one 32-bit word at a time. If either result is non-zero the
 * syscall fails with errno _err. If both are zero nothing is decided
 * here: the syscall number is reloaded and the next rule is evaluated.
 */
#define SC_DENY_UNLESS_ARG_MASK(_sysno, _argno, _bits, _err) \
	/* different syscall: step over the eight instructions below */ \
	BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, (_sysno), 0, 8), \
	/* low word: a stray bit jumps straight to the errno return */ \
	BPF_STMT(BPF_LD + BPF_W + BPF_ABS, \
	    offsetof(struct seccomp_data, args[(_argno)]) + ARG_LO_OFFSET), \
	BPF_STMT(BPF_ALU + BPF_AND + BPF_K, ~((_bits) & 0xFFFFFFFF)), \
	BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 0, 3), \
	/* high word: clean means skip the errno return */ \
	BPF_STMT(BPF_LD + BPF_W + BPF_ABS, \
	    offsetof(struct seccomp_data, args[(_argno)]) + ARG_HI_OFFSET), \
	BPF_STMT(BPF_ALU + BPF_AND + BPF_K, \
	    ~(((uint32_t)((uint64_t)(_bits) >> 32)) & 0xFFFFFFFF)), \
	BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 1, 0), \
	BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ERRNO | (_err)), \
	/* reload syscall number; all rules expect it in accumulator */ \
	BPF_STMT(BPF_LD + BPF_W + BPF_ABS, \
	    offsetof(struct seccomp_data, nr))
/* Special handling for futex(2) that combines a bitmap and operation number */
#if defined(__NR_futex) || defined(__NR_futex_time64)
/* Flag bits that may accompany any permitted futex operation number. */
#define SC_FUTEX_MASK (FUTEX_PRIVATE_FLAG|FUTEX_CLOCK_REALTIME)
/*
 * Allow syscall _nr when its second argument (args[1]), with the
 * SC_FUTEX_MASK bits cleared, equals _op. The comparison is done on the
 * low and high 32-bit words separately. On an argument mismatch the
 * syscall number is reloaded so the next rule can test it.
 */
#define SC_ALLOW_FUTEX_OP(_nr, _op) \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (_nr), 0, 8), \
	/* load syscall argument, low word */ \
	BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \
	    offsetof(struct seccomp_data, args[1]) + ARG_LO_OFFSET), \
	/* mask off allowed bitmap values, low word */ \
	BPF_STMT(BPF_ALU+BPF_AND+BPF_K, ~(SC_FUTEX_MASK & 0xFFFFFFFF)), \
	/* test operation number, low word */ \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ((_op) & 0xFFFFFFFF), 0, 4), \
	/* load syscall argument, high word */ \
	BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \
	    offsetof(struct seccomp_data, args[1]) + ARG_HI_OFFSET), \
	/* mask off allowed bitmap values, high word */ \
	BPF_STMT(BPF_ALU+BPF_AND+BPF_K, \
	    ~(((uint32_t)((uint64_t)SC_FUTEX_MASK >> 32)) & 0xFFFFFFFF)), \
	/* test operation number, high word */ \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, \
	    (((uint32_t)((uint64_t)(_op) >> 32)) & 0xFFFFFFFF), 0, 1), \
	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), \
	/* reload syscall number; all rules expect it in accumulator */ \
	BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, nr))

/*
 * Use this for both __NR_futex and __NR_futex_time64.
 * Expands to one SC_ALLOW_FUTEX_OP rule per permitted operation.
 */
# define SC_FUTEX(_nr) \
	SC_ALLOW_FUTEX_OP(_nr, FUTEX_WAIT), \
	SC_ALLOW_FUTEX_OP(_nr, FUTEX_WAIT_BITSET), \
	SC_ALLOW_FUTEX_OP(_nr, FUTEX_WAKE), \
	SC_ALLOW_FUTEX_OP(_nr, FUTEX_WAKE_BITSET), \
	SC_ALLOW_FUTEX_OP(_nr, FUTEX_REQUEUE), \
	SC_ALLOW_FUTEX_OP(_nr, FUTEX_CMP_REQUEUE)
#endif /* __NR_futex || __NR_futex_time64 */
192
#if defined(__NR_mmap) || defined(__NR_mmap2)
/*
 * mmap flag bits tolerated by the filter. The expansion is parenthesised
 * so that it behaves as a single operand wherever it is used.
 */
# ifdef MAP_FIXED_NOREPLACE
#  define SC_MMAP_FLAGS \
	(MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED|MAP_FIXED_NOREPLACE)
# else
#  define SC_MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED)
# endif /* MAP_FIXED_NOREPLACE */
/*
 * Use this for both __NR_mmap and __NR_mmap2 variants.
 * First rule: fail with EINVAL if argument 3 has any bit outside
 * SC_MMAP_FLAGS. Second rule: allow if argument 2 has no bit outside
 * PROT_READ|PROT_WRITE|PROT_NONE; otherwise fall through to later rules.
 */
# define SC_MMAP(_nr) \
	SC_DENY_UNLESS_ARG_MASK(_nr, 3, SC_MMAP_FLAGS, EINVAL), \
	SC_ALLOW_ARG_MASK(_nr, 2, PROT_READ|PROT_WRITE|PROT_NONE)
#endif /* __NR_mmap || __NR_mmap2 */
204
/*
 * Special handling for setsockopt(2).
 *
 * Allow __NR_setsockopt only when argument 1 equals _lvl and argument 2
 * equals _opt. For each argument the low 32-bit word must match the given
 * value and the high 32-bit word must be zero. When the syscall matches
 * but an argument does not, the syscall number is reloaded for the rules
 * that follow.
 */
#define SC_ALLOW_SETSOCKOPT(_lvl, _opt) \
	/* different syscall: step over the ten instructions below */ \
	BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, __NR_setsockopt, 0, 10), \
	/* level: low word must match */ \
	BPF_STMT(BPF_LD + BPF_W + BPF_ABS, \
	    offsetof(struct seccomp_data, args[1]) + ARG_LO_OFFSET), \
	BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, \
	    ((_lvl) & 0xFFFFFFFF), 0, 7), \
	/* level: high word must be zero */ \
	BPF_STMT(BPF_LD + BPF_W + BPF_ABS, \
	    offsetof(struct seccomp_data, args[1]) + ARG_HI_OFFSET), \
	BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 0, 5), \
	/* optname: low word must match */ \
	BPF_STMT(BPF_LD + BPF_W + BPF_ABS, \
	    offsetof(struct seccomp_data, args[2]) + ARG_LO_OFFSET), \
	BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, \
	    ((_opt) & 0xFFFFFFFF), 0, 3), \
	/* optname: high word must be zero */ \
	BPF_STMT(BPF_LD + BPF_W + BPF_ABS, \
	    offsetof(struct seccomp_data, args[2]) + ARG_HI_OFFSET), \
	BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 0, 1), \
	BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ALLOW), \
	/* reload syscall number; all rules expect it in accumulator */ \
	BPF_STMT(BPF_LD + BPF_W + BPF_ABS, \
	    offsetof(struct seccomp_data, nr))
230
231 /* Syscall filtering set for preauth. */
232 static const struct sock_filter preauth_insns[] = {
233 /* Ensure the syscall arch convention is as expected. */
234 BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
235 offsetof(struct seccomp_data, arch)),
236 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_AUDIT_ARCH, 1, 0),
237 BPF_STMT(BPF_RET+BPF_K, SECCOMP_FILTER_FAIL),
238 /* Load the syscall number for checking. */
239 BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
240 offsetof(struct seccomp_data, nr)),
241
242 /* Syscalls to non-fatally deny */
243 #ifdef __NR_lstat
244 SC_DENY(__NR_lstat, EACCES),
245 #endif
246 #ifdef __NR_lstat64
247 SC_DENY(__NR_lstat64, EACCES),
248 #endif
249 #ifdef __NR_fstat
250 SC_DENY(__NR_fstat, EACCES),
251 #endif
252 #ifdef __NR_fstat64
253 SC_DENY(__NR_fstat64, EACCES),
254 #endif
255 #ifdef __NR_fstatat64
256 SC_DENY(__NR_fstatat64, EACCES),
257 #endif
258 #ifdef __NR_open
259 SC_DENY(__NR_open, EACCES),
260 #endif
261 #ifdef __NR_openat
262 SC_DENY(__NR_openat, EACCES),
263 #endif
264 #ifdef __NR_newfstatat
265 SC_DENY(__NR_newfstatat, EACCES),
266 #endif
267 #ifdef __NR_stat
268 SC_DENY(__NR_stat, EACCES),
269 #endif
270 #ifdef __NR_stat64
271 SC_DENY(__NR_stat64, EACCES),
272 #endif
273 #ifdef __NR_shmget
274 SC_DENY(__NR_shmget, EACCES),
275 #endif
276 #ifdef __NR_shmat
277 SC_DENY(__NR_shmat, EACCES),
278 #endif
279 #ifdef __NR_shmdt
280 SC_DENY(__NR_shmdt, EACCES),
281 #endif
282 #ifdef __NR_ipc
283 SC_DENY(__NR_ipc, EACCES),
284 #endif
285 #ifdef __NR_statx
286 SC_DENY(__NR_statx, EACCES),
287 #endif
288
289 /* Syscalls to permit */
290 #ifdef __NR_brk
291 SC_ALLOW(__NR_brk),
292 #endif
293 #ifdef __NR_clock_gettime
294 SC_ALLOW(__NR_clock_gettime),
295 #endif
296 #ifdef __NR_clock_gettime64
297 SC_ALLOW(__NR_clock_gettime64),
298 #endif
299 #ifdef __NR_close
300 SC_ALLOW(__NR_close),
301 #endif
302 #ifdef __NR_exit
303 SC_ALLOW(__NR_exit),
304 #endif
305 #ifdef __NR_exit_group
306 SC_ALLOW(__NR_exit_group),
307 #endif
308 #ifdef __NR_futex
309 SC_FUTEX(__NR_futex),
310 #endif
311 #ifdef __NR_futex_time64
312 SC_FUTEX(__NR_futex_time64),
313 #endif
314 #ifdef __NR_geteuid
315 SC_ALLOW(__NR_geteuid),
316 #endif
317 #ifdef __NR_geteuid32
318 SC_ALLOW(__NR_geteuid32),
319 #endif
320 #ifdef __NR_getpgid
321 SC_ALLOW(__NR_getpgid),
322 #endif
323 #ifdef __NR_getpid
324 SC_ALLOW(__NR_getpid),
325 #endif
326 #ifdef __NR_getrandom
327 SC_ALLOW(__NR_getrandom),
328 #endif
329 #ifdef __NR_gettid
330 SC_ALLOW(__NR_gettid),
331 #endif
332 #ifdef __NR_gettimeofday
333 SC_ALLOW(__NR_gettimeofday),
334 #endif
335 #ifdef __NR_getuid
336 SC_ALLOW(__NR_getuid),
337 #endif
338 #ifdef __NR_getuid32
339 SC_ALLOW(__NR_getuid32),
340 #endif
341 #ifdef __NR_madvise
342 SC_ALLOW_ARG(__NR_madvise, 2, MADV_NORMAL),
343 # ifdef MADV_FREE
344 SC_ALLOW_ARG(__NR_madvise, 2, MADV_FREE),
345 # endif
346 # ifdef MADV_DONTNEED
347 SC_ALLOW_ARG(__NR_madvise, 2, MADV_DONTNEED),
348 # endif
349 # ifdef MADV_DONTFORK
350 SC_ALLOW_ARG(__NR_madvise, 2, MADV_DONTFORK),
351 # endif
352 # ifdef MADV_DONTDUMP
353 SC_ALLOW_ARG(__NR_madvise, 2, MADV_DONTDUMP),
354 # endif
355 # ifdef MADV_WIPEONFORK
356 SC_ALLOW_ARG(__NR_madvise, 2, MADV_WIPEONFORK),
357 # endif
358 SC_DENY(__NR_madvise, EINVAL),
359 #endif
360 #ifdef __NR_mmap
361 SC_MMAP(__NR_mmap),
362 #endif
363 #ifdef __NR_mmap2
364 SC_MMAP(__NR_mmap2),
365 #endif
366 #ifdef __NR_mprotect
367 SC_ALLOW_ARG_MASK(__NR_mprotect, 2, PROT_READ|PROT_WRITE|PROT_NONE),
368 #endif
369 #ifdef __NR_mremap
370 SC_ALLOW(__NR_mremap),
371 #endif
372 #ifdef __NR_munmap
373 SC_ALLOW(__NR_munmap),
374 #endif
375 #ifdef __NR_nanosleep
376 SC_ALLOW(__NR_nanosleep),
377 #endif
378 #ifdef __NR_clock_nanosleep
379 SC_ALLOW(__NR_clock_nanosleep),
380 #endif
381 #ifdef __NR_clock_nanosleep_time64
382 SC_ALLOW(__NR_clock_nanosleep_time64),
383 #endif
384 #ifdef __NR_clock_gettime64
385 SC_ALLOW(__NR_clock_gettime64),
386 #endif
387 #ifdef __NR__newselect
388 SC_ALLOW(__NR__newselect),
389 #endif
390 #ifdef __NR_ppoll
391 SC_ALLOW(__NR_ppoll),
392 #endif
393 #ifdef __NR_ppoll_time64
394 SC_ALLOW(__NR_ppoll_time64),
395 #endif
396 #ifdef __NR_poll
397 SC_ALLOW(__NR_poll),
398 #endif
399 #ifdef __NR_pselect6
400 SC_ALLOW(__NR_pselect6),
401 #endif
402 #ifdef __NR_pselect6_time64
403 SC_ALLOW(__NR_pselect6_time64),
404 #endif
405 #ifdef __NR_read
406 SC_ALLOW(__NR_read),
407 #endif
408 #ifdef __NR_riscv_hwprobe
409 SC_ALLOW(__NR_riscv_hwprobe),
410 #endif
411 #ifdef __NR_rt_sigprocmask
412 SC_ALLOW(__NR_rt_sigprocmask),
413 #endif
414 #ifdef __NR_select
415 SC_ALLOW(__NR_select),
416 #endif
417 #ifdef __NR_shutdown
418 SC_ALLOW(__NR_shutdown),
419 #endif
420 #ifdef __NR_sigprocmask
421 SC_ALLOW(__NR_sigprocmask),
422 #endif
423 #ifdef __NR_time
424 SC_ALLOW(__NR_time),
425 #endif
426 #ifdef __NR_write
427 SC_ALLOW(__NR_write),
428 #endif
429 #ifdef __NR_writev
430 SC_ALLOW(__NR_writev),
431 #endif
432 #ifdef __NR_getsockopt
433 SC_ALLOW(__NR_getsockopt),
434 #endif
435 #ifdef __NR_getsockname
436 SC_ALLOW(__NR_getsockname),
437 #endif
438 #ifdef __NR_getpeername
439 SC_ALLOW(__NR_getpeername),
440 #endif
441 #ifdef __NR_uname
442 SC_ALLOW(__NR_uname),
443 #endif
444 #ifdef __NR_setsockopt
445 SC_ALLOW_SETSOCKOPT(IPPROTO_IPV6, IPV6_TCLASS),
446 SC_ALLOW_SETSOCKOPT(IPPROTO_IP, IP_TOS),
447 #endif
448 #ifdef __NR_socketcall
449 SC_ALLOW_ARG(__NR_socketcall, 0, SYS_GETPEERNAME),
450 SC_ALLOW_ARG(__NR_socketcall, 0, SYS_GETSOCKNAME),
451 SC_ALLOW_ARG(__NR_socketcall, 0, SYS_GETSOCKOPT),
452 SC_ALLOW_ARG(__NR_socketcall, 0, SYS_SHUTDOWN),
453 SC_DENY(__NR_socketcall, EACCES),
454 #endif
455 #if defined(__NR_ioctl) && defined(__s390__)
456 /* Allow ioctls for ICA crypto card on s390 */
457 SC_ALLOW_ARG(__NR_ioctl, 1, Z90STAT_STATUS_MASK),
458 SC_ALLOW_ARG(__NR_ioctl, 1, ICARSAMODEXPO),
459 SC_ALLOW_ARG(__NR_ioctl, 1, ICARSACRT),
460 SC_ALLOW_ARG(__NR_ioctl, 1, ZSECSENDCPRB),
461 /* Allow ioctls for EP11 crypto card on s390 */
462 SC_ALLOW_ARG(__NR_ioctl, 1, ZSENDEP11CPRB),
463 #endif
464 #if defined(__x86_64__) && defined(__ILP32__) && defined(__X32_SYSCALL_BIT)
465 /*
466 * On Linux x32, the clock_gettime VDSO falls back to the
467 * x86-64 syscall under some circumstances, e.g.
468 * https://bugs.debian.org/849923
469 */
470 SC_ALLOW(__NR_clock_gettime & ~__X32_SYSCALL_BIT),
471 #endif
472
473 /* Default deny */
474 BPF_STMT(BPF_RET+BPF_K, SECCOMP_FILTER_FAIL),
475 };
476
477 static const struct sock_fprog preauth_program = {
478 .len = (unsigned short)(sizeof(preauth_insns)/sizeof(preauth_insns[0])),
479 .filter = (struct sock_filter *)preauth_insns,
480 };
481
/*
 * Sandbox handle returned by ssh_sandbox_init(). This implementation keeps
 * no per-sandbox state; the single member only makes the structure
 * non-empty.
 */
struct ssh_sandbox {
	int junk;	/* placeholder, not read anywhere in this file */
};
485
486 struct ssh_sandbox *
ssh_sandbox_init(struct monitor * monitor)487 ssh_sandbox_init(struct monitor *monitor)
488 {
489 struct ssh_sandbox *box;
490
491 /*
492 * Strictly, we don't need to maintain any state here but we need
493 * to return non-NULL to satisfy the API.
494 */
495 debug3_f("preparing seccomp filter sandbox");
496 box = xcalloc(1, sizeof(*box));
497 return box;
498 }
499
500 #ifdef SANDBOX_SECCOMP_FILTER_DEBUG
501 extern struct monitor *pmonitor;
502 void mm_log_handler(LogLevel level, int forced, const char *msg, void *ctx);
503
504 static void
ssh_sandbox_violation(int signum,siginfo_t * info,void * void_context)505 ssh_sandbox_violation(int signum, siginfo_t *info, void *void_context)
506 {
507 char msg[256];
508
509 snprintf(msg, sizeof(msg),
510 "%s: unexpected system call (arch:0x%x,syscall:%d @ %p)",
511 __func__, info->si_arch, info->si_syscall, info->si_call_addr);
512 mm_log_handler(SYSLOG_LEVEL_FATAL, 0, msg, pmonitor);
513 _exit(1);
514 }
515
516 static void
ssh_sandbox_child_debugging(void)517 ssh_sandbox_child_debugging(void)
518 {
519 struct sigaction act;
520 sigset_t mask;
521
522 debug3_f("installing SIGSYS handler");
523 memset(&act, 0, sizeof(act));
524 sigemptyset(&mask);
525 sigaddset(&mask, SIGSYS);
526
527 act.sa_sigaction = &ssh_sandbox_violation;
528 act.sa_flags = SA_SIGINFO;
529 if (sigaction(SIGSYS, &act, NULL) == -1)
530 fatal_f("sigaction(SIGSYS): %s", strerror(errno));
531 if (sigprocmask(SIG_UNBLOCK, &mask, NULL) == -1)
532 fatal("%s: sigprocmask(SIGSYS): %s",
533 __func__, strerror(errno));
534 }
535 #endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
536
537 void
ssh_sandbox_child(struct ssh_sandbox * box)538 ssh_sandbox_child(struct ssh_sandbox *box)
539 {
540 struct rlimit rl_zero, rl_one = {.rlim_cur = 1, .rlim_max = 1};
541 int nnp_failed = 0;
542
543 /* Set rlimits for completeness if possible. */
544 rl_zero.rlim_cur = rl_zero.rlim_max = 0;
545 if (setrlimit(RLIMIT_FSIZE, &rl_zero) == -1)
546 fatal("%s: setrlimit(RLIMIT_FSIZE, { 0, 0 }): %s",
547 __func__, strerror(errno));
548 /*
549 * Cannot use zero for nfds, because poll(2) will fail with
550 * errno=EINVAL if npfds>RLIMIT_NOFILE.
551 */
552 if (setrlimit(RLIMIT_NOFILE, &rl_one) == -1)
553 fatal("%s: setrlimit(RLIMIT_NOFILE, { 0, 0 }): %s",
554 __func__, strerror(errno));
555 if (setrlimit(RLIMIT_NPROC, &rl_zero) == -1)
556 fatal("%s: setrlimit(RLIMIT_NPROC, { 0, 0 }): %s",
557 __func__, strerror(errno));
558
559 #ifdef SANDBOX_SECCOMP_FILTER_DEBUG
560 ssh_sandbox_child_debugging();
561 #endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
562
563 debug3_f("setting PR_SET_NO_NEW_PRIVS");
564 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1) {
565 debug("%s: prctl(PR_SET_NO_NEW_PRIVS): %s",
566 __func__, strerror(errno));
567 nnp_failed = 1;
568 }
569 debug3_f("attaching seccomp filter program");
570 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &preauth_program) == -1)
571 debug("%s: prctl(PR_SET_SECCOMP): %s",
572 __func__, strerror(errno));
573 else if (nnp_failed)
574 fatal("%s: SECCOMP_MODE_FILTER activated but "
575 "PR_SET_NO_NEW_PRIVS failed", __func__);
576 }
577
578 #endif /* SANDBOX_SECCOMP_FILTER */
579