1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
4 *
5 * Test code for seccomp bpf.
6 */
7
8 #define _GNU_SOURCE
9 #include <sys/types.h>
10
11 /*
12 * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
13 * we need to use the kernel's siginfo.h file and trick glibc
14 * into accepting it.
15 */
16 #if !__GLIBC_PREREQ(2, 26)
17 # include <asm/siginfo.h>
18 # define __have_siginfo_t 1
19 # define __have_sigval_t 1
20 # define __have_sigevent_t 1
21 #endif
22
23 #include <errno.h>
24 #include <linux/filter.h>
25 #include <sys/prctl.h>
26 #include <sys/ptrace.h>
27 #include <sys/time.h>
28 #include <sys/user.h>
29 #include <linux/prctl.h>
30 #include <linux/ptrace.h>
31 #include <linux/seccomp.h>
32 #include <pthread.h>
33 #include <semaphore.h>
34 #include <signal.h>
35 #include <stddef.h>
36 #include <stdbool.h>
37 #include <string.h>
38 #include <time.h>
39 #include <limits.h>
40 #include <linux/elf.h>
41 #include <sys/uio.h>
42 #include <sys/utsname.h>
43 #include <sys/fcntl.h>
44 #include <sys/mman.h>
45 #include <sys/times.h>
46 #include <sys/socket.h>
47 #include <sys/ioctl.h>
48 #include <linux/kcmp.h>
49 #include <sys/resource.h>
50 #include <sys/capability.h>
51 #include <linux/perf_event.h>
52
53 #include <unistd.h>
54 #include <sys/syscall.h>
55 #include <poll.h>
56
57 #include "../kselftest_harness.h"
58 #include "../clone3/clone3_selftests.h"
59
60 /* Attempt to de-conflict with the selftests tree. */
61 #ifndef SKIP
62 #define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
63 #endif
64
65 #ifndef MIN
66 #define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
67 #endif
68
69 #ifndef PR_SET_PTRACER
70 # define PR_SET_PTRACER 0x59616d61
71 #endif
72
73 #ifndef noinline
74 #define noinline __attribute__((noinline))
75 #endif
76
77 #ifndef __nocf_check
78 #define __nocf_check __attribute__((nocf_check))
79 #endif
80
81 #ifndef __naked
82 #define __naked __attribute__((__naked__))
83 #endif
84
85 #ifndef PR_SET_NO_NEW_PRIVS
86 #define PR_SET_NO_NEW_PRIVS 38
87 #define PR_GET_NO_NEW_PRIVS 39
88 #endif
89
90 #ifndef PR_SECCOMP_EXT
91 #define PR_SECCOMP_EXT 43
92 #endif
93
94 #ifndef SECCOMP_EXT_ACT
95 #define SECCOMP_EXT_ACT 1
96 #endif
97
98 #ifndef SECCOMP_EXT_ACT_TSYNC
99 #define SECCOMP_EXT_ACT_TSYNC 1
100 #endif
101
102 #ifndef SECCOMP_MODE_STRICT
103 #define SECCOMP_MODE_STRICT 1
104 #endif
105
106 #ifndef SECCOMP_MODE_FILTER
107 #define SECCOMP_MODE_FILTER 2
108 #endif
109
110 #ifndef SECCOMP_RET_ALLOW
111 struct seccomp_data {
112 int nr;
113 __u32 arch;
114 __u64 instruction_pointer;
115 __u64 args[6];
116 };
117 #endif
118
119 #ifndef SECCOMP_RET_KILL_PROCESS
120 #define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
121 #define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */
122 #endif
123 #ifndef SECCOMP_RET_KILL
124 #define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD
125 #define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */
126 #define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */
127 #define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */
128 #define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */
129 #endif
130 #ifndef SECCOMP_RET_LOG
131 #define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */
132 #endif
133
134 #ifndef __NR_seccomp
135 # if defined(__i386__)
136 # define __NR_seccomp 354
137 # elif defined(__x86_64__)
138 # define __NR_seccomp 317
139 # elif defined(__arm__)
140 # define __NR_seccomp 383
141 # elif defined(__aarch64__)
142 # define __NR_seccomp 277
143 # elif defined(__riscv)
144 # define __NR_seccomp 277
145 # elif defined(__csky__)
146 # define __NR_seccomp 277
147 # elif defined(__loongarch__)
148 # define __NR_seccomp 277
149 # elif defined(__hppa__)
150 # define __NR_seccomp 338
151 # elif defined(__powerpc__)
152 # define __NR_seccomp 358
153 # elif defined(__s390__)
154 # define __NR_seccomp 348
155 # elif defined(__xtensa__)
156 # define __NR_seccomp 337
157 # elif defined(__sh__)
158 # define __NR_seccomp 372
159 # elif defined(__mc68000__)
160 # define __NR_seccomp 380
161 # else
162 # warning "seccomp syscall number unknown for this architecture"
163 # define __NR_seccomp 0xffff
164 # endif
165 #endif
166
167 #ifndef __NR_uretprobe
168 # if defined(__x86_64__)
169 # define __NR_uretprobe 335
170 # endif
171 #endif
172
173 #ifndef SECCOMP_SET_MODE_STRICT
174 #define SECCOMP_SET_MODE_STRICT 0
175 #endif
176
177 #ifndef SECCOMP_SET_MODE_FILTER
178 #define SECCOMP_SET_MODE_FILTER 1
179 #endif
180
181 #ifndef SECCOMP_GET_ACTION_AVAIL
182 #define SECCOMP_GET_ACTION_AVAIL 2
183 #endif
184
185 #ifndef SECCOMP_GET_NOTIF_SIZES
186 #define SECCOMP_GET_NOTIF_SIZES 3
187 #endif
188
189 #ifndef SECCOMP_FILTER_FLAG_TSYNC
190 #define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
191 #endif
192
193 #ifndef SECCOMP_FILTER_FLAG_LOG
194 #define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
195 #endif
196
197 #ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
198 #define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
199 #endif
200
201 #ifndef PTRACE_SECCOMP_GET_METADATA
202 #define PTRACE_SECCOMP_GET_METADATA 0x420d
203
204 struct seccomp_metadata {
205 __u64 filter_off; /* Input: which filter */
206 __u64 flags; /* Output: filter's flags */
207 };
208 #endif
209
210 #ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER
211 #define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3)
212 #endif
213
214 #ifndef SECCOMP_RET_USER_NOTIF
215 #define SECCOMP_RET_USER_NOTIF 0x7fc00000U
216
217 #define SECCOMP_IOC_MAGIC '!'
218 #define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr)
219 #define SECCOMP_IOR(nr, type) _IOR(SECCOMP_IOC_MAGIC, nr, type)
220 #define SECCOMP_IOW(nr, type) _IOW(SECCOMP_IOC_MAGIC, nr, type)
221 #define SECCOMP_IOWR(nr, type) _IOWR(SECCOMP_IOC_MAGIC, nr, type)
222
223 /* Flags for seccomp notification fd ioctl. */
224 #define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif)
225 #define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \
226 struct seccomp_notif_resp)
227 #define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOW(2, __u64)
228
229 struct seccomp_notif {
230 __u64 id;
231 __u32 pid;
232 __u32 flags;
233 struct seccomp_data data;
234 };
235
236 struct seccomp_notif_resp {
237 __u64 id;
238 __s64 val;
239 __s32 error;
240 __u32 flags;
241 };
242
243 struct seccomp_notif_sizes {
244 __u16 seccomp_notif;
245 __u16 seccomp_notif_resp;
246 __u16 seccomp_data;
247 };
248 #endif
249
250 #ifndef SECCOMP_IOCTL_NOTIF_ADDFD
251 /* On success, the return value is the remote process's added fd number */
252 #define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \
253 struct seccomp_notif_addfd)
254
255 /* valid flags for seccomp_notif_addfd */
256 #define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */
257
258 struct seccomp_notif_addfd {
259 __u64 id;
260 __u32 flags;
261 __u32 srcfd;
262 __u32 newfd;
263 __u32 newfd_flags;
264 };
265 #endif
266
267 #ifndef SECCOMP_ADDFD_FLAG_SEND
268 #define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */
269 #endif
270
/*
 * Undersized variant of struct seccomp_notif_addfd — presumably used to
 * exercise the kernel's ioctl argument-size handling; verify against callers.
 */
struct seccomp_notif_addfd_small {
	__u64 id;
	char weird[4];
};
#define SECCOMP_IOCTL_NOTIF_ADDFD_SMALL	\
	SECCOMP_IOW(3, struct seccomp_notif_addfd_small)

/*
 * Oversized variant of struct seccomp_notif_addfd (8 trailing bytes beyond
 * the real struct) — same nr (3) as the real ADDFD ioctl, different size.
 */
struct seccomp_notif_addfd_big {
	union {
		struct seccomp_notif_addfd addfd;
		char buf[sizeof(struct seccomp_notif_addfd) + 8];
	};
};
#define SECCOMP_IOCTL_NOTIF_ADDFD_BIG	\
	SECCOMP_IOWR(3, struct seccomp_notif_addfd_big)
286
287 #ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY
288 #define PTRACE_EVENTMSG_SYSCALL_ENTRY 1
289 #define PTRACE_EVENTMSG_SYSCALL_EXIT 2
290 #endif
291
292 #ifndef SECCOMP_USER_NOTIF_FLAG_CONTINUE
293 #define SECCOMP_USER_NOTIF_FLAG_CONTINUE 0x00000001
294 #endif
295
296 #ifndef SECCOMP_FILTER_FLAG_TSYNC_ESRCH
297 #define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
298 #endif
299
300 #ifndef SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV
301 #define SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV (1UL << 5)
302 #endif
303
#ifndef seccomp
/*
 * Minimal seccomp(2) wrapper for libcs that do not provide one.
 * Clears errno first so callers can distinguish "no error" from
 * a stale value after a successful return.
 */
int seccomp(unsigned int op, unsigned int flags, void *args)
{
	long rc;

	errno = 0;
	rc = syscall(__NR_seccomp, op, flags, args);
	return rc;
}
#endif
311
312 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
313 #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
314 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
315 #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
316 #else
317 #error "wut? Unknown __BYTE_ORDER__?!"
318 #endif
319
320 #define SIBLING_EXIT_UNKILLED 0xbadbeef
321 #define SIBLING_EXIT_FAILURE 0xbadface
322 #define SIBLING_EXIT_NEWPRIVS 0xbadfeed
323
/*
 * Compare two fds across processes with kcmp(2): returns 0 when both refer
 * to the same struct file, non-zero otherwise. When the kernel headers lack
 * __NR_kcmp, report ENOSYS so callers can degrade gracefully.
 */
static int __filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2)
{
#ifdef __NR_kcmp
	errno = 0;
	return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2);
#else
	errno = ENOSYS;
	return -1;
#endif
}
334
/*
 * Have TH_LOG report actual location filecmp() is used.
 * Falls back to "equal" (0) when kcmp() is unavailable, with a warning
 * that the comparison is less accurate.
 */
#define filecmp(pid1, pid2, fd1, fd2)	({		\
	int _ret;					\
							\
	_ret = __filecmp(pid1, pid2, fd1, fd2);		\
	if (_ret != 0) {				\
		if (_ret < 0 && errno == ENOSYS) {	\
			TH_LOG("kcmp() syscall missing (test is less accurate)");\
			_ret = 0;			\
		}					\
	}						\
	_ret; })
347
/* Verify kcmp() support up front, since later tests lean on filecmp(). */
TEST(kcmp)
{
	int ret;

	/* The same fd in the same process must compare equal. */
	ret = __filecmp(getpid(), getpid(), 1, 1);
	EXPECT_EQ(ret, 0);
	if (ret != 0 && errno == ENOSYS)
		SKIP(return, "Kernel does not support kcmp() (missing CONFIG_KCMP?)");
}
357
/* Strict mode must be enterable when CONFIG_SECCOMP is present. */
TEST(mode_strict_support)
{
	long ret;

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support CONFIG_SECCOMP");
	}
	/* Exit via raw syscall: strict mode only permits a tiny syscall set. */
	syscall(__NR_exit, 0);
}
368
/* Under strict mode, calling prctl() again must kill the task (SIGKILL). */
TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
{
	long ret;

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support CONFIG_SECCOMP");
	}
	/* This prctl attempt should never return. */
	syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
		NULL, NULL, NULL);
	EXPECT_FALSE(true) {
		TH_LOG("Unreachable!");
	}
}
383
384 /* Note! This doesn't test no new privs behavior */
/* PR_SET_NO_NEW_PRIVS must be accepted (required by the filter tests). */
TEST(no_new_privs_support)
{
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	EXPECT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}
}
394
395 /* Tests kernel support by checking for a copy_from_user() fault on NULL. */
/* A NULL prog should fault with EFAULT if CONFIG_SECCOMP_FILTER exists. */
TEST(mode_filter_support)
{
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}
	/* EFAULT (not EINVAL) proves the filter path copied from the pointer. */
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EFAULT, errno) {
		TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
	}
}
410
TEST(mode_filter_without_nnp)411 TEST(mode_filter_without_nnp)
412 {
413 struct sock_filter filter[] = {
414 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
415 };
416 struct sock_fprog prog = {
417 .len = (unsigned short)ARRAY_SIZE(filter),
418 .filter = filter,
419 };
420 long ret;
421 cap_t cap = cap_get_proc();
422 cap_flag_value_t is_cap_sys_admin = 0;
423
424 ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
425 ASSERT_LE(0, ret) {
426 TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
427 }
428 errno = 0;
429 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
430 /* Succeeds with CAP_SYS_ADMIN, fails without */
431 cap_get_flag(cap, CAP_SYS_ADMIN, CAP_EFFECTIVE, &is_cap_sys_admin);
432 if (!is_cap_sys_admin) {
433 EXPECT_EQ(-1, ret);
434 EXPECT_EQ(EACCES, errno);
435 } else {
436 EXPECT_EQ(0, ret);
437 }
438 }
439
440 #define MAX_INSNS_PER_PATH 32768
441
TEST(filter_size_limits)442 TEST(filter_size_limits)
443 {
444 int i;
445 int count = BPF_MAXINSNS + 1;
446 struct sock_filter allow[] = {
447 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
448 };
449 struct sock_filter *filter;
450 struct sock_fprog prog = { };
451 long ret;
452
453 filter = calloc(count, sizeof(*filter));
454 ASSERT_NE(NULL, filter);
455
456 for (i = 0; i < count; i++)
457 filter[i] = allow[0];
458
459 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
460 ASSERT_EQ(0, ret);
461
462 prog.filter = filter;
463 prog.len = count;
464
465 /* Too many filter instructions in a single filter. */
466 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
467 ASSERT_NE(0, ret) {
468 TH_LOG("Installing %d insn filter was allowed", prog.len);
469 }
470
471 /* One less is okay, though. */
472 prog.len -= 1;
473 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
474 ASSERT_EQ(0, ret) {
475 TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
476 }
477 }
478
TEST(filter_chain_limits)479 TEST(filter_chain_limits)
480 {
481 int i;
482 int count = BPF_MAXINSNS;
483 struct sock_filter allow[] = {
484 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
485 };
486 struct sock_filter *filter;
487 struct sock_fprog prog = { };
488 long ret;
489
490 filter = calloc(count, sizeof(*filter));
491 ASSERT_NE(NULL, filter);
492
493 for (i = 0; i < count; i++)
494 filter[i] = allow[0];
495
496 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
497 ASSERT_EQ(0, ret);
498
499 prog.filter = filter;
500 prog.len = 1;
501
502 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
503 ASSERT_EQ(0, ret);
504
505 prog.len = count;
506
507 /* Too many total filter instructions. */
508 for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
509 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
510 if (ret != 0)
511 break;
512 }
513 ASSERT_NE(0, ret) {
514 TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
515 i, count, i * (count + 4));
516 }
517 }
518
/* Once in filter mode, switching to strict mode must fail with EINVAL. */
TEST(mode_filter_cannot_move_to_strict)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
	ASSERT_EQ(0, ret);

	/* Mode transitions away from filter mode are disallowed. */
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno);
}
540
541
/* PR_GET_SECCOMP reports the current mode: 0 before, filter mode after. */
TEST(mode_filter_get_seccomp)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	/* No filter installed yet: seccomp is disabled. */
	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
	EXPECT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
	ASSERT_EQ(0, ret);

	/* Use the named constant instead of the magic number 2. */
	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
	EXPECT_EQ(SECCOMP_MODE_FILTER, ret);
}
565
566
/* The trivial allow-everything filter must install successfully. */
TEST(ALLOW_all)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);
}
584
/* A zero-instruction program must be rejected with EINVAL. */
TEST(empty_prog)
{
	struct sock_filter filter[] = {
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno);
}
602
/* SECCOMP_RET_LOG must still allow syscalls to execute normally. */
TEST(log_all)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;
	pid_t parent = getppid();

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);

	/* getppid() should succeed and be logged (no check for logging) */
	EXPECT_EQ(parent, syscall(__NR_getppid));
}
624
/* An undefined action value (below RET_ALLOW) must kill like RET_KILL. */
TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);
	EXPECT_EQ(0, syscall(__NR_getpid)) {
		TH_LOG("getpid() shouldn't ever return");
	}
}
645
646 /* return code >= 0x80000000 is unused. */
/* return code >= 0x80000000 is unused; it must also kill the task. */
TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);
	EXPECT_EQ(0, syscall(__NR_getpid)) {
		TH_LOG("getpid() shouldn't ever return");
	}
}
667
/* A kill-everything filter: the next syscall (test exit) delivers SIGSYS. */
TEST_SIGNAL(KILL_all, SIGSYS)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);
}
685
/* Kill only getpid(); other syscalls (getppid) keep working. */
TEST_SIGNAL(KILL_one, SIGSYS)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;
	pid_t parent = getppid();

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* getpid() should never return. */
	EXPECT_EQ(0, syscall(__NR_getpid));
}
712
/* Kill times() only when its first argument is one specific pointer value. */
TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
{
	void *fatal_address;
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		/* Only bother with the lower 32-bits for now. */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K,
			(unsigned long)&fatal_address, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;
	pid_t parent = getppid();
	struct tms timebuf;
	clock_t clock = times(&timebuf);

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* A benign argument is still allowed through. */
	EXPECT_LE(clock, syscall(__NR_times, &timebuf));
	/* times() should never return. */
	EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
}
748
/* Kill mmap/mmap2 only when the sixth argument (offset) is 0x0C0FFEE. */
TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
{
#ifndef __NR_mmap2
	int sysno = __NR_mmap;
#else
	int sysno = __NR_mmap2;
#endif
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		/* Only bother with the lower 32-bits for now. */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;
	pid_t parent = getppid();
	int fd;
	void *map1, *map2;
	int page_size = sysconf(_SC_PAGESIZE);

	ASSERT_LT(0, page_size);

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);

	fd = open("/dev/zero", O_RDONLY);
	ASSERT_NE(-1, fd);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* A benign offset is still allowed through. */
	map1 = (void *)syscall(sysno,
		NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
	EXPECT_NE(MAP_FAILED, map1);
	/* mmap2() should never return. */
	map2 = (void *)syscall(sysno,
		NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
	EXPECT_EQ(MAP_FAILED, map2);

	/* The test failed, so clean up the resources. */
	munmap(map1, page_size);
	munmap(map2, page_size);
	close(fd);
}
802
803 /* This is a thread task to die via seccomp filter violation. */
kill_thread(void * data)804 void *kill_thread(void *data)
805 {
806 bool die = (bool)data;
807
808 if (die) {
809 syscall(__NR_getpid);
810 return (void *)SIBLING_EXIT_FAILURE;
811 }
812
813 return (void *)SIBLING_EXIT_UNKILLED;
814 }
815
/* Which filter action kill_thread_or_group() should install and exercise. */
enum kill_t {
	KILL_THREAD,	/* SECCOMP_RET_KILL_THREAD: only the offender dies */
	KILL_PROCESS,	/* SECCOMP_RET_KILL_PROCESS: whole thread-group dies */
	RET_UNKNOWN	/* bogus action value (0xAAAAAAAA) */
};
821
822 /* Prepare a thread that will kill itself or both of us. */
/*
 * Runs in a forked child. Installs a filter that fires on getpid() with
 * the action selected by @kill_how, then spawns a benign thread followed
 * by a dying thread. If only the thread was killed, this function reaches
 * exit(42); a process-wide kill should have terminated everyone first.
 */
void kill_thread_or_group(struct __test_metadata *_metadata,
			  enum kill_t kill_how)
{
	pthread_t thread;
	void *status;
	/* Kill only when calling __NR_getpid. */
	struct sock_filter filter_thread[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog_thread = {
		.len = (unsigned short)ARRAY_SIZE(filter_thread),
		.filter = filter_thread,
	};
	/* RET_UNKNOWN uses a bogus action value to test its kill behavior. */
	int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAA;
	struct sock_filter filter_process[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, kill),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog_process = {
		.len = (unsigned short)ARRAY_SIZE(filter_process),
		.filter = filter_process,
	};

	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
			     kill_how == KILL_THREAD ? &prog_thread
						     : &prog_process));

	/*
	 * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
	 * flag cannot be downgraded by a new filter.
	 */
	if (kill_how == KILL_PROCESS)
		ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));

	/* Start a thread that will exit immediately. */
	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
	ASSERT_EQ(0, pthread_join(thread, &status));
	ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status);

	/* Start a thread that will die immediately. */
	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true));
	ASSERT_EQ(0, pthread_join(thread, &status));
	ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status);

	/*
	 * If we get here, only the spawned thread died. Let the parent know
	 * the whole process didn't die (i.e. this thread, the spawner,
	 * stayed running).
	 */
	exit(42);
}
885
/* RET_KILL_THREAD kills only the offending thread: child exits 42. */
TEST(KILL_thread)
{
	int status;
	pid_t child_pid;

	child_pid = fork();
	ASSERT_LE(0, child_pid);
	if (child_pid == 0) {
		kill_thread_or_group(_metadata, KILL_THREAD);
		_exit(38);
	}

	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));

	/* If only the thread was killed, we'll see exit 42. */
	ASSERT_TRUE(WIFEXITED(status));
	ASSERT_EQ(42, WEXITSTATUS(status));
}
904
/* RET_KILL_PROCESS kills the whole child: it dies with SIGSYS. */
TEST(KILL_process)
{
	int status;
	pid_t child_pid;

	child_pid = fork();
	ASSERT_LE(0, child_pid);
	if (child_pid == 0) {
		kill_thread_or_group(_metadata, KILL_PROCESS);
		_exit(38);
	}

	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));

	/* If the entire process was killed, we'll see SIGSYS. */
	ASSERT_TRUE(WIFSIGNALED(status));
	ASSERT_EQ(SIGSYS, WTERMSIG(status));
}
923
/* An unknown action value is expected to kill the whole process too. */
TEST(KILL_unknown)
{
	int status;
	pid_t child_pid;

	child_pid = fork();
	ASSERT_LE(0, child_pid);
	if (child_pid == 0) {
		kill_thread_or_group(_metadata, RET_UNKNOWN);
		_exit(38);
	}

	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));

	/* If the entire process was killed, we'll see SIGSYS. */
	EXPECT_TRUE(WIFSIGNALED(status)) {
		TH_LOG("Unknown SECCOMP_RET is only killing the thread?");
	}
	ASSERT_EQ(SIGSYS, WTERMSIG(status));
}
944
945 /* TODO(wad) add 64-bit versus 32-bit arg tests. */
/* Loading past args[5] (syscall_arg(6)) must be rejected with EINVAL. */
TEST(arg_out_of_range)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno);
}
965
/*
 * Declare a filter (and matching prog_##name) that fails read() with the
 * given errno via SECCOMP_RET_ERRNO and allows every other syscall.
 */
#define ERRNO_FILTER(name, errno)					\
	struct sock_filter _read_filter_##name[] = {			\
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,				\
			offsetof(struct seccomp_data, nr)),		\
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),	\
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | errno),	\
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),		\
	};								\
	struct sock_fprog prog_##name = {				\
		.len = (unsigned short)ARRAY_SIZE(_read_filter_##name),	\
		.filter = _read_filter_##name,				\
	}
978
979 /* Make sure basic errno values are correctly passed through a filter. */
/* Make sure basic errno values are correctly passed through a filter. */
TEST(ERRNO_valid)
{
	ERRNO_FILTER(valid, E2BIG);
	long ret;
	pid_t parent = getppid();

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* read() fails without entering the kernel's read path. */
	EXPECT_EQ(-1, read(-1, NULL, 0));
	EXPECT_EQ(E2BIG, errno);
}
996
997 /* Make sure an errno of zero is correctly handled by the arch code. */
/* Make sure an errno of zero is correctly handled by the arch code. */
TEST(ERRNO_zero)
{
	ERRNO_FILTER(zero, 0);
	long ret;
	pid_t parent = getppid();

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* "errno" of 0 is ok: read() appears to succeed with 0 bytes. */
	EXPECT_EQ(0, read(-1, NULL, 0));
}
1014
1015 /*
1016 * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller.
1017 * This tests that the errno value gets capped correctly, fixed by
1018 * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO").
1019 */
/* An errno of 4096 must be capped to MAX_ERRNO (4095). */
TEST(ERRNO_capped)
{
	ERRNO_FILTER(capped, 4096);
	long ret;
	pid_t parent = getppid();

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	EXPECT_EQ(-1, read(-1, NULL, 0));
	EXPECT_EQ(4095, errno);
}
1036
1037 /*
1038 * Filters are processed in reverse order: last applied is executed first.
1039 * Since only the SECCOMP_RET_ACTION mask is tested for return values, the
1040 * SECCOMP_RET_DATA mask results will follow the most recently applied
1041 * matching filter return (and not the lowest or highest value).
1042 */
/* The most recently installed matching filter's errno (12) must win. */
TEST(ERRNO_order)
{
	ERRNO_FILTER(first, 11);
	ERRNO_FILTER(second, 13);
	ERRNO_FILTER(third, 12);
	long ret;
	pid_t parent = getppid();

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	EXPECT_EQ(-1, read(-1, NULL, 0));
	EXPECT_EQ(12, errno);
}
1067
/* Fixture state: a heap-allocated filter that TRAPs (SIGSYS) on getpid(). */
FIXTURE(TRAP) {
	struct sock_fprog prog;	/* filter installed by the TRAP tests */
};
1071
/* Build the getpid()-TRAP filter on the heap so it outlives setup. */
FIXTURE_SETUP(TRAP)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};

	memset(&self->prog, 0, sizeof(self->prog));
	self->prog.filter = malloc(sizeof(filter));
	ASSERT_NE(NULL, self->prog.filter);
	memcpy(self->prog.filter, filter, sizeof(filter));
	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
}
1088
FIXTURE_TEARDOWN(TRAP)1089 FIXTURE_TEARDOWN(TRAP)
1090 {
1091 if (self->prog.filter)
1092 free(self->prog.filter);
1093 }
1094
/* With the default SIGSYS disposition, getpid() must deliver fatal SIGSYS. */
TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
{
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
	ASSERT_EQ(0, ret);
	syscall(__NR_getpid);
}
1106
1107 /* Ensure that SIGSYS overrides SIG_IGN */
/* Ensure that SIGSYS overrides SIG_IGN */
TEST_F_SIGNAL(TRAP, ign, SIGSYS)
{
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	/* Even ignored, seccomp's SIGSYS must still be delivered fatally. */
	signal(SIGSYS, SIG_IGN);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
	ASSERT_EQ(0, ret);
	syscall(__NR_getpid);
}
1121
/* Last SIGSYS delivery captured for inspection by TRAP.handler. */
static siginfo_t TRAP_info;
static volatile int TRAP_nr;
/* SA_SIGINFO handler: record the signal number and the full siginfo. */
static void TRAP_action(int nr, siginfo_t *info, void *void_context)
{
	TRAP_info = *info;
	TRAP_nr = nr;
}
1129
/* The SIGSYS siginfo must identify the blocked syscall, arch, and IP. */
TEST_F(TRAP, handler)
{
	int ret, test;
	struct sigaction act;
	sigset_t mask;

	memset(&act, 0, sizeof(act));
	sigemptyset(&mask);
	sigaddset(&mask, SIGSYS);

	act.sa_sigaction = &TRAP_action;
	act.sa_flags = SA_SIGINFO;
	ret = sigaction(SIGSYS, &act, NULL);
	ASSERT_EQ(0, ret) {
		TH_LOG("sigaction failed");
	}
	ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
	ASSERT_EQ(0, ret) {
		TH_LOG("sigprocmask failed");
	}

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
	ASSERT_EQ(0, ret);
	TRAP_nr = 0;
	memset(&TRAP_info, 0, sizeof(TRAP_info));
	/* Expect the registers to be rolled back. (nr = error) may vary
	 * based on arch. */
	ret = syscall(__NR_getpid);
	/* Silence gcc warning about volatile. */
	test = TRAP_nr;
	EXPECT_EQ(SIGSYS, test);
	/* Overlay the SIGSYS fields regardless of glibc siginfo layout. */
	struct local_sigsys {
		void *_call_addr; /* calling user insn */
		int _syscall;	/* triggering system call number */
		unsigned int _arch;	/* AUDIT_ARCH_* of syscall */
	} *sigsys = (struct local_sigsys *)
#ifdef si_syscall
		&(TRAP_info.si_call_addr);
#else
		&TRAP_info.si_pid;
#endif
	EXPECT_EQ(__NR_getpid, sigsys->_syscall);
	/* Make sure arch is non-zero. */
	EXPECT_NE(0, sigsys->_arch);
	EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
}
1178
/*
 * One filter program per seccomp return action.  The precedence tests
 * stack these in varying orders to show that the most restrictive
 * matching action always wins, independent of install order.
 */
FIXTURE(precedence) {
	struct sock_fprog allow;
	struct sock_fprog log;
	struct sock_fprog trace;
	struct sock_fprog error;
	struct sock_fprog trap;
	struct sock_fprog kill;
};
1187
FIXTURE_SETUP(precedence)
{
	/*
	 * Except for "allow" (which allows everything), each filter below
	 * returns its named action for getpid() and SECCOMP_RET_ALLOW for
	 * every other syscall.
	 */
	struct sock_filter allow_insns[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_filter log_insns[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
	};
	struct sock_filter trace_insns[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
	};
	struct sock_filter error_insns[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
	};
	struct sock_filter trap_insns[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
	};
	struct sock_filter kill_insns[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
	};

	memset(self, 0, sizeof(*self));
	/* Copy each on-stack insn table into a heap-backed sock_fprog. */
#define FILTER_ALLOC(_x) \
	self->_x.filter = malloc(sizeof(_x##_insns)); \
	ASSERT_NE(NULL, self->_x.filter); \
	memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
	self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
	FILTER_ALLOC(allow);
	FILTER_ALLOC(log);
	FILTER_ALLOC(trace);
	FILTER_ALLOC(error);
	FILTER_ALLOC(trap);
	FILTER_ALLOC(kill);
}
1242
FIXTURE_TEARDOWN(precedence)1243 FIXTURE_TEARDOWN(precedence)
1244 {
1245 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
1246 FILTER_FREE(allow);
1247 FILTER_FREE(log);
1248 FILTER_FREE(trace);
1249 FILTER_FREE(error);
1250 FILTER_FREE(trap);
1251 FILTER_FREE(kill);
1252 }
1253
/*
 * All six filters installed; getppid() is allowed by every one of them
 * and must therefore succeed normally.
 */
TEST_F(precedence, allow_ok)
{
	pid_t parent, res = 0;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	res = syscall(__NR_getppid);
	EXPECT_EQ(parent, res);
}
1279
/*
 * All six filters installed (kill last): getpid() must be terminated
 * by SECCOMP_RET_KILL (delivered as SIGSYS), which outranks every
 * other action.
 */
TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
{
	pid_t parent, res = 0;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	res = syscall(__NR_getppid);
	EXPECT_EQ(parent, res);
	/* getpid() should never return. */
	res = syscall(__NR_getpid);
	EXPECT_EQ(0, res);
}
1308
/*
 * Same as kill_is_highest, but with kill installed second rather than
 * last: install order must not affect which action wins.
 */
TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
{
	pid_t parent;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* getpid() should never return. */
	EXPECT_EQ(0, syscall(__NR_getpid));
}
1335
/*
 * With kill absent, SECCOMP_RET_TRAP is the most restrictive remaining
 * action: getpid() must raise SIGSYS.
 */
TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
{
	pid_t parent;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* getpid() should never return. */
	EXPECT_EQ(0, syscall(__NR_getpid));
}
1360
/*
 * Same as trap_is_second, but with trap installed early: install order
 * must not matter.
 */
TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
{
	pid_t parent;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* getpid() should never return. */
	EXPECT_EQ(0, syscall(__NR_getpid));
}
1385
/*
 * With kill and trap absent, SECCOMP_RET_ERRNO wins.  The error filter
 * carries errno data 0, so getpid() returns 0 instead of the real pid.
 */
TEST_F(precedence, errno_is_third)
{
	pid_t parent;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	EXPECT_EQ(0, syscall(__NR_getpid));
}
1407
/*
 * Same as errno_is_third with a different install order: RET_ERRNO
 * must still win over log/trace/allow.
 */
TEST_F(precedence, errno_is_third_in_any_order)
{
	pid_t parent;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	EXPECT_EQ(0, syscall(__NR_getpid));
}
1429
/*
 * With only allow/log/trace installed, SECCOMP_RET_TRACE wins; since
 * no tracer is attached, the traced getpid() fails with -1.
 */
TEST_F(precedence, trace_is_fourth)
{
	pid_t parent;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* No ptracer */
	EXPECT_EQ(-1, syscall(__NR_getpid));
}
1450
/*
 * Same as trace_is_fourth with trace installed first: install order
 * must not matter.
 */
TEST_F(precedence, trace_is_fourth_in_any_order)
{
	pid_t parent;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* No ptracer */
	EXPECT_EQ(-1, syscall(__NR_getpid));
}
1471
/*
 * With only allow and log installed, SECCOMP_RET_LOG wins: getpid() is
 * logged but otherwise behaves exactly like an allowed syscall.
 */
TEST_F(precedence, log_is_fifth)
{
	pid_t mypid, parent;
	long ret;

	mypid = getpid();
	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* Should also work just fine */
	EXPECT_EQ(mypid, syscall(__NR_getpid));
}
1491
/*
 * Same as log_is_fifth with log installed first: install order must
 * not matter.
 */
TEST_F(precedence, log_is_fifth_in_any_order)
{
	pid_t mypid, parent;
	long ret;

	mypid = getpid();
	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* Should also work just fine */
	EXPECT_EQ(mypid, syscall(__NR_getpid));
}
1511
1512 #ifndef PTRACE_O_TRACESECCOMP
1513 #define PTRACE_O_TRACESECCOMP 0x00000080
1514 #endif
1515
1516 /* Catch the Ubuntu 12.04 value error. */
1517 #if PTRACE_EVENT_SECCOMP != 7
1518 #undef PTRACE_EVENT_SECCOMP
1519 #endif
1520
1521 #ifndef PTRACE_EVENT_SECCOMP
1522 #define PTRACE_EVENT_SECCOMP 7
1523 #endif
1524
1525 #define PTRACE_EVENT_MASK(status) ((status) >> 16)
/*
 * Keeps the start_tracer() event loop running; cleared by SIGUSR1 to
 * request shutdown.  NOTE(review): written from a signal handler —
 * sig_atomic_t would be the strictly-conforming type; confirm intent.
 */
bool tracer_running;
/* SIGUSR1 handler: ask the tracer loop to exit. */
void tracer_stop(int sig)
{
	tracer_running = false;
}
1531
1532 typedef void tracer_func_t(struct __test_metadata *_metadata,
1533 pid_t tracee, int status, void *args);
1534
/*
 * Body of the forked tracer process: attach to @tracee with ptrace,
 * unblock it via @fd (write side of the sync pipe), then loop invoking
 * @tracer_func on each stop event until SIGUSR1 clears tracer_running
 * or the tracee exits.  @ptrace_syscall selects PTRACE_SYSCALL stops
 * (PTRACE_O_TRACESYSGOOD) vs. seccomp stops (PTRACE_O_TRACESECCOMP).
 * Never returns to the caller on the success path; exits the process
 * with the harness exit code instead.
 */
void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
	    tracer_func_t tracer_func, void *args, bool ptrace_syscall)
{
	int ret = -1;
	struct sigaction action = {
		.sa_handler = tracer_stop,
	};

	/* Allow external shutdown. */
	tracer_running = true;
	ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));

	errno = 0;
	/* Retry attach until it sticks (the tracee may not exist yet). */
	while (ret == -1 && errno != EINVAL)
		ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
	ASSERT_EQ(0, ret) {
		kill(tracee, SIGKILL);
	}
	/* Wait for attach stop */
	wait(NULL);

	ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ?
						      PTRACE_O_TRACESYSGOOD :
						      PTRACE_O_TRACESECCOMP);
	ASSERT_EQ(0, ret) {
		TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
		kill(tracee, SIGKILL);
	}
	ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
		     tracee, NULL, 0);
	ASSERT_EQ(0, ret);

	/* Unblock the tracee */
	ASSERT_EQ(1, write(fd, "A", 1));
	ASSERT_EQ(0, close(fd));

	/* Run until we're shut down. Must assert to stop execution. */
	while (tracer_running) {
		int status;

		if (wait(&status) != tracee)
			continue;

		if (WIFSIGNALED(status)) {
			/* Child caught a fatal signal. */
			return;
		}
		if (WIFEXITED(status)) {
			/* Child exited with code. */
			return;
		}

		/* Check if we got an expected event. */
		ASSERT_EQ(WIFCONTINUED(status), false);
		ASSERT_EQ(WIFSTOPPED(status), true);
		ASSERT_EQ(WSTOPSIG(status) & SIGTRAP, SIGTRAP) {
			TH_LOG("Unexpected WSTOPSIG: %d", WSTOPSIG(status));
		}

		/* Let the per-test callback inspect/modify the tracee. */
		tracer_func(_metadata, tracee, status, args);

		ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
			     tracee, NULL, 0);
		ASSERT_EQ(0, ret);
	}
	/* Directly report the status of our test harness results. */
	syscall(__NR_exit, _metadata->exit_code);
}
1603
1604 /* Common tracer setup/teardown functions. */
/* Empty handler; installed for SIGALRM by setup_trace_fixture(). */
void cont_handler(int num)
{ }
/*
 * Fork a child that ptrace-attaches back to the calling process and
 * runs @func on each stop event (see start_tracer()).  Blocks on a
 * pipe until the tracer signals it is attached and ready.  Returns the
 * tracer's pid for later teardown_trace_fixture().
 */
pid_t setup_trace_fixture(struct __test_metadata *_metadata,
			  tracer_func_t func, void *args, bool ptrace_syscall)
{
	char sync;
	int pipefd[2];
	pid_t tracer_pid;
	pid_t tracee = getpid();

	/* Setup a pipe for clean synchronization. */
	ASSERT_EQ(0, pipe(pipefd));

	/* Fork a child which we'll promote to tracer */
	tracer_pid = fork();
	ASSERT_LE(0, tracer_pid);
	signal(SIGALRM, cont_handler);
	if (tracer_pid == 0) {
		/* Tracer child: never returns from start_tracer(). */
		close(pipefd[0]);
		start_tracer(_metadata, pipefd[1], tracee, func, args,
			     ptrace_syscall);
		syscall(__NR_exit, 0);
	}
	close(pipefd[1]);
	/* Permit the child to attach even under restrictive Yama. */
	prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
	/* Block until the tracer writes its one-byte ready marker. */
	read(pipefd[0], &sync, 1);
	close(pipefd[0]);

	return tracer_pid;
}
1635
/*
 * Ask the tracer (if any) to shut down via SIGUSR1 and reap it,
 * propagating its exit as harness assertions.
 */
void teardown_trace_fixture(struct __test_metadata *_metadata,
			    pid_t tracer)
{
	if (tracer) {
		int status;
		ASSERT_EQ(0, kill(tracer, SIGUSR1));
		ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
	}
}
1645
1646 /* "poke" tracer arguments and function. */
/* Argument for tracer_poke(): tracee address to overwrite. */
struct tracer_args_poke_t {
	unsigned long poke_addr;
};
1650
/*
 * Seccomp-stop callback: verify the filter's RET_TRACE data is 0x1001,
 * then write 0x1001 into the tracee at args->poke_addr to prove the
 * tracer observed (and can act on) the event.
 */
void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
		 void *args)
{
	int ret;
	unsigned long msg;
	struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;

	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
	EXPECT_EQ(0, ret);
	/* If this fails, don't try to recover. */
	ASSERT_EQ(0x1001, msg) {
		kill(tracee, SIGKILL);
	}
	/*
	 * Poke in the message.
	 * Registers are not touched to try to keep this relatively arch
	 * agnostic.
	 */
	ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
	EXPECT_EQ(0, ret);
}
1672
/* Fixture: a RET_TRACE filter plus a tracer that pokes self->poked. */
FIXTURE(TRACE_poke) {
	struct sock_fprog prog;
	pid_t tracer;
	long poked;	/* written by the tracer via PTRACE_POKEDATA */
	struct tracer_args_poke_t tracer_args;
};
1679
FIXTURE_SETUP(TRACE_poke)
{
	/* Trace read() with event data 0x1001; allow everything else. */
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};

	self->poked = 0;
	memset(&self->prog, 0, sizeof(self->prog));
	self->prog.filter = malloc(sizeof(filter));
	ASSERT_NE(NULL, self->prog.filter);
	memcpy(self->prog.filter, filter, sizeof(filter));
	self->prog.len = (unsigned short)ARRAY_SIZE(filter);

	/* Set up tracer args. */
	self->tracer_args.poke_addr = (unsigned long)&self->poked;

	/* Launch tracer. */
	self->tracer = setup_trace_fixture(_metadata, tracer_poke,
					   &self->tracer_args, false);
}
1704
FIXTURE_TEARDOWN(TRACE_poke)1705 FIXTURE_TEARDOWN(TRACE_poke)
1706 {
1707 teardown_trace_fixture(_metadata, self->tracer);
1708 if (self->prog.filter)
1709 free(self->prog.filter);
1710 }
1711
/*
 * A traced read() must stop in the tracer, which pokes 0x1001 into
 * self->poked — proving RET_TRACE events reach an attached tracer.
 */
TEST_F(TRACE_poke, read_has_side_effects)
{
	ssize_t ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(0, self->poked);
	/* Invalid fd: the syscall fails, but the trace stop still fires. */
	ret = read(-1, NULL, 0);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(0x1001, self->poked);
}
1727
/*
 * getpid() is not matched by the filter, so it must run normally and
 * trigger no tracer side effect (self->poked stays 0).
 */
TEST_F(TRACE_poke, getpid_runs_normally)
{
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(0, self->poked);
	EXPECT_NE(0, syscall(__NR_getpid));
	EXPECT_EQ(0, self->poked);
}
1742
1743 #if defined(__x86_64__)
1744 # define ARCH_REGS struct user_regs_struct
1745 # define SYSCALL_NUM(_regs) (_regs).orig_rax
1746 # define SYSCALL_RET(_regs) (_regs).rax
1747 #elif defined(__i386__)
1748 # define ARCH_REGS struct user_regs_struct
1749 # define SYSCALL_NUM(_regs) (_regs).orig_eax
1750 # define SYSCALL_RET(_regs) (_regs).eax
1751 #elif defined(__arm__)
1752 # define ARCH_REGS struct pt_regs
1753 # define SYSCALL_NUM(_regs) (_regs).ARM_r7
1754 # ifndef PTRACE_SET_SYSCALL
1755 # define PTRACE_SET_SYSCALL 23
1756 # endif
1757 # define SYSCALL_NUM_SET(_regs, _nr) \
1758 EXPECT_EQ(0, ptrace(PTRACE_SET_SYSCALL, tracee, NULL, _nr))
1759 # define SYSCALL_RET(_regs) (_regs).ARM_r0
1760 #elif defined(__aarch64__)
1761 # define ARCH_REGS struct user_pt_regs
1762 # define SYSCALL_NUM(_regs) (_regs).regs[8]
1763 # ifndef NT_ARM_SYSTEM_CALL
1764 # define NT_ARM_SYSTEM_CALL 0x404
1765 # endif
1766 # define SYSCALL_NUM_SET(_regs, _nr) \
1767 do { \
1768 struct iovec __v; \
1769 typeof(_nr) __nr = (_nr); \
1770 __v.iov_base = &__nr; \
1771 __v.iov_len = sizeof(__nr); \
1772 EXPECT_EQ(0, ptrace(PTRACE_SETREGSET, tracee, \
1773 NT_ARM_SYSTEM_CALL, &__v)); \
1774 } while (0)
1775 # define SYSCALL_RET(_regs) (_regs).regs[0]
1776 #elif defined(__loongarch__)
1777 # define ARCH_REGS struct user_pt_regs
1778 # define SYSCALL_NUM(_regs) (_regs).regs[11]
1779 # define SYSCALL_RET(_regs) (_regs).regs[4]
1780 #elif defined(__riscv) && __riscv_xlen == 64
1781 # define ARCH_REGS struct user_regs_struct
1782 # define SYSCALL_NUM(_regs) (_regs).a7
1783 # define SYSCALL_RET(_regs) (_regs).a0
1784 #elif defined(__csky__)
1785 # define ARCH_REGS struct pt_regs
1786 # if defined(__CSKYABIV2__)
1787 # define SYSCALL_NUM(_regs) (_regs).regs[3]
1788 # else
1789 # define SYSCALL_NUM(_regs) (_regs).regs[9]
1790 # endif
1791 # define SYSCALL_RET(_regs) (_regs).a0
1792 #elif defined(__hppa__)
1793 # define ARCH_REGS struct user_regs_struct
1794 # define SYSCALL_NUM(_regs) (_regs).gr[20]
1795 # define SYSCALL_RET(_regs) (_regs).gr[28]
1796 #elif defined(__powerpc__)
1797 # define ARCH_REGS struct pt_regs
1798 # define SYSCALL_NUM(_regs) (_regs).gpr[0]
1799 # define SYSCALL_RET(_regs) (_regs).gpr[3]
1800 # define SYSCALL_RET_SET(_regs, _val) \
1801 do { \
1802 typeof(_val) _result = (_val); \
1803 if ((_regs.trap & 0xfff0) == 0x3000) { \
1804 /* \
1805 * scv 0 system call uses -ve result \
1806 * for error, so no need to adjust. \
1807 */ \
1808 SYSCALL_RET(_regs) = _result; \
1809 } else { \
1810 /* \
1811 * A syscall error is signaled by the \
1812 * CR0 SO bit and the code is stored as \
1813 * a positive value. \
1814 */ \
1815 if (_result < 0) { \
1816 SYSCALL_RET(_regs) = -_result; \
1817 (_regs).ccr |= 0x10000000; \
1818 } else { \
1819 SYSCALL_RET(_regs) = _result; \
1820 (_regs).ccr &= ~0x10000000; \
1821 } \
1822 } \
1823 } while (0)
1824 # define SYSCALL_RET_SET_ON_PTRACE_EXIT
1825 #elif defined(__s390__)
1826 # define ARCH_REGS s390_regs
1827 # define SYSCALL_NUM(_regs) (_regs).gprs[2]
1828 # define SYSCALL_RET_SET(_regs, _val) \
1829 TH_LOG("Can't modify syscall return on this architecture")
1830 #elif defined(__mips__)
1831 # include <asm/unistd_nr_n32.h>
1832 # include <asm/unistd_nr_n64.h>
1833 # include <asm/unistd_nr_o32.h>
1834 # define ARCH_REGS struct pt_regs
1835 # define SYSCALL_NUM(_regs) \
1836 ({ \
1837 typeof((_regs).regs[2]) _nr; \
1838 if ((_regs).regs[2] == __NR_O32_Linux) \
1839 _nr = (_regs).regs[4]; \
1840 else \
1841 _nr = (_regs).regs[2]; \
1842 _nr; \
1843 })
1844 # define SYSCALL_NUM_SET(_regs, _nr) \
1845 do { \
1846 if ((_regs).regs[2] == __NR_O32_Linux) \
1847 (_regs).regs[4] = _nr; \
1848 else \
1849 (_regs).regs[2] = _nr; \
1850 } while (0)
1851 # define SYSCALL_RET_SET(_regs, _val) \
1852 TH_LOG("Can't modify syscall return on this architecture")
1853 #elif defined(__xtensa__)
1854 # define ARCH_REGS struct user_pt_regs
1855 # define SYSCALL_NUM(_regs) (_regs).syscall
1856 /*
1857 * On xtensa syscall return value is in the register
1858 * a2 of the current window which is not fixed.
1859 */
1860 #define SYSCALL_RET(_regs) (_regs).a[(_regs).windowbase * 4 + 2]
1861 #elif defined(__sh__)
1862 # define ARCH_REGS struct pt_regs
1863 # define SYSCALL_NUM(_regs) (_regs).regs[3]
1864 # define SYSCALL_RET(_regs) (_regs).regs[0]
1865 #elif defined(__mc68000__)
1866 # define ARCH_REGS struct user_regs_struct
1867 # define SYSCALL_NUM(_regs) (_regs).orig_d0
1868 # define SYSCALL_RET(_regs) (_regs).d0
1869 #else
1870 # error "Do not know how to find your architecture's registers and syscalls"
1871 #endif
1872
1873 /*
1874 * Most architectures can change the syscall by just updating the
1875 * associated register. This is the default if not defined above.
1876 */
1877 #ifndef SYSCALL_NUM_SET
1878 # define SYSCALL_NUM_SET(_regs, _nr) \
1879 do { \
1880 SYSCALL_NUM(_regs) = (_nr); \
1881 } while (0)
1882 #endif
1883 /*
1884 * Most architectures can change the syscall return value by just
1885 * writing to the SYSCALL_RET register. This is the default if not
1886 * defined above. If an architecture cannot set the return value
1887 * (for example when the syscall and return value register is
1888 * shared), report it with TH_LOG() in an arch-specific definition
1889 * of SYSCALL_RET_SET() above, and leave SYSCALL_RET undefined.
1890 */
1891 #if !defined(SYSCALL_RET) && !defined(SYSCALL_RET_SET)
1892 # error "One of SYSCALL_RET or SYSCALL_RET_SET is needed for this arch"
1893 #endif
1894 #ifndef SYSCALL_RET_SET
1895 # define SYSCALL_RET_SET(_regs, _val) \
1896 do { \
1897 SYSCALL_RET(_regs) = (_val); \
1898 } while (0)
1899 #endif
1900
1901 /* When the syscall return can't be changed, stub out the tests for it. */
1902 #ifndef SYSCALL_RET
1903 # define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action)
1904 #else
1905 # define EXPECT_SYSCALL_RETURN(val, action) \
1906 do { \
1907 errno = 0; \
1908 if (val < 0) { \
1909 EXPECT_EQ(-1, action); \
1910 EXPECT_EQ(-(val), errno); \
1911 } else { \
1912 EXPECT_EQ(val, action); \
1913 } \
1914 } while (0)
1915 #endif
1916
1917 /*
1918 * Some architectures (e.g. powerpc) can only set syscall
1919 * return values on syscall exit during ptrace.
1920 */
1921 const bool ptrace_entry_set_syscall_nr = true;
1922 const bool ptrace_entry_set_syscall_ret =
1923 #ifndef SYSCALL_RET_SET_ON_PTRACE_EXIT
1924 true;
1925 #else
1926 false;
1927 #endif
1928
1929 /*
1930 * Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
1931 * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
1932 */
1933 #if defined(__x86_64__) || defined(__i386__) || defined(__mips__) || defined(__mc68000__)
1934 # define ARCH_GETREGS(_regs) ptrace(PTRACE_GETREGS, tracee, 0, &(_regs))
1935 # define ARCH_SETREGS(_regs) ptrace(PTRACE_SETREGS, tracee, 0, &(_regs))
1936 #else
1937 # define ARCH_GETREGS(_regs) ({ \
1938 struct iovec __v; \
1939 __v.iov_base = &(_regs); \
1940 __v.iov_len = sizeof(_regs); \
1941 ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &__v); \
1942 })
1943 # define ARCH_SETREGS(_regs) ({ \
1944 struct iovec __v; \
1945 __v.iov_base = &(_regs); \
1946 __v.iov_len = sizeof(_regs); \
1947 ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &__v); \
1948 })
1949 #endif
1950
1951 /* Architecture-specific syscall fetching routine. */
/*
 * Read the in-flight syscall number from @tracee's registers via the
 * arch-specific ARCH_GETREGS/SYSCALL_NUM macros.  Returns -1 if the
 * register fetch fails.
 */
int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
{
	ARCH_REGS regs;

	EXPECT_EQ(0, ARCH_GETREGS(regs)) {
		return -1;
	}

	return SYSCALL_NUM(regs);
}
1962
1963 /* Architecture-specific syscall changing routine. */
/*
 * Rewrite @tracee's syscall number and/or return value via ptrace.
 * Either pointer may be NULL to leave that register untouched; both
 * NULL is a no-op.  Registers are only written back if something
 * actually changed (some SYSCALL_NUM_SET implementations, e.g. arm,
 * update the tracee directly rather than the local regs copy).
 */
void __change_syscall(struct __test_metadata *_metadata,
		    pid_t tracee, long *syscall, long *ret)
{
	ARCH_REGS orig, regs;

	/* Do not get/set registers if we have nothing to do. */
	if (!syscall && !ret)
		return;

	EXPECT_EQ(0, ARCH_GETREGS(regs)) {
		return;
	}
	orig = regs;

	if (syscall)
		SYSCALL_NUM_SET(regs, *syscall);

	if (ret)
		SYSCALL_RET_SET(regs, *ret);

	/* Flush any register changes made. */
	if (memcmp(&orig, &regs, sizeof(orig)) != 0)
		EXPECT_EQ(0, ARCH_SETREGS(regs));
}
1988
1989 /* Change only syscall number. */
/* Change only syscall number; the return register is left alone. */
void change_syscall_nr(struct __test_metadata *_metadata,
		       pid_t tracee, long syscall)
{
	__change_syscall(_metadata, tracee, &syscall, NULL);
}
1995
1996 /* Change syscall return value (and set syscall number to -1). */
/*
 * Skip the syscall (number set to -1) and force its return value to
 * @ret.
 */
void change_syscall_ret(struct __test_metadata *_metadata,
			pid_t tracee, long ret)
{
	long syscall = -1;

	__change_syscall(_metadata, tracee, &syscall, &ret);
}
2004
/*
 * Seccomp-event callback for the TRACE_syscall fixture: dispatch on
 * the RET_TRACE data word set by the filter (0x1002..0x1005) and
 * rewrite the tracee's syscall accordingly.
 */
void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee,
		    int status, void *args)
{
	int ret;
	unsigned long msg;

	EXPECT_EQ(PTRACE_EVENT_MASK(status), PTRACE_EVENT_SECCOMP) {
		TH_LOG("Unexpected ptrace event: %d", PTRACE_EVENT_MASK(status));
		return;
	}

	/* Make sure we got the right message. */
	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
	EXPECT_EQ(0, ret);

	/* Validate and take action on expected syscalls. */
	switch (msg) {
	case 0x1002:
		/* change getpid to getppid. */
		EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
		change_syscall_nr(_metadata, tracee, __NR_getppid);
		break;
	case 0x1003:
		/* skip gettid with valid return code. */
		EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
		change_syscall_ret(_metadata, tracee, 45000);
		break;
	case 0x1004:
		/* skip openat with error. */
		EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee));
		change_syscall_ret(_metadata, tracee, -ESRCH);
		break;
	case 0x1005:
		/* do nothing (allow getppid) */
		EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
		break;
	default:
		EXPECT_EQ(0, msg) {
			TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
			kill(tracee, SIGKILL);
		}
	}

}
2049
/* Fixture: syscall-rewriting filter plus cached real pid/tid/ppid. */
FIXTURE(TRACE_syscall) {
	struct sock_fprog prog;
	pid_t tracer, mytid, mypid, parent;
	long syscall_nr;	/* saved at syscall entry by tracer_ptrace() */
};
2055
/*
 * PTRACE_SYSCALL callback: mirrors tracer_seccomp()'s rewrites, but
 * driven by syscall-entry/exit stops instead of seccomp events.  Uses
 * a static entry/exit toggle, so it assumes stops arrive strictly in
 * entry/exit pairs for a single tracee.
 */
void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
		   int status, void *args)
{
	int ret;
	unsigned long msg;
	static bool entry;
	long syscall_nr_val, syscall_ret_val;
	long *syscall_nr = NULL, *syscall_ret = NULL;
	FIXTURE_DATA(TRACE_syscall) *self = args;

	/* PTRACE_O_TRACESYSGOOD marks syscall stops with bit 0x80. */
	EXPECT_EQ(WSTOPSIG(status) & 0x80, 0x80) {
		TH_LOG("Unexpected WSTOPSIG: %d", WSTOPSIG(status));
		return;
	}

	/*
	 * The traditional way to tell PTRACE_SYSCALL entry/exit
	 * is by counting.
	 */
	entry = !entry;

	/* Make sure we got an appropriate message. */
	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
	EXPECT_EQ(0, ret);
	EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY
			: PTRACE_EVENTMSG_SYSCALL_EXIT, msg);

	/*
	 * Some architectures only support setting return values during
	 * syscall exit under ptrace, and on exit the syscall number may
	 * no longer be available. Therefore, save the initial sycall
	 * number here, so it can be examined during both entry and exit
	 * phases.
	 */
	if (entry)
		self->syscall_nr = get_syscall(_metadata, tracee);

	/*
	 * Depending on the architecture's syscall setting abilities, we
	 * pick which things to set during this phase (entry or exit).
	 */
	if (entry == ptrace_entry_set_syscall_nr)
		syscall_nr = &syscall_nr_val;
	if (entry == ptrace_entry_set_syscall_ret)
		syscall_ret = &syscall_ret_val;

	/* Now handle the actual rewriting cases. */
	switch (self->syscall_nr) {
	case __NR_getpid:
		syscall_nr_val = __NR_getppid;
		/* Never change syscall return for this case. */
		syscall_ret = NULL;
		break;
	case __NR_gettid:
		syscall_nr_val = -1;
		syscall_ret_val = 45000;
		break;
	case __NR_openat:
		syscall_nr_val = -1;
		syscall_ret_val = -ESRCH;
		break;
	default:
		/* Unhandled, do nothing. */
		return;
	}

	__change_syscall(_metadata, tracee, syscall_nr, syscall_ret);
}
2124
/* Variant data for TRACE_syscall: selects which tracer mechanism to use. */
FIXTURE_VARIANT(TRACE_syscall) {
	/*
	 * All of the SECCOMP_RET_TRACE behaviors can be tested with either
	 * SECCOMP_RET_TRACE+PTRACE_CONT or plain ptrace()+PTRACE_SYSCALL.
	 * This indicates if we should use SECCOMP_RET_TRACE (false), or
	 * ptrace (true).
	 */
	bool use_ptrace;
};
2134
/* Run the TRACE_syscall tests with a plain ptrace()+PTRACE_SYSCALL tracer. */
FIXTURE_VARIANT_ADD(TRACE_syscall, ptrace) {
	.use_ptrace = true,
};
2138
/* Run the TRACE_syscall tests with a SECCOMP_RET_TRACE filter tracer. */
FIXTURE_VARIANT_ADD(TRACE_syscall, seccomp) {
	.use_ptrace = false,
};
2142
/*
 * Record known-good pid/tid/ppid values, launch the tracer, and (for the
 * seccomp variant only) install the SECCOMP_RET_TRACE filter.
 */
FIXTURE_SETUP(TRACE_syscall)
{
	/*
	 * Tag getpid, gettid, openat, and getppid for tracing, each with
	 * a distinct event message (0x1002..0x1005).
	 */
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	/* Prepare some testable syscall results. */
	self->mytid = syscall(__NR_gettid);
	ASSERT_GT(self->mytid, 0);
	ASSERT_NE(self->mytid, 1) {
		TH_LOG("Running this test as init is not supported. :)");
	}

	self->mypid = getpid();
	ASSERT_GT(self->mypid, 0);
	ASSERT_EQ(self->mytid, self->mypid);

	self->parent = getppid();
	ASSERT_GT(self->parent, 0);
	ASSERT_NE(self->parent, self->mypid);

	/* Launch tracer. */
	self->tracer = setup_trace_fixture(_metadata,
					   variant->use_ptrace ? tracer_ptrace
							       : tracer_seccomp,
					   self, variant->use_ptrace);

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	/* Do not install seccomp rewrite filters, as we'll use ptrace instead. */
	if (variant->use_ptrace)
		return;

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
	ASSERT_EQ(0, ret);
}
2195
/* Stop the tracer process launched by FIXTURE_SETUP(). */
FIXTURE_TEARDOWN(TRACE_syscall)
{
	teardown_trace_fixture(_metadata, self->tracer);
}
2200
/* Invalid (negative) syscall numbers must fail with ENOSYS. */
TEST(negative_ENOSYS)
{
#if defined(__arm__)
	SKIP(return, "arm32 does not support calling syscall -1");
#endif
	/*
	 * There should be no difference between an "internal" skip
	 * and userspace asking for syscall "-1".
	 */
	errno = 0;
	EXPECT_EQ(-1, syscall(-1));
	EXPECT_EQ(errno, ENOSYS);
	/* And no difference for "still not valid but not -1". */
	errno = 0;
	EXPECT_EQ(-1, syscall(-101));
	EXPECT_EQ(errno, ENOSYS);
}
2218
/* The same ENOSYS behavior must hold while under a tracer. */
TEST_F(TRACE_syscall, negative_ENOSYS)
{
	negative_ENOSYS(_metadata);
}
2223
/* A syscall the tracer leaves alone must behave normally. */
TEST_F(TRACE_syscall, syscall_allowed)
{
	/* getppid works as expected (no changes). */
	EXPECT_EQ(self->parent, syscall(__NR_getppid));
	EXPECT_NE(self->mypid, syscall(__NR_getppid));
}
2230
/* The tracer rewrites getpid into getppid (see tracer callbacks). */
TEST_F(TRACE_syscall, syscall_redirected)
{
	/* getpid has been redirected to getppid as expected. */
	EXPECT_EQ(self->parent, syscall(__NR_getpid));
	EXPECT_NE(self->mypid, syscall(__NR_getpid));
}
2237
/* The tracer skips openat and injects an error return value. */
TEST_F(TRACE_syscall, syscall_errno)
{
	/* Tracer should skip the open syscall, resulting in ESRCH. */
	EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
}
2243
/* The tracer skips gettid and injects a fake success value. */
TEST_F(TRACE_syscall, syscall_faked)
{
	/* Tracer skips the gettid syscall and store altered return value. */
	EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
}
2249
/*
 * A SECCOMP_RET_KILL_THREAD filter must deliver SIGSYS immediately,
 * regardless of any attached tracer.
 */
TEST_F_SIGNAL(TRACE_syscall, kill_immediate, SIGSYS)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_mknodat, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	/* Install "kill on mknodat" filter. */
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
	ASSERT_EQ(0, ret);

	/* This should immediately die with SIGSYS, regardless of tracer. */
	EXPECT_EQ(-1, syscall(__NR_mknodat, -1, NULL, 0, 0));
}
2272
/*
 * A filter installed after the tracer still applies to the syscall the
 * tracer rewrites: redirected getpid hits the getppid EPERM rule.
 */
TEST_F(TRACE_syscall, skip_after)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	/* Install additional "errno on getppid" filter. */
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
	ASSERT_EQ(0, ret);

	/* Tracer will redirect getpid to getppid, and we should see EPERM. */
	errno = 0;
	EXPECT_EQ(-1, syscall(__NR_getpid));
	EXPECT_EQ(EPERM, errno);
}
2297
/*
 * Like skip_after, but the later filter kills: the tracer-redirected
 * getpid must hit the getppid SECCOMP_RET_KILL rule and raise SIGSYS.
 */
TEST_F_SIGNAL(TRACE_syscall, kill_after, SIGSYS)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	/* Install additional "death on getppid" filter. */
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
	ASSERT_EQ(0, ret);

	/* Tracer will redirect getpid to getppid, and we should die. */
	EXPECT_NE(self->mypid, syscall(__NR_getpid));
}
2320
TEST(seccomp_syscall)2321 TEST(seccomp_syscall)
2322 {
2323 struct sock_filter filter[] = {
2324 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2325 };
2326 struct sock_fprog prog = {
2327 .len = (unsigned short)ARRAY_SIZE(filter),
2328 .filter = filter,
2329 };
2330 long ret;
2331
2332 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2333 ASSERT_EQ(0, ret) {
2334 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2335 }
2336
2337 /* Reject insane operation. */
2338 ret = seccomp(-1, 0, &prog);
2339 ASSERT_NE(ENOSYS, errno) {
2340 TH_LOG("Kernel does not support seccomp syscall!");
2341 }
2342 EXPECT_EQ(EINVAL, errno) {
2343 TH_LOG("Did not reject crazy op value!");
2344 }
2345
2346 /* Reject strict with flags or pointer. */
2347 ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
2348 EXPECT_EQ(EINVAL, errno) {
2349 TH_LOG("Did not reject mode strict with flags!");
2350 }
2351 ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
2352 EXPECT_EQ(EINVAL, errno) {
2353 TH_LOG("Did not reject mode strict with uargs!");
2354 }
2355
2356 /* Reject insane args for filter. */
2357 ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
2358 EXPECT_EQ(EINVAL, errno) {
2359 TH_LOG("Did not reject crazy filter flags!");
2360 }
2361 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
2362 EXPECT_EQ(EFAULT, errno) {
2363 TH_LOG("Did not reject NULL filter!");
2364 }
2365
2366 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2367 EXPECT_EQ(0, errno) {
2368 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
2369 strerror(errno));
2370 }
2371 }
2372
/*
 * Once a filter is installed, neither prctl() nor seccomp() may switch
 * the process to strict mode.
 */
TEST(seccomp_syscall_mode_lock)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
	ASSERT_NE(ENOSYS, errno) {
		TH_LOG("Kernel does not support seccomp syscall!");
	}
	EXPECT_EQ(0, ret) {
		TH_LOG("Could not install filter!");
	}

	/* Make sure neither entry point will switch to strict. */
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
	EXPECT_EQ(EINVAL, errno) {
		TH_LOG("Switched to mode strict!");
	}

	ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
	EXPECT_EQ(EINVAL, errno) {
		TH_LOG("Switched to mode strict!");
	}
}
2408
2409 /*
2410 * Test detection of known and unknown filter flags. Userspace needs to be able
2411 * to check if a filter flag is supported by the current kernel and a good way
2412 * of doing that is by attempting to enter filter mode, with the flag bit in
2413 * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates
2414 * that the flag is valid and EINVAL indicates that the flag is invalid.
2415 */
TEST(detect_seccomp_filter_flags)
{
	/* All currently known filter flags, each a single bit. */
	unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
				 SECCOMP_FILTER_FLAG_LOG,
				 SECCOMP_FILTER_FLAG_SPEC_ALLOW,
				 SECCOMP_FILTER_FLAG_NEW_LISTENER,
				 SECCOMP_FILTER_FLAG_TSYNC_ESRCH };
	/* Flags that cannot be combined with each other. */
	unsigned int exclusive[] = {
				 SECCOMP_FILTER_FLAG_TSYNC,
				 SECCOMP_FILTER_FLAG_NEW_LISTENER };
	unsigned int flag, all_flags, exclusive_mask;
	int i;
	long ret;

	/* Test detection of individual known-good filter flags */
	for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
		int bits = 0;

		flag = flags[i];
		/* Make sure the flag is a single bit! */
		while (flag) {
			if (flag & 0x1)
				bits ++;
			flag >>= 1;
		}
		ASSERT_EQ(1, bits);
		flag = flags[i];

		/* NULL args + valid flag must yield EFAULT, not EINVAL. */
		ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
		ASSERT_NE(ENOSYS, errno) {
			TH_LOG("Kernel does not support seccomp syscall!");
		}
		EXPECT_EQ(-1, ret);
		EXPECT_EQ(EFAULT, errno) {
			TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
			       flag);
		}

		all_flags |= flag;
	}

	/*
	 * Test detection of all known-good filter flags combined. But
	 * for the exclusive flags we need to mask them out and try them
	 * individually for the "all flags" testing.
	 */
	exclusive_mask = 0;
	for (i = 0; i < ARRAY_SIZE(exclusive); i++)
		exclusive_mask |= exclusive[i];
	for (i = 0; i < ARRAY_SIZE(exclusive); i++) {
		/* All non-exclusive flags plus exactly one exclusive flag. */
		flag = all_flags & ~exclusive_mask;
		flag |= exclusive[i];

		ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
		EXPECT_EQ(-1, ret);
		EXPECT_EQ(EFAULT, errno) {
			TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
			       flag);
		}
	}

	/* Test detection of an unknown filter flags, without exclusives. */
	flag = -1;
	flag &= ~exclusive_mask;
	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno) {
		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
		       flag);
	}

	/*
	 * Test detection of an unknown filter flag that may simply need to be
	 * added to this test
	 */
	flag = flags[ARRAY_SIZE(flags) - 1] << 1;
	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno) {
		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
		       flag);
	}
}
2499
/* TSYNC must be accepted even when installing the very first filter. */
TEST(TSYNC_first)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
		      &prog);
	ASSERT_NE(ENOSYS, errno) {
		TH_LOG("Kernel does not support seccomp syscall!");
	}
	EXPECT_EQ(0, ret) {
		TH_LOG("Could not install initial filter with TSYNC!");
	}
}
2525
#define TSYNC_SIBLINGS 2

/* Per-thread state shared between a TSYNC test and its sibling threads. */
struct tsync_sibling {
	pthread_t tid;		/* pthread handle; cleared to 0 after join */
	pid_t system_tid;	/* kernel tid, filled in by the sibling itself */
	sem_t *started;		/* posted once the sibling is running */
	pthread_cond_t *cond;	/* release signal from the main thread */
	pthread_mutex_t *mutex;	/* guards cond and num_waits */
	int diverge;		/* nonzero: apply own filter to split the tree */
	int num_waits;		/* number of cond wakeups to consume */
	struct sock_fprog *prog;	/* filter applied when diverging */
	struct __test_metadata *metadata;
};
2538
2539 /*
2540 * To avoid joining joined threads (which is not allowed by Bionic),
2541 * make sure we both successfully join and clear the tid to skip a
2542 * later join attempt during fixture teardown. Any remaining threads
2543 * will be directly killed during teardown.
2544 */
/* On a successful join, zero the tid so teardown will not touch it again. */
#define PTHREAD_JOIN(tid, status) \
	do { \
		int _rc = pthread_join(tid, status); \
		if (_rc) { \
			TH_LOG("pthread_join of tid %u failed: %d\n", \
			       (unsigned int)tid, _rc); \
		} else { \
			tid = 0; \
		} \
	} while (0)
2555
/* Shared state for the TSYNC (thread-sync filter) tests. */
FIXTURE(TSYNC) {
	/* root_prog: allow-all; apply_prog: kill on read (see setup). */
	struct sock_fprog root_prog, apply_prog;
	struct tsync_sibling sibling[TSYNC_SIBLINGS];
	sem_t started;		/* posted by each sibling as it comes up */
	pthread_cond_t cond;	/* broadcast to release waiting siblings */
	pthread_mutex_t mutex;	/* guards cond and sibling num_waits */
	int sibling_count;	/* siblings confirmed started so far */
};
2564
FIXTURE_SETUP(TSYNC)2565 FIXTURE_SETUP(TSYNC)
2566 {
2567 struct sock_filter root_filter[] = {
2568 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2569 };
2570 struct sock_filter apply_filter[] = {
2571 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2572 offsetof(struct seccomp_data, nr)),
2573 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
2574 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2575 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2576 };
2577
2578 memset(&self->root_prog, 0, sizeof(self->root_prog));
2579 memset(&self->apply_prog, 0, sizeof(self->apply_prog));
2580 memset(&self->sibling, 0, sizeof(self->sibling));
2581 self->root_prog.filter = malloc(sizeof(root_filter));
2582 ASSERT_NE(NULL, self->root_prog.filter);
2583 memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
2584 self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
2585
2586 self->apply_prog.filter = malloc(sizeof(apply_filter));
2587 ASSERT_NE(NULL, self->apply_prog.filter);
2588 memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
2589 self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
2590
2591 self->sibling_count = 0;
2592 pthread_mutex_init(&self->mutex, NULL);
2593 pthread_cond_init(&self->cond, NULL);
2594 sem_init(&self->started, 0, 0);
2595 self->sibling[0].tid = 0;
2596 self->sibling[0].cond = &self->cond;
2597 self->sibling[0].started = &self->started;
2598 self->sibling[0].mutex = &self->mutex;
2599 self->sibling[0].diverge = 0;
2600 self->sibling[0].num_waits = 1;
2601 self->sibling[0].prog = &self->root_prog;
2602 self->sibling[0].metadata = _metadata;
2603 self->sibling[1].tid = 0;
2604 self->sibling[1].cond = &self->cond;
2605 self->sibling[1].started = &self->started;
2606 self->sibling[1].mutex = &self->mutex;
2607 self->sibling[1].diverge = 0;
2608 self->sibling[1].prog = &self->root_prog;
2609 self->sibling[1].num_waits = 1;
2610 self->sibling[1].metadata = _metadata;
2611 }
2612
FIXTURE_TEARDOWN(TSYNC)2613 FIXTURE_TEARDOWN(TSYNC)
2614 {
2615 int sib = 0;
2616
2617 if (self->root_prog.filter)
2618 free(self->root_prog.filter);
2619 if (self->apply_prog.filter)
2620 free(self->apply_prog.filter);
2621
2622 for ( ; sib < self->sibling_count; ++sib) {
2623 struct tsync_sibling *s = &self->sibling[sib];
2624
2625 if (!s->tid)
2626 continue;
2627 /*
2628 * If a thread is still running, it may be stuck, so hit
2629 * it over the head really hard.
2630 */
2631 pthread_kill(s->tid, 9);
2632 }
2633 pthread_mutex_destroy(&self->mutex);
2634 pthread_cond_destroy(&self->cond);
2635 sem_destroy(&self->started);
2636 }
2637
/*
 * Body of each TSYNC sibling thread: record the kernel tid, optionally
 * apply its own filter (to diverge from the filter tree), wait to be
 * released by the main thread, then probe nnp state and attempt a
 * read() which a synced kill-on-read filter would turn fatal.
 */
void *tsync_sibling(void *data)
{
	long ret = 0;
	struct tsync_sibling *me = data;

	me->system_tid = syscall(__NR_gettid);

	pthread_mutex_lock(me->mutex);
	if (me->diverge) {
		/* Just re-apply the root prog to fork the tree */
		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
			    me->prog, 0, 0);
	}
	sem_post(me->started);
	/* Return outside of started so parent notices failures. */
	if (ret) {
		pthread_mutex_unlock(me->mutex);
		return (void *)SIBLING_EXIT_FAILURE;
	}
	/* Consume num_waits wakeups before proceeding. */
	do {
		pthread_cond_wait(me->cond, me->mutex);
		me->num_waits = me->num_waits - 1;
	} while (me->num_waits);
	pthread_mutex_unlock(me->mutex);

	/* Report if no_new_privs is still unset at this point. */
	ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
	if (!ret)
		return (void *)SIBLING_EXIT_NEWPRIVS;
	/* If the kill-on-read filter was synced to us, this never returns. */
	read(-1, NULL, 0);
	return (void *)SIBLING_EXIT_UNKILLED;
}
2669
/*
 * Launch one sibling thread running tsync_sibling().
 * NOTE(review): pthread_create()'s return value is ignored; on failure
 * the sibling never posts ->started — confirm callers tolerate this.
 */
void tsync_start_sibling(struct tsync_sibling *sibling)
{
	pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
}
2674
/*
 * A sibling whose diverging prctl() is denied (by the EINVAL-on-prctl
 * filter installed here) must report SIBLING_EXIT_FAILURE, while the
 * other sibling runs to completion unkilled.
 */
TEST_F(TSYNC, siblings_fail_prctl)
{
	long ret;
	void *status;
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};

	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	/* Check prctl failure detection by requesting sib 0 diverge. */
	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
	ASSERT_NE(ENOSYS, errno) {
		TH_LOG("Kernel does not support seccomp syscall!");
	}
	ASSERT_EQ(0, ret) {
		TH_LOG("setting filter failed");
	}

	self->sibling[0].diverge = 1;
	tsync_start_sibling(&self->sibling[0]);
	tsync_start_sibling(&self->sibling[1]);

	/* Wait until both siblings have posted "started". */
	while (self->sibling_count < TSYNC_SIBLINGS) {
		sem_wait(&self->started);
		self->sibling_count++;
	}

	/* Signal the threads to clean up */
	pthread_mutex_lock(&self->mutex);
	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
		TH_LOG("cond broadcast non-zero");
	}
	pthread_mutex_unlock(&self->mutex);

	/* Ensure diverging sibling failed to call prctl. */
	PTHREAD_JOIN(self->sibling[0].tid, &status);
	EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
	PTHREAD_JOIN(self->sibling[1].tid, &status);
	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
}
2726
/*
 * When all threads share an ancestor filter, TSYNC must succeed and the
 * synced kill-on-read filter must kill both siblings.
 */
TEST_F(TSYNC, two_siblings_with_ancestor)
{
	long ret;
	void *status;

	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
	ASSERT_NE(ENOSYS, errno) {
		TH_LOG("Kernel does not support seccomp syscall!");
	}
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
	}
	tsync_start_sibling(&self->sibling[0]);
	tsync_start_sibling(&self->sibling[1]);

	/* Wait until both siblings have posted "started". */
	while (self->sibling_count < TSYNC_SIBLINGS) {
		sem_wait(&self->started);
		self->sibling_count++;
	}

	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
		      &self->apply_prog);
	ASSERT_EQ(0, ret) {
		TH_LOG("Could install filter on all threads!");
	}
	/* Tell the siblings to test the policy */
	pthread_mutex_lock(&self->mutex);
	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
		TH_LOG("cond broadcast non-zero");
	}
	pthread_mutex_unlock(&self->mutex);
	/* Ensure they are both killed and don't exit cleanly. */
	PTHREAD_JOIN(self->sibling[0].tid, &status);
	EXPECT_EQ(0x0, (long)status);
	PTHREAD_JOIN(self->sibling[1].tid, &status);
	EXPECT_EQ(0x0, (long)status);
}
2768
/*
 * With no filter and no nnp set anywhere, both siblings must report
 * SIBLING_EXIT_NEWPRIVS (no_new_privs still unset).
 */
TEST_F(TSYNC, two_sibling_want_nnp)
{
	void *status;

	/* start siblings before any prctl() operations */
	tsync_start_sibling(&self->sibling[0]);
	tsync_start_sibling(&self->sibling[1]);
	while (self->sibling_count < TSYNC_SIBLINGS) {
		sem_wait(&self->started);
		self->sibling_count++;
	}

	/* Tell the siblings to test no policy */
	pthread_mutex_lock(&self->mutex);
	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
		TH_LOG("cond broadcast non-zero");
	}
	pthread_mutex_unlock(&self->mutex);

	/* Ensure they are both upset about lacking nnp. */
	PTHREAD_JOIN(self->sibling[0].tid, &status);
	EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
	PTHREAD_JOIN(self->sibling[1].tid, &status);
	EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
}
2794
/*
 * TSYNC from a thread with nnp must also apply the filter (and nnp) to
 * siblings that had no filter at all; the kill-on-read filter then
 * kills both.
 */
TEST_F(TSYNC, two_siblings_with_no_filter)
{
	long ret;
	void *status;

	/* start siblings before any prctl() operations */
	tsync_start_sibling(&self->sibling[0]);
	tsync_start_sibling(&self->sibling[1]);
	while (self->sibling_count < TSYNC_SIBLINGS) {
		sem_wait(&self->started);
		self->sibling_count++;
	}

	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
		      &self->apply_prog);
	ASSERT_NE(ENOSYS, errno) {
		TH_LOG("Kernel does not support seccomp syscall!");
	}
	ASSERT_EQ(0, ret) {
		TH_LOG("Could install filter on all threads!");
	}

	/* Tell the siblings to test the policy */
	pthread_mutex_lock(&self->mutex);
	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
		TH_LOG("cond broadcast non-zero");
	}
	pthread_mutex_unlock(&self->mutex);

	/* Ensure they are both killed and don't exit cleanly. */
	PTHREAD_JOIN(self->sibling[0].tid, &status);
	EXPECT_EQ(0x0, (long)status);
	PTHREAD_JOIN(self->sibling[1].tid, &status);
	EXPECT_EQ(0x0, (long)status);
}
2834
/*
 * TSYNC must fail when one sibling has diverged from the filter tree,
 * and must return that sibling's tid; neither sibling gets the filter,
 * so both survive the read() probe.
 */
TEST_F(TSYNC, two_siblings_with_one_divergence)
{
	long ret;
	void *status;

	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
	ASSERT_NE(ENOSYS, errno) {
		TH_LOG("Kernel does not support seccomp syscall!");
	}
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
	}
	self->sibling[0].diverge = 1;
	tsync_start_sibling(&self->sibling[0]);
	tsync_start_sibling(&self->sibling[1]);

	while (self->sibling_count < TSYNC_SIBLINGS) {
		sem_wait(&self->started);
		self->sibling_count++;
	}

	/* TSYNC reports the tid of the thread that could not be synced. */
	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
		      &self->apply_prog);
	ASSERT_EQ(self->sibling[0].system_tid, ret) {
		TH_LOG("Did not fail on diverged sibling.");
	}

	/* Wake the threads */
	pthread_mutex_lock(&self->mutex);
	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
		TH_LOG("cond broadcast non-zero");
	}
	pthread_mutex_unlock(&self->mutex);

	/* Ensure they are both unkilled. */
	PTHREAD_JOIN(self->sibling[0].tid, &status);
	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
	PTHREAD_JOIN(self->sibling[1].tid, &status);
	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
}
2879
/*
 * With SECCOMP_FILTER_FLAG_TSYNC_ESRCH, a diverged sibling must make
 * TSYNC fail with -1/ESRCH instead of returning the offending tid.
 */
TEST_F(TSYNC, two_siblings_with_one_divergence_no_tid_in_err)
{
	long ret, flags;
	void *status;

	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
	ASSERT_NE(ENOSYS, errno) {
		TH_LOG("Kernel does not support seccomp syscall!");
	}
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
	}
	self->sibling[0].diverge = 1;
	tsync_start_sibling(&self->sibling[0]);
	tsync_start_sibling(&self->sibling[1]);

	while (self->sibling_count < TSYNC_SIBLINGS) {
		sem_wait(&self->started);
		self->sibling_count++;
	}

	flags = SECCOMP_FILTER_FLAG_TSYNC | \
		SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
	ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, &self->apply_prog);
	ASSERT_EQ(ESRCH, errno) {
		TH_LOG("Did not return ESRCH for diverged sibling.");
	}
	ASSERT_EQ(-1, ret) {
		TH_LOG("Did not fail on diverged sibling.");
	}

	/* Wake the threads */
	pthread_mutex_lock(&self->mutex);
	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
		TH_LOG("cond broadcast non-zero");
	}
	pthread_mutex_unlock(&self->mutex);

	/* Ensure they are both unkilled. */
	PTHREAD_JOIN(self->sibling[0].tid, &status);
	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
	PTHREAD_JOIN(self->sibling[1].tid, &status);
	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
}
2928
/*
 * One sibling diverges (own filter), the other is never under seccomp.
 * The first TSYNC must fail with the diverged sibling's tid; once that
 * sibling has exited, TSYNC must succeed, killing the remaining sibling
 * via the synced kill-on-read filter; finally TSYNC on a single-thread
 * process must succeed trivially.
 */
TEST_F(TSYNC, two_siblings_not_under_filter)
{
	long ret, sib;
	void *status;
	struct timespec delay = { .tv_nsec = 100000000 };

	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	/*
	 * Sibling 0 will have its own seccomp policy
	 * and Sibling 1 will not be under seccomp at
	 * all. Sibling 1 will enter seccomp and 0
	 * will cause failure.
	 */
	self->sibling[0].diverge = 1;
	tsync_start_sibling(&self->sibling[0]);
	tsync_start_sibling(&self->sibling[1]);

	while (self->sibling_count < TSYNC_SIBLINGS) {
		sem_wait(&self->started);
		self->sibling_count++;
	}

	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
	ASSERT_NE(ENOSYS, errno) {
		TH_LOG("Kernel does not support seccomp syscall!");
	}
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
	}

	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
		      &self->apply_prog);
	ASSERT_EQ(ret, self->sibling[0].system_tid) {
		TH_LOG("Did not fail on diverged sibling.");
	}
	/* sib indexes whichever sibling was reported as diverged. */
	sib = 1;
	if (ret == self->sibling[0].system_tid)
		sib = 0;

	pthread_mutex_lock(&self->mutex);

	/* Increment the other siblings num_waits so we can clean up
	 * the one we just saw.
	 */
	self->sibling[!sib].num_waits += 1;

	/* Signal the thread to clean up */
	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
		TH_LOG("cond broadcast non-zero");
	}
	pthread_mutex_unlock(&self->mutex);
	PTHREAD_JOIN(self->sibling[sib].tid, &status);
	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
	/* Poll for actual task death. pthread_join doesn't guarantee it. */
	while (!kill(self->sibling[sib].system_tid, 0))
		nanosleep(&delay, NULL);
	/* Switch to the remaining sibling */
	sib = !sib;

	/* With the diverged sibling gone, TSYNC must now succeed. */
	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
		      &self->apply_prog);
	ASSERT_EQ(0, ret) {
		TH_LOG("Expected the remaining sibling to sync");
	};

	pthread_mutex_lock(&self->mutex);

	/* If remaining sibling didn't have a chance to wake up during
	 * the first broadcast, manually reduce the num_waits now.
	 */
	if (self->sibling[sib].num_waits > 1)
		self->sibling[sib].num_waits = 1;
	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
		TH_LOG("cond broadcast non-zero");
	}
	pthread_mutex_unlock(&self->mutex);
	PTHREAD_JOIN(self->sibling[sib].tid, &status);
	EXPECT_EQ(0, (long)status);
	/* Poll for actual task death. pthread_join doesn't guarantee it. */
	while (!kill(self->sibling[sib].system_tid, 0))
		nanosleep(&delay, NULL);

	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
		      &self->apply_prog);
	ASSERT_EQ(0, ret);	/* just us chickens */
}
3018
3019 /* Make sure restarted syscalls are seen directly as "restart_syscall". */
TEST(syscall_restart)3020 TEST(syscall_restart)
3021 {
3022 long ret;
3023 unsigned long msg;
3024 pid_t child_pid;
3025 int pipefd[2];
3026 int status;
3027 siginfo_t info = { };
3028 struct sock_filter filter[] = {
3029 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
3030 offsetof(struct seccomp_data, nr)),
3031
3032 #ifdef __NR_sigreturn
3033 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 7, 0),
3034 #endif
3035 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 6, 0),
3036 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 5, 0),
3037 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 4, 0),
3038 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 5, 0),
3039 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_clock_nanosleep, 4, 0),
3040 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),
3041
3042 /* Allow __NR_write for easy logging. */
3043 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
3044 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
3045 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
3046 /* The nanosleep jump target. */
3047 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100),
3048 /* The restart_syscall jump target. */
3049 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200),
3050 };
3051 struct sock_fprog prog = {
3052 .len = (unsigned short)ARRAY_SIZE(filter),
3053 .filter = filter,
3054 };
3055 #if defined(__arm__)
3056 struct utsname utsbuf;
3057 #endif
3058
3059 ASSERT_EQ(0, pipe(pipefd));
3060
3061 child_pid = fork();
3062 ASSERT_LE(0, child_pid);
3063 if (child_pid == 0) {
3064 /* Child uses EXPECT not ASSERT to deliver status correctly. */
3065 char buf = ' ';
3066 struct timespec timeout = { };
3067
3068 /* Attach parent as tracer and stop. */
3069 EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
3070 EXPECT_EQ(0, raise(SIGSTOP));
3071
3072 EXPECT_EQ(0, close(pipefd[1]));
3073
3074 EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
3075 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3076 }
3077
3078 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
3079 EXPECT_EQ(0, ret) {
3080 TH_LOG("Failed to install filter!");
3081 }
3082
3083 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
3084 TH_LOG("Failed to read() sync from parent");
3085 }
3086 EXPECT_EQ('.', buf) {
3087 TH_LOG("Failed to get sync data from read()");
3088 }
3089
3090 /* Start nanosleep to be interrupted. */
3091 timeout.tv_sec = 1;
3092 errno = 0;
3093 EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
3094 TH_LOG("Call to nanosleep() failed (errno %d: %s)",
3095 errno, strerror(errno));
3096 }
3097
3098 /* Read final sync from parent. */
3099 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
3100 TH_LOG("Failed final read() from parent");
3101 }
3102 EXPECT_EQ('!', buf) {
3103 TH_LOG("Failed to get final data from read()");
3104 }
3105
3106 /* Directly report the status of our test harness results. */
3107 syscall(__NR_exit, _metadata->exit_code);
3108 }
3109 EXPECT_EQ(0, close(pipefd[0]));
3110
3111 /* Attach to child, setup options, and release. */
3112 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
3113 ASSERT_EQ(true, WIFSTOPPED(status));
3114 ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
3115 PTRACE_O_TRACESECCOMP));
3116 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
3117 ASSERT_EQ(1, write(pipefd[1], ".", 1));
3118
3119 /* Wait for nanosleep() to start. */
3120 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
3121 ASSERT_EQ(true, WIFSTOPPED(status));
3122 ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
3123 ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
3124 ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
3125 ASSERT_EQ(0x100, msg);
3126 ret = get_syscall(_metadata, child_pid);
3127 EXPECT_TRUE(ret == __NR_nanosleep || ret == __NR_clock_nanosleep);
3128
3129 /* Might as well check siginfo for sanity while we're here. */
3130 ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
3131 ASSERT_EQ(SIGTRAP, info.si_signo);
3132 ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
3133 EXPECT_EQ(0, info.si_errno);
3134 EXPECT_EQ(getuid(), info.si_uid);
3135 /* Verify signal delivery came from child (seccomp-triggered). */
3136 EXPECT_EQ(child_pid, info.si_pid);
3137
3138 /* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */
3139 ASSERT_EQ(0, kill(child_pid, SIGSTOP));
3140 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
3141 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
3142 ASSERT_EQ(true, WIFSTOPPED(status));
3143 ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
3144 ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
3145 /*
3146 * There is no siginfo on SIGSTOP any more, so we can't verify
3147 * signal delivery came from parent now (getpid() == info.si_pid).
3148 * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com
3149 * At least verify the SIGSTOP via PTRACE_GETSIGINFO.
3150 */
3151 EXPECT_EQ(SIGSTOP, info.si_signo);
3152
3153 /* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
3154 ASSERT_EQ(0, kill(child_pid, SIGCONT));
3155 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
3156 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
3157 ASSERT_EQ(true, WIFSTOPPED(status));
3158 ASSERT_EQ(SIGCONT, WSTOPSIG(status));
3159 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
3160
3161 /* Wait for restart_syscall() to start. */
3162 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
3163 ASSERT_EQ(true, WIFSTOPPED(status));
3164 ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
3165 ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
3166 ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
3167
3168 ASSERT_EQ(0x200, msg);
3169 ret = get_syscall(_metadata, child_pid);
3170 #if defined(__arm__)
3171 /*
3172 * - native ARM registers do NOT expose true syscall.
3173 * - compat ARM registers on ARM64 DO expose true syscall.
 * - values of utsbuf.machine include 'armv8l' or 'armv8b'
3175 * for ARM64 running in compat mode.
3176 */
3177 ASSERT_EQ(0, uname(&utsbuf));
3178 if ((strncmp(utsbuf.machine, "arm", 3) == 0) &&
3179 (strncmp(utsbuf.machine, "armv8l", 6) != 0) &&
3180 (strncmp(utsbuf.machine, "armv8b", 6) != 0)) {
3181 EXPECT_EQ(__NR_nanosleep, ret);
3182 } else
3183 #endif
3184 {
3185 EXPECT_EQ(__NR_restart_syscall, ret);
3186 }
3187
3188 /* Write again to end test. */
3189 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
3190 ASSERT_EQ(1, write(pipefd[1], "!", 1));
3191 EXPECT_EQ(0, close(pipefd[1]));
3192
3193 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
3194 if (WIFSIGNALED(status) || WEXITSTATUS(status))
3195 _metadata->exit_code = KSFT_FAIL;
3196 }
3197
/*
 * Exercise SECCOMP_FILTER_FLAG_LOG acceptance rules: rejected in strict
 * mode, accepted in filter mode. The test ends by calling getpid(), which
 * the kill filter terminates — hence TEST_SIGNAL(..., SIGSYS).
 */
TEST_SIGNAL(filter_flag_log, SIGSYS)
{
	/* Unconditionally-allowing filter, used only to probe flag handling. */
	struct sock_filter allow_filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	/* Kills on getpid(); allows everything else. */
	struct sock_filter kill_filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog allow_prog = {
		.len = (unsigned short)ARRAY_SIZE(allow_filter),
		.filter = allow_filter,
	};
	struct sock_fprog kill_prog = {
		.len = (unsigned short)ARRAY_SIZE(kill_filter),
		.filter = kill_filter,
	};
	long ret;
	pid_t parent = getppid();

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	/* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */
	ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG,
		      &allow_prog);
	ASSERT_NE(ENOSYS, errno) {
		TH_LOG("Kernel does not support seccomp syscall!");
	}
	EXPECT_NE(0, ret) {
		TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!");
	}
	EXPECT_EQ(EINVAL, errno) {
		TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!");
	}

	/* Verify that a simple, permissive filter can be added with no flags */
	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
	EXPECT_EQ(0, ret);

	/* See if the same filter can be added with the FILTER_FLAG_LOG flag */
	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
		      &allow_prog);
	ASSERT_NE(EINVAL, errno) {
		TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!");
	}
	EXPECT_EQ(0, ret);

	/* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */
	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
		      &kill_prog);
	EXPECT_EQ(0, ret);

	/* getppid() is allowed by every filter installed above. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* getpid() should never return. */
	EXPECT_EQ(0, syscall(__NR_getpid));
}
3258
/*
 * SECCOMP_GET_ACTION_AVAIL must report every known return action as
 * available and reject an unknown action value with EOPNOTSUPP.
 */
TEST(get_action_avail)
{
	__u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP,
			    SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
			    SECCOMP_RET_LOG, SECCOMP_RET_ALLOW };
	/* Value chosen outside any defined SECCOMP_RET_* action. */
	__u32 unknown_action = 0x10000000U;
	int i;
	long ret;

	/* Probe once so ENOSYS/EINVAL can distinguish missing support. */
	ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]);
	ASSERT_NE(ENOSYS, errno) {
		TH_LOG("Kernel does not support seccomp syscall!");
	}
	ASSERT_NE(EINVAL, errno) {
		TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!");
	}
	EXPECT_EQ(ret, 0);

	for (i = 0; i < ARRAY_SIZE(actions); i++) {
		ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]);
		EXPECT_EQ(ret, 0) {
			TH_LOG("Expected action (0x%X) not available!",
			       actions[i]);
		}
	}

	/* Check that an unknown action is handled properly (EOPNOTSUPP) */
	ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action);
	EXPECT_EQ(ret, -1);
	EXPECT_EQ(errno, EOPNOTSUPP);
}
3290
/*
 * PTRACE_SECCOMP_GET_METADATA should report each installed filter's flags
 * by index (filter_off). Requires real root and a ptrace-attached tracee.
 */
TEST(get_metadata)
{
	pid_t pid;
	int pipefd[2];
	char buf;
	struct seccomp_metadata md;
	long ret;

	/* Only real root can get metadata. */
	if (geteuid()) {
		SKIP(return, "get_metadata requires real root");
		return;
	}

	ASSERT_EQ(0, pipe(pipefd));

	pid = fork();
	ASSERT_GE(pid, 0);
	if (pid == 0) {
		struct sock_filter filter[] = {
			BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		};
		struct sock_fprog prog = {
			.len = (unsigned short)ARRAY_SIZE(filter),
			.filter = filter,
		};

		/* one with log, one without */
		EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER,
				     SECCOMP_FILTER_FLAG_LOG, &prog));
		EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog));

		/* Tell the parent both filters are installed, then park. */
		EXPECT_EQ(0, close(pipefd[0]));
		ASSERT_EQ(1, write(pipefd[1], "1", 1));
		ASSERT_EQ(0, close(pipefd[1]));

		while (1)
			sleep(100);
	}

	/* Wait for the child's sync byte before attaching. */
	ASSERT_EQ(0, close(pipefd[1]));
	ASSERT_EQ(1, read(pipefd[0], &buf, 1));

	ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid));
	ASSERT_EQ(pid, waitpid(pid, NULL, 0));

	/* Past here must not use ASSERT or child process is never killed. */

	/* Index 0: expected to be the filter installed with FLAG_LOG. */
	md.filter_off = 0;
	errno = 0;
	ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
	EXPECT_EQ(sizeof(md), ret) {
		if (errno == EINVAL)
			SKIP(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)");
	}

	EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG);
	EXPECT_EQ(md.filter_off, 0);

	/* Index 1: the filter installed without flags. */
	md.filter_off = 1;
	ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
	EXPECT_EQ(sizeof(md), ret);
	EXPECT_EQ(md.flags, 0);
	EXPECT_EQ(md.filter_off, 1);

skip:
	ASSERT_EQ(0, kill(pid, SIGKILL));
}
3359
/*
 * Install a filter that returns SECCOMP_RET_USER_NOTIF for syscall @nr
 * and SECCOMP_RET_ALLOW for every other syscall, passing @flags straight
 * through to seccomp(2). Returns the seccomp(2) result (the listener fd
 * when @flags contains SECCOMP_FILTER_FLAG_NEW_LISTENER).
 */
static int user_notif_syscall(int nr, unsigned int flags)
{
	struct sock_filter insns[] = {
		/* A = seccomp_data.nr */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		/* if (A == nr) fall through to USER_NOTIF, else skip to ALLOW */
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog fprog = {
		.filter = insns,
		.len = (unsigned short)ARRAY_SIZE(insns),
	};

	return seccomp(SECCOMP_SET_MODE_FILTER, flags, &fprog);
}
3377
3378 #define USER_NOTIF_MAGIC INT_MAX
/*
 * End-to-end check of the user-notification machinery: ENOSYS without a
 * listener, EBUSY on a second listener, RECV/SEND round-trip, poll()
 * readiness transitions, and flags validation on the response.
 */
TEST(user_notification_basic)
{
	pid_t pid;
	long ret;
	int status, listener;
	struct seccomp_notif req = {};
	struct seccomp_notif_resp resp = {};
	struct pollfd pollfd;

	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	pid = fork();
	ASSERT_GE(pid, 0);

	/* Check that we get -ENOSYS with no listener attached */
	if (pid == 0) {
		if (user_notif_syscall(__NR_getppid, 0) < 0)
			exit(1);
		ret = syscall(__NR_getppid);
		/* Exit 0 only if the call failed with ENOSYS. */
		exit(ret >= 0 || errno != ENOSYS);
	}

	EXPECT_EQ(waitpid(pid, &status, 0), pid);
	EXPECT_EQ(true, WIFEXITED(status));
	EXPECT_EQ(0, WEXITSTATUS(status));

	/* Add some no-op filters for grins. */
	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);

	/* Check that the basic notification machinery works */
	listener = user_notif_syscall(__NR_getppid,
				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
	ASSERT_GE(listener, 0);

	/* Installing a second listener in the chain should EBUSY */
	EXPECT_EQ(user_notif_syscall(__NR_getppid,
				     SECCOMP_FILTER_FLAG_NEW_LISTENER),
		  -1);
	EXPECT_EQ(errno, EBUSY);

	pid = fork();
	ASSERT_GE(pid, 0);

	/* Child blocks in getppid() until the parent answers it. */
	if (pid == 0) {
		ret = syscall(__NR_getppid);
		exit(ret != USER_NOTIF_MAGIC);
	}

	/* Pending notification: listener must be readable, not writable. */
	pollfd.fd = listener;
	pollfd.events = POLLIN | POLLOUT;

	EXPECT_GT(poll(&pollfd, 1, -1), 0);
	EXPECT_EQ(pollfd.revents, POLLIN);

	/* Test that we can't pass garbage to the kernel. */
	memset(&req, 0, sizeof(req));
	req.pid = -1;
	errno = 0;
	ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno);

	/* Re-receive properly if the garbage attempt was rejected. */
	if (ret) {
		req.pid = 0;
		EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
	}

	/* Notification consumed: listener must now be writable only. */
	pollfd.fd = listener;
	pollfd.events = POLLIN | POLLOUT;

	EXPECT_GT(poll(&pollfd, 1, -1), 0);
	EXPECT_EQ(pollfd.revents, POLLOUT);

	EXPECT_EQ(req.data.nr,  __NR_getppid);

	resp.id = req.id;
	resp.error = 0;
	resp.val = USER_NOTIF_MAGIC;

	/* check that we make sure flags == 0 */
	resp.flags = 1;
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
	EXPECT_EQ(errno, EINVAL);

	resp.flags = 0;
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);

	EXPECT_EQ(waitpid(pid, &status, 0), pid);
	EXPECT_EQ(true, WIFEXITED(status));
	EXPECT_EQ(0, WEXITSTATUS(status));
}
3484
/*
 * NEW_LISTENER and TSYNC used to be mutually exclusive (EINVAL); adding
 * TSYNC_ESRCH makes the combination valid.
 */
TEST(user_notification_with_tsync)
{
	int ret;
	unsigned int flags;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	/* these were exclusive */
	flags = SECCOMP_FILTER_FLAG_NEW_LISTENER |
		SECCOMP_FILTER_FLAG_TSYNC;
	ASSERT_EQ(-1, user_notif_syscall(__NR_getppid, flags));
	ASSERT_EQ(EINVAL, errno);

	/* but now they're not */
	flags |= SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
	ret = user_notif_syscall(__NR_getppid, flags);
	/* close() before the assert so a valid listener fd isn't leaked;
	 * close(-1) on the failure path is a harmless EBADF. */
	close(ret);
	ASSERT_LE(0, ret);
}
3507
/*
 * Kill the notifying task while its notification is outstanding: the id
 * must become invalid and a late SEND must fail with ENOENT.
 */
TEST(user_notification_kill_in_middle)
{
	pid_t pid;
	long ret;
	int listener;
	struct seccomp_notif req = {};
	struct seccomp_notif_resp resp = {};

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	listener = user_notif_syscall(__NR_getppid,
				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
	ASSERT_GE(listener, 0);

	/*
	 * Check that nothing bad happens when we kill the task in the middle
	 * of a syscall.
	 */
	pid = fork();
	ASSERT_GE(pid, 0);

	if (pid == 0) {
		ret = syscall(__NR_getppid);
		exit(ret != USER_NOTIF_MAGIC);
	}

	/* Notification id is valid while the child is alive... */
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), 0);

	EXPECT_EQ(kill(pid, SIGKILL), 0);
	EXPECT_EQ(waitpid(pid, NULL, 0), pid);

	/* ...and invalid once it has been reaped. */
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), -1);

	resp.id = req.id;
	ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp);
	EXPECT_EQ(ret, -1);
	EXPECT_EQ(errno, ENOENT);
}
3550
3551 static int handled = -1;
3552
signal_handler(int signal)3553 static void signal_handler(int signal)
3554 {
3555 if (write(handled, "c", 1) != 1)
3556 perror("write from signal");
3557 }
3558
/* Deliberately-empty handler; presumably installed so signal delivery can
 * interrupt a blocked syscall without side effects — confirm at use sites. */
static void signal_handler_nop(int signal)
{
}
3562
/*
 * A signal delivered to a task blocked in USER_NOTIF aborts the pending
 * notification (late SEND -> ENOENT) and re-issues it; the supervisor can
 * then answer with -ERESTARTSYS, which the child observes as errno 512.
 */
TEST(user_notification_signal)
{
	pid_t pid;
	long ret;
	int status, listener, sk_pair[2];
	struct seccomp_notif req = {};
	struct seccomp_notif_resp resp = {};
	char c;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	/* Socketpair lets the child's signal handler signal the parent. */
	ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);

	listener = user_notif_syscall(__NR_gettid,
				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
	ASSERT_GE(listener, 0);

	pid = fork();
	ASSERT_GE(pid, 0);

	if (pid == 0) {
		close(sk_pair[0]);
		handled = sk_pair[1];
		if (signal(SIGUSR1, signal_handler) == SIG_ERR) {
			perror("signal");
			exit(1);
		}
		/*
		 * ERESTARTSYS behavior is a bit hard to test, because we need
		 * to rely on a signal that has not yet been handled. Let's at
		 * least check that the error code gets propagated through, and
		 * hope that it doesn't break when there is actually a signal :)
		 */
		ret = syscall(__NR_gettid);
		/* 512 == -ERESTARTSYS exposed to userspace as errno. */
		exit(!(ret == -1 && errno == 512));
	}

	close(sk_pair[1]);

	memset(&req, 0, sizeof(req));
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);

	EXPECT_EQ(kill(pid, SIGUSR1), 0);

	/*
	 * Make sure the signal really is delivered, which means we're not
	 * stuck in the user notification code any more and the notification
	 * should be dead.
	 */
	EXPECT_EQ(read(sk_pair[0], &c, 1), 1);

	resp.id = req.id;
	resp.error = -EPERM;
	resp.val = 0;

	/* The original notification died with the signal. */
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
	EXPECT_EQ(errno, ENOENT);

	/* The syscall is re-notified; answer it with -ERESTARTSYS. */
	memset(&req, 0, sizeof(req));
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);

	resp.id = req.id;
	resp.error = -512; /* -ERESTARTSYS */
	resp.val = 0;

	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);

	EXPECT_EQ(waitpid(pid, &status, 0), pid);
	EXPECT_EQ(true, WIFEXITED(status));
	EXPECT_EQ(0, WEXITSTATUS(status));
}
3637
TEST(user_notification_closed_listener)3638 TEST(user_notification_closed_listener)
3639 {
3640 pid_t pid;
3641 long ret;
3642 int status, listener;
3643
3644 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3645 ASSERT_EQ(0, ret) {
3646 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3647 }
3648
3649 listener = user_notif_syscall(__NR_getppid,
3650 SECCOMP_FILTER_FLAG_NEW_LISTENER);
3651 ASSERT_GE(listener, 0);
3652
3653 /*
3654 * Check that we get an ENOSYS when the listener is closed.
3655 */
3656 pid = fork();
3657 ASSERT_GE(pid, 0);
3658 if (pid == 0) {
3659 close(listener);
3660 ret = syscall(__NR_getppid);
3661 exit(ret != -1 && errno != ENOSYS);
3662 }
3663
3664 close(listener);
3665
3666 EXPECT_EQ(waitpid(pid, &status, 0), pid);
3667 EXPECT_EQ(true, WIFEXITED(status));
3668 EXPECT_EQ(0, WEXITSTATUS(status));
3669 }
3670
3671 /*
3672 * Check that a pid in a child namespace still shows up as valid in ours.
3673 */
/*
 * A notifying task in a descendant pid namespace must still report a
 * valid (translated) pid to the supervisor.
 */
TEST(user_notification_child_pid_ns)
{
	pid_t pid;
	int status, listener;
	struct seccomp_notif req = {};
	struct seccomp_notif_resp resp = {};

	/* New user+pid ns so the fork()ed child lands in a child pid ns. */
	ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0) {
		if (errno == EINVAL)
			SKIP(return, "kernel missing CLONE_NEWUSER support");
	};

	listener = user_notif_syscall(__NR_getppid,
				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
	ASSERT_GE(listener, 0);

	pid = fork();
	ASSERT_GE(pid, 0);

	if (pid == 0)
		exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);

	/* req.pid must match the pid as seen from our namespace. */
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
	EXPECT_EQ(req.pid, pid);

	resp.id = req.id;
	resp.error = 0;
	resp.val = USER_NOTIF_MAGIC;

	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);

	EXPECT_EQ(waitpid(pid, &status, 0), pid);
	EXPECT_EQ(true, WIFEXITED(status));
	EXPECT_EQ(0, WEXITSTATUS(status));
	close(listener);
}
3710
3711 /*
3712 * Check that a pid in a sibling (i.e. unrelated) namespace shows up as 0, i.e.
3713 * invalid.
3714 */
/*
 * A notifying task in a sibling (unrelated) pid namespace must be
 * reported with pid 0, i.e. not resolvable from the supervisor's ns.
 */
TEST(user_notification_sibling_pid_ns)
{
	pid_t pid, pid2;
	int status, listener;
	struct seccomp_notif req = {};
	struct seccomp_notif_resp resp = {};

	ASSERT_EQ(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), 0) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	listener = user_notif_syscall(__NR_getppid,
				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
	ASSERT_GE(listener, 0);

	pid = fork();
	ASSERT_GE(pid, 0);

	if (pid == 0) {
		/* First branch: grandchild in its own (first) pid ns. */
		ASSERT_EQ(unshare(CLONE_NEWPID), 0) {
			if (errno == EPERM)
				SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN");
			else if (errno == EINVAL)
				SKIP(return, "CLONE_NEWPID is invalid (missing CONFIG_PID_NS?)");
		}

		pid2 = fork();
		ASSERT_GE(pid2, 0);

		if (pid2 == 0)
			exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);

		EXPECT_EQ(waitpid(pid2, &status, 0), pid2);
		EXPECT_EQ(true, WIFEXITED(status));
		EXPECT_EQ(0, WEXITSTATUS(status));
		exit(WEXITSTATUS(status));
	}

	/* Create the sibling ns, and sibling in it. */
	ASSERT_EQ(unshare(CLONE_NEWPID), 0) {
		if (errno == EPERM)
			SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN");
		else if (errno == EINVAL)
			SKIP(return, "CLONE_NEWPID is invalid (missing CONFIG_PID_NS?)");
	}
	ASSERT_EQ(errno, 0);

	pid2 = fork();
	ASSERT_GE(pid2, 0);

	if (pid2 == 0) {
		/* This supervisor lives in the sibling pid ns. */
		ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
		/*
		 * The pid should be 0, i.e. the task is in some namespace that
		 * we can't "see".
		 */
		EXPECT_EQ(req.pid, 0);

		resp.id = req.id;
		resp.error = 0;
		resp.val = USER_NOTIF_MAGIC;

		ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
		exit(0);
	}

	close(listener);

	EXPECT_EQ(waitpid(pid, &status, 0), pid);
	EXPECT_EQ(true, WIFEXITED(status));
	EXPECT_EQ(0, WEXITSTATUS(status));

	EXPECT_EQ(waitpid(pid2, &status, 0), pid2);
	EXPECT_EQ(true, WIFEXITED(status));
	EXPECT_EQ(0, WEXITSTATUS(status));
}
3791
/*
 * A RECV with a faulting (NULL) buffer must fail with EFAULT without
 * consuming the pending notification.
 */
TEST(user_notification_fault_recv)
{
	pid_t pid;
	int status, listener;
	struct seccomp_notif req = {};
	struct seccomp_notif_resp resp = {};

	ASSERT_EQ(unshare(CLONE_NEWUSER), 0) {
		if (errno == EINVAL)
			SKIP(return, "kernel missing CLONE_NEWUSER support");
	}

	listener = user_notif_syscall(__NR_getppid,
				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
	ASSERT_GE(listener, 0);

	pid = fork();
	ASSERT_GE(pid, 0);

	if (pid == 0)
		exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);

	/* Do a bad recv() */
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, NULL), -1);
	EXPECT_EQ(errno, EFAULT);

	/* We should still be able to receive this notification, though. */
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
	EXPECT_EQ(req.pid, pid);

	resp.id = req.id;
	resp.error = 0;
	resp.val = USER_NOTIF_MAGIC;

	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);

	EXPECT_EQ(waitpid(pid, &status, 0), pid);
	EXPECT_EQ(true, WIFEXITED(status));
	EXPECT_EQ(0, WEXITSTATUS(status));
}
3832
/*
 * The kernel's reported notification struct sizes must match the UAPI
 * structs this test was built against.
 */
TEST(seccomp_get_notif_sizes)
{
	struct seccomp_notif_sizes sizes;

	ASSERT_EQ(seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes), 0);
	EXPECT_EQ(sizes.seccomp_notif, sizeof(struct seccomp_notif));
	EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp));
}
3841
/*
 * SECCOMP_USER_NOTIF_FLAG_CONTINUE lets the supervisor resume the
 * original syscall; error/val must be zero when the flag is set. The
 * child verifies the continued dup() produced a real duplicate fd.
 */
TEST(user_notification_continue)
{
	pid_t pid;
	long ret;
	int status, listener;
	struct seccomp_notif req = {};
	struct seccomp_notif_resp resp = {};
	struct pollfd pollfd;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
	ASSERT_GE(listener, 0);

	pid = fork();
	ASSERT_GE(pid, 0);

	if (pid == 0) {
		int dup_fd, pipe_fds[2];
		pid_t self;

		ASSERT_GE(pipe(pipe_fds), 0);

		/* dup() is the notified syscall; continuation runs it. */
		dup_fd = dup(pipe_fds[0]);
		ASSERT_GE(dup_fd, 0);
		EXPECT_NE(pipe_fds[0], dup_fd);

		self = getpid();
		/* filecmp() (kcmp-based, defined earlier in this file)
		 * confirms both fds reference the same open file. */
		ASSERT_EQ(filecmp(self, self, pipe_fds[0], dup_fd), 0);
		exit(0);
	}

	pollfd.fd = listener;
	pollfd.events = POLLIN | POLLOUT;

	EXPECT_GT(poll(&pollfd, 1, -1), 0);
	EXPECT_EQ(pollfd.revents, POLLIN);

	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);

	pollfd.fd = listener;
	pollfd.events = POLLIN | POLLOUT;

	EXPECT_GT(poll(&pollfd, 1, -1), 0);
	EXPECT_EQ(pollfd.revents, POLLOUT);

	EXPECT_EQ(req.data.nr, __NR_dup);

	resp.id = req.id;
	resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE;

	/*
	 * Verify that setting SECCOMP_USER_NOTIF_FLAG_CONTINUE enforces other
	 * args be set to 0.
	 */
	resp.error = 0;
	resp.val = USER_NOTIF_MAGIC;
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
	EXPECT_EQ(errno, EINVAL);

	resp.error = USER_NOTIF_MAGIC;
	resp.val = 0;
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
	EXPECT_EQ(errno, EINVAL);

	resp.error = 0;
	resp.val = 0;
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0) {
		if (errno == EINVAL)
			SKIP(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE");
	}

skip:
	EXPECT_EQ(waitpid(pid, &status, 0), pid);
	EXPECT_EQ(true, WIFEXITED(status));
	EXPECT_EQ(0, WEXITSTATUS(status)) {
		/* Exit code 2 is the child's "kcmp unsupported" marker. */
		if (WEXITSTATUS(status) == 2) {
			SKIP(return, "Kernel does not support kcmp() syscall");
			return;
		}
	}
}
3927
/*
 * When every task using a filter has exited, the listener fd must signal
 * POLLHUP. CLONE_FILES makes the child's dup2(listener, 200) visible here.
 */
TEST(user_notification_filter_empty)
{
	pid_t pid;
	long ret;
	int status;
	struct pollfd pollfd;
	struct __clone_args args = {
		.flags = CLONE_FILES,
		.exit_signal = SIGCHLD,
	};

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	if (__NR_clone3 < 0)
		SKIP(return, "Test not built with clone3 support");

	pid = sys_clone3(&args, sizeof(args));
	ASSERT_GE(pid, 0);

	if (pid == 0) {
		int listener;

		listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER);
		if (listener < 0)
			_exit(EXIT_FAILURE);

		/* Park the listener at fd 200 in the shared fd table. */
		if (dup2(listener, 200) != 200)
			_exit(EXIT_FAILURE);

		close(listener);

		_exit(EXIT_SUCCESS);
	}

	EXPECT_EQ(waitpid(pid, &status, 0), pid);
	EXPECT_EQ(true, WIFEXITED(status));
	EXPECT_EQ(0, WEXITSTATUS(status));

	/*
	 * The seccomp filter has become unused so we should be notified once
	 * the kernel gets around to cleaning up task struct.
	 */
	pollfd.fd = 200;
	pollfd.events = POLLHUP;

	/* GNU "?:" keeps the masked revents, or 0 if POLLHUP never came. */
	EXPECT_GT(poll(&pollfd, 1, 2000), 0);
	EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0);
}
3979
/*
 * Same "filter became unused" condition as user_notification_filter_empty,
 * but observed via SECCOMP_IOCTL_NOTIF_RECV returning ENOENT instead of
 * POLLHUP.
 */
TEST(user_ioctl_notification_filter_empty)
{
	pid_t pid;
	long ret;
	int status, p[2];
	struct __clone_args args = {
		.flags = CLONE_FILES,
		.exit_signal = SIGCHLD,
	};
	struct seccomp_notif req = {};

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	if (__NR_clone3 < 0)
		SKIP(return, "Test not built with clone3 support");

	ASSERT_EQ(0, pipe(p));

	pid = sys_clone3(&args, sizeof(args));
	ASSERT_GE(pid, 0);

	if (pid == 0) {
		int listener;

		listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER);
		if (listener < 0)
			_exit(EXIT_FAILURE);

		/* Park the listener at fd 200 in the shared fd table. */
		if (dup2(listener, 200) != 200)
			_exit(EXIT_FAILURE);
		close(p[1]);
		close(listener);
		sleep(1);

		_exit(EXIT_SUCCESS);
	}
	/*
	 * Wait for EOF on the pipe (child closed p[1]). NOTE(review): a
	 * non-zero read here exits with SUCCESS — apparently "cannot test"
	 * is treated as pass; confirm this is intentional.
	 */
	if (read(p[0], &status, 1) != 0)
		_exit(EXIT_SUCCESS);
	close(p[0]);
	/*
	 * The seccomp filter has become unused so we should be notified once
	 * the kernel gets around to cleaning up task struct.
	 */
	EXPECT_EQ(ioctl(200, SECCOMP_IOCTL_NOTIF_RECV, &req), -1);
	EXPECT_EQ(errno, ENOENT);

	EXPECT_EQ(waitpid(pid, &status, 0), pid);
	EXPECT_EQ(true, WIFEXITED(status));
	EXPECT_EQ(0, WEXITSTATUS(status));
}
4033
/* No-op pthread body: created and immediately joined just to cycle threads
 * through the seccomp filter's task set. */
static void *do_thread(void *data)
{
	return NULL;
}
4038
/*
 * Like user_notification_filter_empty, but the filter is briefly shared
 * by forked children and created/joined threads before everyone exits;
 * the listener must still report POLLHUP once the filter is unused.
 */
TEST(user_notification_filter_empty_threaded)
{
	pid_t pid;
	long ret;
	int status;
	struct pollfd pollfd;
	struct __clone_args args = {
		.flags = CLONE_FILES,
		.exit_signal = SIGCHLD,
	};

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	if (__NR_clone3 < 0)
		SKIP(return, "Test not built with clone3 support");

	pid = sys_clone3(&args, sizeof(args));
	ASSERT_GE(pid, 0);

	if (pid == 0) {
		pid_t pid1, pid2;
		int listener, status;
		pthread_t thread;

		listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
		if (listener < 0)
			_exit(EXIT_FAILURE);

		/* Park the listener at fd 200 in the shared fd table. */
		if (dup2(listener, 200) != 200)
			_exit(EXIT_FAILURE);

		close(listener);

		/* Two short-lived forks inherit the filter... */
		pid1 = fork();
		if (pid1 < 0)
			_exit(EXIT_FAILURE);

		if (pid1 == 0)
			_exit(EXIT_SUCCESS);

		pid2 = fork();
		if (pid2 < 0)
			_exit(EXIT_FAILURE);

		if (pid2 == 0)
			_exit(EXIT_SUCCESS);

		/* ...and two short-lived threads share it too. */
		if (pthread_create(&thread, NULL, do_thread, NULL) ||
		    pthread_join(thread, NULL))
			_exit(EXIT_FAILURE);

		if (pthread_create(&thread, NULL, do_thread, NULL) ||
		    pthread_join(thread, NULL))
			_exit(EXIT_FAILURE);

		if (waitpid(pid1, &status, 0) != pid1 || !WIFEXITED(status) ||
		    WEXITSTATUS(status))
			_exit(EXIT_FAILURE);

		if (waitpid(pid2, &status, 0) != pid2 || !WIFEXITED(status) ||
		    WEXITSTATUS(status))
			_exit(EXIT_FAILURE);

		exit(EXIT_SUCCESS);
	}

	EXPECT_EQ(waitpid(pid, &status, 0), pid);
	EXPECT_EQ(true, WIFEXITED(status));
	EXPECT_EQ(0, WEXITSTATUS(status));

	/*
	 * The seccomp filter has become unused so we should be notified once
	 * the kernel gets around to cleaning up task struct.
	 */
	pollfd.fd = 200;
	pollfd.events = POLLHUP;

	EXPECT_GT(poll(&pollfd, 1, 2000), 0);
	EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0);
}
4122
4123
/*
 * Return the lowest unused fd number strictly greater than @prev_fd.
 * "Unused" is detected by fcntl(F_GETFD) failing (EBADF on a closed fd).
 * Aborts the process if every fd up to FD_SETSIZE is taken.
 */
int get_next_fd(int prev_fd)
{
	int fd = prev_fd;

	while (++fd < FD_SETSIZE) {
		if (fcntl(fd, F_GETFD) == -1)
			return fd;
	}
	/* No free descriptor below FD_SETSIZE: give up on the whole test. */
	_exit(EXIT_FAILURE);
}
4132
/*
 * Exercise SECCOMP_IOCTL_NOTIF_ADDFD: invalid-argument rejection, struct
 * size compatibility (SMALL/BIG), arbitrary and specific target fds, and
 * the atomic ADDFD+SEND path. Relies on get_next_fd() to predict which
 * fd number the kernel will hand out next.
 */
TEST(user_notification_addfd)
{
	pid_t pid;
	long ret;
	int status, listener, memfd, fd, nextfd;
	struct seccomp_notif_addfd addfd = {};
	struct seccomp_notif_addfd_small small = {};
	struct seccomp_notif_addfd_big big = {};
	struct seccomp_notif req = {};
	struct seccomp_notif_resp resp = {};
	/* 100 ms */
	struct timespec delay = { .tv_nsec = 100000000 };

	/* There may be arbitrary already-open fds at test start. */
	memfd = memfd_create("test", 0);
	ASSERT_GE(memfd, 0);
	nextfd = get_next_fd(memfd);

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	/* fd: 4 */
	/* Check that the basic notification machinery works */
	listener = user_notif_syscall(__NR_getppid,
				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
	ASSERT_EQ(listener, nextfd);
	nextfd = get_next_fd(nextfd);

	pid = fork();
	ASSERT_GE(pid, 0);

	if (pid == 0) {
		/* fds will be added and this value is expected */
		if (syscall(__NR_getppid) != USER_NOTIF_MAGIC)
			exit(1);

		/* Atomic addfd+send is received here. Check it is a valid fd */
		if (fcntl(syscall(__NR_getppid), F_GETFD) == -1)
			exit(1);

		exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
	}

	ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);

	addfd.srcfd = memfd;
	addfd.newfd = 0;
	addfd.id = req.id;
	addfd.flags = 0x0;

	/* Verify bad newfd_flags cannot be set */
	addfd.newfd_flags = ~O_CLOEXEC;
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
	EXPECT_EQ(errno, EINVAL);
	addfd.newfd_flags = O_CLOEXEC;

	/* Verify bad flags cannot be set */
	addfd.flags = 0xff;
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
	EXPECT_EQ(errno, EINVAL);
	addfd.flags = 0;

	/* Verify that remote_fd cannot be set without setting flags */
	addfd.newfd = 1;
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
	EXPECT_EQ(errno, EINVAL);
	addfd.newfd = 0;

	/* Verify small size cannot be set */
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_SMALL, &small), -1);
	EXPECT_EQ(errno, EINVAL);

	/* Verify we can't send bits filled in unknown buffer area */
	memset(&big, 0xAA, sizeof(big));
	big.addfd = addfd;
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big), -1);
	EXPECT_EQ(errno, E2BIG);


	/* Verify we can set an arbitrary remote fd */
	fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
	EXPECT_EQ(fd, nextfd);
	nextfd = get_next_fd(nextfd);
	EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);

	/* Verify we can set an arbitrary remote fd with large size */
	memset(&big, 0x0, sizeof(big));
	big.addfd = addfd;
	fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big);
	EXPECT_EQ(fd, nextfd);
	nextfd = get_next_fd(nextfd);

	/* Verify we can set a specific remote fd */
	addfd.newfd = 42;
	addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
	fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
	EXPECT_EQ(fd, 42);
	EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);

	/* Resume syscall */
	resp.id = req.id;
	resp.error = 0;
	resp.val = USER_NOTIF_MAGIC;
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);

	/*
	 * This sets the ID of the ADD FD to the last request plus 1. The
	 * notification ID increments 1 per notification.
	 */
	addfd.id = req.id + 1;

	/* This spins until the underlying notification is generated */
	/*
	 * NOTE(review): errno is always positive, so "errno != -EINPROGRESS"
	 * is always true and the condition reduces to "ioctl(...) != -1".
	 * Presumably the intent was to spin until ADDFD fails with
	 * EINPROGRESS (request exists but is not yet received) -- confirm
	 * against seccomp_unotify(2) before changing.
	 */
	while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 &&
	       errno != -EINPROGRESS)
		nanosleep(&delay, NULL);

	memset(&req, 0, sizeof(req));
	ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
	ASSERT_EQ(addfd.id, req.id);

	/* Verify we can do an atomic addfd and send */
	addfd.newfd = 0;
	addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
	fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
	/*
	 * Child has earlier "low" fds and now 42, so we expect the next
	 * lowest available fd to be assigned here.
	 */
	EXPECT_EQ(fd, nextfd);
	nextfd = get_next_fd(nextfd);
	ASSERT_EQ(filecmp(getpid(), pid, memfd, fd), 0);

	/*
	 * This sets the ID of the ADD FD to the last request plus 1. The
	 * notification ID increments 1 per notification.
	 */
	addfd.id = req.id + 1;

	/* This spins until the underlying notification is generated */
	/* NOTE(review): same always-true errno comparison as the loop above. */
	while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 &&
	       errno != -EINPROGRESS)
		nanosleep(&delay, NULL);

	memset(&req, 0, sizeof(req));
	ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
	ASSERT_EQ(addfd.id, req.id);

	resp.id = req.id;
	resp.error = 0;
	resp.val = USER_NOTIF_MAGIC;
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);

	/* Wait for child to finish. */
	EXPECT_EQ(waitpid(pid, &status, 0), pid);
	EXPECT_EQ(true, WIFEXITED(status));
	EXPECT_EQ(0, WEXITSTATUS(status));

	close(memfd);
}
4294
/*
 * Verify ADDFD respects the target's RLIMIT_NOFILE: with the child's fd
 * limit dropped to zero, plain/SEND installs must fail with EMFILE and
 * SETFD to a specific number must fail with EBADF.
 */
TEST(user_notification_addfd_rlimit)
{
	pid_t pid;
	long ret;
	int status, listener, memfd;
	struct seccomp_notif_addfd addfd = {};
	struct seccomp_notif req = {};
	struct seccomp_notif_resp resp = {};
	/* Zero out the child's fd limit entirely. */
	const struct rlimit lim = {
		.rlim_cur = 0,
		.rlim_max = 0,
	};

	memfd = memfd_create("test", 0);
	ASSERT_GE(memfd, 0);

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	/* Check that the basic notification machinery works */
	listener = user_notif_syscall(__NR_getppid,
				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
	ASSERT_GE(listener, 0);

	pid = fork();
	ASSERT_GE(pid, 0);

	if (pid == 0)
		exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);


	ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);

	/* Drop the child's NOFILE limit while it is parked in the notif wait. */
	ASSERT_EQ(prlimit(pid, RLIMIT_NOFILE, &lim, NULL), 0);

	addfd.srcfd = memfd;
	addfd.newfd_flags = O_CLOEXEC;
	addfd.newfd = 0;
	addfd.id = req.id;
	addfd.flags = 0;

	/* Should probably spot check /proc/sys/fs/file-nr */
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
	EXPECT_EQ(errno, EMFILE);

	/* Atomic addfd+send must fail the same way. */
	addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
	EXPECT_EQ(errno, EMFILE);

	/* SETFD to an fd number above the limit reports EBADF (dup2 semantics). */
	addfd.newfd = 100;
	addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
	EXPECT_EQ(errno, EBADF);

	resp.id = req.id;
	resp.error = 0;
	resp.val = USER_NOTIF_MAGIC;

	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);

	/* Wait for child to finish. */
	EXPECT_EQ(waitpid(pid, &status, 0), pid);
	EXPECT_EQ(true, WIFEXITED(status));
	EXPECT_EQ(0, WEXITSTATUS(status));

	close(memfd);
}
4364
4365 #ifndef SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP
4366 #define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0)
4367 #define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64)
4368 #endif
4369
/*
 * Exercise SECCOMP_IOCTL_NOTIF_SET_FLAGS: invalid flag bits are rejected
 * with EINVAL, and the SYNC_WAKE_UP flag can be enabled and the normal
 * RECV/SEND round-trip still works.
 */
TEST(user_notification_sync)
{
	struct seccomp_notif req = {};
	struct seccomp_notif_resp resp = {};
	int status, listener;
	pid_t pid;
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	listener = user_notif_syscall(__NR_getppid,
				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
	ASSERT_GE(listener, 0);

	/* Try to set invalid flags. */
	EXPECT_SYSCALL_RETURN(-EINVAL,
		ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS, 0xffffffff, 0));

	/* Enable synchronous wakeup between supervisor and notifying task. */
	ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS,
			SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP, 0), 0);

	pid = fork();
	ASSERT_GE(pid, 0);
	if (pid == 0) {
		ret = syscall(__NR_getppid);
		ASSERT_EQ(ret, USER_NOTIF_MAGIC) {
			_exit(1);
		}
		_exit(0);
	}

	req.pid = 0;
	ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);

	ASSERT_EQ(req.data.nr,  __NR_getppid);

	resp.id = req.id;
	resp.error = 0;
	resp.val = USER_NOTIF_MAGIC;
	resp.flags = 0;
	ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);

	ASSERT_EQ(waitpid(pid, &status, 0), pid);
	ASSERT_EQ(status, 0);
}
4418
4419
4420 /* Make sure PTRACE_O_SUSPEND_SECCOMP requires CAP_SYS_ADMIN. */
/* Fixture state: pid of the forked, parked child (0 until fork succeeds). */
FIXTURE(O_SUSPEND_SECCOMP) {
	pid_t pid;
};
4424
FIXTURE_SETUP(O_SUSPEND_SECCOMP)
{
	/* Any filter will do; SUSPEND_SECCOMP only matters once one is installed. */
	ERRNO_FILTER(block_read, E2BIG);
	cap_value_t cap_list[] = { CAP_SYS_ADMIN };
	cap_t caps;

	self->pid = 0;

	/* make sure we don't have CAP_SYS_ADMIN */
	caps = cap_get_proc();
	ASSERT_NE(NULL, caps);
	ASSERT_EQ(0, cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_list, CAP_CLEAR));
	ASSERT_EQ(0, cap_set_proc(caps));
	cap_free(caps);

	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
	ASSERT_EQ(0, prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_block_read));

	self->pid = fork();
	ASSERT_GE(self->pid, 0);

	if (self->pid == 0) {
		/* Child parks here until teardown SIGKILLs it. */
		while (1)
			pause();
		_exit(127);
	}
}
4452
FIXTURE_TEARDOWN(O_SUSPEND_SECCOMP)
{
	/* Kill the parked child if setup got as far as forking it. */
	if (self->pid)
		kill(self->pid, SIGKILL);
}
4458
/* PTRACE_SETOPTIONS with O_SUSPEND_SECCOMP must fail EPERM without CAP_SYS_ADMIN. */
TEST_F(O_SUSPEND_SECCOMP, setoptions)
{
	int wstatus;

	ASSERT_EQ(0, ptrace(PTRACE_ATTACH, self->pid, NULL, 0));
	/* Wait for the attach-stop before issuing SETOPTIONS. */
	ASSERT_EQ(self->pid, wait(&wstatus));
	ASSERT_EQ(-1, ptrace(PTRACE_SETOPTIONS, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP));
	if (errno == EINVAL)
		SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)");
	ASSERT_EQ(EPERM, errno);
}
4470
/* Same capability check via the PTRACE_SEIZE attach path. */
TEST_F(O_SUSPEND_SECCOMP, seize)
{
	int ret;

	ret = ptrace(PTRACE_SEIZE, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP);
	ASSERT_EQ(-1, ret);
	if (errno == EINVAL)
		SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)");
	ASSERT_EQ(EPERM, errno);
}
4481
4482 /*
4483 * get_nth - Get the nth, space separated entry in a file.
4484 *
4485 * Returns the length of the read field.
4486 * Throws error if field is zero-lengthed.
4487 */
get_nth(struct __test_metadata * _metadata,const char * path,const unsigned int position,char ** entry)4488 static ssize_t get_nth(struct __test_metadata *_metadata, const char *path,
4489 const unsigned int position, char **entry)
4490 {
4491 char *line = NULL;
4492 unsigned int i;
4493 ssize_t nread;
4494 size_t len = 0;
4495 FILE *f;
4496
4497 f = fopen(path, "r");
4498 ASSERT_NE(f, NULL) {
4499 TH_LOG("Could not open %s: %s", path, strerror(errno));
4500 }
4501
4502 for (i = 0; i < position; i++) {
4503 nread = getdelim(&line, &len, ' ', f);
4504 ASSERT_GE(nread, 0) {
4505 TH_LOG("Failed to read %d entry in file %s", i, path);
4506 }
4507 }
4508 fclose(f);
4509
4510 ASSERT_GT(nread, 0) {
4511 TH_LOG("Entry in file %s had zero length", path);
4512 }
4513
4514 *entry = line;
4515 return nread - 1;
4516 }
4517
4518 /* For a given PID, get the task state (D, R, etc...) */
static char get_proc_stat(struct __test_metadata *_metadata, pid_t pid)
{
	char proc_path[100] = {0};
	char status;
	char *line;

	snprintf(proc_path, sizeof(proc_path), "/proc/%d/stat", pid);
	/* Field 3 of /proc/<pid>/stat is the one-character task state. */
	ASSERT_EQ(get_nth(_metadata, proc_path, 3, &line), 1);

	status = *line;
	free(line);

	return status;
}
4533
/*
 * Verify notifications are delivered in FIFO order: several children
 * trigger the filtered syscall, and RECV must hand back monotonically
 * increasing notification ids.
 */
TEST(user_notification_fifo)
{
	struct seccomp_notif_resp resp = {};
	struct seccomp_notif req = {};
	int i, status, listener;
	pid_t pid, pids[3];
	__u64 baseid;
	long ret;
	/* 100 ms */
	struct timespec delay = { .tv_nsec = 100000000 };

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	/* Setup a listener */
	listener = user_notif_syscall(__NR_getppid,
				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
	ASSERT_GE(listener, 0);

	pid = fork();
	ASSERT_GE(pid, 0);

	if (pid == 0) {
		ret = syscall(__NR_getppid);
		exit(ret != USER_NOTIF_MAGIC);
	}

	/* First round-trip just learns the current id counter. */
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
	baseid = req.id + 1;

	resp.id = req.id;
	resp.error = 0;
	resp.val = USER_NOTIF_MAGIC;

	/* check that we make sure flags == 0 */
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);

	EXPECT_EQ(waitpid(pid, &status, 0), pid);
	EXPECT_EQ(true, WIFEXITED(status));
	EXPECT_EQ(0, WEXITSTATUS(status));

	/* Start children, and generate notifications */
	for (i = 0; i < ARRAY_SIZE(pids); i++) {
		pid = fork();
		if (pid == 0) {
			ret = syscall(__NR_getppid);
			exit(ret != USER_NOTIF_MAGIC);
		}
		pids[i] = pid;
	}

	/* This spins until all of the children are sleeping */
restart_wait:
	for (i = 0; i < ARRAY_SIZE(pids); i++) {
		if (get_proc_stat(_metadata, pids[i]) != 'S') {
			nanosleep(&delay, NULL);
			goto restart_wait;
		}
	}

	/* Read the notifications in order (and respond) */
	for (i = 0; i < ARRAY_SIZE(pids); i++) {
		memset(&req, 0, sizeof(req));
		EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
		/* ids must come out in arrival (FIFO) order */
		EXPECT_EQ(req.id, baseid + i);
		resp.id = req.id;
		EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
	}

	/* Make sure notifications were received */
	for (i = 0; i < ARRAY_SIZE(pids); i++) {
		EXPECT_EQ(waitpid(pids[i], &status, 0), pids[i]);
		EXPECT_EQ(true, WIFEXITED(status));
		EXPECT_EQ(0, WEXITSTATUS(status));
	}
}
4612
4613 /* get_proc_syscall - Get the syscall in progress for a given pid
4614 *
4615 * Returns the current syscall number for a given process
4616 * Returns -1 if not in syscall (running or blocked)
4617 */
static long get_proc_syscall(struct __test_metadata *_metadata, int pid)
{
	char proc_path[100] = {0};
	long ret = -1;
	ssize_t nread;
	char *line;

	snprintf(proc_path, sizeof(proc_path), "/proc/%d/syscall", pid);
	nread = get_nth(_metadata, proc_path, 1, &line);
	ASSERT_GT(nread, 0);

	/*
	 * Per proc(5), the first field of /proc/<pid>/syscall is either the
	 * literal "running" (task not in a syscall) or the syscall number
	 * in decimal. Only parse when the field is NOT "running". The
	 * previous code inverted this test (!strncmp) and parsed with base
	 * 16, so it returned 0 for "running" and -1 for a real syscall,
	 * contradicting the documented contract above this function.
	 */
	if (strncmp("running", line, MIN(7, nread)))
		ret = strtol(line, NULL, 10);

	free(line);
	return ret;
}
4635
4636 /* Ensure non-fatal signals prior to receive are unmodified */
TEST(user_notification_wait_killable_pre_notification)4637 TEST(user_notification_wait_killable_pre_notification)
4638 {
4639 struct sigaction new_action = {
4640 .sa_handler = signal_handler,
4641 };
4642 int listener, status, sk_pair[2];
4643 pid_t pid;
4644 long ret;
4645 char c;
4646 /* 100 ms */
4647 struct timespec delay = { .tv_nsec = 100000000 };
4648
4649 ASSERT_EQ(sigemptyset(&new_action.sa_mask), 0);
4650
4651 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
4652 ASSERT_EQ(0, ret)
4653 {
4654 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
4655 }
4656
4657 ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
4658
4659 listener = user_notif_syscall(
4660 __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER |
4661 SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV);
4662 ASSERT_GE(listener, 0);
4663
4664 /*
4665 * Check that we can kill the process with SIGUSR1 prior to receiving
4666 * the notification. SIGUSR1 is wired up to a custom signal handler,
4667 * and make sure it gets called.
4668 */
4669 pid = fork();
4670 ASSERT_GE(pid, 0);
4671
4672 if (pid == 0) {
4673 close(sk_pair[0]);
4674 handled = sk_pair[1];
4675
4676 /* Setup the non-fatal sigaction without SA_RESTART */
4677 if (sigaction(SIGUSR1, &new_action, NULL)) {
4678 perror("sigaction");
4679 exit(1);
4680 }
4681
4682 ret = syscall(__NR_getppid);
4683 /* Make sure we got a return from a signal interruption */
4684 exit(ret != -1 || errno != EINTR);
4685 }
4686
4687 /*
4688 * Make sure we've gotten to the seccomp user notification wait
4689 * from getppid prior to sending any signals
4690 */
4691 while (get_proc_syscall(_metadata, pid) != __NR_getppid &&
4692 get_proc_stat(_metadata, pid) != 'S')
4693 nanosleep(&delay, NULL);
4694
4695 /* Send non-fatal kill signal */
4696 EXPECT_EQ(kill(pid, SIGUSR1), 0);
4697
4698 /* wait for process to exit (exit checks for EINTR) */
4699 EXPECT_EQ(waitpid(pid, &status, 0), pid);
4700 EXPECT_EQ(true, WIFEXITED(status));
4701 EXPECT_EQ(0, WEXITSTATUS(status));
4702
4703 EXPECT_EQ(read(sk_pair[0], &c, 1), 1);
4704 }
4705
4706 /* Ensure non-fatal signals after receive are blocked */
TEST(user_notification_wait_killable)4707 TEST(user_notification_wait_killable)
4708 {
4709 struct sigaction new_action = {
4710 .sa_handler = signal_handler,
4711 };
4712 struct seccomp_notif_resp resp = {};
4713 struct seccomp_notif req = {};
4714 int listener, status, sk_pair[2];
4715 pid_t pid;
4716 long ret;
4717 char c;
4718 /* 100 ms */
4719 struct timespec delay = { .tv_nsec = 100000000 };
4720
4721 ASSERT_EQ(sigemptyset(&new_action.sa_mask), 0);
4722
4723 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
4724 ASSERT_EQ(0, ret)
4725 {
4726 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
4727 }
4728
4729 ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
4730
4731 listener = user_notif_syscall(
4732 __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER |
4733 SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV);
4734 ASSERT_GE(listener, 0);
4735
4736 pid = fork();
4737 ASSERT_GE(pid, 0);
4738
4739 if (pid == 0) {
4740 close(sk_pair[0]);
4741 handled = sk_pair[1];
4742
4743 /* Setup the sigaction without SA_RESTART */
4744 if (sigaction(SIGUSR1, &new_action, NULL)) {
4745 perror("sigaction");
4746 exit(1);
4747 }
4748
4749 /* Make sure that the syscall is completed (no EINTR) */
4750 ret = syscall(__NR_getppid);
4751 exit(ret != USER_NOTIF_MAGIC);
4752 }
4753
4754 /*
4755 * Get the notification, to make move the notifying process into a
4756 * non-preemptible (TASK_KILLABLE) state.
4757 */
4758 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
4759 /* Send non-fatal kill signal */
4760 EXPECT_EQ(kill(pid, SIGUSR1), 0);
4761
4762 /*
4763 * Make sure the task enters moves to TASK_KILLABLE by waiting for
4764 * D (Disk Sleep) state after receiving non-fatal signal.
4765 */
4766 while (get_proc_stat(_metadata, pid) != 'D')
4767 nanosleep(&delay, NULL);
4768
4769 resp.id = req.id;
4770 resp.val = USER_NOTIF_MAGIC;
4771 /* Make sure the notification is found and able to be replied to */
4772 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
4773
4774 /*
4775 * Make sure that the signal handler does get called once we're back in
4776 * userspace.
4777 */
4778 EXPECT_EQ(read(sk_pair[0], &c, 1), 1);
4779 /* wait for process to exit (exit checks for USER_NOTIF_MAGIC) */
4780 EXPECT_EQ(waitpid(pid, &status, 0), pid);
4781 EXPECT_EQ(true, WIFEXITED(status));
4782 EXPECT_EQ(0, WEXITSTATUS(status));
4783 }
4784
4785 /* Ensure fatal signals after receive are not blocked */
TEST(user_notification_wait_killable_fatal)4786 TEST(user_notification_wait_killable_fatal)
4787 {
4788 struct seccomp_notif req = {};
4789 int listener, status;
4790 pid_t pid;
4791 long ret;
4792 /* 100 ms */
4793 struct timespec delay = { .tv_nsec = 100000000 };
4794
4795 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
4796 ASSERT_EQ(0, ret)
4797 {
4798 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
4799 }
4800
4801 listener = user_notif_syscall(
4802 __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER |
4803 SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV);
4804 ASSERT_GE(listener, 0);
4805
4806 pid = fork();
4807 ASSERT_GE(pid, 0);
4808
4809 if (pid == 0) {
4810 /* This should never complete as it should get a SIGTERM */
4811 syscall(__NR_getppid);
4812 exit(1);
4813 }
4814
4815 while (get_proc_stat(_metadata, pid) != 'S')
4816 nanosleep(&delay, NULL);
4817
4818 /*
4819 * Get the notification, to make move the notifying process into a
4820 * non-preemptible (TASK_KILLABLE) state.
4821 */
4822 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
4823 /* Kill the process with a fatal signal */
4824 EXPECT_EQ(kill(pid, SIGTERM), 0);
4825
4826 /*
4827 * Wait for the process to exit, and make sure the process terminated
4828 * due to the SIGTERM signal.
4829 */
4830 EXPECT_EQ(waitpid(pid, &status, 0), pid);
4831 EXPECT_EQ(true, WIFSIGNALED(status));
4832 EXPECT_EQ(SIGTERM, WTERMSIG(status));
4833 }
4834
4835 /* Ensure signals after the reply do not interrupt */
TEST(user_notification_wait_killable_after_reply)4836 TEST(user_notification_wait_killable_after_reply)
4837 {
4838 int i, max_iter = 100000;
4839 int listener, status;
4840 int pipe_fds[2];
4841 pid_t pid;
4842 long ret;
4843
4844 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
4845 ASSERT_EQ(0, ret)
4846 {
4847 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
4848 }
4849
4850 listener = user_notif_syscall(
4851 __NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER |
4852 SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV);
4853 ASSERT_GE(listener, 0);
4854
4855 /*
4856 * Used to count invocations. One token is transferred from the child
4857 * to the parent per syscall invocation, the parent tries to take
4858 * one token per successful RECV. If the syscall is restarted after
4859 * RECV the parent will try to get two tokens while the child only
4860 * provided one.
4861 */
4862 ASSERT_EQ(pipe(pipe_fds), 0);
4863
4864 pid = fork();
4865 ASSERT_GE(pid, 0);
4866
4867 if (pid == 0) {
4868 struct sigaction new_action = {
4869 .sa_handler = signal_handler_nop,
4870 .sa_flags = SA_RESTART,
4871 };
4872 struct itimerval timer = {
4873 .it_value = { .tv_usec = 1000 },
4874 .it_interval = { .tv_usec = 1000 },
4875 };
4876 char c = 'a';
4877
4878 close(pipe_fds[0]);
4879
4880 /* Setup the sigaction with SA_RESTART */
4881 if (sigaction(SIGALRM, &new_action, NULL)) {
4882 perror("sigaction");
4883 exit(1);
4884 }
4885
4886 /*
4887 * Kill with SIGALRM repeatedly, to try to hit the race when
4888 * handling the syscall.
4889 */
4890 if (setitimer(ITIMER_REAL, &timer, NULL) < 0)
4891 perror("setitimer");
4892
4893 for (i = 0; i < max_iter; ++i) {
4894 int fd;
4895
4896 /* Send one token per iteration to catch repeats. */
4897 if (write(pipe_fds[1], &c, sizeof(c)) != 1) {
4898 perror("write");
4899 exit(1);
4900 }
4901
4902 fd = syscall(__NR_dup, 0);
4903 if (fd < 0) {
4904 perror("dup");
4905 exit(1);
4906 }
4907 close(fd);
4908 }
4909
4910 exit(0);
4911 }
4912
4913 close(pipe_fds[1]);
4914
4915 for (i = 0; i < max_iter; ++i) {
4916 struct seccomp_notif req = {};
4917 struct seccomp_notif_addfd addfd = {};
4918 struct pollfd pfd = {
4919 .fd = pipe_fds[0],
4920 .events = POLLIN,
4921 };
4922 char c;
4923
4924 /*
4925 * Try to receive one token. If it failed, one child syscall
4926 * was restarted after RECV and needed to be handled twice.
4927 */
4928 ASSERT_EQ(poll(&pfd, 1, 1000), 1)
4929 kill(pid, SIGKILL);
4930
4931 ASSERT_EQ(read(pipe_fds[0], &c, sizeof(c)), 1)
4932 kill(pid, SIGKILL);
4933
4934 /*
4935 * Get the notification, reply to it as fast as possible to test
4936 * whether the child wrongly skips going into the non-preemptible
4937 * (TASK_KILLABLE) state.
4938 */
4939 do
4940 ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req);
4941 while (ret < 0 && errno == ENOENT); /* Accept interruptions before RECV */
4942 ASSERT_EQ(ret, 0)
4943 kill(pid, SIGKILL);
4944
4945 addfd.id = req.id;
4946 addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
4947 addfd.srcfd = 0;
4948 ASSERT_GE(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), 0)
4949 kill(pid, SIGKILL);
4950 }
4951
4952 /*
4953 * Wait for the process to exit, and make sure the process terminated
4954 * with a zero exit code..
4955 */
4956 EXPECT_EQ(waitpid(pid, &status, 0), pid);
4957 EXPECT_EQ(true, WIFEXITED(status));
4958 EXPECT_EQ(0, WEXITSTATUS(status));
4959 }
4960
/* Argument for the sibling thread: the pthread id of the leader to join. */
struct tsync_vs_thread_leader_args {
	pthread_t leader;
};
4964
tsync_vs_dead_thread_leader_sibling(void * _args)4965 static void *tsync_vs_dead_thread_leader_sibling(void *_args)
4966 {
4967 struct sock_filter allow_filter[] = {
4968 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
4969 };
4970 struct sock_fprog allow_prog = {
4971 .len = (unsigned short)ARRAY_SIZE(allow_filter),
4972 .filter = allow_filter,
4973 };
4974 struct tsync_vs_thread_leader_args *args = _args;
4975 void *retval;
4976 long ret;
4977
4978 ret = pthread_join(args->leader, &retval);
4979 if (ret)
4980 exit(1);
4981 if (retval != _args)
4982 exit(2);
4983 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &allow_prog);
4984 if (ret)
4985 exit(3);
4986
4987 exit(0);
4988 }
4989
4990 /*
4991 * Ensure that a dead thread leader doesn't prevent installing new filters with
4992 * SECCOMP_FILTER_FLAG_TSYNC from other threads.
4993 */
/*
 * Ensure that a dead thread leader doesn't prevent installing new filters with
 * SECCOMP_FILTER_FLAG_TSYNC from other threads.
 */
TEST(tsync_vs_dead_thread_leader)
{
	int status;
	pid_t pid;
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	pid = fork();
	ASSERT_GE(pid, 0);

	if (pid == 0) {
		struct sock_filter allow_filter[] = {
			BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		};
		struct sock_fprog allow_prog = {
			.len = (unsigned short)ARRAY_SIZE(allow_filter),
			.filter = allow_filter,
		};
		struct tsync_vs_thread_leader_args *args;
		pthread_t sibling;

		/* Heap-allocated: the sibling outlives the leader's stack. */
		args = malloc(sizeof(*args));
		ASSERT_NE(NULL, args);
		args->leader = pthread_self();

		ret = pthread_create(&sibling, NULL,
				     tsync_vs_dead_thread_leader_sibling, args);
		ASSERT_EQ(0, ret);

		/* Install a new filter just to the leader thread. */
		ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
		ASSERT_EQ(0, ret);
		/* Leader dies; sibling then attempts a TSYNC install. */
		pthread_exit(args);
		exit(1);
	}

	/* The sibling's exit(0) is the process exit status. */
	EXPECT_EQ(pid, waitpid(pid, &status, 0));
	EXPECT_EQ(0, status);
}
5037
5038 #ifdef __x86_64__
5039
5040 /*
5041 * We need naked probed_uprobe function. Using __nocf_check
5042 * check to skip possible endbr64 instruction and ignoring
5043 * -Wattributes, otherwise the compilation might fail.
5044 */
5045 #pragma GCC diagnostic push
5046 #pragma GCC diagnostic ignored "-Wattributes"
5047
/* Probe target: a 5-byte nop (probe site) followed by ret. */
__naked __nocf_check noinline int probed_uprobe(void)
{
	/*
	 * Optimized uprobe is possible only on top of nop5 instruction.
	 */
	asm volatile ("					\n"
		".byte 0x0f, 0x1f, 0x44, 0x00, 0x00	\n"
		"ret					\n"
	);
}
5058 #pragma GCC diagnostic pop
5059
5060 #else
/* Fallback probe target on non-x86_64 architectures. */
noinline int probed_uprobe(void)
{
	return 1;
}
5065 #endif
5066
/* Probe target for the uretprobe variants; noinline keeps it addressable. */
noinline int probed_uretprobe(void)
{
	return 1;
}
5071
/*
 * Scan one integer out of @file using scanf format @fmt.
 * Returns the scanned value on success; on failure returns fscanf()'s
 * result (0 or EOF), or -1 if the file could not be opened.
 */
static int parse_uint_from_file(const char *file, const char *fmt)
{
	FILE *fp = fopen(file, "re");
	int value = 0;
	int matched = -1;

	if (fp) {
		matched = fscanf(fp, fmt, &value);
		fclose(fp);
	}
	return matched == 1 ? value : matched;
}
5084
/* Dynamic PMU type id of the "uprobe" perf event source (negative on error). */
static int determine_uprobe_perf_type(void)
{
	return parse_uint_from_file(
		"/sys/bus/event_source/devices/uprobe/type", "%d\n");
}
5091
/* Bit position of the retprobe flag in perf_event_attr.config (negative on error). */
static int determine_uprobe_retprobe_bit(void)
{
	return parse_uint_from_file(
		"/sys/bus/event_source/devices/uprobe/format/retprobe",
		"config:%d\n");
}
5098
/*
 * Translate a virtual address into its file offset by scanning
 * /proc/self/maps for the executable mapping that contains @addr.
 * Returns the offset within the backing file, or -1 if not found.
 */
static ssize_t get_uprobe_offset(const void *addr)
{
	size_t start, base, end;
	bool found = false;
	char buf[256];
	FILE *f;

	f = fopen("/proc/self/maps", "r");
	if (!f)
		return -1;

	/* %255s bounds the perms field so a malformed line cannot overflow buf. */
	while (fscanf(f, "%zx-%zx %255s %zx %*[^\n]\n", &start, &end, buf, &base) == 4) {
		/* buf[2] == 'x': only consider executable mappings. */
		if (buf[2] == 'x' && (uintptr_t)addr >= start && (uintptr_t)addr < end) {
			found = true;
			break;
		}
	}
	fclose(f);
	/* base is the mapping's file offset; add addr's offset into the mapping. */
	return found ? (uintptr_t)addr - start + base : -1;
}
5119
/* Fixture state: perf_event fd of the attached probe (unset when not attached). */
FIXTURE(UPROBE) {
	int fd;
};
5123
/* Variant axes for the UPROBE tests. */
FIXTURE_VARIANT(UPROBE) {
	/*
	 * All of the U(RET)PROBE behaviors can be tested with either
	 * u(ret)probe attached or not
	 */
	bool attach;
	/*
	 * Test both uprobe and uretprobe.
	 */
	bool uretprobe;
};
5135
/* Baseline: no probe attached at all. */
FIXTURE_VARIANT_ADD(UPROBE, not_attached) {
	.attach = false,
	.uretprobe = false,
};

/* uprobe attached to probed_uprobe(). */
FIXTURE_VARIANT_ADD(UPROBE, uprobe_attached) {
	.attach = true,
	.uretprobe = false,
};

/* uretprobe attached to probed_uretprobe(). */
FIXTURE_VARIANT_ADD(UPROBE, uretprobe_attached) {
	.attach = true,
	.uretprobe = true,
};
5150
FIXTURE_SETUP(UPROBE)5151 FIXTURE_SETUP(UPROBE)
5152 {
5153 const size_t attr_sz = sizeof(struct perf_event_attr);
5154 struct perf_event_attr attr;
5155 ssize_t offset;
5156 int type, bit;
5157
5158 #if !defined(__NR_uprobe) || !defined(__NR_uretprobe)
5159 SKIP(return, "__NR_uprobe ot __NR_uretprobe syscalls not defined");
5160 #endif
5161
5162 if (!variant->attach)
5163 return;
5164
5165 memset(&attr, 0, attr_sz);
5166
5167 type = determine_uprobe_perf_type();
5168 ASSERT_GE(type, 0);
5169
5170 if (variant->uretprobe) {
5171 bit = determine_uprobe_retprobe_bit();
5172 ASSERT_GE(bit, 0);
5173 }
5174
5175 offset = get_uprobe_offset(variant->uretprobe ? probed_uretprobe : probed_uprobe);
5176 ASSERT_GE(offset, 0);
5177
5178 if (variant->uretprobe)
5179 attr.config |= 1 << bit;
5180 attr.size = attr_sz;
5181 attr.type = type;
5182 attr.config1 = ptr_to_u64("/proc/self/exe");
5183 attr.config2 = offset;
5184
5185 self->fd = syscall(__NR_perf_event_open, &attr,
5186 getpid() /* pid */, -1 /* cpu */, -1 /* group_fd */,
5187 PERF_FLAG_FD_CLOEXEC);
5188 }
5189
FIXTURE_TEARDOWN(UPROBE)
{
	/*
	 * We could call close(self->fd), but the seccomp filters installed
	 * by the tests would need an extra allow rule for close(), and the
	 * harness _exit()s right away anyway, which releases the fd.
	 */
}
5196
/*
 * Install @prog under NO_NEW_PRIVS, then call both probe targets.
 * Returns 0 on success, -1 if the filter could not be installed.
 */
static int run_probed_with_filter(struct sock_fprog *prog)
{
	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) ||
	    seccomp(SECCOMP_SET_MODE_FILTER, 0, prog)) {
		return -1;
	}

	/*
	 * Uprobe is optimized after first hit, so let's hit twice.
	 */
	probed_uprobe();
	probed_uprobe();

	probed_uretprobe();
	return 0;
}
5213
/* Allow-everything filter: hitting the probes must not kill the process. */
TEST_F(UPROBE, uprobe_default_allow)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};

	ASSERT_EQ(0, run_probed_with_filter(&prog));
}
5226
/*
 * Kill-by-default filter that only allows exit_group: the u(ret)probe
 * trap path must not be treated as a blocked syscall.
 */
TEST_F(UPROBE, uprobe_default_block)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			 offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit_group, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};

	ASSERT_EQ(0, run_probed_with_filter(&prog));
}
5243
/*
 * Explicitly kill on __NR_uprobe/__NR_uretprobe, allow everything else:
 * hitting the probes must still survive, i.e. the probe entry path is not
 * subject to these filter rules.
 */
TEST_F(UPROBE, uprobe_block_syscall)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			 offsetof(struct seccomp_data, nr)),
#ifdef __NR_uprobe
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uprobe, 1, 2),
#endif
#ifdef __NR_uretprobe
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uretprobe, 0, 1),
#endif
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};

	ASSERT_EQ(0, run_probed_with_filter(&prog));
}
5265
/*
 * Kill-by-default filter that explicitly allows __NR_uprobe,
 * __NR_uretprobe and exit_group: probes must work under it.
 */
TEST_F(UPROBE, uprobe_default_block_with_syscall)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			 offsetof(struct seccomp_data, nr)),
#ifdef __NR_uprobe
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uprobe, 3, 0),
#endif
#ifdef __NR_uretprobe
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uretprobe, 2, 0),
#endif
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit_group, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};

	ASSERT_EQ(0, run_probed_with_filter(&prog));
}
5288
5289 /*
5290 * TODO:
5291 * - expand NNP testing
5292 * - better arch-specific TRACE and TRAP handlers.
5293 * - endianness checking when appropriate
5294 * - 64-bit arg prodding
5295 * - arch value testing (x86 modes especially)
5296 * - verify that FILTER_FLAG_LOG filters generate log messages
5297 * - verify that RET_LOG generates log messages
5298 */
5299
5300 TEST_HARNESS_MAIN
5301