xref: /linux/tools/testing/selftests/seccomp/seccomp_bpf.c (revision 65898b375659f2556da9ac22ea5649407f6f6447)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
4  *
5  * Test code for seccomp bpf.
6  */
7 
8 #define _GNU_SOURCE
9 #include <sys/types.h>
10 
11 /*
12  * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
13  * we need to use the kernel's siginfo.h file and trick glibc
14  * into accepting it.
15  */
16 #if !__GLIBC_PREREQ(2, 26)
17 # include <asm/siginfo.h>
18 # define __have_siginfo_t 1
19 # define __have_sigval_t 1
20 # define __have_sigevent_t 1
21 #endif
22 
23 #include <errno.h>
24 #include <linux/filter.h>
25 #include <sys/prctl.h>
26 #include <sys/ptrace.h>
27 #include <sys/user.h>
28 #include <linux/prctl.h>
29 #include <linux/ptrace.h>
30 #include <linux/seccomp.h>
31 #include <pthread.h>
32 #include <semaphore.h>
33 #include <signal.h>
34 #include <stddef.h>
35 #include <stdbool.h>
36 #include <string.h>
37 #include <time.h>
38 #include <limits.h>
39 #include <linux/elf.h>
40 #include <sys/uio.h>
41 #include <sys/utsname.h>
42 #include <sys/fcntl.h>
43 #include <sys/mman.h>
44 #include <sys/times.h>
45 #include <sys/socket.h>
46 #include <sys/ioctl.h>
47 #include <linux/kcmp.h>
48 
49 #include <unistd.h>
50 #include <sys/syscall.h>
51 #include <poll.h>
52 
53 #include "../kselftest_harness.h"
54 
55 #ifndef PR_SET_PTRACER
56 # define PR_SET_PTRACER 0x59616d61
57 #endif
58 
59 #ifndef PR_SET_NO_NEW_PRIVS
60 #define PR_SET_NO_NEW_PRIVS 38
61 #define PR_GET_NO_NEW_PRIVS 39
62 #endif
63 
64 #ifndef PR_SECCOMP_EXT
65 #define PR_SECCOMP_EXT 43
66 #endif
67 
68 #ifndef SECCOMP_EXT_ACT
69 #define SECCOMP_EXT_ACT 1
70 #endif
71 
72 #ifndef SECCOMP_EXT_ACT_TSYNC
73 #define SECCOMP_EXT_ACT_TSYNC 1
74 #endif
75 
76 #ifndef SECCOMP_MODE_STRICT
77 #define SECCOMP_MODE_STRICT 1
78 #endif
79 
80 #ifndef SECCOMP_MODE_FILTER
81 #define SECCOMP_MODE_FILTER 2
82 #endif
83 
84 #ifndef SECCOMP_RET_ALLOW
85 struct seccomp_data {
86 	int nr;
87 	__u32 arch;
88 	__u64 instruction_pointer;
89 	__u64 args[6];
90 };
91 #endif
92 
93 #ifndef SECCOMP_RET_KILL_PROCESS
94 #define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
95 #define SECCOMP_RET_KILL_THREAD	 0x00000000U /* kill the thread */
96 #endif
97 #ifndef SECCOMP_RET_KILL
98 #define SECCOMP_RET_KILL	 SECCOMP_RET_KILL_THREAD
99 #define SECCOMP_RET_TRAP	 0x00030000U /* disallow and force a SIGSYS */
100 #define SECCOMP_RET_ERRNO	 0x00050000U /* returns an errno */
101 #define SECCOMP_RET_TRACE	 0x7ff00000U /* pass to a tracer or disallow */
102 #define SECCOMP_RET_ALLOW	 0x7fff0000U /* allow */
103 #endif
104 #ifndef SECCOMP_RET_LOG
105 #define SECCOMP_RET_LOG		 0x7ffc0000U /* allow after logging */
106 #endif
107 
108 #ifndef __NR_seccomp
109 # if defined(__i386__)
110 #  define __NR_seccomp 354
111 # elif defined(__x86_64__)
112 #  define __NR_seccomp 317
113 # elif defined(__arm__)
114 #  define __NR_seccomp 383
115 # elif defined(__aarch64__)
116 #  define __NR_seccomp 277
117 # elif defined(__riscv)
118 #  define __NR_seccomp 277
119 # elif defined(__hppa__)
120 #  define __NR_seccomp 338
121 # elif defined(__powerpc__)
122 #  define __NR_seccomp 358
123 # elif defined(__s390__)
124 #  define __NR_seccomp 348
125 # elif defined(__xtensa__)
126 #  define __NR_seccomp 337
127 # else
128 #  warning "seccomp syscall number unknown for this architecture"
129 #  define __NR_seccomp 0xffff
130 # endif
131 #endif
132 
133 #ifndef SECCOMP_SET_MODE_STRICT
134 #define SECCOMP_SET_MODE_STRICT 0
135 #endif
136 
137 #ifndef SECCOMP_SET_MODE_FILTER
138 #define SECCOMP_SET_MODE_FILTER 1
139 #endif
140 
141 #ifndef SECCOMP_GET_ACTION_AVAIL
142 #define SECCOMP_GET_ACTION_AVAIL 2
143 #endif
144 
145 #ifndef SECCOMP_GET_NOTIF_SIZES
146 #define SECCOMP_GET_NOTIF_SIZES 3
147 #endif
148 
149 #ifndef SECCOMP_FILTER_FLAG_TSYNC
150 #define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
151 #endif
152 
153 #ifndef SECCOMP_FILTER_FLAG_LOG
154 #define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
155 #endif
156 
157 #ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
158 #define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
159 #endif
160 
161 #ifndef PTRACE_SECCOMP_GET_METADATA
162 #define PTRACE_SECCOMP_GET_METADATA	0x420d
163 
164 struct seccomp_metadata {
165 	__u64 filter_off;       /* Input: which filter */
166 	__u64 flags;             /* Output: filter's flags */
167 };
168 #endif
169 
170 #ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER
171 #define SECCOMP_FILTER_FLAG_NEW_LISTENER	(1UL << 3)
172 
173 #define SECCOMP_RET_USER_NOTIF 0x7fc00000U
174 
175 #define SECCOMP_IOC_MAGIC		'!'
176 #define SECCOMP_IO(nr)			_IO(SECCOMP_IOC_MAGIC, nr)
177 #define SECCOMP_IOR(nr, type)		_IOR(SECCOMP_IOC_MAGIC, nr, type)
178 #define SECCOMP_IOW(nr, type)		_IOW(SECCOMP_IOC_MAGIC, nr, type)
179 #define SECCOMP_IOWR(nr, type)		_IOWR(SECCOMP_IOC_MAGIC, nr, type)
180 
181 /* Ioctl commands for the seccomp notification fd. */
182 #define SECCOMP_IOCTL_NOTIF_RECV	SECCOMP_IOWR(0, struct seccomp_notif)
183 #define SECCOMP_IOCTL_NOTIF_SEND	SECCOMP_IOWR(1,	\
184 						struct seccomp_notif_resp)
185 #define SECCOMP_IOCTL_NOTIF_ID_VALID	SECCOMP_IOR(2, __u64)
186 
187 struct seccomp_notif {
188 	__u64 id;
189 	__u32 pid;
190 	__u32 flags;
191 	struct seccomp_data data;
192 };
193 
194 struct seccomp_notif_resp {
195 	__u64 id;
196 	__s64 val;
197 	__s32 error;
198 	__u32 flags;
199 };
200 
201 struct seccomp_notif_sizes {
202 	__u16 seccomp_notif;
203 	__u16 seccomp_notif_resp;
204 	__u16 seccomp_data;
205 };
206 #endif
207 
208 #ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY
209 #define PTRACE_EVENTMSG_SYSCALL_ENTRY	1
210 #define PTRACE_EVENTMSG_SYSCALL_EXIT	2
211 #endif
212 
213 #ifndef SECCOMP_USER_NOTIF_FLAG_CONTINUE
214 #define SECCOMP_USER_NOTIF_FLAG_CONTINUE 0x00000001
215 #endif
216 
217 #ifndef SECCOMP_FILTER_FLAG_TSYNC_ESRCH
218 #define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
219 #endif
220 
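/*
 * Provide a minimal seccomp() wrapper (a direct syscall) in case libc does
 * not supply one of its own.
 */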
221 #ifndef seccomp
222 int seccomp(unsigned int op, unsigned int flags, void *args)
223 {
224 	errno = 0;
225 	return syscall(__NR_seccomp, op, flags, args);
226 }
227 #endif
228 
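/*
 * BPF_LD|BPF_W loads only 32 bits, while seccomp_data args are 64-bit, so
 * syscall_arg(_n) yields the offset of the low word of argument _n (which
 * sits 4 bytes into the field on big-endian machines). For example,
 * BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)) loads the low 32 bits of
 * the first syscall argument.
 */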
229 #if __BYTE_ORDER == __LITTLE_ENDIAN
230 #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
231 #elif __BYTE_ORDER == __BIG_ENDIAN
232 #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
233 #else
234 #error "wut? Unknown __BYTE_ORDER?!"
235 #endif
236 
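/* Sentinel exit values used by helper threads to report how they finished. */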
237 #define SIBLING_EXIT_UNKILLED	0xbadbeef
238 #define SIBLING_EXIT_FAILURE	0xbadface
239 #define SIBLING_EXIT_NEWPRIVS	0xbadfeed
240 
241 TEST(mode_strict_support)
242 {
243 	long ret;
244 
245 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
246 	ASSERT_EQ(0, ret) {
247 		TH_LOG("Kernel does not support CONFIG_SECCOMP");
248 	}
249 	syscall(__NR_exit, 0);
250 }
251 
252 TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
253 {
254 	long ret;
255 
256 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
257 	ASSERT_EQ(0, ret) {
258 		TH_LOG("Kernel does not support CONFIG_SECCOMP");
259 	}
260 	syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
261 		NULL, NULL, NULL);
262 	EXPECT_FALSE(true) {
263 		TH_LOG("Unreachable!");
264 	}
265 }
266 
267 /* Note! This doesn't test the no_new_privs behavior itself. */
268 TEST(no_new_privs_support)
269 {
270 	long ret;
271 
272 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
273 	EXPECT_EQ(0, ret) {
274 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
275 	}
276 }
277 
278 /* Tests kernel support by checking for a copy_from_user() fault on NULL. */
279 TEST(mode_filter_support)
280 {
281 	long ret;
282 
283 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
284 	ASSERT_EQ(0, ret) {
285 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
286 	}
287 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
288 	EXPECT_EQ(-1, ret);
289 	EXPECT_EQ(EFAULT, errno) {
290 		TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
291 	}
292 }
293 
294 TEST(mode_filter_without_nnp)
295 {
296 	struct sock_filter filter[] = {
297 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
298 	};
299 	struct sock_fprog prog = {
300 		.len = (unsigned short)ARRAY_SIZE(filter),
301 		.filter = filter,
302 	};
303 	long ret;
304 
305 	ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
306 	ASSERT_LE(0, ret) {
307 		TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
308 	}
309 	errno = 0;
310 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
311 	/* Succeeds with CAP_SYS_ADMIN, fails without */
312 	/* TODO(wad) check caps not euid */
313 	if (geteuid()) {
314 		EXPECT_EQ(-1, ret);
315 		EXPECT_EQ(EACCES, errno);
316 	} else {
317 		EXPECT_EQ(0, ret);
318 	}
319 }
320 
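/*
 * The kernel caps the total number of instructions across a task's attached
 * filters (counting a small per-filter overhead); filter_chain_limits below
 * keeps stacking BPF_MAXINSNS-sized filters until that cap rejects one.
 */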
321 #define MAX_INSNS_PER_PATH 32768
322 
323 TEST(filter_size_limits)
324 {
325 	int i;
326 	int count = BPF_MAXINSNS + 1;
327 	struct sock_filter allow[] = {
328 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
329 	};
330 	struct sock_filter *filter;
331 	struct sock_fprog prog = { };
332 	long ret;
333 
334 	filter = calloc(count, sizeof(*filter));
335 	ASSERT_NE(NULL, filter);
336 
337 	for (i = 0; i < count; i++)
338 		filter[i] = allow[0];
339 
340 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
341 	ASSERT_EQ(0, ret);
342 
343 	prog.filter = filter;
344 	prog.len = count;
345 
346 	/* Too many filter instructions in a single filter. */
347 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
348 	ASSERT_NE(0, ret) {
349 		TH_LOG("Installing %d insn filter was allowed", prog.len);
350 	}
351 
352 	/* One less is okay, though. */
353 	prog.len -= 1;
354 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
355 	ASSERT_EQ(0, ret) {
356 		TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
357 	}
358 }
359 
360 TEST(filter_chain_limits)
361 {
362 	int i;
363 	int count = BPF_MAXINSNS;
364 	struct sock_filter allow[] = {
365 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
366 	};
367 	struct sock_filter *filter;
368 	struct sock_fprog prog = { };
369 	long ret;
370 
371 	filter = calloc(count, sizeof(*filter));
372 	ASSERT_NE(NULL, filter);
373 
374 	for (i = 0; i < count; i++)
375 		filter[i] = allow[0];
376 
377 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
378 	ASSERT_EQ(0, ret);
379 
380 	prog.filter = filter;
381 	prog.len = 1;
382 
383 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
384 	ASSERT_EQ(0, ret);
385 
386 	prog.len = count;
387 
388 	/* Too many total filter instructions. */
389 	for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
390 		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
391 		if (ret != 0)
392 			break;
393 	}
394 	ASSERT_NE(0, ret) {
395 		TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
396 		       i, count, i * (count + 4));
397 	}
398 }
399 
400 TEST(mode_filter_cannot_move_to_strict)
401 {
402 	struct sock_filter filter[] = {
403 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
404 	};
405 	struct sock_fprog prog = {
406 		.len = (unsigned short)ARRAY_SIZE(filter),
407 		.filter = filter,
408 	};
409 	long ret;
410 
411 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
412 	ASSERT_EQ(0, ret);
413 
414 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
415 	ASSERT_EQ(0, ret);
416 
417 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
418 	EXPECT_EQ(-1, ret);
419 	EXPECT_EQ(EINVAL, errno);
420 }
421 
422 
423 TEST(mode_filter_get_seccomp)
424 {
425 	struct sock_filter filter[] = {
426 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
427 	};
428 	struct sock_fprog prog = {
429 		.len = (unsigned short)ARRAY_SIZE(filter),
430 		.filter = filter,
431 	};
432 	long ret;
433 
434 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
435 	ASSERT_EQ(0, ret);
436 
437 	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
438 	EXPECT_EQ(0, ret);
439 
440 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
441 	ASSERT_EQ(0, ret);
442 
443 	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
444 	EXPECT_EQ(2, ret);
445 }
446 
447 
448 TEST(ALLOW_all)
449 {
450 	struct sock_filter filter[] = {
451 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
452 	};
453 	struct sock_fprog prog = {
454 		.len = (unsigned short)ARRAY_SIZE(filter),
455 		.filter = filter,
456 	};
457 	long ret;
458 
459 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
460 	ASSERT_EQ(0, ret);
461 
462 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
463 	ASSERT_EQ(0, ret);
464 }
465 
466 TEST(empty_prog)
467 {
468 	struct sock_filter filter[] = {
469 	};
470 	struct sock_fprog prog = {
471 		.len = (unsigned short)ARRAY_SIZE(filter),
472 		.filter = filter,
473 	};
474 	long ret;
475 
476 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
477 	ASSERT_EQ(0, ret);
478 
479 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
480 	EXPECT_EQ(-1, ret);
481 	EXPECT_EQ(EINVAL, errno);
482 }
483 
484 TEST(log_all)
485 {
486 	struct sock_filter filter[] = {
487 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
488 	};
489 	struct sock_fprog prog = {
490 		.len = (unsigned short)ARRAY_SIZE(filter),
491 		.filter = filter,
492 	};
493 	long ret;
494 	pid_t parent = getppid();
495 
496 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
497 	ASSERT_EQ(0, ret);
498 
499 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
500 	ASSERT_EQ(0, ret);
501 
502 	/* getppid() should succeed and be logged (no check for logging) */
503 	EXPECT_EQ(parent, syscall(__NR_getppid));
504 }
505 
506 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
507 {
508 	struct sock_filter filter[] = {
509 		BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
510 	};
511 	struct sock_fprog prog = {
512 		.len = (unsigned short)ARRAY_SIZE(filter),
513 		.filter = filter,
514 	};
515 	long ret;
516 
517 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
518 	ASSERT_EQ(0, ret);
519 
520 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
521 	ASSERT_EQ(0, ret);
522 	EXPECT_EQ(0, syscall(__NR_getpid)) {
523 		TH_LOG("getpid() shouldn't ever return");
524 	}
525 }
526 
527 /* return code >= 0x80000000 is unused. */
528 TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
529 {
530 	struct sock_filter filter[] = {
531 		BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
532 	};
533 	struct sock_fprog prog = {
534 		.len = (unsigned short)ARRAY_SIZE(filter),
535 		.filter = filter,
536 	};
537 	long ret;
538 
539 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
540 	ASSERT_EQ(0, ret);
541 
542 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
543 	ASSERT_EQ(0, ret);
544 	EXPECT_EQ(0, syscall(__NR_getpid)) {
545 		TH_LOG("getpid() shouldn't ever return");
546 	}
547 }
548 
549 TEST_SIGNAL(KILL_all, SIGSYS)
550 {
551 	struct sock_filter filter[] = {
552 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
553 	};
554 	struct sock_fprog prog = {
555 		.len = (unsigned short)ARRAY_SIZE(filter),
556 		.filter = filter,
557 	};
558 	long ret;
559 
560 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
561 	ASSERT_EQ(0, ret);
562 
563 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
564 	ASSERT_EQ(0, ret);
565 }
566 
567 TEST_SIGNAL(KILL_one, SIGSYS)
568 {
569 	struct sock_filter filter[] = {
570 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
571 			offsetof(struct seccomp_data, nr)),
572 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
573 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
574 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
575 	};
576 	struct sock_fprog prog = {
577 		.len = (unsigned short)ARRAY_SIZE(filter),
578 		.filter = filter,
579 	};
580 	long ret;
581 	pid_t parent = getppid();
582 
583 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
584 	ASSERT_EQ(0, ret);
585 
586 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
587 	ASSERT_EQ(0, ret);
588 
589 	EXPECT_EQ(parent, syscall(__NR_getppid));
590 	/* getpid() should never return. */
591 	EXPECT_EQ(0, syscall(__NR_getpid));
592 }
593 
594 TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
595 {
596 	void *fatal_address;
597 	struct sock_filter filter[] = {
598 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
599 			offsetof(struct seccomp_data, nr)),
600 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0),
601 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
602 		/* Only bother with the lower 32 bits for now. */
603 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
604 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K,
605 			(unsigned long)&fatal_address, 0, 1),
606 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
607 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
608 	};
609 	struct sock_fprog prog = {
610 		.len = (unsigned short)ARRAY_SIZE(filter),
611 		.filter = filter,
612 	};
613 	long ret;
614 	pid_t parent = getppid();
615 	struct tms timebuf;
616 	clock_t clock = times(&timebuf);
617 
618 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
619 	ASSERT_EQ(0, ret);
620 
621 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
622 	ASSERT_EQ(0, ret);
623 
624 	EXPECT_EQ(parent, syscall(__NR_getppid));
625 	EXPECT_LE(clock, syscall(__NR_times, &timebuf));
626 	/* times() should never return. */
627 	EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
628 }
629 
630 TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
631 {
632 #ifndef __NR_mmap2
633 	int sysno = __NR_mmap;
634 #else
635 	int sysno = __NR_mmap2;
636 #endif
637 	struct sock_filter filter[] = {
638 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
639 			offsetof(struct seccomp_data, nr)),
640 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
641 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
642 		/* Only bother with the lower 32 bits for now. */
643 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
644 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
645 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
646 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
647 	};
648 	struct sock_fprog prog = {
649 		.len = (unsigned short)ARRAY_SIZE(filter),
650 		.filter = filter,
651 	};
652 	long ret;
653 	pid_t parent = getppid();
654 	int fd;
655 	void *map1, *map2;
656 	int page_size = sysconf(_SC_PAGESIZE);
657 
658 	ASSERT_LT(0, page_size);
659 
660 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
661 	ASSERT_EQ(0, ret);
662 
663 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
664 	ASSERT_EQ(0, ret);
665 
666 	fd = open("/dev/zero", O_RDONLY);
667 	ASSERT_NE(-1, fd);
668 
669 	EXPECT_EQ(parent, syscall(__NR_getppid));
670 	map1 = (void *)syscall(sysno,
671 		NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
672 	EXPECT_NE(MAP_FAILED, map1);
673 	/* This mmap()/mmap2() call should never return. */
674 	map2 = (void *)syscall(sysno,
675 		 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
676 	EXPECT_EQ(MAP_FAILED, map2);
677 
678 	/* The test failed, so clean up the resources. */
679 	munmap(map1, page_size);
680 	munmap(map2, page_size);
681 	close(fd);
682 }
683 
684 /* Thread body that dies via a seccomp filter violation when asked to. */
685 void *kill_thread(void *data)
686 {
687 	bool die = (bool)data;
688 
689 	if (die) {
690 		prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
691 		return (void *)SIBLING_EXIT_FAILURE;
692 	}
693 
694 	return (void *)SIBLING_EXIT_UNKILLED;
695 }
696 
697 /* Prepare a thread that will kill itself or both of us. */
698 void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
699 {
700 	pthread_t thread;
701 	void *status;
702 	/* Kill only when calling __NR_prctl. */
703 	struct sock_filter filter_thread[] = {
704 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
705 			offsetof(struct seccomp_data, nr)),
706 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
707 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
708 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
709 	};
710 	struct sock_fprog prog_thread = {
711 		.len = (unsigned short)ARRAY_SIZE(filter_thread),
712 		.filter = filter_thread,
713 	};
714 	struct sock_filter filter_process[] = {
715 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
716 			offsetof(struct seccomp_data, nr)),
717 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
718 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
719 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
720 	};
721 	struct sock_fprog prog_process = {
722 		.len = (unsigned short)ARRAY_SIZE(filter_process),
723 		.filter = filter_process,
724 	};
725 
726 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
727 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
728 	}
729 
730 	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
731 			     kill_process ? &prog_process : &prog_thread));
732 
733 	/*
734 	 * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
735 	 * action cannot be downgraded by a later filter.
736 	 */
737 	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
738 
739 	/* Start a thread that will exit immediately. */
740 	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
741 	ASSERT_EQ(0, pthread_join(thread, &status));
742 	ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status);
743 
744 	/* Start a thread that will die immediately. */
745 	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true));
746 	ASSERT_EQ(0, pthread_join(thread, &status));
747 	ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status);
748 
749 	/*
750 	 * If we get here, only the spawned thread died. Let the parent know
751 	 * the whole process didn't die (i.e. this thread, the spawner,
752 	 * stayed running).
753 	 */
754 	exit(42);
755 }
756 
757 TEST(KILL_thread)
758 {
759 	int status;
760 	pid_t child_pid;
761 
762 	child_pid = fork();
763 	ASSERT_LE(0, child_pid);
764 	if (child_pid == 0) {
765 		kill_thread_or_group(_metadata, false);
766 		_exit(38);
767 	}
768 
769 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
770 
771 	/* If only the thread was killed, we'll see exit 42. */
772 	ASSERT_TRUE(WIFEXITED(status));
773 	ASSERT_EQ(42, WEXITSTATUS(status));
774 }
775 
776 TEST(KILL_process)
777 {
778 	int status;
779 	pid_t child_pid;
780 
781 	child_pid = fork();
782 	ASSERT_LE(0, child_pid);
783 	if (child_pid == 0) {
784 		kill_thread_or_group(_metadata, true);
785 		_exit(38);
786 	}
787 
788 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
789 
790 	/* If the entire process was killed, we'll see SIGSYS. */
791 	ASSERT_TRUE(WIFSIGNALED(status));
792 	ASSERT_EQ(SIGSYS, WTERMSIG(status));
793 }
794 
795 /* TODO(wad) add 64-bit versus 32-bit arg tests. */
796 TEST(arg_out_of_range)
797 {
798 	struct sock_filter filter[] = {
799 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
800 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
801 	};
802 	struct sock_fprog prog = {
803 		.len = (unsigned short)ARRAY_SIZE(filter),
804 		.filter = filter,
805 	};
806 	long ret;
807 
808 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
809 	ASSERT_EQ(0, ret);
810 
811 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
812 	EXPECT_EQ(-1, ret);
813 	EXPECT_EQ(EINVAL, errno);
814 }
815 
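/*
 * Declares a filter (and a matching sock_fprog named prog_<name>) that
 * returns the given errno for read() and allows every other syscall,
 * e.g. ERRNO_FILTER(valid, E2BIG) declares prog_valid.
 */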
816 #define ERRNO_FILTER(name, errno)					\
817 	struct sock_filter _read_filter_##name[] = {			\
818 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,				\
819 			offsetof(struct seccomp_data, nr)),		\
820 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),	\
821 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | errno),	\
822 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),		\
823 	};								\
824 	struct sock_fprog prog_##name = {				\
825 		.len = (unsigned short)ARRAY_SIZE(_read_filter_##name),	\
826 		.filter = _read_filter_##name,				\
827 	}
828 
829 /* Make sure basic errno values are correctly passed through a filter. */
830 TEST(ERRNO_valid)
831 {
832 	ERRNO_FILTER(valid, E2BIG);
833 	long ret;
834 	pid_t parent = getppid();
835 
836 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
837 	ASSERT_EQ(0, ret);
838 
839 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid);
840 	ASSERT_EQ(0, ret);
841 
842 	EXPECT_EQ(parent, syscall(__NR_getppid));
843 	EXPECT_EQ(-1, read(0, NULL, 0));
844 	EXPECT_EQ(E2BIG, errno);
845 }
846 
847 /* Make sure an errno of zero is correctly handled by the arch code. */
848 TEST(ERRNO_zero)
849 {
850 	ERRNO_FILTER(zero, 0);
851 	long ret;
852 	pid_t parent = getppid();
853 
854 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
855 	ASSERT_EQ(0, ret);
856 
857 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero);
858 	ASSERT_EQ(0, ret);
859 
860 	EXPECT_EQ(parent, syscall(__NR_getppid));
861 	/* "errno" of 0 is ok. */
862 	EXPECT_EQ(0, read(0, NULL, 0));
863 }
864 
865 /*
866  * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller.
867  * This tests that the errno value gets capped correctly, fixed by
868  * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO").
869  */
870 TEST(ERRNO_capped)
871 {
872 	ERRNO_FILTER(capped, 4096);
873 	long ret;
874 	pid_t parent = getppid();
875 
876 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
877 	ASSERT_EQ(0, ret);
878 
879 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped);
880 	ASSERT_EQ(0, ret);
881 
882 	EXPECT_EQ(parent, syscall(__NR_getppid));
883 	EXPECT_EQ(-1, read(0, NULL, 0));
884 	EXPECT_EQ(4095, errno);
885 }
886 
887 /*
888  * Filters are processed in reverse order: last applied is executed first.
889  * Since only the SECCOMP_RET_ACTION mask is tested for return values, the
890  * SECCOMP_RET_DATA mask results will follow the most recently applied
891  * matching filter return (and not the lowest or highest value).
892  */
893 TEST(ERRNO_order)
894 {
895 	ERRNO_FILTER(first,  11);
896 	ERRNO_FILTER(second, 13);
897 	ERRNO_FILTER(third,  12);
898 	long ret;
899 	pid_t parent = getppid();
900 
901 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
902 	ASSERT_EQ(0, ret);
903 
904 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first);
905 	ASSERT_EQ(0, ret);
906 
907 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second);
908 	ASSERT_EQ(0, ret);
909 
910 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third);
911 	ASSERT_EQ(0, ret);
912 
913 	EXPECT_EQ(parent, syscall(__NR_getppid));
914 	EXPECT_EQ(-1, read(0, NULL, 0));
915 	EXPECT_EQ(12, errno);
916 }
917 
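/*
 * The TRAP fixture builds a filter that raises SIGSYS (SECCOMP_RET_TRAP) on
 * getpid() and allows everything else.
 */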
918 FIXTURE(TRAP) {
919 	struct sock_fprog prog;
920 };
921 
922 FIXTURE_SETUP(TRAP)
923 {
924 	struct sock_filter filter[] = {
925 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
926 			offsetof(struct seccomp_data, nr)),
927 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
928 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
929 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
930 	};
931 
932 	memset(&self->prog, 0, sizeof(self->prog));
933 	self->prog.filter = malloc(sizeof(filter));
934 	ASSERT_NE(NULL, self->prog.filter);
935 	memcpy(self->prog.filter, filter, sizeof(filter));
936 	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
937 }
938 
939 FIXTURE_TEARDOWN(TRAP)
940 {
941 	if (self->prog.filter)
942 		free(self->prog.filter);
943 }
944 
945 TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
946 {
947 	long ret;
948 
949 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
950 	ASSERT_EQ(0, ret);
951 
952 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
953 	ASSERT_EQ(0, ret);
954 	syscall(__NR_getpid);
955 }
956 
957 /* Ensure that SIGSYS overrides SIG_IGN */
958 TEST_F_SIGNAL(TRAP, ign, SIGSYS)
959 {
960 	long ret;
961 
962 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
963 	ASSERT_EQ(0, ret);
964 
965 	signal(SIGSYS, SIG_IGN);
966 
967 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
968 	ASSERT_EQ(0, ret);
969 	syscall(__NR_getpid);
970 }
971 
972 static siginfo_t TRAP_info;
973 static volatile int TRAP_nr;
974 static void TRAP_action(int nr, siginfo_t *info, void *void_context)
975 {
976 	memcpy(&TRAP_info, info, sizeof(TRAP_info));
977 	TRAP_nr = nr;
978 }
979 
980 TEST_F(TRAP, handler)
981 {
982 	int ret, test;
983 	struct sigaction act;
984 	sigset_t mask;
985 
986 	memset(&act, 0, sizeof(act));
987 	sigemptyset(&mask);
988 	sigaddset(&mask, SIGSYS);
989 
990 	act.sa_sigaction = &TRAP_action;
991 	act.sa_flags = SA_SIGINFO;
992 	ret = sigaction(SIGSYS, &act, NULL);
993 	ASSERT_EQ(0, ret) {
994 		TH_LOG("sigaction failed");
995 	}
996 	ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
997 	ASSERT_EQ(0, ret) {
998 		TH_LOG("sigprocmask failed");
999 	}
1000 
1001 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1002 	ASSERT_EQ(0, ret);
1003 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
1004 	ASSERT_EQ(0, ret);
1005 	TRAP_nr = 0;
1006 	memset(&TRAP_info, 0, sizeof(TRAP_info));
1007 	/* Expect the registers to be rolled back. (nr = error) may vary
1008 	 * based on arch. */
1009 	ret = syscall(__NR_getpid);
1010 	/* Silence gcc warning about volatile. */
1011 	test = TRAP_nr;
1012 	EXPECT_EQ(SIGSYS, test);
1013 	struct local_sigsys {
1014 		void *_call_addr;	/* calling user insn */
1015 		int _syscall;		/* triggering system call number */
1016 		unsigned int _arch;	/* AUDIT_ARCH_* of syscall */
1017 	} *sigsys = (struct local_sigsys *)
1018 #ifdef si_syscall
1019 		&(TRAP_info.si_call_addr);
1020 #else
1021 		&TRAP_info.si_pid;
1022 #endif
1023 	EXPECT_EQ(__NR_getpid, sigsys->_syscall);
1024 	/* Make sure arch is non-zero. */
1025 	EXPECT_NE(0, sigsys->_arch);
1026 	EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
1027 }
1028 
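/*
 * Each precedence filter returns its namesake action for getpid() (the
 * "allow" filter allows everything), so stacking them in different orders
 * shows which return action takes precedence.
 */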
1029 FIXTURE(precedence) {
1030 	struct sock_fprog allow;
1031 	struct sock_fprog log;
1032 	struct sock_fprog trace;
1033 	struct sock_fprog error;
1034 	struct sock_fprog trap;
1035 	struct sock_fprog kill;
1036 };
1037 
1038 FIXTURE_SETUP(precedence)
1039 {
1040 	struct sock_filter allow_insns[] = {
1041 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1042 	};
1043 	struct sock_filter log_insns[] = {
1044 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1045 			offsetof(struct seccomp_data, nr)),
1046 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1047 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1048 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
1049 	};
1050 	struct sock_filter trace_insns[] = {
1051 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1052 			offsetof(struct seccomp_data, nr)),
1053 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1054 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1055 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
1056 	};
1057 	struct sock_filter error_insns[] = {
1058 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1059 			offsetof(struct seccomp_data, nr)),
1060 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1061 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1062 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
1063 	};
1064 	struct sock_filter trap_insns[] = {
1065 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1066 			offsetof(struct seccomp_data, nr)),
1067 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1068 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1069 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
1070 	};
1071 	struct sock_filter kill_insns[] = {
1072 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1073 			offsetof(struct seccomp_data, nr)),
1074 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1075 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1076 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1077 	};
1078 
1079 	memset(self, 0, sizeof(*self));
1080 #define FILTER_ALLOC(_x) \
1081 	self->_x.filter = malloc(sizeof(_x##_insns)); \
1082 	ASSERT_NE(NULL, self->_x.filter); \
1083 	memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
1084 	self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
1085 	FILTER_ALLOC(allow);
1086 	FILTER_ALLOC(log);
1087 	FILTER_ALLOC(trace);
1088 	FILTER_ALLOC(error);
1089 	FILTER_ALLOC(trap);
1090 	FILTER_ALLOC(kill);
1091 }
1092 
1093 FIXTURE_TEARDOWN(precedence)
1094 {
1095 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
1096 	FILTER_FREE(allow);
1097 	FILTER_FREE(log);
1098 	FILTER_FREE(trace);
1099 	FILTER_FREE(error);
1100 	FILTER_FREE(trap);
1101 	FILTER_FREE(kill);
1102 }
1103 
1104 TEST_F(precedence, allow_ok)
1105 {
1106 	pid_t parent, res = 0;
1107 	long ret;
1108 
1109 	parent = getppid();
1110 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1111 	ASSERT_EQ(0, ret);
1112 
1113 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1114 	ASSERT_EQ(0, ret);
1115 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1116 	ASSERT_EQ(0, ret);
1117 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1118 	ASSERT_EQ(0, ret);
1119 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1120 	ASSERT_EQ(0, ret);
1121 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1122 	ASSERT_EQ(0, ret);
1123 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1124 	ASSERT_EQ(0, ret);
1125 	/* Should work just fine. */
1126 	res = syscall(__NR_getppid);
1127 	EXPECT_EQ(parent, res);
1128 }
1129 
1130 TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
1131 {
1132 	pid_t parent, res = 0;
1133 	long ret;
1134 
1135 	parent = getppid();
1136 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1137 	ASSERT_EQ(0, ret);
1138 
1139 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1140 	ASSERT_EQ(0, ret);
1141 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1142 	ASSERT_EQ(0, ret);
1143 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1144 	ASSERT_EQ(0, ret);
1145 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1146 	ASSERT_EQ(0, ret);
1147 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1148 	ASSERT_EQ(0, ret);
1149 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1150 	ASSERT_EQ(0, ret);
1151 	/* Should work just fine. */
1152 	res = syscall(__NR_getppid);
1153 	EXPECT_EQ(parent, res);
1154 	/* getpid() should never return. */
1155 	res = syscall(__NR_getpid);
1156 	EXPECT_EQ(0, res);
1157 }
1158 
1159 TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
1160 {
1161 	pid_t parent;
1162 	long ret;
1163 
1164 	parent = getppid();
1165 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1166 	ASSERT_EQ(0, ret);
1167 
1168 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1169 	ASSERT_EQ(0, ret);
1170 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1171 	ASSERT_EQ(0, ret);
1172 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1173 	ASSERT_EQ(0, ret);
1174 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1175 	ASSERT_EQ(0, ret);
1176 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1177 	ASSERT_EQ(0, ret);
1178 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1179 	ASSERT_EQ(0, ret);
1180 	/* Should work just fine. */
1181 	EXPECT_EQ(parent, syscall(__NR_getppid));
1182 	/* getpid() should never return. */
1183 	EXPECT_EQ(0, syscall(__NR_getpid));
1184 }
1185 
1186 TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
1187 {
1188 	pid_t parent;
1189 	long ret;
1190 
1191 	parent = getppid();
1192 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1193 	ASSERT_EQ(0, ret);
1194 
1195 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1196 	ASSERT_EQ(0, ret);
1197 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1198 	ASSERT_EQ(0, ret);
1199 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1200 	ASSERT_EQ(0, ret);
1201 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1202 	ASSERT_EQ(0, ret);
1203 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1204 	ASSERT_EQ(0, ret);
1205 	/* Should work just fine. */
1206 	EXPECT_EQ(parent, syscall(__NR_getppid));
1207 	/* getpid() should never return. */
1208 	EXPECT_EQ(0, syscall(__NR_getpid));
1209 }
1210 
1211 TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
1212 {
1213 	pid_t parent;
1214 	long ret;
1215 
1216 	parent = getppid();
1217 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1218 	ASSERT_EQ(0, ret);
1219 
1220 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1221 	ASSERT_EQ(0, ret);
1222 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1223 	ASSERT_EQ(0, ret);
1224 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1225 	ASSERT_EQ(0, ret);
1226 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1227 	ASSERT_EQ(0, ret);
1228 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1229 	ASSERT_EQ(0, ret);
1230 	/* Should work just fine. */
1231 	EXPECT_EQ(parent, syscall(__NR_getppid));
1232 	/* getpid() should never return. */
1233 	EXPECT_EQ(0, syscall(__NR_getpid));
1234 }
1235 
1236 TEST_F(precedence, errno_is_third)
1237 {
1238 	pid_t parent;
1239 	long ret;
1240 
1241 	parent = getppid();
1242 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1243 	ASSERT_EQ(0, ret);
1244 
1245 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1246 	ASSERT_EQ(0, ret);
1247 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1248 	ASSERT_EQ(0, ret);
1249 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1250 	ASSERT_EQ(0, ret);
1251 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1252 	ASSERT_EQ(0, ret);
1253 	/* Should work just fine. */
1254 	EXPECT_EQ(parent, syscall(__NR_getppid));
1255 	EXPECT_EQ(0, syscall(__NR_getpid));
1256 }
1257 
1258 TEST_F(precedence, errno_is_third_in_any_order)
1259 {
1260 	pid_t parent;
1261 	long ret;
1262 
1263 	parent = getppid();
1264 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1265 	ASSERT_EQ(0, ret);
1266 
1267 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1268 	ASSERT_EQ(0, ret);
1269 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1270 	ASSERT_EQ(0, ret);
1271 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1272 	ASSERT_EQ(0, ret);
1273 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1274 	ASSERT_EQ(0, ret);
1275 	/* Should work just fine. */
1276 	EXPECT_EQ(parent, syscall(__NR_getppid));
1277 	EXPECT_EQ(0, syscall(__NR_getpid));
1278 }
1279 
1280 TEST_F(precedence, trace_is_fourth)
1281 {
1282 	pid_t parent;
1283 	long ret;
1284 
1285 	parent = getppid();
1286 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1287 	ASSERT_EQ(0, ret);
1288 
1289 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1290 	ASSERT_EQ(0, ret);
1291 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1292 	ASSERT_EQ(0, ret);
1293 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1294 	ASSERT_EQ(0, ret);
1295 	/* Should work just fine. */
1296 	EXPECT_EQ(parent, syscall(__NR_getppid));
1297 	/* No ptracer */
1298 	EXPECT_EQ(-1, syscall(__NR_getpid));
1299 }
1300 
1301 TEST_F(precedence, trace_is_fourth_in_any_order)
1302 {
1303 	pid_t parent;
1304 	long ret;
1305 
1306 	parent = getppid();
1307 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1308 	ASSERT_EQ(0, ret);
1309 
1310 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1311 	ASSERT_EQ(0, ret);
1312 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1313 	ASSERT_EQ(0, ret);
1314 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1315 	ASSERT_EQ(0, ret);
1316 	/* Should work just fine. */
1317 	EXPECT_EQ(parent, syscall(__NR_getppid));
1318 	/* No ptracer */
1319 	EXPECT_EQ(-1, syscall(__NR_getpid));
1320 }
1321 
1322 TEST_F(precedence, log_is_fifth)
1323 {
1324 	pid_t mypid, parent;
1325 	long ret;
1326 
1327 	mypid = getpid();
1328 	parent = getppid();
1329 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1330 	ASSERT_EQ(0, ret);
1331 
1332 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1333 	ASSERT_EQ(0, ret);
1334 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1335 	ASSERT_EQ(0, ret);
1336 	/* Should work just fine. */
1337 	EXPECT_EQ(parent, syscall(__NR_getppid));
1338 	/* Should also work just fine */
1339 	EXPECT_EQ(mypid, syscall(__NR_getpid));
1340 }
1341 
1342 TEST_F(precedence, log_is_fifth_in_any_order)
1343 {
1344 	pid_t mypid, parent;
1345 	long ret;
1346 
1347 	mypid = getpid();
1348 	parent = getppid();
1349 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1350 	ASSERT_EQ(0, ret);
1351 
1352 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1353 	ASSERT_EQ(0, ret);
1354 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1355 	ASSERT_EQ(0, ret);
1356 	/* Should work just fine. */
1357 	EXPECT_EQ(parent, syscall(__NR_getppid));
1358 	/* Should also work just fine */
1359 	EXPECT_EQ(mypid, syscall(__NR_getpid));
1360 }
1361 
1362 #ifndef PTRACE_O_TRACESECCOMP
1363 #define PTRACE_O_TRACESECCOMP	0x00000080
1364 #endif
1365 
1366 /* Catch the Ubuntu 12.04 value error. */
1367 #if PTRACE_EVENT_SECCOMP != 7
1368 #undef PTRACE_EVENT_SECCOMP
1369 #endif
1370 
1371 #ifndef PTRACE_EVENT_SECCOMP
1372 #define PTRACE_EVENT_SECCOMP 7
1373 #endif
1374 
1375 #define IS_SECCOMP_EVENT(status) ((status >> 16) == PTRACE_EVENT_SECCOMP)
1376 bool tracer_running;
1377 void tracer_stop(int sig)
1378 {
1379 	tracer_running = false;
1380 }
1381 
1382 typedef void tracer_func_t(struct __test_metadata *_metadata,
1383 			   pid_t tracee, int status, void *args);
1384 
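/*
 * Tracer child body: attach to the tracee, set the requested ptrace options,
 * unblock the tracee via the pipe, then hand every stop to tracer_func until
 * SIGUSR1 clears tracer_running or the tracee exits.
 */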
1385 void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
1386 	    tracer_func_t tracer_func, void *args, bool ptrace_syscall)
1387 {
1388 	int ret = -1;
1389 	struct sigaction action = {
1390 		.sa_handler = tracer_stop,
1391 	};
1392 
1393 	/* Allow external shutdown. */
1394 	tracer_running = true;
1395 	ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));
1396 
1397 	errno = 0;
1398 	while (ret == -1 && errno != EINVAL)
1399 		ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
1400 	ASSERT_EQ(0, ret) {
1401 		kill(tracee, SIGKILL);
1402 	}
1403 	/* Wait for attach stop */
1404 	wait(NULL);
1405 
1406 	ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ?
1407 						      PTRACE_O_TRACESYSGOOD :
1408 						      PTRACE_O_TRACESECCOMP);
1409 	ASSERT_EQ(0, ret) {
1410 		TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
1411 		kill(tracee, SIGKILL);
1412 	}
1413 	ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
1414 		     tracee, NULL, 0);
1415 	ASSERT_EQ(0, ret);
1416 
1417 	/* Unblock the tracee */
1418 	ASSERT_EQ(1, write(fd, "A", 1));
1419 	ASSERT_EQ(0, close(fd));
1420 
1421 	/* Run until we're shut down. Must assert to stop execution. */
1422 	while (tracer_running) {
1423 		int status;
1424 
1425 		if (wait(&status) != tracee)
1426 			continue;
1427 		if (WIFSIGNALED(status) || WIFEXITED(status))
1428 			/* Child is dead. Time to go. */
1429 			return;
1430 
1431 		/* Check if this is a seccomp event. */
1432 		ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status));
1433 
1434 		tracer_func(_metadata, tracee, status, args);
1435 
1436 		ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
1437 			     tracee, NULL, 0);
1438 		ASSERT_EQ(0, ret);
1439 	}
1440 	/* Directly report the status of our test harness results. */
1441 	syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
1442 }
1443 
1444 /* Common tracer setup/teardown functions. */
1445 void cont_handler(int num)
1446 { }
1447 pid_t setup_trace_fixture(struct __test_metadata *_metadata,
1448 			  tracer_func_t func, void *args, bool ptrace_syscall)
1449 {
1450 	char sync;
1451 	int pipefd[2];
1452 	pid_t tracer_pid;
1453 	pid_t tracee = getpid();
1454 
1455 	/* Setup a pipe for clean synchronization. */
1456 	ASSERT_EQ(0, pipe(pipefd));
1457 
1458 	/* Fork a child which we'll promote to tracer */
1459 	tracer_pid = fork();
1460 	ASSERT_LE(0, tracer_pid);
1461 	signal(SIGALRM, cont_handler);
1462 	if (tracer_pid == 0) {
1463 		close(pipefd[0]);
1464 		start_tracer(_metadata, pipefd[1], tracee, func, args,
1465 			     ptrace_syscall);
1466 		syscall(__NR_exit, 0);
1467 	}
1468 	close(pipefd[1]);
1469 	prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
1470 	read(pipefd[0], &sync, 1);
1471 	close(pipefd[0]);
1472 
1473 	return tracer_pid;
1474 }
1475 void teardown_trace_fixture(struct __test_metadata *_metadata,
1476 			    pid_t tracer)
1477 {
1478 	if (tracer) {
1479 		int status;
1480 		/*
1481 		 * Extract the exit code from the other process and
1482 		 * adopt it for ourselves in case its asserts failed.
1483 		 */
1484 		ASSERT_EQ(0, kill(tracer, SIGUSR1));
1485 		ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
1486 		if (WEXITSTATUS(status))
1487 			_metadata->passed = 0;
1488 	}
1489 }
1490 
1491 /* "poke" tracer arguments and function. */
1492 struct tracer_args_poke_t {
1493 	unsigned long poke_addr;
1494 };
1495 
1496 void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
1497 		 void *args)
1498 {
1499 	int ret;
1500 	unsigned long msg;
1501 	struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
1502 
1503 	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1504 	EXPECT_EQ(0, ret);
1505 	/* If this fails, don't try to recover. */
1506 	ASSERT_EQ(0x1001, msg) {
1507 		kill(tracee, SIGKILL);
1508 	}
1509 	/*
1510 	 * Poke in the message.
1511 	 * Registers are not touched to try to keep this relatively arch
1512 	 * agnostic.
1513 	 */
1514 	ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
1515 	EXPECT_EQ(0, ret);
1516 }
1517 
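/*
 * TRACE_poke traces read() with SECCOMP_RET_TRACE data 0x1001; the tracer
 * pokes that value into self->poked to prove the trace event fired.
 */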
1518 FIXTURE(TRACE_poke) {
1519 	struct sock_fprog prog;
1520 	pid_t tracer;
1521 	long poked;
1522 	struct tracer_args_poke_t tracer_args;
1523 };
1524 
1525 FIXTURE_SETUP(TRACE_poke)
1526 {
1527 	struct sock_filter filter[] = {
1528 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1529 			offsetof(struct seccomp_data, nr)),
1530 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1531 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
1532 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1533 	};
1534 
1535 	self->poked = 0;
1536 	memset(&self->prog, 0, sizeof(self->prog));
1537 	self->prog.filter = malloc(sizeof(filter));
1538 	ASSERT_NE(NULL, self->prog.filter);
1539 	memcpy(self->prog.filter, filter, sizeof(filter));
1540 	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1541 
1542 	/* Set up tracer args. */
1543 	self->tracer_args.poke_addr = (unsigned long)&self->poked;
1544 
1545 	/* Launch tracer. */
1546 	self->tracer = setup_trace_fixture(_metadata, tracer_poke,
1547 					   &self->tracer_args, false);
1548 }
1549 
1550 FIXTURE_TEARDOWN(TRACE_poke)
1551 {
1552 	teardown_trace_fixture(_metadata, self->tracer);
1553 	if (self->prog.filter)
1554 		free(self->prog.filter);
1555 }
1556 
1557 TEST_F(TRACE_poke, read_has_side_effects)
1558 {
1559 	ssize_t ret;
1560 
1561 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1562 	ASSERT_EQ(0, ret);
1563 
1564 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1565 	ASSERT_EQ(0, ret);
1566 
1567 	EXPECT_EQ(0, self->poked);
1568 	ret = read(-1, NULL, 0);
1569 	EXPECT_EQ(-1, ret);
1570 	EXPECT_EQ(0x1001, self->poked);
1571 }
1572 
1573 TEST_F(TRACE_poke, getpid_runs_normally)
1574 {
1575 	long ret;
1576 
1577 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1578 	ASSERT_EQ(0, ret);
1579 
1580 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1581 	ASSERT_EQ(0, ret);
1582 
1583 	EXPECT_EQ(0, self->poked);
1584 	EXPECT_NE(0, syscall(__NR_getpid));
1585 	EXPECT_EQ(0, self->poked);
1586 }
1587 
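/*
 * Per-architecture register layout and the registers that hold the syscall
 * number and return value, used by the ptrace helpers below.
 */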
1588 #if defined(__x86_64__)
1589 # define ARCH_REGS	struct user_regs_struct
1590 # define SYSCALL_NUM	orig_rax
1591 # define SYSCALL_RET	rax
1592 #elif defined(__i386__)
1593 # define ARCH_REGS	struct user_regs_struct
1594 # define SYSCALL_NUM	orig_eax
1595 # define SYSCALL_RET	eax
1596 #elif defined(__arm__)
1597 # define ARCH_REGS	struct pt_regs
1598 # define SYSCALL_NUM	ARM_r7
1599 # define SYSCALL_RET	ARM_r0
1600 #elif defined(__aarch64__)
1601 # define ARCH_REGS	struct user_pt_regs
1602 # define SYSCALL_NUM	regs[8]
1603 # define SYSCALL_RET	regs[0]
1604 #elif defined(__riscv) && __riscv_xlen == 64
1605 # define ARCH_REGS	struct user_regs_struct
1606 # define SYSCALL_NUM	a7
1607 # define SYSCALL_RET	a0
1608 #elif defined(__hppa__)
1609 # define ARCH_REGS	struct user_regs_struct
1610 # define SYSCALL_NUM	gr[20]
1611 # define SYSCALL_RET	gr[28]
1612 #elif defined(__powerpc__)
1613 # define ARCH_REGS	struct pt_regs
1614 # define SYSCALL_NUM	gpr[0]
1615 # define SYSCALL_RET	gpr[3]
1616 #elif defined(__s390__)
1617 # define ARCH_REGS     s390_regs
1618 # define SYSCALL_NUM   gprs[2]
1619 # define SYSCALL_RET   gprs[2]
1620 # define SYSCALL_NUM_RET_SHARE_REG
1621 #elif defined(__mips__)
1622 # define ARCH_REGS	struct pt_regs
1623 # define SYSCALL_NUM	regs[2]
1624 # define SYSCALL_SYSCALL_NUM regs[4]
1625 # define SYSCALL_RET	regs[2]
1626 # define SYSCALL_NUM_RET_SHARE_REG
1627 #elif defined(__xtensa__)
1628 # define ARCH_REGS	struct user_pt_regs
1629 # define SYSCALL_NUM	syscall
1630 /*
1631  * On xtensa, the syscall return value is in register a2 of the
1632  * current window, which is not fixed.
1633  */
1634 #define SYSCALL_RET(reg) a[(reg).windowbase * 4 + 2]
1635 #else
1636 # error "Do not know how to find your architecture's registers and syscalls"
1637 #endif
1638 
1639 /* When the syscall return can't be changed, stub out the tests for it. */
1640 #ifdef SYSCALL_NUM_RET_SHARE_REG
1641 # define EXPECT_SYSCALL_RETURN(val, action)	EXPECT_EQ(-1, action)
1642 #else
1643 # define EXPECT_SYSCALL_RETURN(val, action)		\
1644 	do {						\
1645 		errno = 0;				\
1646 		if (val < 0) {				\
1647 			EXPECT_EQ(-1, action);		\
1648 			EXPECT_EQ(-(val), errno);	\
1649 		} else {				\
1650 			EXPECT_EQ(val, action);		\
1651 		}					\
1652 	} while (0)
1653 #endif
1654 
1655 /* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
1656  * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
1657  */
1658 #if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
1659 #define HAVE_GETREGS
1660 #endif
1661 
1662 /* Architecture-specific syscall fetching routine. */
1663 int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
1664 {
1665 	ARCH_REGS regs;
1666 #ifdef HAVE_GETREGS
1667 	EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
1668 		TH_LOG("PTRACE_GETREGS failed");
1669 		return -1;
1670 	}
1671 #else
1672 	struct iovec iov;
1673 
1674 	iov.iov_base = &regs;
1675 	iov.iov_len = sizeof(regs);
1676 	EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
1677 		TH_LOG("PTRACE_GETREGSET failed");
1678 		return -1;
1679 	}
1680 #endif
1681 
1682 #if defined(__mips__)
1683 	if (regs.SYSCALL_NUM == __NR_O32_Linux)
1684 		return regs.SYSCALL_SYSCALL_NUM;
1685 #endif
1686 	return regs.SYSCALL_NUM;
1687 }
1688 
1689 /* Architecture-specific syscall changing routine. */
1690 void change_syscall(struct __test_metadata *_metadata,
1691 		    pid_t tracee, int syscall, int result)
1692 {
1693 	int ret;
1694 	ARCH_REGS regs;
1695 #ifdef HAVE_GETREGS
1696 	ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);
1697 #else
1698 	struct iovec iov;
1699 	iov.iov_base = &regs;
1700 	iov.iov_len = sizeof(regs);
1701 	ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
1702 #endif
1703 	EXPECT_EQ(0, ret) {}
1704 
1705 #if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
1706 	defined(__s390__) || defined(__hppa__) || defined(__riscv) || \
1707 	defined(__xtensa__)
1708 	{
1709 		regs.SYSCALL_NUM = syscall;
1710 	}
1711 #elif defined(__mips__)
1712 	{
1713 		if (regs.SYSCALL_NUM == __NR_O32_Linux)
1714 			regs.SYSCALL_SYSCALL_NUM = syscall;
1715 		else
1716 			regs.SYSCALL_NUM = syscall;
1717 	}
1718 
1719 #elif defined(__arm__)
1720 # ifndef PTRACE_SET_SYSCALL
1721 #  define PTRACE_SET_SYSCALL   23
1722 # endif
1723 	{
1724 		ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
1725 		EXPECT_EQ(0, ret);
1726 	}
1727 
1728 #elif defined(__aarch64__)
1729 # ifndef NT_ARM_SYSTEM_CALL
1730 #  define NT_ARM_SYSTEM_CALL 0x404
1731 # endif
1732 	{
1733 		iov.iov_base = &syscall;
1734 		iov.iov_len = sizeof(syscall);
1735 		ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
1736 			     &iov);
1737 		EXPECT_EQ(0, ret);
1738 	}
1739 
1740 #else
1741 	ASSERT_EQ(1, 0) {
1742 		TH_LOG("How is the syscall changed on this architecture?");
1743 	}
1744 #endif
1745 
1746 	/* If syscall is skipped, change return value. */
1747 	if (syscall == -1)
1748 #ifdef SYSCALL_NUM_RET_SHARE_REG
1749 		TH_LOG("Can't modify syscall return on this architecture");
1750 
1751 #elif defined(__xtensa__)
1752 		regs.SYSCALL_RET(regs) = result;
1753 #else
1754 		regs.SYSCALL_RET = result;
1755 #endif
1756 
1757 #ifdef HAVE_GETREGS
1758 	ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
1759 #else
1760 	iov.iov_base = &regs;
1761 	iov.iov_len = sizeof(regs);
1762 	ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
1763 #endif
1764 	EXPECT_EQ(0, ret);
1765 }
1766 
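/*
 * SECCOMP_RET_TRACE handler: the PTRACE_GETEVENTMSG value carries the
 * filter's data, which selects how the pending syscall is rewritten.
 */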
1767 void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
1768 		    int status, void *args)
1769 {
1770 	int ret;
1771 	unsigned long msg;
1772 
1773 	/* Make sure we got the right message. */
1774 	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1775 	EXPECT_EQ(0, ret);
1776 
1777 	/* Validate and take action on expected syscalls. */
1778 	switch (msg) {
1779 	case 0x1002:
1780 		/* change getpid to getppid. */
1781 		EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
1782 		change_syscall(_metadata, tracee, __NR_getppid, 0);
1783 		break;
1784 	case 0x1003:
1785 		/* skip gettid with valid return code. */
1786 		EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
1787 		change_syscall(_metadata, tracee, -1, 45000);
1788 		break;
1789 	case 0x1004:
1790 		/* skip openat with error. */
1791 		EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee));
1792 		change_syscall(_metadata, tracee, -1, -ESRCH);
1793 		break;
1794 	case 0x1005:
1795 		/* do nothing (allow getppid) */
1796 		EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
1797 		break;
1798 	default:
1799 		EXPECT_EQ(0, msg) {
1800 			TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
1801 			kill(tracee, SIGKILL);
1802 		}
1803 	}
1804 
1805 }
1806 
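/*
 * PTRACE_SYSCALL handler: applies the same getpid/gettid/openat rewrites as
 * tracer_syscall(), but only at syscall entry (tracked by toggling "entry").
 */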
1807 void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
1808 		   int status, void *args)
1809 {
1810 	int ret, nr;
1811 	unsigned long msg;
1812 	static bool entry;
1813 
1814 	/*
1815 	 * The traditional way to tell PTRACE_SYSCALL entry/exit
1816 	 * is by counting.
1817 	 */
1818 	entry = !entry;
1819 
1820 	/* Make sure we got an appropriate message. */
1821 	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1822 	EXPECT_EQ(0, ret);
1823 	EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY
1824 			: PTRACE_EVENTMSG_SYSCALL_EXIT, msg);
1825 
1826 	if (!entry)
1827 		return;
1828 
1829 	nr = get_syscall(_metadata, tracee);
1830 
1831 	if (nr == __NR_getpid)
1832 		change_syscall(_metadata, tracee, __NR_getppid, 0);
1833 	if (nr == __NR_gettid)
1834 		change_syscall(_metadata, tracee, -1, 45000);
1835 	if (nr == __NR_openat)
1836 		change_syscall(_metadata, tracee, -1, -ESRCH);
1837 }
1838 
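/*
 * TRACE_syscall tags getpid/gettid/openat/getppid with SECCOMP_RET_TRACE
 * data values 0x1002-0x1005, which tracer_syscall() uses to redirect, fake,
 * or error out the corresponding syscalls.
 */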
1839 FIXTURE(TRACE_syscall) {
1840 	struct sock_fprog prog;
1841 	pid_t tracer, mytid, mypid, parent;
1842 };
1843 
1844 FIXTURE_SETUP(TRACE_syscall)
1845 {
1846 	struct sock_filter filter[] = {
1847 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1848 			offsetof(struct seccomp_data, nr)),
1849 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
1850 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
1851 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
1852 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
1853 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1),
1854 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
1855 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1856 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005),
1857 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1858 	};
1859 
1860 	memset(&self->prog, 0, sizeof(self->prog));
1861 	self->prog.filter = malloc(sizeof(filter));
1862 	ASSERT_NE(NULL, self->prog.filter);
1863 	memcpy(self->prog.filter, filter, sizeof(filter));
1864 	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1865 
1866 	/* Prepare some testable syscall results. */
1867 	self->mytid = syscall(__NR_gettid);
1868 	ASSERT_GT(self->mytid, 0);
1869 	ASSERT_NE(self->mytid, 1) {
1870 		TH_LOG("Running this test as init is not supported. :)");
1871 	}
1872 
1873 	self->mypid = getpid();
1874 	ASSERT_GT(self->mypid, 0);
1875 	ASSERT_EQ(self->mytid, self->mypid);
1876 
1877 	self->parent = getppid();
1878 	ASSERT_GT(self->parent, 0);
1879 	ASSERT_NE(self->parent, self->mypid);
1880 
1881 	/* Launch tracer. */
1882 	self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL,
1883 					   false);
1884 }
1885 
1886 FIXTURE_TEARDOWN(TRACE_syscall)
1887 {
1888 	teardown_trace_fixture(_metadata, self->tracer);
1889 	if (self->prog.filter)
1890 		free(self->prog.filter);
1891 }
1892 
1893 TEST_F(TRACE_syscall, ptrace_syscall_redirected)
1894 {
1895 	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1896 	teardown_trace_fixture(_metadata, self->tracer);
1897 	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1898 					   true);
1899 
1900 	/* Tracer will redirect getpid to getppid. */
1901 	EXPECT_NE(self->mypid, syscall(__NR_getpid));
1902 }
1903 
1904 TEST_F(TRACE_syscall, ptrace_syscall_errno)
1905 {
1906 	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1907 	teardown_trace_fixture(_metadata, self->tracer);
1908 	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1909 					   true);
1910 
1911 	/* Tracer should skip the open syscall, resulting in ESRCH. */
1912 	EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
1913 }
1914 
1915 TEST_F(TRACE_syscall, ptrace_syscall_faked)
1916 {
1917 	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1918 	teardown_trace_fixture(_metadata, self->tracer);
1919 	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1920 					   true);
1921 
1922 	/* Tracer should skip the gettid syscall, resulting in a fake pid. */
1923 	EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
1924 }
1925 
1926 TEST_F(TRACE_syscall, syscall_allowed)
1927 {
1928 	long ret;
1929 
1930 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1931 	ASSERT_EQ(0, ret);
1932 
1933 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1934 	ASSERT_EQ(0, ret);
1935 
1936 	/* getppid works as expected (no changes). */
1937 	EXPECT_EQ(self->parent, syscall(__NR_getppid));
1938 	EXPECT_NE(self->mypid, syscall(__NR_getppid));
1939 }
1940 
1941 TEST_F(TRACE_syscall, syscall_redirected)
1942 {
1943 	long ret;
1944 
1945 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1946 	ASSERT_EQ(0, ret);
1947 
1948 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1949 	ASSERT_EQ(0, ret);
1950 
1951 	/* getpid has been redirected to getppid as expected. */
1952 	EXPECT_EQ(self->parent, syscall(__NR_getpid));
1953 	EXPECT_NE(self->mypid, syscall(__NR_getpid));
1954 }
1955 
1956 TEST_F(TRACE_syscall, syscall_errno)
1957 {
1958 	long ret;
1959 
1960 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1961 	ASSERT_EQ(0, ret);
1962 
1963 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1964 	ASSERT_EQ(0, ret);
1965 
1966 	/* openat has been skipped, resulting in an errno return. */
1967 	EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
1968 }
1969 
1970 TEST_F(TRACE_syscall, syscall_faked)
1971 {
1972 	long ret;
1973 
1974 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1975 	ASSERT_EQ(0, ret);
1976 
1977 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1978 	ASSERT_EQ(0, ret);
1979 
1980 	/* gettid has been skipped and an altered return value stored. */
1981 	EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
1982 }
1983 
1984 TEST_F(TRACE_syscall, skip_after_RET_TRACE)
1985 {
1986 	struct sock_filter filter[] = {
1987 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1988 			offsetof(struct seccomp_data, nr)),
1989 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1990 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
1991 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1992 	};
1993 	struct sock_fprog prog = {
1994 		.len = (unsigned short)ARRAY_SIZE(filter),
1995 		.filter = filter,
1996 	};
1997 	long ret;
1998 
1999 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2000 	ASSERT_EQ(0, ret);
2001 
2002 	/* Install fixture filter. */
2003 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
2004 	ASSERT_EQ(0, ret);
2005 
2006 	/* Install "errno on getppid" filter. */
2007 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2008 	ASSERT_EQ(0, ret);
2009 
2010 	/* Tracer will redirect getpid to getppid, and we should see EPERM. */
2011 	errno = 0;
2012 	EXPECT_EQ(-1, syscall(__NR_getpid));
2013 	EXPECT_EQ(EPERM, errno);
2014 }
2015 
2016 TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS)
2017 {
2018 	struct sock_filter filter[] = {
2019 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2020 			offsetof(struct seccomp_data, nr)),
2021 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
2022 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2023 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2024 	};
2025 	struct sock_fprog prog = {
2026 		.len = (unsigned short)ARRAY_SIZE(filter),
2027 		.filter = filter,
2028 	};
2029 	long ret;
2030 
2031 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2032 	ASSERT_EQ(0, ret);
2033 
2034 	/* Install fixture filter. */
2035 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
2036 	ASSERT_EQ(0, ret);
2037 
2038 	/* Install "death on getppid" filter. */
2039 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2040 	ASSERT_EQ(0, ret);
2041 
2042 	/* Tracer will redirect getpid to getppid, and we should die. */
2043 	EXPECT_NE(self->mypid, syscall(__NR_getpid));
2044 }
2045 
2046 TEST_F(TRACE_syscall, skip_after_ptrace)
2047 {
2048 	struct sock_filter filter[] = {
2049 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2050 			offsetof(struct seccomp_data, nr)),
2051 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
2052 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
2053 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2054 	};
2055 	struct sock_fprog prog = {
2056 		.len = (unsigned short)ARRAY_SIZE(filter),
2057 		.filter = filter,
2058 	};
2059 	long ret;
2060 
2061 	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
2062 	teardown_trace_fixture(_metadata, self->tracer);
2063 	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
2064 					   true);
2065 
2066 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2067 	ASSERT_EQ(0, ret);
2068 
2069 	/* Install "errno on getppid" filter. */
2070 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2071 	ASSERT_EQ(0, ret);
2072 
2073 	/* Tracer will redirect getpid to getppid, and we should see EPERM. */
2074 	EXPECT_EQ(-1, syscall(__NR_getpid));
2075 	EXPECT_EQ(EPERM, errno);
2076 }
2077 
2078 TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
2079 {
2080 	struct sock_filter filter[] = {
2081 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2082 			offsetof(struct seccomp_data, nr)),
2083 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
2084 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2085 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2086 	};
2087 	struct sock_fprog prog = {
2088 		.len = (unsigned short)ARRAY_SIZE(filter),
2089 		.filter = filter,
2090 	};
2091 	long ret;
2092 
2093 	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
2094 	teardown_trace_fixture(_metadata, self->tracer);
2095 	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
2096 					   true);
2097 
2098 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2099 	ASSERT_EQ(0, ret);
2100 
2101 	/* Install "death on getppid" filter. */
2102 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2103 	ASSERT_EQ(0, ret);
2104 
2105 	/* Tracer will redirect getpid to getppid, and we should die. */
2106 	EXPECT_NE(self->mypid, syscall(__NR_getpid));
2107 }
2108 
2109 TEST(seccomp_syscall)
2110 {
2111 	struct sock_filter filter[] = {
2112 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2113 	};
2114 	struct sock_fprog prog = {
2115 		.len = (unsigned short)ARRAY_SIZE(filter),
2116 		.filter = filter,
2117 	};
2118 	long ret;
2119 
2120 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2121 	ASSERT_EQ(0, ret) {
2122 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2123 	}
2124 
2125 	/* Reject insane operation. */
2126 	ret = seccomp(-1, 0, &prog);
2127 	ASSERT_NE(ENOSYS, errno) {
2128 		TH_LOG("Kernel does not support seccomp syscall!");
2129 	}
2130 	EXPECT_EQ(EINVAL, errno) {
2131 		TH_LOG("Did not reject crazy op value!");
2132 	}
2133 
2134 	/* Reject strict with flags or pointer. */
2135 	ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
2136 	EXPECT_EQ(EINVAL, errno) {
2137 		TH_LOG("Did not reject mode strict with flags!");
2138 	}
2139 	ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
2140 	EXPECT_EQ(EINVAL, errno) {
2141 		TH_LOG("Did not reject mode strict with uargs!");
2142 	}
2143 
2144 	/* Reject insane args for filter. */
2145 	ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
2146 	EXPECT_EQ(EINVAL, errno) {
2147 		TH_LOG("Did not reject crazy filter flags!");
2148 	}
2149 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
2150 	EXPECT_EQ(EFAULT, errno) {
2151 		TH_LOG("Did not reject NULL filter!");
2152 	}
2153 
2154 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2155 	EXPECT_EQ(0, errno) {
2156 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
2157 			strerror(errno));
2158 	}
2159 }
2160 
2161 TEST(seccomp_syscall_mode_lock)
2162 {
2163 	struct sock_filter filter[] = {
2164 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2165 	};
2166 	struct sock_fprog prog = {
2167 		.len = (unsigned short)ARRAY_SIZE(filter),
2168 		.filter = filter,
2169 	};
2170 	long ret;
2171 
2172 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2173 	ASSERT_EQ(0, ret) {
2174 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2175 	}
2176 
2177 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2178 	ASSERT_NE(ENOSYS, errno) {
2179 		TH_LOG("Kernel does not support seccomp syscall!");
2180 	}
2181 	EXPECT_EQ(0, ret) {
2182 		TH_LOG("Could not install filter!");
2183 	}
2184 
2185 	/* Make sure neither entry point will switch to strict. */
2186 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
2187 	EXPECT_EQ(EINVAL, errno) {
2188 		TH_LOG("Switched to mode strict!");
2189 	}
2190 
2191 	ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
2192 	EXPECT_EQ(EINVAL, errno) {
2193 		TH_LOG("Switched to mode strict!");
2194 	}
2195 }
2196 
2197 /*
2198  * Test detection of known and unknown filter flags. Userspace needs to be able
2199  * to check if a filter flag is supported by the current kernel and a good way
2200  * of doing that is by attempting to enter filter mode, with the flag bit in
2201  * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates
2202  * that the flag is valid and EINVAL indicates that the flag is invalid.
2203  */
2204 TEST(detect_seccomp_filter_flags)
2205 {
2206 	unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
2207 				 SECCOMP_FILTER_FLAG_LOG,
2208 				 SECCOMP_FILTER_FLAG_SPEC_ALLOW,
2209 				 SECCOMP_FILTER_FLAG_NEW_LISTENER,
2210 				 SECCOMP_FILTER_FLAG_TSYNC_ESRCH };
2211 	unsigned int exclusive[] = {
2212 				SECCOMP_FILTER_FLAG_TSYNC,
2213 				SECCOMP_FILTER_FLAG_NEW_LISTENER };
2214 	unsigned int flag, all_flags, exclusive_mask;
2215 	int i;
2216 	long ret;
2217 
2218 	/* Test detection of individual known-good filter flags */
2219 	for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
2220 		int bits = 0;
2221 
2222 		flag = flags[i];
2223 		/* Make sure the flag is a single bit! */
2224 		while (flag) {
2225 			if (flag & 0x1)
2226 				bits++;
2227 			flag >>= 1;
2228 		}
2229 		ASSERT_EQ(1, bits);
2230 		flag = flags[i];
2231 
2232 		ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2233 		ASSERT_NE(ENOSYS, errno) {
2234 			TH_LOG("Kernel does not support seccomp syscall!");
2235 		}
2236 		EXPECT_EQ(-1, ret);
2237 		EXPECT_EQ(EFAULT, errno) {
2238 			TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
2239 			       flag);
2240 		}
2241 
2242 		all_flags |= flag;
2243 	}
2244 
2245 	/*
2246 	 * Test detection of all known-good filter flags combined. But
2247 	 * for the exclusive flags we need to mask them out and try them
2248 	 * individually for the "all flags" testing.
2249 	 */
2250 	exclusive_mask = 0;
2251 	for (i = 0; i < ARRAY_SIZE(exclusive); i++)
2252 		exclusive_mask |= exclusive[i];
2253 	for (i = 0; i < ARRAY_SIZE(exclusive); i++) {
2254 		flag = all_flags & ~exclusive_mask;
2255 		flag |= exclusive[i];
2256 
2257 		ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2258 		EXPECT_EQ(-1, ret);
2259 		EXPECT_EQ(EFAULT, errno) {
2260 			TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
2261 			       flag);
2262 		}
2263 	}
2264 
2265 	/* Test detection of an unknown filter flag, without exclusives. */
2266 	flag = -1;
2267 	flag &= ~exclusive_mask;
2268 	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2269 	EXPECT_EQ(-1, ret);
2270 	EXPECT_EQ(EINVAL, errno) {
2271 		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
2272 		       flag);
2273 	}
2274 
2275 	/*
2276 	 * Test detection of an unknown filter flag that may simply need to be
2277 	 * added to this test.
2278 	 */
2279 	flag = flags[ARRAY_SIZE(flags) - 1] << 1;
2280 	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2281 	EXPECT_EQ(-1, ret);
2282 	EXPECT_EQ(EINVAL, errno) {
2283 		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
2284 		       flag);
2285 	}
2286 }
2287 
2288 TEST(TSYNC_first)
2289 {
2290 	struct sock_filter filter[] = {
2291 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2292 	};
2293 	struct sock_fprog prog = {
2294 		.len = (unsigned short)ARRAY_SIZE(filter),
2295 		.filter = filter,
2296 	};
2297 	long ret;
2298 
2299 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2300 	ASSERT_EQ(0, ret) {
2301 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2302 	}
2303 
2304 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2305 		      &prog);
2306 	ASSERT_NE(ENOSYS, errno) {
2307 		TH_LOG("Kernel does not support seccomp syscall!");
2308 	}
2309 	EXPECT_EQ(0, ret) {
2310 		TH_LOG("Could not install initial filter with TSYNC!");
2311 	}
2312 }
2313 
2314 #define TSYNC_SIBLINGS 2
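/*
 * Per-thread state for the TSYNC tests. @num_waits is how many condvar
 * wakeups the sibling waits for in tsync_sibling() before it runs its
 * policy checks.
 */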
2315 struct tsync_sibling {
2316 	pthread_t tid;
2317 	pid_t system_tid;
2318 	sem_t *started;
2319 	pthread_cond_t *cond;
2320 	pthread_mutex_t *mutex;
2321 	int diverge;
2322 	int num_waits;
2323 	struct sock_fprog *prog;
2324 	struct __test_metadata *metadata;
2325 };
2326 
2327 /*
2328  * To avoid joining joined threads (which is not allowed by Bionic),
2329  * make sure we both successfully join and clear the tid to skip a
2330  * later join attempt during fixture teardown. Any remaining threads
2331  * will be directly killed during teardown.
2332  */
2333 #define PTHREAD_JOIN(tid, status)					\
2334 	do {								\
2335 		int _rc = pthread_join(tid, status);			\
2336 		if (_rc) {						\
2337 			TH_LOG("pthread_join of tid %u failed: %d\n",	\
2338 				(unsigned int)tid, _rc);		\
2339 		} else {						\
2340 			tid = 0;					\
2341 		}							\
2342 	} while (0)
2343 
2344 FIXTURE(TSYNC) {
2345 	struct sock_fprog root_prog, apply_prog;
2346 	struct tsync_sibling sibling[TSYNC_SIBLINGS];
2347 	sem_t started;
2348 	pthread_cond_t cond;
2349 	pthread_mutex_t mutex;
2350 	int sibling_count;
2351 };
2352 
2353 FIXTURE_SETUP(TSYNC)
2354 {
2355 	struct sock_filter root_filter[] = {
2356 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2357 	};
2358 	struct sock_filter apply_filter[] = {
2359 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2360 			offsetof(struct seccomp_data, nr)),
2361 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
2362 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2363 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2364 	};
2365 
2366 	memset(&self->root_prog, 0, sizeof(self->root_prog));
2367 	memset(&self->apply_prog, 0, sizeof(self->apply_prog));
2368 	memset(&self->sibling, 0, sizeof(self->sibling));
2369 	self->root_prog.filter = malloc(sizeof(root_filter));
2370 	ASSERT_NE(NULL, self->root_prog.filter);
2371 	memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
2372 	self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
2373 
2374 	self->apply_prog.filter = malloc(sizeof(apply_filter));
2375 	ASSERT_NE(NULL, self->apply_prog.filter);
2376 	memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
2377 	self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
2378 
2379 	self->sibling_count = 0;
2380 	pthread_mutex_init(&self->mutex, NULL);
2381 	pthread_cond_init(&self->cond, NULL);
2382 	sem_init(&self->started, 0, 0);
2383 	self->sibling[0].tid = 0;
2384 	self->sibling[0].cond = &self->cond;
2385 	self->sibling[0].started = &self->started;
2386 	self->sibling[0].mutex = &self->mutex;
2387 	self->sibling[0].diverge = 0;
2388 	self->sibling[0].num_waits = 1;
2389 	self->sibling[0].prog = &self->root_prog;
2390 	self->sibling[0].metadata = _metadata;
2391 	self->sibling[1].tid = 0;
2392 	self->sibling[1].cond = &self->cond;
2393 	self->sibling[1].started = &self->started;
2394 	self->sibling[1].mutex = &self->mutex;
2395 	self->sibling[1].diverge = 0;
2396 	self->sibling[1].prog = &self->root_prog;
2397 	self->sibling[1].num_waits = 1;
2398 	self->sibling[1].metadata = _metadata;
2399 }
2400 
2401 FIXTURE_TEARDOWN(TSYNC)
2402 {
2403 	int sib = 0;
2404 
2405 	if (self->root_prog.filter)
2406 		free(self->root_prog.filter);
2407 	if (self->apply_prog.filter)
2408 		free(self->apply_prog.filter);
2409 
2410 	for ( ; sib < self->sibling_count; ++sib) {
2411 		struct tsync_sibling *s = &self->sibling[sib];
2412 
2413 		if (!s->tid)
2414 			continue;
2415 		/*
2416 		 * If a thread is still running, it may be stuck, so hit
2417 		 * it over the head really hard.
2418 		 */
2419 		pthread_kill(s->tid, SIGKILL);
2420 	}
2421 	pthread_mutex_destroy(&self->mutex);
2422 	pthread_cond_destroy(&self->cond);
2423 	sem_destroy(&self->started);
2424 }
2425 
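/*
 * Thread body for the TSYNC tests: record the kernel tid, optionally
 * diverge by installing a private copy of the root filter, signal the
 * fixture that the thread has started, then sleep on the condvar until
 * told to check no_new_privs and issue the read() that a synced
 * kill-filter catches.
 */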
2426 void *tsync_sibling(void *data)
2427 {
2428 	long ret = 0;
2429 	struct tsync_sibling *me = data;
2430 
2431 	me->system_tid = syscall(__NR_gettid);
2432 
2433 	pthread_mutex_lock(me->mutex);
2434 	if (me->diverge) {
2435 		/* Just re-apply the root prog to fork the tree */
2436 		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
2437 				me->prog, 0, 0);
2438 	}
2439 	sem_post(me->started);
2440 	/* Return only after posting 'started' so the parent notices failures. */
2441 	if (ret) {
2442 		pthread_mutex_unlock(me->mutex);
2443 		return (void *)SIBLING_EXIT_FAILURE;
2444 	}
2445 	do {
2446 		pthread_cond_wait(me->cond, me->mutex);
2447 		me->num_waits = me->num_waits - 1;
2448 	} while (me->num_waits);
2449 	pthread_mutex_unlock(me->mutex);
2450 
2451 	ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
2452 	if (!ret)
2453 		return (void *)SIBLING_EXIT_NEWPRIVS;
2454 	read(0, NULL, 0);
2455 	return (void *)SIBLING_EXIT_UNKILLED;
2456 }
2457 
2458 void tsync_start_sibling(struct tsync_sibling *sibling)
2459 {
2460 	pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
2461 }
2462 
2463 TEST_F(TSYNC, siblings_fail_prctl)
2464 {
2465 	long ret;
2466 	void *status;
2467 	struct sock_filter filter[] = {
2468 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2469 			offsetof(struct seccomp_data, nr)),
2470 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
2471 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
2472 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2473 	};
2474 	struct sock_fprog prog = {
2475 		.len = (unsigned short)ARRAY_SIZE(filter),
2476 		.filter = filter,
2477 	};
2478 
2479 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2480 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2481 	}
2482 
2483 	/* Check prctl failure detection by requesting sib 0 diverge. */
2484 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2485 	ASSERT_NE(ENOSYS, errno) {
2486 		TH_LOG("Kernel does not support seccomp syscall!");
2487 	}
2488 	ASSERT_EQ(0, ret) {
2489 		TH_LOG("setting filter failed");
2490 	}
2491 
2492 	self->sibling[0].diverge = 1;
2493 	tsync_start_sibling(&self->sibling[0]);
2494 	tsync_start_sibling(&self->sibling[1]);
2495 
2496 	while (self->sibling_count < TSYNC_SIBLINGS) {
2497 		sem_wait(&self->started);
2498 		self->sibling_count++;
2499 	}
2500 
2501 	/* Signal the threads to clean up. */
2502 	pthread_mutex_lock(&self->mutex);
2503 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2504 		TH_LOG("cond broadcast non-zero");
2505 	}
2506 	pthread_mutex_unlock(&self->mutex);
2507 
2508 	/* Ensure diverging sibling failed to call prctl. */
2509 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2510 	EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
2511 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2512 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2513 }
2514 
2515 TEST_F(TSYNC, two_siblings_with_ancestor)
2516 {
2517 	long ret;
2518 	void *status;
2519 
2520 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2521 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2522 	}
2523 
2524 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2525 	ASSERT_NE(ENOSYS, errno) {
2526 		TH_LOG("Kernel does not support seccomp syscall!");
2527 	}
2528 	ASSERT_EQ(0, ret) {
2529 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2530 	}
2531 	tsync_start_sibling(&self->sibling[0]);
2532 	tsync_start_sibling(&self->sibling[1]);
2533 
2534 	while (self->sibling_count < TSYNC_SIBLINGS) {
2535 		sem_wait(&self->started);
2536 		self->sibling_count++;
2537 	}
2538 
2539 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2540 		      &self->apply_prog);
2541 	ASSERT_EQ(0, ret) {
2542 		TH_LOG("Could install filter on all threads!");
2543 	}
2544 	/* Tell the siblings to test the policy */
2545 	pthread_mutex_lock(&self->mutex);
2546 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2547 		TH_LOG("cond broadcast non-zero");
2548 	}
2549 	pthread_mutex_unlock(&self->mutex);
2550 	/* Ensure they are both killed and don't exit cleanly. */
2551 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2552 	EXPECT_EQ(0x0, (long)status);
2553 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2554 	EXPECT_EQ(0x0, (long)status);
2555 }
2556 
2557 TEST_F(TSYNC, two_sibling_want_nnp)
2558 {
2559 	void *status;
2560 
2561 	/* start siblings before any prctl() operations */
2562 	tsync_start_sibling(&self->sibling[0]);
2563 	tsync_start_sibling(&self->sibling[1]);
2564 	while (self->sibling_count < TSYNC_SIBLINGS) {
2565 		sem_wait(&self->started);
2566 		self->sibling_count++;
2567 	}
2568 
2569 	/* Tell the siblings to test no policy */
2570 	pthread_mutex_lock(&self->mutex);
2571 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2572 		TH_LOG("cond broadcast non-zero");
2573 	}
2574 	pthread_mutex_unlock(&self->mutex);
2575 
2576 	/* Ensure they are both upset about lacking nnp. */
2577 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2578 	EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2579 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2580 	EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2581 }
2582 
2583 TEST_F(TSYNC, two_siblings_with_no_filter)
2584 {
2585 	long ret;
2586 	void *status;
2587 
2588 	/* start siblings before any prctl() operations */
2589 	tsync_start_sibling(&self->sibling[0]);
2590 	tsync_start_sibling(&self->sibling[1]);
2591 	while (self->sibling_count < TSYNC_SIBLINGS) {
2592 		sem_wait(&self->started);
2593 		self->sibling_count++;
2594 	}
2595 
2596 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2597 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2598 	}
2599 
2600 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2601 		      &self->apply_prog);
2602 	ASSERT_NE(ENOSYS, errno) {
2603 		TH_LOG("Kernel does not support seccomp syscall!");
2604 	}
2605 	ASSERT_EQ(0, ret) {
2606 		TH_LOG("Could install filter on all threads!");
2607 	}
2608 
2609 	/* Tell the siblings to test the policy */
2610 	pthread_mutex_lock(&self->mutex);
2611 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2612 		TH_LOG("cond broadcast non-zero");
2613 	}
2614 	pthread_mutex_unlock(&self->mutex);
2615 
2616 	/* Ensure they are both killed and don't exit cleanly. */
2617 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2618 	EXPECT_EQ(0x0, (long)status);
2619 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2620 	EXPECT_EQ(0x0, (long)status);
2621 }
2622 
2623 TEST_F(TSYNC, two_siblings_with_one_divergence)
2624 {
2625 	long ret;
2626 	void *status;
2627 
2628 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2629 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2630 	}
2631 
2632 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2633 	ASSERT_NE(ENOSYS, errno) {
2634 		TH_LOG("Kernel does not support seccomp syscall!");
2635 	}
2636 	ASSERT_EQ(0, ret) {
2637 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2638 	}
2639 	self->sibling[0].diverge = 1;
2640 	tsync_start_sibling(&self->sibling[0]);
2641 	tsync_start_sibling(&self->sibling[1]);
2642 
2643 	while (self->sibling_count < TSYNC_SIBLINGS) {
2644 		sem_wait(&self->started);
2645 		self->sibling_count++;
2646 	}
2647 
2648 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2649 		      &self->apply_prog);
2650 	ASSERT_EQ(self->sibling[0].system_tid, ret) {
2651 		TH_LOG("Did not fail on diverged sibling.");
2652 	}
2653 
2654 	/* Wake the threads */
2655 	pthread_mutex_lock(&self->mutex);
2656 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2657 		TH_LOG("cond broadcast non-zero");
2658 	}
2659 	pthread_mutex_unlock(&self->mutex);
2660 
2661 	/* Ensure they are both unkilled. */
2662 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2663 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2664 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2665 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2666 }
2667 
2668 TEST_F(TSYNC, two_siblings_with_one_divergence_no_tid_in_err)
2669 {
2670 	long ret, flags;
2671 	void *status;
2672 
2673 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2674 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2675 	}
2676 
2677 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2678 	ASSERT_NE(ENOSYS, errno) {
2679 		TH_LOG("Kernel does not support seccomp syscall!");
2680 	}
2681 	ASSERT_EQ(0, ret) {
2682 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2683 	}
2684 	self->sibling[0].diverge = 1;
2685 	tsync_start_sibling(&self->sibling[0]);
2686 	tsync_start_sibling(&self->sibling[1]);
2687 
2688 	while (self->sibling_count < TSYNC_SIBLINGS) {
2689 		sem_wait(&self->started);
2690 		self->sibling_count++;
2691 	}
2692 
2693 	flags = SECCOMP_FILTER_FLAG_TSYNC |
2694 		SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
2695 	ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, &self->apply_prog);
2696 	ASSERT_EQ(ESRCH, errno) {
2697 		TH_LOG("Did not return ESRCH for diverged sibling.");
2698 	}
2699 	ASSERT_EQ(-1, ret) {
2700 		TH_LOG("Did not fail on diverged sibling.");
2701 	}
2702 
2703 	/* Wake the threads */
2704 	pthread_mutex_lock(&self->mutex);
2705 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2706 		TH_LOG("cond broadcast non-zero");
2707 	}
2708 	pthread_mutex_unlock(&self->mutex);
2709 
2710 	/* Ensure they are both unkilled. */
2711 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2712 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2713 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2714 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2715 }
2716 
2717 TEST_F(TSYNC, two_siblings_not_under_filter)
2718 {
2719 	long ret, sib;
2720 	void *status;
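	/* 100ms polling interval used while waiting for actual task death. */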
2721 	struct timespec delay = { .tv_nsec = 100000000 };
2722 
2723 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2724 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2725 	}
2726 
2727 	/*
2728 	 * Sibling 0 will have its own (divergent) seccomp policy
2729 	 * and sibling 1 will not be under seccomp at all. TSYNC
2730 	 * can bring sibling 1 into seccomp, while the divergent
2731 	 * sibling 0 will cause the synchronization to fail.
2732 	 */
2733 	self->sibling[0].diverge = 1;
2734 	tsync_start_sibling(&self->sibling[0]);
2735 	tsync_start_sibling(&self->sibling[1]);
2736 
2737 	while (self->sibling_count < TSYNC_SIBLINGS) {
2738 		sem_wait(&self->started);
2739 		self->sibling_count++;
2740 	}
2741 
2742 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2743 	ASSERT_NE(ENOSYS, errno) {
2744 		TH_LOG("Kernel does not support seccomp syscall!");
2745 	}
2746 	ASSERT_EQ(0, ret) {
2747 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2748 	}
2749 
2750 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2751 		      &self->apply_prog);
2752 	ASSERT_EQ(ret, self->sibling[0].system_tid) {
2753 		TH_LOG("Did not fail on diverged sibling.");
2754 	}
2755 	sib = 1;
2756 	if (ret == self->sibling[0].system_tid)
2757 		sib = 0;
2758 
2759 	pthread_mutex_lock(&self->mutex);
2760 
2761 	/* Increment the other sibling's num_waits so we can clean up
2762 	 * the one we just saw.
2763 	 */
2764 	self->sibling[!sib].num_waits += 1;
2765 
2766 	/* Signal the thread to clean up. */
2767 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2768 		TH_LOG("cond broadcast non-zero");
2769 	}
2770 	pthread_mutex_unlock(&self->mutex);
2771 	PTHREAD_JOIN(self->sibling[sib].tid, &status);
2772 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2773 	/* Poll for actual task death. pthread_join doesn't guarantee it. */
2774 	while (!kill(self->sibling[sib].system_tid, 0))
2775 		nanosleep(&delay, NULL);
2776 	/* Switch to the remaining sibling */
2777 	sib = !sib;
2778 
2779 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2780 		      &self->apply_prog);
2781 	ASSERT_EQ(0, ret) {
2782 		TH_LOG("Expected the remaining sibling to sync");
2783 	}
2784 
2785 	pthread_mutex_lock(&self->mutex);
2786 
2787 	/* If the remaining sibling didn't have a chance to wake up during
2788 	 * the first broadcast, manually reduce the num_waits now.
2789 	 */
2790 	if (self->sibling[sib].num_waits > 1)
2791 		self->sibling[sib].num_waits = 1;
2792 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2793 		TH_LOG("cond broadcast non-zero");
2794 	}
2795 	pthread_mutex_unlock(&self->mutex);
2796 	PTHREAD_JOIN(self->sibling[sib].tid, &status);
2797 	EXPECT_EQ(0, (long)status);
2798 	/* Poll for actual task death. pthread_join doesn't guarantee it. */
2799 	while (!kill(self->sibling[sib].system_tid, 0))
2800 		nanosleep(&delay, NULL);
2801 
2802 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2803 		      &self->apply_prog);
2804 	ASSERT_EQ(0, ret);  /* just us chickens */
2805 }
2806 
2807 /* Make sure restarted syscalls are seen directly as "restart_syscall". */
2808 TEST(syscall_restart)
2809 {
2810 	long ret;
2811 	unsigned long msg;
2812 	pid_t child_pid;
2813 	int pipefd[2];
2814 	int status;
2815 	siginfo_t info = { };
2816 	struct sock_filter filter[] = {
2817 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2818 			 offsetof(struct seccomp_data, nr)),
2819 
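		/*
		 * The jump offsets below skip forward to the return
		 * statements at the tail: allowed syscalls land on RET_ALLOW,
		 * the nanosleep variants on RET_TRACE|0x100, restart_syscall
		 * on RET_TRACE|0x200, and anything unmatched hits RET_KILL.
		 */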
2820 #ifdef __NR_sigreturn
2821 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 7, 0),
2822 #endif
2823 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 6, 0),
2824 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 5, 0),
2825 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 4, 0),
2826 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 5, 0),
2827 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_clock_nanosleep, 4, 0),
2828 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),
2829 
2830 		/* Allow __NR_write for easy logging. */
2831 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
2832 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2833 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2834 		/* The nanosleep jump target. */
2835 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100),
2836 		/* The restart_syscall jump target. */
2837 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200),
2838 	};
2839 	struct sock_fprog prog = {
2840 		.len = (unsigned short)ARRAY_SIZE(filter),
2841 		.filter = filter,
2842 	};
2843 #if defined(__arm__)
2844 	struct utsname utsbuf;
2845 #endif
2846 
2847 	ASSERT_EQ(0, pipe(pipefd));
2848 
2849 	child_pid = fork();
2850 	ASSERT_LE(0, child_pid);
2851 	if (child_pid == 0) {
2852 		/* Child uses EXPECT not ASSERT to deliver status correctly. */
2853 		char buf = ' ';
2854 		struct timespec timeout = { };
2855 
2856 		/* Attach parent as tracer and stop. */
2857 		EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
2858 		EXPECT_EQ(0, raise(SIGSTOP));
2859 
2860 		EXPECT_EQ(0, close(pipefd[1]));
2861 
2862 		EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2863 			TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2864 		}
2865 
2866 		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2867 		EXPECT_EQ(0, ret) {
2868 			TH_LOG("Failed to install filter!");
2869 		}
2870 
2871 		EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2872 			TH_LOG("Failed to read() sync from parent");
2873 		}
2874 		EXPECT_EQ('.', buf) {
2875 			TH_LOG("Failed to get sync data from read()");
2876 		}
2877 
2878 		/* Start nanosleep to be interrupted. */
2879 		timeout.tv_sec = 1;
2880 		errno = 0;
2881 		EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
2882 			TH_LOG("Call to nanosleep() failed (errno %d)", errno);
2883 		}
2884 
2885 		/* Read final sync from parent. */
2886 		EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2887 			TH_LOG("Failed final read() from parent");
2888 		}
2889 		EXPECT_EQ('!', buf) {
2890 			TH_LOG("Failed to get final data from read()");
2891 		}
2892 
2893 		/* Directly report the status of our test harness results. */
2894 		syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
2895 						     : EXIT_FAILURE);
2896 	}
2897 	EXPECT_EQ(0, close(pipefd[0]));
2898 
2899 	/* Attach to child, setup options, and release. */
2900 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2901 	ASSERT_EQ(true, WIFSTOPPED(status));
2902 	ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
2903 			    PTRACE_O_TRACESECCOMP));
2904 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2905 	ASSERT_EQ(1, write(pipefd[1], ".", 1));
2906 
2907 	/* Wait for nanosleep() to start. */
2908 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2909 	ASSERT_EQ(true, WIFSTOPPED(status));
2910 	ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2911 	ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2912 	ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2913 	ASSERT_EQ(0x100, msg);
2914 	ret = get_syscall(_metadata, child_pid);
2915 	EXPECT_TRUE(ret == __NR_nanosleep || ret == __NR_clock_nanosleep);
2916 
2917 	/* Might as well check siginfo for sanity while we're here. */
2918 	ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2919 	ASSERT_EQ(SIGTRAP, info.si_signo);
2920 	ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
2921 	EXPECT_EQ(0, info.si_errno);
2922 	EXPECT_EQ(getuid(), info.si_uid);
2923 	/* Verify signal delivery came from child (seccomp-triggered). */
2924 	EXPECT_EQ(child_pid, info.si_pid);
2925 
2926 	/* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */
2927 	ASSERT_EQ(0, kill(child_pid, SIGSTOP));
2928 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2929 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2930 	ASSERT_EQ(true, WIFSTOPPED(status));
2931 	ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
2932 	ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2933 	/*
2934 	 * There is no siginfo on SIGSTOP any more, so we can't verify
2935 	 * signal delivery came from parent now (getpid() == info.si_pid).
2936 	 * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com
2937 	 * At least verify the SIGSTOP via PTRACE_GETSIGINFO.
2938 	 */
2939 	EXPECT_EQ(SIGSTOP, info.si_signo);
2940 
2941 	/* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
2942 	ASSERT_EQ(0, kill(child_pid, SIGCONT));
2943 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2944 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2945 	ASSERT_EQ(true, WIFSTOPPED(status));
2946 	ASSERT_EQ(SIGCONT, WSTOPSIG(status));
2947 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2948 
2949 	/* Wait for restart_syscall() to start. */
2950 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2951 	ASSERT_EQ(true, WIFSTOPPED(status));
2952 	ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2953 	ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2954 	ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2955 
2956 	ASSERT_EQ(0x200, msg);
2957 	ret = get_syscall(_metadata, child_pid);
2958 #if defined(__arm__)
2959 	/*
2960 	 * FIXME:
2961 	 * - native ARM registers do NOT expose true syscall.
2962 	 * - compat ARM registers on ARM64 DO expose true syscall.
2963 	 */
2964 	ASSERT_EQ(0, uname(&utsbuf));
2965 	if (strncmp(utsbuf.machine, "arm", 3) == 0) {
2966 		EXPECT_EQ(__NR_nanosleep, ret);
2967 	} else
2968 #endif
2969 	{
2970 		EXPECT_EQ(__NR_restart_syscall, ret);
2971 	}
2972 
2973 	/* Write again to end test. */
2974 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2975 	ASSERT_EQ(1, write(pipefd[1], "!", 1));
2976 	EXPECT_EQ(0, close(pipefd[1]));
2977 
2978 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2979 	if (WIFSIGNALED(status) || WEXITSTATUS(status))
2980 		_metadata->passed = 0;
2981 }
2982 
2983 TEST_SIGNAL(filter_flag_log, SIGSYS)
2984 {
2985 	struct sock_filter allow_filter[] = {
2986 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2987 	};
2988 	struct sock_filter kill_filter[] = {
2989 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2990 			offsetof(struct seccomp_data, nr)),
2991 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
2992 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2993 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2994 	};
2995 	struct sock_fprog allow_prog = {
2996 		.len = (unsigned short)ARRAY_SIZE(allow_filter),
2997 		.filter = allow_filter,
2998 	};
2999 	struct sock_fprog kill_prog = {
3000 		.len = (unsigned short)ARRAY_SIZE(kill_filter),
3001 		.filter = kill_filter,
3002 	};
3003 	long ret;
3004 	pid_t parent = getppid();
3005 
3006 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3007 	ASSERT_EQ(0, ret);
3008 
3009 	/* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */
3010 	ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG,
3011 		      &allow_prog);
3012 	ASSERT_NE(ENOSYS, errno) {
3013 		TH_LOG("Kernel does not support seccomp syscall!");
3014 	}
3015 	EXPECT_NE(0, ret) {
3016 		TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!");
3017 	}
3018 	EXPECT_EQ(EINVAL, errno) {
3019 		TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!");
3020 	}
3021 
3022 	/* Verify that a simple, permissive filter can be added with no flags */
3023 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
3024 	EXPECT_EQ(0, ret);
3025 
3026 	/* See if the same filter can be added with the FILTER_FLAG_LOG flag */
3027 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
3028 		      &allow_prog);
3029 	ASSERT_NE(EINVAL, errno) {
3030 		TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!");
3031 	}
3032 	EXPECT_EQ(0, ret);
3033 
3034 	/* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */
3035 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
3036 		      &kill_prog);
3037 	EXPECT_EQ(0, ret);
3038 
3039 	EXPECT_EQ(parent, syscall(__NR_getppid));
3040 	/* getpid() should never return. */
3041 	EXPECT_EQ(0, syscall(__NR_getpid));
3042 }
3043 
3044 TEST(get_action_avail)
3045 {
3046 	__u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP,
3047 			    SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
3048 			    SECCOMP_RET_LOG,   SECCOMP_RET_ALLOW };
3049 	__u32 unknown_action = 0x10000000U;
3050 	int i;
3051 	long ret;
3052 
3053 	ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]);
3054 	ASSERT_NE(ENOSYS, errno) {
3055 		TH_LOG("Kernel does not support seccomp syscall!");
3056 	}
3057 	ASSERT_NE(EINVAL, errno) {
3058 		TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!");
3059 	}
3060 	EXPECT_EQ(ret, 0);
3061 
3062 	for (i = 0; i < ARRAY_SIZE(actions); i++) {
3063 		ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]);
3064 		EXPECT_EQ(ret, 0) {
3065 			TH_LOG("Expected action (0x%X) not available!",
3066 			       actions[i]);
3067 		}
3068 	}
3069 
3070 	/* Check that an unknown action is handled properly (EOPNOTSUPP) */
3071 	ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action);
3072 	EXPECT_EQ(ret, -1);
3073 	EXPECT_EQ(errno, EOPNOTSUPP);
3074 }
3075 
3076 TEST(get_metadata)
3077 {
3078 	pid_t pid;
3079 	int pipefd[2];
3080 	char buf;
3081 	struct seccomp_metadata md;
3082 	long ret;
3083 
3084 	/* Only real root can get metadata. */
3085 	if (geteuid()) {
3086 		XFAIL(return, "get_metadata requires real root");
3087 		return;
3088 	}
3089 
3090 	ASSERT_EQ(0, pipe(pipefd));
3091 
3092 	pid = fork();
3093 	ASSERT_GE(pid, 0);
3094 	if (pid == 0) {
3095 		struct sock_filter filter[] = {
3096 			BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
3097 		};
3098 		struct sock_fprog prog = {
3099 			.len = (unsigned short)ARRAY_SIZE(filter),
3100 			.filter = filter,
3101 		};
3102 
3103 		/* one with log, one without */
3104 		EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER,
3105 				     SECCOMP_FILTER_FLAG_LOG, &prog));
3106 		EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog));
3107 
3108 		EXPECT_EQ(0, close(pipefd[0]));
3109 		ASSERT_EQ(1, write(pipefd[1], "1", 1));
3110 		ASSERT_EQ(0, close(pipefd[1]));
3111 
3112 		while (1)
3113 			sleep(100);
3114 	}
3115 
3116 	ASSERT_EQ(0, close(pipefd[1]));
3117 	ASSERT_EQ(1, read(pipefd[0], &buf, 1));
3118 
3119 	ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid));
3120 	ASSERT_EQ(pid, waitpid(pid, NULL, 0));
3121 
3122 	/* Past here must not use ASSERT or child process is never killed. */
3123 
3124 	md.filter_off = 0;
3125 	errno = 0;
3126 	ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
3127 	EXPECT_EQ(sizeof(md), ret) {
3128 		if (errno == EINVAL)
3129 			XFAIL(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)");
3130 	}
3131 
3132 	EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG);
3133 	EXPECT_EQ(md.filter_off, 0);
3134 
3135 	md.filter_off = 1;
3136 	ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
3137 	EXPECT_EQ(sizeof(md), ret);
3138 	EXPECT_EQ(md.flags, 0);
3139 	EXPECT_EQ(md.filter_off, 1);
3140 
3141 skip:
3142 	ASSERT_EQ(0, kill(pid, SIGKILL));
3143 }
3144 
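/*
 * Install a filter that returns SECCOMP_RET_USER_NOTIF for syscall @nr and
 * allows everything else. When @flags includes
 * SECCOMP_FILTER_FLAG_NEW_LISTENER, the seccomp() return value is the
 * notification listener fd.
 */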
3145 static int user_trap_syscall(int nr, unsigned int flags)
3146 {
3147 	struct sock_filter filter[] = {
3148 		BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
3149 			offsetof(struct seccomp_data, nr)),
3150 		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1),
3151 		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_USER_NOTIF),
3152 		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
3153 	};
3154 
3155 	struct sock_fprog prog = {
3156 		.len = (unsigned short)ARRAY_SIZE(filter),
3157 		.filter = filter,
3158 	};
3159 
3160 	return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog);
3161 }
3162 
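/* Sentinel value injected by the supervisor so tracees can verify replies. */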
3163 #define USER_NOTIF_MAGIC INT_MAX
3164 TEST(user_notification_basic)
3165 {
3166 	pid_t pid;
3167 	long ret;
3168 	int status, listener;
3169 	struct seccomp_notif req = {};
3170 	struct seccomp_notif_resp resp = {};
3171 	struct pollfd pollfd;
3172 
3173 	struct sock_filter filter[] = {
3174 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
3175 	};
3176 	struct sock_fprog prog = {
3177 		.len = (unsigned short)ARRAY_SIZE(filter),
3178 		.filter = filter,
3179 	};
3180 
3181 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3182 	ASSERT_EQ(0, ret) {
3183 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3184 	}
3185 
3186 	pid = fork();
3187 	ASSERT_GE(pid, 0);
3188 
3189 	/* Check that we get -ENOSYS with no listener attached */
3190 	if (pid == 0) {
3191 		if (user_trap_syscall(__NR_getppid, 0) < 0)
3192 			exit(1);
3193 		ret = syscall(__NR_getppid);
3194 		exit(ret >= 0 || errno != ENOSYS);
3195 	}
3196 
3197 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3198 	EXPECT_EQ(true, WIFEXITED(status));
3199 	EXPECT_EQ(0, WEXITSTATUS(status));
3200 
3201 	/* Add some no-op filters for grins. */
3202 	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3203 	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3204 	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3205 	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3206 
3207 	/* Check that the basic notification machinery works */
3208 	listener = user_trap_syscall(__NR_getppid,
3209 				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3210 	ASSERT_GE(listener, 0);
3211 
3212 	/* Installing a second listener in the chain should fail with EBUSY. */
3213 	EXPECT_EQ(user_trap_syscall(__NR_getppid,
3214 				    SECCOMP_FILTER_FLAG_NEW_LISTENER),
3215 		  -1);
3216 	EXPECT_EQ(errno, EBUSY);
3217 
3218 	pid = fork();
3219 	ASSERT_GE(pid, 0);
3220 
3221 	if (pid == 0) {
3222 		ret = syscall(__NR_getppid);
3223 		exit(ret != USER_NOTIF_MAGIC);
3224 	}
3225 
3226 	pollfd.fd = listener;
3227 	pollfd.events = POLLIN | POLLOUT;
3228 
3229 	EXPECT_GT(poll(&pollfd, 1, -1), 0);
3230 	EXPECT_EQ(pollfd.revents, POLLIN);
3231 
3232 	/* Test that we can't pass garbage to the kernel. */
3233 	memset(&req, 0, sizeof(req));
3234 	req.pid = -1;
3235 	errno = 0;
3236 	ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req);
3237 	EXPECT_EQ(-1, ret);
3238 	EXPECT_EQ(EINVAL, errno);
3239 
3240 	if (ret) {
3241 		req.pid = 0;
3242 		EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3243 	}
3244 
3245 	pollfd.fd = listener;
3246 	pollfd.events = POLLIN | POLLOUT;
3247 
3248 	EXPECT_GT(poll(&pollfd, 1, -1), 0);
3249 	EXPECT_EQ(pollfd.revents, POLLOUT);
3250 
3251 	EXPECT_EQ(req.data.nr, __NR_getppid);
3252 
3253 	resp.id = req.id;
3254 	resp.error = 0;
3255 	resp.val = USER_NOTIF_MAGIC;
3256 
3257 	/* Check that the kernel rejects nonzero resp.flags. */
3258 	resp.flags = 1;
3259 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3260 	EXPECT_EQ(errno, EINVAL);
3261 
3262 	resp.flags = 0;
3263 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3264 
3265 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3266 	EXPECT_EQ(true, WIFEXITED(status));
3267 	EXPECT_EQ(0, WEXITSTATUS(status));
3268 }
3269 
3270 TEST(user_notification_with_tsync)
3271 {
3272 	int ret;
3273 	unsigned int flags;
3274 
3275 	/* these were exclusive */
3276 	flags = SECCOMP_FILTER_FLAG_NEW_LISTENER |
3277 		SECCOMP_FILTER_FLAG_TSYNC;
3278 	ASSERT_EQ(-1, user_trap_syscall(__NR_getppid, flags));
3279 	ASSERT_EQ(EINVAL, errno);
3280 
3281 	/* but now they're not */
3282 	flags |= SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
3283 	ret = user_trap_syscall(__NR_getppid, flags);
3284 	close(ret);
3285 	ASSERT_LE(0, ret);
3286 }
3287 
3288 TEST(user_notification_kill_in_middle)
3289 {
3290 	pid_t pid;
3291 	long ret;
3292 	int listener;
3293 	struct seccomp_notif req = {};
3294 	struct seccomp_notif_resp resp = {};
3295 
3296 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3297 	ASSERT_EQ(0, ret) {
3298 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3299 	}
3300 
3301 	listener = user_trap_syscall(__NR_getppid,
3302 				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3303 	ASSERT_GE(listener, 0);
3304 
3305 	/*
3306 	 * Check that nothing bad happens when we kill the task in the middle
3307 	 * of a syscall.
3308 	 */
3309 	pid = fork();
3310 	ASSERT_GE(pid, 0);
3311 
3312 	if (pid == 0) {
3313 		ret = syscall(__NR_getppid);
3314 		exit(ret != USER_NOTIF_MAGIC);
3315 	}
3316 
3317 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3318 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), 0);
3319 
3320 	EXPECT_EQ(kill(pid, SIGKILL), 0);
3321 	EXPECT_EQ(waitpid(pid, NULL, 0), pid);
3322 
3323 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), -1);
3324 
3325 	resp.id = req.id;
3326 	ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp);
3327 	EXPECT_EQ(ret, -1);
3328 	EXPECT_EQ(errno, ENOENT);
3329 }
3330 
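/*
 * Fd (the child's end of a socketpair) that the SIGUSR1 handler writes a
 * byte to, so the parent can observe the signal was actually delivered.
 */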
3331 static int handled = -1;
3332 
3333 static void signal_handler(int signal)
3334 {
3335 	if (write(handled, "c", 1) != 1)
3336 		perror("write from signal");
3337 }
3338 
3339 TEST(user_notification_signal)
3340 {
3341 	pid_t pid;
3342 	long ret;
3343 	int status, listener, sk_pair[2];
3344 	struct seccomp_notif req = {};
3345 	struct seccomp_notif_resp resp = {};
3346 	char c;
3347 
3348 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3349 	ASSERT_EQ(0, ret) {
3350 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3351 	}
3352 
3353 	ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
3354 
3355 	listener = user_trap_syscall(__NR_gettid,
3356 				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3357 	ASSERT_GE(listener, 0);
3358 
3359 	pid = fork();
3360 	ASSERT_GE(pid, 0);
3361 
3362 	if (pid == 0) {
3363 		close(sk_pair[0]);
3364 		handled = sk_pair[1];
3365 		if (signal(SIGUSR1, signal_handler) == SIG_ERR) {
3366 			perror("signal");
3367 			exit(1);
3368 		}
3369 		/*
3370 		 * ERESTARTSYS behavior is a bit hard to test, because we need
3371 		 * to rely on a signal that has not yet been handled. Let's at
3372 		 * least check that the error code gets propagated through, and
3373 		 * hope that it doesn't break when there is actually a signal :)
3374 		 */
3375 		ret = syscall(__NR_gettid);
3376 		exit(!(ret == -1 && errno == 512));
3377 	}
3378 
3379 	close(sk_pair[1]);
3380 
3381 	memset(&req, 0, sizeof(req));
3382 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3383 
3384 	EXPECT_EQ(kill(pid, SIGUSR1), 0);
3385 
3386 	/*
3387 	 * Make sure the signal really is delivered, which means we're not
3388 	 * stuck in the user notification code any more and the notification
3389 	 * should be dead.
3390 	 */
3391 	EXPECT_EQ(read(sk_pair[0], &c, 1), 1);
3392 
3393 	resp.id = req.id;
3394 	resp.error = -EPERM;
3395 	resp.val = 0;
3396 
3397 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3398 	EXPECT_EQ(errno, ENOENT);
3399 
3400 	memset(&req, 0, sizeof(req));
3401 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3402 
3403 	resp.id = req.id;
3404 	resp.error = -512; /* -ERESTARTSYS */
3405 	resp.val = 0;
3406 
3407 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3408 
3409 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3410 	EXPECT_EQ(true, WIFEXITED(status));
3411 	EXPECT_EQ(0, WEXITSTATUS(status));
3412 }
3413 
3414 TEST(user_notification_closed_listener)
3415 {
3416 	pid_t pid;
3417 	long ret;
3418 	int status, listener;
3419 
3420 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3421 	ASSERT_EQ(0, ret) {
3422 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3423 	}
3424 
3425 	listener = user_trap_syscall(__NR_getppid,
3426 				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3427 	ASSERT_GE(listener, 0);
3428 
3429 	/*
3430 	 * Check that we get an ENOSYS when the listener is closed.
3431 	 */
3432 	pid = fork();
3433 	ASSERT_GE(pid, 0);
3434 	if (pid == 0) {
3435 		close(listener);
3436 		ret = syscall(__NR_getppid);
3437 		exit(ret != -1 && errno != ENOSYS);
3438 	}
3439 
3440 	close(listener);
3441 
3442 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3443 	EXPECT_EQ(true, WIFEXITED(status));
3444 	EXPECT_EQ(0, WEXITSTATUS(status));
3445 }
3446 
3447 /*
3448  * Check that a pid in a child namespace still shows up as valid in ours.
3449  */
3450 TEST(user_notification_child_pid_ns)
3451 {
3452 	pid_t pid;
3453 	int status, listener;
3454 	struct seccomp_notif req = {};
3455 	struct seccomp_notif_resp resp = {};
3456 
3457 	ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0);
3458 
3459 	listener = user_trap_syscall(__NR_getppid,
3460 				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3461 	ASSERT_GE(listener, 0);
3462 
3463 	pid = fork();
3464 	ASSERT_GE(pid, 0);
3465 
3466 	if (pid == 0)
3467 		exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3468 
3469 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3470 	EXPECT_EQ(req.pid, pid);
3471 
3472 	resp.id = req.id;
3473 	resp.error = 0;
3474 	resp.val = USER_NOTIF_MAGIC;
3475 
3476 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3477 
3478 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3479 	EXPECT_EQ(true, WIFEXITED(status));
3480 	EXPECT_EQ(0, WEXITSTATUS(status));
3481 	close(listener);
3482 }
3483 
3484 /*
3485  * Check that a pid in a sibling (i.e. unrelated) namespace shows up as 0, i.e.
3486  * invalid.
3487  */
3488 TEST(user_notification_sibling_pid_ns)
3489 {
3490 	pid_t pid, pid2;
3491 	int status, listener;
3492 	struct seccomp_notif req = {};
3493 	struct seccomp_notif_resp resp = {};
3494 
3495 	ASSERT_EQ(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), 0) {
3496 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3497 	}
3498 
3499 	listener = user_trap_syscall(__NR_getppid,
3500 				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3501 	ASSERT_GE(listener, 0);
3502 
3503 	pid = fork();
3504 	ASSERT_GE(pid, 0);
3505 
3506 	if (pid == 0) {
3507 		ASSERT_EQ(unshare(CLONE_NEWPID), 0);
3508 
3509 		pid2 = fork();
3510 		ASSERT_GE(pid2, 0);
3511 
3512 		if (pid2 == 0)
3513 			exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3514 
3515 		EXPECT_EQ(waitpid(pid2, &status, 0), pid2);
3516 		EXPECT_EQ(true, WIFEXITED(status));
3517 		EXPECT_EQ(0, WEXITSTATUS(status));
3518 		exit(WEXITSTATUS(status));
3519 	}
3520 
3521 	/* Create the sibling ns, and sibling in it. */
3522 	ASSERT_EQ(unshare(CLONE_NEWPID), 0);
3523 	ASSERT_EQ(errno, 0);
3524 
3525 	pid2 = fork();
3526 	ASSERT_GE(pid2, 0);
3527 
3528 	if (pid2 == 0) {
3529 		ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3530 		/*
3531 		 * The pid should be 0, i.e. the task is in some namespace that
3532 		 * we can't "see".
3533 		 */
3534 		EXPECT_EQ(req.pid, 0);
3535 
3536 		resp.id = req.id;
3537 		resp.error = 0;
3538 		resp.val = USER_NOTIF_MAGIC;
3539 
3540 		ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3541 		exit(0);
3542 	}
3543 
3544 	close(listener);
3545 
3546 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3547 	EXPECT_EQ(true, WIFEXITED(status));
3548 	EXPECT_EQ(0, WEXITSTATUS(status));
3549 
3550 	EXPECT_EQ(waitpid(pid2, &status, 0), pid2);
3551 	EXPECT_EQ(true, WIFEXITED(status));
3552 	EXPECT_EQ(0, WEXITSTATUS(status));
3553 }
3554 
3555 TEST(user_notification_fault_recv)
3556 {
3557 	pid_t pid;
3558 	int status, listener;
3559 	struct seccomp_notif req = {};
3560 	struct seccomp_notif_resp resp = {};
3561 
3562 	ASSERT_EQ(unshare(CLONE_NEWUSER), 0);
3563 
3564 	listener = user_trap_syscall(__NR_getppid,
3565 				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3566 	ASSERT_GE(listener, 0);
3567 
3568 	pid = fork();
3569 	ASSERT_GE(pid, 0);
3570 
3571 	if (pid == 0)
3572 		exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3573 
3574 	/* Do a bad recv() */
3575 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, NULL), -1);
3576 	EXPECT_EQ(errno, EFAULT);
3577 
3578 	/* We should still be able to receive this notification, though. */
3579 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3580 	EXPECT_EQ(req.pid, pid);
3581 
3582 	resp.id = req.id;
3583 	resp.error = 0;
3584 	resp.val = USER_NOTIF_MAGIC;
3585 
3586 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3587 
3588 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3589 	EXPECT_EQ(true, WIFEXITED(status));
3590 	EXPECT_EQ(0, WEXITSTATUS(status));
3591 }
3592 
3593 TEST(seccomp_get_notif_sizes)
3594 {
3595 	struct seccomp_notif_sizes sizes;
3596 
3597 	ASSERT_EQ(seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes), 0);
3598 	EXPECT_EQ(sizes.seccomp_notif, sizeof(struct seccomp_notif));
3599 	EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp));
3600 }
3601 
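/*
 * Compare whether two fds (in tasks pid1 and pid2) refer to the same
 * struct file via kcmp(KCMP_FILE); returns 0 on a match, or fails with
 * ENOSYS where kcmp() is unavailable.
 */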
3602 static int filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2)
3603 {
3604 #ifdef __NR_kcmp
3605 	return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2);
3606 #else
3607 	errno = ENOSYS;
3608 	return -1;
3609 #endif
3610 }
3611 
3612 TEST(user_notification_continue)
3613 {
3614 	pid_t pid;
3615 	long ret;
3616 	int status, listener;
3617 	struct seccomp_notif req = {};
3618 	struct seccomp_notif_resp resp = {};
3619 	struct pollfd pollfd;
3620 
3621 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3622 	ASSERT_EQ(0, ret) {
3623 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3624 	}
3625 
3626 	listener = user_trap_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
3627 	ASSERT_GE(listener, 0);
3628 
3629 	pid = fork();
3630 	ASSERT_GE(pid, 0);
3631 
3632 	if (pid == 0) {
3633 		int dup_fd, pipe_fds[2];
3634 		pid_t self;
3635 
3636 		ret = pipe(pipe_fds);
3637 		if (ret < 0)
3638 			exit(1);
3639 
3640 		dup_fd = dup(pipe_fds[0]);
3641 		if (dup_fd < 0)
3642 			exit(1);
3643 
3644 		self = getpid();
3645 
3646 		ret = filecmp(self, self, pipe_fds[0], dup_fd);
3647 		if (ret)
3648 			exit(2);
3649 
3650 		exit(0);
3651 	}
3652 
3653 	pollfd.fd = listener;
3654 	pollfd.events = POLLIN | POLLOUT;
3655 
3656 	EXPECT_GT(poll(&pollfd, 1, -1), 0);
3657 	EXPECT_EQ(pollfd.revents, POLLIN);
3658 
3659 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3660 
3661 	pollfd.fd = listener;
3662 	pollfd.events = POLLIN | POLLOUT;
3663 
3664 	EXPECT_GT(poll(&pollfd, 1, -1), 0);
3665 	EXPECT_EQ(pollfd.revents, POLLOUT);
3666 
3667 	EXPECT_EQ(req.data.nr, __NR_dup);
3668 
3669 	resp.id = req.id;
3670 	resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE;
3671 
3672 	/*
3673 	 * Verify that setting SECCOMP_USER_NOTIF_FLAG_CONTINUE enforces that
3674 	 * the other response fields are set to 0.
3675 	 */
3676 	resp.error = 0;
3677 	resp.val = USER_NOTIF_MAGIC;
3678 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3679 	EXPECT_EQ(errno, EINVAL);
3680 
3681 	resp.error = USER_NOTIF_MAGIC;
3682 	resp.val = 0;
3683 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3684 	EXPECT_EQ(errno, EINVAL);
3685 
3686 	resp.error = 0;
3687 	resp.val = 0;
3688 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0) {
3689 		if (errno == EINVAL)
3690 			XFAIL(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE");
3691 	}
3692 
3693 skip:
3694 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3695 	EXPECT_EQ(true, WIFEXITED(status));
3696 	EXPECT_EQ(0, WEXITSTATUS(status)) {
3697 		if (WEXITSTATUS(status) == 2) {
3698 			XFAIL(return, "Kernel does not support kcmp() syscall");
3699 			return;
3700 		}
3701 	}
3702 }
3703 
3704 /*
3705  * TODO:
3706  * - add microbenchmarks
3707  * - expand NNP testing
3708  * - better arch-specific TRACE and TRAP handlers.
3709  * - endianness checking when appropriate
3710  * - 64-bit arg prodding
3711  * - arch value testing (x86 modes especially)
3712  * - verify that FILTER_FLAG_LOG filters generate log messages
3713  * - verify that RET_LOG generates log messages
3714  * - ...
3715  */
3716 
3717 TEST_HARNESS_MAIN
3718