xref: /linux/tools/testing/selftests/seccomp/seccomp_bpf.c (revision 93df8a1ed6231727c5db94a80b1a6bd5ee67cec3)
1 /*
2  * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
3  * Use of this source code is governed by the GPLv2 license.
4  *
5  * Test code for seccomp bpf.
6  */
7 
8 #include <asm/siginfo.h>
9 #define __have_siginfo_t 1
10 #define __have_sigval_t 1
11 #define __have_sigevent_t 1
12 
13 #include <errno.h>
14 #include <linux/filter.h>
15 #include <sys/prctl.h>
16 #include <sys/ptrace.h>
17 #include <sys/user.h>
18 #include <linux/prctl.h>
19 #include <linux/ptrace.h>
20 #include <linux/seccomp.h>
21 #include <poll.h>
22 #include <pthread.h>
23 #include <semaphore.h>
24 #include <signal.h>
25 #include <stddef.h>
26 #include <stdbool.h>
27 #include <string.h>
28 #include <linux/elf.h>
29 #include <sys/uio.h>
30 
31 #define _GNU_SOURCE
32 #include <unistd.h>
33 #include <sys/syscall.h>
34 
35 #include "test_harness.h"
36 
/*
 * Fallback values for prctl()/seccomp constants, used only when the
 * system headers included above do not already provide them.
 */
#ifndef PR_SET_PTRACER
# define PR_SET_PTRACER 0x59616d61
#endif

#ifndef PR_SET_NO_NEW_PRIVS
# define PR_SET_NO_NEW_PRIVS 38
# define PR_GET_NO_NEW_PRIVS 39
#endif

#ifndef PR_SECCOMP_EXT
# define PR_SECCOMP_EXT 43
#endif

#ifndef SECCOMP_EXT_ACT
# define SECCOMP_EXT_ACT 1
#endif

#ifndef SECCOMP_EXT_ACT_TSYNC
# define SECCOMP_EXT_ACT_TSYNC 1
#endif

#ifndef SECCOMP_MODE_STRICT
# define SECCOMP_MODE_STRICT 1
#endif

#ifndef SECCOMP_MODE_FILTER
# define SECCOMP_MODE_FILTER 2
#endif
65 
66 #ifndef SECCOMP_RET_KILL
67 #define SECCOMP_RET_KILL        0x00000000U /* kill the task immediately */
68 #define SECCOMP_RET_TRAP        0x00030000U /* disallow and force a SIGSYS */
69 #define SECCOMP_RET_ERRNO       0x00050000U /* returns an errno */
70 #define SECCOMP_RET_TRACE       0x7ff00000U /* pass to a tracer or disallow */
71 #define SECCOMP_RET_ALLOW       0x7fff0000U /* allow */
72 
73 /* Masks for the return value sections. */
74 #define SECCOMP_RET_ACTION      0x7fff0000U
75 #define SECCOMP_RET_DATA        0x0000ffffU
76 
77 struct seccomp_data {
78 	int nr;
79 	__u32 arch;
80 	__u64 instruction_pointer;
81 	__u64 args[6];
82 };
83 #endif
84 
/* Byte offset of syscall argument _n inside struct seccomp_data. */
#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))

/* Sentinel exit values for sibling threads. */
#define SIBLING_EXIT_UNKILLED	0xbadbeef
#define SIBLING_EXIT_FAILURE	0xbadface
#define SIBLING_EXIT_NEWPRIVS	0xbadfeed
90 
91 TEST(mode_strict_support)
92 {
93 	long ret;
94 
95 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
96 	ASSERT_EQ(0, ret) {
97 		TH_LOG("Kernel does not support CONFIG_SECCOMP");
98 	}
99 	syscall(__NR_exit, 1);
100 }
101 
102 TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
103 {
104 	long ret;
105 
106 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
107 	ASSERT_EQ(0, ret) {
108 		TH_LOG("Kernel does not support CONFIG_SECCOMP");
109 	}
110 	syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
111 		NULL, NULL, NULL);
112 	EXPECT_FALSE(true) {
113 		TH_LOG("Unreachable!");
114 	}
115 }
116 
117 /* Note! This doesn't test no new privs behavior */
118 TEST(no_new_privs_support)
119 {
120 	long ret;
121 
122 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
123 	EXPECT_EQ(0, ret) {
124 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
125 	}
126 }
127 
128 /* Tests kernel support by checking for a copy_from_user() fault on * NULL. */
129 TEST(mode_filter_support)
130 {
131 	long ret;
132 
133 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
134 	ASSERT_EQ(0, ret) {
135 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
136 	}
137 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
138 	EXPECT_EQ(-1, ret);
139 	EXPECT_EQ(EFAULT, errno) {
140 		TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
141 	}
142 }
143 
144 TEST(mode_filter_without_nnp)
145 {
146 	struct sock_filter filter[] = {
147 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
148 	};
149 	struct sock_fprog prog = {
150 		.len = (unsigned short)ARRAY_SIZE(filter),
151 		.filter = filter,
152 	};
153 	long ret;
154 
155 	ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
156 	ASSERT_LE(0, ret) {
157 		TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
158 	}
159 	errno = 0;
160 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
161 	/* Succeeds with CAP_SYS_ADMIN, fails without */
162 	/* TODO(wad) check caps not euid */
163 	if (geteuid()) {
164 		EXPECT_EQ(-1, ret);
165 		EXPECT_EQ(EACCES, errno);
166 	} else {
167 		EXPECT_EQ(0, ret);
168 	}
169 }
170 
171 #define MAX_INSNS_PER_PATH 32768
172 
173 TEST(filter_size_limits)
174 {
175 	int i;
176 	int count = BPF_MAXINSNS + 1;
177 	struct sock_filter allow[] = {
178 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
179 	};
180 	struct sock_filter *filter;
181 	struct sock_fprog prog = { };
182 	long ret;
183 
184 	filter = calloc(count, sizeof(*filter));
185 	ASSERT_NE(NULL, filter);
186 
187 	for (i = 0; i < count; i++)
188 		filter[i] = allow[0];
189 
190 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
191 	ASSERT_EQ(0, ret);
192 
193 	prog.filter = filter;
194 	prog.len = count;
195 
196 	/* Too many filter instructions in a single filter. */
197 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
198 	ASSERT_NE(0, ret) {
199 		TH_LOG("Installing %d insn filter was allowed", prog.len);
200 	}
201 
202 	/* One less is okay, though. */
203 	prog.len -= 1;
204 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
205 	ASSERT_EQ(0, ret) {
206 		TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
207 	}
208 }
209 
210 TEST(filter_chain_limits)
211 {
212 	int i;
213 	int count = BPF_MAXINSNS;
214 	struct sock_filter allow[] = {
215 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
216 	};
217 	struct sock_filter *filter;
218 	struct sock_fprog prog = { };
219 	long ret;
220 
221 	filter = calloc(count, sizeof(*filter));
222 	ASSERT_NE(NULL, filter);
223 
224 	for (i = 0; i < count; i++)
225 		filter[i] = allow[0];
226 
227 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
228 	ASSERT_EQ(0, ret);
229 
230 	prog.filter = filter;
231 	prog.len = 1;
232 
233 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
234 	ASSERT_EQ(0, ret);
235 
236 	prog.len = count;
237 
238 	/* Too many total filter instructions. */
239 	for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
240 		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
241 		if (ret != 0)
242 			break;
243 	}
244 	ASSERT_NE(0, ret) {
245 		TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
246 		       i, count, i * (count + 4));
247 	}
248 }
249 
250 TEST(mode_filter_cannot_move_to_strict)
251 {
252 	struct sock_filter filter[] = {
253 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
254 	};
255 	struct sock_fprog prog = {
256 		.len = (unsigned short)ARRAY_SIZE(filter),
257 		.filter = filter,
258 	};
259 	long ret;
260 
261 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
262 	ASSERT_EQ(0, ret);
263 
264 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
265 	ASSERT_EQ(0, ret);
266 
267 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
268 	EXPECT_EQ(-1, ret);
269 	EXPECT_EQ(EINVAL, errno);
270 }
271 
272 
273 TEST(mode_filter_get_seccomp)
274 {
275 	struct sock_filter filter[] = {
276 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
277 	};
278 	struct sock_fprog prog = {
279 		.len = (unsigned short)ARRAY_SIZE(filter),
280 		.filter = filter,
281 	};
282 	long ret;
283 
284 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
285 	ASSERT_EQ(0, ret);
286 
287 	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
288 	EXPECT_EQ(0, ret);
289 
290 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
291 	ASSERT_EQ(0, ret);
292 
293 	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
294 	EXPECT_EQ(2, ret);
295 }
296 
297 
298 TEST(ALLOW_all)
299 {
300 	struct sock_filter filter[] = {
301 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
302 	};
303 	struct sock_fprog prog = {
304 		.len = (unsigned short)ARRAY_SIZE(filter),
305 		.filter = filter,
306 	};
307 	long ret;
308 
309 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
310 	ASSERT_EQ(0, ret);
311 
312 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
313 	ASSERT_EQ(0, ret);
314 }
315 
316 TEST(empty_prog)
317 {
318 	struct sock_filter filter[] = {
319 	};
320 	struct sock_fprog prog = {
321 		.len = (unsigned short)ARRAY_SIZE(filter),
322 		.filter = filter,
323 	};
324 	long ret;
325 
326 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
327 	ASSERT_EQ(0, ret);
328 
329 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
330 	EXPECT_EQ(-1, ret);
331 	EXPECT_EQ(EINVAL, errno);
332 }
333 
334 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
335 {
336 	struct sock_filter filter[] = {
337 		BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
338 	};
339 	struct sock_fprog prog = {
340 		.len = (unsigned short)ARRAY_SIZE(filter),
341 		.filter = filter,
342 	};
343 	long ret;
344 
345 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
346 	ASSERT_EQ(0, ret);
347 
348 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
349 	ASSERT_EQ(0, ret);
350 	EXPECT_EQ(0, syscall(__NR_getpid)) {
351 		TH_LOG("getpid() shouldn't ever return");
352 	}
353 }
354 
355 /* return code >= 0x80000000 is unused. */
356 TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
357 {
358 	struct sock_filter filter[] = {
359 		BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
360 	};
361 	struct sock_fprog prog = {
362 		.len = (unsigned short)ARRAY_SIZE(filter),
363 		.filter = filter,
364 	};
365 	long ret;
366 
367 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
368 	ASSERT_EQ(0, ret);
369 
370 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
371 	ASSERT_EQ(0, ret);
372 	EXPECT_EQ(0, syscall(__NR_getpid)) {
373 		TH_LOG("getpid() shouldn't ever return");
374 	}
375 }
376 
377 TEST_SIGNAL(KILL_all, SIGSYS)
378 {
379 	struct sock_filter filter[] = {
380 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
381 	};
382 	struct sock_fprog prog = {
383 		.len = (unsigned short)ARRAY_SIZE(filter),
384 		.filter = filter,
385 	};
386 	long ret;
387 
388 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
389 	ASSERT_EQ(0, ret);
390 
391 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
392 	ASSERT_EQ(0, ret);
393 }
394 
395 TEST_SIGNAL(KILL_one, SIGSYS)
396 {
397 	struct sock_filter filter[] = {
398 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
399 			offsetof(struct seccomp_data, nr)),
400 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
401 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
402 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
403 	};
404 	struct sock_fprog prog = {
405 		.len = (unsigned short)ARRAY_SIZE(filter),
406 		.filter = filter,
407 	};
408 	long ret;
409 	pid_t parent = getppid();
410 
411 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
412 	ASSERT_EQ(0, ret);
413 
414 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
415 	ASSERT_EQ(0, ret);
416 
417 	EXPECT_EQ(parent, syscall(__NR_getppid));
418 	/* getpid() should never return. */
419 	EXPECT_EQ(0, syscall(__NR_getpid));
420 }
421 
422 TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
423 {
424 	struct sock_filter filter[] = {
425 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
426 			offsetof(struct seccomp_data, nr)),
427 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
428 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
429 		/* Only both with lower 32-bit for now. */
430 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
431 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
432 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
433 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
434 	};
435 	struct sock_fprog prog = {
436 		.len = (unsigned short)ARRAY_SIZE(filter),
437 		.filter = filter,
438 	};
439 	long ret;
440 	pid_t parent = getppid();
441 	pid_t pid = getpid();
442 
443 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
444 	ASSERT_EQ(0, ret);
445 
446 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
447 	ASSERT_EQ(0, ret);
448 
449 	EXPECT_EQ(parent, syscall(__NR_getppid));
450 	EXPECT_EQ(pid, syscall(__NR_getpid));
451 	/* getpid() should never return. */
452 	EXPECT_EQ(0, syscall(__NR_getpid, 0x0C0FFEE));
453 }
454 
455 TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
456 {
457 	struct sock_filter filter[] = {
458 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
459 			offsetof(struct seccomp_data, nr)),
460 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
461 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
462 		/* Only both with lower 32-bit for now. */
463 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
464 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
465 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
466 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
467 	};
468 	struct sock_fprog prog = {
469 		.len = (unsigned short)ARRAY_SIZE(filter),
470 		.filter = filter,
471 	};
472 	long ret;
473 	pid_t parent = getppid();
474 	pid_t pid = getpid();
475 
476 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
477 	ASSERT_EQ(0, ret);
478 
479 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
480 	ASSERT_EQ(0, ret);
481 
482 	EXPECT_EQ(parent, syscall(__NR_getppid));
483 	EXPECT_EQ(pid, syscall(__NR_getpid));
484 	/* getpid() should never return. */
485 	EXPECT_EQ(0, syscall(__NR_getpid, 1, 2, 3, 4, 5, 0x0C0FFEE));
486 }
487 
488 /* TODO(wad) add 64-bit versus 32-bit arg tests. */
489 TEST(arg_out_of_range)
490 {
491 	struct sock_filter filter[] = {
492 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
493 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
494 	};
495 	struct sock_fprog prog = {
496 		.len = (unsigned short)ARRAY_SIZE(filter),
497 		.filter = filter,
498 	};
499 	long ret;
500 
501 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
502 	ASSERT_EQ(0, ret);
503 
504 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
505 	EXPECT_EQ(-1, ret);
506 	EXPECT_EQ(EINVAL, errno);
507 }
508 
509 TEST(ERRNO_valid)
510 {
511 	struct sock_filter filter[] = {
512 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
513 			offsetof(struct seccomp_data, nr)),
514 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
515 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | E2BIG),
516 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
517 	};
518 	struct sock_fprog prog = {
519 		.len = (unsigned short)ARRAY_SIZE(filter),
520 		.filter = filter,
521 	};
522 	long ret;
523 	pid_t parent = getppid();
524 
525 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
526 	ASSERT_EQ(0, ret);
527 
528 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
529 	ASSERT_EQ(0, ret);
530 
531 	EXPECT_EQ(parent, syscall(__NR_getppid));
532 	EXPECT_EQ(-1, read(0, NULL, 0));
533 	EXPECT_EQ(E2BIG, errno);
534 }
535 
536 TEST(ERRNO_zero)
537 {
538 	struct sock_filter filter[] = {
539 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
540 			offsetof(struct seccomp_data, nr)),
541 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
542 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 0),
543 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
544 	};
545 	struct sock_fprog prog = {
546 		.len = (unsigned short)ARRAY_SIZE(filter),
547 		.filter = filter,
548 	};
549 	long ret;
550 	pid_t parent = getppid();
551 
552 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
553 	ASSERT_EQ(0, ret);
554 
555 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
556 	ASSERT_EQ(0, ret);
557 
558 	EXPECT_EQ(parent, syscall(__NR_getppid));
559 	/* "errno" of 0 is ok. */
560 	EXPECT_EQ(0, read(0, NULL, 0));
561 }
562 
563 TEST(ERRNO_capped)
564 {
565 	struct sock_filter filter[] = {
566 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
567 			offsetof(struct seccomp_data, nr)),
568 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
569 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 4096),
570 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
571 	};
572 	struct sock_fprog prog = {
573 		.len = (unsigned short)ARRAY_SIZE(filter),
574 		.filter = filter,
575 	};
576 	long ret;
577 	pid_t parent = getppid();
578 
579 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
580 	ASSERT_EQ(0, ret);
581 
582 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
583 	ASSERT_EQ(0, ret);
584 
585 	EXPECT_EQ(parent, syscall(__NR_getppid));
586 	EXPECT_EQ(-1, read(0, NULL, 0));
587 	EXPECT_EQ(4095, errno);
588 }
589 
590 FIXTURE_DATA(TRAP) {
591 	struct sock_fprog prog;
592 };
593 
594 FIXTURE_SETUP(TRAP)
595 {
596 	struct sock_filter filter[] = {
597 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
598 			offsetof(struct seccomp_data, nr)),
599 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
600 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
601 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
602 	};
603 
604 	memset(&self->prog, 0, sizeof(self->prog));
605 	self->prog.filter = malloc(sizeof(filter));
606 	ASSERT_NE(NULL, self->prog.filter);
607 	memcpy(self->prog.filter, filter, sizeof(filter));
608 	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
609 }
610 
611 FIXTURE_TEARDOWN(TRAP)
612 {
613 	if (self->prog.filter)
614 		free(self->prog.filter);
615 }
616 
617 TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
618 {
619 	long ret;
620 
621 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
622 	ASSERT_EQ(0, ret);
623 
624 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
625 	ASSERT_EQ(0, ret);
626 	syscall(__NR_getpid);
627 }
628 
629 /* Ensure that SIGSYS overrides SIG_IGN */
630 TEST_F_SIGNAL(TRAP, ign, SIGSYS)
631 {
632 	long ret;
633 
634 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
635 	ASSERT_EQ(0, ret);
636 
637 	signal(SIGSYS, SIG_IGN);
638 
639 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
640 	ASSERT_EQ(0, ret);
641 	syscall(__NR_getpid);
642 }
643 
644 static struct siginfo TRAP_info;
645 static volatile int TRAP_nr;
646 static void TRAP_action(int nr, siginfo_t *info, void *void_context)
647 {
648 	memcpy(&TRAP_info, info, sizeof(TRAP_info));
649 	TRAP_nr = nr;
650 }
651 
652 TEST_F(TRAP, handler)
653 {
654 	int ret, test;
655 	struct sigaction act;
656 	sigset_t mask;
657 
658 	memset(&act, 0, sizeof(act));
659 	sigemptyset(&mask);
660 	sigaddset(&mask, SIGSYS);
661 
662 	act.sa_sigaction = &TRAP_action;
663 	act.sa_flags = SA_SIGINFO;
664 	ret = sigaction(SIGSYS, &act, NULL);
665 	ASSERT_EQ(0, ret) {
666 		TH_LOG("sigaction failed");
667 	}
668 	ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
669 	ASSERT_EQ(0, ret) {
670 		TH_LOG("sigprocmask failed");
671 	}
672 
673 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
674 	ASSERT_EQ(0, ret);
675 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
676 	ASSERT_EQ(0, ret);
677 	TRAP_nr = 0;
678 	memset(&TRAP_info, 0, sizeof(TRAP_info));
679 	/* Expect the registers to be rolled back. (nr = error) may vary
680 	 * based on arch. */
681 	ret = syscall(__NR_getpid);
682 	/* Silence gcc warning about volatile. */
683 	test = TRAP_nr;
684 	EXPECT_EQ(SIGSYS, test);
685 	struct local_sigsys {
686 		void *_call_addr;	/* calling user insn */
687 		int _syscall;		/* triggering system call number */
688 		unsigned int _arch;	/* AUDIT_ARCH_* of syscall */
689 	} *sigsys = (struct local_sigsys *)
690 #ifdef si_syscall
691 		&(TRAP_info.si_call_addr);
692 #else
693 		&TRAP_info.si_pid;
694 #endif
695 	EXPECT_EQ(__NR_getpid, sigsys->_syscall);
696 	/* Make sure arch is non-zero. */
697 	EXPECT_NE(0, sigsys->_arch);
698 	EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
699 }
700 
701 FIXTURE_DATA(precedence) {
702 	struct sock_fprog allow;
703 	struct sock_fprog trace;
704 	struct sock_fprog error;
705 	struct sock_fprog trap;
706 	struct sock_fprog kill;
707 };
708 
709 FIXTURE_SETUP(precedence)
710 {
711 	struct sock_filter allow_insns[] = {
712 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
713 	};
714 	struct sock_filter trace_insns[] = {
715 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
716 			offsetof(struct seccomp_data, nr)),
717 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
718 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
719 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
720 	};
721 	struct sock_filter error_insns[] = {
722 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
723 			offsetof(struct seccomp_data, nr)),
724 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
725 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
726 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
727 	};
728 	struct sock_filter trap_insns[] = {
729 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
730 			offsetof(struct seccomp_data, nr)),
731 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
732 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
733 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
734 	};
735 	struct sock_filter kill_insns[] = {
736 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
737 			offsetof(struct seccomp_data, nr)),
738 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
739 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
740 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
741 	};
742 
743 	memset(self, 0, sizeof(*self));
744 #define FILTER_ALLOC(_x) \
745 	self->_x.filter = malloc(sizeof(_x##_insns)); \
746 	ASSERT_NE(NULL, self->_x.filter); \
747 	memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
748 	self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
749 	FILTER_ALLOC(allow);
750 	FILTER_ALLOC(trace);
751 	FILTER_ALLOC(error);
752 	FILTER_ALLOC(trap);
753 	FILTER_ALLOC(kill);
754 }
755 
756 FIXTURE_TEARDOWN(precedence)
757 {
758 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
759 	FILTER_FREE(allow);
760 	FILTER_FREE(trace);
761 	FILTER_FREE(error);
762 	FILTER_FREE(trap);
763 	FILTER_FREE(kill);
764 }
765 
766 TEST_F(precedence, allow_ok)
767 {
768 	pid_t parent, res = 0;
769 	long ret;
770 
771 	parent = getppid();
772 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
773 	ASSERT_EQ(0, ret);
774 
775 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
776 	ASSERT_EQ(0, ret);
777 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
778 	ASSERT_EQ(0, ret);
779 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
780 	ASSERT_EQ(0, ret);
781 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
782 	ASSERT_EQ(0, ret);
783 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
784 	ASSERT_EQ(0, ret);
785 	/* Should work just fine. */
786 	res = syscall(__NR_getppid);
787 	EXPECT_EQ(parent, res);
788 }
789 
790 TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
791 {
792 	pid_t parent, res = 0;
793 	long ret;
794 
795 	parent = getppid();
796 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
797 	ASSERT_EQ(0, ret);
798 
799 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
800 	ASSERT_EQ(0, ret);
801 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
802 	ASSERT_EQ(0, ret);
803 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
804 	ASSERT_EQ(0, ret);
805 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
806 	ASSERT_EQ(0, ret);
807 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
808 	ASSERT_EQ(0, ret);
809 	/* Should work just fine. */
810 	res = syscall(__NR_getppid);
811 	EXPECT_EQ(parent, res);
812 	/* getpid() should never return. */
813 	res = syscall(__NR_getpid);
814 	EXPECT_EQ(0, res);
815 }
816 
817 TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
818 {
819 	pid_t parent;
820 	long ret;
821 
822 	parent = getppid();
823 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
824 	ASSERT_EQ(0, ret);
825 
826 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
827 	ASSERT_EQ(0, ret);
828 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
829 	ASSERT_EQ(0, ret);
830 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
831 	ASSERT_EQ(0, ret);
832 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
833 	ASSERT_EQ(0, ret);
834 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
835 	ASSERT_EQ(0, ret);
836 	/* Should work just fine. */
837 	EXPECT_EQ(parent, syscall(__NR_getppid));
838 	/* getpid() should never return. */
839 	EXPECT_EQ(0, syscall(__NR_getpid));
840 }
841 
842 TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
843 {
844 	pid_t parent;
845 	long ret;
846 
847 	parent = getppid();
848 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
849 	ASSERT_EQ(0, ret);
850 
851 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
852 	ASSERT_EQ(0, ret);
853 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
854 	ASSERT_EQ(0, ret);
855 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
856 	ASSERT_EQ(0, ret);
857 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
858 	ASSERT_EQ(0, ret);
859 	/* Should work just fine. */
860 	EXPECT_EQ(parent, syscall(__NR_getppid));
861 	/* getpid() should never return. */
862 	EXPECT_EQ(0, syscall(__NR_getpid));
863 }
864 
865 TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
866 {
867 	pid_t parent;
868 	long ret;
869 
870 	parent = getppid();
871 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
872 	ASSERT_EQ(0, ret);
873 
874 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
875 	ASSERT_EQ(0, ret);
876 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
877 	ASSERT_EQ(0, ret);
878 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
879 	ASSERT_EQ(0, ret);
880 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
881 	ASSERT_EQ(0, ret);
882 	/* Should work just fine. */
883 	EXPECT_EQ(parent, syscall(__NR_getppid));
884 	/* getpid() should never return. */
885 	EXPECT_EQ(0, syscall(__NR_getpid));
886 }
887 
888 TEST_F(precedence, errno_is_third)
889 {
890 	pid_t parent;
891 	long ret;
892 
893 	parent = getppid();
894 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
895 	ASSERT_EQ(0, ret);
896 
897 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
898 	ASSERT_EQ(0, ret);
899 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
900 	ASSERT_EQ(0, ret);
901 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
902 	ASSERT_EQ(0, ret);
903 	/* Should work just fine. */
904 	EXPECT_EQ(parent, syscall(__NR_getppid));
905 	EXPECT_EQ(0, syscall(__NR_getpid));
906 }
907 
908 TEST_F(precedence, errno_is_third_in_any_order)
909 {
910 	pid_t parent;
911 	long ret;
912 
913 	parent = getppid();
914 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
915 	ASSERT_EQ(0, ret);
916 
917 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
918 	ASSERT_EQ(0, ret);
919 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
920 	ASSERT_EQ(0, ret);
921 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
922 	ASSERT_EQ(0, ret);
923 	/* Should work just fine. */
924 	EXPECT_EQ(parent, syscall(__NR_getppid));
925 	EXPECT_EQ(0, syscall(__NR_getpid));
926 }
927 
928 TEST_F(precedence, trace_is_fourth)
929 {
930 	pid_t parent;
931 	long ret;
932 
933 	parent = getppid();
934 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
935 	ASSERT_EQ(0, ret);
936 
937 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
938 	ASSERT_EQ(0, ret);
939 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
940 	ASSERT_EQ(0, ret);
941 	/* Should work just fine. */
942 	EXPECT_EQ(parent, syscall(__NR_getppid));
943 	/* No ptracer */
944 	EXPECT_EQ(-1, syscall(__NR_getpid));
945 }
946 
947 TEST_F(precedence, trace_is_fourth_in_any_order)
948 {
949 	pid_t parent;
950 	long ret;
951 
952 	parent = getppid();
953 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
954 	ASSERT_EQ(0, ret);
955 
956 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
957 	ASSERT_EQ(0, ret);
958 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
959 	ASSERT_EQ(0, ret);
960 	/* Should work just fine. */
961 	EXPECT_EQ(parent, syscall(__NR_getppid));
962 	/* No ptracer */
963 	EXPECT_EQ(-1, syscall(__NR_getpid));
964 }
965 
/* Fallback for headers that lack the seccomp ptrace option. */
#ifndef PTRACE_O_TRACESECCOMP
#define PTRACE_O_TRACESECCOMP	0x00000080
#endif

/* Catch the Ubuntu 12.04 value error. */
#if PTRACE_EVENT_SECCOMP != 7
#undef PTRACE_EVENT_SECCOMP
#endif

#ifndef PTRACE_EVENT_SECCOMP
#define PTRACE_EVENT_SECCOMP 7
#endif

/*
 * True when the event code in the bits above bit 15 of a wait status is
 * PTRACE_EVENT_SECCOMP. The argument is parenthesized so that compound
 * expressions (e.g. "a | b") are shifted as a whole.
 */
#define IS_SECCOMP_EVENT(status) (((status) >> 16) == PTRACE_EVENT_SECCOMP)
/*
 * Run flag for the tracer loop. It is written from a signal handler, so
 * it is declared volatile sig_atomic_t, the object type the C standard
 * allows a handler to assign.
 */
volatile sig_atomic_t tracer_running;

/* Signal handler: ask the tracer loop to stop. */
void tracer_stop(int sig)
{
	(void)sig;
	tracer_running = false;
}
985 
986 typedef void tracer_func_t(struct __test_metadata *_metadata,
987 			   pid_t tracee, int status, void *args);
988 
989 void tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
990 	    tracer_func_t tracer_func, void *args)
991 {
992 	int ret = -1;
993 	struct sigaction action = {
994 		.sa_handler = tracer_stop,
995 	};
996 
997 	/* Allow external shutdown. */
998 	tracer_running = true;
999 	ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));
1000 
1001 	errno = 0;
1002 	while (ret == -1 && errno != EINVAL)
1003 		ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
1004 	ASSERT_EQ(0, ret) {
1005 		kill(tracee, SIGKILL);
1006 	}
1007 	/* Wait for attach stop */
1008 	wait(NULL);
1009 
1010 	ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, PTRACE_O_TRACESECCOMP);
1011 	ASSERT_EQ(0, ret) {
1012 		TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
1013 		kill(tracee, SIGKILL);
1014 	}
1015 	ptrace(PTRACE_CONT, tracee, NULL, 0);
1016 
1017 	/* Unblock the tracee */
1018 	ASSERT_EQ(1, write(fd, "A", 1));
1019 	ASSERT_EQ(0, close(fd));
1020 
1021 	/* Run until we're shut down. Must assert to stop execution. */
1022 	while (tracer_running) {
1023 		int status;
1024 
1025 		if (wait(&status) != tracee)
1026 			continue;
1027 		if (WIFSIGNALED(status) || WIFEXITED(status))
1028 			/* Child is dead. Time to go. */
1029 			return;
1030 
1031 		/* Make sure this is a seccomp event. */
1032 		ASSERT_EQ(true, IS_SECCOMP_EVENT(status));
1033 
1034 		tracer_func(_metadata, tracee, status, args);
1035 
1036 		ret = ptrace(PTRACE_CONT, tracee, NULL, NULL);
1037 		ASSERT_EQ(0, ret);
1038 	}
1039 	/* Directly report the status of our test harness results. */
1040 	syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
1041 }
1042 
/* Common tracer setup/teardown functions. */

/* Signal handler that deliberately does nothing. */
void cont_handler(int num)
{
}
1046 pid_t setup_trace_fixture(struct __test_metadata *_metadata,
1047 			  tracer_func_t func, void *args)
1048 {
1049 	char sync;
1050 	int pipefd[2];
1051 	pid_t tracer_pid;
1052 	pid_t tracee = getpid();
1053 
1054 	/* Setup a pipe for clean synchronization. */
1055 	ASSERT_EQ(0, pipe(pipefd));
1056 
1057 	/* Fork a child which we'll promote to tracer */
1058 	tracer_pid = fork();
1059 	ASSERT_LE(0, tracer_pid);
1060 	signal(SIGALRM, cont_handler);
1061 	if (tracer_pid == 0) {
1062 		close(pipefd[0]);
1063 		tracer(_metadata, pipefd[1], tracee, func, args);
1064 		syscall(__NR_exit, 0);
1065 	}
1066 	close(pipefd[1]);
1067 	prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
1068 	read(pipefd[0], &sync, 1);
1069 	close(pipefd[0]);
1070 
1071 	return tracer_pid;
1072 }
1073 void teardown_trace_fixture(struct __test_metadata *_metadata,
1074 			    pid_t tracer)
1075 {
1076 	if (tracer) {
1077 		int status;
1078 		/*
1079 		 * Extract the exit code from the other process and
1080 		 * adopt it for ourselves in case its asserts failed.
1081 		 */
1082 		ASSERT_EQ(0, kill(tracer, SIGUSR1));
1083 		ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
1084 		if (WEXITSTATUS(status))
1085 			_metadata->passed = 0;
1086 	}
1087 }
1088 
1089 /* "poke" tracer arguments and function. */
1090 struct tracer_args_poke_t {
1091 	unsigned long poke_addr;
1092 };
1093 
1094 void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
1095 		 void *args)
1096 {
1097 	int ret;
1098 	unsigned long msg;
1099 	struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
1100 
1101 	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1102 	EXPECT_EQ(0, ret);
1103 	/* If this fails, don't try to recover. */
1104 	ASSERT_EQ(0x1001, msg) {
1105 		kill(tracee, SIGKILL);
1106 	}
1107 	/*
1108 	 * Poke in the message.
1109 	 * Registers are not touched to try to keep this relatively arch
1110 	 * agnostic.
1111 	 */
1112 	ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
1113 	EXPECT_EQ(0, ret);
1114 }
1115 
/* Per-test state of the TRACE_poke fixture. */
FIXTURE_DATA(TRACE_poke) {
	/* Heap copy of the filter built in FIXTURE_SETUP; tests install it. */
	struct sock_fprog prog;
	/* Pid returned by setup_trace_fixture(). */
	pid_t tracer;
	/* Word the tracer overwrites; tests compare it against 0 and 0x1001. */
	long poked;
	/* Arguments handed to tracer_poke(); carries the address of poked. */
	struct tracer_args_poke_t tracer_args;
};
1122 
1123 FIXTURE_SETUP(TRACE_poke)
1124 {
1125 	struct sock_filter filter[] = {
1126 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1127 			offsetof(struct seccomp_data, nr)),
1128 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1129 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
1130 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1131 	};
1132 
1133 	self->poked = 0;
1134 	memset(&self->prog, 0, sizeof(self->prog));
1135 	self->prog.filter = malloc(sizeof(filter));
1136 	ASSERT_NE(NULL, self->prog.filter);
1137 	memcpy(self->prog.filter, filter, sizeof(filter));
1138 	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1139 
1140 	/* Set up tracer args. */
1141 	self->tracer_args.poke_addr = (unsigned long)&self->poked;
1142 
1143 	/* Launch tracer. */
1144 	self->tracer = setup_trace_fixture(_metadata, tracer_poke,
1145 					   &self->tracer_args);
1146 }
1147 
1148 FIXTURE_TEARDOWN(TRACE_poke)
1149 {
1150 	teardown_trace_fixture(_metadata, self->tracer);
1151 	if (self->prog.filter)
1152 		free(self->prog.filter);
1153 }
1154 
1155 TEST_F(TRACE_poke, read_has_side_effects)
1156 {
1157 	ssize_t ret;
1158 
1159 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1160 	ASSERT_EQ(0, ret);
1161 
1162 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1163 	ASSERT_EQ(0, ret);
1164 
1165 	EXPECT_EQ(0, self->poked);
1166 	ret = read(-1, NULL, 0);
1167 	EXPECT_EQ(-1, ret);
1168 	EXPECT_EQ(0x1001, self->poked);
1169 }
1170 
1171 TEST_F(TRACE_poke, getpid_runs_normally)
1172 {
1173 	long ret;
1174 
1175 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1176 	ASSERT_EQ(0, ret);
1177 
1178 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1179 	ASSERT_EQ(0, ret);
1180 
1181 	EXPECT_EQ(0, self->poked);
1182 	EXPECT_NE(0, syscall(__NR_getpid));
1183 	EXPECT_EQ(0, self->poked);
1184 }
1185 
/*
 * Per-architecture register access used by the ptrace helpers below:
 *   ARCH_REGS   - register-set type passed to PTRACE_GETREGSET/SETREGSET
 *   SYSCALL_NUM - member that holds the syscall number
 *   SYSCALL_RET - member that holds the syscall return value
 */
#if defined(__x86_64__) || defined(__i386__)
# define ARCH_REGS	struct user_regs_struct
# if defined(__x86_64__)
#  define SYSCALL_NUM	orig_rax
#  define SYSCALL_RET	rax
# else
#  define SYSCALL_NUM	orig_eax
#  define SYSCALL_RET	eax
# endif
#elif defined(__arm__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	ARM_r7
# define SYSCALL_RET	ARM_r0
#elif defined(__aarch64__)
# define ARCH_REGS	struct user_pt_regs
# define SYSCALL_NUM	regs[8]
# define SYSCALL_RET	regs[0]
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif
1205 
1206 /* Architecture-specific syscall fetching routine. */
1207 int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
1208 {
1209 	struct iovec iov;
1210 	ARCH_REGS regs;
1211 
1212 	iov.iov_base = &regs;
1213 	iov.iov_len = sizeof(regs);
1214 	EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
1215 		TH_LOG("PTRACE_GETREGSET failed");
1216 		return -1;
1217 	}
1218 
1219 	return regs.SYSCALL_NUM;
1220 }
1221 
1222 /* Architecture-specific syscall changing routine. */
1223 void change_syscall(struct __test_metadata *_metadata,
1224 		    pid_t tracee, int syscall)
1225 {
1226 	struct iovec iov;
1227 	int ret;
1228 	ARCH_REGS regs;
1229 
1230 	iov.iov_base = &regs;
1231 	iov.iov_len = sizeof(regs);
1232 	ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
1233 	EXPECT_EQ(0, ret);
1234 
1235 #if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__)
1236 	{
1237 		regs.SYSCALL_NUM = syscall;
1238 	}
1239 
1240 #elif defined(__arm__)
1241 # ifndef PTRACE_SET_SYSCALL
1242 #  define PTRACE_SET_SYSCALL   23
1243 # endif
1244 	{
1245 		ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
1246 		EXPECT_EQ(0, ret);
1247 	}
1248 
1249 #else
1250 	ASSERT_EQ(1, 0) {
1251 		TH_LOG("How is the syscall changed on this architecture?");
1252 	}
1253 #endif
1254 
1255 	/* If syscall is skipped, change return value. */
1256 	if (syscall == -1)
1257 		regs.SYSCALL_RET = 1;
1258 
1259 	ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
1260 	EXPECT_EQ(0, ret);
1261 }
1262 
1263 void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
1264 		    int status, void *args)
1265 {
1266 	int ret;
1267 	unsigned long msg;
1268 
1269 	/* Make sure we got the right message. */
1270 	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1271 	EXPECT_EQ(0, ret);
1272 
1273 	switch (msg) {
1274 	case 0x1002:
1275 		/* change getpid to getppid. */
1276 		change_syscall(_metadata, tracee, __NR_getppid);
1277 		break;
1278 	case 0x1003:
1279 		/* skip gettid. */
1280 		change_syscall(_metadata, tracee, -1);
1281 		break;
1282 	case 0x1004:
1283 		/* do nothing (allow getppid) */
1284 		break;
1285 	default:
1286 		EXPECT_EQ(0, msg) {
1287 			TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
1288 			kill(tracee, SIGKILL);
1289 		}
1290 	}
1291 
1292 }
1293 
/* Per-test state of the TRACE_syscall fixture. */
FIXTURE_DATA(TRACE_syscall) {
	/* Heap copy of the filter built in FIXTURE_SETUP; tests install it. */
	struct sock_fprog prog;
	/* Pid returned by setup_trace_fixture(). */
	pid_t tracer;
	/* gettid(), getpid() and getppid() results recorded during setup. */
	pid_t mytid;
	pid_t mypid;
	pid_t parent;
};
1298 
1299 FIXTURE_SETUP(TRACE_syscall)
1300 {
1301 	struct sock_filter filter[] = {
1302 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1303 			offsetof(struct seccomp_data, nr)),
1304 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
1305 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
1306 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
1307 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
1308 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1309 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
1310 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1311 	};
1312 
1313 	memset(&self->prog, 0, sizeof(self->prog));
1314 	self->prog.filter = malloc(sizeof(filter));
1315 	ASSERT_NE(NULL, self->prog.filter);
1316 	memcpy(self->prog.filter, filter, sizeof(filter));
1317 	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1318 
1319 	/* Prepare some testable syscall results. */
1320 	self->mytid = syscall(__NR_gettid);
1321 	ASSERT_GT(self->mytid, 0);
1322 	ASSERT_NE(self->mytid, 1) {
1323 		TH_LOG("Running this test as init is not supported. :)");
1324 	}
1325 
1326 	self->mypid = getpid();
1327 	ASSERT_GT(self->mypid, 0);
1328 	ASSERT_EQ(self->mytid, self->mypid);
1329 
1330 	self->parent = getppid();
1331 	ASSERT_GT(self->parent, 0);
1332 	ASSERT_NE(self->parent, self->mypid);
1333 
1334 	/* Launch tracer. */
1335 	self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL);
1336 }
1337 
1338 FIXTURE_TEARDOWN(TRACE_syscall)
1339 {
1340 	teardown_trace_fixture(_metadata, self->tracer);
1341 	if (self->prog.filter)
1342 		free(self->prog.filter);
1343 }
1344 
1345 TEST_F(TRACE_syscall, syscall_allowed)
1346 {
1347 	long ret;
1348 
1349 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1350 	ASSERT_EQ(0, ret);
1351 
1352 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1353 	ASSERT_EQ(0, ret);
1354 
1355 	/* getppid works as expected (no changes). */
1356 	EXPECT_EQ(self->parent, syscall(__NR_getppid));
1357 	EXPECT_NE(self->mypid, syscall(__NR_getppid));
1358 }
1359 
1360 TEST_F(TRACE_syscall, syscall_redirected)
1361 {
1362 	long ret;
1363 
1364 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1365 	ASSERT_EQ(0, ret);
1366 
1367 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1368 	ASSERT_EQ(0, ret);
1369 
1370 	/* getpid has been redirected to getppid as expected. */
1371 	EXPECT_EQ(self->parent, syscall(__NR_getpid));
1372 	EXPECT_NE(self->mypid, syscall(__NR_getpid));
1373 }
1374 
1375 TEST_F(TRACE_syscall, syscall_dropped)
1376 {
1377 	long ret;
1378 
1379 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1380 	ASSERT_EQ(0, ret);
1381 
1382 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1383 	ASSERT_EQ(0, ret);
1384 
1385 	/* gettid has been skipped and an altered return value stored. */
1386 	EXPECT_EQ(1, syscall(__NR_gettid));
1387 	EXPECT_NE(self->mytid, syscall(__NR_gettid));
1388 }
1389 
/*
 * Fallback definitions for build environments whose headers do not yet
 * provide the seccomp() syscall number or its operation/flag constants.
 */
#if !defined(__NR_seccomp)
# if defined(__i386__)
#  define __NR_seccomp 354
# elif defined(__x86_64__)
#  define __NR_seccomp 317
# elif defined(__arm__)
#  define __NR_seccomp 383
# elif defined(__aarch64__)
#  define __NR_seccomp 277
# else
#  warning "seccomp syscall number unknown for this architecture"
#  define __NR_seccomp 0xffff
# endif
#endif

/* seccomp() operations. */
#if !defined(SECCOMP_SET_MODE_STRICT)
#define SECCOMP_SET_MODE_STRICT 0
#endif

#if !defined(SECCOMP_SET_MODE_FILTER)
#define SECCOMP_SET_MODE_FILTER 1
#endif

/* seccomp() flag used together with SECCOMP_SET_MODE_FILTER. */
#if !defined(SECCOMP_FLAG_FILTER_TSYNC)
#define SECCOMP_FLAG_FILTER_TSYNC 1
#endif
1416 
#ifndef seccomp
/*
 * Thin wrapper around the raw seccomp syscall.
 *
 * errno is cleared before the call, so callers can inspect errno afterwards
 * without seeing a value left over from an earlier failure.  Returns the
 * syscall() result narrowed to int.
 */
int seccomp(unsigned int op, unsigned int flags, struct sock_fprog *filter)
{
	long rc;

	errno = 0;
	rc = syscall(__NR_seccomp, op, flags, filter);

	return rc;
}
#endif
1424 
1425 TEST(seccomp_syscall)
1426 {
1427 	struct sock_filter filter[] = {
1428 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1429 	};
1430 	struct sock_fprog prog = {
1431 		.len = (unsigned short)ARRAY_SIZE(filter),
1432 		.filter = filter,
1433 	};
1434 	long ret;
1435 
1436 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1437 	ASSERT_EQ(0, ret) {
1438 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1439 	}
1440 
1441 	/* Reject insane operation. */
1442 	ret = seccomp(-1, 0, &prog);
1443 	EXPECT_EQ(EINVAL, errno) {
1444 		TH_LOG("Did not reject crazy op value!");
1445 	}
1446 
1447 	/* Reject strict with flags or pointer. */
1448 	ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
1449 	EXPECT_EQ(EINVAL, errno) {
1450 		TH_LOG("Did not reject mode strict with flags!");
1451 	}
1452 	ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
1453 	EXPECT_EQ(EINVAL, errno) {
1454 		TH_LOG("Did not reject mode strict with uargs!");
1455 	}
1456 
1457 	/* Reject insane args for filter. */
1458 	ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
1459 	EXPECT_EQ(EINVAL, errno) {
1460 		TH_LOG("Did not reject crazy filter flags!");
1461 	}
1462 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
1463 	EXPECT_EQ(EFAULT, errno) {
1464 		TH_LOG("Did not reject NULL filter!");
1465 	}
1466 
1467 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
1468 	EXPECT_EQ(0, errno) {
1469 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
1470 			strerror(errno));
1471 	}
1472 }
1473 
1474 TEST(seccomp_syscall_mode_lock)
1475 {
1476 	struct sock_filter filter[] = {
1477 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1478 	};
1479 	struct sock_fprog prog = {
1480 		.len = (unsigned short)ARRAY_SIZE(filter),
1481 		.filter = filter,
1482 	};
1483 	long ret;
1484 
1485 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
1486 	ASSERT_EQ(0, ret) {
1487 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1488 	}
1489 
1490 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
1491 	EXPECT_EQ(0, ret) {
1492 		TH_LOG("Could not install filter!");
1493 	}
1494 
1495 	/* Make sure neither entry point will switch to strict. */
1496 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
1497 	EXPECT_EQ(EINVAL, errno) {
1498 		TH_LOG("Switched to mode strict!");
1499 	}
1500 
1501 	ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
1502 	EXPECT_EQ(EINVAL, errno) {
1503 		TH_LOG("Switched to mode strict!");
1504 	}
1505 }
1506 
1507 TEST(TSYNC_first)
1508 {
1509 	struct sock_filter filter[] = {
1510 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1511 	};
1512 	struct sock_fprog prog = {
1513 		.len = (unsigned short)ARRAY_SIZE(filter),
1514 		.filter = filter,
1515 	};
1516 	long ret;
1517 
1518 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
1519 	ASSERT_EQ(0, ret) {
1520 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1521 	}
1522 
1523 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1524 		      &prog);
1525 	EXPECT_EQ(0, ret) {
1526 		TH_LOG("Could not install initial filter with TSYNC!");
1527 	}
1528 }
1529 
/* Number of sibling threads the TSYNC fixture manages. */
#define TSYNC_SIBLINGS 2
/* Per-thread control block handed to tsync_sibling(). */
struct tsync_sibling {
	/* pthread handle filled in by pthread_create(). */
	pthread_t tid;
	/* Kernel thread id; the sibling stores its own gettid() result here. */
	pid_t system_tid;
	/* Posted by the sibling once it has finished its start-up step. */
	sem_t *started;
	/* Condition variable (and its mutex) the sibling waits on. */
	pthread_cond_t *cond;
	pthread_mutex_t *mutex;
	/* Non-zero: the sibling installs ->prog itself via prctl(). */
	int diverge;
	/* Number of condition wake-ups the sibling consumes before going on. */
	int num_waits;
	/* Filter program used when ->diverge is set. */
	struct sock_fprog *prog;
	/* Harness state of the owning test. */
	struct __test_metadata *metadata;
};
1542 
1543 FIXTURE_DATA(TSYNC) {
1544 	struct sock_fprog root_prog, apply_prog;
1545 	struct tsync_sibling sibling[TSYNC_SIBLINGS];
1546 	sem_t started;
1547 	pthread_cond_t cond;
1548 	pthread_mutex_t mutex;
1549 	int sibling_count;
1550 };
1551 
1552 FIXTURE_SETUP(TSYNC)
1553 {
1554 	struct sock_filter root_filter[] = {
1555 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1556 	};
1557 	struct sock_filter apply_filter[] = {
1558 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1559 			offsetof(struct seccomp_data, nr)),
1560 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1561 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1562 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1563 	};
1564 
1565 	memset(&self->root_prog, 0, sizeof(self->root_prog));
1566 	memset(&self->apply_prog, 0, sizeof(self->apply_prog));
1567 	memset(&self->sibling, 0, sizeof(self->sibling));
1568 	self->root_prog.filter = malloc(sizeof(root_filter));
1569 	ASSERT_NE(NULL, self->root_prog.filter);
1570 	memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
1571 	self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
1572 
1573 	self->apply_prog.filter = malloc(sizeof(apply_filter));
1574 	ASSERT_NE(NULL, self->apply_prog.filter);
1575 	memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
1576 	self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
1577 
1578 	self->sibling_count = 0;
1579 	pthread_mutex_init(&self->mutex, NULL);
1580 	pthread_cond_init(&self->cond, NULL);
1581 	sem_init(&self->started, 0, 0);
1582 	self->sibling[0].tid = 0;
1583 	self->sibling[0].cond = &self->cond;
1584 	self->sibling[0].started = &self->started;
1585 	self->sibling[0].mutex = &self->mutex;
1586 	self->sibling[0].diverge = 0;
1587 	self->sibling[0].num_waits = 1;
1588 	self->sibling[0].prog = &self->root_prog;
1589 	self->sibling[0].metadata = _metadata;
1590 	self->sibling[1].tid = 0;
1591 	self->sibling[1].cond = &self->cond;
1592 	self->sibling[1].started = &self->started;
1593 	self->sibling[1].mutex = &self->mutex;
1594 	self->sibling[1].diverge = 0;
1595 	self->sibling[1].prog = &self->root_prog;
1596 	self->sibling[1].num_waits = 1;
1597 	self->sibling[1].metadata = _metadata;
1598 }
1599 
1600 FIXTURE_TEARDOWN(TSYNC)
1601 {
1602 	int sib = 0;
1603 
1604 	if (self->root_prog.filter)
1605 		free(self->root_prog.filter);
1606 	if (self->apply_prog.filter)
1607 		free(self->apply_prog.filter);
1608 
1609 	for ( ; sib < self->sibling_count; ++sib) {
1610 		struct tsync_sibling *s = &self->sibling[sib];
1611 		void *status;
1612 
1613 		if (!s->tid)
1614 			continue;
1615 		if (pthread_kill(s->tid, 0)) {
1616 			pthread_cancel(s->tid);
1617 			pthread_join(s->tid, &status);
1618 		}
1619 	}
1620 	pthread_mutex_destroy(&self->mutex);
1621 	pthread_cond_destroy(&self->cond);
1622 	sem_destroy(&self->started);
1623 }
1624 
1625 void *tsync_sibling(void *data)
1626 {
1627 	long ret = 0;
1628 	struct tsync_sibling *me = data;
1629 
1630 	me->system_tid = syscall(__NR_gettid);
1631 
1632 	pthread_mutex_lock(me->mutex);
1633 	if (me->diverge) {
1634 		/* Just re-apply the root prog to fork the tree */
1635 		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
1636 				me->prog, 0, 0);
1637 	}
1638 	sem_post(me->started);
1639 	/* Return outside of started so parent notices failures. */
1640 	if (ret) {
1641 		pthread_mutex_unlock(me->mutex);
1642 		return (void *)SIBLING_EXIT_FAILURE;
1643 	}
1644 	do {
1645 		pthread_cond_wait(me->cond, me->mutex);
1646 		me->num_waits = me->num_waits - 1;
1647 	} while (me->num_waits);
1648 	pthread_mutex_unlock(me->mutex);
1649 
1650 	ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
1651 	if (!ret)
1652 		return (void *)SIBLING_EXIT_NEWPRIVS;
1653 	read(0, NULL, 0);
1654 	return (void *)SIBLING_EXIT_UNKILLED;
1655 }
1656 
1657 void tsync_start_sibling(struct tsync_sibling *sibling)
1658 {
1659 	pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
1660 }
1661 
1662 TEST_F(TSYNC, siblings_fail_prctl)
1663 {
1664 	long ret;
1665 	void *status;
1666 	struct sock_filter filter[] = {
1667 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1668 			offsetof(struct seccomp_data, nr)),
1669 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
1670 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
1671 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1672 	};
1673 	struct sock_fprog prog = {
1674 		.len = (unsigned short)ARRAY_SIZE(filter),
1675 		.filter = filter,
1676 	};
1677 
1678 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1679 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1680 	}
1681 
1682 	/* Check prctl failure detection by requesting sib 0 diverge. */
1683 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
1684 	ASSERT_EQ(0, ret) {
1685 		TH_LOG("setting filter failed");
1686 	}
1687 
1688 	self->sibling[0].diverge = 1;
1689 	tsync_start_sibling(&self->sibling[0]);
1690 	tsync_start_sibling(&self->sibling[1]);
1691 
1692 	while (self->sibling_count < TSYNC_SIBLINGS) {
1693 		sem_wait(&self->started);
1694 		self->sibling_count++;
1695 	}
1696 
1697 	/* Signal the threads to clean up*/
1698 	pthread_mutex_lock(&self->mutex);
1699 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1700 		TH_LOG("cond broadcast non-zero");
1701 	}
1702 	pthread_mutex_unlock(&self->mutex);
1703 
1704 	/* Ensure diverging sibling failed to call prctl. */
1705 	pthread_join(self->sibling[0].tid, &status);
1706 	EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
1707 	pthread_join(self->sibling[1].tid, &status);
1708 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
1709 }
1710 
1711 TEST_F(TSYNC, two_siblings_with_ancestor)
1712 {
1713 	long ret;
1714 	void *status;
1715 
1716 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1717 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1718 	}
1719 
1720 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
1721 	ASSERT_EQ(0, ret) {
1722 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
1723 	}
1724 	tsync_start_sibling(&self->sibling[0]);
1725 	tsync_start_sibling(&self->sibling[1]);
1726 
1727 	while (self->sibling_count < TSYNC_SIBLINGS) {
1728 		sem_wait(&self->started);
1729 		self->sibling_count++;
1730 	}
1731 
1732 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1733 		      &self->apply_prog);
1734 	ASSERT_EQ(0, ret) {
1735 		TH_LOG("Could install filter on all threads!");
1736 	}
1737 	/* Tell the siblings to test the policy */
1738 	pthread_mutex_lock(&self->mutex);
1739 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1740 		TH_LOG("cond broadcast non-zero");
1741 	}
1742 	pthread_mutex_unlock(&self->mutex);
1743 	/* Ensure they are both killed and don't exit cleanly. */
1744 	pthread_join(self->sibling[0].tid, &status);
1745 	EXPECT_EQ(0x0, (long)status);
1746 	pthread_join(self->sibling[1].tid, &status);
1747 	EXPECT_EQ(0x0, (long)status);
1748 }
1749 
1750 TEST_F(TSYNC, two_sibling_want_nnp)
1751 {
1752 	void *status;
1753 
1754 	/* start siblings before any prctl() operations */
1755 	tsync_start_sibling(&self->sibling[0]);
1756 	tsync_start_sibling(&self->sibling[1]);
1757 	while (self->sibling_count < TSYNC_SIBLINGS) {
1758 		sem_wait(&self->started);
1759 		self->sibling_count++;
1760 	}
1761 
1762 	/* Tell the siblings to test no policy */
1763 	pthread_mutex_lock(&self->mutex);
1764 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1765 		TH_LOG("cond broadcast non-zero");
1766 	}
1767 	pthread_mutex_unlock(&self->mutex);
1768 
1769 	/* Ensure they are both upset about lacking nnp. */
1770 	pthread_join(self->sibling[0].tid, &status);
1771 	EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
1772 	pthread_join(self->sibling[1].tid, &status);
1773 	EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
1774 }
1775 
1776 TEST_F(TSYNC, two_siblings_with_no_filter)
1777 {
1778 	long ret;
1779 	void *status;
1780 
1781 	/* start siblings before any prctl() operations */
1782 	tsync_start_sibling(&self->sibling[0]);
1783 	tsync_start_sibling(&self->sibling[1]);
1784 	while (self->sibling_count < TSYNC_SIBLINGS) {
1785 		sem_wait(&self->started);
1786 		self->sibling_count++;
1787 	}
1788 
1789 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1790 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1791 	}
1792 
1793 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1794 		      &self->apply_prog);
1795 	ASSERT_EQ(0, ret) {
1796 		TH_LOG("Could install filter on all threads!");
1797 	}
1798 
1799 	/* Tell the siblings to test the policy */
1800 	pthread_mutex_lock(&self->mutex);
1801 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1802 		TH_LOG("cond broadcast non-zero");
1803 	}
1804 	pthread_mutex_unlock(&self->mutex);
1805 
1806 	/* Ensure they are both killed and don't exit cleanly. */
1807 	pthread_join(self->sibling[0].tid, &status);
1808 	EXPECT_EQ(0x0, (long)status);
1809 	pthread_join(self->sibling[1].tid, &status);
1810 	EXPECT_EQ(0x0, (long)status);
1811 }
1812 
1813 TEST_F(TSYNC, two_siblings_with_one_divergence)
1814 {
1815 	long ret;
1816 	void *status;
1817 
1818 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1819 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1820 	}
1821 
1822 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
1823 	ASSERT_EQ(0, ret) {
1824 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
1825 	}
1826 	self->sibling[0].diverge = 1;
1827 	tsync_start_sibling(&self->sibling[0]);
1828 	tsync_start_sibling(&self->sibling[1]);
1829 
1830 	while (self->sibling_count < TSYNC_SIBLINGS) {
1831 		sem_wait(&self->started);
1832 		self->sibling_count++;
1833 	}
1834 
1835 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1836 		      &self->apply_prog);
1837 	ASSERT_EQ(self->sibling[0].system_tid, ret) {
1838 		TH_LOG("Did not fail on diverged sibling.");
1839 	}
1840 
1841 	/* Wake the threads */
1842 	pthread_mutex_lock(&self->mutex);
1843 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1844 		TH_LOG("cond broadcast non-zero");
1845 	}
1846 	pthread_mutex_unlock(&self->mutex);
1847 
1848 	/* Ensure they are both unkilled. */
1849 	pthread_join(self->sibling[0].tid, &status);
1850 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
1851 	pthread_join(self->sibling[1].tid, &status);
1852 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
1853 }
1854 
1855 TEST_F(TSYNC, two_siblings_not_under_filter)
1856 {
1857 	long ret, sib;
1858 	void *status;
1859 
1860 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1861 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1862 	}
1863 
1864 	/*
1865 	 * Sibling 0 will have its own seccomp policy
1866 	 * and Sibling 1 will not be under seccomp at
1867 	 * all. Sibling 1 will enter seccomp and 0
1868 	 * will cause failure.
1869 	 */
1870 	self->sibling[0].diverge = 1;
1871 	tsync_start_sibling(&self->sibling[0]);
1872 	tsync_start_sibling(&self->sibling[1]);
1873 
1874 	while (self->sibling_count < TSYNC_SIBLINGS) {
1875 		sem_wait(&self->started);
1876 		self->sibling_count++;
1877 	}
1878 
1879 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
1880 	ASSERT_EQ(0, ret) {
1881 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
1882 	}
1883 
1884 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1885 		      &self->apply_prog);
1886 	ASSERT_EQ(ret, self->sibling[0].system_tid) {
1887 		TH_LOG("Did not fail on diverged sibling.");
1888 	}
1889 	sib = 1;
1890 	if (ret == self->sibling[0].system_tid)
1891 		sib = 0;
1892 
1893 	pthread_mutex_lock(&self->mutex);
1894 
1895 	/* Increment the other siblings num_waits so we can clean up
1896 	 * the one we just saw.
1897 	 */
1898 	self->sibling[!sib].num_waits += 1;
1899 
1900 	/* Signal the thread to clean up*/
1901 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1902 		TH_LOG("cond broadcast non-zero");
1903 	}
1904 	pthread_mutex_unlock(&self->mutex);
1905 	pthread_join(self->sibling[sib].tid, &status);
1906 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
1907 	/* Poll for actual task death. pthread_join doesn't guarantee it. */
1908 	while (!kill(self->sibling[sib].system_tid, 0))
1909 		sleep(0.1);
1910 	/* Switch to the remaining sibling */
1911 	sib = !sib;
1912 
1913 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1914 		      &self->apply_prog);
1915 	ASSERT_EQ(0, ret) {
1916 		TH_LOG("Expected the remaining sibling to sync");
1917 	};
1918 
1919 	pthread_mutex_lock(&self->mutex);
1920 
1921 	/* If remaining sibling didn't have a chance to wake up during
1922 	 * the first broadcast, manually reduce the num_waits now.
1923 	 */
1924 	if (self->sibling[sib].num_waits > 1)
1925 		self->sibling[sib].num_waits = 1;
1926 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1927 		TH_LOG("cond broadcast non-zero");
1928 	}
1929 	pthread_mutex_unlock(&self->mutex);
1930 	pthread_join(self->sibling[sib].tid, &status);
1931 	EXPECT_EQ(0, (long)status);
1932 	/* Poll for actual task death. pthread_join doesn't guarantee it. */
1933 	while (!kill(self->sibling[sib].system_tid, 0))
1934 		sleep(0.1);
1935 
1936 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1937 		      &self->apply_prog);
1938 	ASSERT_EQ(0, ret);  /* just us chickens */
1939 }
1940 
1941 /* Make sure restarted syscalls are seen directly as "restart_syscall". */
1942 TEST(syscall_restart)
1943 {
1944 	long ret;
1945 	unsigned long msg;
1946 	pid_t child_pid;
1947 	int pipefd[2];
1948 	int status;
1949 	siginfo_t info = { };
1950 	struct sock_filter filter[] = {
1951 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1952 			 offsetof(struct seccomp_data, nr)),
1953 
1954 #ifdef __NR_sigreturn
1955 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0),
1956 #endif
1957 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0),
1958 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0),
1959 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0),
1960 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_poll, 4, 0),
1961 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),
1962 
1963 		/* Allow __NR_write for easy logging. */
1964 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
1965 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1966 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1967 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100), /* poll */
1968 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200), /* restart */
1969 	};
1970 	struct sock_fprog prog = {
1971 		.len = (unsigned short)ARRAY_SIZE(filter),
1972 		.filter = filter,
1973 	};
1974 
1975 	ASSERT_EQ(0, pipe(pipefd));
1976 
1977 	child_pid = fork();
1978 	ASSERT_LE(0, child_pid);
1979 	if (child_pid == 0) {
1980 		/* Child uses EXPECT not ASSERT to deliver status correctly. */
1981 		char buf = ' ';
1982 		struct pollfd fds = {
1983 			.fd = pipefd[0],
1984 			.events = POLLIN,
1985 		};
1986 
1987 		/* Attach parent as tracer and stop. */
1988 		EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
1989 		EXPECT_EQ(0, raise(SIGSTOP));
1990 
1991 		EXPECT_EQ(0, close(pipefd[1]));
1992 
1993 		EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1994 			TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1995 		}
1996 
1997 		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1998 		EXPECT_EQ(0, ret) {
1999 			TH_LOG("Failed to install filter!");
2000 		}
2001 
2002 		EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2003 			TH_LOG("Failed to read() sync from parent");
2004 		}
2005 		EXPECT_EQ('.', buf) {
2006 			TH_LOG("Failed to get sync data from read()");
2007 		}
2008 
2009 		/* Start poll to be interrupted. */
2010 		errno = 0;
2011 		EXPECT_EQ(1, poll(&fds, 1, -1)) {
2012 			TH_LOG("Call to poll() failed (errno %d)", errno);
2013 		}
2014 
2015 		/* Read final sync from parent. */
2016 		EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2017 			TH_LOG("Failed final read() from parent");
2018 		}
2019 		EXPECT_EQ('!', buf) {
2020 			TH_LOG("Failed to get final data from read()");
2021 		}
2022 
2023 		/* Directly report the status of our test harness results. */
2024 		syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
2025 						     : EXIT_FAILURE);
2026 	}
2027 	EXPECT_EQ(0, close(pipefd[0]));
2028 
2029 	/* Attach to child, setup options, and release. */
2030 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2031 	ASSERT_EQ(true, WIFSTOPPED(status));
2032 	ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
2033 			    PTRACE_O_TRACESECCOMP));
2034 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2035 	ASSERT_EQ(1, write(pipefd[1], ".", 1));
2036 
2037 	/* Wait for poll() to start. */
2038 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2039 	ASSERT_EQ(true, WIFSTOPPED(status));
2040 	ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2041 	ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2042 	ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2043 	ASSERT_EQ(0x100, msg);
2044 	EXPECT_EQ(__NR_poll, get_syscall(_metadata, child_pid));
2045 
2046 	/* Might as well check siginfo for sanity while we're here. */
2047 	ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2048 	ASSERT_EQ(SIGTRAP, info.si_signo);
2049 	ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
2050 	EXPECT_EQ(0, info.si_errno);
2051 	EXPECT_EQ(getuid(), info.si_uid);
2052 	/* Verify signal delivery came from child (seccomp-triggered). */
2053 	EXPECT_EQ(child_pid, info.si_pid);
2054 
2055 	/* Interrupt poll with SIGSTOP (which we'll need to handle). */
2056 	ASSERT_EQ(0, kill(child_pid, SIGSTOP));
2057 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2058 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2059 	ASSERT_EQ(true, WIFSTOPPED(status));
2060 	ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
2061 	/* Verify signal delivery came from parent now. */
2062 	ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2063 	EXPECT_EQ(getpid(), info.si_pid);
2064 
2065 	/* Restart poll with SIGCONT, which triggers restart_syscall. */
2066 	ASSERT_EQ(0, kill(child_pid, SIGCONT));
2067 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2068 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2069 	ASSERT_EQ(true, WIFSTOPPED(status));
2070 	ASSERT_EQ(SIGCONT, WSTOPSIG(status));
2071 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2072 
2073 	/* Wait for restart_syscall() to start. */
2074 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2075 	ASSERT_EQ(true, WIFSTOPPED(status));
2076 	ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2077 	ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2078 	ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2079 	ASSERT_EQ(0x200, msg);
2080 	ret = get_syscall(_metadata, child_pid);
2081 #if defined(__arm__)
2082 	/* FIXME: ARM does not expose true syscall in registers. */
2083 	EXPECT_EQ(__NR_poll, ret);
2084 #else
2085 	EXPECT_EQ(__NR_restart_syscall, ret);
2086 #endif
2087 
2088 	/* Write again to end poll. */
2089 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2090 	ASSERT_EQ(1, write(pipefd[1], "!", 1));
2091 	EXPECT_EQ(0, close(pipefd[1]));
2092 
2093 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2094 	if (WIFSIGNALED(status) || WEXITSTATUS(status))
2095 		_metadata->passed = 0;
2096 }
2097 
2098 /*
2099  * TODO:
2100  * - add microbenchmarks
2101  * - expand NNP testing
2102  * - better arch-specific TRACE and TRAP handlers.
2103  * - endianness checking when appropriate
2104  * - 64-bit arg prodding
2105  * - arch value testing (x86 modes especially)
2106  * - ...
2107  */
2108 
/* Presumably expands to the program entry point from test_harness.h - confirm. */
TEST_HARNESS_MAIN
2110