1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (c) 2012 The Chromium OS Authors. All rights reserved. 4 * 5 * Test code for seccomp bpf. 6 */ 7 8 #define _GNU_SOURCE 9 #include <sys/types.h> 10 11 /* 12 * glibc 2.26 and later have SIGSYS in siginfo_t. Before that, 13 * we need to use the kernel's siginfo.h file and trick glibc 14 * into accepting it. 15 */ 16 #if !__GLIBC_PREREQ(2, 26) 17 # include <asm/siginfo.h> 18 # define __have_siginfo_t 1 19 # define __have_sigval_t 1 20 # define __have_sigevent_t 1 21 #endif 22 23 #include <errno.h> 24 #include <linux/filter.h> 25 #include <sys/prctl.h> 26 #include <sys/ptrace.h> 27 #include <sys/user.h> 28 #include <linux/prctl.h> 29 #include <linux/ptrace.h> 30 #include <linux/seccomp.h> 31 #include <pthread.h> 32 #include <semaphore.h> 33 #include <signal.h> 34 #include <stddef.h> 35 #include <stdbool.h> 36 #include <string.h> 37 #include <time.h> 38 #include <limits.h> 39 #include <linux/elf.h> 40 #include <sys/uio.h> 41 #include <sys/utsname.h> 42 #include <sys/fcntl.h> 43 #include <sys/mman.h> 44 #include <sys/times.h> 45 #include <sys/socket.h> 46 #include <sys/ioctl.h> 47 #include <linux/kcmp.h> 48 49 #include <unistd.h> 50 #include <sys/syscall.h> 51 #include <poll.h> 52 53 #include "../kselftest_harness.h" 54 55 #ifndef PR_SET_PTRACER 56 # define PR_SET_PTRACER 0x59616d61 57 #endif 58 59 #ifndef PR_SET_NO_NEW_PRIVS 60 #define PR_SET_NO_NEW_PRIVS 38 61 #define PR_GET_NO_NEW_PRIVS 39 62 #endif 63 64 #ifndef PR_SECCOMP_EXT 65 #define PR_SECCOMP_EXT 43 66 #endif 67 68 #ifndef SECCOMP_EXT_ACT 69 #define SECCOMP_EXT_ACT 1 70 #endif 71 72 #ifndef SECCOMP_EXT_ACT_TSYNC 73 #define SECCOMP_EXT_ACT_TSYNC 1 74 #endif 75 76 #ifndef SECCOMP_MODE_STRICT 77 #define SECCOMP_MODE_STRICT 1 78 #endif 79 80 #ifndef SECCOMP_MODE_FILTER 81 #define SECCOMP_MODE_FILTER 2 82 #endif 83 84 #ifndef SECCOMP_RET_ALLOW 85 struct seccomp_data { 86 int nr; 87 __u32 arch; 88 __u64 instruction_pointer; 89 __u64 args[6]; 90 }; 91 #endif 92 93 #ifndef SECCOMP_RET_KILL_PROCESS 94 #define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */ 95 #define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */ 96 #endif 97 #ifndef SECCOMP_RET_KILL 98 #define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD 99 #define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ 100 #define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ 101 #define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ 102 #define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ 103 #endif 104 #ifndef SECCOMP_RET_LOG 105 #define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ 106 #endif 107 108 #ifndef __NR_seccomp 109 # if defined(__i386__) 110 # define __NR_seccomp 354 111 # elif defined(__x86_64__) 112 # define __NR_seccomp 317 113 # elif defined(__arm__) 114 # define __NR_seccomp 383 115 # elif defined(__aarch64__) 116 # define __NR_seccomp 277 117 # elif defined(__riscv) 118 # define __NR_seccomp 277 119 # elif defined(__hppa__) 120 # define __NR_seccomp 338 121 # elif defined(__powerpc__) 122 # define __NR_seccomp 358 123 # elif defined(__s390__) 124 # define __NR_seccomp 348 125 # elif defined(__xtensa__) 126 # define __NR_seccomp 337 127 # else 128 # warning "seccomp syscall number unknown for this architecture" 129 # define __NR_seccomp 0xffff 130 # endif 131 #endif 132 133 #ifndef SECCOMP_SET_MODE_STRICT 134 #define SECCOMP_SET_MODE_STRICT 0 135 #endif 136 137 #ifndef SECCOMP_SET_MODE_FILTER 138 #define SECCOMP_SET_MODE_FILTER 1 139 #endif 140 141 #ifndef 
SECCOMP_GET_ACTION_AVAIL 142 #define SECCOMP_GET_ACTION_AVAIL 2 143 #endif 144 145 #ifndef SECCOMP_GET_NOTIF_SIZES 146 #define SECCOMP_GET_NOTIF_SIZES 3 147 #endif 148 149 #ifndef SECCOMP_FILTER_FLAG_TSYNC 150 #define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) 151 #endif 152 153 #ifndef SECCOMP_FILTER_FLAG_LOG 154 #define SECCOMP_FILTER_FLAG_LOG (1UL << 1) 155 #endif 156 157 #ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW 158 #define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) 159 #endif 160 161 #ifndef PTRACE_SECCOMP_GET_METADATA 162 #define PTRACE_SECCOMP_GET_METADATA 0x420d 163 164 struct seccomp_metadata { 165 __u64 filter_off; /* Input: which filter */ 166 __u64 flags; /* Output: filter's flags */ 167 }; 168 #endif 169 170 #ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER 171 #define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3) 172 173 #define SECCOMP_RET_USER_NOTIF 0x7fc00000U 174 175 #define SECCOMP_IOC_MAGIC '!' 176 #define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr) 177 #define SECCOMP_IOR(nr, type) _IOR(SECCOMP_IOC_MAGIC, nr, type) 178 #define SECCOMP_IOW(nr, type) _IOW(SECCOMP_IOC_MAGIC, nr, type) 179 #define SECCOMP_IOWR(nr, type) _IOWR(SECCOMP_IOC_MAGIC, nr, type) 180 181 /* Flags for seccomp notification fd ioctl. */ 182 #define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif) 183 #define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \ 184 struct seccomp_notif_resp) 185 #define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOR(2, __u64) 186 187 struct seccomp_notif { 188 __u64 id; 189 __u32 pid; 190 __u32 flags; 191 struct seccomp_data data; 192 }; 193 194 struct seccomp_notif_resp { 195 __u64 id; 196 __s64 val; 197 __s32 error; 198 __u32 flags; 199 }; 200 201 struct seccomp_notif_sizes { 202 __u16 seccomp_notif; 203 __u16 seccomp_notif_resp; 204 __u16 seccomp_data; 205 }; 206 #endif 207 208 #ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY 209 #define PTRACE_EVENTMSG_SYSCALL_ENTRY 1 210 #define PTRACE_EVENTMSG_SYSCALL_EXIT 2 211 #endif 212 213 #ifndef SECCOMP_USER_NOTIF_FLAG_CONTINUE 214 #define SECCOMP_USER_NOTIF_FLAG_CONTINUE 0x00000001 215 #endif 216 217 #ifndef SECCOMP_FILTER_FLAG_TSYNC_ESRCH 218 #define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4) 219 #endif 220 221 #ifndef seccomp 222 int seccomp(unsigned int op, unsigned int flags, void *args) 223 { 224 errno = 0; 225 return syscall(__NR_seccomp, op, flags, args); 226 } 227 #endif 228 229 #if __BYTE_ORDER == __LITTLE_ENDIAN 230 #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n])) 231 #elif __BYTE_ORDER == __BIG_ENDIAN 232 #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32)) 233 #else 234 #error "wut? Unknown __BYTE_ORDER?!" 235 #endif 236 237 #define SIBLING_EXIT_UNKILLED 0xbadbeef 238 #define SIBLING_EXIT_FAILURE 0xbadface 239 #define SIBLING_EXIT_NEWPRIVS 0xbadfeed 240 241 TEST(mode_strict_support) 242 { 243 long ret; 244 245 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL); 246 ASSERT_EQ(0, ret) { 247 TH_LOG("Kernel does not support CONFIG_SECCOMP"); 248 } 249 syscall(__NR_exit, 0); 250 } 251 252 TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL) 253 { 254 long ret; 255 256 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL); 257 ASSERT_EQ(0, ret) { 258 TH_LOG("Kernel does not support CONFIG_SECCOMP"); 259 } 260 syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER, 261 NULL, NULL, NULL); 262 EXPECT_FALSE(true) { 263 TH_LOG("Unreachable!"); 264 } 265 } 266 267 /* Note! 
This doesn't test no new privs behavior */ 268 TEST(no_new_privs_support) 269 { 270 long ret; 271 272 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 273 EXPECT_EQ(0, ret) { 274 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 275 } 276 } 277 278 /* Tests kernel support by checking for a copy_from_user() fault on NULL. */ 279 TEST(mode_filter_support) 280 { 281 long ret; 282 283 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); 284 ASSERT_EQ(0, ret) { 285 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 286 } 287 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL); 288 EXPECT_EQ(-1, ret); 289 EXPECT_EQ(EFAULT, errno) { 290 TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!"); 291 } 292 } 293 294 TEST(mode_filter_without_nnp) 295 { 296 struct sock_filter filter[] = { 297 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 298 }; 299 struct sock_fprog prog = { 300 .len = (unsigned short)ARRAY_SIZE(filter), 301 .filter = filter, 302 }; 303 long ret; 304 305 ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0); 306 ASSERT_LE(0, ret) { 307 TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS"); 308 } 309 errno = 0; 310 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 311 /* Succeeds with CAP_SYS_ADMIN, fails without */ 312 /* TODO(wad) check caps not euid */ 313 if (geteuid()) { 314 EXPECT_EQ(-1, ret); 315 EXPECT_EQ(EACCES, errno); 316 } else { 317 EXPECT_EQ(0, ret); 318 } 319 } 320 321 #define MAX_INSNS_PER_PATH 32768 322 323 TEST(filter_size_limits) 324 { 325 int i; 326 int count = BPF_MAXINSNS + 1; 327 struct sock_filter allow[] = { 328 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 329 }; 330 struct sock_filter *filter; 331 struct sock_fprog prog = { }; 332 long ret; 333 334 filter = calloc(count, sizeof(*filter)); 335 ASSERT_NE(NULL, filter); 336 337 for (i = 0; i < count; i++) 338 filter[i] = allow[0]; 339 340 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 341 ASSERT_EQ(0, ret); 342 343 prog.filter = filter; 344 prog.len = count; 345 346 /* Too many filter instructions in a single filter. */ 347 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 348 ASSERT_NE(0, ret) { 349 TH_LOG("Installing %d insn filter was allowed", prog.len); 350 } 351 352 /* One less is okay, though. */ 353 prog.len -= 1; 354 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 355 ASSERT_EQ(0, ret) { 356 TH_LOG("Installing %d insn filter wasn't allowed", prog.len); 357 } 358 } 359 360 TEST(filter_chain_limits) 361 { 362 int i; 363 int count = BPF_MAXINSNS; 364 struct sock_filter allow[] = { 365 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 366 }; 367 struct sock_filter *filter; 368 struct sock_fprog prog = { }; 369 long ret; 370 371 filter = calloc(count, sizeof(*filter)); 372 ASSERT_NE(NULL, filter); 373 374 for (i = 0; i < count; i++) 375 filter[i] = allow[0]; 376 377 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 378 ASSERT_EQ(0, ret); 379 380 prog.filter = filter; 381 prog.len = 1; 382 383 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 384 ASSERT_EQ(0, ret); 385 386 prog.len = count; 387 388 /* Too many total filter instructions. 
*/ 389 for (i = 0; i < MAX_INSNS_PER_PATH; i++) { 390 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 391 if (ret != 0) 392 break; 393 } 394 ASSERT_NE(0, ret) { 395 TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)", 396 i, count, i * (count + 4)); 397 } 398 } 399 400 TEST(mode_filter_cannot_move_to_strict) 401 { 402 struct sock_filter filter[] = { 403 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 404 }; 405 struct sock_fprog prog = { 406 .len = (unsigned short)ARRAY_SIZE(filter), 407 .filter = filter, 408 }; 409 long ret; 410 411 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 412 ASSERT_EQ(0, ret); 413 414 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 415 ASSERT_EQ(0, ret); 416 417 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0); 418 EXPECT_EQ(-1, ret); 419 EXPECT_EQ(EINVAL, errno); 420 } 421 422 423 TEST(mode_filter_get_seccomp) 424 { 425 struct sock_filter filter[] = { 426 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 427 }; 428 struct sock_fprog prog = { 429 .len = (unsigned short)ARRAY_SIZE(filter), 430 .filter = filter, 431 }; 432 long ret; 433 434 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 435 ASSERT_EQ(0, ret); 436 437 ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0); 438 EXPECT_EQ(0, ret); 439 440 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 441 ASSERT_EQ(0, ret); 442 443 ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0); 444 EXPECT_EQ(2, ret); 445 } 446 447 448 TEST(ALLOW_all) 449 { 450 struct sock_filter filter[] = { 451 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 452 }; 453 struct sock_fprog prog = { 454 .len = (unsigned short)ARRAY_SIZE(filter), 455 .filter = filter, 456 }; 457 long ret; 458 459 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 460 ASSERT_EQ(0, ret); 461 462 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 463 ASSERT_EQ(0, ret); 464 } 465 466 TEST(empty_prog) 467 { 468 struct sock_filter filter[] = { 469 }; 470 struct sock_fprog prog = { 471 .len = (unsigned short)ARRAY_SIZE(filter), 472 .filter = filter, 473 }; 474 long ret; 475 476 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 477 ASSERT_EQ(0, ret); 478 479 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 480 EXPECT_EQ(-1, ret); 481 EXPECT_EQ(EINVAL, errno); 482 } 483 484 TEST(log_all) 485 { 486 struct sock_filter filter[] = { 487 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG), 488 }; 489 struct sock_fprog prog = { 490 .len = (unsigned short)ARRAY_SIZE(filter), 491 .filter = filter, 492 }; 493 long ret; 494 pid_t parent = getppid(); 495 496 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 497 ASSERT_EQ(0, ret); 498 499 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 500 ASSERT_EQ(0, ret); 501 502 /* getppid() should succeed and be logged (no check for logging) */ 503 EXPECT_EQ(parent, syscall(__NR_getppid)); 504 } 505 506 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS) 507 { 508 struct sock_filter filter[] = { 509 BPF_STMT(BPF_RET|BPF_K, 0x10000000U), 510 }; 511 struct sock_fprog prog = { 512 .len = (unsigned short)ARRAY_SIZE(filter), 513 .filter = filter, 514 }; 515 long ret; 516 517 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 518 ASSERT_EQ(0, ret); 519 520 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 521 ASSERT_EQ(0, ret); 522 EXPECT_EQ(0, syscall(__NR_getpid)) { 523 TH_LOG("getpid() shouldn't ever return"); 524 } 525 } 526 527 /* return code >= 0x80000000 is unused. 
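 * (An action value in that range is not any defined SECCOMP_RET_* action; the
 * expectation below is that the kernel treats such an unknown action as a
 * kill, which is why the test is declared to die with SIGSYS.)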
 */
TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);
	EXPECT_EQ(0, syscall(__NR_getpid)) {
		TH_LOG("getpid() shouldn't ever return");
	}
}

TEST_SIGNAL(KILL_all, SIGSYS)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);
}

TEST_SIGNAL(KILL_one, SIGSYS)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;
	pid_t parent = getppid();

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* getpid() should never return. */
	EXPECT_EQ(0, syscall(__NR_getpid));
}

TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
{
	void *fatal_address;
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		/* Only bother with lower 32-bit for now. */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K,
			(unsigned long)&fatal_address, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;
	pid_t parent = getppid();
	struct tms timebuf;
	clock_t clock = times(&timebuf);

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	EXPECT_LE(clock, syscall(__NR_times, &timebuf));
	/* times() should never return. */
	EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
}

TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
{
#ifndef __NR_mmap2
	int sysno = __NR_mmap;
#else
	int sysno = __NR_mmap2;
#endif
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		/* Only bother with lower 32-bit for now.
*/ 643 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)), 644 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1), 645 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 646 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 647 }; 648 struct sock_fprog prog = { 649 .len = (unsigned short)ARRAY_SIZE(filter), 650 .filter = filter, 651 }; 652 long ret; 653 pid_t parent = getppid(); 654 int fd; 655 void *map1, *map2; 656 int page_size = sysconf(_SC_PAGESIZE); 657 658 ASSERT_LT(0, page_size); 659 660 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 661 ASSERT_EQ(0, ret); 662 663 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 664 ASSERT_EQ(0, ret); 665 666 fd = open("/dev/zero", O_RDONLY); 667 ASSERT_NE(-1, fd); 668 669 EXPECT_EQ(parent, syscall(__NR_getppid)); 670 map1 = (void *)syscall(sysno, 671 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size); 672 EXPECT_NE(MAP_FAILED, map1); 673 /* mmap2() should never return. */ 674 map2 = (void *)syscall(sysno, 675 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE); 676 EXPECT_EQ(MAP_FAILED, map2); 677 678 /* The test failed, so clean up the resources. */ 679 munmap(map1, page_size); 680 munmap(map2, page_size); 681 close(fd); 682 } 683 684 /* This is a thread task to die via seccomp filter violation. */ 685 void *kill_thread(void *data) 686 { 687 bool die = (bool)data; 688 689 if (die) { 690 prctl(PR_GET_SECCOMP, 0, 0, 0, 0); 691 return (void *)SIBLING_EXIT_FAILURE; 692 } 693 694 return (void *)SIBLING_EXIT_UNKILLED; 695 } 696 697 /* Prepare a thread that will kill itself or both of us. */ 698 void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process) 699 { 700 pthread_t thread; 701 void *status; 702 /* Kill only when calling __NR_prctl. */ 703 struct sock_filter filter_thread[] = { 704 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 705 offsetof(struct seccomp_data, nr)), 706 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), 707 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD), 708 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 709 }; 710 struct sock_fprog prog_thread = { 711 .len = (unsigned short)ARRAY_SIZE(filter_thread), 712 .filter = filter_thread, 713 }; 714 struct sock_filter filter_process[] = { 715 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 716 offsetof(struct seccomp_data, nr)), 717 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), 718 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS), 719 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 720 }; 721 struct sock_fprog prog_process = { 722 .len = (unsigned short)ARRAY_SIZE(filter_process), 723 .filter = filter_process, 724 }; 725 726 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 727 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 728 } 729 730 ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, 731 kill_process ? &prog_process : &prog_thread)); 732 733 /* 734 * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS 735 * flag cannot be downgraded by a new filter. 736 */ 737 ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread)); 738 739 /* Start a thread that will exit immediately. */ 740 ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false)); 741 ASSERT_EQ(0, pthread_join(thread, &status)); 742 ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status); 743 744 /* Start a thread that will die immediately. */ 745 ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true)); 746 ASSERT_EQ(0, pthread_join(thread, &status)); 747 ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status); 748 749 /* 750 * If we get here, only the spawned thread died. 
Let the parent know 751 * the whole process didn't die (i.e. this thread, the spawner, 752 * stayed running). 753 */ 754 exit(42); 755 } 756 757 TEST(KILL_thread) 758 { 759 int status; 760 pid_t child_pid; 761 762 child_pid = fork(); 763 ASSERT_LE(0, child_pid); 764 if (child_pid == 0) { 765 kill_thread_or_group(_metadata, false); 766 _exit(38); 767 } 768 769 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 770 771 /* If only the thread was killed, we'll see exit 42. */ 772 ASSERT_TRUE(WIFEXITED(status)); 773 ASSERT_EQ(42, WEXITSTATUS(status)); 774 } 775 776 TEST(KILL_process) 777 { 778 int status; 779 pid_t child_pid; 780 781 child_pid = fork(); 782 ASSERT_LE(0, child_pid); 783 if (child_pid == 0) { 784 kill_thread_or_group(_metadata, true); 785 _exit(38); 786 } 787 788 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 789 790 /* If the entire process was killed, we'll see SIGSYS. */ 791 ASSERT_TRUE(WIFSIGNALED(status)); 792 ASSERT_EQ(SIGSYS, WTERMSIG(status)); 793 } 794 795 /* TODO(wad) add 64-bit versus 32-bit arg tests. */ 796 TEST(arg_out_of_range) 797 { 798 struct sock_filter filter[] = { 799 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)), 800 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 801 }; 802 struct sock_fprog prog = { 803 .len = (unsigned short)ARRAY_SIZE(filter), 804 .filter = filter, 805 }; 806 long ret; 807 808 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 809 ASSERT_EQ(0, ret); 810 811 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 812 EXPECT_EQ(-1, ret); 813 EXPECT_EQ(EINVAL, errno); 814 } 815 816 #define ERRNO_FILTER(name, errno) \ 817 struct sock_filter _read_filter_##name[] = { \ 818 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, \ 819 offsetof(struct seccomp_data, nr)), \ 820 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), \ 821 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | errno), \ 822 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), \ 823 }; \ 824 struct sock_fprog prog_##name = { \ 825 .len = (unsigned short)ARRAY_SIZE(_read_filter_##name), \ 826 .filter = _read_filter_##name, \ 827 } 828 829 /* Make sure basic errno values are correctly passed through a filter. */ 830 TEST(ERRNO_valid) 831 { 832 ERRNO_FILTER(valid, E2BIG); 833 long ret; 834 pid_t parent = getppid(); 835 836 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 837 ASSERT_EQ(0, ret); 838 839 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid); 840 ASSERT_EQ(0, ret); 841 842 EXPECT_EQ(parent, syscall(__NR_getppid)); 843 EXPECT_EQ(-1, read(0, NULL, 0)); 844 EXPECT_EQ(E2BIG, errno); 845 } 846 847 /* Make sure an errno of zero is correctly handled by the arch code. */ 848 TEST(ERRNO_zero) 849 { 850 ERRNO_FILTER(zero, 0); 851 long ret; 852 pid_t parent = getppid(); 853 854 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 855 ASSERT_EQ(0, ret); 856 857 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero); 858 ASSERT_EQ(0, ret); 859 860 EXPECT_EQ(parent, syscall(__NR_getppid)); 861 /* "errno" of 0 is ok. */ 862 EXPECT_EQ(0, read(0, NULL, 0)); 863 } 864 865 /* 866 * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller. 867 * This tests that the errno value gets capped correctly, fixed by 868 * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO"). 
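 * (MAX_ERRNO is 4095, so the 4096 requested below is expected to come back
 * from read() as errno 4095.)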
869 */ 870 TEST(ERRNO_capped) 871 { 872 ERRNO_FILTER(capped, 4096); 873 long ret; 874 pid_t parent = getppid(); 875 876 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 877 ASSERT_EQ(0, ret); 878 879 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped); 880 ASSERT_EQ(0, ret); 881 882 EXPECT_EQ(parent, syscall(__NR_getppid)); 883 EXPECT_EQ(-1, read(0, NULL, 0)); 884 EXPECT_EQ(4095, errno); 885 } 886 887 /* 888 * Filters are processed in reverse order: last applied is executed first. 889 * Since only the SECCOMP_RET_ACTION mask is tested for return values, the 890 * SECCOMP_RET_DATA mask results will follow the most recently applied 891 * matching filter return (and not the lowest or highest value). 892 */ 893 TEST(ERRNO_order) 894 { 895 ERRNO_FILTER(first, 11); 896 ERRNO_FILTER(second, 13); 897 ERRNO_FILTER(third, 12); 898 long ret; 899 pid_t parent = getppid(); 900 901 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 902 ASSERT_EQ(0, ret); 903 904 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first); 905 ASSERT_EQ(0, ret); 906 907 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second); 908 ASSERT_EQ(0, ret); 909 910 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third); 911 ASSERT_EQ(0, ret); 912 913 EXPECT_EQ(parent, syscall(__NR_getppid)); 914 EXPECT_EQ(-1, read(0, NULL, 0)); 915 EXPECT_EQ(12, errno); 916 } 917 918 FIXTURE(TRAP) { 919 struct sock_fprog prog; 920 }; 921 922 FIXTURE_SETUP(TRAP) 923 { 924 struct sock_filter filter[] = { 925 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 926 offsetof(struct seccomp_data, nr)), 927 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 928 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP), 929 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 930 }; 931 932 memset(&self->prog, 0, sizeof(self->prog)); 933 self->prog.filter = malloc(sizeof(filter)); 934 ASSERT_NE(NULL, self->prog.filter); 935 memcpy(self->prog.filter, filter, sizeof(filter)); 936 self->prog.len = (unsigned short)ARRAY_SIZE(filter); 937 } 938 939 FIXTURE_TEARDOWN(TRAP) 940 { 941 if (self->prog.filter) 942 free(self->prog.filter); 943 } 944 945 TEST_F_SIGNAL(TRAP, dfl, SIGSYS) 946 { 947 long ret; 948 949 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 950 ASSERT_EQ(0, ret); 951 952 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); 953 ASSERT_EQ(0, ret); 954 syscall(__NR_getpid); 955 } 956 957 /* Ensure that SIGSYS overrides SIG_IGN */ 958 TEST_F_SIGNAL(TRAP, ign, SIGSYS) 959 { 960 long ret; 961 962 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 963 ASSERT_EQ(0, ret); 964 965 signal(SIGSYS, SIG_IGN); 966 967 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); 968 ASSERT_EQ(0, ret); 969 syscall(__NR_getpid); 970 } 971 972 static siginfo_t TRAP_info; 973 static volatile int TRAP_nr; 974 static void TRAP_action(int nr, siginfo_t *info, void *void_context) 975 { 976 memcpy(&TRAP_info, info, sizeof(TRAP_info)); 977 TRAP_nr = nr; 978 } 979 980 TEST_F(TRAP, handler) 981 { 982 int ret, test; 983 struct sigaction act; 984 sigset_t mask; 985 986 memset(&act, 0, sizeof(act)); 987 sigemptyset(&mask); 988 sigaddset(&mask, SIGSYS); 989 990 act.sa_sigaction = &TRAP_action; 991 act.sa_flags = SA_SIGINFO; 992 ret = sigaction(SIGSYS, &act, NULL); 993 ASSERT_EQ(0, ret) { 994 TH_LOG("sigaction failed"); 995 } 996 ret = sigprocmask(SIG_UNBLOCK, &mask, NULL); 997 ASSERT_EQ(0, ret) { 998 TH_LOG("sigprocmask failed"); 999 } 1000 1001 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1002 ASSERT_EQ(0, ret); 1003 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); 1004 ASSERT_EQ(0, 
ret); 1005 TRAP_nr = 0; 1006 memset(&TRAP_info, 0, sizeof(TRAP_info)); 1007 /* Expect the registers to be rolled back. (nr = error) may vary 1008 * based on arch. */ 1009 ret = syscall(__NR_getpid); 1010 /* Silence gcc warning about volatile. */ 1011 test = TRAP_nr; 1012 EXPECT_EQ(SIGSYS, test); 1013 struct local_sigsys { 1014 void *_call_addr; /* calling user insn */ 1015 int _syscall; /* triggering system call number */ 1016 unsigned int _arch; /* AUDIT_ARCH_* of syscall */ 1017 } *sigsys = (struct local_sigsys *) 1018 #ifdef si_syscall 1019 &(TRAP_info.si_call_addr); 1020 #else 1021 &TRAP_info.si_pid; 1022 #endif 1023 EXPECT_EQ(__NR_getpid, sigsys->_syscall); 1024 /* Make sure arch is non-zero. */ 1025 EXPECT_NE(0, sigsys->_arch); 1026 EXPECT_NE(0, (unsigned long)sigsys->_call_addr); 1027 } 1028 1029 FIXTURE(precedence) { 1030 struct sock_fprog allow; 1031 struct sock_fprog log; 1032 struct sock_fprog trace; 1033 struct sock_fprog error; 1034 struct sock_fprog trap; 1035 struct sock_fprog kill; 1036 }; 1037 1038 FIXTURE_SETUP(precedence) 1039 { 1040 struct sock_filter allow_insns[] = { 1041 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1042 }; 1043 struct sock_filter log_insns[] = { 1044 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1045 offsetof(struct seccomp_data, nr)), 1046 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1047 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1048 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG), 1049 }; 1050 struct sock_filter trace_insns[] = { 1051 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1052 offsetof(struct seccomp_data, nr)), 1053 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1054 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1055 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE), 1056 }; 1057 struct sock_filter error_insns[] = { 1058 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1059 offsetof(struct seccomp_data, nr)), 1060 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1061 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1062 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO), 1063 }; 1064 struct sock_filter trap_insns[] = { 1065 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1066 offsetof(struct seccomp_data, nr)), 1067 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1068 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1069 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP), 1070 }; 1071 struct sock_filter kill_insns[] = { 1072 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1073 offsetof(struct seccomp_data, nr)), 1074 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1075 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1076 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 1077 }; 1078 1079 memset(self, 0, sizeof(*self)); 1080 #define FILTER_ALLOC(_x) \ 1081 self->_x.filter = malloc(sizeof(_x##_insns)); \ 1082 ASSERT_NE(NULL, self->_x.filter); \ 1083 memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \ 1084 self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns) 1085 FILTER_ALLOC(allow); 1086 FILTER_ALLOC(log); 1087 FILTER_ALLOC(trace); 1088 FILTER_ALLOC(error); 1089 FILTER_ALLOC(trap); 1090 FILTER_ALLOC(kill); 1091 } 1092 1093 FIXTURE_TEARDOWN(precedence) 1094 { 1095 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter) 1096 FILTER_FREE(allow); 1097 FILTER_FREE(log); 1098 FILTER_FREE(trace); 1099 FILTER_FREE(error); 1100 FILTER_FREE(trap); 1101 FILTER_FREE(kill); 1102 } 1103 1104 TEST_F(precedence, allow_ok) 1105 { 1106 pid_t parent, res = 0; 1107 long ret; 1108 1109 parent = getppid(); 1110 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1111 ASSERT_EQ(0, ret); 1112 1113 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 
1114 ASSERT_EQ(0, ret); 1115 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1116 ASSERT_EQ(0, ret); 1117 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1118 ASSERT_EQ(0, ret); 1119 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1120 ASSERT_EQ(0, ret); 1121 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1122 ASSERT_EQ(0, ret); 1123 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); 1124 ASSERT_EQ(0, ret); 1125 /* Should work just fine. */ 1126 res = syscall(__NR_getppid); 1127 EXPECT_EQ(parent, res); 1128 } 1129 1130 TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS) 1131 { 1132 pid_t parent, res = 0; 1133 long ret; 1134 1135 parent = getppid(); 1136 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1137 ASSERT_EQ(0, ret); 1138 1139 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1140 ASSERT_EQ(0, ret); 1141 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1142 ASSERT_EQ(0, ret); 1143 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1144 ASSERT_EQ(0, ret); 1145 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1146 ASSERT_EQ(0, ret); 1147 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1148 ASSERT_EQ(0, ret); 1149 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); 1150 ASSERT_EQ(0, ret); 1151 /* Should work just fine. */ 1152 res = syscall(__NR_getppid); 1153 EXPECT_EQ(parent, res); 1154 /* getpid() should never return. */ 1155 res = syscall(__NR_getpid); 1156 EXPECT_EQ(0, res); 1157 } 1158 1159 TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS) 1160 { 1161 pid_t parent; 1162 long ret; 1163 1164 parent = getppid(); 1165 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1166 ASSERT_EQ(0, ret); 1167 1168 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1169 ASSERT_EQ(0, ret); 1170 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); 1171 ASSERT_EQ(0, ret); 1172 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1173 ASSERT_EQ(0, ret); 1174 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1175 ASSERT_EQ(0, ret); 1176 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1177 ASSERT_EQ(0, ret); 1178 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1179 ASSERT_EQ(0, ret); 1180 /* Should work just fine. */ 1181 EXPECT_EQ(parent, syscall(__NR_getppid)); 1182 /* getpid() should never return. */ 1183 EXPECT_EQ(0, syscall(__NR_getpid)); 1184 } 1185 1186 TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS) 1187 { 1188 pid_t parent; 1189 long ret; 1190 1191 parent = getppid(); 1192 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1193 ASSERT_EQ(0, ret); 1194 1195 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1196 ASSERT_EQ(0, ret); 1197 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1198 ASSERT_EQ(0, ret); 1199 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1200 ASSERT_EQ(0, ret); 1201 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1202 ASSERT_EQ(0, ret); 1203 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1204 ASSERT_EQ(0, ret); 1205 /* Should work just fine. */ 1206 EXPECT_EQ(parent, syscall(__NR_getppid)); 1207 /* getpid() should never return. 
*/ 1208 EXPECT_EQ(0, syscall(__NR_getpid)); 1209 } 1210 1211 TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS) 1212 { 1213 pid_t parent; 1214 long ret; 1215 1216 parent = getppid(); 1217 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1218 ASSERT_EQ(0, ret); 1219 1220 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1221 ASSERT_EQ(0, ret); 1222 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1223 ASSERT_EQ(0, ret); 1224 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1225 ASSERT_EQ(0, ret); 1226 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1227 ASSERT_EQ(0, ret); 1228 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1229 ASSERT_EQ(0, ret); 1230 /* Should work just fine. */ 1231 EXPECT_EQ(parent, syscall(__NR_getppid)); 1232 /* getpid() should never return. */ 1233 EXPECT_EQ(0, syscall(__NR_getpid)); 1234 } 1235 1236 TEST_F(precedence, errno_is_third) 1237 { 1238 pid_t parent; 1239 long ret; 1240 1241 parent = getppid(); 1242 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1243 ASSERT_EQ(0, ret); 1244 1245 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1246 ASSERT_EQ(0, ret); 1247 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1248 ASSERT_EQ(0, ret); 1249 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1250 ASSERT_EQ(0, ret); 1251 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1252 ASSERT_EQ(0, ret); 1253 /* Should work just fine. */ 1254 EXPECT_EQ(parent, syscall(__NR_getppid)); 1255 EXPECT_EQ(0, syscall(__NR_getpid)); 1256 } 1257 1258 TEST_F(precedence, errno_is_third_in_any_order) 1259 { 1260 pid_t parent; 1261 long ret; 1262 1263 parent = getppid(); 1264 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1265 ASSERT_EQ(0, ret); 1266 1267 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1268 ASSERT_EQ(0, ret); 1269 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1270 ASSERT_EQ(0, ret); 1271 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1272 ASSERT_EQ(0, ret); 1273 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1274 ASSERT_EQ(0, ret); 1275 /* Should work just fine. */ 1276 EXPECT_EQ(parent, syscall(__NR_getppid)); 1277 EXPECT_EQ(0, syscall(__NR_getpid)); 1278 } 1279 1280 TEST_F(precedence, trace_is_fourth) 1281 { 1282 pid_t parent; 1283 long ret; 1284 1285 parent = getppid(); 1286 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1287 ASSERT_EQ(0, ret); 1288 1289 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1290 ASSERT_EQ(0, ret); 1291 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1292 ASSERT_EQ(0, ret); 1293 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1294 ASSERT_EQ(0, ret); 1295 /* Should work just fine. */ 1296 EXPECT_EQ(parent, syscall(__NR_getppid)); 1297 /* No ptracer */ 1298 EXPECT_EQ(-1, syscall(__NR_getpid)); 1299 } 1300 1301 TEST_F(precedence, trace_is_fourth_in_any_order) 1302 { 1303 pid_t parent; 1304 long ret; 1305 1306 parent = getppid(); 1307 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1308 ASSERT_EQ(0, ret); 1309 1310 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1311 ASSERT_EQ(0, ret); 1312 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1313 ASSERT_EQ(0, ret); 1314 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1315 ASSERT_EQ(0, ret); 1316 /* Should work just fine. 
*/ 1317 EXPECT_EQ(parent, syscall(__NR_getppid)); 1318 /* No ptracer */ 1319 EXPECT_EQ(-1, syscall(__NR_getpid)); 1320 } 1321 1322 TEST_F(precedence, log_is_fifth) 1323 { 1324 pid_t mypid, parent; 1325 long ret; 1326 1327 mypid = getpid(); 1328 parent = getppid(); 1329 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1330 ASSERT_EQ(0, ret); 1331 1332 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1333 ASSERT_EQ(0, ret); 1334 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1335 ASSERT_EQ(0, ret); 1336 /* Should work just fine. */ 1337 EXPECT_EQ(parent, syscall(__NR_getppid)); 1338 /* Should also work just fine */ 1339 EXPECT_EQ(mypid, syscall(__NR_getpid)); 1340 } 1341 1342 TEST_F(precedence, log_is_fifth_in_any_order) 1343 { 1344 pid_t mypid, parent; 1345 long ret; 1346 1347 mypid = getpid(); 1348 parent = getppid(); 1349 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1350 ASSERT_EQ(0, ret); 1351 1352 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1353 ASSERT_EQ(0, ret); 1354 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1355 ASSERT_EQ(0, ret); 1356 /* Should work just fine. */ 1357 EXPECT_EQ(parent, syscall(__NR_getppid)); 1358 /* Should also work just fine */ 1359 EXPECT_EQ(mypid, syscall(__NR_getpid)); 1360 } 1361 1362 #ifndef PTRACE_O_TRACESECCOMP 1363 #define PTRACE_O_TRACESECCOMP 0x00000080 1364 #endif 1365 1366 /* Catch the Ubuntu 12.04 value error. */ 1367 #if PTRACE_EVENT_SECCOMP != 7 1368 #undef PTRACE_EVENT_SECCOMP 1369 #endif 1370 1371 #ifndef PTRACE_EVENT_SECCOMP 1372 #define PTRACE_EVENT_SECCOMP 7 1373 #endif 1374 1375 #define IS_SECCOMP_EVENT(status) ((status >> 16) == PTRACE_EVENT_SECCOMP) 1376 bool tracer_running; 1377 void tracer_stop(int sig) 1378 { 1379 tracer_running = false; 1380 } 1381 1382 typedef void tracer_func_t(struct __test_metadata *_metadata, 1383 pid_t tracee, int status, void *args); 1384 1385 void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee, 1386 tracer_func_t tracer_func, void *args, bool ptrace_syscall) 1387 { 1388 int ret = -1; 1389 struct sigaction action = { 1390 .sa_handler = tracer_stop, 1391 }; 1392 1393 /* Allow external shutdown. */ 1394 tracer_running = true; 1395 ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL)); 1396 1397 errno = 0; 1398 while (ret == -1 && errno != EINVAL) 1399 ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0); 1400 ASSERT_EQ(0, ret) { 1401 kill(tracee, SIGKILL); 1402 } 1403 /* Wait for attach stop */ 1404 wait(NULL); 1405 1406 ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ? 1407 PTRACE_O_TRACESYSGOOD : 1408 PTRACE_O_TRACESECCOMP); 1409 ASSERT_EQ(0, ret) { 1410 TH_LOG("Failed to set PTRACE_O_TRACESECCOMP"); 1411 kill(tracee, SIGKILL); 1412 } 1413 ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT, 1414 tracee, NULL, 0); 1415 ASSERT_EQ(0, ret); 1416 1417 /* Unblock the tracee */ 1418 ASSERT_EQ(1, write(fd, "A", 1)); 1419 ASSERT_EQ(0, close(fd)); 1420 1421 /* Run until we're shut down. Must assert to stop execution. */ 1422 while (tracer_running) { 1423 int status; 1424 1425 if (wait(&status) != tracee) 1426 continue; 1427 if (WIFSIGNALED(status) || WIFEXITED(status)) 1428 /* Child is dead. Time to go. */ 1429 return; 1430 1431 /* Check if this is a seccomp event. */ 1432 ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status)); 1433 1434 tracer_func(_metadata, tracee, status, args); 1435 1436 ret = ptrace(ptrace_syscall ? 
PTRACE_SYSCALL : PTRACE_CONT, 1437 tracee, NULL, 0); 1438 ASSERT_EQ(0, ret); 1439 } 1440 /* Directly report the status of our test harness results. */ 1441 syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE); 1442 } 1443 1444 /* Common tracer setup/teardown functions. */ 1445 void cont_handler(int num) 1446 { } 1447 pid_t setup_trace_fixture(struct __test_metadata *_metadata, 1448 tracer_func_t func, void *args, bool ptrace_syscall) 1449 { 1450 char sync; 1451 int pipefd[2]; 1452 pid_t tracer_pid; 1453 pid_t tracee = getpid(); 1454 1455 /* Setup a pipe for clean synchronization. */ 1456 ASSERT_EQ(0, pipe(pipefd)); 1457 1458 /* Fork a child which we'll promote to tracer */ 1459 tracer_pid = fork(); 1460 ASSERT_LE(0, tracer_pid); 1461 signal(SIGALRM, cont_handler); 1462 if (tracer_pid == 0) { 1463 close(pipefd[0]); 1464 start_tracer(_metadata, pipefd[1], tracee, func, args, 1465 ptrace_syscall); 1466 syscall(__NR_exit, 0); 1467 } 1468 close(pipefd[1]); 1469 prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0); 1470 read(pipefd[0], &sync, 1); 1471 close(pipefd[0]); 1472 1473 return tracer_pid; 1474 } 1475 void teardown_trace_fixture(struct __test_metadata *_metadata, 1476 pid_t tracer) 1477 { 1478 if (tracer) { 1479 int status; 1480 /* 1481 * Extract the exit code from the other process and 1482 * adopt it for ourselves in case its asserts failed. 1483 */ 1484 ASSERT_EQ(0, kill(tracer, SIGUSR1)); 1485 ASSERT_EQ(tracer, waitpid(tracer, &status, 0)); 1486 if (WEXITSTATUS(status)) 1487 _metadata->passed = 0; 1488 } 1489 } 1490 1491 /* "poke" tracer arguments and function. */ 1492 struct tracer_args_poke_t { 1493 unsigned long poke_addr; 1494 }; 1495 1496 void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status, 1497 void *args) 1498 { 1499 int ret; 1500 unsigned long msg; 1501 struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args; 1502 1503 ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); 1504 EXPECT_EQ(0, ret); 1505 /* If this fails, don't try to recover. */ 1506 ASSERT_EQ(0x1001, msg) { 1507 kill(tracee, SIGKILL); 1508 } 1509 /* 1510 * Poke in the message. 1511 * Registers are not touched to try to keep this relatively arch 1512 * agnostic. 1513 */ 1514 ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001); 1515 EXPECT_EQ(0, ret); 1516 } 1517 1518 FIXTURE(TRACE_poke) { 1519 struct sock_fprog prog; 1520 pid_t tracer; 1521 long poked; 1522 struct tracer_args_poke_t tracer_args; 1523 }; 1524 1525 FIXTURE_SETUP(TRACE_poke) 1526 { 1527 struct sock_filter filter[] = { 1528 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1529 offsetof(struct seccomp_data, nr)), 1530 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), 1531 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001), 1532 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1533 }; 1534 1535 self->poked = 0; 1536 memset(&self->prog, 0, sizeof(self->prog)); 1537 self->prog.filter = malloc(sizeof(filter)); 1538 ASSERT_NE(NULL, self->prog.filter); 1539 memcpy(self->prog.filter, filter, sizeof(filter)); 1540 self->prog.len = (unsigned short)ARRAY_SIZE(filter); 1541 1542 /* Set up tracer args. */ 1543 self->tracer_args.poke_addr = (unsigned long)&self->poked; 1544 1545 /* Launch tracer. 
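 * (The tracer is a forked child that attaches back to this process with
 * PTRACE_ATTACH and is synchronized over a pipe; see setup_trace_fixture()
 * above.)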
*/ 1546 self->tracer = setup_trace_fixture(_metadata, tracer_poke, 1547 &self->tracer_args, false); 1548 } 1549 1550 FIXTURE_TEARDOWN(TRACE_poke) 1551 { 1552 teardown_trace_fixture(_metadata, self->tracer); 1553 if (self->prog.filter) 1554 free(self->prog.filter); 1555 } 1556 1557 TEST_F(TRACE_poke, read_has_side_effects) 1558 { 1559 ssize_t ret; 1560 1561 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1562 ASSERT_EQ(0, ret); 1563 1564 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); 1565 ASSERT_EQ(0, ret); 1566 1567 EXPECT_EQ(0, self->poked); 1568 ret = read(-1, NULL, 0); 1569 EXPECT_EQ(-1, ret); 1570 EXPECT_EQ(0x1001, self->poked); 1571 } 1572 1573 TEST_F(TRACE_poke, getpid_runs_normally) 1574 { 1575 long ret; 1576 1577 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1578 ASSERT_EQ(0, ret); 1579 1580 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); 1581 ASSERT_EQ(0, ret); 1582 1583 EXPECT_EQ(0, self->poked); 1584 EXPECT_NE(0, syscall(__NR_getpid)); 1585 EXPECT_EQ(0, self->poked); 1586 } 1587 1588 #if defined(__x86_64__) 1589 # define ARCH_REGS struct user_regs_struct 1590 # define SYSCALL_NUM orig_rax 1591 # define SYSCALL_RET rax 1592 #elif defined(__i386__) 1593 # define ARCH_REGS struct user_regs_struct 1594 # define SYSCALL_NUM orig_eax 1595 # define SYSCALL_RET eax 1596 #elif defined(__arm__) 1597 # define ARCH_REGS struct pt_regs 1598 # define SYSCALL_NUM ARM_r7 1599 # define SYSCALL_RET ARM_r0 1600 #elif defined(__aarch64__) 1601 # define ARCH_REGS struct user_pt_regs 1602 # define SYSCALL_NUM regs[8] 1603 # define SYSCALL_RET regs[0] 1604 #elif defined(__riscv) && __riscv_xlen == 64 1605 # define ARCH_REGS struct user_regs_struct 1606 # define SYSCALL_NUM a7 1607 # define SYSCALL_RET a0 1608 #elif defined(__hppa__) 1609 # define ARCH_REGS struct user_regs_struct 1610 # define SYSCALL_NUM gr[20] 1611 # define SYSCALL_RET gr[28] 1612 #elif defined(__powerpc__) 1613 # define ARCH_REGS struct pt_regs 1614 # define SYSCALL_NUM gpr[0] 1615 # define SYSCALL_RET gpr[3] 1616 #elif defined(__s390__) 1617 # define ARCH_REGS s390_regs 1618 # define SYSCALL_NUM gprs[2] 1619 # define SYSCALL_RET gprs[2] 1620 # define SYSCALL_NUM_RET_SHARE_REG 1621 #elif defined(__mips__) 1622 # define ARCH_REGS struct pt_regs 1623 # define SYSCALL_NUM regs[2] 1624 # define SYSCALL_SYSCALL_NUM regs[4] 1625 # define SYSCALL_RET regs[2] 1626 # define SYSCALL_NUM_RET_SHARE_REG 1627 #elif defined(__xtensa__) 1628 # define ARCH_REGS struct user_pt_regs 1629 # define SYSCALL_NUM syscall 1630 /* 1631 * On xtensa syscall return value is in the register 1632 * a2 of the current window which is not fixed. 1633 */ 1634 #define SYSCALL_RET(reg) a[(reg).windowbase * 4 + 2] 1635 #else 1636 # error "Do not know how to find your architecture's registers and syscalls" 1637 #endif 1638 1639 /* When the syscall return can't be changed, stub out the tests for it. */ 1640 #ifdef SYSCALL_NUM_RET_SHARE_REG 1641 # define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action) 1642 #else 1643 # define EXPECT_SYSCALL_RETURN(val, action) \ 1644 do { \ 1645 errno = 0; \ 1646 if (val < 0) { \ 1647 EXPECT_EQ(-1, action); \ 1648 EXPECT_EQ(-(val), errno); \ 1649 } else { \ 1650 EXPECT_EQ(val, action); \ 1651 } \ 1652 } while (0) 1653 #endif 1654 1655 /* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for 1656 * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux). 
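 * On everything else the code below falls back to the regset interface,
 * PTRACE_GETREGSET/PTRACE_SETREGSET with NT_PRSTATUS.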
 */
#if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
#define HAVE_GETREGS
#endif

/* Architecture-specific syscall fetching routine. */
int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
{
	ARCH_REGS regs;
#ifdef HAVE_GETREGS
	EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
		TH_LOG("PTRACE_GETREGS failed");
		return -1;
	}
#else
	struct iovec iov;

	iov.iov_base = &regs;
	iov.iov_len = sizeof(regs);
	EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
		TH_LOG("PTRACE_GETREGSET failed");
		return -1;
	}
#endif

#if defined(__mips__)
	if (regs.SYSCALL_NUM == __NR_O32_Linux)
		return regs.SYSCALL_SYSCALL_NUM;
#endif
	return regs.SYSCALL_NUM;
}

/* Architecture-specific syscall changing routine. */
void change_syscall(struct __test_metadata *_metadata,
		    pid_t tracee, int syscall, int result)
{
	int ret;
	ARCH_REGS regs;
#ifdef HAVE_GETREGS
	ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);
#else
	struct iovec iov;
	iov.iov_base = &regs;
	iov.iov_len = sizeof(regs);
	ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
#endif
	EXPECT_EQ(0, ret) {}

#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
	defined(__s390__) || defined(__hppa__) || defined(__riscv) || \
	defined(__xtensa__)
	{
		regs.SYSCALL_NUM = syscall;
	}
#elif defined(__mips__)
	{
		if (regs.SYSCALL_NUM == __NR_O32_Linux)
			regs.SYSCALL_SYSCALL_NUM = syscall;
		else
			regs.SYSCALL_NUM = syscall;
	}

#elif defined(__arm__)
# ifndef PTRACE_SET_SYSCALL
# define PTRACE_SET_SYSCALL 23
# endif
	{
		ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
		EXPECT_EQ(0, ret);
	}

#elif defined(__aarch64__)
# ifndef NT_ARM_SYSTEM_CALL
# define NT_ARM_SYSTEM_CALL 0x404
# endif
	{
		iov.iov_base = &syscall;
		iov.iov_len = sizeof(syscall);
		ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
			     &iov);
		EXPECT_EQ(0, ret);
	}

#else
	ASSERT_EQ(1, 0) {
		TH_LOG("How is the syscall changed on this architecture?");
	}
#endif

	/* If syscall is skipped, change return value. */
	if (syscall == -1)
#ifdef SYSCALL_NUM_RET_SHARE_REG
		TH_LOG("Can't modify syscall return on this architecture");

#elif defined(__xtensa__)
		regs.SYSCALL_RET(regs) = result;
#else
		regs.SYSCALL_RET = result;
#endif

#ifdef HAVE_GETREGS
	ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
#else
	iov.iov_base = &regs;
	iov.iov_len = sizeof(regs);
	ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
#endif
	EXPECT_EQ(0, ret);
}

void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
		    int status, void *args)
{
	int ret;
	unsigned long msg;

	/* Make sure we got the right message. */
	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
	EXPECT_EQ(0, ret);

	/* Validate and take action on expected syscalls. */
	switch (msg) {
	case 0x1002:
		/* change getpid to getppid. */
		EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
		change_syscall(_metadata, tracee, __NR_getppid, 0);
		break;
	case 0x1003:
		/* skip gettid with valid return code.
*/ 1786 EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee)); 1787 change_syscall(_metadata, tracee, -1, 45000); 1788 break; 1789 case 0x1004: 1790 /* skip openat with error. */ 1791 EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee)); 1792 change_syscall(_metadata, tracee, -1, -ESRCH); 1793 break; 1794 case 0x1005: 1795 /* do nothing (allow getppid) */ 1796 EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee)); 1797 break; 1798 default: 1799 EXPECT_EQ(0, msg) { 1800 TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg); 1801 kill(tracee, SIGKILL); 1802 } 1803 } 1804 1805 } 1806 1807 void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, 1808 int status, void *args) 1809 { 1810 int ret, nr; 1811 unsigned long msg; 1812 static bool entry; 1813 1814 /* 1815 * The traditional way to tell PTRACE_SYSCALL entry/exit 1816 * is by counting. 1817 */ 1818 entry = !entry; 1819 1820 /* Make sure we got an appropriate message. */ 1821 ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); 1822 EXPECT_EQ(0, ret); 1823 EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY 1824 : PTRACE_EVENTMSG_SYSCALL_EXIT, msg); 1825 1826 if (!entry) 1827 return; 1828 1829 nr = get_syscall(_metadata, tracee); 1830 1831 if (nr == __NR_getpid) 1832 change_syscall(_metadata, tracee, __NR_getppid, 0); 1833 if (nr == __NR_gettid) 1834 change_syscall(_metadata, tracee, -1, 45000); 1835 if (nr == __NR_openat) 1836 change_syscall(_metadata, tracee, -1, -ESRCH); 1837 } 1838 1839 FIXTURE(TRACE_syscall) { 1840 struct sock_fprog prog; 1841 pid_t tracer, mytid, mypid, parent; 1842 }; 1843 1844 FIXTURE_SETUP(TRACE_syscall) 1845 { 1846 struct sock_filter filter[] = { 1847 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1848 offsetof(struct seccomp_data, nr)), 1849 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 1850 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002), 1851 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1), 1852 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003), 1853 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1), 1854 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004), 1855 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 1856 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005), 1857 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1858 }; 1859 1860 memset(&self->prog, 0, sizeof(self->prog)); 1861 self->prog.filter = malloc(sizeof(filter)); 1862 ASSERT_NE(NULL, self->prog.filter); 1863 memcpy(self->prog.filter, filter, sizeof(filter)); 1864 self->prog.len = (unsigned short)ARRAY_SIZE(filter); 1865 1866 /* Prepare some testable syscall results. */ 1867 self->mytid = syscall(__NR_gettid); 1868 ASSERT_GT(self->mytid, 0); 1869 ASSERT_NE(self->mytid, 1) { 1870 TH_LOG("Running this test as init is not supported. :)"); 1871 } 1872 1873 self->mypid = getpid(); 1874 ASSERT_GT(self->mypid, 0); 1875 ASSERT_EQ(self->mytid, self->mypid); 1876 1877 self->parent = getppid(); 1878 ASSERT_GT(self->parent, 0); 1879 ASSERT_NE(self->parent, self->mypid); 1880 1881 /* Launch tracer. */ 1882 self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL, 1883 false); 1884 } 1885 1886 FIXTURE_TEARDOWN(TRACE_syscall) 1887 { 1888 teardown_trace_fixture(_metadata, self->tracer); 1889 if (self->prog.filter) 1890 free(self->prog.filter); 1891 } 1892 1893 TEST_F(TRACE_syscall, ptrace_syscall_redirected) 1894 { 1895 /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. 
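 * (These ptrace_syscall_* variants install no seccomp filter at all; the
 * plain PTRACE_SYSCALL tracer performs the same manipulations.)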
 */
	teardown_trace_fixture(_metadata, self->tracer);
	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
					   true);

	/* Tracer will redirect getpid to getppid. */
	EXPECT_NE(self->mypid, syscall(__NR_getpid));
}

TEST_F(TRACE_syscall, ptrace_syscall_errno)
{
	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
	teardown_trace_fixture(_metadata, self->tracer);
	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
					   true);

	/* Tracer should skip the openat syscall, resulting in ESRCH. */
	EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
}

TEST_F(TRACE_syscall, ptrace_syscall_faked)
{
	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
	teardown_trace_fixture(_metadata, self->tracer);
	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
					   true);

	/* Tracer should skip the gettid syscall, resulting in a fake pid. */
	EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
}

TEST_F(TRACE_syscall, syscall_allowed)
{
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
	ASSERT_EQ(0, ret);

	/* getppid works as expected (no changes). */
	EXPECT_EQ(self->parent, syscall(__NR_getppid));
	EXPECT_NE(self->mypid, syscall(__NR_getppid));
}

TEST_F(TRACE_syscall, syscall_redirected)
{
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
	ASSERT_EQ(0, ret);

	/* getpid has been redirected to getppid as expected. */
	EXPECT_EQ(self->parent, syscall(__NR_getpid));
	EXPECT_NE(self->mypid, syscall(__NR_getpid));
}

TEST_F(TRACE_syscall, syscall_errno)
{
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
	ASSERT_EQ(0, ret);

	/* openat has been skipped and an errno returned. */
	EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
}

TEST_F(TRACE_syscall, syscall_faked)
{
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
	ASSERT_EQ(0, ret);

	/* gettid has been skipped and an altered return value stored. */
	EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
}

TEST_F(TRACE_syscall, skip_after_RET_TRACE)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	/* Install fixture filter. */
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
	ASSERT_EQ(0, ret);

	/* Install "errno on getppid" filter.
*/ 2007 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2008 ASSERT_EQ(0, ret); 2009 2010 /* Tracer will redirect getpid to getppid, and we should see EPERM. */ 2011 errno = 0; 2012 EXPECT_EQ(-1, syscall(__NR_getpid)); 2013 EXPECT_EQ(EPERM, errno); 2014 } 2015 2016 TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS) 2017 { 2018 struct sock_filter filter[] = { 2019 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2020 offsetof(struct seccomp_data, nr)), 2021 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 2022 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 2023 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2024 }; 2025 struct sock_fprog prog = { 2026 .len = (unsigned short)ARRAY_SIZE(filter), 2027 .filter = filter, 2028 }; 2029 long ret; 2030 2031 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 2032 ASSERT_EQ(0, ret); 2033 2034 /* Install fixture filter. */ 2035 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); 2036 ASSERT_EQ(0, ret); 2037 2038 /* Install "death on getppid" filter. */ 2039 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2040 ASSERT_EQ(0, ret); 2041 2042 /* Tracer will redirect getpid to getppid, and we should die. */ 2043 EXPECT_NE(self->mypid, syscall(__NR_getpid)); 2044 } 2045 2046 TEST_F(TRACE_syscall, skip_after_ptrace) 2047 { 2048 struct sock_filter filter[] = { 2049 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2050 offsetof(struct seccomp_data, nr)), 2051 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 2052 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM), 2053 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2054 }; 2055 struct sock_fprog prog = { 2056 .len = (unsigned short)ARRAY_SIZE(filter), 2057 .filter = filter, 2058 }; 2059 long ret; 2060 2061 /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ 2062 teardown_trace_fixture(_metadata, self->tracer); 2063 self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, 2064 true); 2065 2066 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 2067 ASSERT_EQ(0, ret); 2068 2069 /* Install "errno on getppid" filter. */ 2070 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2071 ASSERT_EQ(0, ret); 2072 2073 /* Tracer will redirect getpid to getppid, and we should see EPERM. */ 2074 EXPECT_EQ(-1, syscall(__NR_getpid)); 2075 EXPECT_EQ(EPERM, errno); 2076 } 2077 2078 TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS) 2079 { 2080 struct sock_filter filter[] = { 2081 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2082 offsetof(struct seccomp_data, nr)), 2083 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 2084 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 2085 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2086 }; 2087 struct sock_fprog prog = { 2088 .len = (unsigned short)ARRAY_SIZE(filter), 2089 .filter = filter, 2090 }; 2091 long ret; 2092 2093 /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ 2094 teardown_trace_fixture(_metadata, self->tracer); 2095 self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, 2096 true); 2097 2098 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 2099 ASSERT_EQ(0, ret); 2100 2101 /* Install "death on getppid" filter. */ 2102 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2103 ASSERT_EQ(0, ret); 2104 2105 /* Tracer will redirect getpid to getppid, and we should die. 
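The filter is expected to be checked against the syscall number as rewritten by the ptrace tracer, so the kill-on-getppid rule should fire and deliver the SIGSYS this test expects.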
*/ 2106 EXPECT_NE(self->mypid, syscall(__NR_getpid)); 2107 } 2108 2109 TEST(seccomp_syscall) 2110 { 2111 struct sock_filter filter[] = { 2112 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2113 }; 2114 struct sock_fprog prog = { 2115 .len = (unsigned short)ARRAY_SIZE(filter), 2116 .filter = filter, 2117 }; 2118 long ret; 2119 2120 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 2121 ASSERT_EQ(0, ret) { 2122 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2123 } 2124 2125 /* Reject insane operation. */ 2126 ret = seccomp(-1, 0, &prog); 2127 ASSERT_NE(ENOSYS, errno) { 2128 TH_LOG("Kernel does not support seccomp syscall!"); 2129 } 2130 EXPECT_EQ(EINVAL, errno) { 2131 TH_LOG("Did not reject crazy op value!"); 2132 } 2133 2134 /* Reject strict with flags or pointer. */ 2135 ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL); 2136 EXPECT_EQ(EINVAL, errno) { 2137 TH_LOG("Did not reject mode strict with flags!"); 2138 } 2139 ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog); 2140 EXPECT_EQ(EINVAL, errno) { 2141 TH_LOG("Did not reject mode strict with uargs!"); 2142 } 2143 2144 /* Reject insane args for filter. */ 2145 ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog); 2146 EXPECT_EQ(EINVAL, errno) { 2147 TH_LOG("Did not reject crazy filter flags!"); 2148 } 2149 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL); 2150 EXPECT_EQ(EFAULT, errno) { 2151 TH_LOG("Did not reject NULL filter!"); 2152 } 2153 2154 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); 2155 EXPECT_EQ(0, errno) { 2156 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s", 2157 strerror(errno)); 2158 } 2159 } 2160 2161 TEST(seccomp_syscall_mode_lock) 2162 { 2163 struct sock_filter filter[] = { 2164 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2165 }; 2166 struct sock_fprog prog = { 2167 .len = (unsigned short)ARRAY_SIZE(filter), 2168 .filter = filter, 2169 }; 2170 long ret; 2171 2172 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); 2173 ASSERT_EQ(0, ret) { 2174 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2175 } 2176 2177 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); 2178 ASSERT_NE(ENOSYS, errno) { 2179 TH_LOG("Kernel does not support seccomp syscall!"); 2180 } 2181 EXPECT_EQ(0, ret) { 2182 TH_LOG("Could not install filter!"); 2183 } 2184 2185 /* Make sure neither entry point will switch to strict. */ 2186 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0); 2187 EXPECT_EQ(EINVAL, errno) { 2188 TH_LOG("Switched to mode strict!"); 2189 } 2190 2191 ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL); 2192 EXPECT_EQ(EINVAL, errno) { 2193 TH_LOG("Switched to mode strict!"); 2194 } 2195 } 2196 2197 /* 2198 * Test detection of known and unknown filter flags. Userspace needs to be able 2199 * to check if a filter flag is supported by the current kernel and a good way 2200 * of doing that is by attempting to enter filter mode, with the flag bit in 2201 * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates 2202 * that the flag is valid and EINVAL indicates that the flag is invalid. 
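* For example, a process could probe for SECCOMP_FILTER_FLAG_TSYNC support (an illustrative sketch of the probing technique, not part of this test) with: ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, NULL); and then treat ret == -1 with errno == EFAULT as "flag supported" and errno == EINVAL as "flag not supported".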
2203 */ 2204 TEST(detect_seccomp_filter_flags) 2205 { 2206 unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC, 2207 SECCOMP_FILTER_FLAG_LOG, 2208 SECCOMP_FILTER_FLAG_SPEC_ALLOW, 2209 SECCOMP_FILTER_FLAG_NEW_LISTENER, 2210 SECCOMP_FILTER_FLAG_TSYNC_ESRCH }; 2211 unsigned int exclusive[] = { 2212 SECCOMP_FILTER_FLAG_TSYNC, 2213 SECCOMP_FILTER_FLAG_NEW_LISTENER }; 2214 unsigned int flag, all_flags, exclusive_mask; 2215 int i; 2216 long ret; 2217 2218 /* Test detection of individual known-good filter flags */ 2219 for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) { 2220 int bits = 0; 2221 2222 flag = flags[i]; 2223 /* Make sure the flag is a single bit! */ 2224 while (flag) { 2225 if (flag & 0x1) 2226 bits++; 2227 flag >>= 1; 2228 } 2229 ASSERT_EQ(1, bits); 2230 flag = flags[i]; 2231 2232 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2233 ASSERT_NE(ENOSYS, errno) { 2234 TH_LOG("Kernel does not support seccomp syscall!"); 2235 } 2236 EXPECT_EQ(-1, ret); 2237 EXPECT_EQ(EFAULT, errno) { 2238 TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!", 2239 flag); 2240 } 2241 2242 all_flags |= flag; 2243 } 2244 2245 /* 2246 * Test detection of all known-good filter flags combined. But 2247 * for the exclusive flags we need to mask them out and try them 2248 * individually for the "all flags" testing. 2249 */ 2250 exclusive_mask = 0; 2251 for (i = 0; i < ARRAY_SIZE(exclusive); i++) 2252 exclusive_mask |= exclusive[i]; 2253 for (i = 0; i < ARRAY_SIZE(exclusive); i++) { 2254 flag = all_flags & ~exclusive_mask; 2255 flag |= exclusive[i]; 2256 2257 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2258 EXPECT_EQ(-1, ret); 2259 EXPECT_EQ(EFAULT, errno) { 2260 TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!", 2261 flag); 2262 } 2263 } 2264 2265 /* Test detection of an unknown filter flag, without exclusives. */ 2266 flag = -1; 2267 flag &= ~exclusive_mask; 2268 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2269 EXPECT_EQ(-1, ret); 2270 EXPECT_EQ(EINVAL, errno) { 2271 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!", 2272 flag); 2273 } 2274 2275 /* 2276 * Test detection of an unknown filter flag that may simply need to be 2277 * added to this test. 2278 */ 2279 flag = flags[ARRAY_SIZE(flags) - 1] << 1; 2280 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2281 EXPECT_EQ(-1, ret); 2282 EXPECT_EQ(EINVAL, errno) { 2283 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!
Does a new flag need to be added to this test?", 2284 flag); 2285 } 2286 } 2287 2288 TEST(TSYNC_first) 2289 { 2290 struct sock_filter filter[] = { 2291 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2292 }; 2293 struct sock_fprog prog = { 2294 .len = (unsigned short)ARRAY_SIZE(filter), 2295 .filter = filter, 2296 }; 2297 long ret; 2298 2299 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); 2300 ASSERT_EQ(0, ret) { 2301 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2302 } 2303 2304 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2305 &prog); 2306 ASSERT_NE(ENOSYS, errno) { 2307 TH_LOG("Kernel does not support seccomp syscall!"); 2308 } 2309 EXPECT_EQ(0, ret) { 2310 TH_LOG("Could not install initial filter with TSYNC!"); 2311 } 2312 } 2313 2314 #define TSYNC_SIBLINGS 2 2315 struct tsync_sibling { 2316 pthread_t tid; 2317 pid_t system_tid; 2318 sem_t *started; 2319 pthread_cond_t *cond; 2320 pthread_mutex_t *mutex; 2321 int diverge; 2322 int num_waits; 2323 struct sock_fprog *prog; 2324 struct __test_metadata *metadata; 2325 }; 2326 2327 /* 2328 * To avoid joining joined threads (which is not allowed by Bionic), 2329 * make sure we both successfully join and clear the tid to skip a 2330 * later join attempt during fixture teardown. Any remaining threads 2331 * will be directly killed during teardown. 2332 */ 2333 #define PTHREAD_JOIN(tid, status) \ 2334 do { \ 2335 int _rc = pthread_join(tid, status); \ 2336 if (_rc) { \ 2337 TH_LOG("pthread_join of tid %u failed: %d\n", \ 2338 (unsigned int)tid, _rc); \ 2339 } else { \ 2340 tid = 0; \ 2341 } \ 2342 } while (0) 2343 2344 FIXTURE(TSYNC) { 2345 struct sock_fprog root_prog, apply_prog; 2346 struct tsync_sibling sibling[TSYNC_SIBLINGS]; 2347 sem_t started; 2348 pthread_cond_t cond; 2349 pthread_mutex_t mutex; 2350 int sibling_count; 2351 }; 2352 2353 FIXTURE_SETUP(TSYNC) 2354 { 2355 struct sock_filter root_filter[] = { 2356 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2357 }; 2358 struct sock_filter apply_filter[] = { 2359 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2360 offsetof(struct seccomp_data, nr)), 2361 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), 2362 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 2363 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2364 }; 2365 2366 memset(&self->root_prog, 0, sizeof(self->root_prog)); 2367 memset(&self->apply_prog, 0, sizeof(self->apply_prog)); 2368 memset(&self->sibling, 0, sizeof(self->sibling)); 2369 self->root_prog.filter = malloc(sizeof(root_filter)); 2370 ASSERT_NE(NULL, self->root_prog.filter); 2371 memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter)); 2372 self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter); 2373 2374 self->apply_prog.filter = malloc(sizeof(apply_filter)); 2375 ASSERT_NE(NULL, self->apply_prog.filter); 2376 memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter)); 2377 self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter); 2378 2379 self->sibling_count = 0; 2380 pthread_mutex_init(&self->mutex, NULL); 2381 pthread_cond_init(&self->cond, NULL); 2382 sem_init(&self->started, 0, 0); 2383 self->sibling[0].tid = 0; 2384 self->sibling[0].cond = &self->cond; 2385 self->sibling[0].started = &self->started; 2386 self->sibling[0].mutex = &self->mutex; 2387 self->sibling[0].diverge = 0; 2388 self->sibling[0].num_waits = 1; 2389 self->sibling[0].prog = &self->root_prog; 2390 self->sibling[0].metadata = _metadata; 2391 self->sibling[1].tid = 0; 2392 self->sibling[1].cond = &self->cond; 2393 self->sibling[1].started = &self->started; 2394 
self->sibling[1].mutex = &self->mutex; 2395 self->sibling[1].diverge = 0; 2396 self->sibling[1].prog = &self->root_prog; 2397 self->sibling[1].num_waits = 1; 2398 self->sibling[1].metadata = _metadata; 2399 } 2400 2401 FIXTURE_TEARDOWN(TSYNC) 2402 { 2403 int sib = 0; 2404 2405 if (self->root_prog.filter) 2406 free(self->root_prog.filter); 2407 if (self->apply_prog.filter) 2408 free(self->apply_prog.filter); 2409 2410 for ( ; sib < self->sibling_count; ++sib) { 2411 struct tsync_sibling *s = &self->sibling[sib]; 2412 2413 if (!s->tid) 2414 continue; 2415 /* 2416 * If a thread is still running, it may be stuck, so hit 2417 * it over the head really hard. 2418 */ 2419 pthread_kill(s->tid, 9); 2420 } 2421 pthread_mutex_destroy(&self->mutex); 2422 pthread_cond_destroy(&self->cond); 2423 sem_destroy(&self->started); 2424 } 2425 2426 void *tsync_sibling(void *data) 2427 { 2428 long ret = 0; 2429 struct tsync_sibling *me = data; 2430 2431 me->system_tid = syscall(__NR_gettid); 2432 2433 pthread_mutex_lock(me->mutex); 2434 if (me->diverge) { 2435 /* Just re-apply the root prog to fork the tree */ 2436 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, 2437 me->prog, 0, 0); 2438 } 2439 sem_post(me->started); 2440 /* Return outside of started so parent notices failures. */ 2441 if (ret) { 2442 pthread_mutex_unlock(me->mutex); 2443 return (void *)SIBLING_EXIT_FAILURE; 2444 } 2445 do { 2446 pthread_cond_wait(me->cond, me->mutex); 2447 me->num_waits = me->num_waits - 1; 2448 } while (me->num_waits); 2449 pthread_mutex_unlock(me->mutex); 2450 2451 ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); 2452 if (!ret) 2453 return (void *)SIBLING_EXIT_NEWPRIVS; 2454 read(0, NULL, 0); 2455 return (void *)SIBLING_EXIT_UNKILLED; 2456 } 2457 2458 void tsync_start_sibling(struct tsync_sibling *sibling) 2459 { 2460 pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling); 2461 } 2462 2463 TEST_F(TSYNC, siblings_fail_prctl) 2464 { 2465 long ret; 2466 void *status; 2467 struct sock_filter filter[] = { 2468 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2469 offsetof(struct seccomp_data, nr)), 2470 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), 2471 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL), 2472 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2473 }; 2474 struct sock_fprog prog = { 2475 .len = (unsigned short)ARRAY_SIZE(filter), 2476 .filter = filter, 2477 }; 2478 2479 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2480 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2481 } 2482 2483 /* Check prctl failure detection by requesting sib 0 diverge. */ 2484 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); 2485 ASSERT_NE(ENOSYS, errno) { 2486 TH_LOG("Kernel does not support seccomp syscall!"); 2487 } 2488 ASSERT_EQ(0, ret) { 2489 TH_LOG("setting filter failed"); 2490 } 2491 2492 self->sibling[0].diverge = 1; 2493 tsync_start_sibling(&self->sibling[0]); 2494 tsync_start_sibling(&self->sibling[1]); 2495 2496 while (self->sibling_count < TSYNC_SIBLINGS) { 2497 sem_wait(&self->started); 2498 self->sibling_count++; 2499 } 2500 2501 /* Signal the threads to clean up*/ 2502 pthread_mutex_lock(&self->mutex); 2503 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2504 TH_LOG("cond broadcast non-zero"); 2505 } 2506 pthread_mutex_unlock(&self->mutex); 2507 2508 /* Ensure diverging sibling failed to call prctl. 
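Sibling 0 re-applied the root filter while the prctl-blocking filter was active, so its prctl() should have failed with EINVAL and the thread should report SIBLING_EXIT_FAILURE; sibling 1 never diverged and should exit with SIBLING_EXIT_UNKILLED.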
*/ 2509 PTHREAD_JOIN(self->sibling[0].tid, &status); 2510 EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status); 2511 PTHREAD_JOIN(self->sibling[1].tid, &status); 2512 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2513 } 2514 2515 TEST_F(TSYNC, two_siblings_with_ancestor) 2516 { 2517 long ret; 2518 void *status; 2519 2520 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2521 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2522 } 2523 2524 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2525 ASSERT_NE(ENOSYS, errno) { 2526 TH_LOG("Kernel does not support seccomp syscall!"); 2527 } 2528 ASSERT_EQ(0, ret) { 2529 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2530 } 2531 tsync_start_sibling(&self->sibling[0]); 2532 tsync_start_sibling(&self->sibling[1]); 2533 2534 while (self->sibling_count < TSYNC_SIBLINGS) { 2535 sem_wait(&self->started); 2536 self->sibling_count++; 2537 } 2538 2539 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2540 &self->apply_prog); 2541 ASSERT_EQ(0, ret) { 2542 TH_LOG("Could install filter on all threads!"); 2543 } 2544 /* Tell the siblings to test the policy */ 2545 pthread_mutex_lock(&self->mutex); 2546 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2547 TH_LOG("cond broadcast non-zero"); 2548 } 2549 pthread_mutex_unlock(&self->mutex); 2550 /* Ensure they are both killed and don't exit cleanly. */ 2551 PTHREAD_JOIN(self->sibling[0].tid, &status); 2552 EXPECT_EQ(0x0, (long)status); 2553 PTHREAD_JOIN(self->sibling[1].tid, &status); 2554 EXPECT_EQ(0x0, (long)status); 2555 } 2556 2557 TEST_F(TSYNC, two_sibling_want_nnp) 2558 { 2559 void *status; 2560 2561 /* start siblings before any prctl() operations */ 2562 tsync_start_sibling(&self->sibling[0]); 2563 tsync_start_sibling(&self->sibling[1]); 2564 while (self->sibling_count < TSYNC_SIBLINGS) { 2565 sem_wait(&self->started); 2566 self->sibling_count++; 2567 } 2568 2569 /* Tell the siblings to test no policy */ 2570 pthread_mutex_lock(&self->mutex); 2571 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2572 TH_LOG("cond broadcast non-zero"); 2573 } 2574 pthread_mutex_unlock(&self->mutex); 2575 2576 /* Ensure they are both upset about lacking nnp. */ 2577 PTHREAD_JOIN(self->sibling[0].tid, &status); 2578 EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); 2579 PTHREAD_JOIN(self->sibling[1].tid, &status); 2580 EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); 2581 } 2582 2583 TEST_F(TSYNC, two_siblings_with_no_filter) 2584 { 2585 long ret; 2586 void *status; 2587 2588 /* start siblings before any prctl() operations */ 2589 tsync_start_sibling(&self->sibling[0]); 2590 tsync_start_sibling(&self->sibling[1]); 2591 while (self->sibling_count < TSYNC_SIBLINGS) { 2592 sem_wait(&self->started); 2593 self->sibling_count++; 2594 } 2595 2596 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2597 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2598 } 2599 2600 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2601 &self->apply_prog); 2602 ASSERT_NE(ENOSYS, errno) { 2603 TH_LOG("Kernel does not support seccomp syscall!"); 2604 } 2605 ASSERT_EQ(0, ret) { 2606 TH_LOG("Could install filter on all threads!"); 2607 } 2608 2609 /* Tell the siblings to test the policy */ 2610 pthread_mutex_lock(&self->mutex); 2611 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2612 TH_LOG("cond broadcast non-zero"); 2613 } 2614 pthread_mutex_unlock(&self->mutex); 2615 2616 /* Ensure they are both killed and don't exit cleanly. 
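The TSYNC'd filter kills any thread that calls read(), so both siblings should die in their final read() rather than returning SIBLING_EXIT_UNKILLED, leaving a join status of 0.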
*/ 2617 PTHREAD_JOIN(self->sibling[0].tid, &status); 2618 EXPECT_EQ(0x0, (long)status); 2619 PTHREAD_JOIN(self->sibling[1].tid, &status); 2620 EXPECT_EQ(0x0, (long)status); 2621 } 2622 2623 TEST_F(TSYNC, two_siblings_with_one_divergence) 2624 { 2625 long ret; 2626 void *status; 2627 2628 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2629 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2630 } 2631 2632 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2633 ASSERT_NE(ENOSYS, errno) { 2634 TH_LOG("Kernel does not support seccomp syscall!"); 2635 } 2636 ASSERT_EQ(0, ret) { 2637 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2638 } 2639 self->sibling[0].diverge = 1; 2640 tsync_start_sibling(&self->sibling[0]); 2641 tsync_start_sibling(&self->sibling[1]); 2642 2643 while (self->sibling_count < TSYNC_SIBLINGS) { 2644 sem_wait(&self->started); 2645 self->sibling_count++; 2646 } 2647 2648 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2649 &self->apply_prog); 2650 ASSERT_EQ(self->sibling[0].system_tid, ret) { 2651 TH_LOG("Did not fail on diverged sibling."); 2652 } 2653 2654 /* Wake the threads */ 2655 pthread_mutex_lock(&self->mutex); 2656 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2657 TH_LOG("cond broadcast non-zero"); 2658 } 2659 pthread_mutex_unlock(&self->mutex); 2660 2661 /* Ensure they are both unkilled. */ 2662 PTHREAD_JOIN(self->sibling[0].tid, &status); 2663 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2664 PTHREAD_JOIN(self->sibling[1].tid, &status); 2665 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2666 } 2667 2668 TEST_F(TSYNC, two_siblings_with_one_divergence_no_tid_in_err) 2669 { 2670 long ret, flags; 2671 void *status; 2672 2673 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2674 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2675 } 2676 2677 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2678 ASSERT_NE(ENOSYS, errno) { 2679 TH_LOG("Kernel does not support seccomp syscall!"); 2680 } 2681 ASSERT_EQ(0, ret) { 2682 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2683 } 2684 self->sibling[0].diverge = 1; 2685 tsync_start_sibling(&self->sibling[0]); 2686 tsync_start_sibling(&self->sibling[1]); 2687 2688 while (self->sibling_count < TSYNC_SIBLINGS) { 2689 sem_wait(&self->started); 2690 self->sibling_count++; 2691 } 2692 2693 flags = SECCOMP_FILTER_FLAG_TSYNC | \ 2694 SECCOMP_FILTER_FLAG_TSYNC_ESRCH; 2695 ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, &self->apply_prog); 2696 ASSERT_EQ(ESRCH, errno) { 2697 TH_LOG("Did not return ESRCH for diverged sibling."); 2698 } 2699 ASSERT_EQ(-1, ret) { 2700 TH_LOG("Did not fail on diverged sibling."); 2701 } 2702 2703 /* Wake the threads */ 2704 pthread_mutex_lock(&self->mutex); 2705 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2706 TH_LOG("cond broadcast non-zero"); 2707 } 2708 pthread_mutex_unlock(&self->mutex); 2709 2710 /* Ensure they are both unkilled. 
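The TSYNC attempt above failed with ESRCH, so the kill-on-read filter was never applied to either thread and both should return SIBLING_EXIT_UNKILLED.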
*/ 2711 PTHREAD_JOIN(self->sibling[0].tid, &status); 2712 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2713 PTHREAD_JOIN(self->sibling[1].tid, &status); 2714 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2715 } 2716 2717 TEST_F(TSYNC, two_siblings_not_under_filter) 2718 { 2719 long ret, sib; 2720 void *status; 2721 struct timespec delay = { .tv_nsec = 100000000 }; 2722 2723 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2724 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2725 } 2726 2727 /* 2728 * Sibling 0 will have its own seccomp policy 2729 * and Sibling 1 will not be under seccomp at 2730 * all. Sibling 1 will enter seccomp and 0 2731 * will cause failure. 2732 */ 2733 self->sibling[0].diverge = 1; 2734 tsync_start_sibling(&self->sibling[0]); 2735 tsync_start_sibling(&self->sibling[1]); 2736 2737 while (self->sibling_count < TSYNC_SIBLINGS) { 2738 sem_wait(&self->started); 2739 self->sibling_count++; 2740 } 2741 2742 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2743 ASSERT_NE(ENOSYS, errno) { 2744 TH_LOG("Kernel does not support seccomp syscall!"); 2745 } 2746 ASSERT_EQ(0, ret) { 2747 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2748 } 2749 2750 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2751 &self->apply_prog); 2752 ASSERT_EQ(ret, self->sibling[0].system_tid) { 2753 TH_LOG("Did not fail on diverged sibling."); 2754 } 2755 sib = 1; 2756 if (ret == self->sibling[0].system_tid) 2757 sib = 0; 2758 2759 pthread_mutex_lock(&self->mutex); 2760 2761 /* Increment the other siblings num_waits so we can clean up 2762 * the one we just saw. 2763 */ 2764 self->sibling[!sib].num_waits += 1; 2765 2766 /* Signal the thread to clean up*/ 2767 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2768 TH_LOG("cond broadcast non-zero"); 2769 } 2770 pthread_mutex_unlock(&self->mutex); 2771 PTHREAD_JOIN(self->sibling[sib].tid, &status); 2772 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2773 /* Poll for actual task death. pthread_join doesn't guarantee it. */ 2774 while (!kill(self->sibling[sib].system_tid, 0)) 2775 nanosleep(&delay, NULL); 2776 /* Switch to the remaining sibling */ 2777 sib = !sib; 2778 2779 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2780 &self->apply_prog); 2781 ASSERT_EQ(0, ret) { 2782 TH_LOG("Expected the remaining sibling to sync"); 2783 }; 2784 2785 pthread_mutex_lock(&self->mutex); 2786 2787 /* If remaining sibling didn't have a chance to wake up during 2788 * the first broadcast, manually reduce the num_waits now. 2789 */ 2790 if (self->sibling[sib].num_waits > 1) 2791 self->sibling[sib].num_waits = 1; 2792 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2793 TH_LOG("cond broadcast non-zero"); 2794 } 2795 pthread_mutex_unlock(&self->mutex); 2796 PTHREAD_JOIN(self->sibling[sib].tid, &status); 2797 EXPECT_EQ(0, (long)status); 2798 /* Poll for actual task death. pthread_join doesn't guarantee it. */ 2799 while (!kill(self->sibling[sib].system_tid, 0)) 2800 nanosleep(&delay, NULL); 2801 2802 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2803 &self->apply_prog); 2804 ASSERT_EQ(0, ret); /* just us chickens */ 2805 } 2806 2807 /* Make sure restarted syscalls are seen directly as "restart_syscall". 
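The child is stopped in nanosleep() with SIGSTOP and resumed with SIGCONT; when the kernel restarts the interrupted sleep, the filter below distinguishes the two cases by tagging nanosleep with SECCOMP_RET_TRACE|0x100 and restart_syscall with SECCOMP_RET_TRACE|0x200 so the tracer can verify which syscall was entered.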
*/ 2808 TEST(syscall_restart) 2809 { 2810 long ret; 2811 unsigned long msg; 2812 pid_t child_pid; 2813 int pipefd[2]; 2814 int status; 2815 siginfo_t info = { }; 2816 struct sock_filter filter[] = { 2817 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2818 offsetof(struct seccomp_data, nr)), 2819 2820 #ifdef __NR_sigreturn 2821 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 7, 0), 2822 #endif 2823 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 6, 0), 2824 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 5, 0), 2825 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 4, 0), 2826 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 5, 0), 2827 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_clock_nanosleep, 4, 0), 2828 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0), 2829 2830 /* Allow __NR_write for easy logging. */ 2831 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1), 2832 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2833 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 2834 /* The nanosleep jump target. */ 2835 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100), 2836 /* The restart_syscall jump target. */ 2837 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200), 2838 }; 2839 struct sock_fprog prog = { 2840 .len = (unsigned short)ARRAY_SIZE(filter), 2841 .filter = filter, 2842 }; 2843 #if defined(__arm__) 2844 struct utsname utsbuf; 2845 #endif 2846 2847 ASSERT_EQ(0, pipe(pipefd)); 2848 2849 child_pid = fork(); 2850 ASSERT_LE(0, child_pid); 2851 if (child_pid == 0) { 2852 /* Child uses EXPECT not ASSERT to deliver status correctly. */ 2853 char buf = ' '; 2854 struct timespec timeout = { }; 2855 2856 /* Attach parent as tracer and stop. */ 2857 EXPECT_EQ(0, ptrace(PTRACE_TRACEME)); 2858 EXPECT_EQ(0, raise(SIGSTOP)); 2859 2860 EXPECT_EQ(0, close(pipefd[1])); 2861 2862 EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2863 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2864 } 2865 2866 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2867 EXPECT_EQ(0, ret) { 2868 TH_LOG("Failed to install filter!"); 2869 } 2870 2871 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) { 2872 TH_LOG("Failed to read() sync from parent"); 2873 } 2874 EXPECT_EQ('.', buf) { 2875 TH_LOG("Failed to get sync data from read()"); 2876 } 2877 2878 /* Start nanosleep to be interrupted. */ 2879 timeout.tv_sec = 1; 2880 errno = 0; 2881 EXPECT_EQ(0, nanosleep(&timeout, NULL)) { 2882 TH_LOG("Call to nanosleep() failed (errno %d)", errno); 2883 } 2884 2885 /* Read final sync from parent. */ 2886 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) { 2887 TH_LOG("Failed final read() from parent"); 2888 } 2889 EXPECT_EQ('!', buf) { 2890 TH_LOG("Failed to get final data from read()"); 2891 } 2892 2893 /* Directly report the status of our test harness results. */ 2894 syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS 2895 : EXIT_FAILURE); 2896 } 2897 EXPECT_EQ(0, close(pipefd[0])); 2898 2899 /* Attach to child, setup options, and release. */ 2900 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 2901 ASSERT_EQ(true, WIFSTOPPED(status)); 2902 ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL, 2903 PTRACE_O_TRACESECCOMP)); 2904 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 2905 ASSERT_EQ(1, write(pipefd[1], ".", 1)); 2906 2907 /* Wait for nanosleep() to start. 
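The filter tags nanosleep (and clock_nanosleep) with SECCOMP_RET_TRACE|0x100, so the child should stop with PTRACE_EVENT_SECCOMP and an event message of 0x100.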
*/ 2908 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 2909 ASSERT_EQ(true, WIFSTOPPED(status)); 2910 ASSERT_EQ(SIGTRAP, WSTOPSIG(status)); 2911 ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16)); 2912 ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg)); 2913 ASSERT_EQ(0x100, msg); 2914 ret = get_syscall(_metadata, child_pid); 2915 EXPECT_TRUE(ret == __NR_nanosleep || ret == __NR_clock_nanosleep); 2916 2917 /* Might as well check siginfo for sanity while we're here. */ 2918 ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); 2919 ASSERT_EQ(SIGTRAP, info.si_signo); 2920 ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code); 2921 EXPECT_EQ(0, info.si_errno); 2922 EXPECT_EQ(getuid(), info.si_uid); 2923 /* Verify signal delivery came from child (seccomp-triggered). */ 2924 EXPECT_EQ(child_pid, info.si_pid); 2925 2926 /* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */ 2927 ASSERT_EQ(0, kill(child_pid, SIGSTOP)); 2928 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 2929 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 2930 ASSERT_EQ(true, WIFSTOPPED(status)); 2931 ASSERT_EQ(SIGSTOP, WSTOPSIG(status)); 2932 ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); 2933 /* 2934 * There is no siginfo on SIGSTOP any more, so we can't verify 2935 * signal delivery came from parent now (getpid() == info.si_pid). 2936 * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com 2937 * At least verify the SIGSTOP via PTRACE_GETSIGINFO. 2938 */ 2939 EXPECT_EQ(SIGSTOP, info.si_signo); 2940 2941 /* Restart nanosleep with SIGCONT, which triggers restart_syscall. */ 2942 ASSERT_EQ(0, kill(child_pid, SIGCONT)); 2943 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 2944 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 2945 ASSERT_EQ(true, WIFSTOPPED(status)); 2946 ASSERT_EQ(SIGCONT, WSTOPSIG(status)); 2947 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 2948 2949 /* Wait for restart_syscall() to start. */ 2950 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 2951 ASSERT_EQ(true, WIFSTOPPED(status)); 2952 ASSERT_EQ(SIGTRAP, WSTOPSIG(status)); 2953 ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16)); 2954 ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg)); 2955 2956 ASSERT_EQ(0x200, msg); 2957 ret = get_syscall(_metadata, child_pid); 2958 #if defined(__arm__) 2959 /* 2960 * FIXME: 2961 * - native ARM registers do NOT expose true syscall. 2962 * - compat ARM registers on ARM64 DO expose true syscall. 2963 */ 2964 ASSERT_EQ(0, uname(&utsbuf)); 2965 if (strncmp(utsbuf.machine, "arm", 3) == 0) { 2966 EXPECT_EQ(__NR_nanosleep, ret); 2967 } else 2968 #endif 2969 { 2970 EXPECT_EQ(__NR_restart_syscall, ret); 2971 } 2972 2973 /* Write again to end test. 
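Resume the child from its seccomp stop, send the final '!' sync byte so it can finish, and collect an exit status that reflects the child's own EXPECT results.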
*/ 2974 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 2975 ASSERT_EQ(1, write(pipefd[1], "!", 1)); 2976 EXPECT_EQ(0, close(pipefd[1])); 2977 2978 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 2979 if (WIFSIGNALED(status) || WEXITSTATUS(status)) 2980 _metadata->passed = 0; 2981 } 2982 2983 TEST_SIGNAL(filter_flag_log, SIGSYS) 2984 { 2985 struct sock_filter allow_filter[] = { 2986 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2987 }; 2988 struct sock_filter kill_filter[] = { 2989 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2990 offsetof(struct seccomp_data, nr)), 2991 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 2992 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 2993 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2994 }; 2995 struct sock_fprog allow_prog = { 2996 .len = (unsigned short)ARRAY_SIZE(allow_filter), 2997 .filter = allow_filter, 2998 }; 2999 struct sock_fprog kill_prog = { 3000 .len = (unsigned short)ARRAY_SIZE(kill_filter), 3001 .filter = kill_filter, 3002 }; 3003 long ret; 3004 pid_t parent = getppid(); 3005 3006 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3007 ASSERT_EQ(0, ret); 3008 3009 /* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */ 3010 ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG, 3011 &allow_prog); 3012 ASSERT_NE(ENOSYS, errno) { 3013 TH_LOG("Kernel does not support seccomp syscall!"); 3014 } 3015 EXPECT_NE(0, ret) { 3016 TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!"); 3017 } 3018 EXPECT_EQ(EINVAL, errno) { 3019 TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!"); 3020 } 3021 3022 /* Verify that a simple, permissive filter can be added with no flags */ 3023 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog); 3024 EXPECT_EQ(0, ret); 3025 3026 /* See if the same filter can be added with the FILTER_FLAG_LOG flag */ 3027 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, 3028 &allow_prog); 3029 ASSERT_NE(EINVAL, errno) { 3030 TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!"); 3031 } 3032 EXPECT_EQ(0, ret); 3033 3034 /* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */ 3035 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, 3036 &kill_prog); 3037 EXPECT_EQ(0, ret); 3038 3039 EXPECT_EQ(parent, syscall(__NR_getppid)); 3040 /* getpid() should never return. 
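The kill filter installed with SECCOMP_FILTER_FLAG_LOG matches __NR_getpid, so the call below should terminate the process with the SIGSYS this TEST_SIGNAL case expects instead of returning.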
*/ 3041 EXPECT_EQ(0, syscall(__NR_getpid)); 3042 } 3043 3044 TEST(get_action_avail) 3045 { 3046 __u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP, 3047 SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE, 3048 SECCOMP_RET_LOG, SECCOMP_RET_ALLOW }; 3049 __u32 unknown_action = 0x10000000U; 3050 int i; 3051 long ret; 3052 3053 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]); 3054 ASSERT_NE(ENOSYS, errno) { 3055 TH_LOG("Kernel does not support seccomp syscall!"); 3056 } 3057 ASSERT_NE(EINVAL, errno) { 3058 TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!"); 3059 } 3060 EXPECT_EQ(ret, 0); 3061 3062 for (i = 0; i < ARRAY_SIZE(actions); i++) { 3063 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]); 3064 EXPECT_EQ(ret, 0) { 3065 TH_LOG("Expected action (0x%X) not available!", 3066 actions[i]); 3067 } 3068 } 3069 3070 /* Check that an unknown action is handled properly (EOPNOTSUPP) */ 3071 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action); 3072 EXPECT_EQ(ret, -1); 3073 EXPECT_EQ(errno, EOPNOTSUPP); 3074 } 3075 3076 TEST(get_metadata) 3077 { 3078 pid_t pid; 3079 int pipefd[2]; 3080 char buf; 3081 struct seccomp_metadata md; 3082 long ret; 3083 3084 /* Only real root can get metadata. */ 3085 if (geteuid()) { 3086 XFAIL(return, "get_metadata requires real root"); 3087 return; 3088 } 3089 3090 ASSERT_EQ(0, pipe(pipefd)); 3091 3092 pid = fork(); 3093 ASSERT_GE(pid, 0); 3094 if (pid == 0) { 3095 struct sock_filter filter[] = { 3096 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3097 }; 3098 struct sock_fprog prog = { 3099 .len = (unsigned short)ARRAY_SIZE(filter), 3100 .filter = filter, 3101 }; 3102 3103 /* one with log, one without */ 3104 EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 3105 SECCOMP_FILTER_FLAG_LOG, &prog)); 3106 EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog)); 3107 3108 EXPECT_EQ(0, close(pipefd[0])); 3109 ASSERT_EQ(1, write(pipefd[1], "1", 1)); 3110 ASSERT_EQ(0, close(pipefd[1])); 3111 3112 while (1) 3113 sleep(100); 3114 } 3115 3116 ASSERT_EQ(0, close(pipefd[1])); 3117 ASSERT_EQ(1, read(pipefd[0], &buf, 1)); 3118 3119 ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid)); 3120 ASSERT_EQ(pid, waitpid(pid, NULL, 0)); 3121 3122 /* Past here must not use ASSERT or child process is never killed. 
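A failed ASSERT aborts the test function immediately, which would skip the kill(pid, SIGKILL) at the end and leave the stopped child behind, so only EXPECT checks are used from here on.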
*/ 3123 3124 md.filter_off = 0; 3125 errno = 0; 3126 ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md); 3127 EXPECT_EQ(sizeof(md), ret) { 3128 if (errno == EINVAL) 3129 XFAIL(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)"); 3130 } 3131 3132 EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG); 3133 EXPECT_EQ(md.filter_off, 0); 3134 3135 md.filter_off = 1; 3136 ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md); 3137 EXPECT_EQ(sizeof(md), ret); 3138 EXPECT_EQ(md.flags, 0); 3139 EXPECT_EQ(md.filter_off, 1); 3140 3141 skip: 3142 ASSERT_EQ(0, kill(pid, SIGKILL)); 3143 } 3144 3145 static int user_trap_syscall(int nr, unsigned int flags) 3146 { 3147 struct sock_filter filter[] = { 3148 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, 3149 offsetof(struct seccomp_data, nr)), 3150 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1), 3151 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_USER_NOTIF), 3152 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), 3153 }; 3154 3155 struct sock_fprog prog = { 3156 .len = (unsigned short)ARRAY_SIZE(filter), 3157 .filter = filter, 3158 }; 3159 3160 return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog); 3161 } 3162 3163 #define USER_NOTIF_MAGIC INT_MAX 3164 TEST(user_notification_basic) 3165 { 3166 pid_t pid; 3167 long ret; 3168 int status, listener; 3169 struct seccomp_notif req = {}; 3170 struct seccomp_notif_resp resp = {}; 3171 struct pollfd pollfd; 3172 3173 struct sock_filter filter[] = { 3174 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3175 }; 3176 struct sock_fprog prog = { 3177 .len = (unsigned short)ARRAY_SIZE(filter), 3178 .filter = filter, 3179 }; 3180 3181 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3182 ASSERT_EQ(0, ret) { 3183 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3184 } 3185 3186 pid = fork(); 3187 ASSERT_GE(pid, 0); 3188 3189 /* Check that we get -ENOSYS with no listener attached */ 3190 if (pid == 0) { 3191 if (user_trap_syscall(__NR_getppid, 0) < 0) 3192 exit(1); 3193 ret = syscall(__NR_getppid); 3194 exit(ret >= 0 || errno != ENOSYS); 3195 } 3196 3197 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3198 EXPECT_EQ(true, WIFEXITED(status)); 3199 EXPECT_EQ(0, WEXITSTATUS(status)); 3200 3201 /* Add some no-op filters for grins. */ 3202 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3203 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3204 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3205 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3206 3207 /* Check that the basic notification machinery works */ 3208 listener = user_trap_syscall(__NR_getppid, 3209 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3210 ASSERT_GE(listener, 0); 3211 3212 /* Installing a second listener in the chain should EBUSY */ 3213 EXPECT_EQ(user_trap_syscall(__NR_getppid, 3214 SECCOMP_FILTER_FLAG_NEW_LISTENER), 3215 -1); 3216 EXPECT_EQ(errno, EBUSY); 3217 3218 pid = fork(); 3219 ASSERT_GE(pid, 0); 3220 3221 if (pid == 0) { 3222 ret = syscall(__NR_getppid); 3223 exit(ret != USER_NOTIF_MAGIC); 3224 } 3225 3226 pollfd.fd = listener; 3227 pollfd.events = POLLIN | POLLOUT; 3228 3229 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3230 EXPECT_EQ(pollfd.revents, POLLIN); 3231 3232 /* Test that we can't pass garbage to the kernel. 
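Passing a request struct with a non-zero field (pid = -1 here) to SECCOMP_IOCTL_NOTIF_RECV should be rejected with EINVAL; the notification is then fetched again with a properly zeroed struct.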
*/ 3233 memset(&req, 0, sizeof(req)); 3234 req.pid = -1; 3235 errno = 0; 3236 ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req); 3237 EXPECT_EQ(-1, ret); 3238 EXPECT_EQ(EINVAL, errno); 3239 3240 if (ret) { 3241 req.pid = 0; 3242 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3243 } 3244 3245 pollfd.fd = listener; 3246 pollfd.events = POLLIN | POLLOUT; 3247 3248 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3249 EXPECT_EQ(pollfd.revents, POLLOUT); 3250 3251 EXPECT_EQ(req.data.nr, __NR_getppid); 3252 3253 resp.id = req.id; 3254 resp.error = 0; 3255 resp.val = USER_NOTIF_MAGIC; 3256 3257 /* check that we make sure flags == 0 */ 3258 resp.flags = 1; 3259 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3260 EXPECT_EQ(errno, EINVAL); 3261 3262 resp.flags = 0; 3263 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3264 3265 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3266 EXPECT_EQ(true, WIFEXITED(status)); 3267 EXPECT_EQ(0, WEXITSTATUS(status)); 3268 } 3269 3270 TEST(user_notification_with_tsync) 3271 { 3272 int ret; 3273 unsigned int flags; 3274 3275 /* these were exclusive */ 3276 flags = SECCOMP_FILTER_FLAG_NEW_LISTENER | 3277 SECCOMP_FILTER_FLAG_TSYNC; 3278 ASSERT_EQ(-1, user_trap_syscall(__NR_getppid, flags)); 3279 ASSERT_EQ(EINVAL, errno); 3280 3281 /* but now they're not */ 3282 flags |= SECCOMP_FILTER_FLAG_TSYNC_ESRCH; 3283 ret = user_trap_syscall(__NR_getppid, flags); 3284 close(ret); 3285 ASSERT_LE(0, ret); 3286 } 3287 3288 TEST(user_notification_kill_in_middle) 3289 { 3290 pid_t pid; 3291 long ret; 3292 int listener; 3293 struct seccomp_notif req = {}; 3294 struct seccomp_notif_resp resp = {}; 3295 3296 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3297 ASSERT_EQ(0, ret) { 3298 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3299 } 3300 3301 listener = user_trap_syscall(__NR_getppid, 3302 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3303 ASSERT_GE(listener, 0); 3304 3305 /* 3306 * Check that nothing bad happens when we kill the task in the middle 3307 * of a syscall. 
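* Once the target has been SIGKILLed while its notification is pending, the saved id should no longer validate via SECCOMP_IOCTL_NOTIF_ID_VALID and responding to it should fail with ENOENT.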
3308 */ 3309 pid = fork(); 3310 ASSERT_GE(pid, 0); 3311 3312 if (pid == 0) { 3313 ret = syscall(__NR_getppid); 3314 exit(ret != USER_NOTIF_MAGIC); 3315 } 3316 3317 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3318 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), 0); 3319 3320 EXPECT_EQ(kill(pid, SIGKILL), 0); 3321 EXPECT_EQ(waitpid(pid, NULL, 0), pid); 3322 3323 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), -1); 3324 3325 resp.id = req.id; 3326 ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp); 3327 EXPECT_EQ(ret, -1); 3328 EXPECT_EQ(errno, ENOENT); 3329 } 3330 3331 static int handled = -1; 3332 3333 static void signal_handler(int signal) 3334 { 3335 if (write(handled, "c", 1) != 1) 3336 perror("write from signal"); 3337 } 3338 3339 TEST(user_notification_signal) 3340 { 3341 pid_t pid; 3342 long ret; 3343 int status, listener, sk_pair[2]; 3344 struct seccomp_notif req = {}; 3345 struct seccomp_notif_resp resp = {}; 3346 char c; 3347 3348 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3349 ASSERT_EQ(0, ret) { 3350 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3351 } 3352 3353 ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0); 3354 3355 listener = user_trap_syscall(__NR_gettid, 3356 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3357 ASSERT_GE(listener, 0); 3358 3359 pid = fork(); 3360 ASSERT_GE(pid, 0); 3361 3362 if (pid == 0) { 3363 close(sk_pair[0]); 3364 handled = sk_pair[1]; 3365 if (signal(SIGUSR1, signal_handler) == SIG_ERR) { 3366 perror("signal"); 3367 exit(1); 3368 } 3369 /* 3370 * ERESTARTSYS behavior is a bit hard to test, because we need 3371 * to rely on a signal that has not yet been handled. Let's at 3372 * least check that the error code gets propagated through, and 3373 * hope that it doesn't break when there is actually a signal :) 3374 */ 3375 ret = syscall(__NR_gettid); 3376 exit(!(ret == -1 && errno == 512)); 3377 } 3378 3379 close(sk_pair[1]); 3380 3381 memset(&req, 0, sizeof(req)); 3382 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3383 3384 EXPECT_EQ(kill(pid, SIGUSR1), 0); 3385 3386 /* 3387 * Make sure the signal really is delivered, which means we're not 3388 * stuck in the user notification code any more and the notification 3389 * should be dead. 3390 */ 3391 EXPECT_EQ(read(sk_pair[0], &c, 1), 1); 3392 3393 resp.id = req.id; 3394 resp.error = -EPERM; 3395 resp.val = 0; 3396 3397 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3398 EXPECT_EQ(errno, ENOENT); 3399 3400 memset(&req, 0, sizeof(req)); 3401 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3402 3403 resp.id = req.id; 3404 resp.error = -512; /* -ERESTARTSYS */ 3405 resp.val = 0; 3406 3407 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3408 3409 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3410 EXPECT_EQ(true, WIFEXITED(status)); 3411 EXPECT_EQ(0, WEXITSTATUS(status)); 3412 } 3413 3414 TEST(user_notification_closed_listener) 3415 { 3416 pid_t pid; 3417 long ret; 3418 int status, listener; 3419 3420 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3421 ASSERT_EQ(0, ret) { 3422 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3423 } 3424 3425 listener = user_trap_syscall(__NR_getppid, 3426 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3427 ASSERT_GE(listener, 0); 3428 3429 /* 3430 * Check that we get an ENOSYS when the listener is closed. 
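* With every copy of the listener fd closed there is no user-space agent left to answer, so the filtered getppid() in the child should fail with ENOSYS rather than block.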
3431 */ 3432 pid = fork(); 3433 ASSERT_GE(pid, 0); 3434 if (pid == 0) { 3435 close(listener); 3436 ret = syscall(__NR_getppid); 3437 exit(ret != -1 && errno != ENOSYS); 3438 } 3439 3440 close(listener); 3441 3442 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3443 EXPECT_EQ(true, WIFEXITED(status)); 3444 EXPECT_EQ(0, WEXITSTATUS(status)); 3445 } 3446 3447 /* 3448 * Check that a pid in a child namespace still shows up as valid in ours. 3449 */ 3450 TEST(user_notification_child_pid_ns) 3451 { 3452 pid_t pid; 3453 int status, listener; 3454 struct seccomp_notif req = {}; 3455 struct seccomp_notif_resp resp = {}; 3456 3457 ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0); 3458 3459 listener = user_trap_syscall(__NR_getppid, 3460 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3461 ASSERT_GE(listener, 0); 3462 3463 pid = fork(); 3464 ASSERT_GE(pid, 0); 3465 3466 if (pid == 0) 3467 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3468 3469 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3470 EXPECT_EQ(req.pid, pid); 3471 3472 resp.id = req.id; 3473 resp.error = 0; 3474 resp.val = USER_NOTIF_MAGIC; 3475 3476 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3477 3478 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3479 EXPECT_EQ(true, WIFEXITED(status)); 3480 EXPECT_EQ(0, WEXITSTATUS(status)); 3481 close(listener); 3482 } 3483 3484 /* 3485 * Check that a pid in a sibling (i.e. unrelated) namespace shows up as 0, i.e. 3486 * invalid. 3487 */ 3488 TEST(user_notification_sibling_pid_ns) 3489 { 3490 pid_t pid, pid2; 3491 int status, listener; 3492 struct seccomp_notif req = {}; 3493 struct seccomp_notif_resp resp = {}; 3494 3495 ASSERT_EQ(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), 0) { 3496 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3497 } 3498 3499 listener = user_trap_syscall(__NR_getppid, 3500 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3501 ASSERT_GE(listener, 0); 3502 3503 pid = fork(); 3504 ASSERT_GE(pid, 0); 3505 3506 if (pid == 0) { 3507 ASSERT_EQ(unshare(CLONE_NEWPID), 0); 3508 3509 pid2 = fork(); 3510 ASSERT_GE(pid2, 0); 3511 3512 if (pid2 == 0) 3513 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3514 3515 EXPECT_EQ(waitpid(pid2, &status, 0), pid2); 3516 EXPECT_EQ(true, WIFEXITED(status)); 3517 EXPECT_EQ(0, WEXITSTATUS(status)); 3518 exit(WEXITSTATUS(status)); 3519 } 3520 3521 /* Create the sibling ns, and sibling in it. */ 3522 ASSERT_EQ(unshare(CLONE_NEWPID), 0); 3523 ASSERT_EQ(errno, 0); 3524 3525 pid2 = fork(); 3526 ASSERT_GE(pid2, 0); 3527 3528 if (pid2 == 0) { 3529 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3530 /* 3531 * The pid should be 0, i.e. the task is in some namespace that 3532 * we can't "see". 
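* The triggering task was forked into a sibling pid namespace, so it has no mapping in the namespace of the task reading the notification.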
3533 */ 3534 EXPECT_EQ(req.pid, 0); 3535 3536 resp.id = req.id; 3537 resp.error = 0; 3538 resp.val = USER_NOTIF_MAGIC; 3539 3540 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3541 exit(0); 3542 } 3543 3544 close(listener); 3545 3546 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3547 EXPECT_EQ(true, WIFEXITED(status)); 3548 EXPECT_EQ(0, WEXITSTATUS(status)); 3549 3550 EXPECT_EQ(waitpid(pid2, &status, 0), pid2); 3551 EXPECT_EQ(true, WIFEXITED(status)); 3552 EXPECT_EQ(0, WEXITSTATUS(status)); 3553 } 3554 3555 TEST(user_notification_fault_recv) 3556 { 3557 pid_t pid; 3558 int status, listener; 3559 struct seccomp_notif req = {}; 3560 struct seccomp_notif_resp resp = {}; 3561 3562 ASSERT_EQ(unshare(CLONE_NEWUSER), 0); 3563 3564 listener = user_trap_syscall(__NR_getppid, 3565 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3566 ASSERT_GE(listener, 0); 3567 3568 pid = fork(); 3569 ASSERT_GE(pid, 0); 3570 3571 if (pid == 0) 3572 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3573 3574 /* Do a bad recv() */ 3575 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, NULL), -1); 3576 EXPECT_EQ(errno, EFAULT); 3577 3578 /* We should still be able to receive this notification, though. */ 3579 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3580 EXPECT_EQ(req.pid, pid); 3581 3582 resp.id = req.id; 3583 resp.error = 0; 3584 resp.val = USER_NOTIF_MAGIC; 3585 3586 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3587 3588 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3589 EXPECT_EQ(true, WIFEXITED(status)); 3590 EXPECT_EQ(0, WEXITSTATUS(status)); 3591 } 3592 3593 TEST(seccomp_get_notif_sizes) 3594 { 3595 struct seccomp_notif_sizes sizes; 3596 3597 ASSERT_EQ(seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes), 0); 3598 EXPECT_EQ(sizes.seccomp_notif, sizeof(struct seccomp_notif)); 3599 EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp)); 3600 } 3601 3602 static int filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2) 3603 { 3604 #ifdef __NR_kcmp 3605 return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2); 3606 #else 3607 errno = ENOSYS; 3608 return -1; 3609 #endif 3610 } 3611 3612 TEST(user_notification_continue) 3613 { 3614 pid_t pid; 3615 long ret; 3616 int status, listener; 3617 struct seccomp_notif req = {}; 3618 struct seccomp_notif_resp resp = {}; 3619 struct pollfd pollfd; 3620 3621 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3622 ASSERT_EQ(0, ret) { 3623 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3624 } 3625 3626 listener = user_trap_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER); 3627 ASSERT_GE(listener, 0); 3628 3629 pid = fork(); 3630 ASSERT_GE(pid, 0); 3631 3632 if (pid == 0) { 3633 int dup_fd, pipe_fds[2]; 3634 pid_t self; 3635 3636 ret = pipe(pipe_fds); 3637 if (ret < 0) 3638 exit(1); 3639 3640 dup_fd = dup(pipe_fds[0]); 3641 if (dup_fd < 0) 3642 exit(1); 3643 3644 self = getpid(); 3645 3646 ret = filecmp(self, self, pipe_fds[0], dup_fd); 3647 if (ret) 3648 exit(2); 3649 3650 exit(0); 3651 } 3652 3653 pollfd.fd = listener; 3654 pollfd.events = POLLIN | POLLOUT; 3655 3656 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3657 EXPECT_EQ(pollfd.revents, POLLIN); 3658 3659 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3660 3661 pollfd.fd = listener; 3662 pollfd.events = POLLIN | POLLOUT; 3663 3664 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3665 EXPECT_EQ(pollfd.revents, POLLOUT); 3666 3667 EXPECT_EQ(req.data.nr, __NR_dup); 3668 3669 resp.id = req.id; 3670 resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE; 3671 3672 /* 3673 * Verify that setting 
SECCOMP_USER_NOTIF_FLAG_CONTINUE enforces other 3674 * args be set to 0. 3675 */ 3676 resp.error = 0; 3677 resp.val = USER_NOTIF_MAGIC; 3678 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3679 EXPECT_EQ(errno, EINVAL); 3680 3681 resp.error = USER_NOTIF_MAGIC; 3682 resp.val = 0; 3683 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3684 EXPECT_EQ(errno, EINVAL); 3685 3686 resp.error = 0; 3687 resp.val = 0; 3688 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0) { 3689 if (errno == EINVAL) 3690 XFAIL(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE"); 3691 } 3692 3693 skip: 3694 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3695 EXPECT_EQ(true, WIFEXITED(status)); 3696 EXPECT_EQ(0, WEXITSTATUS(status)) { 3697 if (WEXITSTATUS(status) == 2) { 3698 XFAIL(return, "Kernel does not support kcmp() syscall"); 3699 return; 3700 } 3701 } 3702 } 3703 3704 /* 3705 * TODO: 3706 * - add microbenchmarks 3707 * - expand NNP testing 3708 * - better arch-specific TRACE and TRAP handlers. 3709 * - endianness checking when appropriate 3710 * - 64-bit arg prodding 3711 * - arch value testing (x86 modes especially) 3712 * - verify that FILTER_FLAG_LOG filters generate log messages 3713 * - verify that RET_LOG generates log messages 3714 * - ... 3715 */ 3716 3717 TEST_HARNESS_MAIN 3718