// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
 *
 * Test code for seccomp bpf.
 */

#define _GNU_SOURCE
#include <sys/types.h>

/*
 * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
 * we need to use the kernel's siginfo.h file and trick glibc
 * into accepting it.
 */
#if !__GLIBC_PREREQ(2, 26)
# include <asm/siginfo.h>
# define __have_siginfo_t 1
# define __have_sigval_t 1
# define __have_sigevent_t 1
#endif

#include <errno.h>
#include <linux/filter.h>
#include <sys/prctl.h>
#include <sys/ptrace.h>
#include <sys/user.h>
#include <linux/prctl.h>
#include <linux/ptrace.h>
#include <linux/seccomp.h>
#include <pthread.h>
#include <semaphore.h>
#include <signal.h>
#include <stddef.h>
#include <stdbool.h>
#include <string.h>
#include <time.h>
#include <limits.h>
#include <linux/elf.h>
#include <sys/uio.h>
#include <sys/utsname.h>
#include <sys/fcntl.h>
#include <sys/mman.h>
#include <sys/times.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <linux/kcmp.h>
#include <sys/resource.h>
#include <sys/capability.h>

#include <unistd.h>
#include <sys/syscall.h>
#include <poll.h>

#include "../kselftest_harness.h"
#include "../clone3/clone3_selftests.h"

/* Attempt to de-conflict with the selftests tree. */
#ifndef SKIP
#define SKIP(s, ...)	XFAIL(s, ##__VA_ARGS__)
#endif

#ifndef MIN
#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
#endif

#ifndef PR_SET_PTRACER
# define PR_SET_PTRACER 0x59616d61
#endif

#ifndef PR_SET_NO_NEW_PRIVS
#define PR_SET_NO_NEW_PRIVS 38
#define PR_GET_NO_NEW_PRIVS 39
#endif

#ifndef PR_SECCOMP_EXT
#define PR_SECCOMP_EXT 43
#endif

#ifndef SECCOMP_EXT_ACT
#define SECCOMP_EXT_ACT 1
#endif

#ifndef SECCOMP_EXT_ACT_TSYNC
#define SECCOMP_EXT_ACT_TSYNC 1
#endif

#ifndef SECCOMP_MODE_STRICT
#define SECCOMP_MODE_STRICT 1
#endif

#ifndef SECCOMP_MODE_FILTER
#define SECCOMP_MODE_FILTER 2
#endif

#ifndef SECCOMP_RET_ALLOW
struct seccomp_data {
	int nr;
	__u32 arch;
	__u64 instruction_pointer;
	__u64 args[6];
};
#endif

#ifndef SECCOMP_RET_KILL_PROCESS
#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
#define SECCOMP_RET_KILL_THREAD	 0x00000000U /* kill the thread */
#endif
#ifndef SECCOMP_RET_KILL
#define SECCOMP_RET_KILL	 SECCOMP_RET_KILL_THREAD
#define SECCOMP_RET_TRAP	 0x00030000U /* disallow and force a SIGSYS */
#define SECCOMP_RET_ERRNO	 0x00050000U /* returns an errno */
#define SECCOMP_RET_TRACE	 0x7ff00000U /* pass to a tracer or disallow */
#define SECCOMP_RET_ALLOW	 0x7fff0000U /* allow */
#endif
#ifndef SECCOMP_RET_LOG
#define SECCOMP_RET_LOG		 0x7ffc0000U /* allow after logging */
#endif

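/*
 * Fallback syscall numbers for seccomp(2) itself, for C libraries whose
 * headers do not define __NR_seccomp.
 */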
#ifndef __NR_seccomp
# if defined(__i386__)
#  define __NR_seccomp 354
# elif defined(__x86_64__)
#  define __NR_seccomp 317
# elif defined(__arm__)
#  define __NR_seccomp 383
# elif defined(__aarch64__)
#  define __NR_seccomp 277
# elif defined(__riscv)
#  define __NR_seccomp 277
# elif defined(__csky__)
#  define __NR_seccomp 277
# elif defined(__loongarch__)
#  define __NR_seccomp 277
# elif defined(__hppa__)
#  define __NR_seccomp 338
# elif defined(__powerpc__)
#  define __NR_seccomp 358
# elif defined(__s390__)
#  define __NR_seccomp 348
# elif defined(__xtensa__)
#  define __NR_seccomp 337
# elif defined(__sh__)
#  define __NR_seccomp 372
# elif defined(__mc68000__)
#  define __NR_seccomp 380
# else
#  warning "seccomp syscall number unknown for this architecture"
#  define __NR_seccomp 0xffff
# endif
#endif

#ifndef SECCOMP_SET_MODE_STRICT
#define SECCOMP_SET_MODE_STRICT 0
#endif

#ifndef SECCOMP_SET_MODE_FILTER
#define SECCOMP_SET_MODE_FILTER 1
#endif

#ifndef SECCOMP_GET_ACTION_AVAIL
#define SECCOMP_GET_ACTION_AVAIL 2
#endif

#ifndef SECCOMP_GET_NOTIF_SIZES
#define SECCOMP_GET_NOTIF_SIZES 3
#endif

#ifndef SECCOMP_FILTER_FLAG_TSYNC
#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
#endif

#ifndef SECCOMP_FILTER_FLAG_LOG
#define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
#endif

#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
#endif

#ifndef PTRACE_SECCOMP_GET_METADATA
#define PTRACE_SECCOMP_GET_METADATA	0x420d

struct seccomp_metadata {
	__u64 filter_off;	/* Input: which filter */
	__u64 flags;		/* Output: filter's flags */
};
#endif

#ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER
#define SECCOMP_FILTER_FLAG_NEW_LISTENER	(1UL << 3)
#endif

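/*
 * Fallback definitions of the seccomp user-notification UAPI (the
 * notification fd ioctls and their structs) for builds against older
 * kernel headers that lack them.
 */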
#ifndef SECCOMP_RET_USER_NOTIF
#define SECCOMP_RET_USER_NOTIF 0x7fc00000U

#define SECCOMP_IOC_MAGIC		'!'
#define SECCOMP_IO(nr)			_IO(SECCOMP_IOC_MAGIC, nr)
#define SECCOMP_IOR(nr, type)		_IOR(SECCOMP_IOC_MAGIC, nr, type)
#define SECCOMP_IOW(nr, type)		_IOW(SECCOMP_IOC_MAGIC, nr, type)
#define SECCOMP_IOWR(nr, type)		_IOWR(SECCOMP_IOC_MAGIC, nr, type)

/* Flags for seccomp notification fd ioctl. */
#define SECCOMP_IOCTL_NOTIF_RECV	SECCOMP_IOWR(0, struct seccomp_notif)
#define SECCOMP_IOCTL_NOTIF_SEND	SECCOMP_IOWR(1,	\
						struct seccomp_notif_resp)
#define SECCOMP_IOCTL_NOTIF_ID_VALID	SECCOMP_IOW(2, __u64)

struct seccomp_notif {
	__u64 id;
	__u32 pid;
	__u32 flags;
	struct seccomp_data data;
};

struct seccomp_notif_resp {
	__u64 id;
	__s64 val;
	__s32 error;
	__u32 flags;
};

struct seccomp_notif_sizes {
	__u16 seccomp_notif;
	__u16 seccomp_notif_resp;
	__u16 seccomp_data;
};
#endif

#ifndef SECCOMP_IOCTL_NOTIF_ADDFD
/* On success, the return value is the remote process's added fd number */
#define SECCOMP_IOCTL_NOTIF_ADDFD	SECCOMP_IOW(3,	\
						struct seccomp_notif_addfd)

/* valid flags for seccomp_notif_addfd */
#define SECCOMP_ADDFD_FLAG_SETFD	(1UL << 0) /* Specify remote fd */

struct seccomp_notif_addfd {
	__u64 id;
	__u32 flags;
	__u32 srcfd;
	__u32 newfd;
	__u32 newfd_flags;
};
#endif

#ifndef SECCOMP_ADDFD_FLAG_SEND
#define SECCOMP_ADDFD_FLAG_SEND	(1UL << 1) /* Addfd and return it, atomically */
#endif

struct seccomp_notif_addfd_small {
	__u64 id;
	char weird[4];
};
#define SECCOMP_IOCTL_NOTIF_ADDFD_SMALL	\
	SECCOMP_IOW(3, struct seccomp_notif_addfd_small)

struct seccomp_notif_addfd_big {
	union {
		struct seccomp_notif_addfd addfd;
		char buf[sizeof(struct seccomp_notif_addfd) + 8];
	};
};
#define SECCOMP_IOCTL_NOTIF_ADDFD_BIG	\
	SECCOMP_IOWR(3, struct seccomp_notif_addfd_big)

#ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY
#define PTRACE_EVENTMSG_SYSCALL_ENTRY	1
#define PTRACE_EVENTMSG_SYSCALL_EXIT	2
#endif

#ifndef SECCOMP_USER_NOTIF_FLAG_CONTINUE
#define SECCOMP_USER_NOTIF_FLAG_CONTINUE 0x00000001
#endif

#ifndef SECCOMP_FILTER_FLAG_TSYNC_ESRCH
#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
#endif

#ifndef SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV
#define SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV (1UL << 5)
#endif

#ifndef seccomp
int seccomp(unsigned int op, unsigned int flags, void *args)
{
	errno = 0;
	return syscall(__NR_seccomp, op, flags, args);
}
#endif

#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
#else
#error "wut? Unknown __BYTE_ORDER__?!"
#endif

#define SIBLING_EXIT_UNKILLED	0xbadbeef
#define SIBLING_EXIT_FAILURE	0xbadface
#define SIBLING_EXIT_NEWPRIVS	0xbadfeed

static int __filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2)
{
#ifdef __NR_kcmp
	errno = 0;
	return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2);
#else
	errno = ENOSYS;
	return -1;
#endif
}

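/*
 * Illustrative sketch only (not used by any test, and the helper name is
 * made up for this example): nearly every test below follows the same
 * pattern of setting no_new_privs and then attaching a BPF filter, either
 * via prctl(PR_SET_SECCOMP, ...) or via the seccomp() wrapper above.
 */
static inline int install_example_allow_all(void)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};

	/* Filters normally require no_new_privs (or CAP_SYS_ADMIN). */
	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0))
		return -1;
	return seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
}
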
/* Have TH_LOG report actual location filecmp() is used. */
#define filecmp(pid1, pid2, fd1, fd2)	({		\
	int _ret;					\
							\
	_ret = __filecmp(pid1, pid2, fd1, fd2);		\
	if (_ret != 0) {				\
		if (_ret < 0 && errno == ENOSYS) {	\
			TH_LOG("kcmp() syscall missing (test is less accurate)");\
			_ret = 0;			\
		}					\
	}						\
	_ret; })

TEST(kcmp)
{
	int ret;

	ret = __filecmp(getpid(), getpid(), 1, 1);
	EXPECT_EQ(ret, 0);
	if (ret != 0 && errno == ENOSYS)
		SKIP(return, "Kernel does not support kcmp() (missing CONFIG_KCMP?)");
}

TEST(mode_strict_support)
{
	long ret;

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support CONFIG_SECCOMP");
	}
	syscall(__NR_exit, 0);
}

TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
{
	long ret;

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support CONFIG_SECCOMP");
	}
	syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
		NULL, NULL, NULL);
	EXPECT_FALSE(true) {
		TH_LOG("Unreachable!");
	}
}

/* Note! This doesn't test no new privs behavior */
TEST(no_new_privs_support)
{
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	EXPECT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}
}

/* Tests kernel support by checking for a copy_from_user() fault on NULL. */
TEST(mode_filter_support)
{
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EFAULT, errno) {
		TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
	}
}

TEST(mode_filter_without_nnp)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;
	cap_t cap = cap_get_proc();
	cap_flag_value_t is_cap_sys_admin = 0;

	ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
	ASSERT_LE(0, ret) {
		TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
	}
	errno = 0;
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
	/* Succeeds with CAP_SYS_ADMIN, fails without */
	cap_get_flag(cap, CAP_SYS_ADMIN, CAP_EFFECTIVE, &is_cap_sys_admin);
	if (!is_cap_sys_admin) {
		EXPECT_EQ(-1, ret);
		EXPECT_EQ(EACCES, errno);
	} else {
		EXPECT_EQ(0, ret);
	}
}

#define MAX_INSNS_PER_PATH 32768

TEST(filter_size_limits)
{
	int i;
	int count = BPF_MAXINSNS + 1;
	struct sock_filter allow[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_filter *filter;
	struct sock_fprog prog = { };
	long ret;

	filter = calloc(count, sizeof(*filter));
	ASSERT_NE(NULL, filter);

	for (i = 0; i < count; i++)
		filter[i] = allow[0];

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	prog.filter = filter;
	prog.len = count;

	/* Too many filter instructions in a single filter. */
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
	ASSERT_NE(0, ret) {
		TH_LOG("Installing %d insn filter was allowed", prog.len);
	}

	/* One less is okay, though. */
	prog.len -= 1;
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
	}
}

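/*
 * BPF_MAXINSNS is 4096, so the rejected filter above used 4097 instructions.
 * The next test exercises the cumulative limit instead: it keeps attaching
 * filters until the total instruction count (with a small per-filter
 * overhead, visible in the failure log below) exceeds what the kernel
 * allows, mirrored here as MAX_INSNS_PER_PATH.
 */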
TEST(filter_chain_limits)
{
	int i;
	int count = BPF_MAXINSNS;
	struct sock_filter allow[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_filter *filter;
	struct sock_fprog prog = { };
	long ret;

	filter = calloc(count, sizeof(*filter));
	ASSERT_NE(NULL, filter);

	for (i = 0; i < count; i++)
		filter[i] = allow[0];

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	prog.filter = filter;
	prog.len = 1;

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
	ASSERT_EQ(0, ret);

	prog.len = count;

	/* Too many total filter instructions. */
	for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
		if (ret != 0)
			break;
	}
	ASSERT_NE(0, ret) {
		TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
		       i, count, i * (count + 4));
	}
}

TEST(mode_filter_cannot_move_to_strict)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno);
}


TEST(mode_filter_get_seccomp)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
	EXPECT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
	EXPECT_EQ(2, ret);
}


TEST(ALLOW_all)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);
}

TEST(empty_prog)
{
	struct sock_filter filter[] = {
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno);
}

TEST(log_all)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;
	pid_t parent = getppid();

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);

	/* getppid() should succeed and be logged (no check for logging) */
	EXPECT_EQ(parent, syscall(__NR_getppid));
}

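/*
 * Filter return values the kernel does not recognize must fail closed:
 * they are treated as kill rather than allow, both for unused action
 * values (0x10000000) and for values above the defined range (0x90000000).
 */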
TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);
	EXPECT_EQ(0, syscall(__NR_getpid)) {
		TH_LOG("getpid() shouldn't ever return");
	}
}

/* return code >= 0x80000000 is unused. */
TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);
	EXPECT_EQ(0, syscall(__NR_getpid)) {
		TH_LOG("getpid() shouldn't ever return");
	}
}

TEST_SIGNAL(KILL_all, SIGSYS)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);
}

TEST_SIGNAL(KILL_one, SIGSYS)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;
	pid_t parent = getppid();

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* getpid() should never return. */
	EXPECT_EQ(0, syscall(__NR_getpid));
}

TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
{
	void *fatal_address;
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		/* Only bother with the lower 32 bits for now. */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K,
			(unsigned long)&fatal_address, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;
	pid_t parent = getppid();
	struct tms timebuf;
	clock_t clock = times(&timebuf);

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	EXPECT_LE(clock, syscall(__NR_times, &timebuf));
	/* times() should never return. */
	EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
}

TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
{
#ifndef __NR_mmap2
	int sysno = __NR_mmap;
#else
	int sysno = __NR_mmap2;
#endif
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		/* Only bother with the lower 32 bits for now. */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;
	pid_t parent = getppid();
	int fd;
	void *map1, *map2;
	int page_size = sysconf(_SC_PAGESIZE);

	ASSERT_LT(0, page_size);

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);

	fd = open("/dev/zero", O_RDONLY);
	ASSERT_NE(-1, fd);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	map1 = (void *)syscall(sysno,
		NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
	EXPECT_NE(MAP_FAILED, map1);
	/* mmap2() should never return. */
	map2 = (void *)syscall(sysno,
		NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
	EXPECT_EQ(MAP_FAILED, map2);

	/* The test failed, so clean up the resources. */
	munmap(map1, page_size);
	munmap(map2, page_size);
	close(fd);
}

/* A thread task that dies via a seccomp filter violation when asked to. */
void *kill_thread(void *data)
{
	bool die = (bool)data;

	if (die) {
		syscall(__NR_getpid);
		return (void *)SIBLING_EXIT_FAILURE;
	}

	return (void *)SIBLING_EXIT_UNKILLED;
}

enum kill_t {
	KILL_THREAD,
	KILL_PROCESS,
	RET_UNKNOWN
};

/* Prepare a thread that will kill itself or both of us. */
void kill_thread_or_group(struct __test_metadata *_metadata,
			  enum kill_t kill_how)
{
	pthread_t thread;
	void *status;
	/* Kill only when calling __NR_getpid. */
	struct sock_filter filter_thread[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog_thread = {
		.len = (unsigned short)ARRAY_SIZE(filter_thread),
		.filter = filter_thread,
	};
	int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAA;
	struct sock_filter filter_process[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, kill),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog_process = {
		.len = (unsigned short)ARRAY_SIZE(filter_process),
		.filter = filter_process,
	};

	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
			     kill_how == KILL_THREAD ? &prog_thread
						     : &prog_process));

	/*
	 * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
	 * flag cannot be downgraded by a new filter.
	 */
	if (kill_how == KILL_PROCESS)
		ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));

	/* Start a thread that will exit immediately. */
	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
	ASSERT_EQ(0, pthread_join(thread, &status));
	ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status);

	/* Start a thread that will die immediately. */
	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true));
	ASSERT_EQ(0, pthread_join(thread, &status));
	ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status);

	/*
	 * If we get here, only the spawned thread died. Let the parent know
	 * the whole process didn't die (i.e. this thread, the spawner,
	 * stayed running).
	 */
	exit(42);
}

TEST(KILL_thread)
{
	int status;
	pid_t child_pid;

	child_pid = fork();
	ASSERT_LE(0, child_pid);
	if (child_pid == 0) {
		kill_thread_or_group(_metadata, KILL_THREAD);
		_exit(38);
	}

	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));

	/* If only the thread was killed, we'll see exit 42. */
	ASSERT_TRUE(WIFEXITED(status));
	ASSERT_EQ(42, WEXITSTATUS(status));
}

TEST(KILL_process)
{
	int status;
	pid_t child_pid;

	child_pid = fork();
	ASSERT_LE(0, child_pid);
	if (child_pid == 0) {
		kill_thread_or_group(_metadata, KILL_PROCESS);
		_exit(38);
	}

	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));

	/* If the entire process was killed, we'll see SIGSYS. */
	ASSERT_TRUE(WIFSIGNALED(status));
	ASSERT_EQ(SIGSYS, WTERMSIG(status));
}

TEST(KILL_unknown)
{
	int status;
	pid_t child_pid;

	child_pid = fork();
	ASSERT_LE(0, child_pid);
	if (child_pid == 0) {
		kill_thread_or_group(_metadata, RET_UNKNOWN);
		_exit(38);
	}

	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));

	/* If the entire process was killed, we'll see SIGSYS. */
	EXPECT_TRUE(WIFSIGNALED(status)) {
		TH_LOG("Unknown SECCOMP_RET is only killing the thread?");
	}
	ASSERT_EQ(SIGSYS, WTERMSIG(status));
}

/* TODO(wad) add 64-bit versus 32-bit arg tests. */
TEST(arg_out_of_range)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno);
}

#define ERRNO_FILTER(name, errno)					\
	struct sock_filter _read_filter_##name[] = {			\
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,				\
			offsetof(struct seccomp_data, nr)),		\
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),	\
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | errno),	\
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),		\
	};								\
	struct sock_fprog prog_##name = {				\
		.len = (unsigned short)ARRAY_SIZE(_read_filter_##name),	\
		.filter = _read_filter_##name,				\
	}

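/*
 * SECCOMP_RET_ERRNO carries the errno value in the low 16 data bits of the
 * filter's return, so ERRNO_FILTER(valid, E2BIG) yields a filter that fails
 * read(2) with errno == E2BIG without ever entering the kernel's read path.
 */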
/* Make sure basic errno values are correctly passed through a filter. */
TEST(ERRNO_valid)
{
	ERRNO_FILTER(valid, E2BIG);
	long ret;
	pid_t parent = getppid();

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	EXPECT_EQ(-1, read(-1, NULL, 0));
	EXPECT_EQ(E2BIG, errno);
}

/* Make sure an errno of zero is correctly handled by the arch code. */
TEST(ERRNO_zero)
{
	ERRNO_FILTER(zero, 0);
	long ret;
	pid_t parent = getppid();

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* "errno" of 0 is ok. */
	EXPECT_EQ(0, read(-1, NULL, 0));
}

/*
 * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller.
 * This tests that the errno value gets capped correctly, fixed by
 * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO").
 */
TEST(ERRNO_capped)
{
	ERRNO_FILTER(capped, 4096);
	long ret;
	pid_t parent = getppid();

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	EXPECT_EQ(-1, read(-1, NULL, 0));
	EXPECT_EQ(4095, errno);
}

/*
 * Filters are processed in reverse order: last applied is executed first.
 * Since only the SECCOMP_RET_ACTION mask is tested for return values, the
 * SECCOMP_RET_DATA mask results will follow the most recently applied
 * matching filter return (and not the lowest or highest value).
 */
TEST(ERRNO_order)
{
	ERRNO_FILTER(first, 11);
	ERRNO_FILTER(second, 13);
	ERRNO_FILTER(third, 12);
	long ret;
	pid_t parent = getppid();

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	EXPECT_EQ(-1, read(-1, NULL, 0));
	EXPECT_EQ(12, errno);
}

FIXTURE(TRAP) {
	struct sock_fprog prog;
};

FIXTURE_SETUP(TRAP)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};

	memset(&self->prog, 0, sizeof(self->prog));
	self->prog.filter = malloc(sizeof(filter));
	ASSERT_NE(NULL, self->prog.filter);
	memcpy(self->prog.filter, filter, sizeof(filter));
	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
}

FIXTURE_TEARDOWN(TRAP)
{
	if (self->prog.filter)
		free(self->prog.filter);
}

TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
{
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
	ASSERT_EQ(0, ret);
	syscall(__NR_getpid);
}

/* Ensure that SIGSYS overrides SIG_IGN */
TEST_F_SIGNAL(TRAP, ign, SIGSYS)
{
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	signal(SIGSYS, SIG_IGN);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
	ASSERT_EQ(0, ret);
	syscall(__NR_getpid);
}

static siginfo_t TRAP_info;
static volatile int TRAP_nr;
static void TRAP_action(int nr, siginfo_t *info, void *void_context)
{
	memcpy(&TRAP_info, info, sizeof(TRAP_info));
	TRAP_nr = nr;
}

TEST_F(TRAP, handler)
{
	int ret, test;
	struct sigaction act;
	sigset_t mask;

	memset(&act, 0, sizeof(act));
	sigemptyset(&mask);
	sigaddset(&mask, SIGSYS);

	act.sa_sigaction = &TRAP_action;
	act.sa_flags = SA_SIGINFO;
	ret = sigaction(SIGSYS, &act, NULL);
	ASSERT_EQ(0, ret) {
		TH_LOG("sigaction failed");
	}
	ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
	ASSERT_EQ(0, ret) {
		TH_LOG("sigprocmask failed");
	}

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
	ASSERT_EQ(0, ret);
	TRAP_nr = 0;
	memset(&TRAP_info, 0, sizeof(TRAP_info));
	/* Expect the registers to be rolled back. (nr = error) may vary
	 * based on arch. */
	ret = syscall(__NR_getpid);
	/* Silence gcc warning about volatile. */
	test = TRAP_nr;
	EXPECT_EQ(SIGSYS, test);
	struct local_sigsys {
		void *_call_addr;	/* calling user insn */
		int _syscall;		/* triggering system call number */
		unsigned int _arch;	/* AUDIT_ARCH_* of syscall */
	} *sigsys = (struct local_sigsys *)
#ifdef si_syscall
		&(TRAP_info.si_call_addr);
#else
		&TRAP_info.si_pid;
#endif
	EXPECT_EQ(__NR_getpid, sigsys->_syscall);
	/* Make sure arch is non-zero. */
	EXPECT_NE(0, sigsys->_arch);
	EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
}

FIXTURE(precedence) {
	struct sock_fprog allow;
	struct sock_fprog log;
	struct sock_fprog trace;
	struct sock_fprog error;
	struct sock_fprog trap;
	struct sock_fprog kill;
};

FIXTURE_SETUP(precedence)
{
	struct sock_filter allow_insns[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_filter log_insns[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
	};
	struct sock_filter trace_insns[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
	};
	struct sock_filter error_insns[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
	};
	struct sock_filter trap_insns[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
	};
	struct sock_filter kill_insns[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
	};

	memset(self, 0, sizeof(*self));
#define FILTER_ALLOC(_x) \
	self->_x.filter = malloc(sizeof(_x##_insns)); \
	ASSERT_NE(NULL, self->_x.filter); \
	memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
	self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
	FILTER_ALLOC(allow);
	FILTER_ALLOC(log);
	FILTER_ALLOC(trace);
	FILTER_ALLOC(error);
	FILTER_ALLOC(trap);
	FILTER_ALLOC(kill);
}

FIXTURE_TEARDOWN(precedence)
{
#define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
	FILTER_FREE(allow);
	FILTER_FREE(log);
	FILTER_FREE(trace);
	FILTER_FREE(error);
	FILTER_FREE(trap);
	FILTER_FREE(kill);
}

TEST_F(precedence, allow_ok)
{
	pid_t parent, res = 0;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	res = syscall(__NR_getppid);
	EXPECT_EQ(parent, res);
}

TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
{
	pid_t parent, res = 0;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	res = syscall(__NR_getppid);
	EXPECT_EQ(parent, res);
	/* getpid() should never return. */
	res = syscall(__NR_getpid);
	EXPECT_EQ(0, res);
}

TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
{
	pid_t parent;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* getpid() should never return. */
	EXPECT_EQ(0, syscall(__NR_getpid));
}

TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
{
	pid_t parent;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* getpid() should never return. */
	EXPECT_EQ(0, syscall(__NR_getpid));
}

TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
{
	pid_t parent;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* getpid() should never return. */
	EXPECT_EQ(0, syscall(__NR_getpid));
}

TEST_F(precedence, errno_is_third)
{
	pid_t parent;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	EXPECT_EQ(0, syscall(__NR_getpid));
}

TEST_F(precedence, errno_is_third_in_any_order)
{
	pid_t parent;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	EXPECT_EQ(0, syscall(__NR_getpid));
}

TEST_F(precedence, trace_is_fourth)
{
	pid_t parent;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* No ptracer */
	EXPECT_EQ(-1, syscall(__NR_getpid));
}

TEST_F(precedence, trace_is_fourth_in_any_order)
{
	pid_t parent;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* No ptracer */
	EXPECT_EQ(-1, syscall(__NR_getpid));
}

TEST_F(precedence, log_is_fifth)
{
	pid_t mypid, parent;
	long ret;

	mypid = getpid();
	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* Should also work just fine */
	EXPECT_EQ(mypid, syscall(__NR_getpid));
}

TEST_F(precedence, log_is_fifth_in_any_order)
{
	pid_t mypid, parent;
	long ret;

	mypid = getpid();
	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* Should also work just fine */
	EXPECT_EQ(mypid, syscall(__NR_getpid));
}

#ifndef PTRACE_O_TRACESECCOMP
#define PTRACE_O_TRACESECCOMP	0x00000080
#endif

/* Catch the Ubuntu 12.04 value error. */
#if PTRACE_EVENT_SECCOMP != 7
#undef PTRACE_EVENT_SECCOMP
#endif

#ifndef PTRACE_EVENT_SECCOMP
#define PTRACE_EVENT_SECCOMP 7
#endif

#define PTRACE_EVENT_MASK(status) ((status) >> 16)
bool tracer_running;
void tracer_stop(int sig)
{
	tracer_running = false;
}

typedef void tracer_func_t(struct __test_metadata *_metadata,
			   pid_t tracee, int status, void *args);

void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
		  tracer_func_t tracer_func, void *args, bool ptrace_syscall)
{
	int ret = -1;
	struct sigaction action = {
		.sa_handler = tracer_stop,
	};

	/* Allow external shutdown. */
	tracer_running = true;
	ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));

	errno = 0;
	while (ret == -1 && errno != EINVAL)
		ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
	ASSERT_EQ(0, ret) {
		kill(tracee, SIGKILL);
	}
	/* Wait for attach stop */
	wait(NULL);

	ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ?
						      PTRACE_O_TRACESYSGOOD :
						      PTRACE_O_TRACESECCOMP);
	ASSERT_EQ(0, ret) {
		TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
		kill(tracee, SIGKILL);
	}
	ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
		     tracee, NULL, 0);
	ASSERT_EQ(0, ret);

	/* Unblock the tracee */
	ASSERT_EQ(1, write(fd, "A", 1));
	ASSERT_EQ(0, close(fd));

	/* Run until we're shut down. Must assert to stop execution. */
	while (tracer_running) {
		int status;

		if (wait(&status) != tracee)
			continue;

		if (WIFSIGNALED(status)) {
			/* Child caught a fatal signal. */
			return;
		}
		if (WIFEXITED(status)) {
			/* Child exited with code. */
			return;
		}

		/* Check if we got an expected event. */
		ASSERT_EQ(WIFCONTINUED(status), false);
		ASSERT_EQ(WIFSTOPPED(status), true);
		ASSERT_EQ(WSTOPSIG(status) & SIGTRAP, SIGTRAP) {
			TH_LOG("Unexpected WSTOPSIG: %d", WSTOPSIG(status));
		}

		tracer_func(_metadata, tracee, status, args);

		ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
			     tracee, NULL, 0);
		ASSERT_EQ(0, ret);
	}
	/* Directly report the status of our test harness results. */
	syscall(__NR_exit, _metadata->exit_code);
}

/* Common tracer setup/teardown functions. */
void cont_handler(int num)
{ }
pid_t setup_trace_fixture(struct __test_metadata *_metadata,
			  tracer_func_t func, void *args, bool ptrace_syscall)
{
	char sync;
	int pipefd[2];
	pid_t tracer_pid;
	pid_t tracee = getpid();

	/* Setup a pipe for clean synchronization. */
	ASSERT_EQ(0, pipe(pipefd));

	/* Fork a child which we'll promote to tracer */
	tracer_pid = fork();
	ASSERT_LE(0, tracer_pid);
	signal(SIGALRM, cont_handler);
	if (tracer_pid == 0) {
		close(pipefd[0]);
		start_tracer(_metadata, pipefd[1], tracee, func, args,
			     ptrace_syscall);
		syscall(__NR_exit, 0);
	}
	close(pipefd[1]);
	prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
	read(pipefd[0], &sync, 1);
	close(pipefd[0]);

	return tracer_pid;
}

void teardown_trace_fixture(struct __test_metadata *_metadata,
			    pid_t tracer)
{
	if (tracer) {
		int status;
		/*
		 * Extract the exit code from the other process and
		 * adopt it for ourselves in case its asserts failed.
		 */
		ASSERT_EQ(0, kill(tracer, SIGUSR1));
		ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
		if (WEXITSTATUS(status))
			_metadata->exit_code = KSFT_FAIL;
	}
}

/* "poke" tracer arguments and function. */
struct tracer_args_poke_t {
	unsigned long poke_addr;
};

void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
		 void *args)
{
	int ret;
	unsigned long msg;
	struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;

	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
	EXPECT_EQ(0, ret);
	/* If this fails, don't try to recover. */
	ASSERT_EQ(0x1001, msg) {
		kill(tracee, SIGKILL);
	}
	/*
	 * Poke in the message.
	 * Registers are not touched to try to keep this relatively arch
	 * agnostic.
	 */
	ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
	EXPECT_EQ(0, ret);
}

FIXTURE(TRACE_poke) {
	struct sock_fprog prog;
	pid_t tracer;
	long poked;
	struct tracer_args_poke_t tracer_args;
};

FIXTURE_SETUP(TRACE_poke)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};

	self->poked = 0;
	memset(&self->prog, 0, sizeof(self->prog));
	self->prog.filter = malloc(sizeof(filter));
	ASSERT_NE(NULL, self->prog.filter);
	memcpy(self->prog.filter, filter, sizeof(filter));
	self->prog.len = (unsigned short)ARRAY_SIZE(filter);

	/* Set up tracer args. */
	self->tracer_args.poke_addr = (unsigned long)&self->poked;

	/* Launch tracer. */
	self->tracer = setup_trace_fixture(_metadata, tracer_poke,
					   &self->tracer_args, false);
}

FIXTURE_TEARDOWN(TRACE_poke)
{
	teardown_trace_fixture(_metadata, self->tracer);
	if (self->prog.filter)
		free(self->prog.filter);
}

TEST_F(TRACE_poke, read_has_side_effects)
{
	ssize_t ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(0, self->poked);
	ret = read(-1, NULL, 0);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(0x1001, self->poked);
}

TEST_F(TRACE_poke, getpid_runs_normally)
{
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(0, self->poked);
	EXPECT_NE(0, syscall(__NR_getpid));
	EXPECT_EQ(0, self->poked);
}

#if defined(__x86_64__)
# define ARCH_REGS		struct user_regs_struct
# define SYSCALL_NUM(_regs)	(_regs).orig_rax
# define SYSCALL_RET(_regs)	(_regs).rax
#elif defined(__i386__)
# define ARCH_REGS		struct user_regs_struct
# define SYSCALL_NUM(_regs)	(_regs).orig_eax
# define SYSCALL_RET(_regs)	(_regs).eax
#elif defined(__arm__)
# define ARCH_REGS		struct pt_regs
# define SYSCALL_NUM(_regs)	(_regs).ARM_r7
# ifndef PTRACE_SET_SYSCALL
#  define PTRACE_SET_SYSCALL	23
# endif
# define SYSCALL_NUM_SET(_regs, _nr) \
		EXPECT_EQ(0, ptrace(PTRACE_SET_SYSCALL, tracee, NULL, _nr))
# define SYSCALL_RET(_regs)	(_regs).ARM_r0
#elif defined(__aarch64__)
# define ARCH_REGS		struct user_pt_regs
# define SYSCALL_NUM(_regs)	(_regs).regs[8]
# ifndef NT_ARM_SYSTEM_CALL
#  define NT_ARM_SYSTEM_CALL	0x404
# endif
# define SYSCALL_NUM_SET(_regs, _nr)				\
	do {							\
		struct iovec __v;				\
		typeof(_nr) __nr = (_nr);			\
		__v.iov_base = &__nr;				\
		__v.iov_len = sizeof(__nr);			\
		EXPECT_EQ(0, ptrace(PTRACE_SETREGSET, tracee,	\
				    NT_ARM_SYSTEM_CALL, &__v));	\
	} while (0)
# define SYSCALL_RET(_regs)	(_regs).regs[0]
#elif defined(__loongarch__)
# define ARCH_REGS		struct user_pt_regs
# define SYSCALL_NUM(_regs)	(_regs).regs[11]
# define SYSCALL_RET(_regs)	(_regs).regs[4]
#elif defined(__riscv) && __riscv_xlen == 64
# define ARCH_REGS		struct user_regs_struct
# define SYSCALL_NUM(_regs)	(_regs).a7
# define SYSCALL_RET(_regs)	(_regs).a0
#elif defined(__csky__)
# define ARCH_REGS		struct pt_regs
# if defined(__CSKYABIV2__)
#  define SYSCALL_NUM(_regs)	(_regs).regs[3]
# else
#  define SYSCALL_NUM(_regs)	(_regs).regs[9]
# endif
# define SYSCALL_RET(_regs)	(_regs).a0
#elif defined(__hppa__)
# define ARCH_REGS		struct user_regs_struct
# define SYSCALL_NUM(_regs)	(_regs).gr[20]
# define SYSCALL_RET(_regs)	(_regs).gr[28]
#elif defined(__powerpc__)
# define ARCH_REGS		struct pt_regs
# define SYSCALL_NUM(_regs)	(_regs).gpr[0]
# define SYSCALL_RET(_regs)	(_regs).gpr[3]
# define SYSCALL_RET_SET(_regs, _val)				\
	do {							\
		typeof(_val) _result = (_val);			\
		if ((_regs.trap & 0xfff0) == 0x3000) {		\
			/*					\
			 * scv 0 system call uses -ve result	\
			 * for error, so no need to adjust.	\
			 */					\
			SYSCALL_RET(_regs) = _result;		\
		} else {					\
			/*					\
			 * A syscall error is signaled by the	\
			 * CR0 SO bit and the code is stored as	\
			 * a positive value.			\
			 */					\
			if (_result < 0) {			\
				SYSCALL_RET(_regs) = -_result;	\
				(_regs).ccr |= 0x10000000;	\
			} else {				\
				SYSCALL_RET(_regs) = _result;	\
				(_regs).ccr &= ~0x10000000;	\
			}					\
		}						\
	} while (0)
# define SYSCALL_RET_SET_ON_PTRACE_EXIT
#elif defined(__s390__)
# define ARCH_REGS		s390_regs
# define SYSCALL_NUM(_regs)	(_regs).gprs[2]
# define SYSCALL_RET_SET(_regs, _val)			\
		TH_LOG("Can't modify syscall return on this architecture")
#elif defined(__mips__)
# include <asm/unistd_nr_n32.h>
# include <asm/unistd_nr_n64.h>
# include <asm/unistd_nr_o32.h>
# define ARCH_REGS		struct pt_regs
# define SYSCALL_NUM(_regs)				\
	({						\
		typeof((_regs).regs[2]) _nr;		\
		if ((_regs).regs[2] == __NR_O32_Linux)	\
			_nr = (_regs).regs[4];		\
		else					\
			_nr = (_regs).regs[2];		\
		_nr;					\
	})
# define SYSCALL_NUM_SET(_regs, _nr)			\
	do {						\
		if ((_regs).regs[2] == __NR_O32_Linux)	\
			(_regs).regs[4] = _nr;		\
		else					\
			(_regs).regs[2] = _nr;		\
	} while (0)
# define SYSCALL_RET_SET(_regs, _val)			\
		TH_LOG("Can't modify syscall return on this architecture")
#elif defined(__xtensa__)
# define ARCH_REGS		struct user_pt_regs
# define SYSCALL_NUM(_regs)	(_regs).syscall
/*
 * On xtensa syscall return value is in the register
 * a2 of the current window which is not fixed.
 */
#define SYSCALL_RET(_regs)	(_regs).a[(_regs).windowbase * 4 + 2]
#elif defined(__sh__)
# define ARCH_REGS		struct pt_regs
# define SYSCALL_NUM(_regs)	(_regs).regs[3]
# define SYSCALL_RET(_regs)	(_regs).regs[0]
#elif defined(__mc68000__)
# define ARCH_REGS		struct user_regs_struct
# define SYSCALL_NUM(_regs)	(_regs).orig_d0
# define SYSCALL_RET(_regs)	(_regs).d0
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif

/*
 * Most architectures can change the syscall by just updating the
 * associated register. This is the default if not defined above.
 */
#ifndef SYSCALL_NUM_SET
# define SYSCALL_NUM_SET(_regs, _nr)		\
	do {					\
		SYSCALL_NUM(_regs) = (_nr);	\
	} while (0)
#endif

/*
 * Most architectures can change the syscall return value by just
 * writing to the SYSCALL_RET register. This is the default if not
 * defined above. If an architecture cannot set the return value
 * (for example when the syscall and return value register is
 * shared), report it with TH_LOG() in an arch-specific definition
 * of SYSCALL_RET_SET() above, and leave SYSCALL_RET undefined.
 */
#if !defined(SYSCALL_RET) && !defined(SYSCALL_RET_SET)
# error "One of SYSCALL_RET or SYSCALL_RET_SET is needed for this arch"
#endif
#ifndef SYSCALL_RET_SET
# define SYSCALL_RET_SET(_regs, _val)		\
	do {					\
		SYSCALL_RET(_regs) = (_val);	\
	} while (0)
#endif

/* When the syscall return can't be changed, stub out the tests for it. */
#ifndef SYSCALL_RET
# define EXPECT_SYSCALL_RETURN(val, action)	EXPECT_EQ(-1, action)
#else
# define EXPECT_SYSCALL_RETURN(val, action)		\
	do {						\
		errno = 0;				\
		if (val < 0) {				\
			EXPECT_EQ(-1, action);		\
			EXPECT_EQ(-(val), errno);	\
		} else {				\
			EXPECT_EQ(val, action);		\
		}					\
	} while (0)
#endif

/*
 * Some architectures (e.g. powerpc) can only set syscall
 * return values on syscall exit during ptrace.
 */
const bool ptrace_entry_set_syscall_nr = true;
const bool ptrace_entry_set_syscall_ret =
#ifndef SYSCALL_RET_SET_ON_PTRACE_EXIT
	true;
#else
	false;
#endif

/*
 * Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
 * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
 */
#if defined(__x86_64__) || defined(__i386__) || defined(__mips__) || defined(__mc68000__)
# define ARCH_GETREGS(_regs)	ptrace(PTRACE_GETREGS, tracee, 0, &(_regs))
# define ARCH_SETREGS(_regs)	ptrace(PTRACE_SETREGS, tracee, 0, &(_regs))
#else
# define ARCH_GETREGS(_regs)	({					\
		struct iovec __v;					\
		__v.iov_base = &(_regs);				\
		__v.iov_len = sizeof(_regs);				\
		ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &__v);	\
	})
# define ARCH_SETREGS(_regs)	({					\
		struct iovec __v;					\
		__v.iov_base = &(_regs);				\
		__v.iov_len = sizeof(_regs);				\
		ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &__v);	\
	})
#endif

/* Architecture-specific syscall fetching routine. */
int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
{
	ARCH_REGS regs;

	EXPECT_EQ(0, ARCH_GETREGS(regs)) {
		return -1;
	}

	return SYSCALL_NUM(regs);
}

/* Architecture-specific syscall changing routine. */
void __change_syscall(struct __test_metadata *_metadata,
		      pid_t tracee, long *syscall, long *ret)
{
	ARCH_REGS orig, regs;

	/* Do not get/set registers if we have nothing to do. */
	if (!syscall && !ret)
		return;

	EXPECT_EQ(0, ARCH_GETREGS(regs)) {
		return;
	}
	orig = regs;

	if (syscall)
		SYSCALL_NUM_SET(regs, *syscall);

	if (ret)
		SYSCALL_RET_SET(regs, *ret);

	/* Flush any register changes made. */
	if (memcmp(&orig, &regs, sizeof(orig)) != 0)
		EXPECT_EQ(0, ARCH_SETREGS(regs));
}

/* Change only syscall number. */
void change_syscall_nr(struct __test_metadata *_metadata,
		       pid_t tracee, long syscall)
{
	__change_syscall(_metadata, tracee, &syscall, NULL);
}

/* Change syscall return value (and set syscall number to -1). */
void change_syscall_ret(struct __test_metadata *_metadata,
			pid_t tracee, long ret)
{
	long syscall = -1;

	__change_syscall(_metadata, tracee, &syscall, &ret);
}

void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee,
		    int status, void *args)
{
	int ret;
	unsigned long msg;

	EXPECT_EQ(PTRACE_EVENT_MASK(status), PTRACE_EVENT_SECCOMP) {
		TH_LOG("Unexpected ptrace event: %d", PTRACE_EVENT_MASK(status));
		return;
	}

	/* Make sure we got the right message. */
	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
	EXPECT_EQ(0, ret);

	/* Validate and take action on expected syscalls. */
	switch (msg) {
	case 0x1002:
		/* change getpid to getppid. */
*/ 2010 EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee)); 2011 change_syscall_nr(_metadata, tracee, __NR_getppid); 2012 break; 2013 case 0x1003: 2014 /* skip gettid with valid return code. */ 2015 EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee)); 2016 change_syscall_ret(_metadata, tracee, 45000); 2017 break; 2018 case 0x1004: 2019 /* skip openat with error. */ 2020 EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee)); 2021 change_syscall_ret(_metadata, tracee, -ESRCH); 2022 break; 2023 case 0x1005: 2024 /* do nothing (allow getppid) */ 2025 EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee)); 2026 break; 2027 default: 2028 EXPECT_EQ(0, msg) { 2029 TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg); 2030 kill(tracee, SIGKILL); 2031 } 2032 } 2033 2034 } 2035 2036 FIXTURE(TRACE_syscall) { 2037 struct sock_fprog prog; 2038 pid_t tracer, mytid, mypid, parent; 2039 long syscall_nr; 2040 }; 2041 2042 void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, 2043 int status, void *args) 2044 { 2045 int ret; 2046 unsigned long msg; 2047 static bool entry; 2048 long syscall_nr_val, syscall_ret_val; 2049 long *syscall_nr = NULL, *syscall_ret = NULL; 2050 FIXTURE_DATA(TRACE_syscall) *self = args; 2051 2052 EXPECT_EQ(WSTOPSIG(status) & 0x80, 0x80) { 2053 TH_LOG("Unexpected WSTOPSIG: %d", WSTOPSIG(status)); 2054 return; 2055 } 2056 2057 /* 2058 * The traditional way to tell PTRACE_SYSCALL entry/exit 2059 * is by counting. 2060 */ 2061 entry = !entry; 2062 2063 /* Make sure we got an appropriate message. */ 2064 ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); 2065 EXPECT_EQ(0, ret); 2066 EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY 2067 : PTRACE_EVENTMSG_SYSCALL_EXIT, msg); 2068 2069 /* 2070 * Some architectures only support setting return values during 2071 * syscall exit under ptrace, and on exit the syscall number may 2072 * no longer be available. Therefore, save the initial sycall 2073 * number here, so it can be examined during both entry and exit 2074 * phases. 2075 */ 2076 if (entry) 2077 self->syscall_nr = get_syscall(_metadata, tracee); 2078 2079 /* 2080 * Depending on the architecture's syscall setting abilities, we 2081 * pick which things to set during this phase (entry or exit). 2082 */ 2083 if (entry == ptrace_entry_set_syscall_nr) 2084 syscall_nr = &syscall_nr_val; 2085 if (entry == ptrace_entry_set_syscall_ret) 2086 syscall_ret = &syscall_ret_val; 2087 2088 /* Now handle the actual rewriting cases. */ 2089 switch (self->syscall_nr) { 2090 case __NR_getpid: 2091 syscall_nr_val = __NR_getppid; 2092 /* Never change syscall return for this case. */ 2093 syscall_ret = NULL; 2094 break; 2095 case __NR_gettid: 2096 syscall_nr_val = -1; 2097 syscall_ret_val = 45000; 2098 break; 2099 case __NR_openat: 2100 syscall_nr_val = -1; 2101 syscall_ret_val = -ESRCH; 2102 break; 2103 default: 2104 /* Unhandled, do nothing. */ 2105 return; 2106 } 2107 2108 __change_syscall(_metadata, tracee, syscall_nr, syscall_ret); 2109 } 2110 2111 FIXTURE_VARIANT(TRACE_syscall) { 2112 /* 2113 * All of the SECCOMP_RET_TRACE behaviors can be tested with either 2114 * SECCOMP_RET_TRACE+PTRACE_CONT or plain ptrace()+PTRACE_SYSCALL. 2115 * This indicates if we should use SECCOMP_RET_TRACE (false), or 2116 * ptrace (true). 
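 *
 * Concretely, the seccomp variant installs the SECCOMP_RET_TRACE
 * filter built in FIXTURE_SETUP() and rewrites syscalls from the
 * PTRACE_EVENT_SECCOMP stops in tracer_seccomp(), while the ptrace
 * variant skips the filter entirely and performs the same rewrites
 * from raw syscall-entry/exit stops in tracer_ptrace().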
2117 */ 2118 bool use_ptrace; 2119 }; 2120 2121 FIXTURE_VARIANT_ADD(TRACE_syscall, ptrace) { 2122 .use_ptrace = true, 2123 }; 2124 2125 FIXTURE_VARIANT_ADD(TRACE_syscall, seccomp) { 2126 .use_ptrace = false, 2127 }; 2128 2129 FIXTURE_SETUP(TRACE_syscall) 2130 { 2131 struct sock_filter filter[] = { 2132 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2133 offsetof(struct seccomp_data, nr)), 2134 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 2135 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002), 2136 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1), 2137 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003), 2138 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1), 2139 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004), 2140 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 2141 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005), 2142 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2143 }; 2144 struct sock_fprog prog = { 2145 .len = (unsigned short)ARRAY_SIZE(filter), 2146 .filter = filter, 2147 }; 2148 long ret; 2149 2150 /* Prepare some testable syscall results. */ 2151 self->mytid = syscall(__NR_gettid); 2152 ASSERT_GT(self->mytid, 0); 2153 ASSERT_NE(self->mytid, 1) { 2154 TH_LOG("Running this test as init is not supported. :)"); 2155 } 2156 2157 self->mypid = getpid(); 2158 ASSERT_GT(self->mypid, 0); 2159 ASSERT_EQ(self->mytid, self->mypid); 2160 2161 self->parent = getppid(); 2162 ASSERT_GT(self->parent, 0); 2163 ASSERT_NE(self->parent, self->mypid); 2164 2165 /* Launch tracer. */ 2166 self->tracer = setup_trace_fixture(_metadata, 2167 variant->use_ptrace ? tracer_ptrace 2168 : tracer_seccomp, 2169 self, variant->use_ptrace); 2170 2171 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 2172 ASSERT_EQ(0, ret); 2173 2174 /* Do not install seccomp rewrite filters, as we'll use ptrace instead. */ 2175 if (variant->use_ptrace) 2176 return; 2177 2178 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2179 ASSERT_EQ(0, ret); 2180 } 2181 2182 FIXTURE_TEARDOWN(TRACE_syscall) 2183 { 2184 teardown_trace_fixture(_metadata, self->tracer); 2185 } 2186 2187 TEST(negative_ENOSYS) 2188 { 2189 #if defined(__arm__) 2190 SKIP(return, "arm32 does not support calling syscall -1"); 2191 #endif 2192 /* 2193 * There should be no difference between an "internal" skip 2194 * and userspace asking for syscall "-1". 2195 */ 2196 errno = 0; 2197 EXPECT_EQ(-1, syscall(-1)); 2198 EXPECT_EQ(errno, ENOSYS); 2199 /* And no difference for "still not valid but not -1". */ 2200 errno = 0; 2201 EXPECT_EQ(-1, syscall(-101)); 2202 EXPECT_EQ(errno, ENOSYS); 2203 } 2204 2205 TEST_F(TRACE_syscall, negative_ENOSYS) 2206 { 2207 negative_ENOSYS(_metadata); 2208 } 2209 2210 TEST_F(TRACE_syscall, syscall_allowed) 2211 { 2212 /* getppid works as expected (no changes). */ 2213 EXPECT_EQ(self->parent, syscall(__NR_getppid)); 2214 EXPECT_NE(self->mypid, syscall(__NR_getppid)); 2215 } 2216 2217 TEST_F(TRACE_syscall, syscall_redirected) 2218 { 2219 /* getpid has been redirected to getppid as expected. */ 2220 EXPECT_EQ(self->parent, syscall(__NR_getpid)); 2221 EXPECT_NE(self->mypid, syscall(__NR_getpid)); 2222 } 2223 2224 TEST_F(TRACE_syscall, syscall_errno) 2225 { 2226 /* Tracer should skip the open syscall, resulting in ESRCH. */ 2227 EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat)); 2228 } 2229 2230 TEST_F(TRACE_syscall, syscall_faked) 2231 { 2232 /* Tracer skips the gettid syscall and store altered return value. 
*/ 2233 EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid)); 2234 } 2235 2236 TEST_F_SIGNAL(TRACE_syscall, kill_immediate, SIGSYS) 2237 { 2238 struct sock_filter filter[] = { 2239 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2240 offsetof(struct seccomp_data, nr)), 2241 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_mknodat, 0, 1), 2242 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD), 2243 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2244 }; 2245 struct sock_fprog prog = { 2246 .len = (unsigned short)ARRAY_SIZE(filter), 2247 .filter = filter, 2248 }; 2249 long ret; 2250 2251 /* Install "kill on mknodat" filter. */ 2252 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2253 ASSERT_EQ(0, ret); 2254 2255 /* This should immediately die with SIGSYS, regardless of tracer. */ 2256 EXPECT_EQ(-1, syscall(__NR_mknodat, -1, NULL, 0, 0)); 2257 } 2258 2259 TEST_F(TRACE_syscall, skip_after) 2260 { 2261 struct sock_filter filter[] = { 2262 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2263 offsetof(struct seccomp_data, nr)), 2264 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 2265 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM), 2266 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2267 }; 2268 struct sock_fprog prog = { 2269 .len = (unsigned short)ARRAY_SIZE(filter), 2270 .filter = filter, 2271 }; 2272 long ret; 2273 2274 /* Install additional "errno on getppid" filter. */ 2275 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2276 ASSERT_EQ(0, ret); 2277 2278 /* Tracer will redirect getpid to getppid, and we should see EPERM. */ 2279 errno = 0; 2280 EXPECT_EQ(-1, syscall(__NR_getpid)); 2281 EXPECT_EQ(EPERM, errno); 2282 } 2283 2284 TEST_F_SIGNAL(TRACE_syscall, kill_after, SIGSYS) 2285 { 2286 struct sock_filter filter[] = { 2287 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2288 offsetof(struct seccomp_data, nr)), 2289 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 2290 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 2291 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2292 }; 2293 struct sock_fprog prog = { 2294 .len = (unsigned short)ARRAY_SIZE(filter), 2295 .filter = filter, 2296 }; 2297 long ret; 2298 2299 /* Install additional "death on getppid" filter. */ 2300 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2301 ASSERT_EQ(0, ret); 2302 2303 /* Tracer will redirect getpid to getppid, and we should die. */ 2304 EXPECT_NE(self->mypid, syscall(__NR_getpid)); 2305 } 2306 2307 TEST(seccomp_syscall) 2308 { 2309 struct sock_filter filter[] = { 2310 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2311 }; 2312 struct sock_fprog prog = { 2313 .len = (unsigned short)ARRAY_SIZE(filter), 2314 .filter = filter, 2315 }; 2316 long ret; 2317 2318 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 2319 ASSERT_EQ(0, ret) { 2320 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2321 } 2322 2323 /* Reject insane operation. */ 2324 ret = seccomp(-1, 0, &prog); 2325 ASSERT_NE(ENOSYS, errno) { 2326 TH_LOG("Kernel does not support seccomp syscall!"); 2327 } 2328 EXPECT_EQ(EINVAL, errno) { 2329 TH_LOG("Did not reject crazy op value!"); 2330 } 2331 2332 /* Reject strict with flags or pointer. */ 2333 ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL); 2334 EXPECT_EQ(EINVAL, errno) { 2335 TH_LOG("Did not reject mode strict with flags!"); 2336 } 2337 ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog); 2338 EXPECT_EQ(EINVAL, errno) { 2339 TH_LOG("Did not reject mode strict with uargs!"); 2340 } 2341 2342 /* Reject insane args for filter. 
*/ 2343 ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog); 2344 EXPECT_EQ(EINVAL, errno) { 2345 TH_LOG("Did not reject crazy filter flags!"); 2346 } 2347 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL); 2348 EXPECT_EQ(EFAULT, errno) { 2349 TH_LOG("Did not reject NULL filter!"); 2350 } 2351 2352 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); 2353 EXPECT_EQ(0, errno) { 2354 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s", 2355 strerror(errno)); 2356 } 2357 } 2358 2359 TEST(seccomp_syscall_mode_lock) 2360 { 2361 struct sock_filter filter[] = { 2362 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2363 }; 2364 struct sock_fprog prog = { 2365 .len = (unsigned short)ARRAY_SIZE(filter), 2366 .filter = filter, 2367 }; 2368 long ret; 2369 2370 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); 2371 ASSERT_EQ(0, ret) { 2372 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2373 } 2374 2375 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); 2376 ASSERT_NE(ENOSYS, errno) { 2377 TH_LOG("Kernel does not support seccomp syscall!"); 2378 } 2379 EXPECT_EQ(0, ret) { 2380 TH_LOG("Could not install filter!"); 2381 } 2382 2383 /* Make sure neither entry point will switch to strict. */ 2384 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0); 2385 EXPECT_EQ(EINVAL, errno) { 2386 TH_LOG("Switched to mode strict!"); 2387 } 2388 2389 ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL); 2390 EXPECT_EQ(EINVAL, errno) { 2391 TH_LOG("Switched to mode strict!"); 2392 } 2393 } 2394 2395 /* 2396 * Test detection of known and unknown filter flags. Userspace needs to be able 2397 * to check if a filter flag is supported by the current kernel and a good way 2398 * of doing that is by attempting to enter filter mode, with the flag bit in 2399 * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates 2400 * that the flag is valid and EINVAL indicates that the flag is invalid. 2401 */ 2402 TEST(detect_seccomp_filter_flags) 2403 { 2404 unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC, 2405 SECCOMP_FILTER_FLAG_LOG, 2406 SECCOMP_FILTER_FLAG_SPEC_ALLOW, 2407 SECCOMP_FILTER_FLAG_NEW_LISTENER, 2408 SECCOMP_FILTER_FLAG_TSYNC_ESRCH }; 2409 unsigned int exclusive[] = { 2410 SECCOMP_FILTER_FLAG_TSYNC, 2411 SECCOMP_FILTER_FLAG_NEW_LISTENER }; 2412 unsigned int flag, all_flags, exclusive_mask; 2413 int i; 2414 long ret; 2415 2416 /* Test detection of individual known-good filter flags */ 2417 for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) { 2418 int bits = 0; 2419 2420 flag = flags[i]; 2421 /* Make sure the flag is a single bit! */ 2422 while (flag) { 2423 if (flag & 0x1) 2424 bits ++; 2425 flag >>= 1; 2426 } 2427 ASSERT_EQ(1, bits); 2428 flag = flags[i]; 2429 2430 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2431 ASSERT_NE(ENOSYS, errno) { 2432 TH_LOG("Kernel does not support seccomp syscall!"); 2433 } 2434 EXPECT_EQ(-1, ret); 2435 EXPECT_EQ(EFAULT, errno) { 2436 TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!", 2437 flag); 2438 } 2439 2440 all_flags |= flag; 2441 } 2442 2443 /* 2444 * Test detection of all known-good filter flags combined. But 2445 * for the exclusive flags we need to mask them out and try them 2446 * individually for the "all flags" testing. 
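 *
 * The probe is the same one used above: call
 * seccomp(SECCOMP_SET_MODE_FILTER, flags, NULL) with a NULL uargs
 * pointer and inspect errno, where EFAULT means the flag combination
 * was accepted and EINVAL means it was rejected.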
2447 */ 2448 exclusive_mask = 0; 2449 for (i = 0; i < ARRAY_SIZE(exclusive); i++) 2450 exclusive_mask |= exclusive[i]; 2451 for (i = 0; i < ARRAY_SIZE(exclusive); i++) { 2452 flag = all_flags & ~exclusive_mask; 2453 flag |= exclusive[i]; 2454 2455 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2456 EXPECT_EQ(-1, ret); 2457 EXPECT_EQ(EFAULT, errno) { 2458 TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!", 2459 flag); 2460 } 2461 } 2462 2463 /* Test detection of an unknown filter flags, without exclusives. */ 2464 flag = -1; 2465 flag &= ~exclusive_mask; 2466 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2467 EXPECT_EQ(-1, ret); 2468 EXPECT_EQ(EINVAL, errno) { 2469 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!", 2470 flag); 2471 } 2472 2473 /* 2474 * Test detection of an unknown filter flag that may simply need to be 2475 * added to this test 2476 */ 2477 flag = flags[ARRAY_SIZE(flags) - 1] << 1; 2478 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2479 EXPECT_EQ(-1, ret); 2480 EXPECT_EQ(EINVAL, errno) { 2481 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?", 2482 flag); 2483 } 2484 } 2485 2486 TEST(TSYNC_first) 2487 { 2488 struct sock_filter filter[] = { 2489 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2490 }; 2491 struct sock_fprog prog = { 2492 .len = (unsigned short)ARRAY_SIZE(filter), 2493 .filter = filter, 2494 }; 2495 long ret; 2496 2497 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); 2498 ASSERT_EQ(0, ret) { 2499 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2500 } 2501 2502 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2503 &prog); 2504 ASSERT_NE(ENOSYS, errno) { 2505 TH_LOG("Kernel does not support seccomp syscall!"); 2506 } 2507 EXPECT_EQ(0, ret) { 2508 TH_LOG("Could not install initial filter with TSYNC!"); 2509 } 2510 } 2511 2512 #define TSYNC_SIBLINGS 2 2513 struct tsync_sibling { 2514 pthread_t tid; 2515 pid_t system_tid; 2516 sem_t *started; 2517 pthread_cond_t *cond; 2518 pthread_mutex_t *mutex; 2519 int diverge; 2520 int num_waits; 2521 struct sock_fprog *prog; 2522 struct __test_metadata *metadata; 2523 }; 2524 2525 /* 2526 * To avoid joining joined threads (which is not allowed by Bionic), 2527 * make sure we both successfully join and clear the tid to skip a 2528 * later join attempt during fixture teardown. Any remaining threads 2529 * will be directly killed during teardown. 
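 *
 * Typical use in the TSYNC tests below:
 *
 *	PTHREAD_JOIN(self->sibling[0].tid, &status);
 *	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);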
2530 */ 2531 #define PTHREAD_JOIN(tid, status) \ 2532 do { \ 2533 int _rc = pthread_join(tid, status); \ 2534 if (_rc) { \ 2535 TH_LOG("pthread_join of tid %u failed: %d\n", \ 2536 (unsigned int)tid, _rc); \ 2537 } else { \ 2538 tid = 0; \ 2539 } \ 2540 } while (0) 2541 2542 FIXTURE(TSYNC) { 2543 struct sock_fprog root_prog, apply_prog; 2544 struct tsync_sibling sibling[TSYNC_SIBLINGS]; 2545 sem_t started; 2546 pthread_cond_t cond; 2547 pthread_mutex_t mutex; 2548 int sibling_count; 2549 }; 2550 2551 FIXTURE_SETUP(TSYNC) 2552 { 2553 struct sock_filter root_filter[] = { 2554 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2555 }; 2556 struct sock_filter apply_filter[] = { 2557 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2558 offsetof(struct seccomp_data, nr)), 2559 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), 2560 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 2561 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2562 }; 2563 2564 memset(&self->root_prog, 0, sizeof(self->root_prog)); 2565 memset(&self->apply_prog, 0, sizeof(self->apply_prog)); 2566 memset(&self->sibling, 0, sizeof(self->sibling)); 2567 self->root_prog.filter = malloc(sizeof(root_filter)); 2568 ASSERT_NE(NULL, self->root_prog.filter); 2569 memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter)); 2570 self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter); 2571 2572 self->apply_prog.filter = malloc(sizeof(apply_filter)); 2573 ASSERT_NE(NULL, self->apply_prog.filter); 2574 memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter)); 2575 self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter); 2576 2577 self->sibling_count = 0; 2578 pthread_mutex_init(&self->mutex, NULL); 2579 pthread_cond_init(&self->cond, NULL); 2580 sem_init(&self->started, 0, 0); 2581 self->sibling[0].tid = 0; 2582 self->sibling[0].cond = &self->cond; 2583 self->sibling[0].started = &self->started; 2584 self->sibling[0].mutex = &self->mutex; 2585 self->sibling[0].diverge = 0; 2586 self->sibling[0].num_waits = 1; 2587 self->sibling[0].prog = &self->root_prog; 2588 self->sibling[0].metadata = _metadata; 2589 self->sibling[1].tid = 0; 2590 self->sibling[1].cond = &self->cond; 2591 self->sibling[1].started = &self->started; 2592 self->sibling[1].mutex = &self->mutex; 2593 self->sibling[1].diverge = 0; 2594 self->sibling[1].prog = &self->root_prog; 2595 self->sibling[1].num_waits = 1; 2596 self->sibling[1].metadata = _metadata; 2597 } 2598 2599 FIXTURE_TEARDOWN(TSYNC) 2600 { 2601 int sib = 0; 2602 2603 if (self->root_prog.filter) 2604 free(self->root_prog.filter); 2605 if (self->apply_prog.filter) 2606 free(self->apply_prog.filter); 2607 2608 for ( ; sib < self->sibling_count; ++sib) { 2609 struct tsync_sibling *s = &self->sibling[sib]; 2610 2611 if (!s->tid) 2612 continue; 2613 /* 2614 * If a thread is still running, it may be stuck, so hit 2615 * it over the head really hard. 2616 */ 2617 pthread_kill(s->tid, 9); 2618 } 2619 pthread_mutex_destroy(&self->mutex); 2620 pthread_cond_destroy(&self->cond); 2621 sem_destroy(&self->started); 2622 } 2623 2624 void *tsync_sibling(void *data) 2625 { 2626 long ret = 0; 2627 struct tsync_sibling *me = data; 2628 2629 me->system_tid = syscall(__NR_gettid); 2630 2631 pthread_mutex_lock(me->mutex); 2632 if (me->diverge) { 2633 /* Just re-apply the root prog to fork the tree */ 2634 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, 2635 me->prog, 0, 0); 2636 } 2637 sem_post(me->started); 2638 /* Return outside of started so parent notices failures. 
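 * That is, sem_post() is issued first so the parent's sem_wait() loop
 * always advances, and only then does a failed prctl() make this
 * thread bail out with SIBLING_EXIT_FAILURE.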
*/ 2639 if (ret) { 2640 pthread_mutex_unlock(me->mutex); 2641 return (void *)SIBLING_EXIT_FAILURE; 2642 } 2643 do { 2644 pthread_cond_wait(me->cond, me->mutex); 2645 me->num_waits = me->num_waits - 1; 2646 } while (me->num_waits); 2647 pthread_mutex_unlock(me->mutex); 2648 2649 ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); 2650 if (!ret) 2651 return (void *)SIBLING_EXIT_NEWPRIVS; 2652 read(-1, NULL, 0); 2653 return (void *)SIBLING_EXIT_UNKILLED; 2654 } 2655 2656 void tsync_start_sibling(struct tsync_sibling *sibling) 2657 { 2658 pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling); 2659 } 2660 2661 TEST_F(TSYNC, siblings_fail_prctl) 2662 { 2663 long ret; 2664 void *status; 2665 struct sock_filter filter[] = { 2666 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2667 offsetof(struct seccomp_data, nr)), 2668 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), 2669 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL), 2670 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2671 }; 2672 struct sock_fprog prog = { 2673 .len = (unsigned short)ARRAY_SIZE(filter), 2674 .filter = filter, 2675 }; 2676 2677 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2678 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2679 } 2680 2681 /* Check prctl failure detection by requesting sib 0 diverge. */ 2682 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); 2683 ASSERT_NE(ENOSYS, errno) { 2684 TH_LOG("Kernel does not support seccomp syscall!"); 2685 } 2686 ASSERT_EQ(0, ret) { 2687 TH_LOG("setting filter failed"); 2688 } 2689 2690 self->sibling[0].diverge = 1; 2691 tsync_start_sibling(&self->sibling[0]); 2692 tsync_start_sibling(&self->sibling[1]); 2693 2694 while (self->sibling_count < TSYNC_SIBLINGS) { 2695 sem_wait(&self->started); 2696 self->sibling_count++; 2697 } 2698 2699 /* Signal the threads to clean up*/ 2700 pthread_mutex_lock(&self->mutex); 2701 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2702 TH_LOG("cond broadcast non-zero"); 2703 } 2704 pthread_mutex_unlock(&self->mutex); 2705 2706 /* Ensure diverging sibling failed to call prctl. */ 2707 PTHREAD_JOIN(self->sibling[0].tid, &status); 2708 EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status); 2709 PTHREAD_JOIN(self->sibling[1].tid, &status); 2710 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2711 } 2712 2713 TEST_F(TSYNC, two_siblings_with_ancestor) 2714 { 2715 long ret; 2716 void *status; 2717 2718 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2719 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2720 } 2721 2722 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2723 ASSERT_NE(ENOSYS, errno) { 2724 TH_LOG("Kernel does not support seccomp syscall!"); 2725 } 2726 ASSERT_EQ(0, ret) { 2727 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2728 } 2729 tsync_start_sibling(&self->sibling[0]); 2730 tsync_start_sibling(&self->sibling[1]); 2731 2732 while (self->sibling_count < TSYNC_SIBLINGS) { 2733 sem_wait(&self->started); 2734 self->sibling_count++; 2735 } 2736 2737 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2738 &self->apply_prog); 2739 ASSERT_EQ(0, ret) { 2740 TH_LOG("Could install filter on all threads!"); 2741 } 2742 /* Tell the siblings to test the policy */ 2743 pthread_mutex_lock(&self->mutex); 2744 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2745 TH_LOG("cond broadcast non-zero"); 2746 } 2747 pthread_mutex_unlock(&self->mutex); 2748 /* Ensure they are both killed and don't exit cleanly. 
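 * (The TSYNC'd apply_prog kill-on-read filter terminates each sibling
 * when it reaches its read() call, so neither thread returns one of
 * the SIBLING_EXIT_* markers.)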
*/ 2749 PTHREAD_JOIN(self->sibling[0].tid, &status); 2750 EXPECT_EQ(0x0, (long)status); 2751 PTHREAD_JOIN(self->sibling[1].tid, &status); 2752 EXPECT_EQ(0x0, (long)status); 2753 } 2754 2755 TEST_F(TSYNC, two_sibling_want_nnp) 2756 { 2757 void *status; 2758 2759 /* start siblings before any prctl() operations */ 2760 tsync_start_sibling(&self->sibling[0]); 2761 tsync_start_sibling(&self->sibling[1]); 2762 while (self->sibling_count < TSYNC_SIBLINGS) { 2763 sem_wait(&self->started); 2764 self->sibling_count++; 2765 } 2766 2767 /* Tell the siblings to test no policy */ 2768 pthread_mutex_lock(&self->mutex); 2769 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2770 TH_LOG("cond broadcast non-zero"); 2771 } 2772 pthread_mutex_unlock(&self->mutex); 2773 2774 /* Ensure they are both upset about lacking nnp. */ 2775 PTHREAD_JOIN(self->sibling[0].tid, &status); 2776 EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); 2777 PTHREAD_JOIN(self->sibling[1].tid, &status); 2778 EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); 2779 } 2780 2781 TEST_F(TSYNC, two_siblings_with_no_filter) 2782 { 2783 long ret; 2784 void *status; 2785 2786 /* start siblings before any prctl() operations */ 2787 tsync_start_sibling(&self->sibling[0]); 2788 tsync_start_sibling(&self->sibling[1]); 2789 while (self->sibling_count < TSYNC_SIBLINGS) { 2790 sem_wait(&self->started); 2791 self->sibling_count++; 2792 } 2793 2794 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2795 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2796 } 2797 2798 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2799 &self->apply_prog); 2800 ASSERT_NE(ENOSYS, errno) { 2801 TH_LOG("Kernel does not support seccomp syscall!"); 2802 } 2803 ASSERT_EQ(0, ret) { 2804 TH_LOG("Could install filter on all threads!"); 2805 } 2806 2807 /* Tell the siblings to test the policy */ 2808 pthread_mutex_lock(&self->mutex); 2809 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2810 TH_LOG("cond broadcast non-zero"); 2811 } 2812 pthread_mutex_unlock(&self->mutex); 2813 2814 /* Ensure they are both killed and don't exit cleanly. */ 2815 PTHREAD_JOIN(self->sibling[0].tid, &status); 2816 EXPECT_EQ(0x0, (long)status); 2817 PTHREAD_JOIN(self->sibling[1].tid, &status); 2818 EXPECT_EQ(0x0, (long)status); 2819 } 2820 2821 TEST_F(TSYNC, two_siblings_with_one_divergence) 2822 { 2823 long ret; 2824 void *status; 2825 2826 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2827 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2828 } 2829 2830 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2831 ASSERT_NE(ENOSYS, errno) { 2832 TH_LOG("Kernel does not support seccomp syscall!"); 2833 } 2834 ASSERT_EQ(0, ret) { 2835 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2836 } 2837 self->sibling[0].diverge = 1; 2838 tsync_start_sibling(&self->sibling[0]); 2839 tsync_start_sibling(&self->sibling[1]); 2840 2841 while (self->sibling_count < TSYNC_SIBLINGS) { 2842 sem_wait(&self->started); 2843 self->sibling_count++; 2844 } 2845 2846 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2847 &self->apply_prog); 2848 ASSERT_EQ(self->sibling[0].system_tid, ret) { 2849 TH_LOG("Did not fail on diverged sibling."); 2850 } 2851 2852 /* Wake the threads */ 2853 pthread_mutex_lock(&self->mutex); 2854 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2855 TH_LOG("cond broadcast non-zero"); 2856 } 2857 pthread_mutex_unlock(&self->mutex); 2858 2859 /* Ensure they are both unkilled. 
*/ 2860 PTHREAD_JOIN(self->sibling[0].tid, &status); 2861 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2862 PTHREAD_JOIN(self->sibling[1].tid, &status); 2863 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2864 } 2865 2866 TEST_F(TSYNC, two_siblings_with_one_divergence_no_tid_in_err) 2867 { 2868 long ret, flags; 2869 void *status; 2870 2871 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2872 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2873 } 2874 2875 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2876 ASSERT_NE(ENOSYS, errno) { 2877 TH_LOG("Kernel does not support seccomp syscall!"); 2878 } 2879 ASSERT_EQ(0, ret) { 2880 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2881 } 2882 self->sibling[0].diverge = 1; 2883 tsync_start_sibling(&self->sibling[0]); 2884 tsync_start_sibling(&self->sibling[1]); 2885 2886 while (self->sibling_count < TSYNC_SIBLINGS) { 2887 sem_wait(&self->started); 2888 self->sibling_count++; 2889 } 2890 2891 flags = SECCOMP_FILTER_FLAG_TSYNC | \ 2892 SECCOMP_FILTER_FLAG_TSYNC_ESRCH; 2893 ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, &self->apply_prog); 2894 ASSERT_EQ(ESRCH, errno) { 2895 TH_LOG("Did not return ESRCH for diverged sibling."); 2896 } 2897 ASSERT_EQ(-1, ret) { 2898 TH_LOG("Did not fail on diverged sibling."); 2899 } 2900 2901 /* Wake the threads */ 2902 pthread_mutex_lock(&self->mutex); 2903 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2904 TH_LOG("cond broadcast non-zero"); 2905 } 2906 pthread_mutex_unlock(&self->mutex); 2907 2908 /* Ensure they are both unkilled. */ 2909 PTHREAD_JOIN(self->sibling[0].tid, &status); 2910 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2911 PTHREAD_JOIN(self->sibling[1].tid, &status); 2912 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2913 } 2914 2915 TEST_F(TSYNC, two_siblings_not_under_filter) 2916 { 2917 long ret, sib; 2918 void *status; 2919 struct timespec delay = { .tv_nsec = 100000000 }; 2920 2921 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2922 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2923 } 2924 2925 /* 2926 * Sibling 0 will have its own seccomp policy 2927 * and Sibling 1 will not be under seccomp at 2928 * all. Sibling 1 will enter seccomp and 0 2929 * will cause failure. 2930 */ 2931 self->sibling[0].diverge = 1; 2932 tsync_start_sibling(&self->sibling[0]); 2933 tsync_start_sibling(&self->sibling[1]); 2934 2935 while (self->sibling_count < TSYNC_SIBLINGS) { 2936 sem_wait(&self->started); 2937 self->sibling_count++; 2938 } 2939 2940 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2941 ASSERT_NE(ENOSYS, errno) { 2942 TH_LOG("Kernel does not support seccomp syscall!"); 2943 } 2944 ASSERT_EQ(0, ret) { 2945 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2946 } 2947 2948 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2949 &self->apply_prog); 2950 ASSERT_EQ(ret, self->sibling[0].system_tid) { 2951 TH_LOG("Did not fail on diverged sibling."); 2952 } 2953 sib = 1; 2954 if (ret == self->sibling[0].system_tid) 2955 sib = 0; 2956 2957 pthread_mutex_lock(&self->mutex); 2958 2959 /* Increment the other siblings num_waits so we can clean up 2960 * the one we just saw. 
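 * Bumping the other sibling's num_waits keeps it blocked on the
 * condition variable across this broadcast, so only the sibling that
 * the failed TSYNC call reported wakes up, exits, and can be joined.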
2961 */ 2962 self->sibling[!sib].num_waits += 1; 2963 2964 /* Signal the thread to clean up*/ 2965 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2966 TH_LOG("cond broadcast non-zero"); 2967 } 2968 pthread_mutex_unlock(&self->mutex); 2969 PTHREAD_JOIN(self->sibling[sib].tid, &status); 2970 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2971 /* Poll for actual task death. pthread_join doesn't guarantee it. */ 2972 while (!kill(self->sibling[sib].system_tid, 0)) 2973 nanosleep(&delay, NULL); 2974 /* Switch to the remaining sibling */ 2975 sib = !sib; 2976 2977 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2978 &self->apply_prog); 2979 ASSERT_EQ(0, ret) { 2980 TH_LOG("Expected the remaining sibling to sync"); 2981 }; 2982 2983 pthread_mutex_lock(&self->mutex); 2984 2985 /* If remaining sibling didn't have a chance to wake up during 2986 * the first broadcast, manually reduce the num_waits now. 2987 */ 2988 if (self->sibling[sib].num_waits > 1) 2989 self->sibling[sib].num_waits = 1; 2990 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2991 TH_LOG("cond broadcast non-zero"); 2992 } 2993 pthread_mutex_unlock(&self->mutex); 2994 PTHREAD_JOIN(self->sibling[sib].tid, &status); 2995 EXPECT_EQ(0, (long)status); 2996 /* Poll for actual task death. pthread_join doesn't guarantee it. */ 2997 while (!kill(self->sibling[sib].system_tid, 0)) 2998 nanosleep(&delay, NULL); 2999 3000 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 3001 &self->apply_prog); 3002 ASSERT_EQ(0, ret); /* just us chickens */ 3003 } 3004 3005 /* Make sure restarted syscalls are seen directly as "restart_syscall". */ 3006 TEST(syscall_restart) 3007 { 3008 long ret; 3009 unsigned long msg; 3010 pid_t child_pid; 3011 int pipefd[2]; 3012 int status; 3013 siginfo_t info = { }; 3014 struct sock_filter filter[] = { 3015 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 3016 offsetof(struct seccomp_data, nr)), 3017 3018 #ifdef __NR_sigreturn 3019 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 7, 0), 3020 #endif 3021 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 6, 0), 3022 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 5, 0), 3023 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 4, 0), 3024 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 5, 0), 3025 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_clock_nanosleep, 4, 0), 3026 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0), 3027 3028 /* Allow __NR_write for easy logging. */ 3029 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1), 3030 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3031 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 3032 /* The nanosleep jump target. */ 3033 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100), 3034 /* The restart_syscall jump target. */ 3035 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200), 3036 }; 3037 struct sock_fprog prog = { 3038 .len = (unsigned short)ARRAY_SIZE(filter), 3039 .filter = filter, 3040 }; 3041 #if defined(__arm__) 3042 struct utsname utsbuf; 3043 #endif 3044 3045 ASSERT_EQ(0, pipe(pipefd)); 3046 3047 child_pid = fork(); 3048 ASSERT_LE(0, child_pid); 3049 if (child_pid == 0) { 3050 /* Child uses EXPECT not ASSERT to deliver status correctly. */ 3051 char buf = ' '; 3052 struct timespec timeout = { }; 3053 3054 /* Attach parent as tracer and stop. 
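 * The child uses PTRACE_TRACEME plus raise(SIGSTOP) so the parent can
 * set PTRACE_O_TRACESECCOMP before releasing it with PTRACE_CONT.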
*/ 3055 EXPECT_EQ(0, ptrace(PTRACE_TRACEME)); 3056 EXPECT_EQ(0, raise(SIGSTOP)); 3057 3058 EXPECT_EQ(0, close(pipefd[1])); 3059 3060 EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 3061 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3062 } 3063 3064 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 3065 EXPECT_EQ(0, ret) { 3066 TH_LOG("Failed to install filter!"); 3067 } 3068 3069 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) { 3070 TH_LOG("Failed to read() sync from parent"); 3071 } 3072 EXPECT_EQ('.', buf) { 3073 TH_LOG("Failed to get sync data from read()"); 3074 } 3075 3076 /* Start nanosleep to be interrupted. */ 3077 timeout.tv_sec = 1; 3078 errno = 0; 3079 EXPECT_EQ(0, nanosleep(&timeout, NULL)) { 3080 TH_LOG("Call to nanosleep() failed (errno %d: %s)", 3081 errno, strerror(errno)); 3082 } 3083 3084 /* Read final sync from parent. */ 3085 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) { 3086 TH_LOG("Failed final read() from parent"); 3087 } 3088 EXPECT_EQ('!', buf) { 3089 TH_LOG("Failed to get final data from read()"); 3090 } 3091 3092 /* Directly report the status of our test harness results. */ 3093 syscall(__NR_exit, _metadata->exit_code); 3094 } 3095 EXPECT_EQ(0, close(pipefd[0])); 3096 3097 /* Attach to child, setup options, and release. */ 3098 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 3099 ASSERT_EQ(true, WIFSTOPPED(status)); 3100 ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL, 3101 PTRACE_O_TRACESECCOMP)); 3102 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 3103 ASSERT_EQ(1, write(pipefd[1], ".", 1)); 3104 3105 /* Wait for nanosleep() to start. */ 3106 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 3107 ASSERT_EQ(true, WIFSTOPPED(status)); 3108 ASSERT_EQ(SIGTRAP, WSTOPSIG(status)); 3109 ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16)); 3110 ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg)); 3111 ASSERT_EQ(0x100, msg); 3112 ret = get_syscall(_metadata, child_pid); 3113 EXPECT_TRUE(ret == __NR_nanosleep || ret == __NR_clock_nanosleep); 3114 3115 /* Might as well check siginfo for sanity while we're here. */ 3116 ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); 3117 ASSERT_EQ(SIGTRAP, info.si_signo); 3118 ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code); 3119 EXPECT_EQ(0, info.si_errno); 3120 EXPECT_EQ(getuid(), info.si_uid); 3121 /* Verify signal delivery came from child (seccomp-triggered). */ 3122 EXPECT_EQ(child_pid, info.si_pid); 3123 3124 /* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */ 3125 ASSERT_EQ(0, kill(child_pid, SIGSTOP)); 3126 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 3127 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 3128 ASSERT_EQ(true, WIFSTOPPED(status)); 3129 ASSERT_EQ(SIGSTOP, WSTOPSIG(status)); 3130 ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); 3131 /* 3132 * There is no siginfo on SIGSTOP any more, so we can't verify 3133 * signal delivery came from parent now (getpid() == info.si_pid). 3134 * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com 3135 * At least verify the SIGSTOP via PTRACE_GETSIGINFO. 3136 */ 3137 EXPECT_EQ(SIGSTOP, info.si_signo); 3138 3139 /* Restart nanosleep with SIGCONT, which triggers restart_syscall. 
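 * The SIGCONT itself shows up as one more signal stop that has to be
 * continued before the restart_syscall() seccomp event (message
 * 0x200) is reported.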
*/ 3140 ASSERT_EQ(0, kill(child_pid, SIGCONT)); 3141 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 3142 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 3143 ASSERT_EQ(true, WIFSTOPPED(status)); 3144 ASSERT_EQ(SIGCONT, WSTOPSIG(status)); 3145 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 3146 3147 /* Wait for restart_syscall() to start. */ 3148 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 3149 ASSERT_EQ(true, WIFSTOPPED(status)); 3150 ASSERT_EQ(SIGTRAP, WSTOPSIG(status)); 3151 ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16)); 3152 ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg)); 3153 3154 ASSERT_EQ(0x200, msg); 3155 ret = get_syscall(_metadata, child_pid); 3156 #if defined(__arm__) 3157 /* 3158 * FIXME: 3159 * - native ARM registers do NOT expose true syscall. 3160 * - compat ARM registers on ARM64 DO expose true syscall. 3161 */ 3162 ASSERT_EQ(0, uname(&utsbuf)); 3163 if (strncmp(utsbuf.machine, "arm", 3) == 0) { 3164 EXPECT_EQ(__NR_nanosleep, ret); 3165 } else 3166 #endif 3167 { 3168 EXPECT_EQ(__NR_restart_syscall, ret); 3169 } 3170 3171 /* Write again to end test. */ 3172 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 3173 ASSERT_EQ(1, write(pipefd[1], "!", 1)); 3174 EXPECT_EQ(0, close(pipefd[1])); 3175 3176 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 3177 if (WIFSIGNALED(status) || WEXITSTATUS(status)) 3178 _metadata->exit_code = KSFT_FAIL; 3179 } 3180 3181 TEST_SIGNAL(filter_flag_log, SIGSYS) 3182 { 3183 struct sock_filter allow_filter[] = { 3184 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3185 }; 3186 struct sock_filter kill_filter[] = { 3187 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 3188 offsetof(struct seccomp_data, nr)), 3189 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 3190 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 3191 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3192 }; 3193 struct sock_fprog allow_prog = { 3194 .len = (unsigned short)ARRAY_SIZE(allow_filter), 3195 .filter = allow_filter, 3196 }; 3197 struct sock_fprog kill_prog = { 3198 .len = (unsigned short)ARRAY_SIZE(kill_filter), 3199 .filter = kill_filter, 3200 }; 3201 long ret; 3202 pid_t parent = getppid(); 3203 3204 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3205 ASSERT_EQ(0, ret); 3206 3207 /* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */ 3208 ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG, 3209 &allow_prog); 3210 ASSERT_NE(ENOSYS, errno) { 3211 TH_LOG("Kernel does not support seccomp syscall!"); 3212 } 3213 EXPECT_NE(0, ret) { 3214 TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!"); 3215 } 3216 EXPECT_EQ(EINVAL, errno) { 3217 TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!"); 3218 } 3219 3220 /* Verify that a simple, permissive filter can be added with no flags */ 3221 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog); 3222 EXPECT_EQ(0, ret); 3223 3224 /* See if the same filter can be added with the FILTER_FLAG_LOG flag */ 3225 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, 3226 &allow_prog); 3227 ASSERT_NE(EINVAL, errno) { 3228 TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!"); 3229 } 3230 EXPECT_EQ(0, ret); 3231 3232 /* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */ 3233 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, 3234 &kill_prog); 3235 EXPECT_EQ(0, ret); 3236 3237 EXPECT_EQ(parent, syscall(__NR_getppid)); 3238 /* getpid() should never return. 
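 * The kill_prog filter installed above matches getpid(), so the
 * process should die with the SIGSYS that TEST_SIGNAL() expects
 * rather than reach the comparison below.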
*/ 3239 EXPECT_EQ(0, syscall(__NR_getpid)); 3240 } 3241 3242 TEST(get_action_avail) 3243 { 3244 __u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP, 3245 SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE, 3246 SECCOMP_RET_LOG, SECCOMP_RET_ALLOW }; 3247 __u32 unknown_action = 0x10000000U; 3248 int i; 3249 long ret; 3250 3251 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]); 3252 ASSERT_NE(ENOSYS, errno) { 3253 TH_LOG("Kernel does not support seccomp syscall!"); 3254 } 3255 ASSERT_NE(EINVAL, errno) { 3256 TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!"); 3257 } 3258 EXPECT_EQ(ret, 0); 3259 3260 for (i = 0; i < ARRAY_SIZE(actions); i++) { 3261 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]); 3262 EXPECT_EQ(ret, 0) { 3263 TH_LOG("Expected action (0x%X) not available!", 3264 actions[i]); 3265 } 3266 } 3267 3268 /* Check that an unknown action is handled properly (EOPNOTSUPP) */ 3269 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action); 3270 EXPECT_EQ(ret, -1); 3271 EXPECT_EQ(errno, EOPNOTSUPP); 3272 } 3273 3274 TEST(get_metadata) 3275 { 3276 pid_t pid; 3277 int pipefd[2]; 3278 char buf; 3279 struct seccomp_metadata md; 3280 long ret; 3281 3282 /* Only real root can get metadata. */ 3283 if (geteuid()) { 3284 SKIP(return, "get_metadata requires real root"); 3285 return; 3286 } 3287 3288 ASSERT_EQ(0, pipe(pipefd)); 3289 3290 pid = fork(); 3291 ASSERT_GE(pid, 0); 3292 if (pid == 0) { 3293 struct sock_filter filter[] = { 3294 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3295 }; 3296 struct sock_fprog prog = { 3297 .len = (unsigned short)ARRAY_SIZE(filter), 3298 .filter = filter, 3299 }; 3300 3301 /* one with log, one without */ 3302 EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 3303 SECCOMP_FILTER_FLAG_LOG, &prog)); 3304 EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog)); 3305 3306 EXPECT_EQ(0, close(pipefd[0])); 3307 ASSERT_EQ(1, write(pipefd[1], "1", 1)); 3308 ASSERT_EQ(0, close(pipefd[1])); 3309 3310 while (1) 3311 sleep(100); 3312 } 3313 3314 ASSERT_EQ(0, close(pipefd[1])); 3315 ASSERT_EQ(1, read(pipefd[0], &buf, 1)); 3316 3317 ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid)); 3318 ASSERT_EQ(pid, waitpid(pid, NULL, 0)); 3319 3320 /* Past here must not use ASSERT or child process is never killed. 
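 * An ASSERT failure bails out of the test body immediately, which
 * would skip the kill(pid, SIGKILL) at the end and leave the child
 * stuck in its sleep() loop.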
*/ 3321 3322 md.filter_off = 0; 3323 errno = 0; 3324 ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md); 3325 EXPECT_EQ(sizeof(md), ret) { 3326 if (errno == EINVAL) 3327 SKIP(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)"); 3328 } 3329 3330 EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG); 3331 EXPECT_EQ(md.filter_off, 0); 3332 3333 md.filter_off = 1; 3334 ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md); 3335 EXPECT_EQ(sizeof(md), ret); 3336 EXPECT_EQ(md.flags, 0); 3337 EXPECT_EQ(md.filter_off, 1); 3338 3339 skip: 3340 ASSERT_EQ(0, kill(pid, SIGKILL)); 3341 } 3342 3343 static int user_notif_syscall(int nr, unsigned int flags) 3344 { 3345 struct sock_filter filter[] = { 3346 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 3347 offsetof(struct seccomp_data, nr)), 3348 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1), 3349 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF), 3350 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3351 }; 3352 3353 struct sock_fprog prog = { 3354 .len = (unsigned short)ARRAY_SIZE(filter), 3355 .filter = filter, 3356 }; 3357 3358 return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog); 3359 } 3360 3361 #define USER_NOTIF_MAGIC INT_MAX 3362 TEST(user_notification_basic) 3363 { 3364 pid_t pid; 3365 long ret; 3366 int status, listener; 3367 struct seccomp_notif req = {}; 3368 struct seccomp_notif_resp resp = {}; 3369 struct pollfd pollfd; 3370 3371 struct sock_filter filter[] = { 3372 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3373 }; 3374 struct sock_fprog prog = { 3375 .len = (unsigned short)ARRAY_SIZE(filter), 3376 .filter = filter, 3377 }; 3378 3379 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3380 ASSERT_EQ(0, ret) { 3381 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3382 } 3383 3384 pid = fork(); 3385 ASSERT_GE(pid, 0); 3386 3387 /* Check that we get -ENOSYS with no listener attached */ 3388 if (pid == 0) { 3389 if (user_notif_syscall(__NR_getppid, 0) < 0) 3390 exit(1); 3391 ret = syscall(__NR_getppid); 3392 exit(ret >= 0 || errno != ENOSYS); 3393 } 3394 3395 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3396 EXPECT_EQ(true, WIFEXITED(status)); 3397 EXPECT_EQ(0, WEXITSTATUS(status)); 3398 3399 /* Add some no-op filters for grins. */ 3400 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3401 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3402 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3403 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3404 3405 /* Check that the basic notification machinery works */ 3406 listener = user_notif_syscall(__NR_getppid, 3407 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3408 ASSERT_GE(listener, 0); 3409 3410 /* Installing a second listener in the chain should EBUSY */ 3411 EXPECT_EQ(user_notif_syscall(__NR_getppid, 3412 SECCOMP_FILTER_FLAG_NEW_LISTENER), 3413 -1); 3414 EXPECT_EQ(errno, EBUSY); 3415 3416 pid = fork(); 3417 ASSERT_GE(pid, 0); 3418 3419 if (pid == 0) { 3420 ret = syscall(__NR_getppid); 3421 exit(ret != USER_NOTIF_MAGIC); 3422 } 3423 3424 pollfd.fd = listener; 3425 pollfd.events = POLLIN | POLLOUT; 3426 3427 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3428 EXPECT_EQ(pollfd.revents, POLLIN); 3429 3430 /* Test that we can't pass garbage to the kernel. 
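 * SECCOMP_IOCTL_NOTIF_RECV is expected to reject a not-fully-zeroed
 * struct seccomp_notif with EINVAL, so req.pid is deliberately
 * pre-set below; the follow-up ioctl() with req.pid reset to 0 then
 * receives the real notification.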
*/ 3431 memset(&req, 0, sizeof(req)); 3432 req.pid = -1; 3433 errno = 0; 3434 ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req); 3435 EXPECT_EQ(-1, ret); 3436 EXPECT_EQ(EINVAL, errno); 3437 3438 if (ret) { 3439 req.pid = 0; 3440 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3441 } 3442 3443 pollfd.fd = listener; 3444 pollfd.events = POLLIN | POLLOUT; 3445 3446 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3447 EXPECT_EQ(pollfd.revents, POLLOUT); 3448 3449 EXPECT_EQ(req.data.nr, __NR_getppid); 3450 3451 resp.id = req.id; 3452 resp.error = 0; 3453 resp.val = USER_NOTIF_MAGIC; 3454 3455 /* check that we make sure flags == 0 */ 3456 resp.flags = 1; 3457 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3458 EXPECT_EQ(errno, EINVAL); 3459 3460 resp.flags = 0; 3461 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3462 3463 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3464 EXPECT_EQ(true, WIFEXITED(status)); 3465 EXPECT_EQ(0, WEXITSTATUS(status)); 3466 } 3467 3468 TEST(user_notification_with_tsync) 3469 { 3470 int ret; 3471 unsigned int flags; 3472 3473 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3474 ASSERT_EQ(0, ret) { 3475 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3476 } 3477 3478 /* these were exclusive */ 3479 flags = SECCOMP_FILTER_FLAG_NEW_LISTENER | 3480 SECCOMP_FILTER_FLAG_TSYNC; 3481 ASSERT_EQ(-1, user_notif_syscall(__NR_getppid, flags)); 3482 ASSERT_EQ(EINVAL, errno); 3483 3484 /* but now they're not */ 3485 flags |= SECCOMP_FILTER_FLAG_TSYNC_ESRCH; 3486 ret = user_notif_syscall(__NR_getppid, flags); 3487 close(ret); 3488 ASSERT_LE(0, ret); 3489 } 3490 3491 TEST(user_notification_kill_in_middle) 3492 { 3493 pid_t pid; 3494 long ret; 3495 int listener; 3496 struct seccomp_notif req = {}; 3497 struct seccomp_notif_resp resp = {}; 3498 3499 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3500 ASSERT_EQ(0, ret) { 3501 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3502 } 3503 3504 listener = user_notif_syscall(__NR_getppid, 3505 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3506 ASSERT_GE(listener, 0); 3507 3508 /* 3509 * Check that nothing bad happens when we kill the task in the middle 3510 * of a syscall. 
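	 * After the SIGKILL below, the previously received notification id
	 * becomes stale: NOTIF_ID_VALID should start failing and NOTIF_SEND
	 * should return ENOENT.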
3511 */ 3512 pid = fork(); 3513 ASSERT_GE(pid, 0); 3514 3515 if (pid == 0) { 3516 ret = syscall(__NR_getppid); 3517 exit(ret != USER_NOTIF_MAGIC); 3518 } 3519 3520 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3521 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), 0); 3522 3523 EXPECT_EQ(kill(pid, SIGKILL), 0); 3524 EXPECT_EQ(waitpid(pid, NULL, 0), pid); 3525 3526 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), -1); 3527 3528 resp.id = req.id; 3529 ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp); 3530 EXPECT_EQ(ret, -1); 3531 EXPECT_EQ(errno, ENOENT); 3532 } 3533 3534 static int handled = -1; 3535 3536 static void signal_handler(int signal) 3537 { 3538 if (write(handled, "c", 1) != 1) 3539 perror("write from signal"); 3540 } 3541 3542 TEST(user_notification_signal) 3543 { 3544 pid_t pid; 3545 long ret; 3546 int status, listener, sk_pair[2]; 3547 struct seccomp_notif req = {}; 3548 struct seccomp_notif_resp resp = {}; 3549 char c; 3550 3551 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3552 ASSERT_EQ(0, ret) { 3553 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3554 } 3555 3556 ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0); 3557 3558 listener = user_notif_syscall(__NR_gettid, 3559 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3560 ASSERT_GE(listener, 0); 3561 3562 pid = fork(); 3563 ASSERT_GE(pid, 0); 3564 3565 if (pid == 0) { 3566 close(sk_pair[0]); 3567 handled = sk_pair[1]; 3568 if (signal(SIGUSR1, signal_handler) == SIG_ERR) { 3569 perror("signal"); 3570 exit(1); 3571 } 3572 /* 3573 * ERESTARTSYS behavior is a bit hard to test, because we need 3574 * to rely on a signal that has not yet been handled. Let's at 3575 * least check that the error code gets propagated through, and 3576 * hope that it doesn't break when there is actually a signal :) 3577 */ 3578 ret = syscall(__NR_gettid); 3579 exit(!(ret == -1 && errno == 512)); 3580 } 3581 3582 close(sk_pair[1]); 3583 3584 memset(&req, 0, sizeof(req)); 3585 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3586 3587 EXPECT_EQ(kill(pid, SIGUSR1), 0); 3588 3589 /* 3590 * Make sure the signal really is delivered, which means we're not 3591 * stuck in the user notification code any more and the notification 3592 * should be dead. 3593 */ 3594 EXPECT_EQ(read(sk_pair[0], &c, 1), 1); 3595 3596 resp.id = req.id; 3597 resp.error = -EPERM; 3598 resp.val = 0; 3599 3600 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3601 EXPECT_EQ(errno, ENOENT); 3602 3603 memset(&req, 0, sizeof(req)); 3604 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3605 3606 resp.id = req.id; 3607 resp.error = -512; /* -ERESTARTSYS */ 3608 resp.val = 0; 3609 3610 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3611 3612 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3613 EXPECT_EQ(true, WIFEXITED(status)); 3614 EXPECT_EQ(0, WEXITSTATUS(status)); 3615 } 3616 3617 TEST(user_notification_closed_listener) 3618 { 3619 pid_t pid; 3620 long ret; 3621 int status, listener; 3622 3623 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3624 ASSERT_EQ(0, ret) { 3625 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3626 } 3627 3628 listener = user_notif_syscall(__NR_getppid, 3629 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3630 ASSERT_GE(listener, 0); 3631 3632 /* 3633 * Check that we get an ENOSYS when the listener is closed. 
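	 * Once every copy of the listener fd has been closed, a syscall
	 * that matches the SECCOMP_RET_USER_NOTIF filter should fail with
	 * ENOSYS rather than block waiting for a response.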
3634 */ 3635 pid = fork(); 3636 ASSERT_GE(pid, 0); 3637 if (pid == 0) { 3638 close(listener); 3639 ret = syscall(__NR_getppid); 3640 exit(ret != -1 && errno != ENOSYS); 3641 } 3642 3643 close(listener); 3644 3645 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3646 EXPECT_EQ(true, WIFEXITED(status)); 3647 EXPECT_EQ(0, WEXITSTATUS(status)); 3648 } 3649 3650 /* 3651 * Check that a pid in a child namespace still shows up as valid in ours. 3652 */ 3653 TEST(user_notification_child_pid_ns) 3654 { 3655 pid_t pid; 3656 int status, listener; 3657 struct seccomp_notif req = {}; 3658 struct seccomp_notif_resp resp = {}; 3659 3660 ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0) { 3661 if (errno == EINVAL) 3662 SKIP(return, "kernel missing CLONE_NEWUSER support"); 3663 }; 3664 3665 listener = user_notif_syscall(__NR_getppid, 3666 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3667 ASSERT_GE(listener, 0); 3668 3669 pid = fork(); 3670 ASSERT_GE(pid, 0); 3671 3672 if (pid == 0) 3673 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3674 3675 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3676 EXPECT_EQ(req.pid, pid); 3677 3678 resp.id = req.id; 3679 resp.error = 0; 3680 resp.val = USER_NOTIF_MAGIC; 3681 3682 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3683 3684 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3685 EXPECT_EQ(true, WIFEXITED(status)); 3686 EXPECT_EQ(0, WEXITSTATUS(status)); 3687 close(listener); 3688 } 3689 3690 /* 3691 * Check that a pid in a sibling (i.e. unrelated) namespace shows up as 0, i.e. 3692 * invalid. 3693 */ 3694 TEST(user_notification_sibling_pid_ns) 3695 { 3696 pid_t pid, pid2; 3697 int status, listener; 3698 struct seccomp_notif req = {}; 3699 struct seccomp_notif_resp resp = {}; 3700 3701 ASSERT_EQ(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), 0) { 3702 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3703 } 3704 3705 listener = user_notif_syscall(__NR_getppid, 3706 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3707 ASSERT_GE(listener, 0); 3708 3709 pid = fork(); 3710 ASSERT_GE(pid, 0); 3711 3712 if (pid == 0) { 3713 ASSERT_EQ(unshare(CLONE_NEWPID), 0) { 3714 if (errno == EPERM) 3715 SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN"); 3716 else if (errno == EINVAL) 3717 SKIP(return, "CLONE_NEWPID is invalid (missing CONFIG_PID_NS?)"); 3718 } 3719 3720 pid2 = fork(); 3721 ASSERT_GE(pid2, 0); 3722 3723 if (pid2 == 0) 3724 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3725 3726 EXPECT_EQ(waitpid(pid2, &status, 0), pid2); 3727 EXPECT_EQ(true, WIFEXITED(status)); 3728 EXPECT_EQ(0, WEXITSTATUS(status)); 3729 exit(WEXITSTATUS(status)); 3730 } 3731 3732 /* Create the sibling ns, and sibling in it. */ 3733 ASSERT_EQ(unshare(CLONE_NEWPID), 0) { 3734 if (errno == EPERM) 3735 SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN"); 3736 else if (errno == EINVAL) 3737 SKIP(return, "CLONE_NEWPID is invalid (missing CONFIG_PID_NS?)"); 3738 } 3739 ASSERT_EQ(errno, 0); 3740 3741 pid2 = fork(); 3742 ASSERT_GE(pid2, 0); 3743 3744 if (pid2 == 0) { 3745 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3746 /* 3747 * The pid should be 0, i.e. the task is in some namespace that 3748 * we can't "see". 
3749 */ 3750 EXPECT_EQ(req.pid, 0); 3751 3752 resp.id = req.id; 3753 resp.error = 0; 3754 resp.val = USER_NOTIF_MAGIC; 3755 3756 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3757 exit(0); 3758 } 3759 3760 close(listener); 3761 3762 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3763 EXPECT_EQ(true, WIFEXITED(status)); 3764 EXPECT_EQ(0, WEXITSTATUS(status)); 3765 3766 EXPECT_EQ(waitpid(pid2, &status, 0), pid2); 3767 EXPECT_EQ(true, WIFEXITED(status)); 3768 EXPECT_EQ(0, WEXITSTATUS(status)); 3769 } 3770 3771 TEST(user_notification_fault_recv) 3772 { 3773 pid_t pid; 3774 int status, listener; 3775 struct seccomp_notif req = {}; 3776 struct seccomp_notif_resp resp = {}; 3777 3778 ASSERT_EQ(unshare(CLONE_NEWUSER), 0) { 3779 if (errno == EINVAL) 3780 SKIP(return, "kernel missing CLONE_NEWUSER support"); 3781 } 3782 3783 listener = user_notif_syscall(__NR_getppid, 3784 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3785 ASSERT_GE(listener, 0); 3786 3787 pid = fork(); 3788 ASSERT_GE(pid, 0); 3789 3790 if (pid == 0) 3791 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3792 3793 /* Do a bad recv() */ 3794 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, NULL), -1); 3795 EXPECT_EQ(errno, EFAULT); 3796 3797 /* We should still be able to receive this notification, though. */ 3798 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3799 EXPECT_EQ(req.pid, pid); 3800 3801 resp.id = req.id; 3802 resp.error = 0; 3803 resp.val = USER_NOTIF_MAGIC; 3804 3805 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3806 3807 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3808 EXPECT_EQ(true, WIFEXITED(status)); 3809 EXPECT_EQ(0, WEXITSTATUS(status)); 3810 } 3811 3812 TEST(seccomp_get_notif_sizes) 3813 { 3814 struct seccomp_notif_sizes sizes; 3815 3816 ASSERT_EQ(seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes), 0); 3817 EXPECT_EQ(sizes.seccomp_notif, sizeof(struct seccomp_notif)); 3818 EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp)); 3819 } 3820 3821 TEST(user_notification_continue) 3822 { 3823 pid_t pid; 3824 long ret; 3825 int status, listener; 3826 struct seccomp_notif req = {}; 3827 struct seccomp_notif_resp resp = {}; 3828 struct pollfd pollfd; 3829 3830 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3831 ASSERT_EQ(0, ret) { 3832 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3833 } 3834 3835 listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER); 3836 ASSERT_GE(listener, 0); 3837 3838 pid = fork(); 3839 ASSERT_GE(pid, 0); 3840 3841 if (pid == 0) { 3842 int dup_fd, pipe_fds[2]; 3843 pid_t self; 3844 3845 ASSERT_GE(pipe(pipe_fds), 0); 3846 3847 dup_fd = dup(pipe_fds[0]); 3848 ASSERT_GE(dup_fd, 0); 3849 EXPECT_NE(pipe_fds[0], dup_fd); 3850 3851 self = getpid(); 3852 ASSERT_EQ(filecmp(self, self, pipe_fds[0], dup_fd), 0); 3853 exit(0); 3854 } 3855 3856 pollfd.fd = listener; 3857 pollfd.events = POLLIN | POLLOUT; 3858 3859 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3860 EXPECT_EQ(pollfd.revents, POLLIN); 3861 3862 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3863 3864 pollfd.fd = listener; 3865 pollfd.events = POLLIN | POLLOUT; 3866 3867 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3868 EXPECT_EQ(pollfd.revents, POLLOUT); 3869 3870 EXPECT_EQ(req.data.nr, __NR_dup); 3871 3872 resp.id = req.id; 3873 resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE; 3874 3875 /* 3876 * Verify that setting SECCOMP_USER_NOTIF_FLAG_CONTINUE enforces other 3877 * args be set to 0. 
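	 * A valid "continue" response therefore carries nothing but the id
	 * and the flag, roughly:
	 *
	 *	resp.id = req.id;
	 *	resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE;
	 *	resp.error = 0;
	 *	resp.val = 0;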
3878 */ 3879 resp.error = 0; 3880 resp.val = USER_NOTIF_MAGIC; 3881 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3882 EXPECT_EQ(errno, EINVAL); 3883 3884 resp.error = USER_NOTIF_MAGIC; 3885 resp.val = 0; 3886 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3887 EXPECT_EQ(errno, EINVAL); 3888 3889 resp.error = 0; 3890 resp.val = 0; 3891 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0) { 3892 if (errno == EINVAL) 3893 SKIP(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE"); 3894 } 3895 3896 skip: 3897 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3898 EXPECT_EQ(true, WIFEXITED(status)); 3899 EXPECT_EQ(0, WEXITSTATUS(status)) { 3900 if (WEXITSTATUS(status) == 2) { 3901 SKIP(return, "Kernel does not support kcmp() syscall"); 3902 return; 3903 } 3904 } 3905 } 3906 3907 TEST(user_notification_filter_empty) 3908 { 3909 pid_t pid; 3910 long ret; 3911 int status; 3912 struct pollfd pollfd; 3913 struct __clone_args args = { 3914 .flags = CLONE_FILES, 3915 .exit_signal = SIGCHLD, 3916 }; 3917 3918 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3919 ASSERT_EQ(0, ret) { 3920 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3921 } 3922 3923 if (__NR_clone3 < 0) 3924 SKIP(return, "Test not built with clone3 support"); 3925 3926 pid = sys_clone3(&args, sizeof(args)); 3927 ASSERT_GE(pid, 0); 3928 3929 if (pid == 0) { 3930 int listener; 3931 3932 listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER); 3933 if (listener < 0) 3934 _exit(EXIT_FAILURE); 3935 3936 if (dup2(listener, 200) != 200) 3937 _exit(EXIT_FAILURE); 3938 3939 close(listener); 3940 3941 _exit(EXIT_SUCCESS); 3942 } 3943 3944 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3945 EXPECT_EQ(true, WIFEXITED(status)); 3946 EXPECT_EQ(0, WEXITSTATUS(status)); 3947 3948 /* 3949 * The seccomp filter has become unused so we should be notified once 3950 * the kernel gets around to cleaning up task struct. 3951 */ 3952 pollfd.fd = 200; 3953 pollfd.events = POLLHUP; 3954 3955 EXPECT_GT(poll(&pollfd, 1, 2000), 0); 3956 EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0); 3957 } 3958 3959 TEST(user_ioctl_notification_filter_empty) 3960 { 3961 pid_t pid; 3962 long ret; 3963 int status, p[2]; 3964 struct __clone_args args = { 3965 .flags = CLONE_FILES, 3966 .exit_signal = SIGCHLD, 3967 }; 3968 struct seccomp_notif req = {}; 3969 3970 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3971 ASSERT_EQ(0, ret) { 3972 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3973 } 3974 3975 if (__NR_clone3 < 0) 3976 SKIP(return, "Test not built with clone3 support"); 3977 3978 ASSERT_EQ(0, pipe(p)); 3979 3980 pid = sys_clone3(&args, sizeof(args)); 3981 ASSERT_GE(pid, 0); 3982 3983 if (pid == 0) { 3984 int listener; 3985 3986 listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER); 3987 if (listener < 0) 3988 _exit(EXIT_FAILURE); 3989 3990 if (dup2(listener, 200) != 200) 3991 _exit(EXIT_FAILURE); 3992 close(p[1]); 3993 close(listener); 3994 sleep(1); 3995 3996 _exit(EXIT_SUCCESS); 3997 } 3998 if (read(p[0], &status, 1) != 0) 3999 _exit(EXIT_SUCCESS); 4000 close(p[0]); 4001 /* 4002 * The seccomp filter has become unused so we should be notified once 4003 * the kernel gets around to cleaning up task struct. 
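 *
 * Here that is observed through the ioctl interface rather than poll():
 * once no task is attached to the filter any more,
 * SECCOMP_IOCTL_NOTIF_RECV is expected to fail with ENOENT instead of
 * blocking for a notification that can never arrive.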
4004 */ 4005 EXPECT_EQ(ioctl(200, SECCOMP_IOCTL_NOTIF_RECV, &req), -1); 4006 EXPECT_EQ(errno, ENOENT); 4007 4008 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4009 EXPECT_EQ(true, WIFEXITED(status)); 4010 EXPECT_EQ(0, WEXITSTATUS(status)); 4011 } 4012 4013 static void *do_thread(void *data) 4014 { 4015 return NULL; 4016 } 4017 4018 TEST(user_notification_filter_empty_threaded) 4019 { 4020 pid_t pid; 4021 long ret; 4022 int status; 4023 struct pollfd pollfd; 4024 struct __clone_args args = { 4025 .flags = CLONE_FILES, 4026 .exit_signal = SIGCHLD, 4027 }; 4028 4029 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4030 ASSERT_EQ(0, ret) { 4031 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4032 } 4033 4034 if (__NR_clone3 < 0) 4035 SKIP(return, "Test not built with clone3 support"); 4036 4037 pid = sys_clone3(&args, sizeof(args)); 4038 ASSERT_GE(pid, 0); 4039 4040 if (pid == 0) { 4041 pid_t pid1, pid2; 4042 int listener, status; 4043 pthread_t thread; 4044 4045 listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER); 4046 if (listener < 0) 4047 _exit(EXIT_FAILURE); 4048 4049 if (dup2(listener, 200) != 200) 4050 _exit(EXIT_FAILURE); 4051 4052 close(listener); 4053 4054 pid1 = fork(); 4055 if (pid1 < 0) 4056 _exit(EXIT_FAILURE); 4057 4058 if (pid1 == 0) 4059 _exit(EXIT_SUCCESS); 4060 4061 pid2 = fork(); 4062 if (pid2 < 0) 4063 _exit(EXIT_FAILURE); 4064 4065 if (pid2 == 0) 4066 _exit(EXIT_SUCCESS); 4067 4068 if (pthread_create(&thread, NULL, do_thread, NULL) || 4069 pthread_join(thread, NULL)) 4070 _exit(EXIT_FAILURE); 4071 4072 if (pthread_create(&thread, NULL, do_thread, NULL) || 4073 pthread_join(thread, NULL)) 4074 _exit(EXIT_FAILURE); 4075 4076 if (waitpid(pid1, &status, 0) != pid1 || !WIFEXITED(status) || 4077 WEXITSTATUS(status)) 4078 _exit(EXIT_FAILURE); 4079 4080 if (waitpid(pid2, &status, 0) != pid2 || !WIFEXITED(status) || 4081 WEXITSTATUS(status)) 4082 _exit(EXIT_FAILURE); 4083 4084 exit(EXIT_SUCCESS); 4085 } 4086 4087 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4088 EXPECT_EQ(true, WIFEXITED(status)); 4089 EXPECT_EQ(0, WEXITSTATUS(status)); 4090 4091 /* 4092 * The seccomp filter has become unused so we should be notified once 4093 * the kernel gets around to cleaning up task struct. 4094 */ 4095 pollfd.fd = 200; 4096 pollfd.events = POLLHUP; 4097 4098 EXPECT_GT(poll(&pollfd, 1, 2000), 0); 4099 EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0); 4100 } 4101 4102 4103 int get_next_fd(int prev_fd) 4104 { 4105 for (int i = prev_fd + 1; i < FD_SETSIZE; ++i) { 4106 if (fcntl(i, F_GETFD) == -1) 4107 return i; 4108 } 4109 _exit(EXIT_FAILURE); 4110 } 4111 4112 TEST(user_notification_addfd) 4113 { 4114 pid_t pid; 4115 long ret; 4116 int status, listener, memfd, fd, nextfd; 4117 struct seccomp_notif_addfd addfd = {}; 4118 struct seccomp_notif_addfd_small small = {}; 4119 struct seccomp_notif_addfd_big big = {}; 4120 struct seccomp_notif req = {}; 4121 struct seccomp_notif_resp resp = {}; 4122 /* 100 ms */ 4123 struct timespec delay = { .tv_nsec = 100000000 }; 4124 4125 /* There may be arbitrary already-open fds at test start. 
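 * get_next_fd() probes with fcntl(fd, F_GETFD), which fails for
 * descriptors that are not in use, so the test can predict which fd
 * number the kernel will hand out next instead of hard-coding values.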
*/ 4126 memfd = memfd_create("test", 0); 4127 ASSERT_GE(memfd, 0); 4128 nextfd = get_next_fd(memfd); 4129 4130 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4131 ASSERT_EQ(0, ret) { 4132 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4133 } 4134 4135 /* fd: 4 */ 4136 /* Check that the basic notification machinery works */ 4137 listener = user_notif_syscall(__NR_getppid, 4138 SECCOMP_FILTER_FLAG_NEW_LISTENER); 4139 ASSERT_EQ(listener, nextfd); 4140 nextfd = get_next_fd(nextfd); 4141 4142 pid = fork(); 4143 ASSERT_GE(pid, 0); 4144 4145 if (pid == 0) { 4146 /* fds will be added and this value is expected */ 4147 if (syscall(__NR_getppid) != USER_NOTIF_MAGIC) 4148 exit(1); 4149 4150 /* Atomic addfd+send is received here. Check it is a valid fd */ 4151 if (fcntl(syscall(__NR_getppid), F_GETFD) == -1) 4152 exit(1); 4153 4154 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 4155 } 4156 4157 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4158 4159 addfd.srcfd = memfd; 4160 addfd.newfd = 0; 4161 addfd.id = req.id; 4162 addfd.flags = 0x0; 4163 4164 /* Verify bad newfd_flags cannot be set */ 4165 addfd.newfd_flags = ~O_CLOEXEC; 4166 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 4167 EXPECT_EQ(errno, EINVAL); 4168 addfd.newfd_flags = O_CLOEXEC; 4169 4170 /* Verify bad flags cannot be set */ 4171 addfd.flags = 0xff; 4172 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 4173 EXPECT_EQ(errno, EINVAL); 4174 addfd.flags = 0; 4175 4176 /* Verify that remote_fd cannot be set without setting flags */ 4177 addfd.newfd = 1; 4178 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 4179 EXPECT_EQ(errno, EINVAL); 4180 addfd.newfd = 0; 4181 4182 /* Verify small size cannot be set */ 4183 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_SMALL, &small), -1); 4184 EXPECT_EQ(errno, EINVAL); 4185 4186 /* Verify we can't send bits filled in unknown buffer area */ 4187 memset(&big, 0xAA, sizeof(big)); 4188 big.addfd = addfd; 4189 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big), -1); 4190 EXPECT_EQ(errno, E2BIG); 4191 4192 4193 /* Verify we can set an arbitrary remote fd */ 4194 fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); 4195 EXPECT_EQ(fd, nextfd); 4196 nextfd = get_next_fd(nextfd); 4197 EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0); 4198 4199 /* Verify we can set an arbitrary remote fd with large size */ 4200 memset(&big, 0x0, sizeof(big)); 4201 big.addfd = addfd; 4202 fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big); 4203 EXPECT_EQ(fd, nextfd); 4204 nextfd = get_next_fd(nextfd); 4205 4206 /* Verify we can set a specific remote fd */ 4207 addfd.newfd = 42; 4208 addfd.flags = SECCOMP_ADDFD_FLAG_SETFD; 4209 fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); 4210 EXPECT_EQ(fd, 42); 4211 EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0); 4212 4213 /* Resume syscall */ 4214 resp.id = req.id; 4215 resp.error = 0; 4216 resp.val = USER_NOTIF_MAGIC; 4217 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4218 4219 /* 4220 * This sets the ID of the ADD FD to the last request plus 1. The 4221 * notification ID increments 1 per notification. 
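 *
 * Until the child actually issues that next syscall the id does not
 * exist and the ADDFD ioctl fails with ENOENT; once the notification
 * exists but has not been received yet, it fails with EINPROGRESS
 * instead, so the loop below retries only while it keeps seeing ENOENT.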
4222 */ 4223 addfd.id = req.id + 1; 4224 4225 /* This spins until the underlying notification is generated */
4226 while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) == -1 && 4227 errno == ENOENT) 4228 nanosleep(&delay, NULL); 4229
4230 memset(&req, 0, sizeof(req)); 4231 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4232 ASSERT_EQ(addfd.id, req.id); 4233
4234 /* Verify we can do an atomic addfd and send */ 4235 addfd.newfd = 0; 4236 addfd.flags = SECCOMP_ADDFD_FLAG_SEND; 4237 fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
4238 /* 4239 * Child has earlier "low" fds and now 42, so we expect the next 4240 * lowest available fd to be assigned here. 4241 */ 4242 EXPECT_EQ(fd, nextfd); 4243 nextfd = get_next_fd(nextfd); 4244 ASSERT_EQ(filecmp(getpid(), pid, memfd, fd), 0); 4245
4246 /* 4247 * This sets the ID of the ADD FD to the last request plus 1. The 4248 * notification ID increments 1 per notification. 4249 */ 4250 addfd.id = req.id + 1; 4251
4252 /* This spins until the underlying notification is generated */ 4253 while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) == -1 && 4254 errno == ENOENT) 4255 nanosleep(&delay, NULL); 4256
4257 memset(&req, 0, sizeof(req)); 4258 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4259 ASSERT_EQ(addfd.id, req.id); 4260
4261 resp.id = req.id; 4262 resp.error = 0; 4263 resp.val = USER_NOTIF_MAGIC; 4264 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4265
4266 /* Wait for child to finish. */ 4267 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4268 EXPECT_EQ(true, WIFEXITED(status)); 4269 EXPECT_EQ(0, WEXITSTATUS(status)); 4270
4271 close(memfd); 4272 } 4273
4274 TEST(user_notification_addfd_rlimit) 4275 { 4276 pid_t pid; 4277 long ret; 4278 int status, listener, memfd; 4279 struct seccomp_notif_addfd addfd = {}; 4280 struct seccomp_notif req = {}; 4281 struct seccomp_notif_resp resp = {}; 4282 const struct rlimit lim = { 4283 .rlim_cur = 0, 4284 .rlim_max = 0, 4285 }; 4286
4287 memfd = memfd_create("test", 0); 4288 ASSERT_GE(memfd, 0); 4289
4290 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4291 ASSERT_EQ(0, ret) { 4292 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4293 } 4294
4295 /* Check that the basic notification machinery works */ 4296 listener = user_notif_syscall(__NR_getppid, 4297 SECCOMP_FILTER_FLAG_NEW_LISTENER); 4298 ASSERT_GE(listener, 0); 4299
4300 pid = fork(); 4301 ASSERT_GE(pid, 0); 4302
4303 if (pid == 0) 4304 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 4305 4306
4307 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4308
4309 ASSERT_EQ(prlimit(pid, RLIMIT_NOFILE, &lim, NULL), 0); 4310
4311 addfd.srcfd = memfd; 4312 addfd.newfd_flags = O_CLOEXEC; 4313 addfd.newfd = 0; 4314 addfd.id = req.id; 4315 addfd.flags = 0; 4316
4317 /* Should probably spot check /proc/sys/fs/file-nr */ 4318 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 4319 EXPECT_EQ(errno, EMFILE); 4320
4321 addfd.flags = SECCOMP_ADDFD_FLAG_SEND; 4322 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 4323 EXPECT_EQ(errno, EMFILE); 4324
4325 addfd.newfd = 100; 4326 addfd.flags = SECCOMP_ADDFD_FLAG_SETFD; 4327 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 4328 EXPECT_EQ(errno, EBADF); 4329
4330 resp.id = req.id; 4331 resp.error = 0; 4332 resp.val = USER_NOTIF_MAGIC; 4333
4334 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4335
4336 /* Wait for child to finish.
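 * The resumed getppid() returns USER_NOTIF_MAGIC, so the child should
 * still exit 0 even though every ADDFD attempt above was rejected under
 * the RLIMIT_NOFILE limit of 0.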
*/ 4337 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4338 EXPECT_EQ(true, WIFEXITED(status)); 4339 EXPECT_EQ(0, WEXITSTATUS(status)); 4340 4341 close(memfd); 4342 } 4343 4344 #ifndef SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP 4345 #define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0) 4346 #define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64) 4347 #endif 4348 4349 TEST(user_notification_sync) 4350 { 4351 struct seccomp_notif req = {}; 4352 struct seccomp_notif_resp resp = {}; 4353 int status, listener; 4354 pid_t pid; 4355 long ret; 4356 4357 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4358 ASSERT_EQ(0, ret) { 4359 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4360 } 4361 4362 listener = user_notif_syscall(__NR_getppid, 4363 SECCOMP_FILTER_FLAG_NEW_LISTENER); 4364 ASSERT_GE(listener, 0); 4365 4366 /* Try to set invalid flags. */ 4367 EXPECT_SYSCALL_RETURN(-EINVAL, 4368 ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS, 0xffffffff, 0)); 4369 4370 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS, 4371 SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP, 0), 0); 4372 4373 pid = fork(); 4374 ASSERT_GE(pid, 0); 4375 if (pid == 0) { 4376 ret = syscall(__NR_getppid); 4377 ASSERT_EQ(ret, USER_NOTIF_MAGIC) { 4378 _exit(1); 4379 } 4380 _exit(0); 4381 } 4382 4383 req.pid = 0; 4384 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4385 4386 ASSERT_EQ(req.data.nr, __NR_getppid); 4387 4388 resp.id = req.id; 4389 resp.error = 0; 4390 resp.val = USER_NOTIF_MAGIC; 4391 resp.flags = 0; 4392 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4393 4394 ASSERT_EQ(waitpid(pid, &status, 0), pid); 4395 ASSERT_EQ(status, 0); 4396 } 4397 4398 4399 /* Make sure PTRACE_O_SUSPEND_SECCOMP requires CAP_SYS_ADMIN. */ 4400 FIXTURE(O_SUSPEND_SECCOMP) { 4401 pid_t pid; 4402 }; 4403 4404 FIXTURE_SETUP(O_SUSPEND_SECCOMP) 4405 { 4406 ERRNO_FILTER(block_read, E2BIG); 4407 cap_value_t cap_list[] = { CAP_SYS_ADMIN }; 4408 cap_t caps; 4409 4410 self->pid = 0; 4411 4412 /* make sure we don't have CAP_SYS_ADMIN */ 4413 caps = cap_get_proc(); 4414 ASSERT_NE(NULL, caps); 4415 ASSERT_EQ(0, cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_list, CAP_CLEAR)); 4416 ASSERT_EQ(0, cap_set_proc(caps)); 4417 cap_free(caps); 4418 4419 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); 4420 ASSERT_EQ(0, prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_block_read)); 4421 4422 self->pid = fork(); 4423 ASSERT_GE(self->pid, 0); 4424 4425 if (self->pid == 0) { 4426 while (1) 4427 pause(); 4428 _exit(127); 4429 } 4430 } 4431 4432 FIXTURE_TEARDOWN(O_SUSPEND_SECCOMP) 4433 { 4434 if (self->pid) 4435 kill(self->pid, SIGKILL); 4436 } 4437 4438 TEST_F(O_SUSPEND_SECCOMP, setoptions) 4439 { 4440 int wstatus; 4441 4442 ASSERT_EQ(0, ptrace(PTRACE_ATTACH, self->pid, NULL, 0)); 4443 ASSERT_EQ(self->pid, wait(&wstatus)); 4444 ASSERT_EQ(-1, ptrace(PTRACE_SETOPTIONS, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP)); 4445 if (errno == EINVAL) 4446 SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)"); 4447 ASSERT_EQ(EPERM, errno); 4448 } 4449 4450 TEST_F(O_SUSPEND_SECCOMP, seize) 4451 { 4452 int ret; 4453 4454 ret = ptrace(PTRACE_SEIZE, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP); 4455 ASSERT_EQ(-1, ret); 4456 if (errno == EINVAL) 4457 SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)"); 4458 ASSERT_EQ(EPERM, errno); 4459 } 4460 4461 /* 4462 * get_nth - Get the nth, space separated entry in a file. 4463 * 4464 * Returns the length of the read field. 
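 * (getdelim() counts the trailing delimiter, hence the "nread - 1"
 * returned below.)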
4465 * Throws error if field is zero-lengthed. 4466 */ 4467 static ssize_t get_nth(struct __test_metadata *_metadata, const char *path, 4468 const unsigned int position, char **entry) 4469 { 4470 char *line = NULL; 4471 unsigned int i; 4472 ssize_t nread; 4473 size_t len = 0; 4474 FILE *f; 4475 4476 f = fopen(path, "r"); 4477 ASSERT_NE(f, NULL) { 4478 TH_LOG("Could not open %s: %s", path, strerror(errno)); 4479 } 4480 4481 for (i = 0; i < position; i++) { 4482 nread = getdelim(&line, &len, ' ', f); 4483 ASSERT_GE(nread, 0) { 4484 TH_LOG("Failed to read %d entry in file %s", i, path); 4485 } 4486 } 4487 fclose(f); 4488 4489 ASSERT_GT(nread, 0) { 4490 TH_LOG("Entry in file %s had zero length", path); 4491 } 4492 4493 *entry = line; 4494 return nread - 1; 4495 } 4496 4497 /* For a given PID, get the task state (D, R, etc...) */ 4498 static char get_proc_stat(struct __test_metadata *_metadata, pid_t pid) 4499 { 4500 char proc_path[100] = {0}; 4501 char status; 4502 char *line; 4503 4504 snprintf(proc_path, sizeof(proc_path), "/proc/%d/stat", pid); 4505 ASSERT_EQ(get_nth(_metadata, proc_path, 3, &line), 1); 4506 4507 status = *line; 4508 free(line); 4509 4510 return status; 4511 } 4512 4513 TEST(user_notification_fifo) 4514 { 4515 struct seccomp_notif_resp resp = {}; 4516 struct seccomp_notif req = {}; 4517 int i, status, listener; 4518 pid_t pid, pids[3]; 4519 __u64 baseid; 4520 long ret; 4521 /* 100 ms */ 4522 struct timespec delay = { .tv_nsec = 100000000 }; 4523 4524 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4525 ASSERT_EQ(0, ret) { 4526 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4527 } 4528 4529 /* Setup a listener */ 4530 listener = user_notif_syscall(__NR_getppid, 4531 SECCOMP_FILTER_FLAG_NEW_LISTENER); 4532 ASSERT_GE(listener, 0); 4533 4534 pid = fork(); 4535 ASSERT_GE(pid, 0); 4536 4537 if (pid == 0) { 4538 ret = syscall(__NR_getppid); 4539 exit(ret != USER_NOTIF_MAGIC); 4540 } 4541 4542 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4543 baseid = req.id + 1; 4544 4545 resp.id = req.id; 4546 resp.error = 0; 4547 resp.val = USER_NOTIF_MAGIC; 4548 4549 /* check that we make sure flags == 0 */ 4550 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4551 4552 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4553 EXPECT_EQ(true, WIFEXITED(status)); 4554 EXPECT_EQ(0, WEXITSTATUS(status)); 4555 4556 /* Start children, and generate notifications */ 4557 for (i = 0; i < ARRAY_SIZE(pids); i++) { 4558 pid = fork(); 4559 if (pid == 0) { 4560 ret = syscall(__NR_getppid); 4561 exit(ret != USER_NOTIF_MAGIC); 4562 } 4563 pids[i] = pid; 4564 } 4565 4566 /* This spins until all of the children are sleeping */ 4567 restart_wait: 4568 for (i = 0; i < ARRAY_SIZE(pids); i++) { 4569 if (get_proc_stat(_metadata, pids[i]) != 'S') { 4570 nanosleep(&delay, NULL); 4571 goto restart_wait; 4572 } 4573 } 4574 4575 /* Read the notifications in order (and respond) */ 4576 for (i = 0; i < ARRAY_SIZE(pids); i++) { 4577 memset(&req, 0, sizeof(req)); 4578 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4579 EXPECT_EQ(req.id, baseid + i); 4580 resp.id = req.id; 4581 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4582 } 4583 4584 /* Make sure notifications were received */ 4585 for (i = 0; i < ARRAY_SIZE(pids); i++) { 4586 EXPECT_EQ(waitpid(pids[i], &status, 0), pids[i]); 4587 EXPECT_EQ(true, WIFEXITED(status)); 4588 EXPECT_EQ(0, WEXITSTATUS(status)); 4589 } 4590 } 4591 4592 /* get_proc_syscall - Get the syscall in progress for a given pid 4593 * 4594 * 
Returns the current syscall number for a given process 4595 * Returns -1 if not in syscall (running or blocked) 4596 */ 4597 static long get_proc_syscall(struct __test_metadata *_metadata, int pid) 4598 { 4599 char proc_path[100] = {0}; 4600 long ret = -1; 4601 ssize_t nread; 4602 char *line; 4603 4604 snprintf(proc_path, sizeof(proc_path), "/proc/%d/syscall", pid); 4605 nread = get_nth(_metadata, proc_path, 1, &line); 4606 ASSERT_GT(nread, 0); 4607 4608 if (!strncmp("running", line, MIN(7, nread))) 4609 ret = strtol(line, NULL, 16); 4610 4611 free(line); 4612 return ret; 4613 } 4614 4615 /* Ensure non-fatal signals prior to receive are unmodified */ 4616 TEST(user_notification_wait_killable_pre_notification) 4617 { 4618 struct sigaction new_action = { 4619 .sa_handler = signal_handler, 4620 }; 4621 int listener, status, sk_pair[2]; 4622 pid_t pid; 4623 long ret; 4624 char c; 4625 /* 100 ms */ 4626 struct timespec delay = { .tv_nsec = 100000000 }; 4627 4628 ASSERT_EQ(sigemptyset(&new_action.sa_mask), 0); 4629 4630 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4631 ASSERT_EQ(0, ret) 4632 { 4633 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4634 } 4635 4636 ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0); 4637 4638 listener = user_notif_syscall( 4639 __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER | 4640 SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV); 4641 ASSERT_GE(listener, 0); 4642 4643 /* 4644 * Check that we can kill the process with SIGUSR1 prior to receiving 4645 * the notification. SIGUSR1 is wired up to a custom signal handler, 4646 * and make sure it gets called. 4647 */ 4648 pid = fork(); 4649 ASSERT_GE(pid, 0); 4650 4651 if (pid == 0) { 4652 close(sk_pair[0]); 4653 handled = sk_pair[1]; 4654 4655 /* Setup the non-fatal sigaction without SA_RESTART */ 4656 if (sigaction(SIGUSR1, &new_action, NULL)) { 4657 perror("sigaction"); 4658 exit(1); 4659 } 4660 4661 ret = syscall(__NR_getppid); 4662 /* Make sure we got a return from a signal interruption */ 4663 exit(ret != -1 || errno != EINTR); 4664 } 4665 4666 /* 4667 * Make sure we've gotten to the seccomp user notification wait 4668 * from getppid prior to sending any signals 4669 */ 4670 while (get_proc_syscall(_metadata, pid) != __NR_getppid && 4671 get_proc_stat(_metadata, pid) != 'S') 4672 nanosleep(&delay, NULL); 4673 4674 /* Send non-fatal kill signal */ 4675 EXPECT_EQ(kill(pid, SIGUSR1), 0); 4676 4677 /* wait for process to exit (exit checks for EINTR) */ 4678 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4679 EXPECT_EQ(true, WIFEXITED(status)); 4680 EXPECT_EQ(0, WEXITSTATUS(status)); 4681 4682 EXPECT_EQ(read(sk_pair[0], &c, 1), 1); 4683 } 4684 4685 /* Ensure non-fatal signals after receive are blocked */ 4686 TEST(user_notification_wait_killable) 4687 { 4688 struct sigaction new_action = { 4689 .sa_handler = signal_handler, 4690 }; 4691 struct seccomp_notif_resp resp = {}; 4692 struct seccomp_notif req = {}; 4693 int listener, status, sk_pair[2]; 4694 pid_t pid; 4695 long ret; 4696 char c; 4697 /* 100 ms */ 4698 struct timespec delay = { .tv_nsec = 100000000 }; 4699 4700 ASSERT_EQ(sigemptyset(&new_action.sa_mask), 0); 4701 4702 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4703 ASSERT_EQ(0, ret) 4704 { 4705 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4706 } 4707 4708 ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0); 4709 4710 listener = user_notif_syscall( 4711 __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER | 4712 SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV); 4713 ASSERT_GE(listener, 0); 4714 
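	/*
	 * The child blocks in getppid() while the supervisor holds the
	 * notification; a non-fatal SIGUSR1 sent at that point must not
	 * interrupt the syscall, so the handler (which reports through
	 * sk_pair[1]) should only run after the reply lets the child
	 * return to userspace.
	 */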
4715 pid = fork(); 4716 ASSERT_GE(pid, 0); 4717
4718 if (pid == 0) { 4719 close(sk_pair[0]); 4720 handled = sk_pair[1]; 4721
4722 /* Set up the sigaction without SA_RESTART */ 4723 if (sigaction(SIGUSR1, &new_action, NULL)) { 4724 perror("sigaction"); 4725 exit(1); 4726 } 4727
4728 /* Make sure that the syscall is completed (no EINTR) */ 4729 ret = syscall(__NR_getppid); 4730 exit(ret != USER_NOTIF_MAGIC); 4731 } 4732
4733 /* 4734 * Get the notification, to move the notifying process into a 4735 * non-preemptible (TASK_KILLABLE) state. 4736 */ 4737 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4738 /* Send non-fatal kill signal */ 4739 EXPECT_EQ(kill(pid, SIGUSR1), 0); 4740
4741 /* 4742 * Make sure the task moves to TASK_KILLABLE by waiting for the 4743 * D (Disk Sleep) state after receiving the non-fatal signal. 4744 */ 4745 while (get_proc_stat(_metadata, pid) != 'D') 4746 nanosleep(&delay, NULL); 4747
4748 resp.id = req.id; 4749 resp.val = USER_NOTIF_MAGIC; 4750 /* Make sure the notification is found and able to be replied to */ 4751 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4752
4753 /* 4754 * Make sure that the signal handler does get called once we're back in 4755 * userspace. 4756 */ 4757 EXPECT_EQ(read(sk_pair[0], &c, 1), 1); 4758 /* wait for process to exit (exit checks for USER_NOTIF_MAGIC) */ 4759 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4760 EXPECT_EQ(true, WIFEXITED(status)); 4761 EXPECT_EQ(0, WEXITSTATUS(status)); 4762 } 4763
4764 /* Ensure fatal signals after receive are not blocked */ 4765 TEST(user_notification_wait_killable_fatal) 4766 { 4767 struct seccomp_notif req = {}; 4768 int listener, status; 4769 pid_t pid; 4770 long ret; 4771 /* 100 ms */ 4772 struct timespec delay = { .tv_nsec = 100000000 }; 4773
4774 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4775 ASSERT_EQ(0, ret) 4776 { 4777 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4778 } 4779
4780 listener = user_notif_syscall( 4781 __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER | 4782 SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV); 4783 ASSERT_GE(listener, 0); 4784
4785 pid = fork(); 4786 ASSERT_GE(pid, 0); 4787
4788 if (pid == 0) { 4789 /* This should never complete as it should get a SIGTERM */ 4790 syscall(__NR_getppid); 4791 exit(1); 4792 } 4793
4794 while (get_proc_stat(_metadata, pid) != 'S') 4795 nanosleep(&delay, NULL); 4796
4797 /* 4798 * Get the notification, to move the notifying process into a 4799 * non-preemptible (TASK_KILLABLE) state. 4800 */ 4801 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4802 /* Kill the process with a fatal signal */ 4803 EXPECT_EQ(kill(pid, SIGTERM), 0); 4804
4805 /* 4806 * Wait for the process to exit, and make sure the process terminated 4807 * due to the SIGTERM signal.
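 *
 * Unlike the non-fatal case above, SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV
 * still allows fatal signals to be delivered after the notification has
 * been received, so the child is expected to die from SIGTERM rather
 * than return from getppid().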
4808 */ 4809 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4810 EXPECT_EQ(true, WIFSIGNALED(status)); 4811 EXPECT_EQ(SIGTERM, WTERMSIG(status)); 4812 } 4813 4814 struct tsync_vs_thread_leader_args { 4815 pthread_t leader; 4816 }; 4817 4818 static void *tsync_vs_dead_thread_leader_sibling(void *_args) 4819 { 4820 struct sock_filter allow_filter[] = { 4821 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 4822 }; 4823 struct sock_fprog allow_prog = { 4824 .len = (unsigned short)ARRAY_SIZE(allow_filter), 4825 .filter = allow_filter, 4826 }; 4827 struct tsync_vs_thread_leader_args *args = _args; 4828 void *retval; 4829 long ret; 4830 4831 ret = pthread_join(args->leader, &retval); 4832 if (ret) 4833 exit(1); 4834 if (retval != _args) 4835 exit(2); 4836 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &allow_prog); 4837 if (ret) 4838 exit(3); 4839 4840 exit(0); 4841 } 4842 4843 /* 4844 * Ensure that a dead thread leader doesn't prevent installing new filters with 4845 * SECCOMP_FILTER_FLAG_TSYNC from other threads. 4846 */ 4847 TEST(tsync_vs_dead_thread_leader) 4848 { 4849 int status; 4850 pid_t pid; 4851 long ret; 4852 4853 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4854 ASSERT_EQ(0, ret) { 4855 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4856 } 4857 4858 pid = fork(); 4859 ASSERT_GE(pid, 0); 4860 4861 if (pid == 0) { 4862 struct sock_filter allow_filter[] = { 4863 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 4864 }; 4865 struct sock_fprog allow_prog = { 4866 .len = (unsigned short)ARRAY_SIZE(allow_filter), 4867 .filter = allow_filter, 4868 }; 4869 struct tsync_vs_thread_leader_args *args; 4870 pthread_t sibling; 4871 4872 args = malloc(sizeof(*args)); 4873 ASSERT_NE(NULL, args); 4874 args->leader = pthread_self(); 4875 4876 ret = pthread_create(&sibling, NULL, 4877 tsync_vs_dead_thread_leader_sibling, args); 4878 ASSERT_EQ(0, ret); 4879 4880 /* Install a new filter just to the leader thread. */ 4881 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog); 4882 ASSERT_EQ(0, ret); 4883 pthread_exit(args); 4884 exit(1); 4885 } 4886 4887 EXPECT_EQ(pid, waitpid(pid, &status, 0)); 4888 EXPECT_EQ(0, status); 4889 } 4890 4891 /* 4892 * TODO: 4893 * - expand NNP testing 4894 * - better arch-specific TRACE and TRAP handlers. 4895 * - endianness checking when appropriate 4896 * - 64-bit arg prodding 4897 * - arch value testing (x86 modes especially) 4898 * - verify that FILTER_FLAG_LOG filters generate log messages 4899 * - verify that RET_LOG generates log messages 4900 */ 4901 4902 TEST_HARNESS_MAIN 4903