1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (c) 2012 The Chromium OS Authors. All rights reserved. 4 * 5 * Test code for seccomp bpf. 6 */ 7 8 #define _GNU_SOURCE 9 #include <sys/types.h> 10 11 /* 12 * glibc 2.26 and later have SIGSYS in siginfo_t. Before that, 13 * we need to use the kernel's siginfo.h file and trick glibc 14 * into accepting it. 15 */ 16 #if !__GLIBC_PREREQ(2, 26) 17 # include <asm/siginfo.h> 18 # define __have_siginfo_t 1 19 # define __have_sigval_t 1 20 # define __have_sigevent_t 1 21 #endif 22 23 #include <errno.h> 24 #include <linux/filter.h> 25 #include <sys/prctl.h> 26 #include <sys/ptrace.h> 27 #include <sys/user.h> 28 #include <linux/prctl.h> 29 #include <linux/ptrace.h> 30 #include <linux/seccomp.h> 31 #include <pthread.h> 32 #include <semaphore.h> 33 #include <signal.h> 34 #include <stddef.h> 35 #include <stdbool.h> 36 #include <string.h> 37 #include <time.h> 38 #include <limits.h> 39 #include <linux/elf.h> 40 #include <sys/uio.h> 41 #include <sys/utsname.h> 42 #include <sys/fcntl.h> 43 #include <sys/mman.h> 44 #include <sys/times.h> 45 #include <sys/socket.h> 46 #include <sys/ioctl.h> 47 #include <linux/kcmp.h> 48 #include <sys/resource.h> 49 #include <sys/capability.h> 50 #include <linux/perf_event.h> 51 52 #include <unistd.h> 53 #include <sys/syscall.h> 54 #include <poll.h> 55 56 #include "../kselftest_harness.h" 57 #include "../clone3/clone3_selftests.h" 58 59 /* Attempt to de-conflict with the selftests tree. */ 60 #ifndef SKIP 61 #define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__) 62 #endif 63 64 #ifndef MIN 65 #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) 66 #endif 67 68 #ifndef PR_SET_PTRACER 69 # define PR_SET_PTRACER 0x59616d61 70 #endif 71 72 #ifndef noinline 73 #define noinline __attribute__((noinline)) 74 #endif 75 76 #ifndef PR_SET_NO_NEW_PRIVS 77 #define PR_SET_NO_NEW_PRIVS 38 78 #define PR_GET_NO_NEW_PRIVS 39 79 #endif 80 81 #ifndef PR_SECCOMP_EXT 82 #define PR_SECCOMP_EXT 43 83 #endif 84 85 #ifndef SECCOMP_EXT_ACT 86 #define SECCOMP_EXT_ACT 1 87 #endif 88 89 #ifndef SECCOMP_EXT_ACT_TSYNC 90 #define SECCOMP_EXT_ACT_TSYNC 1 91 #endif 92 93 #ifndef SECCOMP_MODE_STRICT 94 #define SECCOMP_MODE_STRICT 1 95 #endif 96 97 #ifndef SECCOMP_MODE_FILTER 98 #define SECCOMP_MODE_FILTER 2 99 #endif 100 101 #ifndef SECCOMP_RET_ALLOW 102 struct seccomp_data { 103 int nr; 104 __u32 arch; 105 __u64 instruction_pointer; 106 __u64 args[6]; 107 }; 108 #endif 109 110 #ifndef SECCOMP_RET_KILL_PROCESS 111 #define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */ 112 #define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */ 113 #endif 114 #ifndef SECCOMP_RET_KILL 115 #define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD 116 #define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ 117 #define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ 118 #define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ 119 #define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ 120 #endif 121 #ifndef SECCOMP_RET_LOG 122 #define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ 123 #endif 124 125 #ifndef __NR_seccomp 126 # if defined(__i386__) 127 # define __NR_seccomp 354 128 # elif defined(__x86_64__) 129 # define __NR_seccomp 317 130 # elif defined(__arm__) 131 # define __NR_seccomp 383 132 # elif defined(__aarch64__) 133 # define __NR_seccomp 277 134 # elif defined(__riscv) 135 # define __NR_seccomp 277 136 # elif defined(__csky__) 137 # define __NR_seccomp 277 138 # elif defined(__loongarch__) 139 # define __NR_seccomp 277 140 # elif defined(__hppa__) 141 # define __NR_seccomp 338 142 # elif defined(__powerpc__) 143 # define __NR_seccomp 358 144 # elif defined(__s390__) 145 # define __NR_seccomp 348 146 # elif defined(__xtensa__) 147 # define __NR_seccomp 337 148 # elif defined(__sh__) 149 # define __NR_seccomp 372 150 # elif defined(__mc68000__) 151 # define __NR_seccomp 380 152 # else 153 # warning "seccomp syscall number unknown for this architecture" 154 # define __NR_seccomp 0xffff 155 # endif 156 #endif 157 158 #ifndef __NR_uretprobe 159 # if defined(__x86_64__) 160 # define __NR_uretprobe 335 161 # endif 162 #endif 163 164 #ifndef SECCOMP_SET_MODE_STRICT 165 #define SECCOMP_SET_MODE_STRICT 0 166 #endif 167 168 #ifndef SECCOMP_SET_MODE_FILTER 169 #define SECCOMP_SET_MODE_FILTER 1 170 #endif 171 172 #ifndef SECCOMP_GET_ACTION_AVAIL 173 #define SECCOMP_GET_ACTION_AVAIL 2 174 #endif 175 176 #ifndef SECCOMP_GET_NOTIF_SIZES 177 #define SECCOMP_GET_NOTIF_SIZES 3 178 #endif 179 180 #ifndef SECCOMP_FILTER_FLAG_TSYNC 181 #define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) 182 #endif 183 184 #ifndef SECCOMP_FILTER_FLAG_LOG 185 #define SECCOMP_FILTER_FLAG_LOG (1UL << 1) 186 #endif 187 188 #ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW 189 #define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) 190 #endif 191 192 #ifndef PTRACE_SECCOMP_GET_METADATA 193 #define PTRACE_SECCOMP_GET_METADATA 0x420d 194 195 struct seccomp_metadata { 196 __u64 filter_off; /* Input: which filter */ 197 __u64 flags; /* Output: filter's flags */ 198 }; 199 #endif 200 201 #ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER 202 #define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3) 203 #endif 204 205 #ifndef SECCOMP_RET_USER_NOTIF 206 #define SECCOMP_RET_USER_NOTIF 0x7fc00000U 207 208 #define SECCOMP_IOC_MAGIC '!' 209 #define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr) 210 #define SECCOMP_IOR(nr, type) _IOR(SECCOMP_IOC_MAGIC, nr, type) 211 #define SECCOMP_IOW(nr, type) _IOW(SECCOMP_IOC_MAGIC, nr, type) 212 #define SECCOMP_IOWR(nr, type) _IOWR(SECCOMP_IOC_MAGIC, nr, type) 213 214 /* Flags for seccomp notification fd ioctl. */ 215 #define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif) 216 #define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \ 217 struct seccomp_notif_resp) 218 #define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOW(2, __u64) 219 220 struct seccomp_notif { 221 __u64 id; 222 __u32 pid; 223 __u32 flags; 224 struct seccomp_data data; 225 }; 226 227 struct seccomp_notif_resp { 228 __u64 id; 229 __s64 val; 230 __s32 error; 231 __u32 flags; 232 }; 233 234 struct seccomp_notif_sizes { 235 __u16 seccomp_notif; 236 __u16 seccomp_notif_resp; 237 __u16 seccomp_data; 238 }; 239 #endif 240 241 #ifndef SECCOMP_IOCTL_NOTIF_ADDFD 242 /* On success, the return value is the remote process's added fd number */ 243 #define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \ 244 struct seccomp_notif_addfd) 245 246 /* valid flags for seccomp_notif_addfd */ 247 #define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */ 248 249 struct seccomp_notif_addfd { 250 __u64 id; 251 __u32 flags; 252 __u32 srcfd; 253 __u32 newfd; 254 __u32 newfd_flags; 255 }; 256 #endif 257 258 #ifndef SECCOMP_ADDFD_FLAG_SEND 259 #define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */ 260 #endif 261 262 struct seccomp_notif_addfd_small { 263 __u64 id; 264 char weird[4]; 265 }; 266 #define SECCOMP_IOCTL_NOTIF_ADDFD_SMALL \ 267 SECCOMP_IOW(3, struct seccomp_notif_addfd_small) 268 269 struct seccomp_notif_addfd_big { 270 union { 271 struct seccomp_notif_addfd addfd; 272 char buf[sizeof(struct seccomp_notif_addfd) + 8]; 273 }; 274 }; 275 #define SECCOMP_IOCTL_NOTIF_ADDFD_BIG \ 276 SECCOMP_IOWR(3, struct seccomp_notif_addfd_big) 277 278 #ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY 279 #define PTRACE_EVENTMSG_SYSCALL_ENTRY 1 280 #define PTRACE_EVENTMSG_SYSCALL_EXIT 2 281 #endif 282 283 #ifndef SECCOMP_USER_NOTIF_FLAG_CONTINUE 284 #define SECCOMP_USER_NOTIF_FLAG_CONTINUE 0x00000001 285 #endif 286 287 #ifndef SECCOMP_FILTER_FLAG_TSYNC_ESRCH 288 #define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4) 289 #endif 290 291 #ifndef SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV 292 #define SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV (1UL << 5) 293 #endif 294 295 #ifndef seccomp 296 int seccomp(unsigned int op, unsigned int flags, void *args) 297 { 298 errno = 0; 299 return syscall(__NR_seccomp, op, flags, args); 300 } 301 #endif 302 303 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 304 #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n])) 305 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 306 #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32)) 307 #else 308 #error "wut? Unknown __BYTE_ORDER__?!" 309 #endif 310 311 #define SIBLING_EXIT_UNKILLED 0xbadbeef 312 #define SIBLING_EXIT_FAILURE 0xbadface 313 #define SIBLING_EXIT_NEWPRIVS 0xbadfeed 314 315 static int __filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2) 316 { 317 #ifdef __NR_kcmp 318 errno = 0; 319 return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2); 320 #else 321 errno = ENOSYS; 322 return -1; 323 #endif 324 } 325 326 /* Have TH_LOG report actual location filecmp() is used. */ 327 #define filecmp(pid1, pid2, fd1, fd2) ({ \ 328 int _ret; \ 329 \ 330 _ret = __filecmp(pid1, pid2, fd1, fd2); \ 331 if (_ret != 0) { \ 332 if (_ret < 0 && errno == ENOSYS) { \ 333 TH_LOG("kcmp() syscall missing (test is less accurate)");\ 334 _ret = 0; \ 335 } \ 336 } \ 337 _ret; }) 338 339 TEST(kcmp) 340 { 341 int ret; 342 343 ret = __filecmp(getpid(), getpid(), 1, 1); 344 EXPECT_EQ(ret, 0); 345 if (ret != 0 && errno == ENOSYS) 346 SKIP(return, "Kernel does not support kcmp() (missing CONFIG_KCMP?)"); 347 } 348 349 TEST(mode_strict_support) 350 { 351 long ret; 352 353 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL); 354 ASSERT_EQ(0, ret) { 355 TH_LOG("Kernel does not support CONFIG_SECCOMP"); 356 } 357 syscall(__NR_exit, 0); 358 } 359 360 TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL) 361 { 362 long ret; 363 364 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL); 365 ASSERT_EQ(0, ret) { 366 TH_LOG("Kernel does not support CONFIG_SECCOMP"); 367 } 368 syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER, 369 NULL, NULL, NULL); 370 EXPECT_FALSE(true) { 371 TH_LOG("Unreachable!"); 372 } 373 } 374 375 /* Note! This doesn't test no new privs behavior */ 376 TEST(no_new_privs_support) 377 { 378 long ret; 379 380 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 381 EXPECT_EQ(0, ret) { 382 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 383 } 384 } 385 386 /* Tests kernel support by checking for a copy_from_user() fault on NULL. */ 387 TEST(mode_filter_support) 388 { 389 long ret; 390 391 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); 392 ASSERT_EQ(0, ret) { 393 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 394 } 395 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL); 396 EXPECT_EQ(-1, ret); 397 EXPECT_EQ(EFAULT, errno) { 398 TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!"); 399 } 400 } 401 402 TEST(mode_filter_without_nnp) 403 { 404 struct sock_filter filter[] = { 405 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 406 }; 407 struct sock_fprog prog = { 408 .len = (unsigned short)ARRAY_SIZE(filter), 409 .filter = filter, 410 }; 411 long ret; 412 cap_t cap = cap_get_proc(); 413 cap_flag_value_t is_cap_sys_admin = 0; 414 415 ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0); 416 ASSERT_LE(0, ret) { 417 TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS"); 418 } 419 errno = 0; 420 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 421 /* Succeeds with CAP_SYS_ADMIN, fails without */ 422 cap_get_flag(cap, CAP_SYS_ADMIN, CAP_EFFECTIVE, &is_cap_sys_admin); 423 if (!is_cap_sys_admin) { 424 EXPECT_EQ(-1, ret); 425 EXPECT_EQ(EACCES, errno); 426 } else { 427 EXPECT_EQ(0, ret); 428 } 429 } 430 431 #define MAX_INSNS_PER_PATH 32768 432 433 TEST(filter_size_limits) 434 { 435 int i; 436 int count = BPF_MAXINSNS + 1; 437 struct sock_filter allow[] = { 438 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 439 }; 440 struct sock_filter *filter; 441 struct sock_fprog prog = { }; 442 long ret; 443 444 filter = calloc(count, sizeof(*filter)); 445 ASSERT_NE(NULL, filter); 446 447 for (i = 0; i < count; i++) 448 filter[i] = allow[0]; 449 450 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 451 ASSERT_EQ(0, ret); 452 453 prog.filter = filter; 454 prog.len = count; 455 456 /* Too many filter instructions in a single filter. */ 457 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 458 ASSERT_NE(0, ret) { 459 TH_LOG("Installing %d insn filter was allowed", prog.len); 460 } 461 462 /* One less is okay, though. */ 463 prog.len -= 1; 464 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 465 ASSERT_EQ(0, ret) { 466 TH_LOG("Installing %d insn filter wasn't allowed", prog.len); 467 } 468 } 469 470 TEST(filter_chain_limits) 471 { 472 int i; 473 int count = BPF_MAXINSNS; 474 struct sock_filter allow[] = { 475 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 476 }; 477 struct sock_filter *filter; 478 struct sock_fprog prog = { }; 479 long ret; 480 481 filter = calloc(count, sizeof(*filter)); 482 ASSERT_NE(NULL, filter); 483 484 for (i = 0; i < count; i++) 485 filter[i] = allow[0]; 486 487 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 488 ASSERT_EQ(0, ret); 489 490 prog.filter = filter; 491 prog.len = 1; 492 493 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 494 ASSERT_EQ(0, ret); 495 496 prog.len = count; 497 498 /* Too many total filter instructions. */ 499 for (i = 0; i < MAX_INSNS_PER_PATH; i++) { 500 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 501 if (ret != 0) 502 break; 503 } 504 ASSERT_NE(0, ret) { 505 TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)", 506 i, count, i * (count + 4)); 507 } 508 } 509 510 TEST(mode_filter_cannot_move_to_strict) 511 { 512 struct sock_filter filter[] = { 513 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 514 }; 515 struct sock_fprog prog = { 516 .len = (unsigned short)ARRAY_SIZE(filter), 517 .filter = filter, 518 }; 519 long ret; 520 521 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 522 ASSERT_EQ(0, ret); 523 524 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 525 ASSERT_EQ(0, ret); 526 527 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0); 528 EXPECT_EQ(-1, ret); 529 EXPECT_EQ(EINVAL, errno); 530 } 531 532 533 TEST(mode_filter_get_seccomp) 534 { 535 struct sock_filter filter[] = { 536 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 537 }; 538 struct sock_fprog prog = { 539 .len = (unsigned short)ARRAY_SIZE(filter), 540 .filter = filter, 541 }; 542 long ret; 543 544 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 545 ASSERT_EQ(0, ret); 546 547 ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0); 548 EXPECT_EQ(0, ret); 549 550 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 551 ASSERT_EQ(0, ret); 552 553 ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0); 554 EXPECT_EQ(2, ret); 555 } 556 557 558 TEST(ALLOW_all) 559 { 560 struct sock_filter filter[] = { 561 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 562 }; 563 struct sock_fprog prog = { 564 .len = (unsigned short)ARRAY_SIZE(filter), 565 .filter = filter, 566 }; 567 long ret; 568 569 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 570 ASSERT_EQ(0, ret); 571 572 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 573 ASSERT_EQ(0, ret); 574 } 575 576 TEST(empty_prog) 577 { 578 struct sock_filter filter[] = { 579 }; 580 struct sock_fprog prog = { 581 .len = (unsigned short)ARRAY_SIZE(filter), 582 .filter = filter, 583 }; 584 long ret; 585 586 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 587 ASSERT_EQ(0, ret); 588 589 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 590 EXPECT_EQ(-1, ret); 591 EXPECT_EQ(EINVAL, errno); 592 } 593 594 TEST(log_all) 595 { 596 struct sock_filter filter[] = { 597 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG), 598 }; 599 struct sock_fprog prog = { 600 .len = (unsigned short)ARRAY_SIZE(filter), 601 .filter = filter, 602 }; 603 long ret; 604 pid_t parent = getppid(); 605 606 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 607 ASSERT_EQ(0, ret); 608 609 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 610 ASSERT_EQ(0, ret); 611 612 /* getppid() should succeed and be logged (no check for logging) */ 613 EXPECT_EQ(parent, syscall(__NR_getppid)); 614 } 615 616 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS) 617 { 618 struct sock_filter filter[] = { 619 BPF_STMT(BPF_RET|BPF_K, 0x10000000U), 620 }; 621 struct sock_fprog prog = { 622 .len = (unsigned short)ARRAY_SIZE(filter), 623 .filter = filter, 624 }; 625 long ret; 626 627 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 628 ASSERT_EQ(0, ret); 629 630 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 631 ASSERT_EQ(0, ret); 632 EXPECT_EQ(0, syscall(__NR_getpid)) { 633 TH_LOG("getpid() shouldn't ever return"); 634 } 635 } 636 637 /* return code >= 0x80000000 is unused. */ 638 TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS) 639 { 640 struct sock_filter filter[] = { 641 BPF_STMT(BPF_RET|BPF_K, 0x90000000U), 642 }; 643 struct sock_fprog prog = { 644 .len = (unsigned short)ARRAY_SIZE(filter), 645 .filter = filter, 646 }; 647 long ret; 648 649 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 650 ASSERT_EQ(0, ret); 651 652 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 653 ASSERT_EQ(0, ret); 654 EXPECT_EQ(0, syscall(__NR_getpid)) { 655 TH_LOG("getpid() shouldn't ever return"); 656 } 657 } 658 659 TEST_SIGNAL(KILL_all, SIGSYS) 660 { 661 struct sock_filter filter[] = { 662 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 663 }; 664 struct sock_fprog prog = { 665 .len = (unsigned short)ARRAY_SIZE(filter), 666 .filter = filter, 667 }; 668 long ret; 669 670 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 671 ASSERT_EQ(0, ret); 672 673 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 674 ASSERT_EQ(0, ret); 675 } 676 677 TEST_SIGNAL(KILL_one, SIGSYS) 678 { 679 struct sock_filter filter[] = { 680 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 681 offsetof(struct seccomp_data, nr)), 682 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 683 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 684 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 685 }; 686 struct sock_fprog prog = { 687 .len = (unsigned short)ARRAY_SIZE(filter), 688 .filter = filter, 689 }; 690 long ret; 691 pid_t parent = getppid(); 692 693 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 694 ASSERT_EQ(0, ret); 695 696 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 697 ASSERT_EQ(0, ret); 698 699 EXPECT_EQ(parent, syscall(__NR_getppid)); 700 /* getpid() should never return. */ 701 EXPECT_EQ(0, syscall(__NR_getpid)); 702 } 703 704 TEST_SIGNAL(KILL_one_arg_one, SIGSYS) 705 { 706 void *fatal_address; 707 struct sock_filter filter[] = { 708 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 709 offsetof(struct seccomp_data, nr)), 710 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0), 711 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 712 /* Only both with lower 32-bit for now. */ 713 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)), 714 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 715 (unsigned long)&fatal_address, 0, 1), 716 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 717 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 718 }; 719 struct sock_fprog prog = { 720 .len = (unsigned short)ARRAY_SIZE(filter), 721 .filter = filter, 722 }; 723 long ret; 724 pid_t parent = getppid(); 725 struct tms timebuf; 726 clock_t clock = times(&timebuf); 727 728 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 729 ASSERT_EQ(0, ret); 730 731 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 732 ASSERT_EQ(0, ret); 733 734 EXPECT_EQ(parent, syscall(__NR_getppid)); 735 EXPECT_LE(clock, syscall(__NR_times, &timebuf)); 736 /* times() should never return. */ 737 EXPECT_EQ(0, syscall(__NR_times, &fatal_address)); 738 } 739 740 TEST_SIGNAL(KILL_one_arg_six, SIGSYS) 741 { 742 #ifndef __NR_mmap2 743 int sysno = __NR_mmap; 744 #else 745 int sysno = __NR_mmap2; 746 #endif 747 struct sock_filter filter[] = { 748 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 749 offsetof(struct seccomp_data, nr)), 750 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0), 751 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 752 /* Only both with lower 32-bit for now. */ 753 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)), 754 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1), 755 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 756 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 757 }; 758 struct sock_fprog prog = { 759 .len = (unsigned short)ARRAY_SIZE(filter), 760 .filter = filter, 761 }; 762 long ret; 763 pid_t parent = getppid(); 764 int fd; 765 void *map1, *map2; 766 int page_size = sysconf(_SC_PAGESIZE); 767 768 ASSERT_LT(0, page_size); 769 770 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 771 ASSERT_EQ(0, ret); 772 773 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 774 ASSERT_EQ(0, ret); 775 776 fd = open("/dev/zero", O_RDONLY); 777 ASSERT_NE(-1, fd); 778 779 EXPECT_EQ(parent, syscall(__NR_getppid)); 780 map1 = (void *)syscall(sysno, 781 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size); 782 EXPECT_NE(MAP_FAILED, map1); 783 /* mmap2() should never return. */ 784 map2 = (void *)syscall(sysno, 785 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE); 786 EXPECT_EQ(MAP_FAILED, map2); 787 788 /* The test failed, so clean up the resources. */ 789 munmap(map1, page_size); 790 munmap(map2, page_size); 791 close(fd); 792 } 793 794 /* This is a thread task to die via seccomp filter violation. */ 795 void *kill_thread(void *data) 796 { 797 bool die = (bool)data; 798 799 if (die) { 800 syscall(__NR_getpid); 801 return (void *)SIBLING_EXIT_FAILURE; 802 } 803 804 return (void *)SIBLING_EXIT_UNKILLED; 805 } 806 807 enum kill_t { 808 KILL_THREAD, 809 KILL_PROCESS, 810 RET_UNKNOWN 811 }; 812 813 /* Prepare a thread that will kill itself or both of us. */ 814 void kill_thread_or_group(struct __test_metadata *_metadata, 815 enum kill_t kill_how) 816 { 817 pthread_t thread; 818 void *status; 819 /* Kill only when calling __NR_getpid. */ 820 struct sock_filter filter_thread[] = { 821 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 822 offsetof(struct seccomp_data, nr)), 823 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 824 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD), 825 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 826 }; 827 struct sock_fprog prog_thread = { 828 .len = (unsigned short)ARRAY_SIZE(filter_thread), 829 .filter = filter_thread, 830 }; 831 int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAA; 832 struct sock_filter filter_process[] = { 833 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 834 offsetof(struct seccomp_data, nr)), 835 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 836 BPF_STMT(BPF_RET|BPF_K, kill), 837 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 838 }; 839 struct sock_fprog prog_process = { 840 .len = (unsigned short)ARRAY_SIZE(filter_process), 841 .filter = filter_process, 842 }; 843 844 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 845 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 846 } 847 848 ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, 849 kill_how == KILL_THREAD ? &prog_thread 850 : &prog_process)); 851 852 /* 853 * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS 854 * flag cannot be downgraded by a new filter. 855 */ 856 if (kill_how == KILL_PROCESS) 857 ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread)); 858 859 /* Start a thread that will exit immediately. */ 860 ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false)); 861 ASSERT_EQ(0, pthread_join(thread, &status)); 862 ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status); 863 864 /* Start a thread that will die immediately. */ 865 ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true)); 866 ASSERT_EQ(0, pthread_join(thread, &status)); 867 ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status); 868 869 /* 870 * If we get here, only the spawned thread died. Let the parent know 871 * the whole process didn't die (i.e. this thread, the spawner, 872 * stayed running). 873 */ 874 exit(42); 875 } 876 877 TEST(KILL_thread) 878 { 879 int status; 880 pid_t child_pid; 881 882 child_pid = fork(); 883 ASSERT_LE(0, child_pid); 884 if (child_pid == 0) { 885 kill_thread_or_group(_metadata, KILL_THREAD); 886 _exit(38); 887 } 888 889 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 890 891 /* If only the thread was killed, we'll see exit 42. */ 892 ASSERT_TRUE(WIFEXITED(status)); 893 ASSERT_EQ(42, WEXITSTATUS(status)); 894 } 895 896 TEST(KILL_process) 897 { 898 int status; 899 pid_t child_pid; 900 901 child_pid = fork(); 902 ASSERT_LE(0, child_pid); 903 if (child_pid == 0) { 904 kill_thread_or_group(_metadata, KILL_PROCESS); 905 _exit(38); 906 } 907 908 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 909 910 /* If the entire process was killed, we'll see SIGSYS. */ 911 ASSERT_TRUE(WIFSIGNALED(status)); 912 ASSERT_EQ(SIGSYS, WTERMSIG(status)); 913 } 914 915 TEST(KILL_unknown) 916 { 917 int status; 918 pid_t child_pid; 919 920 child_pid = fork(); 921 ASSERT_LE(0, child_pid); 922 if (child_pid == 0) { 923 kill_thread_or_group(_metadata, RET_UNKNOWN); 924 _exit(38); 925 } 926 927 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 928 929 /* If the entire process was killed, we'll see SIGSYS. */ 930 EXPECT_TRUE(WIFSIGNALED(status)) { 931 TH_LOG("Unknown SECCOMP_RET is only killing the thread?"); 932 } 933 ASSERT_EQ(SIGSYS, WTERMSIG(status)); 934 } 935 936 /* TODO(wad) add 64-bit versus 32-bit arg tests. */ 937 TEST(arg_out_of_range) 938 { 939 struct sock_filter filter[] = { 940 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)), 941 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 942 }; 943 struct sock_fprog prog = { 944 .len = (unsigned short)ARRAY_SIZE(filter), 945 .filter = filter, 946 }; 947 long ret; 948 949 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 950 ASSERT_EQ(0, ret); 951 952 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 953 EXPECT_EQ(-1, ret); 954 EXPECT_EQ(EINVAL, errno); 955 } 956 957 #define ERRNO_FILTER(name, errno) \ 958 struct sock_filter _read_filter_##name[] = { \ 959 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, \ 960 offsetof(struct seccomp_data, nr)), \ 961 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), \ 962 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | errno), \ 963 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), \ 964 }; \ 965 struct sock_fprog prog_##name = { \ 966 .len = (unsigned short)ARRAY_SIZE(_read_filter_##name), \ 967 .filter = _read_filter_##name, \ 968 } 969 970 /* Make sure basic errno values are correctly passed through a filter. */ 971 TEST(ERRNO_valid) 972 { 973 ERRNO_FILTER(valid, E2BIG); 974 long ret; 975 pid_t parent = getppid(); 976 977 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 978 ASSERT_EQ(0, ret); 979 980 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid); 981 ASSERT_EQ(0, ret); 982 983 EXPECT_EQ(parent, syscall(__NR_getppid)); 984 EXPECT_EQ(-1, read(-1, NULL, 0)); 985 EXPECT_EQ(E2BIG, errno); 986 } 987 988 /* Make sure an errno of zero is correctly handled by the arch code. */ 989 TEST(ERRNO_zero) 990 { 991 ERRNO_FILTER(zero, 0); 992 long ret; 993 pid_t parent = getppid(); 994 995 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 996 ASSERT_EQ(0, ret); 997 998 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero); 999 ASSERT_EQ(0, ret); 1000 1001 EXPECT_EQ(parent, syscall(__NR_getppid)); 1002 /* "errno" of 0 is ok. */ 1003 EXPECT_EQ(0, read(-1, NULL, 0)); 1004 } 1005 1006 /* 1007 * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller. 1008 * This tests that the errno value gets capped correctly, fixed by 1009 * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO"). 1010 */ 1011 TEST(ERRNO_capped) 1012 { 1013 ERRNO_FILTER(capped, 4096); 1014 long ret; 1015 pid_t parent = getppid(); 1016 1017 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1018 ASSERT_EQ(0, ret); 1019 1020 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped); 1021 ASSERT_EQ(0, ret); 1022 1023 EXPECT_EQ(parent, syscall(__NR_getppid)); 1024 EXPECT_EQ(-1, read(-1, NULL, 0)); 1025 EXPECT_EQ(4095, errno); 1026 } 1027 1028 /* 1029 * Filters are processed in reverse order: last applied is executed first. 1030 * Since only the SECCOMP_RET_ACTION mask is tested for return values, the 1031 * SECCOMP_RET_DATA mask results will follow the most recently applied 1032 * matching filter return (and not the lowest or highest value). 1033 */ 1034 TEST(ERRNO_order) 1035 { 1036 ERRNO_FILTER(first, 11); 1037 ERRNO_FILTER(second, 13); 1038 ERRNO_FILTER(third, 12); 1039 long ret; 1040 pid_t parent = getppid(); 1041 1042 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1043 ASSERT_EQ(0, ret); 1044 1045 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first); 1046 ASSERT_EQ(0, ret); 1047 1048 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second); 1049 ASSERT_EQ(0, ret); 1050 1051 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third); 1052 ASSERT_EQ(0, ret); 1053 1054 EXPECT_EQ(parent, syscall(__NR_getppid)); 1055 EXPECT_EQ(-1, read(-1, NULL, 0)); 1056 EXPECT_EQ(12, errno); 1057 } 1058 1059 FIXTURE(TRAP) { 1060 struct sock_fprog prog; 1061 }; 1062 1063 FIXTURE_SETUP(TRAP) 1064 { 1065 struct sock_filter filter[] = { 1066 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1067 offsetof(struct seccomp_data, nr)), 1068 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 1069 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP), 1070 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1071 }; 1072 1073 memset(&self->prog, 0, sizeof(self->prog)); 1074 self->prog.filter = malloc(sizeof(filter)); 1075 ASSERT_NE(NULL, self->prog.filter); 1076 memcpy(self->prog.filter, filter, sizeof(filter)); 1077 self->prog.len = (unsigned short)ARRAY_SIZE(filter); 1078 } 1079 1080 FIXTURE_TEARDOWN(TRAP) 1081 { 1082 if (self->prog.filter) 1083 free(self->prog.filter); 1084 } 1085 1086 TEST_F_SIGNAL(TRAP, dfl, SIGSYS) 1087 { 1088 long ret; 1089 1090 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1091 ASSERT_EQ(0, ret); 1092 1093 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); 1094 ASSERT_EQ(0, ret); 1095 syscall(__NR_getpid); 1096 } 1097 1098 /* Ensure that SIGSYS overrides SIG_IGN */ 1099 TEST_F_SIGNAL(TRAP, ign, SIGSYS) 1100 { 1101 long ret; 1102 1103 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1104 ASSERT_EQ(0, ret); 1105 1106 signal(SIGSYS, SIG_IGN); 1107 1108 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); 1109 ASSERT_EQ(0, ret); 1110 syscall(__NR_getpid); 1111 } 1112 1113 static siginfo_t TRAP_info; 1114 static volatile int TRAP_nr; 1115 static void TRAP_action(int nr, siginfo_t *info, void *void_context) 1116 { 1117 memcpy(&TRAP_info, info, sizeof(TRAP_info)); 1118 TRAP_nr = nr; 1119 } 1120 1121 TEST_F(TRAP, handler) 1122 { 1123 int ret, test; 1124 struct sigaction act; 1125 sigset_t mask; 1126 1127 memset(&act, 0, sizeof(act)); 1128 sigemptyset(&mask); 1129 sigaddset(&mask, SIGSYS); 1130 1131 act.sa_sigaction = &TRAP_action; 1132 act.sa_flags = SA_SIGINFO; 1133 ret = sigaction(SIGSYS, &act, NULL); 1134 ASSERT_EQ(0, ret) { 1135 TH_LOG("sigaction failed"); 1136 } 1137 ret = sigprocmask(SIG_UNBLOCK, &mask, NULL); 1138 ASSERT_EQ(0, ret) { 1139 TH_LOG("sigprocmask failed"); 1140 } 1141 1142 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1143 ASSERT_EQ(0, ret); 1144 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); 1145 ASSERT_EQ(0, ret); 1146 TRAP_nr = 0; 1147 memset(&TRAP_info, 0, sizeof(TRAP_info)); 1148 /* Expect the registers to be rolled back. (nr = error) may vary 1149 * based on arch. */ 1150 ret = syscall(__NR_getpid); 1151 /* Silence gcc warning about volatile. */ 1152 test = TRAP_nr; 1153 EXPECT_EQ(SIGSYS, test); 1154 struct local_sigsys { 1155 void *_call_addr; /* calling user insn */ 1156 int _syscall; /* triggering system call number */ 1157 unsigned int _arch; /* AUDIT_ARCH_* of syscall */ 1158 } *sigsys = (struct local_sigsys *) 1159 #ifdef si_syscall 1160 &(TRAP_info.si_call_addr); 1161 #else 1162 &TRAP_info.si_pid; 1163 #endif 1164 EXPECT_EQ(__NR_getpid, sigsys->_syscall); 1165 /* Make sure arch is non-zero. */ 1166 EXPECT_NE(0, sigsys->_arch); 1167 EXPECT_NE(0, (unsigned long)sigsys->_call_addr); 1168 } 1169 1170 FIXTURE(precedence) { 1171 struct sock_fprog allow; 1172 struct sock_fprog log; 1173 struct sock_fprog trace; 1174 struct sock_fprog error; 1175 struct sock_fprog trap; 1176 struct sock_fprog kill; 1177 }; 1178 1179 FIXTURE_SETUP(precedence) 1180 { 1181 struct sock_filter allow_insns[] = { 1182 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1183 }; 1184 struct sock_filter log_insns[] = { 1185 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1186 offsetof(struct seccomp_data, nr)), 1187 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1188 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1189 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG), 1190 }; 1191 struct sock_filter trace_insns[] = { 1192 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1193 offsetof(struct seccomp_data, nr)), 1194 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1195 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1196 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE), 1197 }; 1198 struct sock_filter error_insns[] = { 1199 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1200 offsetof(struct seccomp_data, nr)), 1201 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1202 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1203 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO), 1204 }; 1205 struct sock_filter trap_insns[] = { 1206 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1207 offsetof(struct seccomp_data, nr)), 1208 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1209 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1210 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP), 1211 }; 1212 struct sock_filter kill_insns[] = { 1213 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1214 offsetof(struct seccomp_data, nr)), 1215 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1216 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1217 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 1218 }; 1219 1220 memset(self, 0, sizeof(*self)); 1221 #define FILTER_ALLOC(_x) \ 1222 self->_x.filter = malloc(sizeof(_x##_insns)); \ 1223 ASSERT_NE(NULL, self->_x.filter); \ 1224 memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \ 1225 self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns) 1226 FILTER_ALLOC(allow); 1227 FILTER_ALLOC(log); 1228 FILTER_ALLOC(trace); 1229 FILTER_ALLOC(error); 1230 FILTER_ALLOC(trap); 1231 FILTER_ALLOC(kill); 1232 } 1233 1234 FIXTURE_TEARDOWN(precedence) 1235 { 1236 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter) 1237 FILTER_FREE(allow); 1238 FILTER_FREE(log); 1239 FILTER_FREE(trace); 1240 FILTER_FREE(error); 1241 FILTER_FREE(trap); 1242 FILTER_FREE(kill); 1243 } 1244 1245 TEST_F(precedence, allow_ok) 1246 { 1247 pid_t parent, res = 0; 1248 long ret; 1249 1250 parent = getppid(); 1251 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1252 ASSERT_EQ(0, ret); 1253 1254 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1255 ASSERT_EQ(0, ret); 1256 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1257 ASSERT_EQ(0, ret); 1258 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1259 ASSERT_EQ(0, ret); 1260 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1261 ASSERT_EQ(0, ret); 1262 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1263 ASSERT_EQ(0, ret); 1264 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); 1265 ASSERT_EQ(0, ret); 1266 /* Should work just fine. */ 1267 res = syscall(__NR_getppid); 1268 EXPECT_EQ(parent, res); 1269 } 1270 1271 TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS) 1272 { 1273 pid_t parent, res = 0; 1274 long ret; 1275 1276 parent = getppid(); 1277 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1278 ASSERT_EQ(0, ret); 1279 1280 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1281 ASSERT_EQ(0, ret); 1282 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1283 ASSERT_EQ(0, ret); 1284 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1285 ASSERT_EQ(0, ret); 1286 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1287 ASSERT_EQ(0, ret); 1288 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1289 ASSERT_EQ(0, ret); 1290 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); 1291 ASSERT_EQ(0, ret); 1292 /* Should work just fine. */ 1293 res = syscall(__NR_getppid); 1294 EXPECT_EQ(parent, res); 1295 /* getpid() should never return. */ 1296 res = syscall(__NR_getpid); 1297 EXPECT_EQ(0, res); 1298 } 1299 1300 TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS) 1301 { 1302 pid_t parent; 1303 long ret; 1304 1305 parent = getppid(); 1306 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1307 ASSERT_EQ(0, ret); 1308 1309 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1310 ASSERT_EQ(0, ret); 1311 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); 1312 ASSERT_EQ(0, ret); 1313 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1314 ASSERT_EQ(0, ret); 1315 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1316 ASSERT_EQ(0, ret); 1317 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1318 ASSERT_EQ(0, ret); 1319 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1320 ASSERT_EQ(0, ret); 1321 /* Should work just fine. */ 1322 EXPECT_EQ(parent, syscall(__NR_getppid)); 1323 /* getpid() should never return. */ 1324 EXPECT_EQ(0, syscall(__NR_getpid)); 1325 } 1326 1327 TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS) 1328 { 1329 pid_t parent; 1330 long ret; 1331 1332 parent = getppid(); 1333 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1334 ASSERT_EQ(0, ret); 1335 1336 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1337 ASSERT_EQ(0, ret); 1338 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1339 ASSERT_EQ(0, ret); 1340 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1341 ASSERT_EQ(0, ret); 1342 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1343 ASSERT_EQ(0, ret); 1344 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1345 ASSERT_EQ(0, ret); 1346 /* Should work just fine. */ 1347 EXPECT_EQ(parent, syscall(__NR_getppid)); 1348 /* getpid() should never return. */ 1349 EXPECT_EQ(0, syscall(__NR_getpid)); 1350 } 1351 1352 TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS) 1353 { 1354 pid_t parent; 1355 long ret; 1356 1357 parent = getppid(); 1358 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1359 ASSERT_EQ(0, ret); 1360 1361 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1362 ASSERT_EQ(0, ret); 1363 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1364 ASSERT_EQ(0, ret); 1365 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1366 ASSERT_EQ(0, ret); 1367 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1368 ASSERT_EQ(0, ret); 1369 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1370 ASSERT_EQ(0, ret); 1371 /* Should work just fine. */ 1372 EXPECT_EQ(parent, syscall(__NR_getppid)); 1373 /* getpid() should never return. */ 1374 EXPECT_EQ(0, syscall(__NR_getpid)); 1375 } 1376 1377 TEST_F(precedence, errno_is_third) 1378 { 1379 pid_t parent; 1380 long ret; 1381 1382 parent = getppid(); 1383 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1384 ASSERT_EQ(0, ret); 1385 1386 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1387 ASSERT_EQ(0, ret); 1388 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1389 ASSERT_EQ(0, ret); 1390 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1391 ASSERT_EQ(0, ret); 1392 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1393 ASSERT_EQ(0, ret); 1394 /* Should work just fine. */ 1395 EXPECT_EQ(parent, syscall(__NR_getppid)); 1396 EXPECT_EQ(0, syscall(__NR_getpid)); 1397 } 1398 1399 TEST_F(precedence, errno_is_third_in_any_order) 1400 { 1401 pid_t parent; 1402 long ret; 1403 1404 parent = getppid(); 1405 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1406 ASSERT_EQ(0, ret); 1407 1408 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1409 ASSERT_EQ(0, ret); 1410 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1411 ASSERT_EQ(0, ret); 1412 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1413 ASSERT_EQ(0, ret); 1414 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1415 ASSERT_EQ(0, ret); 1416 /* Should work just fine. */ 1417 EXPECT_EQ(parent, syscall(__NR_getppid)); 1418 EXPECT_EQ(0, syscall(__NR_getpid)); 1419 } 1420 1421 TEST_F(precedence, trace_is_fourth) 1422 { 1423 pid_t parent; 1424 long ret; 1425 1426 parent = getppid(); 1427 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1428 ASSERT_EQ(0, ret); 1429 1430 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1431 ASSERT_EQ(0, ret); 1432 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1433 ASSERT_EQ(0, ret); 1434 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1435 ASSERT_EQ(0, ret); 1436 /* Should work just fine. */ 1437 EXPECT_EQ(parent, syscall(__NR_getppid)); 1438 /* No ptracer */ 1439 EXPECT_EQ(-1, syscall(__NR_getpid)); 1440 } 1441 1442 TEST_F(precedence, trace_is_fourth_in_any_order) 1443 { 1444 pid_t parent; 1445 long ret; 1446 1447 parent = getppid(); 1448 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1449 ASSERT_EQ(0, ret); 1450 1451 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1452 ASSERT_EQ(0, ret); 1453 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1454 ASSERT_EQ(0, ret); 1455 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1456 ASSERT_EQ(0, ret); 1457 /* Should work just fine. */ 1458 EXPECT_EQ(parent, syscall(__NR_getppid)); 1459 /* No ptracer */ 1460 EXPECT_EQ(-1, syscall(__NR_getpid)); 1461 } 1462 1463 TEST_F(precedence, log_is_fifth) 1464 { 1465 pid_t mypid, parent; 1466 long ret; 1467 1468 mypid = getpid(); 1469 parent = getppid(); 1470 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1471 ASSERT_EQ(0, ret); 1472 1473 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1474 ASSERT_EQ(0, ret); 1475 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1476 ASSERT_EQ(0, ret); 1477 /* Should work just fine. */ 1478 EXPECT_EQ(parent, syscall(__NR_getppid)); 1479 /* Should also work just fine */ 1480 EXPECT_EQ(mypid, syscall(__NR_getpid)); 1481 } 1482 1483 TEST_F(precedence, log_is_fifth_in_any_order) 1484 { 1485 pid_t mypid, parent; 1486 long ret; 1487 1488 mypid = getpid(); 1489 parent = getppid(); 1490 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1491 ASSERT_EQ(0, ret); 1492 1493 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1494 ASSERT_EQ(0, ret); 1495 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1496 ASSERT_EQ(0, ret); 1497 /* Should work just fine. */ 1498 EXPECT_EQ(parent, syscall(__NR_getppid)); 1499 /* Should also work just fine */ 1500 EXPECT_EQ(mypid, syscall(__NR_getpid)); 1501 } 1502 1503 #ifndef PTRACE_O_TRACESECCOMP 1504 #define PTRACE_O_TRACESECCOMP 0x00000080 1505 #endif 1506 1507 /* Catch the Ubuntu 12.04 value error. */ 1508 #if PTRACE_EVENT_SECCOMP != 7 1509 #undef PTRACE_EVENT_SECCOMP 1510 #endif 1511 1512 #ifndef PTRACE_EVENT_SECCOMP 1513 #define PTRACE_EVENT_SECCOMP 7 1514 #endif 1515 1516 #define PTRACE_EVENT_MASK(status) ((status) >> 16) 1517 bool tracer_running; 1518 void tracer_stop(int sig) 1519 { 1520 tracer_running = false; 1521 } 1522 1523 typedef void tracer_func_t(struct __test_metadata *_metadata, 1524 pid_t tracee, int status, void *args); 1525 1526 void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee, 1527 tracer_func_t tracer_func, void *args, bool ptrace_syscall) 1528 { 1529 int ret = -1; 1530 struct sigaction action = { 1531 .sa_handler = tracer_stop, 1532 }; 1533 1534 /* Allow external shutdown. */ 1535 tracer_running = true; 1536 ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL)); 1537 1538 errno = 0; 1539 while (ret == -1 && errno != EINVAL) 1540 ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0); 1541 ASSERT_EQ(0, ret) { 1542 kill(tracee, SIGKILL); 1543 } 1544 /* Wait for attach stop */ 1545 wait(NULL); 1546 1547 ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ? 1548 PTRACE_O_TRACESYSGOOD : 1549 PTRACE_O_TRACESECCOMP); 1550 ASSERT_EQ(0, ret) { 1551 TH_LOG("Failed to set PTRACE_O_TRACESECCOMP"); 1552 kill(tracee, SIGKILL); 1553 } 1554 ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT, 1555 tracee, NULL, 0); 1556 ASSERT_EQ(0, ret); 1557 1558 /* Unblock the tracee */ 1559 ASSERT_EQ(1, write(fd, "A", 1)); 1560 ASSERT_EQ(0, close(fd)); 1561 1562 /* Run until we're shut down. Must assert to stop execution. */ 1563 while (tracer_running) { 1564 int status; 1565 1566 if (wait(&status) != tracee) 1567 continue; 1568 1569 if (WIFSIGNALED(status)) { 1570 /* Child caught a fatal signal. */ 1571 return; 1572 } 1573 if (WIFEXITED(status)) { 1574 /* Child exited with code. */ 1575 return; 1576 } 1577 1578 /* Check if we got an expected event. */ 1579 ASSERT_EQ(WIFCONTINUED(status), false); 1580 ASSERT_EQ(WIFSTOPPED(status), true); 1581 ASSERT_EQ(WSTOPSIG(status) & SIGTRAP, SIGTRAP) { 1582 TH_LOG("Unexpected WSTOPSIG: %d", WSTOPSIG(status)); 1583 } 1584 1585 tracer_func(_metadata, tracee, status, args); 1586 1587 ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT, 1588 tracee, NULL, 0); 1589 ASSERT_EQ(0, ret); 1590 } 1591 /* Directly report the status of our test harness results. */ 1592 syscall(__NR_exit, _metadata->exit_code); 1593 } 1594 1595 /* Common tracer setup/teardown functions. */ 1596 void cont_handler(int num) 1597 { } 1598 pid_t setup_trace_fixture(struct __test_metadata *_metadata, 1599 tracer_func_t func, void *args, bool ptrace_syscall) 1600 { 1601 char sync; 1602 int pipefd[2]; 1603 pid_t tracer_pid; 1604 pid_t tracee = getpid(); 1605 1606 /* Setup a pipe for clean synchronization. */ 1607 ASSERT_EQ(0, pipe(pipefd)); 1608 1609 /* Fork a child which we'll promote to tracer */ 1610 tracer_pid = fork(); 1611 ASSERT_LE(0, tracer_pid); 1612 signal(SIGALRM, cont_handler); 1613 if (tracer_pid == 0) { 1614 close(pipefd[0]); 1615 start_tracer(_metadata, pipefd[1], tracee, func, args, 1616 ptrace_syscall); 1617 syscall(__NR_exit, 0); 1618 } 1619 close(pipefd[1]); 1620 prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0); 1621 read(pipefd[0], &sync, 1); 1622 close(pipefd[0]); 1623 1624 return tracer_pid; 1625 } 1626 1627 void teardown_trace_fixture(struct __test_metadata *_metadata, 1628 pid_t tracer) 1629 { 1630 if (tracer) { 1631 int status; 1632 /* 1633 * Extract the exit code from the other process and 1634 * adopt it for ourselves in case its asserts failed. 1635 */ 1636 ASSERT_EQ(0, kill(tracer, SIGUSR1)); 1637 ASSERT_EQ(tracer, waitpid(tracer, &status, 0)); 1638 if (WEXITSTATUS(status)) 1639 _metadata->exit_code = KSFT_FAIL; 1640 } 1641 } 1642 1643 /* "poke" tracer arguments and function. */ 1644 struct tracer_args_poke_t { 1645 unsigned long poke_addr; 1646 }; 1647 1648 void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status, 1649 void *args) 1650 { 1651 int ret; 1652 unsigned long msg; 1653 struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args; 1654 1655 ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); 1656 EXPECT_EQ(0, ret); 1657 /* If this fails, don't try to recover. */ 1658 ASSERT_EQ(0x1001, msg) { 1659 kill(tracee, SIGKILL); 1660 } 1661 /* 1662 * Poke in the message. 1663 * Registers are not touched to try to keep this relatively arch 1664 * agnostic. 1665 */ 1666 ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001); 1667 EXPECT_EQ(0, ret); 1668 } 1669 1670 FIXTURE(TRACE_poke) { 1671 struct sock_fprog prog; 1672 pid_t tracer; 1673 long poked; 1674 struct tracer_args_poke_t tracer_args; 1675 }; 1676 1677 FIXTURE_SETUP(TRACE_poke) 1678 { 1679 struct sock_filter filter[] = { 1680 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1681 offsetof(struct seccomp_data, nr)), 1682 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), 1683 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001), 1684 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1685 }; 1686 1687 self->poked = 0; 1688 memset(&self->prog, 0, sizeof(self->prog)); 1689 self->prog.filter = malloc(sizeof(filter)); 1690 ASSERT_NE(NULL, self->prog.filter); 1691 memcpy(self->prog.filter, filter, sizeof(filter)); 1692 self->prog.len = (unsigned short)ARRAY_SIZE(filter); 1693 1694 /* Set up tracer args. */ 1695 self->tracer_args.poke_addr = (unsigned long)&self->poked; 1696 1697 /* Launch tracer. */ 1698 self->tracer = setup_trace_fixture(_metadata, tracer_poke, 1699 &self->tracer_args, false); 1700 } 1701 1702 FIXTURE_TEARDOWN(TRACE_poke) 1703 { 1704 teardown_trace_fixture(_metadata, self->tracer); 1705 if (self->prog.filter) 1706 free(self->prog.filter); 1707 } 1708 1709 TEST_F(TRACE_poke, read_has_side_effects) 1710 { 1711 ssize_t ret; 1712 1713 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1714 ASSERT_EQ(0, ret); 1715 1716 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); 1717 ASSERT_EQ(0, ret); 1718 1719 EXPECT_EQ(0, self->poked); 1720 ret = read(-1, NULL, 0); 1721 EXPECT_EQ(-1, ret); 1722 EXPECT_EQ(0x1001, self->poked); 1723 } 1724 1725 TEST_F(TRACE_poke, getpid_runs_normally) 1726 { 1727 long ret; 1728 1729 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1730 ASSERT_EQ(0, ret); 1731 1732 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); 1733 ASSERT_EQ(0, ret); 1734 1735 EXPECT_EQ(0, self->poked); 1736 EXPECT_NE(0, syscall(__NR_getpid)); 1737 EXPECT_EQ(0, self->poked); 1738 } 1739 1740 #if defined(__x86_64__) 1741 # define ARCH_REGS struct user_regs_struct 1742 # define SYSCALL_NUM(_regs) (_regs).orig_rax 1743 # define SYSCALL_RET(_regs) (_regs).rax 1744 #elif defined(__i386__) 1745 # define ARCH_REGS struct user_regs_struct 1746 # define SYSCALL_NUM(_regs) (_regs).orig_eax 1747 # define SYSCALL_RET(_regs) (_regs).eax 1748 #elif defined(__arm__) 1749 # define ARCH_REGS struct pt_regs 1750 # define SYSCALL_NUM(_regs) (_regs).ARM_r7 1751 # ifndef PTRACE_SET_SYSCALL 1752 # define PTRACE_SET_SYSCALL 23 1753 # endif 1754 # define SYSCALL_NUM_SET(_regs, _nr) \ 1755 EXPECT_EQ(0, ptrace(PTRACE_SET_SYSCALL, tracee, NULL, _nr)) 1756 # define SYSCALL_RET(_regs) (_regs).ARM_r0 1757 #elif defined(__aarch64__) 1758 # define ARCH_REGS struct user_pt_regs 1759 # define SYSCALL_NUM(_regs) (_regs).regs[8] 1760 # ifndef NT_ARM_SYSTEM_CALL 1761 # define NT_ARM_SYSTEM_CALL 0x404 1762 # endif 1763 # define SYSCALL_NUM_SET(_regs, _nr) \ 1764 do { \ 1765 struct iovec __v; \ 1766 typeof(_nr) __nr = (_nr); \ 1767 __v.iov_base = &__nr; \ 1768 __v.iov_len = sizeof(__nr); \ 1769 EXPECT_EQ(0, ptrace(PTRACE_SETREGSET, tracee, \ 1770 NT_ARM_SYSTEM_CALL, &__v)); \ 1771 } while (0) 1772 # define SYSCALL_RET(_regs) (_regs).regs[0] 1773 #elif defined(__loongarch__) 1774 # define ARCH_REGS struct user_pt_regs 1775 # define SYSCALL_NUM(_regs) (_regs).regs[11] 1776 # define SYSCALL_RET(_regs) (_regs).regs[4] 1777 #elif defined(__riscv) && __riscv_xlen == 64 1778 # define ARCH_REGS struct user_regs_struct 1779 # define SYSCALL_NUM(_regs) (_regs).a7 1780 # define SYSCALL_RET(_regs) (_regs).a0 1781 #elif defined(__csky__) 1782 # define ARCH_REGS struct pt_regs 1783 # if defined(__CSKYABIV2__) 1784 # define SYSCALL_NUM(_regs) (_regs).regs[3] 1785 # else 1786 # define SYSCALL_NUM(_regs) (_regs).regs[9] 1787 # endif 1788 # define SYSCALL_RET(_regs) (_regs).a0 1789 #elif defined(__hppa__) 1790 # define ARCH_REGS struct user_regs_struct 1791 # define SYSCALL_NUM(_regs) (_regs).gr[20] 1792 # define SYSCALL_RET(_regs) (_regs).gr[28] 1793 #elif defined(__powerpc__) 1794 # define ARCH_REGS struct pt_regs 1795 # define SYSCALL_NUM(_regs) (_regs).gpr[0] 1796 # define SYSCALL_RET(_regs) (_regs).gpr[3] 1797 # define SYSCALL_RET_SET(_regs, _val) \ 1798 do { \ 1799 typeof(_val) _result = (_val); \ 1800 if ((_regs.trap & 0xfff0) == 0x3000) { \ 1801 /* \ 1802 * scv 0 system call uses -ve result \ 1803 * for error, so no need to adjust. \ 1804 */ \ 1805 SYSCALL_RET(_regs) = _result; \ 1806 } else { \ 1807 /* \ 1808 * A syscall error is signaled by the \ 1809 * CR0 SO bit and the code is stored as \ 1810 * a positive value. \ 1811 */ \ 1812 if (_result < 0) { \ 1813 SYSCALL_RET(_regs) = -_result; \ 1814 (_regs).ccr |= 0x10000000; \ 1815 } else { \ 1816 SYSCALL_RET(_regs) = _result; \ 1817 (_regs).ccr &= ~0x10000000; \ 1818 } \ 1819 } \ 1820 } while (0) 1821 # define SYSCALL_RET_SET_ON_PTRACE_EXIT 1822 #elif defined(__s390__) 1823 # define ARCH_REGS s390_regs 1824 # define SYSCALL_NUM(_regs) (_regs).gprs[2] 1825 # define SYSCALL_RET_SET(_regs, _val) \ 1826 TH_LOG("Can't modify syscall return on this architecture") 1827 #elif defined(__mips__) 1828 # include <asm/unistd_nr_n32.h> 1829 # include <asm/unistd_nr_n64.h> 1830 # include <asm/unistd_nr_o32.h> 1831 # define ARCH_REGS struct pt_regs 1832 # define SYSCALL_NUM(_regs) \ 1833 ({ \ 1834 typeof((_regs).regs[2]) _nr; \ 1835 if ((_regs).regs[2] == __NR_O32_Linux) \ 1836 _nr = (_regs).regs[4]; \ 1837 else \ 1838 _nr = (_regs).regs[2]; \ 1839 _nr; \ 1840 }) 1841 # define SYSCALL_NUM_SET(_regs, _nr) \ 1842 do { \ 1843 if ((_regs).regs[2] == __NR_O32_Linux) \ 1844 (_regs).regs[4] = _nr; \ 1845 else \ 1846 (_regs).regs[2] = _nr; \ 1847 } while (0) 1848 # define SYSCALL_RET_SET(_regs, _val) \ 1849 TH_LOG("Can't modify syscall return on this architecture") 1850 #elif defined(__xtensa__) 1851 # define ARCH_REGS struct user_pt_regs 1852 # define SYSCALL_NUM(_regs) (_regs).syscall 1853 /* 1854 * On xtensa syscall return value is in the register 1855 * a2 of the current window which is not fixed. 1856 */ 1857 #define SYSCALL_RET(_regs) (_regs).a[(_regs).windowbase * 4 + 2] 1858 #elif defined(__sh__) 1859 # define ARCH_REGS struct pt_regs 1860 # define SYSCALL_NUM(_regs) (_regs).regs[3] 1861 # define SYSCALL_RET(_regs) (_regs).regs[0] 1862 #elif defined(__mc68000__) 1863 # define ARCH_REGS struct user_regs_struct 1864 # define SYSCALL_NUM(_regs) (_regs).orig_d0 1865 # define SYSCALL_RET(_regs) (_regs).d0 1866 #else 1867 # error "Do not know how to find your architecture's registers and syscalls" 1868 #endif 1869 1870 /* 1871 * Most architectures can change the syscall by just updating the 1872 * associated register. This is the default if not defined above. 1873 */ 1874 #ifndef SYSCALL_NUM_SET 1875 # define SYSCALL_NUM_SET(_regs, _nr) \ 1876 do { \ 1877 SYSCALL_NUM(_regs) = (_nr); \ 1878 } while (0) 1879 #endif 1880 /* 1881 * Most architectures can change the syscall return value by just 1882 * writing to the SYSCALL_RET register. This is the default if not 1883 * defined above. If an architecture cannot set the return value 1884 * (for example when the syscall and return value register is 1885 * shared), report it with TH_LOG() in an arch-specific definition 1886 * of SYSCALL_RET_SET() above, and leave SYSCALL_RET undefined. 1887 */ 1888 #if !defined(SYSCALL_RET) && !defined(SYSCALL_RET_SET) 1889 # error "One of SYSCALL_RET or SYSCALL_RET_SET is needed for this arch" 1890 #endif 1891 #ifndef SYSCALL_RET_SET 1892 # define SYSCALL_RET_SET(_regs, _val) \ 1893 do { \ 1894 SYSCALL_RET(_regs) = (_val); \ 1895 } while (0) 1896 #endif 1897 1898 /* When the syscall return can't be changed, stub out the tests for it. */ 1899 #ifndef SYSCALL_RET 1900 # define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action) 1901 #else 1902 # define EXPECT_SYSCALL_RETURN(val, action) \ 1903 do { \ 1904 errno = 0; \ 1905 if (val < 0) { \ 1906 EXPECT_EQ(-1, action); \ 1907 EXPECT_EQ(-(val), errno); \ 1908 } else { \ 1909 EXPECT_EQ(val, action); \ 1910 } \ 1911 } while (0) 1912 #endif 1913 1914 /* 1915 * Some architectures (e.g. powerpc) can only set syscall 1916 * return values on syscall exit during ptrace. 1917 */ 1918 const bool ptrace_entry_set_syscall_nr = true; 1919 const bool ptrace_entry_set_syscall_ret = 1920 #ifndef SYSCALL_RET_SET_ON_PTRACE_EXIT 1921 true; 1922 #else 1923 false; 1924 #endif 1925 1926 /* 1927 * Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for 1928 * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux). 1929 */ 1930 #if defined(__x86_64__) || defined(__i386__) || defined(__mips__) || defined(__mc68000__) 1931 # define ARCH_GETREGS(_regs) ptrace(PTRACE_GETREGS, tracee, 0, &(_regs)) 1932 # define ARCH_SETREGS(_regs) ptrace(PTRACE_SETREGS, tracee, 0, &(_regs)) 1933 #else 1934 # define ARCH_GETREGS(_regs) ({ \ 1935 struct iovec __v; \ 1936 __v.iov_base = &(_regs); \ 1937 __v.iov_len = sizeof(_regs); \ 1938 ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &__v); \ 1939 }) 1940 # define ARCH_SETREGS(_regs) ({ \ 1941 struct iovec __v; \ 1942 __v.iov_base = &(_regs); \ 1943 __v.iov_len = sizeof(_regs); \ 1944 ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &__v); \ 1945 }) 1946 #endif 1947 1948 /* Architecture-specific syscall fetching routine. */ 1949 int get_syscall(struct __test_metadata *_metadata, pid_t tracee) 1950 { 1951 ARCH_REGS regs; 1952 1953 EXPECT_EQ(0, ARCH_GETREGS(regs)) { 1954 return -1; 1955 } 1956 1957 return SYSCALL_NUM(regs); 1958 } 1959 1960 /* Architecture-specific syscall changing routine. */ 1961 void __change_syscall(struct __test_metadata *_metadata, 1962 pid_t tracee, long *syscall, long *ret) 1963 { 1964 ARCH_REGS orig, regs; 1965 1966 /* Do not get/set registers if we have nothing to do. */ 1967 if (!syscall && !ret) 1968 return; 1969 1970 EXPECT_EQ(0, ARCH_GETREGS(regs)) { 1971 return; 1972 } 1973 orig = regs; 1974 1975 if (syscall) 1976 SYSCALL_NUM_SET(regs, *syscall); 1977 1978 if (ret) 1979 SYSCALL_RET_SET(regs, *ret); 1980 1981 /* Flush any register changes made. */ 1982 if (memcmp(&orig, ®s, sizeof(orig)) != 0) 1983 EXPECT_EQ(0, ARCH_SETREGS(regs)); 1984 } 1985 1986 /* Change only syscall number. */ 1987 void change_syscall_nr(struct __test_metadata *_metadata, 1988 pid_t tracee, long syscall) 1989 { 1990 __change_syscall(_metadata, tracee, &syscall, NULL); 1991 } 1992 1993 /* Change syscall return value (and set syscall number to -1). */ 1994 void change_syscall_ret(struct __test_metadata *_metadata, 1995 pid_t tracee, long ret) 1996 { 1997 long syscall = -1; 1998 1999 __change_syscall(_metadata, tracee, &syscall, &ret); 2000 } 2001 2002 void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee, 2003 int status, void *args) 2004 { 2005 int ret; 2006 unsigned long msg; 2007 2008 EXPECT_EQ(PTRACE_EVENT_MASK(status), PTRACE_EVENT_SECCOMP) { 2009 TH_LOG("Unexpected ptrace event: %d", PTRACE_EVENT_MASK(status)); 2010 return; 2011 } 2012 2013 /* Make sure we got the right message. */ 2014 ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); 2015 EXPECT_EQ(0, ret); 2016 2017 /* Validate and take action on expected syscalls. */ 2018 switch (msg) { 2019 case 0x1002: 2020 /* change getpid to getppid. */ 2021 EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee)); 2022 change_syscall_nr(_metadata, tracee, __NR_getppid); 2023 break; 2024 case 0x1003: 2025 /* skip gettid with valid return code. */ 2026 EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee)); 2027 change_syscall_ret(_metadata, tracee, 45000); 2028 break; 2029 case 0x1004: 2030 /* skip openat with error. */ 2031 EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee)); 2032 change_syscall_ret(_metadata, tracee, -ESRCH); 2033 break; 2034 case 0x1005: 2035 /* do nothing (allow getppid) */ 2036 EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee)); 2037 break; 2038 default: 2039 EXPECT_EQ(0, msg) { 2040 TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg); 2041 kill(tracee, SIGKILL); 2042 } 2043 } 2044 2045 } 2046 2047 FIXTURE(TRACE_syscall) { 2048 struct sock_fprog prog; 2049 pid_t tracer, mytid, mypid, parent; 2050 long syscall_nr; 2051 }; 2052 2053 void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, 2054 int status, void *args) 2055 { 2056 int ret; 2057 unsigned long msg; 2058 static bool entry; 2059 long syscall_nr_val, syscall_ret_val; 2060 long *syscall_nr = NULL, *syscall_ret = NULL; 2061 FIXTURE_DATA(TRACE_syscall) *self = args; 2062 2063 EXPECT_EQ(WSTOPSIG(status) & 0x80, 0x80) { 2064 TH_LOG("Unexpected WSTOPSIG: %d", WSTOPSIG(status)); 2065 return; 2066 } 2067 2068 /* 2069 * The traditional way to tell PTRACE_SYSCALL entry/exit 2070 * is by counting. 2071 */ 2072 entry = !entry; 2073 2074 /* Make sure we got an appropriate message. */ 2075 ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); 2076 EXPECT_EQ(0, ret); 2077 EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY 2078 : PTRACE_EVENTMSG_SYSCALL_EXIT, msg); 2079 2080 /* 2081 * Some architectures only support setting return values during 2082 * syscall exit under ptrace, and on exit the syscall number may 2083 * no longer be available. Therefore, save the initial sycall 2084 * number here, so it can be examined during both entry and exit 2085 * phases. 2086 */ 2087 if (entry) 2088 self->syscall_nr = get_syscall(_metadata, tracee); 2089 2090 /* 2091 * Depending on the architecture's syscall setting abilities, we 2092 * pick which things to set during this phase (entry or exit). 2093 */ 2094 if (entry == ptrace_entry_set_syscall_nr) 2095 syscall_nr = &syscall_nr_val; 2096 if (entry == ptrace_entry_set_syscall_ret) 2097 syscall_ret = &syscall_ret_val; 2098 2099 /* Now handle the actual rewriting cases. */ 2100 switch (self->syscall_nr) { 2101 case __NR_getpid: 2102 syscall_nr_val = __NR_getppid; 2103 /* Never change syscall return for this case. */ 2104 syscall_ret = NULL; 2105 break; 2106 case __NR_gettid: 2107 syscall_nr_val = -1; 2108 syscall_ret_val = 45000; 2109 break; 2110 case __NR_openat: 2111 syscall_nr_val = -1; 2112 syscall_ret_val = -ESRCH; 2113 break; 2114 default: 2115 /* Unhandled, do nothing. */ 2116 return; 2117 } 2118 2119 __change_syscall(_metadata, tracee, syscall_nr, syscall_ret); 2120 } 2121 2122 FIXTURE_VARIANT(TRACE_syscall) { 2123 /* 2124 * All of the SECCOMP_RET_TRACE behaviors can be tested with either 2125 * SECCOMP_RET_TRACE+PTRACE_CONT or plain ptrace()+PTRACE_SYSCALL. 2126 * This indicates if we should use SECCOMP_RET_TRACE (false), or 2127 * ptrace (true). 2128 */ 2129 bool use_ptrace; 2130 }; 2131 2132 FIXTURE_VARIANT_ADD(TRACE_syscall, ptrace) { 2133 .use_ptrace = true, 2134 }; 2135 2136 FIXTURE_VARIANT_ADD(TRACE_syscall, seccomp) { 2137 .use_ptrace = false, 2138 }; 2139 2140 FIXTURE_SETUP(TRACE_syscall) 2141 { 2142 struct sock_filter filter[] = { 2143 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2144 offsetof(struct seccomp_data, nr)), 2145 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 2146 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002), 2147 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1), 2148 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003), 2149 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1), 2150 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004), 2151 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 2152 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005), 2153 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2154 }; 2155 struct sock_fprog prog = { 2156 .len = (unsigned short)ARRAY_SIZE(filter), 2157 .filter = filter, 2158 }; 2159 long ret; 2160 2161 /* Prepare some testable syscall results. */ 2162 self->mytid = syscall(__NR_gettid); 2163 ASSERT_GT(self->mytid, 0); 2164 ASSERT_NE(self->mytid, 1) { 2165 TH_LOG("Running this test as init is not supported. :)"); 2166 } 2167 2168 self->mypid = getpid(); 2169 ASSERT_GT(self->mypid, 0); 2170 ASSERT_EQ(self->mytid, self->mypid); 2171 2172 self->parent = getppid(); 2173 ASSERT_GT(self->parent, 0); 2174 ASSERT_NE(self->parent, self->mypid); 2175 2176 /* Launch tracer. */ 2177 self->tracer = setup_trace_fixture(_metadata, 2178 variant->use_ptrace ? tracer_ptrace 2179 : tracer_seccomp, 2180 self, variant->use_ptrace); 2181 2182 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 2183 ASSERT_EQ(0, ret); 2184 2185 /* Do not install seccomp rewrite filters, as we'll use ptrace instead. */ 2186 if (variant->use_ptrace) 2187 return; 2188 2189 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2190 ASSERT_EQ(0, ret); 2191 } 2192 2193 FIXTURE_TEARDOWN(TRACE_syscall) 2194 { 2195 teardown_trace_fixture(_metadata, self->tracer); 2196 } 2197 2198 TEST(negative_ENOSYS) 2199 { 2200 #if defined(__arm__) 2201 SKIP(return, "arm32 does not support calling syscall -1"); 2202 #endif 2203 /* 2204 * There should be no difference between an "internal" skip 2205 * and userspace asking for syscall "-1". 2206 */ 2207 errno = 0; 2208 EXPECT_EQ(-1, syscall(-1)); 2209 EXPECT_EQ(errno, ENOSYS); 2210 /* And no difference for "still not valid but not -1". */ 2211 errno = 0; 2212 EXPECT_EQ(-1, syscall(-101)); 2213 EXPECT_EQ(errno, ENOSYS); 2214 } 2215 2216 TEST_F(TRACE_syscall, negative_ENOSYS) 2217 { 2218 negative_ENOSYS(_metadata); 2219 } 2220 2221 TEST_F(TRACE_syscall, syscall_allowed) 2222 { 2223 /* getppid works as expected (no changes). */ 2224 EXPECT_EQ(self->parent, syscall(__NR_getppid)); 2225 EXPECT_NE(self->mypid, syscall(__NR_getppid)); 2226 } 2227 2228 TEST_F(TRACE_syscall, syscall_redirected) 2229 { 2230 /* getpid has been redirected to getppid as expected. */ 2231 EXPECT_EQ(self->parent, syscall(__NR_getpid)); 2232 EXPECT_NE(self->mypid, syscall(__NR_getpid)); 2233 } 2234 2235 TEST_F(TRACE_syscall, syscall_errno) 2236 { 2237 /* Tracer should skip the open syscall, resulting in ESRCH. */ 2238 EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat)); 2239 } 2240 2241 TEST_F(TRACE_syscall, syscall_faked) 2242 { 2243 /* Tracer skips the gettid syscall and store altered return value. */ 2244 EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid)); 2245 } 2246 2247 TEST_F_SIGNAL(TRACE_syscall, kill_immediate, SIGSYS) 2248 { 2249 struct sock_filter filter[] = { 2250 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2251 offsetof(struct seccomp_data, nr)), 2252 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_mknodat, 0, 1), 2253 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD), 2254 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2255 }; 2256 struct sock_fprog prog = { 2257 .len = (unsigned short)ARRAY_SIZE(filter), 2258 .filter = filter, 2259 }; 2260 long ret; 2261 2262 /* Install "kill on mknodat" filter. */ 2263 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2264 ASSERT_EQ(0, ret); 2265 2266 /* This should immediately die with SIGSYS, regardless of tracer. */ 2267 EXPECT_EQ(-1, syscall(__NR_mknodat, -1, NULL, 0, 0)); 2268 } 2269 2270 TEST_F(TRACE_syscall, skip_after) 2271 { 2272 struct sock_filter filter[] = { 2273 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2274 offsetof(struct seccomp_data, nr)), 2275 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 2276 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM), 2277 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2278 }; 2279 struct sock_fprog prog = { 2280 .len = (unsigned short)ARRAY_SIZE(filter), 2281 .filter = filter, 2282 }; 2283 long ret; 2284 2285 /* Install additional "errno on getppid" filter. */ 2286 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2287 ASSERT_EQ(0, ret); 2288 2289 /* Tracer will redirect getpid to getppid, and we should see EPERM. */ 2290 errno = 0; 2291 EXPECT_EQ(-1, syscall(__NR_getpid)); 2292 EXPECT_EQ(EPERM, errno); 2293 } 2294 2295 TEST_F_SIGNAL(TRACE_syscall, kill_after, SIGSYS) 2296 { 2297 struct sock_filter filter[] = { 2298 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2299 offsetof(struct seccomp_data, nr)), 2300 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 2301 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 2302 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2303 }; 2304 struct sock_fprog prog = { 2305 .len = (unsigned short)ARRAY_SIZE(filter), 2306 .filter = filter, 2307 }; 2308 long ret; 2309 2310 /* Install additional "death on getppid" filter. */ 2311 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2312 ASSERT_EQ(0, ret); 2313 2314 /* Tracer will redirect getpid to getppid, and we should die. */ 2315 EXPECT_NE(self->mypid, syscall(__NR_getpid)); 2316 } 2317 2318 TEST(seccomp_syscall) 2319 { 2320 struct sock_filter filter[] = { 2321 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2322 }; 2323 struct sock_fprog prog = { 2324 .len = (unsigned short)ARRAY_SIZE(filter), 2325 .filter = filter, 2326 }; 2327 long ret; 2328 2329 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 2330 ASSERT_EQ(0, ret) { 2331 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2332 } 2333 2334 /* Reject insane operation. */ 2335 ret = seccomp(-1, 0, &prog); 2336 ASSERT_NE(ENOSYS, errno) { 2337 TH_LOG("Kernel does not support seccomp syscall!"); 2338 } 2339 EXPECT_EQ(EINVAL, errno) { 2340 TH_LOG("Did not reject crazy op value!"); 2341 } 2342 2343 /* Reject strict with flags or pointer. */ 2344 ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL); 2345 EXPECT_EQ(EINVAL, errno) { 2346 TH_LOG("Did not reject mode strict with flags!"); 2347 } 2348 ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog); 2349 EXPECT_EQ(EINVAL, errno) { 2350 TH_LOG("Did not reject mode strict with uargs!"); 2351 } 2352 2353 /* Reject insane args for filter. */ 2354 ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog); 2355 EXPECT_EQ(EINVAL, errno) { 2356 TH_LOG("Did not reject crazy filter flags!"); 2357 } 2358 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL); 2359 EXPECT_EQ(EFAULT, errno) { 2360 TH_LOG("Did not reject NULL filter!"); 2361 } 2362 2363 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); 2364 EXPECT_EQ(0, errno) { 2365 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s", 2366 strerror(errno)); 2367 } 2368 } 2369 2370 TEST(seccomp_syscall_mode_lock) 2371 { 2372 struct sock_filter filter[] = { 2373 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2374 }; 2375 struct sock_fprog prog = { 2376 .len = (unsigned short)ARRAY_SIZE(filter), 2377 .filter = filter, 2378 }; 2379 long ret; 2380 2381 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); 2382 ASSERT_EQ(0, ret) { 2383 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2384 } 2385 2386 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); 2387 ASSERT_NE(ENOSYS, errno) { 2388 TH_LOG("Kernel does not support seccomp syscall!"); 2389 } 2390 EXPECT_EQ(0, ret) { 2391 TH_LOG("Could not install filter!"); 2392 } 2393 2394 /* Make sure neither entry point will switch to strict. */ 2395 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0); 2396 EXPECT_EQ(EINVAL, errno) { 2397 TH_LOG("Switched to mode strict!"); 2398 } 2399 2400 ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL); 2401 EXPECT_EQ(EINVAL, errno) { 2402 TH_LOG("Switched to mode strict!"); 2403 } 2404 } 2405 2406 /* 2407 * Test detection of known and unknown filter flags. Userspace needs to be able 2408 * to check if a filter flag is supported by the current kernel and a good way 2409 * of doing that is by attempting to enter filter mode, with the flag bit in 2410 * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates 2411 * that the flag is valid and EINVAL indicates that the flag is invalid. 2412 */ 2413 TEST(detect_seccomp_filter_flags) 2414 { 2415 unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC, 2416 SECCOMP_FILTER_FLAG_LOG, 2417 SECCOMP_FILTER_FLAG_SPEC_ALLOW, 2418 SECCOMP_FILTER_FLAG_NEW_LISTENER, 2419 SECCOMP_FILTER_FLAG_TSYNC_ESRCH }; 2420 unsigned int exclusive[] = { 2421 SECCOMP_FILTER_FLAG_TSYNC, 2422 SECCOMP_FILTER_FLAG_NEW_LISTENER }; 2423 unsigned int flag, all_flags, exclusive_mask; 2424 int i; 2425 long ret; 2426 2427 /* Test detection of individual known-good filter flags */ 2428 for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) { 2429 int bits = 0; 2430 2431 flag = flags[i]; 2432 /* Make sure the flag is a single bit! */ 2433 while (flag) { 2434 if (flag & 0x1) 2435 bits ++; 2436 flag >>= 1; 2437 } 2438 ASSERT_EQ(1, bits); 2439 flag = flags[i]; 2440 2441 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2442 ASSERT_NE(ENOSYS, errno) { 2443 TH_LOG("Kernel does not support seccomp syscall!"); 2444 } 2445 EXPECT_EQ(-1, ret); 2446 EXPECT_EQ(EFAULT, errno) { 2447 TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!", 2448 flag); 2449 } 2450 2451 all_flags |= flag; 2452 } 2453 2454 /* 2455 * Test detection of all known-good filter flags combined. But 2456 * for the exclusive flags we need to mask them out and try them 2457 * individually for the "all flags" testing. 2458 */ 2459 exclusive_mask = 0; 2460 for (i = 0; i < ARRAY_SIZE(exclusive); i++) 2461 exclusive_mask |= exclusive[i]; 2462 for (i = 0; i < ARRAY_SIZE(exclusive); i++) { 2463 flag = all_flags & ~exclusive_mask; 2464 flag |= exclusive[i]; 2465 2466 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2467 EXPECT_EQ(-1, ret); 2468 EXPECT_EQ(EFAULT, errno) { 2469 TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!", 2470 flag); 2471 } 2472 } 2473 2474 /* Test detection of an unknown filter flags, without exclusives. */ 2475 flag = -1; 2476 flag &= ~exclusive_mask; 2477 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2478 EXPECT_EQ(-1, ret); 2479 EXPECT_EQ(EINVAL, errno) { 2480 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!", 2481 flag); 2482 } 2483 2484 /* 2485 * Test detection of an unknown filter flag that may simply need to be 2486 * added to this test 2487 */ 2488 flag = flags[ARRAY_SIZE(flags) - 1] << 1; 2489 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2490 EXPECT_EQ(-1, ret); 2491 EXPECT_EQ(EINVAL, errno) { 2492 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?", 2493 flag); 2494 } 2495 } 2496 2497 TEST(TSYNC_first) 2498 { 2499 struct sock_filter filter[] = { 2500 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2501 }; 2502 struct sock_fprog prog = { 2503 .len = (unsigned short)ARRAY_SIZE(filter), 2504 .filter = filter, 2505 }; 2506 long ret; 2507 2508 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); 2509 ASSERT_EQ(0, ret) { 2510 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2511 } 2512 2513 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2514 &prog); 2515 ASSERT_NE(ENOSYS, errno) { 2516 TH_LOG("Kernel does not support seccomp syscall!"); 2517 } 2518 EXPECT_EQ(0, ret) { 2519 TH_LOG("Could not install initial filter with TSYNC!"); 2520 } 2521 } 2522 2523 #define TSYNC_SIBLINGS 2 2524 struct tsync_sibling { 2525 pthread_t tid; 2526 pid_t system_tid; 2527 sem_t *started; 2528 pthread_cond_t *cond; 2529 pthread_mutex_t *mutex; 2530 int diverge; 2531 int num_waits; 2532 struct sock_fprog *prog; 2533 struct __test_metadata *metadata; 2534 }; 2535 2536 /* 2537 * To avoid joining joined threads (which is not allowed by Bionic), 2538 * make sure we both successfully join and clear the tid to skip a 2539 * later join attempt during fixture teardown. Any remaining threads 2540 * will be directly killed during teardown. 2541 */ 2542 #define PTHREAD_JOIN(tid, status) \ 2543 do { \ 2544 int _rc = pthread_join(tid, status); \ 2545 if (_rc) { \ 2546 TH_LOG("pthread_join of tid %u failed: %d\n", \ 2547 (unsigned int)tid, _rc); \ 2548 } else { \ 2549 tid = 0; \ 2550 } \ 2551 } while (0) 2552 2553 FIXTURE(TSYNC) { 2554 struct sock_fprog root_prog, apply_prog; 2555 struct tsync_sibling sibling[TSYNC_SIBLINGS]; 2556 sem_t started; 2557 pthread_cond_t cond; 2558 pthread_mutex_t mutex; 2559 int sibling_count; 2560 }; 2561 2562 FIXTURE_SETUP(TSYNC) 2563 { 2564 struct sock_filter root_filter[] = { 2565 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2566 }; 2567 struct sock_filter apply_filter[] = { 2568 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2569 offsetof(struct seccomp_data, nr)), 2570 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), 2571 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 2572 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2573 }; 2574 2575 memset(&self->root_prog, 0, sizeof(self->root_prog)); 2576 memset(&self->apply_prog, 0, sizeof(self->apply_prog)); 2577 memset(&self->sibling, 0, sizeof(self->sibling)); 2578 self->root_prog.filter = malloc(sizeof(root_filter)); 2579 ASSERT_NE(NULL, self->root_prog.filter); 2580 memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter)); 2581 self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter); 2582 2583 self->apply_prog.filter = malloc(sizeof(apply_filter)); 2584 ASSERT_NE(NULL, self->apply_prog.filter); 2585 memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter)); 2586 self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter); 2587 2588 self->sibling_count = 0; 2589 pthread_mutex_init(&self->mutex, NULL); 2590 pthread_cond_init(&self->cond, NULL); 2591 sem_init(&self->started, 0, 0); 2592 self->sibling[0].tid = 0; 2593 self->sibling[0].cond = &self->cond; 2594 self->sibling[0].started = &self->started; 2595 self->sibling[0].mutex = &self->mutex; 2596 self->sibling[0].diverge = 0; 2597 self->sibling[0].num_waits = 1; 2598 self->sibling[0].prog = &self->root_prog; 2599 self->sibling[0].metadata = _metadata; 2600 self->sibling[1].tid = 0; 2601 self->sibling[1].cond = &self->cond; 2602 self->sibling[1].started = &self->started; 2603 self->sibling[1].mutex = &self->mutex; 2604 self->sibling[1].diverge = 0; 2605 self->sibling[1].prog = &self->root_prog; 2606 self->sibling[1].num_waits = 1; 2607 self->sibling[1].metadata = _metadata; 2608 } 2609 2610 FIXTURE_TEARDOWN(TSYNC) 2611 { 2612 int sib = 0; 2613 2614 if (self->root_prog.filter) 2615 free(self->root_prog.filter); 2616 if (self->apply_prog.filter) 2617 free(self->apply_prog.filter); 2618 2619 for ( ; sib < self->sibling_count; ++sib) { 2620 struct tsync_sibling *s = &self->sibling[sib]; 2621 2622 if (!s->tid) 2623 continue; 2624 /* 2625 * If a thread is still running, it may be stuck, so hit 2626 * it over the head really hard. 2627 */ 2628 pthread_kill(s->tid, 9); 2629 } 2630 pthread_mutex_destroy(&self->mutex); 2631 pthread_cond_destroy(&self->cond); 2632 sem_destroy(&self->started); 2633 } 2634 2635 void *tsync_sibling(void *data) 2636 { 2637 long ret = 0; 2638 struct tsync_sibling *me = data; 2639 2640 me->system_tid = syscall(__NR_gettid); 2641 2642 pthread_mutex_lock(me->mutex); 2643 if (me->diverge) { 2644 /* Just re-apply the root prog to fork the tree */ 2645 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, 2646 me->prog, 0, 0); 2647 } 2648 sem_post(me->started); 2649 /* Return outside of started so parent notices failures. */ 2650 if (ret) { 2651 pthread_mutex_unlock(me->mutex); 2652 return (void *)SIBLING_EXIT_FAILURE; 2653 } 2654 do { 2655 pthread_cond_wait(me->cond, me->mutex); 2656 me->num_waits = me->num_waits - 1; 2657 } while (me->num_waits); 2658 pthread_mutex_unlock(me->mutex); 2659 2660 ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); 2661 if (!ret) 2662 return (void *)SIBLING_EXIT_NEWPRIVS; 2663 read(-1, NULL, 0); 2664 return (void *)SIBLING_EXIT_UNKILLED; 2665 } 2666 2667 void tsync_start_sibling(struct tsync_sibling *sibling) 2668 { 2669 pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling); 2670 } 2671 2672 TEST_F(TSYNC, siblings_fail_prctl) 2673 { 2674 long ret; 2675 void *status; 2676 struct sock_filter filter[] = { 2677 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2678 offsetof(struct seccomp_data, nr)), 2679 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), 2680 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL), 2681 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2682 }; 2683 struct sock_fprog prog = { 2684 .len = (unsigned short)ARRAY_SIZE(filter), 2685 .filter = filter, 2686 }; 2687 2688 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2689 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2690 } 2691 2692 /* Check prctl failure detection by requesting sib 0 diverge. */ 2693 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); 2694 ASSERT_NE(ENOSYS, errno) { 2695 TH_LOG("Kernel does not support seccomp syscall!"); 2696 } 2697 ASSERT_EQ(0, ret) { 2698 TH_LOG("setting filter failed"); 2699 } 2700 2701 self->sibling[0].diverge = 1; 2702 tsync_start_sibling(&self->sibling[0]); 2703 tsync_start_sibling(&self->sibling[1]); 2704 2705 while (self->sibling_count < TSYNC_SIBLINGS) { 2706 sem_wait(&self->started); 2707 self->sibling_count++; 2708 } 2709 2710 /* Signal the threads to clean up*/ 2711 pthread_mutex_lock(&self->mutex); 2712 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2713 TH_LOG("cond broadcast non-zero"); 2714 } 2715 pthread_mutex_unlock(&self->mutex); 2716 2717 /* Ensure diverging sibling failed to call prctl. */ 2718 PTHREAD_JOIN(self->sibling[0].tid, &status); 2719 EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status); 2720 PTHREAD_JOIN(self->sibling[1].tid, &status); 2721 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2722 } 2723 2724 TEST_F(TSYNC, two_siblings_with_ancestor) 2725 { 2726 long ret; 2727 void *status; 2728 2729 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2730 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2731 } 2732 2733 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2734 ASSERT_NE(ENOSYS, errno) { 2735 TH_LOG("Kernel does not support seccomp syscall!"); 2736 } 2737 ASSERT_EQ(0, ret) { 2738 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2739 } 2740 tsync_start_sibling(&self->sibling[0]); 2741 tsync_start_sibling(&self->sibling[1]); 2742 2743 while (self->sibling_count < TSYNC_SIBLINGS) { 2744 sem_wait(&self->started); 2745 self->sibling_count++; 2746 } 2747 2748 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2749 &self->apply_prog); 2750 ASSERT_EQ(0, ret) { 2751 TH_LOG("Could install filter on all threads!"); 2752 } 2753 /* Tell the siblings to test the policy */ 2754 pthread_mutex_lock(&self->mutex); 2755 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2756 TH_LOG("cond broadcast non-zero"); 2757 } 2758 pthread_mutex_unlock(&self->mutex); 2759 /* Ensure they are both killed and don't exit cleanly. */ 2760 PTHREAD_JOIN(self->sibling[0].tid, &status); 2761 EXPECT_EQ(0x0, (long)status); 2762 PTHREAD_JOIN(self->sibling[1].tid, &status); 2763 EXPECT_EQ(0x0, (long)status); 2764 } 2765 2766 TEST_F(TSYNC, two_sibling_want_nnp) 2767 { 2768 void *status; 2769 2770 /* start siblings before any prctl() operations */ 2771 tsync_start_sibling(&self->sibling[0]); 2772 tsync_start_sibling(&self->sibling[1]); 2773 while (self->sibling_count < TSYNC_SIBLINGS) { 2774 sem_wait(&self->started); 2775 self->sibling_count++; 2776 } 2777 2778 /* Tell the siblings to test no policy */ 2779 pthread_mutex_lock(&self->mutex); 2780 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2781 TH_LOG("cond broadcast non-zero"); 2782 } 2783 pthread_mutex_unlock(&self->mutex); 2784 2785 /* Ensure they are both upset about lacking nnp. */ 2786 PTHREAD_JOIN(self->sibling[0].tid, &status); 2787 EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); 2788 PTHREAD_JOIN(self->sibling[1].tid, &status); 2789 EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); 2790 } 2791 2792 TEST_F(TSYNC, two_siblings_with_no_filter) 2793 { 2794 long ret; 2795 void *status; 2796 2797 /* start siblings before any prctl() operations */ 2798 tsync_start_sibling(&self->sibling[0]); 2799 tsync_start_sibling(&self->sibling[1]); 2800 while (self->sibling_count < TSYNC_SIBLINGS) { 2801 sem_wait(&self->started); 2802 self->sibling_count++; 2803 } 2804 2805 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2806 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2807 } 2808 2809 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2810 &self->apply_prog); 2811 ASSERT_NE(ENOSYS, errno) { 2812 TH_LOG("Kernel does not support seccomp syscall!"); 2813 } 2814 ASSERT_EQ(0, ret) { 2815 TH_LOG("Could install filter on all threads!"); 2816 } 2817 2818 /* Tell the siblings to test the policy */ 2819 pthread_mutex_lock(&self->mutex); 2820 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2821 TH_LOG("cond broadcast non-zero"); 2822 } 2823 pthread_mutex_unlock(&self->mutex); 2824 2825 /* Ensure they are both killed and don't exit cleanly. */ 2826 PTHREAD_JOIN(self->sibling[0].tid, &status); 2827 EXPECT_EQ(0x0, (long)status); 2828 PTHREAD_JOIN(self->sibling[1].tid, &status); 2829 EXPECT_EQ(0x0, (long)status); 2830 } 2831 2832 TEST_F(TSYNC, two_siblings_with_one_divergence) 2833 { 2834 long ret; 2835 void *status; 2836 2837 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2838 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2839 } 2840 2841 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2842 ASSERT_NE(ENOSYS, errno) { 2843 TH_LOG("Kernel does not support seccomp syscall!"); 2844 } 2845 ASSERT_EQ(0, ret) { 2846 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2847 } 2848 self->sibling[0].diverge = 1; 2849 tsync_start_sibling(&self->sibling[0]); 2850 tsync_start_sibling(&self->sibling[1]); 2851 2852 while (self->sibling_count < TSYNC_SIBLINGS) { 2853 sem_wait(&self->started); 2854 self->sibling_count++; 2855 } 2856 2857 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2858 &self->apply_prog); 2859 ASSERT_EQ(self->sibling[0].system_tid, ret) { 2860 TH_LOG("Did not fail on diverged sibling."); 2861 } 2862 2863 /* Wake the threads */ 2864 pthread_mutex_lock(&self->mutex); 2865 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2866 TH_LOG("cond broadcast non-zero"); 2867 } 2868 pthread_mutex_unlock(&self->mutex); 2869 2870 /* Ensure they are both unkilled. */ 2871 PTHREAD_JOIN(self->sibling[0].tid, &status); 2872 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2873 PTHREAD_JOIN(self->sibling[1].tid, &status); 2874 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2875 } 2876 2877 TEST_F(TSYNC, two_siblings_with_one_divergence_no_tid_in_err) 2878 { 2879 long ret, flags; 2880 void *status; 2881 2882 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2883 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2884 } 2885 2886 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2887 ASSERT_NE(ENOSYS, errno) { 2888 TH_LOG("Kernel does not support seccomp syscall!"); 2889 } 2890 ASSERT_EQ(0, ret) { 2891 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2892 } 2893 self->sibling[0].diverge = 1; 2894 tsync_start_sibling(&self->sibling[0]); 2895 tsync_start_sibling(&self->sibling[1]); 2896 2897 while (self->sibling_count < TSYNC_SIBLINGS) { 2898 sem_wait(&self->started); 2899 self->sibling_count++; 2900 } 2901 2902 flags = SECCOMP_FILTER_FLAG_TSYNC | \ 2903 SECCOMP_FILTER_FLAG_TSYNC_ESRCH; 2904 ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, &self->apply_prog); 2905 ASSERT_EQ(ESRCH, errno) { 2906 TH_LOG("Did not return ESRCH for diverged sibling."); 2907 } 2908 ASSERT_EQ(-1, ret) { 2909 TH_LOG("Did not fail on diverged sibling."); 2910 } 2911 2912 /* Wake the threads */ 2913 pthread_mutex_lock(&self->mutex); 2914 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2915 TH_LOG("cond broadcast non-zero"); 2916 } 2917 pthread_mutex_unlock(&self->mutex); 2918 2919 /* Ensure they are both unkilled. */ 2920 PTHREAD_JOIN(self->sibling[0].tid, &status); 2921 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2922 PTHREAD_JOIN(self->sibling[1].tid, &status); 2923 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2924 } 2925 2926 TEST_F(TSYNC, two_siblings_not_under_filter) 2927 { 2928 long ret, sib; 2929 void *status; 2930 struct timespec delay = { .tv_nsec = 100000000 }; 2931 2932 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2933 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2934 } 2935 2936 /* 2937 * Sibling 0 will have its own seccomp policy 2938 * and Sibling 1 will not be under seccomp at 2939 * all. Sibling 1 will enter seccomp and 0 2940 * will cause failure. 2941 */ 2942 self->sibling[0].diverge = 1; 2943 tsync_start_sibling(&self->sibling[0]); 2944 tsync_start_sibling(&self->sibling[1]); 2945 2946 while (self->sibling_count < TSYNC_SIBLINGS) { 2947 sem_wait(&self->started); 2948 self->sibling_count++; 2949 } 2950 2951 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2952 ASSERT_NE(ENOSYS, errno) { 2953 TH_LOG("Kernel does not support seccomp syscall!"); 2954 } 2955 ASSERT_EQ(0, ret) { 2956 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2957 } 2958 2959 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2960 &self->apply_prog); 2961 ASSERT_EQ(ret, self->sibling[0].system_tid) { 2962 TH_LOG("Did not fail on diverged sibling."); 2963 } 2964 sib = 1; 2965 if (ret == self->sibling[0].system_tid) 2966 sib = 0; 2967 2968 pthread_mutex_lock(&self->mutex); 2969 2970 /* Increment the other siblings num_waits so we can clean up 2971 * the one we just saw. 2972 */ 2973 self->sibling[!sib].num_waits += 1; 2974 2975 /* Signal the thread to clean up*/ 2976 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2977 TH_LOG("cond broadcast non-zero"); 2978 } 2979 pthread_mutex_unlock(&self->mutex); 2980 PTHREAD_JOIN(self->sibling[sib].tid, &status); 2981 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2982 /* Poll for actual task death. pthread_join doesn't guarantee it. */ 2983 while (!kill(self->sibling[sib].system_tid, 0)) 2984 nanosleep(&delay, NULL); 2985 /* Switch to the remaining sibling */ 2986 sib = !sib; 2987 2988 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2989 &self->apply_prog); 2990 ASSERT_EQ(0, ret) { 2991 TH_LOG("Expected the remaining sibling to sync"); 2992 }; 2993 2994 pthread_mutex_lock(&self->mutex); 2995 2996 /* If remaining sibling didn't have a chance to wake up during 2997 * the first broadcast, manually reduce the num_waits now. 2998 */ 2999 if (self->sibling[sib].num_waits > 1) 3000 self->sibling[sib].num_waits = 1; 3001 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 3002 TH_LOG("cond broadcast non-zero"); 3003 } 3004 pthread_mutex_unlock(&self->mutex); 3005 PTHREAD_JOIN(self->sibling[sib].tid, &status); 3006 EXPECT_EQ(0, (long)status); 3007 /* Poll for actual task death. pthread_join doesn't guarantee it. */ 3008 while (!kill(self->sibling[sib].system_tid, 0)) 3009 nanosleep(&delay, NULL); 3010 3011 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 3012 &self->apply_prog); 3013 ASSERT_EQ(0, ret); /* just us chickens */ 3014 } 3015 3016 /* Make sure restarted syscalls are seen directly as "restart_syscall". */ 3017 TEST(syscall_restart) 3018 { 3019 long ret; 3020 unsigned long msg; 3021 pid_t child_pid; 3022 int pipefd[2]; 3023 int status; 3024 siginfo_t info = { }; 3025 struct sock_filter filter[] = { 3026 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 3027 offsetof(struct seccomp_data, nr)), 3028 3029 #ifdef __NR_sigreturn 3030 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 7, 0), 3031 #endif 3032 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 6, 0), 3033 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 5, 0), 3034 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 4, 0), 3035 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 5, 0), 3036 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_clock_nanosleep, 4, 0), 3037 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0), 3038 3039 /* Allow __NR_write for easy logging. */ 3040 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1), 3041 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3042 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 3043 /* The nanosleep jump target. */ 3044 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100), 3045 /* The restart_syscall jump target. */ 3046 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200), 3047 }; 3048 struct sock_fprog prog = { 3049 .len = (unsigned short)ARRAY_SIZE(filter), 3050 .filter = filter, 3051 }; 3052 #if defined(__arm__) 3053 struct utsname utsbuf; 3054 #endif 3055 3056 ASSERT_EQ(0, pipe(pipefd)); 3057 3058 child_pid = fork(); 3059 ASSERT_LE(0, child_pid); 3060 if (child_pid == 0) { 3061 /* Child uses EXPECT not ASSERT to deliver status correctly. */ 3062 char buf = ' '; 3063 struct timespec timeout = { }; 3064 3065 /* Attach parent as tracer and stop. */ 3066 EXPECT_EQ(0, ptrace(PTRACE_TRACEME)); 3067 EXPECT_EQ(0, raise(SIGSTOP)); 3068 3069 EXPECT_EQ(0, close(pipefd[1])); 3070 3071 EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 3072 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3073 } 3074 3075 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 3076 EXPECT_EQ(0, ret) { 3077 TH_LOG("Failed to install filter!"); 3078 } 3079 3080 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) { 3081 TH_LOG("Failed to read() sync from parent"); 3082 } 3083 EXPECT_EQ('.', buf) { 3084 TH_LOG("Failed to get sync data from read()"); 3085 } 3086 3087 /* Start nanosleep to be interrupted. */ 3088 timeout.tv_sec = 1; 3089 errno = 0; 3090 EXPECT_EQ(0, nanosleep(&timeout, NULL)) { 3091 TH_LOG("Call to nanosleep() failed (errno %d: %s)", 3092 errno, strerror(errno)); 3093 } 3094 3095 /* Read final sync from parent. */ 3096 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) { 3097 TH_LOG("Failed final read() from parent"); 3098 } 3099 EXPECT_EQ('!', buf) { 3100 TH_LOG("Failed to get final data from read()"); 3101 } 3102 3103 /* Directly report the status of our test harness results. */ 3104 syscall(__NR_exit, _metadata->exit_code); 3105 } 3106 EXPECT_EQ(0, close(pipefd[0])); 3107 3108 /* Attach to child, setup options, and release. */ 3109 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 3110 ASSERT_EQ(true, WIFSTOPPED(status)); 3111 ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL, 3112 PTRACE_O_TRACESECCOMP)); 3113 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 3114 ASSERT_EQ(1, write(pipefd[1], ".", 1)); 3115 3116 /* Wait for nanosleep() to start. */ 3117 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 3118 ASSERT_EQ(true, WIFSTOPPED(status)); 3119 ASSERT_EQ(SIGTRAP, WSTOPSIG(status)); 3120 ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16)); 3121 ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg)); 3122 ASSERT_EQ(0x100, msg); 3123 ret = get_syscall(_metadata, child_pid); 3124 EXPECT_TRUE(ret == __NR_nanosleep || ret == __NR_clock_nanosleep); 3125 3126 /* Might as well check siginfo for sanity while we're here. */ 3127 ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); 3128 ASSERT_EQ(SIGTRAP, info.si_signo); 3129 ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code); 3130 EXPECT_EQ(0, info.si_errno); 3131 EXPECT_EQ(getuid(), info.si_uid); 3132 /* Verify signal delivery came from child (seccomp-triggered). */ 3133 EXPECT_EQ(child_pid, info.si_pid); 3134 3135 /* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */ 3136 ASSERT_EQ(0, kill(child_pid, SIGSTOP)); 3137 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 3138 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 3139 ASSERT_EQ(true, WIFSTOPPED(status)); 3140 ASSERT_EQ(SIGSTOP, WSTOPSIG(status)); 3141 ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); 3142 /* 3143 * There is no siginfo on SIGSTOP any more, so we can't verify 3144 * signal delivery came from parent now (getpid() == info.si_pid). 3145 * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com 3146 * At least verify the SIGSTOP via PTRACE_GETSIGINFO. 3147 */ 3148 EXPECT_EQ(SIGSTOP, info.si_signo); 3149 3150 /* Restart nanosleep with SIGCONT, which triggers restart_syscall. */ 3151 ASSERT_EQ(0, kill(child_pid, SIGCONT)); 3152 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 3153 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 3154 ASSERT_EQ(true, WIFSTOPPED(status)); 3155 ASSERT_EQ(SIGCONT, WSTOPSIG(status)); 3156 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 3157 3158 /* Wait for restart_syscall() to start. */ 3159 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 3160 ASSERT_EQ(true, WIFSTOPPED(status)); 3161 ASSERT_EQ(SIGTRAP, WSTOPSIG(status)); 3162 ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16)); 3163 ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg)); 3164 3165 ASSERT_EQ(0x200, msg); 3166 ret = get_syscall(_metadata, child_pid); 3167 #if defined(__arm__) 3168 /* 3169 * FIXME: 3170 * - native ARM registers do NOT expose true syscall. 3171 * - compat ARM registers on ARM64 DO expose true syscall. 3172 */ 3173 ASSERT_EQ(0, uname(&utsbuf)); 3174 if (strncmp(utsbuf.machine, "arm", 3) == 0) { 3175 EXPECT_EQ(__NR_nanosleep, ret); 3176 } else 3177 #endif 3178 { 3179 EXPECT_EQ(__NR_restart_syscall, ret); 3180 } 3181 3182 /* Write again to end test. */ 3183 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 3184 ASSERT_EQ(1, write(pipefd[1], "!", 1)); 3185 EXPECT_EQ(0, close(pipefd[1])); 3186 3187 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 3188 if (WIFSIGNALED(status) || WEXITSTATUS(status)) 3189 _metadata->exit_code = KSFT_FAIL; 3190 } 3191 3192 TEST_SIGNAL(filter_flag_log, SIGSYS) 3193 { 3194 struct sock_filter allow_filter[] = { 3195 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3196 }; 3197 struct sock_filter kill_filter[] = { 3198 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 3199 offsetof(struct seccomp_data, nr)), 3200 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 3201 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 3202 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3203 }; 3204 struct sock_fprog allow_prog = { 3205 .len = (unsigned short)ARRAY_SIZE(allow_filter), 3206 .filter = allow_filter, 3207 }; 3208 struct sock_fprog kill_prog = { 3209 .len = (unsigned short)ARRAY_SIZE(kill_filter), 3210 .filter = kill_filter, 3211 }; 3212 long ret; 3213 pid_t parent = getppid(); 3214 3215 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3216 ASSERT_EQ(0, ret); 3217 3218 /* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */ 3219 ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG, 3220 &allow_prog); 3221 ASSERT_NE(ENOSYS, errno) { 3222 TH_LOG("Kernel does not support seccomp syscall!"); 3223 } 3224 EXPECT_NE(0, ret) { 3225 TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!"); 3226 } 3227 EXPECT_EQ(EINVAL, errno) { 3228 TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!"); 3229 } 3230 3231 /* Verify that a simple, permissive filter can be added with no flags */ 3232 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog); 3233 EXPECT_EQ(0, ret); 3234 3235 /* See if the same filter can be added with the FILTER_FLAG_LOG flag */ 3236 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, 3237 &allow_prog); 3238 ASSERT_NE(EINVAL, errno) { 3239 TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!"); 3240 } 3241 EXPECT_EQ(0, ret); 3242 3243 /* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */ 3244 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, 3245 &kill_prog); 3246 EXPECT_EQ(0, ret); 3247 3248 EXPECT_EQ(parent, syscall(__NR_getppid)); 3249 /* getpid() should never return. */ 3250 EXPECT_EQ(0, syscall(__NR_getpid)); 3251 } 3252 3253 TEST(get_action_avail) 3254 { 3255 __u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP, 3256 SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE, 3257 SECCOMP_RET_LOG, SECCOMP_RET_ALLOW }; 3258 __u32 unknown_action = 0x10000000U; 3259 int i; 3260 long ret; 3261 3262 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]); 3263 ASSERT_NE(ENOSYS, errno) { 3264 TH_LOG("Kernel does not support seccomp syscall!"); 3265 } 3266 ASSERT_NE(EINVAL, errno) { 3267 TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!"); 3268 } 3269 EXPECT_EQ(ret, 0); 3270 3271 for (i = 0; i < ARRAY_SIZE(actions); i++) { 3272 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]); 3273 EXPECT_EQ(ret, 0) { 3274 TH_LOG("Expected action (0x%X) not available!", 3275 actions[i]); 3276 } 3277 } 3278 3279 /* Check that an unknown action is handled properly (EOPNOTSUPP) */ 3280 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action); 3281 EXPECT_EQ(ret, -1); 3282 EXPECT_EQ(errno, EOPNOTSUPP); 3283 } 3284 3285 TEST(get_metadata) 3286 { 3287 pid_t pid; 3288 int pipefd[2]; 3289 char buf; 3290 struct seccomp_metadata md; 3291 long ret; 3292 3293 /* Only real root can get metadata. */ 3294 if (geteuid()) { 3295 SKIP(return, "get_metadata requires real root"); 3296 return; 3297 } 3298 3299 ASSERT_EQ(0, pipe(pipefd)); 3300 3301 pid = fork(); 3302 ASSERT_GE(pid, 0); 3303 if (pid == 0) { 3304 struct sock_filter filter[] = { 3305 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3306 }; 3307 struct sock_fprog prog = { 3308 .len = (unsigned short)ARRAY_SIZE(filter), 3309 .filter = filter, 3310 }; 3311 3312 /* one with log, one without */ 3313 EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 3314 SECCOMP_FILTER_FLAG_LOG, &prog)); 3315 EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog)); 3316 3317 EXPECT_EQ(0, close(pipefd[0])); 3318 ASSERT_EQ(1, write(pipefd[1], "1", 1)); 3319 ASSERT_EQ(0, close(pipefd[1])); 3320 3321 while (1) 3322 sleep(100); 3323 } 3324 3325 ASSERT_EQ(0, close(pipefd[1])); 3326 ASSERT_EQ(1, read(pipefd[0], &buf, 1)); 3327 3328 ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid)); 3329 ASSERT_EQ(pid, waitpid(pid, NULL, 0)); 3330 3331 /* Past here must not use ASSERT or child process is never killed. */ 3332 3333 md.filter_off = 0; 3334 errno = 0; 3335 ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md); 3336 EXPECT_EQ(sizeof(md), ret) { 3337 if (errno == EINVAL) 3338 SKIP(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)"); 3339 } 3340 3341 EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG); 3342 EXPECT_EQ(md.filter_off, 0); 3343 3344 md.filter_off = 1; 3345 ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md); 3346 EXPECT_EQ(sizeof(md), ret); 3347 EXPECT_EQ(md.flags, 0); 3348 EXPECT_EQ(md.filter_off, 1); 3349 3350 skip: 3351 ASSERT_EQ(0, kill(pid, SIGKILL)); 3352 } 3353 3354 static int user_notif_syscall(int nr, unsigned int flags) 3355 { 3356 struct sock_filter filter[] = { 3357 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 3358 offsetof(struct seccomp_data, nr)), 3359 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1), 3360 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF), 3361 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3362 }; 3363 3364 struct sock_fprog prog = { 3365 .len = (unsigned short)ARRAY_SIZE(filter), 3366 .filter = filter, 3367 }; 3368 3369 return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog); 3370 } 3371 3372 #define USER_NOTIF_MAGIC INT_MAX 3373 TEST(user_notification_basic) 3374 { 3375 pid_t pid; 3376 long ret; 3377 int status, listener; 3378 struct seccomp_notif req = {}; 3379 struct seccomp_notif_resp resp = {}; 3380 struct pollfd pollfd; 3381 3382 struct sock_filter filter[] = { 3383 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3384 }; 3385 struct sock_fprog prog = { 3386 .len = (unsigned short)ARRAY_SIZE(filter), 3387 .filter = filter, 3388 }; 3389 3390 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3391 ASSERT_EQ(0, ret) { 3392 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3393 } 3394 3395 pid = fork(); 3396 ASSERT_GE(pid, 0); 3397 3398 /* Check that we get -ENOSYS with no listener attached */ 3399 if (pid == 0) { 3400 if (user_notif_syscall(__NR_getppid, 0) < 0) 3401 exit(1); 3402 ret = syscall(__NR_getppid); 3403 exit(ret >= 0 || errno != ENOSYS); 3404 } 3405 3406 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3407 EXPECT_EQ(true, WIFEXITED(status)); 3408 EXPECT_EQ(0, WEXITSTATUS(status)); 3409 3410 /* Add some no-op filters for grins. */ 3411 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3412 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3413 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3414 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3415 3416 /* Check that the basic notification machinery works */ 3417 listener = user_notif_syscall(__NR_getppid, 3418 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3419 ASSERT_GE(listener, 0); 3420 3421 /* Installing a second listener in the chain should EBUSY */ 3422 EXPECT_EQ(user_notif_syscall(__NR_getppid, 3423 SECCOMP_FILTER_FLAG_NEW_LISTENER), 3424 -1); 3425 EXPECT_EQ(errno, EBUSY); 3426 3427 pid = fork(); 3428 ASSERT_GE(pid, 0); 3429 3430 if (pid == 0) { 3431 ret = syscall(__NR_getppid); 3432 exit(ret != USER_NOTIF_MAGIC); 3433 } 3434 3435 pollfd.fd = listener; 3436 pollfd.events = POLLIN | POLLOUT; 3437 3438 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3439 EXPECT_EQ(pollfd.revents, POLLIN); 3440 3441 /* Test that we can't pass garbage to the kernel. */ 3442 memset(&req, 0, sizeof(req)); 3443 req.pid = -1; 3444 errno = 0; 3445 ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req); 3446 EXPECT_EQ(-1, ret); 3447 EXPECT_EQ(EINVAL, errno); 3448 3449 if (ret) { 3450 req.pid = 0; 3451 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3452 } 3453 3454 pollfd.fd = listener; 3455 pollfd.events = POLLIN | POLLOUT; 3456 3457 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3458 EXPECT_EQ(pollfd.revents, POLLOUT); 3459 3460 EXPECT_EQ(req.data.nr, __NR_getppid); 3461 3462 resp.id = req.id; 3463 resp.error = 0; 3464 resp.val = USER_NOTIF_MAGIC; 3465 3466 /* check that we make sure flags == 0 */ 3467 resp.flags = 1; 3468 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3469 EXPECT_EQ(errno, EINVAL); 3470 3471 resp.flags = 0; 3472 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3473 3474 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3475 EXPECT_EQ(true, WIFEXITED(status)); 3476 EXPECT_EQ(0, WEXITSTATUS(status)); 3477 } 3478 3479 TEST(user_notification_with_tsync) 3480 { 3481 int ret; 3482 unsigned int flags; 3483 3484 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3485 ASSERT_EQ(0, ret) { 3486 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3487 } 3488 3489 /* these were exclusive */ 3490 flags = SECCOMP_FILTER_FLAG_NEW_LISTENER | 3491 SECCOMP_FILTER_FLAG_TSYNC; 3492 ASSERT_EQ(-1, user_notif_syscall(__NR_getppid, flags)); 3493 ASSERT_EQ(EINVAL, errno); 3494 3495 /* but now they're not */ 3496 flags |= SECCOMP_FILTER_FLAG_TSYNC_ESRCH; 3497 ret = user_notif_syscall(__NR_getppid, flags); 3498 close(ret); 3499 ASSERT_LE(0, ret); 3500 } 3501 3502 TEST(user_notification_kill_in_middle) 3503 { 3504 pid_t pid; 3505 long ret; 3506 int listener; 3507 struct seccomp_notif req = {}; 3508 struct seccomp_notif_resp resp = {}; 3509 3510 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3511 ASSERT_EQ(0, ret) { 3512 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3513 } 3514 3515 listener = user_notif_syscall(__NR_getppid, 3516 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3517 ASSERT_GE(listener, 0); 3518 3519 /* 3520 * Check that nothing bad happens when we kill the task in the middle 3521 * of a syscall. 3522 */ 3523 pid = fork(); 3524 ASSERT_GE(pid, 0); 3525 3526 if (pid == 0) { 3527 ret = syscall(__NR_getppid); 3528 exit(ret != USER_NOTIF_MAGIC); 3529 } 3530 3531 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3532 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), 0); 3533 3534 EXPECT_EQ(kill(pid, SIGKILL), 0); 3535 EXPECT_EQ(waitpid(pid, NULL, 0), pid); 3536 3537 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), -1); 3538 3539 resp.id = req.id; 3540 ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp); 3541 EXPECT_EQ(ret, -1); 3542 EXPECT_EQ(errno, ENOENT); 3543 } 3544 3545 static int handled = -1; 3546 3547 static void signal_handler(int signal) 3548 { 3549 if (write(handled, "c", 1) != 1) 3550 perror("write from signal"); 3551 } 3552 3553 TEST(user_notification_signal) 3554 { 3555 pid_t pid; 3556 long ret; 3557 int status, listener, sk_pair[2]; 3558 struct seccomp_notif req = {}; 3559 struct seccomp_notif_resp resp = {}; 3560 char c; 3561 3562 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3563 ASSERT_EQ(0, ret) { 3564 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3565 } 3566 3567 ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0); 3568 3569 listener = user_notif_syscall(__NR_gettid, 3570 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3571 ASSERT_GE(listener, 0); 3572 3573 pid = fork(); 3574 ASSERT_GE(pid, 0); 3575 3576 if (pid == 0) { 3577 close(sk_pair[0]); 3578 handled = sk_pair[1]; 3579 if (signal(SIGUSR1, signal_handler) == SIG_ERR) { 3580 perror("signal"); 3581 exit(1); 3582 } 3583 /* 3584 * ERESTARTSYS behavior is a bit hard to test, because we need 3585 * to rely on a signal that has not yet been handled. Let's at 3586 * least check that the error code gets propagated through, and 3587 * hope that it doesn't break when there is actually a signal :) 3588 */ 3589 ret = syscall(__NR_gettid); 3590 exit(!(ret == -1 && errno == 512)); 3591 } 3592 3593 close(sk_pair[1]); 3594 3595 memset(&req, 0, sizeof(req)); 3596 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3597 3598 EXPECT_EQ(kill(pid, SIGUSR1), 0); 3599 3600 /* 3601 * Make sure the signal really is delivered, which means we're not 3602 * stuck in the user notification code any more and the notification 3603 * should be dead. 3604 */ 3605 EXPECT_EQ(read(sk_pair[0], &c, 1), 1); 3606 3607 resp.id = req.id; 3608 resp.error = -EPERM; 3609 resp.val = 0; 3610 3611 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3612 EXPECT_EQ(errno, ENOENT); 3613 3614 memset(&req, 0, sizeof(req)); 3615 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3616 3617 resp.id = req.id; 3618 resp.error = -512; /* -ERESTARTSYS */ 3619 resp.val = 0; 3620 3621 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3622 3623 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3624 EXPECT_EQ(true, WIFEXITED(status)); 3625 EXPECT_EQ(0, WEXITSTATUS(status)); 3626 } 3627 3628 TEST(user_notification_closed_listener) 3629 { 3630 pid_t pid; 3631 long ret; 3632 int status, listener; 3633 3634 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3635 ASSERT_EQ(0, ret) { 3636 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3637 } 3638 3639 listener = user_notif_syscall(__NR_getppid, 3640 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3641 ASSERT_GE(listener, 0); 3642 3643 /* 3644 * Check that we get an ENOSYS when the listener is closed. 3645 */ 3646 pid = fork(); 3647 ASSERT_GE(pid, 0); 3648 if (pid == 0) { 3649 close(listener); 3650 ret = syscall(__NR_getppid); 3651 exit(ret != -1 && errno != ENOSYS); 3652 } 3653 3654 close(listener); 3655 3656 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3657 EXPECT_EQ(true, WIFEXITED(status)); 3658 EXPECT_EQ(0, WEXITSTATUS(status)); 3659 } 3660 3661 /* 3662 * Check that a pid in a child namespace still shows up as valid in ours. 3663 */ 3664 TEST(user_notification_child_pid_ns) 3665 { 3666 pid_t pid; 3667 int status, listener; 3668 struct seccomp_notif req = {}; 3669 struct seccomp_notif_resp resp = {}; 3670 3671 ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0) { 3672 if (errno == EINVAL) 3673 SKIP(return, "kernel missing CLONE_NEWUSER support"); 3674 }; 3675 3676 listener = user_notif_syscall(__NR_getppid, 3677 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3678 ASSERT_GE(listener, 0); 3679 3680 pid = fork(); 3681 ASSERT_GE(pid, 0); 3682 3683 if (pid == 0) 3684 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3685 3686 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3687 EXPECT_EQ(req.pid, pid); 3688 3689 resp.id = req.id; 3690 resp.error = 0; 3691 resp.val = USER_NOTIF_MAGIC; 3692 3693 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3694 3695 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3696 EXPECT_EQ(true, WIFEXITED(status)); 3697 EXPECT_EQ(0, WEXITSTATUS(status)); 3698 close(listener); 3699 } 3700 3701 /* 3702 * Check that a pid in a sibling (i.e. unrelated) namespace shows up as 0, i.e. 3703 * invalid. 3704 */ 3705 TEST(user_notification_sibling_pid_ns) 3706 { 3707 pid_t pid, pid2; 3708 int status, listener; 3709 struct seccomp_notif req = {}; 3710 struct seccomp_notif_resp resp = {}; 3711 3712 ASSERT_EQ(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), 0) { 3713 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3714 } 3715 3716 listener = user_notif_syscall(__NR_getppid, 3717 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3718 ASSERT_GE(listener, 0); 3719 3720 pid = fork(); 3721 ASSERT_GE(pid, 0); 3722 3723 if (pid == 0) { 3724 ASSERT_EQ(unshare(CLONE_NEWPID), 0) { 3725 if (errno == EPERM) 3726 SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN"); 3727 else if (errno == EINVAL) 3728 SKIP(return, "CLONE_NEWPID is invalid (missing CONFIG_PID_NS?)"); 3729 } 3730 3731 pid2 = fork(); 3732 ASSERT_GE(pid2, 0); 3733 3734 if (pid2 == 0) 3735 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3736 3737 EXPECT_EQ(waitpid(pid2, &status, 0), pid2); 3738 EXPECT_EQ(true, WIFEXITED(status)); 3739 EXPECT_EQ(0, WEXITSTATUS(status)); 3740 exit(WEXITSTATUS(status)); 3741 } 3742 3743 /* Create the sibling ns, and sibling in it. */ 3744 ASSERT_EQ(unshare(CLONE_NEWPID), 0) { 3745 if (errno == EPERM) 3746 SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN"); 3747 else if (errno == EINVAL) 3748 SKIP(return, "CLONE_NEWPID is invalid (missing CONFIG_PID_NS?)"); 3749 } 3750 ASSERT_EQ(errno, 0); 3751 3752 pid2 = fork(); 3753 ASSERT_GE(pid2, 0); 3754 3755 if (pid2 == 0) { 3756 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3757 /* 3758 * The pid should be 0, i.e. the task is in some namespace that 3759 * we can't "see". 3760 */ 3761 EXPECT_EQ(req.pid, 0); 3762 3763 resp.id = req.id; 3764 resp.error = 0; 3765 resp.val = USER_NOTIF_MAGIC; 3766 3767 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3768 exit(0); 3769 } 3770 3771 close(listener); 3772 3773 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3774 EXPECT_EQ(true, WIFEXITED(status)); 3775 EXPECT_EQ(0, WEXITSTATUS(status)); 3776 3777 EXPECT_EQ(waitpid(pid2, &status, 0), pid2); 3778 EXPECT_EQ(true, WIFEXITED(status)); 3779 EXPECT_EQ(0, WEXITSTATUS(status)); 3780 } 3781 3782 TEST(user_notification_fault_recv) 3783 { 3784 pid_t pid; 3785 int status, listener; 3786 struct seccomp_notif req = {}; 3787 struct seccomp_notif_resp resp = {}; 3788 3789 ASSERT_EQ(unshare(CLONE_NEWUSER), 0) { 3790 if (errno == EINVAL) 3791 SKIP(return, "kernel missing CLONE_NEWUSER support"); 3792 } 3793 3794 listener = user_notif_syscall(__NR_getppid, 3795 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3796 ASSERT_GE(listener, 0); 3797 3798 pid = fork(); 3799 ASSERT_GE(pid, 0); 3800 3801 if (pid == 0) 3802 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3803 3804 /* Do a bad recv() */ 3805 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, NULL), -1); 3806 EXPECT_EQ(errno, EFAULT); 3807 3808 /* We should still be able to receive this notification, though. */ 3809 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3810 EXPECT_EQ(req.pid, pid); 3811 3812 resp.id = req.id; 3813 resp.error = 0; 3814 resp.val = USER_NOTIF_MAGIC; 3815 3816 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3817 3818 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3819 EXPECT_EQ(true, WIFEXITED(status)); 3820 EXPECT_EQ(0, WEXITSTATUS(status)); 3821 } 3822 3823 TEST(seccomp_get_notif_sizes) 3824 { 3825 struct seccomp_notif_sizes sizes; 3826 3827 ASSERT_EQ(seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes), 0); 3828 EXPECT_EQ(sizes.seccomp_notif, sizeof(struct seccomp_notif)); 3829 EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp)); 3830 } 3831 3832 TEST(user_notification_continue) 3833 { 3834 pid_t pid; 3835 long ret; 3836 int status, listener; 3837 struct seccomp_notif req = {}; 3838 struct seccomp_notif_resp resp = {}; 3839 struct pollfd pollfd; 3840 3841 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3842 ASSERT_EQ(0, ret) { 3843 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3844 } 3845 3846 listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER); 3847 ASSERT_GE(listener, 0); 3848 3849 pid = fork(); 3850 ASSERT_GE(pid, 0); 3851 3852 if (pid == 0) { 3853 int dup_fd, pipe_fds[2]; 3854 pid_t self; 3855 3856 ASSERT_GE(pipe(pipe_fds), 0); 3857 3858 dup_fd = dup(pipe_fds[0]); 3859 ASSERT_GE(dup_fd, 0); 3860 EXPECT_NE(pipe_fds[0], dup_fd); 3861 3862 self = getpid(); 3863 ASSERT_EQ(filecmp(self, self, pipe_fds[0], dup_fd), 0); 3864 exit(0); 3865 } 3866 3867 pollfd.fd = listener; 3868 pollfd.events = POLLIN | POLLOUT; 3869 3870 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3871 EXPECT_EQ(pollfd.revents, POLLIN); 3872 3873 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3874 3875 pollfd.fd = listener; 3876 pollfd.events = POLLIN | POLLOUT; 3877 3878 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3879 EXPECT_EQ(pollfd.revents, POLLOUT); 3880 3881 EXPECT_EQ(req.data.nr, __NR_dup); 3882 3883 resp.id = req.id; 3884 resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE; 3885 3886 /* 3887 * Verify that setting SECCOMP_USER_NOTIF_FLAG_CONTINUE enforces other 3888 * args be set to 0. 3889 */ 3890 resp.error = 0; 3891 resp.val = USER_NOTIF_MAGIC; 3892 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3893 EXPECT_EQ(errno, EINVAL); 3894 3895 resp.error = USER_NOTIF_MAGIC; 3896 resp.val = 0; 3897 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3898 EXPECT_EQ(errno, EINVAL); 3899 3900 resp.error = 0; 3901 resp.val = 0; 3902 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0) { 3903 if (errno == EINVAL) 3904 SKIP(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE"); 3905 } 3906 3907 skip: 3908 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3909 EXPECT_EQ(true, WIFEXITED(status)); 3910 EXPECT_EQ(0, WEXITSTATUS(status)) { 3911 if (WEXITSTATUS(status) == 2) { 3912 SKIP(return, "Kernel does not support kcmp() syscall"); 3913 return; 3914 } 3915 } 3916 } 3917 3918 TEST(user_notification_filter_empty) 3919 { 3920 pid_t pid; 3921 long ret; 3922 int status; 3923 struct pollfd pollfd; 3924 struct __clone_args args = { 3925 .flags = CLONE_FILES, 3926 .exit_signal = SIGCHLD, 3927 }; 3928 3929 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3930 ASSERT_EQ(0, ret) { 3931 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3932 } 3933 3934 if (__NR_clone3 < 0) 3935 SKIP(return, "Test not built with clone3 support"); 3936 3937 pid = sys_clone3(&args, sizeof(args)); 3938 ASSERT_GE(pid, 0); 3939 3940 if (pid == 0) { 3941 int listener; 3942 3943 listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER); 3944 if (listener < 0) 3945 _exit(EXIT_FAILURE); 3946 3947 if (dup2(listener, 200) != 200) 3948 _exit(EXIT_FAILURE); 3949 3950 close(listener); 3951 3952 _exit(EXIT_SUCCESS); 3953 } 3954 3955 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3956 EXPECT_EQ(true, WIFEXITED(status)); 3957 EXPECT_EQ(0, WEXITSTATUS(status)); 3958 3959 /* 3960 * The seccomp filter has become unused so we should be notified once 3961 * the kernel gets around to cleaning up task struct. 3962 */ 3963 pollfd.fd = 200; 3964 pollfd.events = POLLHUP; 3965 3966 EXPECT_GT(poll(&pollfd, 1, 2000), 0); 3967 EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0); 3968 } 3969 3970 TEST(user_ioctl_notification_filter_empty) 3971 { 3972 pid_t pid; 3973 long ret; 3974 int status, p[2]; 3975 struct __clone_args args = { 3976 .flags = CLONE_FILES, 3977 .exit_signal = SIGCHLD, 3978 }; 3979 struct seccomp_notif req = {}; 3980 3981 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3982 ASSERT_EQ(0, ret) { 3983 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3984 } 3985 3986 if (__NR_clone3 < 0) 3987 SKIP(return, "Test not built with clone3 support"); 3988 3989 ASSERT_EQ(0, pipe(p)); 3990 3991 pid = sys_clone3(&args, sizeof(args)); 3992 ASSERT_GE(pid, 0); 3993 3994 if (pid == 0) { 3995 int listener; 3996 3997 listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER); 3998 if (listener < 0) 3999 _exit(EXIT_FAILURE); 4000 4001 if (dup2(listener, 200) != 200) 4002 _exit(EXIT_FAILURE); 4003 close(p[1]); 4004 close(listener); 4005 sleep(1); 4006 4007 _exit(EXIT_SUCCESS); 4008 } 4009 if (read(p[0], &status, 1) != 0) 4010 _exit(EXIT_SUCCESS); 4011 close(p[0]); 4012 /* 4013 * The seccomp filter has become unused so we should be notified once 4014 * the kernel gets around to cleaning up task struct. 4015 */ 4016 EXPECT_EQ(ioctl(200, SECCOMP_IOCTL_NOTIF_RECV, &req), -1); 4017 EXPECT_EQ(errno, ENOENT); 4018 4019 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4020 EXPECT_EQ(true, WIFEXITED(status)); 4021 EXPECT_EQ(0, WEXITSTATUS(status)); 4022 } 4023 4024 static void *do_thread(void *data) 4025 { 4026 return NULL; 4027 } 4028 4029 TEST(user_notification_filter_empty_threaded) 4030 { 4031 pid_t pid; 4032 long ret; 4033 int status; 4034 struct pollfd pollfd; 4035 struct __clone_args args = { 4036 .flags = CLONE_FILES, 4037 .exit_signal = SIGCHLD, 4038 }; 4039 4040 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4041 ASSERT_EQ(0, ret) { 4042 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4043 } 4044 4045 if (__NR_clone3 < 0) 4046 SKIP(return, "Test not built with clone3 support"); 4047 4048 pid = sys_clone3(&args, sizeof(args)); 4049 ASSERT_GE(pid, 0); 4050 4051 if (pid == 0) { 4052 pid_t pid1, pid2; 4053 int listener, status; 4054 pthread_t thread; 4055 4056 listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER); 4057 if (listener < 0) 4058 _exit(EXIT_FAILURE); 4059 4060 if (dup2(listener, 200) != 200) 4061 _exit(EXIT_FAILURE); 4062 4063 close(listener); 4064 4065 pid1 = fork(); 4066 if (pid1 < 0) 4067 _exit(EXIT_FAILURE); 4068 4069 if (pid1 == 0) 4070 _exit(EXIT_SUCCESS); 4071 4072 pid2 = fork(); 4073 if (pid2 < 0) 4074 _exit(EXIT_FAILURE); 4075 4076 if (pid2 == 0) 4077 _exit(EXIT_SUCCESS); 4078 4079 if (pthread_create(&thread, NULL, do_thread, NULL) || 4080 pthread_join(thread, NULL)) 4081 _exit(EXIT_FAILURE); 4082 4083 if (pthread_create(&thread, NULL, do_thread, NULL) || 4084 pthread_join(thread, NULL)) 4085 _exit(EXIT_FAILURE); 4086 4087 if (waitpid(pid1, &status, 0) != pid1 || !WIFEXITED(status) || 4088 WEXITSTATUS(status)) 4089 _exit(EXIT_FAILURE); 4090 4091 if (waitpid(pid2, &status, 0) != pid2 || !WIFEXITED(status) || 4092 WEXITSTATUS(status)) 4093 _exit(EXIT_FAILURE); 4094 4095 exit(EXIT_SUCCESS); 4096 } 4097 4098 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4099 EXPECT_EQ(true, WIFEXITED(status)); 4100 EXPECT_EQ(0, WEXITSTATUS(status)); 4101 4102 /* 4103 * The seccomp filter has become unused so we should be notified once 4104 * the kernel gets around to cleaning up task struct. 4105 */ 4106 pollfd.fd = 200; 4107 pollfd.events = POLLHUP; 4108 4109 EXPECT_GT(poll(&pollfd, 1, 2000), 0); 4110 EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0); 4111 } 4112 4113 4114 int get_next_fd(int prev_fd) 4115 { 4116 for (int i = prev_fd + 1; i < FD_SETSIZE; ++i) { 4117 if (fcntl(i, F_GETFD) == -1) 4118 return i; 4119 } 4120 _exit(EXIT_FAILURE); 4121 } 4122 4123 TEST(user_notification_addfd) 4124 { 4125 pid_t pid; 4126 long ret; 4127 int status, listener, memfd, fd, nextfd; 4128 struct seccomp_notif_addfd addfd = {}; 4129 struct seccomp_notif_addfd_small small = {}; 4130 struct seccomp_notif_addfd_big big = {}; 4131 struct seccomp_notif req = {}; 4132 struct seccomp_notif_resp resp = {}; 4133 /* 100 ms */ 4134 struct timespec delay = { .tv_nsec = 100000000 }; 4135 4136 /* There may be arbitrary already-open fds at test start. */ 4137 memfd = memfd_create("test", 0); 4138 ASSERT_GE(memfd, 0); 4139 nextfd = get_next_fd(memfd); 4140 4141 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4142 ASSERT_EQ(0, ret) { 4143 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4144 } 4145 4146 /* fd: 4 */ 4147 /* Check that the basic notification machinery works */ 4148 listener = user_notif_syscall(__NR_getppid, 4149 SECCOMP_FILTER_FLAG_NEW_LISTENER); 4150 ASSERT_EQ(listener, nextfd); 4151 nextfd = get_next_fd(nextfd); 4152 4153 pid = fork(); 4154 ASSERT_GE(pid, 0); 4155 4156 if (pid == 0) { 4157 /* fds will be added and this value is expected */ 4158 if (syscall(__NR_getppid) != USER_NOTIF_MAGIC) 4159 exit(1); 4160 4161 /* Atomic addfd+send is received here. Check it is a valid fd */ 4162 if (fcntl(syscall(__NR_getppid), F_GETFD) == -1) 4163 exit(1); 4164 4165 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 4166 } 4167 4168 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4169 4170 addfd.srcfd = memfd; 4171 addfd.newfd = 0; 4172 addfd.id = req.id; 4173 addfd.flags = 0x0; 4174 4175 /* Verify bad newfd_flags cannot be set */ 4176 addfd.newfd_flags = ~O_CLOEXEC; 4177 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 4178 EXPECT_EQ(errno, EINVAL); 4179 addfd.newfd_flags = O_CLOEXEC; 4180 4181 /* Verify bad flags cannot be set */ 4182 addfd.flags = 0xff; 4183 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 4184 EXPECT_EQ(errno, EINVAL); 4185 addfd.flags = 0; 4186 4187 /* Verify that remote_fd cannot be set without setting flags */ 4188 addfd.newfd = 1; 4189 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 4190 EXPECT_EQ(errno, EINVAL); 4191 addfd.newfd = 0; 4192 4193 /* Verify small size cannot be set */ 4194 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_SMALL, &small), -1); 4195 EXPECT_EQ(errno, EINVAL); 4196 4197 /* Verify we can't send bits filled in unknown buffer area */ 4198 memset(&big, 0xAA, sizeof(big)); 4199 big.addfd = addfd; 4200 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big), -1); 4201 EXPECT_EQ(errno, E2BIG); 4202 4203 4204 /* Verify we can set an arbitrary remote fd */ 4205 fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); 4206 EXPECT_EQ(fd, nextfd); 4207 nextfd = get_next_fd(nextfd); 4208 EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0); 4209 4210 /* Verify we can set an arbitrary remote fd with large size */ 4211 memset(&big, 0x0, sizeof(big)); 4212 big.addfd = addfd; 4213 fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big); 4214 EXPECT_EQ(fd, nextfd); 4215 nextfd = get_next_fd(nextfd); 4216 4217 /* Verify we can set a specific remote fd */ 4218 addfd.newfd = 42; 4219 addfd.flags = SECCOMP_ADDFD_FLAG_SETFD; 4220 fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); 4221 EXPECT_EQ(fd, 42); 4222 EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0); 4223 4224 /* Resume syscall */ 4225 resp.id = req.id; 4226 resp.error = 0; 4227 resp.val = USER_NOTIF_MAGIC; 4228 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4229 4230 /* 4231 * This sets the ID of the ADD FD to the last request plus 1. The 4232 * notification ID increments 1 per notification. 4233 */ 4234 addfd.id = req.id + 1; 4235 4236 /* This spins until the underlying notification is generated */ 4237 while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 && 4238 errno != -EINPROGRESS) 4239 nanosleep(&delay, NULL); 4240 4241 memset(&req, 0, sizeof(req)); 4242 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4243 ASSERT_EQ(addfd.id, req.id); 4244 4245 /* Verify we can do an atomic addfd and send */ 4246 addfd.newfd = 0; 4247 addfd.flags = SECCOMP_ADDFD_FLAG_SEND; 4248 fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); 4249 /* 4250 * Child has earlier "low" fds and now 42, so we expect the next 4251 * lowest available fd to be assigned here. 4252 */ 4253 EXPECT_EQ(fd, nextfd); 4254 nextfd = get_next_fd(nextfd); 4255 ASSERT_EQ(filecmp(getpid(), pid, memfd, fd), 0); 4256 4257 /* 4258 * This sets the ID of the ADD FD to the last request plus 1. The 4259 * notification ID increments 1 per notification. 4260 */ 4261 addfd.id = req.id + 1; 4262 4263 /* This spins until the underlying notification is generated */ 4264 while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 && 4265 errno != -EINPROGRESS) 4266 nanosleep(&delay, NULL); 4267 4268 memset(&req, 0, sizeof(req)); 4269 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4270 ASSERT_EQ(addfd.id, req.id); 4271 4272 resp.id = req.id; 4273 resp.error = 0; 4274 resp.val = USER_NOTIF_MAGIC; 4275 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4276 4277 /* Wait for child to finish. */ 4278 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4279 EXPECT_EQ(true, WIFEXITED(status)); 4280 EXPECT_EQ(0, WEXITSTATUS(status)); 4281 4282 close(memfd); 4283 } 4284 4285 TEST(user_notification_addfd_rlimit) 4286 { 4287 pid_t pid; 4288 long ret; 4289 int status, listener, memfd; 4290 struct seccomp_notif_addfd addfd = {}; 4291 struct seccomp_notif req = {}; 4292 struct seccomp_notif_resp resp = {}; 4293 const struct rlimit lim = { 4294 .rlim_cur = 0, 4295 .rlim_max = 0, 4296 }; 4297 4298 memfd = memfd_create("test", 0); 4299 ASSERT_GE(memfd, 0); 4300 4301 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4302 ASSERT_EQ(0, ret) { 4303 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4304 } 4305 4306 /* Check that the basic notification machinery works */ 4307 listener = user_notif_syscall(__NR_getppid, 4308 SECCOMP_FILTER_FLAG_NEW_LISTENER); 4309 ASSERT_GE(listener, 0); 4310 4311 pid = fork(); 4312 ASSERT_GE(pid, 0); 4313 4314 if (pid == 0) 4315 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 4316 4317 4318 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4319 4320 ASSERT_EQ(prlimit(pid, RLIMIT_NOFILE, &lim, NULL), 0); 4321 4322 addfd.srcfd = memfd; 4323 addfd.newfd_flags = O_CLOEXEC; 4324 addfd.newfd = 0; 4325 addfd.id = req.id; 4326 addfd.flags = 0; 4327 4328 /* Should probably spot check /proc/sys/fs/file-nr */ 4329 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 4330 EXPECT_EQ(errno, EMFILE); 4331 4332 addfd.flags = SECCOMP_ADDFD_FLAG_SEND; 4333 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 4334 EXPECT_EQ(errno, EMFILE); 4335 4336 addfd.newfd = 100; 4337 addfd.flags = SECCOMP_ADDFD_FLAG_SETFD; 4338 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 4339 EXPECT_EQ(errno, EBADF); 4340 4341 resp.id = req.id; 4342 resp.error = 0; 4343 resp.val = USER_NOTIF_MAGIC; 4344 4345 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4346 4347 /* Wait for child to finish. */ 4348 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4349 EXPECT_EQ(true, WIFEXITED(status)); 4350 EXPECT_EQ(0, WEXITSTATUS(status)); 4351 4352 close(memfd); 4353 } 4354 4355 #ifndef SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP 4356 #define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0) 4357 #define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64) 4358 #endif 4359 4360 TEST(user_notification_sync) 4361 { 4362 struct seccomp_notif req = {}; 4363 struct seccomp_notif_resp resp = {}; 4364 int status, listener; 4365 pid_t pid; 4366 long ret; 4367 4368 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4369 ASSERT_EQ(0, ret) { 4370 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4371 } 4372 4373 listener = user_notif_syscall(__NR_getppid, 4374 SECCOMP_FILTER_FLAG_NEW_LISTENER); 4375 ASSERT_GE(listener, 0); 4376 4377 /* Try to set invalid flags. */ 4378 EXPECT_SYSCALL_RETURN(-EINVAL, 4379 ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS, 0xffffffff, 0)); 4380 4381 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS, 4382 SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP, 0), 0); 4383 4384 pid = fork(); 4385 ASSERT_GE(pid, 0); 4386 if (pid == 0) { 4387 ret = syscall(__NR_getppid); 4388 ASSERT_EQ(ret, USER_NOTIF_MAGIC) { 4389 _exit(1); 4390 } 4391 _exit(0); 4392 } 4393 4394 req.pid = 0; 4395 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4396 4397 ASSERT_EQ(req.data.nr, __NR_getppid); 4398 4399 resp.id = req.id; 4400 resp.error = 0; 4401 resp.val = USER_NOTIF_MAGIC; 4402 resp.flags = 0; 4403 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4404 4405 ASSERT_EQ(waitpid(pid, &status, 0), pid); 4406 ASSERT_EQ(status, 0); 4407 } 4408 4409 4410 /* Make sure PTRACE_O_SUSPEND_SECCOMP requires CAP_SYS_ADMIN. */ 4411 FIXTURE(O_SUSPEND_SECCOMP) { 4412 pid_t pid; 4413 }; 4414 4415 FIXTURE_SETUP(O_SUSPEND_SECCOMP) 4416 { 4417 ERRNO_FILTER(block_read, E2BIG); 4418 cap_value_t cap_list[] = { CAP_SYS_ADMIN }; 4419 cap_t caps; 4420 4421 self->pid = 0; 4422 4423 /* make sure we don't have CAP_SYS_ADMIN */ 4424 caps = cap_get_proc(); 4425 ASSERT_NE(NULL, caps); 4426 ASSERT_EQ(0, cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_list, CAP_CLEAR)); 4427 ASSERT_EQ(0, cap_set_proc(caps)); 4428 cap_free(caps); 4429 4430 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); 4431 ASSERT_EQ(0, prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_block_read)); 4432 4433 self->pid = fork(); 4434 ASSERT_GE(self->pid, 0); 4435 4436 if (self->pid == 0) { 4437 while (1) 4438 pause(); 4439 _exit(127); 4440 } 4441 } 4442 4443 FIXTURE_TEARDOWN(O_SUSPEND_SECCOMP) 4444 { 4445 if (self->pid) 4446 kill(self->pid, SIGKILL); 4447 } 4448 4449 TEST_F(O_SUSPEND_SECCOMP, setoptions) 4450 { 4451 int wstatus; 4452 4453 ASSERT_EQ(0, ptrace(PTRACE_ATTACH, self->pid, NULL, 0)); 4454 ASSERT_EQ(self->pid, wait(&wstatus)); 4455 ASSERT_EQ(-1, ptrace(PTRACE_SETOPTIONS, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP)); 4456 if (errno == EINVAL) 4457 SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)"); 4458 ASSERT_EQ(EPERM, errno); 4459 } 4460 4461 TEST_F(O_SUSPEND_SECCOMP, seize) 4462 { 4463 int ret; 4464 4465 ret = ptrace(PTRACE_SEIZE, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP); 4466 ASSERT_EQ(-1, ret); 4467 if (errno == EINVAL) 4468 SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)"); 4469 ASSERT_EQ(EPERM, errno); 4470 } 4471 4472 /* 4473 * get_nth - Get the nth, space separated entry in a file. 4474 * 4475 * Returns the length of the read field. 4476 * Throws error if field is zero-lengthed. 4477 */ 4478 static ssize_t get_nth(struct __test_metadata *_metadata, const char *path, 4479 const unsigned int position, char **entry) 4480 { 4481 char *line = NULL; 4482 unsigned int i; 4483 ssize_t nread; 4484 size_t len = 0; 4485 FILE *f; 4486 4487 f = fopen(path, "r"); 4488 ASSERT_NE(f, NULL) { 4489 TH_LOG("Could not open %s: %s", path, strerror(errno)); 4490 } 4491 4492 for (i = 0; i < position; i++) { 4493 nread = getdelim(&line, &len, ' ', f); 4494 ASSERT_GE(nread, 0) { 4495 TH_LOG("Failed to read %d entry in file %s", i, path); 4496 } 4497 } 4498 fclose(f); 4499 4500 ASSERT_GT(nread, 0) { 4501 TH_LOG("Entry in file %s had zero length", path); 4502 } 4503 4504 *entry = line; 4505 return nread - 1; 4506 } 4507 4508 /* For a given PID, get the task state (D, R, etc...) */ 4509 static char get_proc_stat(struct __test_metadata *_metadata, pid_t pid) 4510 { 4511 char proc_path[100] = {0}; 4512 char status; 4513 char *line; 4514 4515 snprintf(proc_path, sizeof(proc_path), "/proc/%d/stat", pid); 4516 ASSERT_EQ(get_nth(_metadata, proc_path, 3, &line), 1); 4517 4518 status = *line; 4519 free(line); 4520 4521 return status; 4522 } 4523 4524 TEST(user_notification_fifo) 4525 { 4526 struct seccomp_notif_resp resp = {}; 4527 struct seccomp_notif req = {}; 4528 int i, status, listener; 4529 pid_t pid, pids[3]; 4530 __u64 baseid; 4531 long ret; 4532 /* 100 ms */ 4533 struct timespec delay = { .tv_nsec = 100000000 }; 4534 4535 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4536 ASSERT_EQ(0, ret) { 4537 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4538 } 4539 4540 /* Setup a listener */ 4541 listener = user_notif_syscall(__NR_getppid, 4542 SECCOMP_FILTER_FLAG_NEW_LISTENER); 4543 ASSERT_GE(listener, 0); 4544 4545 pid = fork(); 4546 ASSERT_GE(pid, 0); 4547 4548 if (pid == 0) { 4549 ret = syscall(__NR_getppid); 4550 exit(ret != USER_NOTIF_MAGIC); 4551 } 4552 4553 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4554 baseid = req.id + 1; 4555 4556 resp.id = req.id; 4557 resp.error = 0; 4558 resp.val = USER_NOTIF_MAGIC; 4559 4560 /* check that we make sure flags == 0 */ 4561 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4562 4563 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4564 EXPECT_EQ(true, WIFEXITED(status)); 4565 EXPECT_EQ(0, WEXITSTATUS(status)); 4566 4567 /* Start children, and generate notifications */ 4568 for (i = 0; i < ARRAY_SIZE(pids); i++) { 4569 pid = fork(); 4570 if (pid == 0) { 4571 ret = syscall(__NR_getppid); 4572 exit(ret != USER_NOTIF_MAGIC); 4573 } 4574 pids[i] = pid; 4575 } 4576 4577 /* This spins until all of the children are sleeping */ 4578 restart_wait: 4579 for (i = 0; i < ARRAY_SIZE(pids); i++) { 4580 if (get_proc_stat(_metadata, pids[i]) != 'S') { 4581 nanosleep(&delay, NULL); 4582 goto restart_wait; 4583 } 4584 } 4585 4586 /* Read the notifications in order (and respond) */ 4587 for (i = 0; i < ARRAY_SIZE(pids); i++) { 4588 memset(&req, 0, sizeof(req)); 4589 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4590 EXPECT_EQ(req.id, baseid + i); 4591 resp.id = req.id; 4592 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4593 } 4594 4595 /* Make sure notifications were received */ 4596 for (i = 0; i < ARRAY_SIZE(pids); i++) { 4597 EXPECT_EQ(waitpid(pids[i], &status, 0), pids[i]); 4598 EXPECT_EQ(true, WIFEXITED(status)); 4599 EXPECT_EQ(0, WEXITSTATUS(status)); 4600 } 4601 } 4602 4603 /* get_proc_syscall - Get the syscall in progress for a given pid 4604 * 4605 * Returns the current syscall number for a given process 4606 * Returns -1 if not in syscall (running or blocked) 4607 */ 4608 static long get_proc_syscall(struct __test_metadata *_metadata, int pid) 4609 { 4610 char proc_path[100] = {0}; 4611 long ret = -1; 4612 ssize_t nread; 4613 char *line; 4614 4615 snprintf(proc_path, sizeof(proc_path), "/proc/%d/syscall", pid); 4616 nread = get_nth(_metadata, proc_path, 1, &line); 4617 ASSERT_GT(nread, 0); 4618 4619 if (!strncmp("running", line, MIN(7, nread))) 4620 ret = strtol(line, NULL, 16); 4621 4622 free(line); 4623 return ret; 4624 } 4625 4626 /* Ensure non-fatal signals prior to receive are unmodified */ 4627 TEST(user_notification_wait_killable_pre_notification) 4628 { 4629 struct sigaction new_action = { 4630 .sa_handler = signal_handler, 4631 }; 4632 int listener, status, sk_pair[2]; 4633 pid_t pid; 4634 long ret; 4635 char c; 4636 /* 100 ms */ 4637 struct timespec delay = { .tv_nsec = 100000000 }; 4638 4639 ASSERT_EQ(sigemptyset(&new_action.sa_mask), 0); 4640 4641 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4642 ASSERT_EQ(0, ret) 4643 { 4644 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4645 } 4646 4647 ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0); 4648 4649 listener = user_notif_syscall( 4650 __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER | 4651 SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV); 4652 ASSERT_GE(listener, 0); 4653 4654 /* 4655 * Check that we can kill the process with SIGUSR1 prior to receiving 4656 * the notification. SIGUSR1 is wired up to a custom signal handler, 4657 * and make sure it gets called. 4658 */ 4659 pid = fork(); 4660 ASSERT_GE(pid, 0); 4661 4662 if (pid == 0) { 4663 close(sk_pair[0]); 4664 handled = sk_pair[1]; 4665 4666 /* Setup the non-fatal sigaction without SA_RESTART */ 4667 if (sigaction(SIGUSR1, &new_action, NULL)) { 4668 perror("sigaction"); 4669 exit(1); 4670 } 4671 4672 ret = syscall(__NR_getppid); 4673 /* Make sure we got a return from a signal interruption */ 4674 exit(ret != -1 || errno != EINTR); 4675 } 4676 4677 /* 4678 * Make sure we've gotten to the seccomp user notification wait 4679 * from getppid prior to sending any signals 4680 */ 4681 while (get_proc_syscall(_metadata, pid) != __NR_getppid && 4682 get_proc_stat(_metadata, pid) != 'S') 4683 nanosleep(&delay, NULL); 4684 4685 /* Send non-fatal kill signal */ 4686 EXPECT_EQ(kill(pid, SIGUSR1), 0); 4687 4688 /* wait for process to exit (exit checks for EINTR) */ 4689 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4690 EXPECT_EQ(true, WIFEXITED(status)); 4691 EXPECT_EQ(0, WEXITSTATUS(status)); 4692 4693 EXPECT_EQ(read(sk_pair[0], &c, 1), 1); 4694 } 4695 4696 /* Ensure non-fatal signals after receive are blocked */ 4697 TEST(user_notification_wait_killable) 4698 { 4699 struct sigaction new_action = { 4700 .sa_handler = signal_handler, 4701 }; 4702 struct seccomp_notif_resp resp = {}; 4703 struct seccomp_notif req = {}; 4704 int listener, status, sk_pair[2]; 4705 pid_t pid; 4706 long ret; 4707 char c; 4708 /* 100 ms */ 4709 struct timespec delay = { .tv_nsec = 100000000 }; 4710 4711 ASSERT_EQ(sigemptyset(&new_action.sa_mask), 0); 4712 4713 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4714 ASSERT_EQ(0, ret) 4715 { 4716 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4717 } 4718 4719 ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0); 4720 4721 listener = user_notif_syscall( 4722 __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER | 4723 SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV); 4724 ASSERT_GE(listener, 0); 4725 4726 pid = fork(); 4727 ASSERT_GE(pid, 0); 4728 4729 if (pid == 0) { 4730 close(sk_pair[0]); 4731 handled = sk_pair[1]; 4732 4733 /* Setup the sigaction without SA_RESTART */ 4734 if (sigaction(SIGUSR1, &new_action, NULL)) { 4735 perror("sigaction"); 4736 exit(1); 4737 } 4738 4739 /* Make sure that the syscall is completed (no EINTR) */ 4740 ret = syscall(__NR_getppid); 4741 exit(ret != USER_NOTIF_MAGIC); 4742 } 4743 4744 /* 4745 * Get the notification, to make move the notifying process into a 4746 * non-preemptible (TASK_KILLABLE) state. 4747 */ 4748 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4749 /* Send non-fatal kill signal */ 4750 EXPECT_EQ(kill(pid, SIGUSR1), 0); 4751 4752 /* 4753 * Make sure the task enters moves to TASK_KILLABLE by waiting for 4754 * D (Disk Sleep) state after receiving non-fatal signal. 4755 */ 4756 while (get_proc_stat(_metadata, pid) != 'D') 4757 nanosleep(&delay, NULL); 4758 4759 resp.id = req.id; 4760 resp.val = USER_NOTIF_MAGIC; 4761 /* Make sure the notification is found and able to be replied to */ 4762 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4763 4764 /* 4765 * Make sure that the signal handler does get called once we're back in 4766 * userspace. 4767 */ 4768 EXPECT_EQ(read(sk_pair[0], &c, 1), 1); 4769 /* wait for process to exit (exit checks for USER_NOTIF_MAGIC) */ 4770 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4771 EXPECT_EQ(true, WIFEXITED(status)); 4772 EXPECT_EQ(0, WEXITSTATUS(status)); 4773 } 4774 4775 /* Ensure fatal signals after receive are not blocked */ 4776 TEST(user_notification_wait_killable_fatal) 4777 { 4778 struct seccomp_notif req = {}; 4779 int listener, status; 4780 pid_t pid; 4781 long ret; 4782 /* 100 ms */ 4783 struct timespec delay = { .tv_nsec = 100000000 }; 4784 4785 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4786 ASSERT_EQ(0, ret) 4787 { 4788 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4789 } 4790 4791 listener = user_notif_syscall( 4792 __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER | 4793 SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV); 4794 ASSERT_GE(listener, 0); 4795 4796 pid = fork(); 4797 ASSERT_GE(pid, 0); 4798 4799 if (pid == 0) { 4800 /* This should never complete as it should get a SIGTERM */ 4801 syscall(__NR_getppid); 4802 exit(1); 4803 } 4804 4805 while (get_proc_stat(_metadata, pid) != 'S') 4806 nanosleep(&delay, NULL); 4807 4808 /* 4809 * Get the notification, to make move the notifying process into a 4810 * non-preemptible (TASK_KILLABLE) state. 4811 */ 4812 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4813 /* Kill the process with a fatal signal */ 4814 EXPECT_EQ(kill(pid, SIGTERM), 0); 4815 4816 /* 4817 * Wait for the process to exit, and make sure the process terminated 4818 * due to the SIGTERM signal. 4819 */ 4820 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4821 EXPECT_EQ(true, WIFSIGNALED(status)); 4822 EXPECT_EQ(SIGTERM, WTERMSIG(status)); 4823 } 4824 4825 struct tsync_vs_thread_leader_args { 4826 pthread_t leader; 4827 }; 4828 4829 static void *tsync_vs_dead_thread_leader_sibling(void *_args) 4830 { 4831 struct sock_filter allow_filter[] = { 4832 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 4833 }; 4834 struct sock_fprog allow_prog = { 4835 .len = (unsigned short)ARRAY_SIZE(allow_filter), 4836 .filter = allow_filter, 4837 }; 4838 struct tsync_vs_thread_leader_args *args = _args; 4839 void *retval; 4840 long ret; 4841 4842 ret = pthread_join(args->leader, &retval); 4843 if (ret) 4844 exit(1); 4845 if (retval != _args) 4846 exit(2); 4847 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &allow_prog); 4848 if (ret) 4849 exit(3); 4850 4851 exit(0); 4852 } 4853 4854 /* 4855 * Ensure that a dead thread leader doesn't prevent installing new filters with 4856 * SECCOMP_FILTER_FLAG_TSYNC from other threads. 4857 */ 4858 TEST(tsync_vs_dead_thread_leader) 4859 { 4860 int status; 4861 pid_t pid; 4862 long ret; 4863 4864 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4865 ASSERT_EQ(0, ret) { 4866 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4867 } 4868 4869 pid = fork(); 4870 ASSERT_GE(pid, 0); 4871 4872 if (pid == 0) { 4873 struct sock_filter allow_filter[] = { 4874 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 4875 }; 4876 struct sock_fprog allow_prog = { 4877 .len = (unsigned short)ARRAY_SIZE(allow_filter), 4878 .filter = allow_filter, 4879 }; 4880 struct tsync_vs_thread_leader_args *args; 4881 pthread_t sibling; 4882 4883 args = malloc(sizeof(*args)); 4884 ASSERT_NE(NULL, args); 4885 args->leader = pthread_self(); 4886 4887 ret = pthread_create(&sibling, NULL, 4888 tsync_vs_dead_thread_leader_sibling, args); 4889 ASSERT_EQ(0, ret); 4890 4891 /* Install a new filter just to the leader thread. */ 4892 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog); 4893 ASSERT_EQ(0, ret); 4894 pthread_exit(args); 4895 exit(1); 4896 } 4897 4898 EXPECT_EQ(pid, waitpid(pid, &status, 0)); 4899 EXPECT_EQ(0, status); 4900 } 4901 4902 noinline int probed(void) 4903 { 4904 return 1; 4905 } 4906 4907 static int parse_uint_from_file(const char *file, const char *fmt) 4908 { 4909 int err = -1, ret; 4910 FILE *f; 4911 4912 f = fopen(file, "re"); 4913 if (f) { 4914 err = fscanf(f, fmt, &ret); 4915 fclose(f); 4916 } 4917 return err == 1 ? ret : err; 4918 } 4919 4920 static int determine_uprobe_perf_type(void) 4921 { 4922 const char *file = "/sys/bus/event_source/devices/uprobe/type"; 4923 4924 return parse_uint_from_file(file, "%d\n"); 4925 } 4926 4927 static int determine_uprobe_retprobe_bit(void) 4928 { 4929 const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe"; 4930 4931 return parse_uint_from_file(file, "config:%d\n"); 4932 } 4933 4934 static ssize_t get_uprobe_offset(const void *addr) 4935 { 4936 size_t start, base, end; 4937 bool found = false; 4938 char buf[256]; 4939 FILE *f; 4940 4941 f = fopen("/proc/self/maps", "r"); 4942 if (!f) 4943 return -1; 4944 4945 while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &base) == 4) { 4946 if (buf[2] == 'x' && (uintptr_t)addr >= start && (uintptr_t)addr < end) { 4947 found = true; 4948 break; 4949 } 4950 } 4951 fclose(f); 4952 return found ? (uintptr_t)addr - start + base : -1; 4953 } 4954 4955 FIXTURE(URETPROBE) { 4956 int fd; 4957 }; 4958 4959 FIXTURE_VARIANT(URETPROBE) { 4960 /* 4961 * All of the URETPROBE behaviors can be tested with either 4962 * uretprobe attached or not 4963 */ 4964 bool attach; 4965 }; 4966 4967 FIXTURE_VARIANT_ADD(URETPROBE, attached) { 4968 .attach = true, 4969 }; 4970 4971 FIXTURE_VARIANT_ADD(URETPROBE, not_attached) { 4972 .attach = false, 4973 }; 4974 4975 FIXTURE_SETUP(URETPROBE) 4976 { 4977 const size_t attr_sz = sizeof(struct perf_event_attr); 4978 struct perf_event_attr attr; 4979 ssize_t offset; 4980 int type, bit; 4981 4982 #ifndef __NR_uretprobe 4983 SKIP(return, "__NR_uretprobe syscall not defined"); 4984 #endif 4985 4986 if (!variant->attach) 4987 return; 4988 4989 memset(&attr, 0, attr_sz); 4990 4991 type = determine_uprobe_perf_type(); 4992 ASSERT_GE(type, 0); 4993 bit = determine_uprobe_retprobe_bit(); 4994 ASSERT_GE(bit, 0); 4995 offset = get_uprobe_offset(probed); 4996 ASSERT_GE(offset, 0); 4997 4998 attr.config |= 1 << bit; 4999 attr.size = attr_sz; 5000 attr.type = type; 5001 attr.config1 = ptr_to_u64("/proc/self/exe"); 5002 attr.config2 = offset; 5003 5004 self->fd = syscall(__NR_perf_event_open, &attr, 5005 getpid() /* pid */, -1 /* cpu */, -1 /* group_fd */, 5006 PERF_FLAG_FD_CLOEXEC); 5007 } 5008 5009 FIXTURE_TEARDOWN(URETPROBE) 5010 { 5011 /* we could call close(self->fd), but we'd need extra filter for 5012 * that and since we are calling _exit right away.. 5013 */ 5014 } 5015 5016 static int run_probed_with_filter(struct sock_fprog *prog) 5017 { 5018 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) || 5019 seccomp(SECCOMP_SET_MODE_FILTER, 0, prog)) { 5020 return -1; 5021 } 5022 5023 probed(); 5024 return 0; 5025 } 5026 5027 TEST_F(URETPROBE, uretprobe_default_allow) 5028 { 5029 struct sock_filter filter[] = { 5030 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 5031 }; 5032 struct sock_fprog prog = { 5033 .len = (unsigned short)ARRAY_SIZE(filter), 5034 .filter = filter, 5035 }; 5036 5037 ASSERT_EQ(0, run_probed_with_filter(&prog)); 5038 } 5039 5040 TEST_F(URETPROBE, uretprobe_default_block) 5041 { 5042 struct sock_filter filter[] = { 5043 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 5044 offsetof(struct seccomp_data, nr)), 5045 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit_group, 1, 0), 5046 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 5047 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 5048 }; 5049 struct sock_fprog prog = { 5050 .len = (unsigned short)ARRAY_SIZE(filter), 5051 .filter = filter, 5052 }; 5053 5054 ASSERT_EQ(0, run_probed_with_filter(&prog)); 5055 } 5056 5057 TEST_F(URETPROBE, uretprobe_block_uretprobe_syscall) 5058 { 5059 struct sock_filter filter[] = { 5060 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 5061 offsetof(struct seccomp_data, nr)), 5062 #ifdef __NR_uretprobe 5063 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uretprobe, 0, 1), 5064 #endif 5065 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 5066 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 5067 }; 5068 struct sock_fprog prog = { 5069 .len = (unsigned short)ARRAY_SIZE(filter), 5070 .filter = filter, 5071 }; 5072 5073 ASSERT_EQ(0, run_probed_with_filter(&prog)); 5074 } 5075 5076 TEST_F(URETPROBE, uretprobe_default_block_with_uretprobe_syscall) 5077 { 5078 struct sock_filter filter[] = { 5079 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 5080 offsetof(struct seccomp_data, nr)), 5081 #ifdef __NR_uretprobe 5082 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uretprobe, 2, 0), 5083 #endif 5084 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit_group, 1, 0), 5085 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 5086 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 5087 }; 5088 struct sock_fprog prog = { 5089 .len = (unsigned short)ARRAY_SIZE(filter), 5090 .filter = filter, 5091 }; 5092 5093 ASSERT_EQ(0, run_probed_with_filter(&prog)); 5094 } 5095 5096 /* 5097 * TODO: 5098 * - expand NNP testing 5099 * - better arch-specific TRACE and TRAP handlers. 5100 * - endianness checking when appropriate 5101 * - 64-bit arg prodding 5102 * - arch value testing (x86 modes especially) 5103 * - verify that FILTER_FLAG_LOG filters generate log messages 5104 * - verify that RET_LOG generates log messages 5105 */ 5106 5107 TEST_HARNESS_MAIN 5108