// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
 *
 * Test code for seccomp bpf.
 */

#define _GNU_SOURCE
#include <sys/types.h>

/*
 * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
 * we need to use the kernel's siginfo.h file and trick glibc
 * into accepting it.
 */
#if !__GLIBC_PREREQ(2, 26)
# include <asm/siginfo.h>
# define __have_siginfo_t 1
# define __have_sigval_t 1
# define __have_sigevent_t 1
#endif

#include <errno.h>
#include <linux/filter.h>
#include <sys/prctl.h>
#include <sys/ptrace.h>
#include <sys/time.h>
#include <sys/user.h>
#include <linux/prctl.h>
#include <linux/ptrace.h>
#include <linux/seccomp.h>
#include <pthread.h>
#include <semaphore.h>
#include <signal.h>
#include <stddef.h>
#include <stdbool.h>
#include <string.h>
#include <time.h>
#include <limits.h>
#include <linux/elf.h>
#include <sys/uio.h>
#include <sys/utsname.h>
#include <sys/fcntl.h>
#include <sys/mman.h>
#include <sys/times.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <linux/kcmp.h>
#include <sys/resource.h>
#include <sys/capability.h>
#include <linux/perf_event.h>

#include <unistd.h>
#include <sys/syscall.h>
#include <poll.h>

#include "../kselftest_harness.h"
#include "../clone3/clone3_selftests.h"

/* Attempt to de-conflict with the selftests tree. */
#ifndef SKIP
#define SKIP(s, ...)	XFAIL(s, ##__VA_ARGS__)
#endif

#ifndef MIN
#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
#endif

#ifndef PR_SET_PTRACER
# define PR_SET_PTRACER 0x59616d61
#endif

#ifndef noinline
#define noinline __attribute__((noinline))
#endif

#ifndef __nocf_check
#define __nocf_check __attribute__((nocf_check))
#endif

#ifndef __naked
#define __naked __attribute__((__naked__))
#endif

/*
 * The build-host headers may predate the seccomp UAPI being tested; every
 * guarded block below supplies a fallback definition so the tests still
 * compile against older kernel/libc headers.
 */
#ifndef PR_SET_NO_NEW_PRIVS
#define PR_SET_NO_NEW_PRIVS 38
#define PR_GET_NO_NEW_PRIVS 39
#endif

#ifndef PR_SECCOMP_EXT
#define PR_SECCOMP_EXT 43
#endif

#ifndef SECCOMP_EXT_ACT
#define SECCOMP_EXT_ACT 1
#endif

#ifndef SECCOMP_EXT_ACT_TSYNC
#define SECCOMP_EXT_ACT_TSYNC 1
#endif

#ifndef SECCOMP_MODE_STRICT
#define SECCOMP_MODE_STRICT 1
#endif

#ifndef SECCOMP_MODE_FILTER
#define SECCOMP_MODE_FILTER 2
#endif

#ifndef SECCOMP_RET_ALLOW
struct seccomp_data {
	int nr;
	__u32 arch;
	__u64 instruction_pointer;
	__u64 args[6];
};
#endif

#ifndef SECCOMP_RET_KILL_PROCESS
#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
#define SECCOMP_RET_KILL_THREAD	 0x00000000U /* kill the thread */
#endif
#ifndef SECCOMP_RET_KILL
#define SECCOMP_RET_KILL	 SECCOMP_RET_KILL_THREAD
#define SECCOMP_RET_TRAP	 0x00030000U /* disallow and force a SIGSYS */
#define SECCOMP_RET_ERRNO	 0x00050000U /* returns an errno */
#define SECCOMP_RET_TRACE	 0x7ff00000U /* pass to a tracer or disallow */
#define SECCOMP_RET_ALLOW	 0x7fff0000U /* allow */
#endif
#ifndef SECCOMP_RET_LOG
#define SECCOMP_RET_LOG		 0x7ffc0000U /* allow after logging */
#endif

/* Per-architecture seccomp(2) syscall numbers for old headers. */
#ifndef __NR_seccomp
# if defined(__i386__)
#  define __NR_seccomp 354
# elif defined(__x86_64__)
#  define __NR_seccomp 317
# elif defined(__arm__)
#  define __NR_seccomp 383
# elif defined(__aarch64__)
#  define __NR_seccomp 277
# elif defined(__riscv)
#  define __NR_seccomp 277
# elif defined(__csky__)
#  define __NR_seccomp 277
# elif defined(__loongarch__)
#  define __NR_seccomp 277
# elif defined(__hppa__)
#  define __NR_seccomp 338
# elif defined(__powerpc__)
#  define __NR_seccomp 358
# elif defined(__s390__)
#  define __NR_seccomp 348
# elif defined(__xtensa__)
#  define __NR_seccomp 337
# elif defined(__sh__)
#  define __NR_seccomp 372
# elif defined(__mc68000__)
#  define __NR_seccomp 380
# else
#  warning "seccomp syscall number unknown for this architecture"
#  define __NR_seccomp 0xffff
# endif
#endif

#ifndef __NR_uretprobe
# if defined(__x86_64__)
#  define __NR_uretprobe 335
# endif
#endif

#ifndef SECCOMP_SET_MODE_STRICT
#define SECCOMP_SET_MODE_STRICT 0
#endif

#ifndef SECCOMP_SET_MODE_FILTER
#define SECCOMP_SET_MODE_FILTER 1
#endif

#ifndef SECCOMP_GET_ACTION_AVAIL
#define SECCOMP_GET_ACTION_AVAIL 2
#endif

#ifndef SECCOMP_GET_NOTIF_SIZES
#define SECCOMP_GET_NOTIF_SIZES 3
#endif

#ifndef SECCOMP_FILTER_FLAG_TSYNC
#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
#endif

#ifndef SECCOMP_FILTER_FLAG_LOG
#define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
#endif

#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
#endif

#ifndef PTRACE_SECCOMP_GET_METADATA
#define PTRACE_SECCOMP_GET_METADATA	0x420d

struct seccomp_metadata {
	__u64 filter_off;	/* Input: which filter */
	__u64 flags;		/* Output: filter's flags */
};
#endif

#ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER
#define SECCOMP_FILTER_FLAG_NEW_LISTENER	(1UL << 3)
#endif

#ifndef SECCOMP_RET_USER_NOTIF
#define SECCOMP_RET_USER_NOTIF 0x7fc00000U

#define SECCOMP_IOC_MAGIC		'!'
#define SECCOMP_IO(nr)			_IO(SECCOMP_IOC_MAGIC, nr)
#define SECCOMP_IOR(nr, type)		_IOR(SECCOMP_IOC_MAGIC, nr, type)
#define SECCOMP_IOW(nr, type)		_IOW(SECCOMP_IOC_MAGIC, nr, type)
#define SECCOMP_IOWR(nr, type)		_IOWR(SECCOMP_IOC_MAGIC, nr, type)

/* Flags for seccomp notification fd ioctl. */
#define SECCOMP_IOCTL_NOTIF_RECV	SECCOMP_IOWR(0, struct seccomp_notif)
#define SECCOMP_IOCTL_NOTIF_SEND	SECCOMP_IOWR(1,	\
						struct seccomp_notif_resp)
#define SECCOMP_IOCTL_NOTIF_ID_VALID	SECCOMP_IOW(2, __u64)

/* Kernel -> user record delivered for SECCOMP_RET_USER_NOTIF events. */
struct seccomp_notif {
	__u64 id;
	__u32 pid;
	__u32 flags;
	struct seccomp_data data;
};

/* User -> kernel reply for a received notification. */
struct seccomp_notif_resp {
	__u64 id;
	__s64 val;
	__s32 error;
	__u32 flags;
};

/* Structure sizes reported by SECCOMP_GET_NOTIF_SIZES. */
struct seccomp_notif_sizes {
	__u16 seccomp_notif;
	__u16 seccomp_notif_resp;
	__u16 seccomp_data;
};
#endif

#ifndef SECCOMP_IOCTL_NOTIF_ADDFD
/* On success, the return value is the remote process's added fd number */
#define SECCOMP_IOCTL_NOTIF_ADDFD	SECCOMP_IOW(3,	\
						struct seccomp_notif_addfd)

/* valid flags for seccomp_notif_addfd */
#define SECCOMP_ADDFD_FLAG_SETFD	(1UL << 0) /* Specify remote fd */

struct seccomp_notif_addfd {
	__u64 id;
	__u32 flags;
	__u32 srcfd;
	__u32 newfd;
	__u32 newfd_flags;
};
#endif

#ifndef SECCOMP_ADDFD_FLAG_SEND
#define SECCOMP_ADDFD_FLAG_SEND	(1UL << 1) /* Addfd and return it, atomically */
#endif

/*
 * Deliberately under-sized variant of seccomp_notif_addfd, with the same
 * ioctl number (3) — presumably used by later tests to exercise the
 * kernel's ioctl size handling (the users are not visible in this chunk).
 */
struct seccomp_notif_addfd_small {
	__u64 id;
	char weird[4];
};
#define SECCOMP_IOCTL_NOTIF_ADDFD_SMALL	\
	SECCOMP_IOW(3, struct seccomp_notif_addfd_small)

/* Over-sized variant of seccomp_notif_addfd (8 bytes of extra tail). */
struct seccomp_notif_addfd_big {
	union {
		struct seccomp_notif_addfd addfd;
		char buf[sizeof(struct seccomp_notif_addfd) + 8];
	};
};
#define SECCOMP_IOCTL_NOTIF_ADDFD_BIG	\
	SECCOMP_IOWR(3, struct seccomp_notif_addfd_big)

#ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY
#define PTRACE_EVENTMSG_SYSCALL_ENTRY	1
#define PTRACE_EVENTMSG_SYSCALL_EXIT	2
#endif

#ifndef SECCOMP_USER_NOTIF_FLAG_CONTINUE
#define SECCOMP_USER_NOTIF_FLAG_CONTINUE 0x00000001
#endif

#ifndef SECCOMP_FILTER_FLAG_TSYNC_ESRCH
#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
#endif

#ifndef SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV
#define SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV (1UL << 5)
#endif

#ifndef seccomp
/*
 * Raw seccomp(2) wrapper for libcs without one. errno is cleared first so
 * callers can tell a clean 0 return from a failure.
 */
int seccomp(unsigned int op, unsigned int flags, void *args)
{
	errno = 0;
	return syscall(__NR_seccomp, op, flags, args);
}
#endif

/*
 * syscall_arg(n): BPF_ABS offset of the 32 bits of syscall argument n that
 * a 32-bit BPF load should read; on big-endian the low word sits one
 * __u32 past the start of the 64-bit slot.
 */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
#else
#error "wut? Unknown __BYTE_ORDER__?!"
#endif

/* Distinctive exit markers used by helper threads/processes in the tests. */
#define SIBLING_EXIT_UNKILLED	0xbadbeef
#define SIBLING_EXIT_FAILURE	0xbadface
#define SIBLING_EXIT_NEWPRIVS	0xbadfeed

/*
 * Compare two fds across two processes via kcmp(2) (0 means same file);
 * returns -1 with errno == ENOSYS when kcmp is unavailable.
 */
static int __filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2)
{
#ifdef __NR_kcmp
	errno = 0;
	return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2);
#else
	errno = ENOSYS;
	return -1;
#endif
}

/* Have TH_LOG report actual location filecmp() is used.
*/ 336 #define filecmp(pid1, pid2, fd1, fd2) ({ \ 337 int _ret; \ 338 \ 339 _ret = __filecmp(pid1, pid2, fd1, fd2); \ 340 if (_ret != 0) { \ 341 if (_ret < 0 && errno == ENOSYS) { \ 342 TH_LOG("kcmp() syscall missing (test is less accurate)");\ 343 _ret = 0; \ 344 } \ 345 } \ 346 _ret; }) 347 348 TEST(kcmp) 349 { 350 int ret; 351 352 ret = __filecmp(getpid(), getpid(), 1, 1); 353 EXPECT_EQ(ret, 0); 354 if (ret != 0 && errno == ENOSYS) 355 SKIP(return, "Kernel does not support kcmp() (missing CONFIG_KCMP?)"); 356 } 357 358 TEST(mode_strict_support) 359 { 360 long ret; 361 362 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL); 363 ASSERT_EQ(0, ret) { 364 TH_LOG("Kernel does not support CONFIG_SECCOMP"); 365 } 366 syscall(__NR_exit, 0); 367 } 368 369 TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL) 370 { 371 long ret; 372 373 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL); 374 ASSERT_EQ(0, ret) { 375 TH_LOG("Kernel does not support CONFIG_SECCOMP"); 376 } 377 syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER, 378 NULL, NULL, NULL); 379 EXPECT_FALSE(true) { 380 TH_LOG("Unreachable!"); 381 } 382 } 383 384 /* Note! This doesn't test no new privs behavior */ 385 TEST(no_new_privs_support) 386 { 387 long ret; 388 389 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 390 EXPECT_EQ(0, ret) { 391 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 392 } 393 } 394 395 /* Tests kernel support by checking for a copy_from_user() fault on NULL. 
*/ 396 TEST(mode_filter_support) 397 { 398 long ret; 399 400 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); 401 ASSERT_EQ(0, ret) { 402 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 403 } 404 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL); 405 EXPECT_EQ(-1, ret); 406 EXPECT_EQ(EFAULT, errno) { 407 TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!"); 408 } 409 } 410 411 TEST(mode_filter_without_nnp) 412 { 413 struct sock_filter filter[] = { 414 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 415 }; 416 struct sock_fprog prog = { 417 .len = (unsigned short)ARRAY_SIZE(filter), 418 .filter = filter, 419 }; 420 long ret; 421 cap_t cap = cap_get_proc(); 422 cap_flag_value_t is_cap_sys_admin = 0; 423 424 ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0); 425 ASSERT_LE(0, ret) { 426 TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS"); 427 } 428 errno = 0; 429 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 430 /* Succeeds with CAP_SYS_ADMIN, fails without */ 431 cap_get_flag(cap, CAP_SYS_ADMIN, CAP_EFFECTIVE, &is_cap_sys_admin); 432 if (!is_cap_sys_admin) { 433 EXPECT_EQ(-1, ret); 434 EXPECT_EQ(EACCES, errno); 435 } else { 436 EXPECT_EQ(0, ret); 437 } 438 } 439 440 #define MAX_INSNS_PER_PATH 32768 441 442 TEST(filter_size_limits) 443 { 444 int i; 445 int count = BPF_MAXINSNS + 1; 446 struct sock_filter allow[] = { 447 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 448 }; 449 struct sock_filter *filter; 450 struct sock_fprog prog = { }; 451 long ret; 452 453 filter = calloc(count, sizeof(*filter)); 454 ASSERT_NE(NULL, filter); 455 456 for (i = 0; i < count; i++) 457 filter[i] = allow[0]; 458 459 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 460 ASSERT_EQ(0, ret); 461 462 prog.filter = filter; 463 prog.len = count; 464 465 /* Too many filter instructions in a single filter. 
*/ 466 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 467 ASSERT_NE(0, ret) { 468 TH_LOG("Installing %d insn filter was allowed", prog.len); 469 } 470 471 /* One less is okay, though. */ 472 prog.len -= 1; 473 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 474 ASSERT_EQ(0, ret) { 475 TH_LOG("Installing %d insn filter wasn't allowed", prog.len); 476 } 477 } 478 479 TEST(filter_chain_limits) 480 { 481 int i; 482 int count = BPF_MAXINSNS; 483 struct sock_filter allow[] = { 484 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 485 }; 486 struct sock_filter *filter; 487 struct sock_fprog prog = { }; 488 long ret; 489 490 filter = calloc(count, sizeof(*filter)); 491 ASSERT_NE(NULL, filter); 492 493 for (i = 0; i < count; i++) 494 filter[i] = allow[0]; 495 496 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 497 ASSERT_EQ(0, ret); 498 499 prog.filter = filter; 500 prog.len = 1; 501 502 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 503 ASSERT_EQ(0, ret); 504 505 prog.len = count; 506 507 /* Too many total filter instructions. 
*/ 508 for (i = 0; i < MAX_INSNS_PER_PATH; i++) { 509 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 510 if (ret != 0) 511 break; 512 } 513 ASSERT_NE(0, ret) { 514 TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)", 515 i, count, i * (count + 4)); 516 } 517 } 518 519 TEST(mode_filter_cannot_move_to_strict) 520 { 521 struct sock_filter filter[] = { 522 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 523 }; 524 struct sock_fprog prog = { 525 .len = (unsigned short)ARRAY_SIZE(filter), 526 .filter = filter, 527 }; 528 long ret; 529 530 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 531 ASSERT_EQ(0, ret); 532 533 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 534 ASSERT_EQ(0, ret); 535 536 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0); 537 EXPECT_EQ(-1, ret); 538 EXPECT_EQ(EINVAL, errno); 539 } 540 541 542 TEST(mode_filter_get_seccomp) 543 { 544 struct sock_filter filter[] = { 545 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 546 }; 547 struct sock_fprog prog = { 548 .len = (unsigned short)ARRAY_SIZE(filter), 549 .filter = filter, 550 }; 551 long ret; 552 553 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 554 ASSERT_EQ(0, ret); 555 556 ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0); 557 EXPECT_EQ(0, ret); 558 559 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 560 ASSERT_EQ(0, ret); 561 562 ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0); 563 EXPECT_EQ(2, ret); 564 } 565 566 567 TEST(ALLOW_all) 568 { 569 struct sock_filter filter[] = { 570 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 571 }; 572 struct sock_fprog prog = { 573 .len = (unsigned short)ARRAY_SIZE(filter), 574 .filter = filter, 575 }; 576 long ret; 577 578 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 579 ASSERT_EQ(0, ret); 580 581 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 582 ASSERT_EQ(0, ret); 583 } 584 585 TEST(empty_prog) 586 { 587 struct sock_filter filter[] = { 588 }; 589 struct sock_fprog prog = { 590 .len = (unsigned 
short)ARRAY_SIZE(filter), 591 .filter = filter, 592 }; 593 long ret; 594 595 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 596 ASSERT_EQ(0, ret); 597 598 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 599 EXPECT_EQ(-1, ret); 600 EXPECT_EQ(EINVAL, errno); 601 } 602 603 TEST(log_all) 604 { 605 struct sock_filter filter[] = { 606 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG), 607 }; 608 struct sock_fprog prog = { 609 .len = (unsigned short)ARRAY_SIZE(filter), 610 .filter = filter, 611 }; 612 long ret; 613 pid_t parent = getppid(); 614 615 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 616 ASSERT_EQ(0, ret); 617 618 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 619 ASSERT_EQ(0, ret); 620 621 /* getppid() should succeed and be logged (no check for logging) */ 622 EXPECT_EQ(parent, syscall(__NR_getppid)); 623 } 624 625 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS) 626 { 627 struct sock_filter filter[] = { 628 BPF_STMT(BPF_RET|BPF_K, 0x10000000U), 629 }; 630 struct sock_fprog prog = { 631 .len = (unsigned short)ARRAY_SIZE(filter), 632 .filter = filter, 633 }; 634 long ret; 635 636 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 637 ASSERT_EQ(0, ret); 638 639 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 640 ASSERT_EQ(0, ret); 641 EXPECT_EQ(0, syscall(__NR_getpid)) { 642 TH_LOG("getpid() shouldn't ever return"); 643 } 644 } 645 646 /* return code >= 0x80000000 is unused. 
*/ 647 TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS) 648 { 649 struct sock_filter filter[] = { 650 BPF_STMT(BPF_RET|BPF_K, 0x90000000U), 651 }; 652 struct sock_fprog prog = { 653 .len = (unsigned short)ARRAY_SIZE(filter), 654 .filter = filter, 655 }; 656 long ret; 657 658 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 659 ASSERT_EQ(0, ret); 660 661 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 662 ASSERT_EQ(0, ret); 663 EXPECT_EQ(0, syscall(__NR_getpid)) { 664 TH_LOG("getpid() shouldn't ever return"); 665 } 666 } 667 668 TEST_SIGNAL(KILL_all, SIGSYS) 669 { 670 struct sock_filter filter[] = { 671 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 672 }; 673 struct sock_fprog prog = { 674 .len = (unsigned short)ARRAY_SIZE(filter), 675 .filter = filter, 676 }; 677 long ret; 678 679 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 680 ASSERT_EQ(0, ret); 681 682 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 683 ASSERT_EQ(0, ret); 684 } 685 686 TEST_SIGNAL(KILL_one, SIGSYS) 687 { 688 struct sock_filter filter[] = { 689 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 690 offsetof(struct seccomp_data, nr)), 691 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 692 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 693 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 694 }; 695 struct sock_fprog prog = { 696 .len = (unsigned short)ARRAY_SIZE(filter), 697 .filter = filter, 698 }; 699 long ret; 700 pid_t parent = getppid(); 701 702 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 703 ASSERT_EQ(0, ret); 704 705 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 706 ASSERT_EQ(0, ret); 707 708 EXPECT_EQ(parent, syscall(__NR_getppid)); 709 /* getpid() should never return. 
*/ 710 EXPECT_EQ(0, syscall(__NR_getpid)); 711 } 712 713 TEST_SIGNAL(KILL_one_arg_one, SIGSYS) 714 { 715 void *fatal_address; 716 struct sock_filter filter[] = { 717 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 718 offsetof(struct seccomp_data, nr)), 719 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0), 720 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 721 /* Only both with lower 32-bit for now. */ 722 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)), 723 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 724 (unsigned long)&fatal_address, 0, 1), 725 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 726 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 727 }; 728 struct sock_fprog prog = { 729 .len = (unsigned short)ARRAY_SIZE(filter), 730 .filter = filter, 731 }; 732 long ret; 733 pid_t parent = getppid(); 734 struct tms timebuf; 735 clock_t clock = times(&timebuf); 736 737 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 738 ASSERT_EQ(0, ret); 739 740 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 741 ASSERT_EQ(0, ret); 742 743 EXPECT_EQ(parent, syscall(__NR_getppid)); 744 EXPECT_LE(clock, syscall(__NR_times, &timebuf)); 745 /* times() should never return. */ 746 EXPECT_EQ(0, syscall(__NR_times, &fatal_address)); 747 } 748 749 TEST_SIGNAL(KILL_one_arg_six, SIGSYS) 750 { 751 #ifndef __NR_mmap2 752 int sysno = __NR_mmap; 753 #else 754 int sysno = __NR_mmap2; 755 #endif 756 struct sock_filter filter[] = { 757 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 758 offsetof(struct seccomp_data, nr)), 759 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0), 760 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 761 /* Only both with lower 32-bit for now. 
*/ 762 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)), 763 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1), 764 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 765 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 766 }; 767 struct sock_fprog prog = { 768 .len = (unsigned short)ARRAY_SIZE(filter), 769 .filter = filter, 770 }; 771 long ret; 772 pid_t parent = getppid(); 773 int fd; 774 void *map1, *map2; 775 int page_size = sysconf(_SC_PAGESIZE); 776 777 ASSERT_LT(0, page_size); 778 779 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 780 ASSERT_EQ(0, ret); 781 782 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 783 ASSERT_EQ(0, ret); 784 785 fd = open("/dev/zero", O_RDONLY); 786 ASSERT_NE(-1, fd); 787 788 EXPECT_EQ(parent, syscall(__NR_getppid)); 789 map1 = (void *)syscall(sysno, 790 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size); 791 EXPECT_NE(MAP_FAILED, map1); 792 /* mmap2() should never return. */ 793 map2 = (void *)syscall(sysno, 794 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE); 795 EXPECT_EQ(MAP_FAILED, map2); 796 797 /* The test failed, so clean up the resources. */ 798 munmap(map1, page_size); 799 munmap(map2, page_size); 800 close(fd); 801 } 802 803 /* This is a thread task to die via seccomp filter violation. */ 804 void *kill_thread(void *data) 805 { 806 bool die = (bool)data; 807 808 if (die) { 809 syscall(__NR_getpid); 810 return (void *)SIBLING_EXIT_FAILURE; 811 } 812 813 return (void *)SIBLING_EXIT_UNKILLED; 814 } 815 816 enum kill_t { 817 KILL_THREAD, 818 KILL_PROCESS, 819 RET_UNKNOWN 820 }; 821 822 /* Prepare a thread that will kill itself or both of us. */ 823 void kill_thread_or_group(struct __test_metadata *_metadata, 824 enum kill_t kill_how) 825 { 826 pthread_t thread; 827 void *status; 828 /* Kill only when calling __NR_getpid. 
*/ 829 struct sock_filter filter_thread[] = { 830 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 831 offsetof(struct seccomp_data, nr)), 832 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 833 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD), 834 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 835 }; 836 struct sock_fprog prog_thread = { 837 .len = (unsigned short)ARRAY_SIZE(filter_thread), 838 .filter = filter_thread, 839 }; 840 int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAA; 841 struct sock_filter filter_process[] = { 842 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 843 offsetof(struct seccomp_data, nr)), 844 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 845 BPF_STMT(BPF_RET|BPF_K, kill), 846 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 847 }; 848 struct sock_fprog prog_process = { 849 .len = (unsigned short)ARRAY_SIZE(filter_process), 850 .filter = filter_process, 851 }; 852 853 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 854 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 855 } 856 857 ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, 858 kill_how == KILL_THREAD ? &prog_thread 859 : &prog_process)); 860 861 /* 862 * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS 863 * flag cannot be downgraded by a new filter. 864 */ 865 if (kill_how == KILL_PROCESS) 866 ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread)); 867 868 /* Start a thread that will exit immediately. */ 869 ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false)); 870 ASSERT_EQ(0, pthread_join(thread, &status)); 871 ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status); 872 873 /* Start a thread that will die immediately. */ 874 ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true)); 875 ASSERT_EQ(0, pthread_join(thread, &status)); 876 ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status); 877 878 /* 879 * If we get here, only the spawned thread died. Let the parent know 880 * the whole process didn't die (i.e. 
this thread, the spawner, 881 * stayed running). 882 */ 883 exit(42); 884 } 885 886 TEST(KILL_thread) 887 { 888 int status; 889 pid_t child_pid; 890 891 child_pid = fork(); 892 ASSERT_LE(0, child_pid); 893 if (child_pid == 0) { 894 kill_thread_or_group(_metadata, KILL_THREAD); 895 _exit(38); 896 } 897 898 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 899 900 /* If only the thread was killed, we'll see exit 42. */ 901 ASSERT_TRUE(WIFEXITED(status)); 902 ASSERT_EQ(42, WEXITSTATUS(status)); 903 } 904 905 TEST(KILL_process) 906 { 907 int status; 908 pid_t child_pid; 909 910 child_pid = fork(); 911 ASSERT_LE(0, child_pid); 912 if (child_pid == 0) { 913 kill_thread_or_group(_metadata, KILL_PROCESS); 914 _exit(38); 915 } 916 917 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 918 919 /* If the entire process was killed, we'll see SIGSYS. */ 920 ASSERT_TRUE(WIFSIGNALED(status)); 921 ASSERT_EQ(SIGSYS, WTERMSIG(status)); 922 } 923 924 TEST(KILL_unknown) 925 { 926 int status; 927 pid_t child_pid; 928 929 child_pid = fork(); 930 ASSERT_LE(0, child_pid); 931 if (child_pid == 0) { 932 kill_thread_or_group(_metadata, RET_UNKNOWN); 933 _exit(38); 934 } 935 936 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 937 938 /* If the entire process was killed, we'll see SIGSYS. */ 939 EXPECT_TRUE(WIFSIGNALED(status)) { 940 TH_LOG("Unknown SECCOMP_RET is only killing the thread?"); 941 } 942 ASSERT_EQ(SIGSYS, WTERMSIG(status)); 943 } 944 945 /* TODO(wad) add 64-bit versus 32-bit arg tests. 
*/ 946 TEST(arg_out_of_range) 947 { 948 struct sock_filter filter[] = { 949 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)), 950 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 951 }; 952 struct sock_fprog prog = { 953 .len = (unsigned short)ARRAY_SIZE(filter), 954 .filter = filter, 955 }; 956 long ret; 957 958 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 959 ASSERT_EQ(0, ret); 960 961 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 962 EXPECT_EQ(-1, ret); 963 EXPECT_EQ(EINVAL, errno); 964 } 965 966 #define ERRNO_FILTER(name, errno) \ 967 struct sock_filter _read_filter_##name[] = { \ 968 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, \ 969 offsetof(struct seccomp_data, nr)), \ 970 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), \ 971 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | errno), \ 972 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), \ 973 }; \ 974 struct sock_fprog prog_##name = { \ 975 .len = (unsigned short)ARRAY_SIZE(_read_filter_##name), \ 976 .filter = _read_filter_##name, \ 977 } 978 979 /* Make sure basic errno values are correctly passed through a filter. */ 980 TEST(ERRNO_valid) 981 { 982 ERRNO_FILTER(valid, E2BIG); 983 long ret; 984 pid_t parent = getppid(); 985 986 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 987 ASSERT_EQ(0, ret); 988 989 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid); 990 ASSERT_EQ(0, ret); 991 992 EXPECT_EQ(parent, syscall(__NR_getppid)); 993 EXPECT_EQ(-1, read(-1, NULL, 0)); 994 EXPECT_EQ(E2BIG, errno); 995 } 996 997 /* Make sure an errno of zero is correctly handled by the arch code. */ 998 TEST(ERRNO_zero) 999 { 1000 ERRNO_FILTER(zero, 0); 1001 long ret; 1002 pid_t parent = getppid(); 1003 1004 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1005 ASSERT_EQ(0, ret); 1006 1007 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero); 1008 ASSERT_EQ(0, ret); 1009 1010 EXPECT_EQ(parent, syscall(__NR_getppid)); 1011 /* "errno" of 0 is ok. 
*/ 1012 EXPECT_EQ(0, read(-1, NULL, 0)); 1013 } 1014 1015 /* 1016 * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller. 1017 * This tests that the errno value gets capped correctly, fixed by 1018 * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO"). 1019 */ 1020 TEST(ERRNO_capped) 1021 { 1022 ERRNO_FILTER(capped, 4096); 1023 long ret; 1024 pid_t parent = getppid(); 1025 1026 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1027 ASSERT_EQ(0, ret); 1028 1029 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped); 1030 ASSERT_EQ(0, ret); 1031 1032 EXPECT_EQ(parent, syscall(__NR_getppid)); 1033 EXPECT_EQ(-1, read(-1, NULL, 0)); 1034 EXPECT_EQ(4095, errno); 1035 } 1036 1037 /* 1038 * Filters are processed in reverse order: last applied is executed first. 1039 * Since only the SECCOMP_RET_ACTION mask is tested for return values, the 1040 * SECCOMP_RET_DATA mask results will follow the most recently applied 1041 * matching filter return (and not the lowest or highest value). 
1042 */ 1043 TEST(ERRNO_order) 1044 { 1045 ERRNO_FILTER(first, 11); 1046 ERRNO_FILTER(second, 13); 1047 ERRNO_FILTER(third, 12); 1048 long ret; 1049 pid_t parent = getppid(); 1050 1051 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1052 ASSERT_EQ(0, ret); 1053 1054 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first); 1055 ASSERT_EQ(0, ret); 1056 1057 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second); 1058 ASSERT_EQ(0, ret); 1059 1060 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third); 1061 ASSERT_EQ(0, ret); 1062 1063 EXPECT_EQ(parent, syscall(__NR_getppid)); 1064 EXPECT_EQ(-1, read(-1, NULL, 0)); 1065 EXPECT_EQ(12, errno); 1066 } 1067 1068 FIXTURE(TRAP) { 1069 struct sock_fprog prog; 1070 }; 1071 1072 FIXTURE_SETUP(TRAP) 1073 { 1074 struct sock_filter filter[] = { 1075 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1076 offsetof(struct seccomp_data, nr)), 1077 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 1078 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP), 1079 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1080 }; 1081 1082 memset(&self->prog, 0, sizeof(self->prog)); 1083 self->prog.filter = malloc(sizeof(filter)); 1084 ASSERT_NE(NULL, self->prog.filter); 1085 memcpy(self->prog.filter, filter, sizeof(filter)); 1086 self->prog.len = (unsigned short)ARRAY_SIZE(filter); 1087 } 1088 1089 FIXTURE_TEARDOWN(TRAP) 1090 { 1091 if (self->prog.filter) 1092 free(self->prog.filter); 1093 } 1094 1095 TEST_F_SIGNAL(TRAP, dfl, SIGSYS) 1096 { 1097 long ret; 1098 1099 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1100 ASSERT_EQ(0, ret); 1101 1102 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); 1103 ASSERT_EQ(0, ret); 1104 syscall(__NR_getpid); 1105 } 1106 1107 /* Ensure that SIGSYS overrides SIG_IGN */ 1108 TEST_F_SIGNAL(TRAP, ign, SIGSYS) 1109 { 1110 long ret; 1111 1112 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1113 ASSERT_EQ(0, ret); 1114 1115 signal(SIGSYS, SIG_IGN); 1116 1117 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); 1118 
ASSERT_EQ(0, ret); 1119 syscall(__NR_getpid); 1120 } 1121 1122 static siginfo_t TRAP_info; 1123 static volatile int TRAP_nr; 1124 static void TRAP_action(int nr, siginfo_t *info, void *void_context) 1125 { 1126 memcpy(&TRAP_info, info, sizeof(TRAP_info)); 1127 TRAP_nr = nr; 1128 } 1129 1130 TEST_F(TRAP, handler) 1131 { 1132 int ret, test; 1133 struct sigaction act; 1134 sigset_t mask; 1135 1136 memset(&act, 0, sizeof(act)); 1137 sigemptyset(&mask); 1138 sigaddset(&mask, SIGSYS); 1139 1140 act.sa_sigaction = &TRAP_action; 1141 act.sa_flags = SA_SIGINFO; 1142 ret = sigaction(SIGSYS, &act, NULL); 1143 ASSERT_EQ(0, ret) { 1144 TH_LOG("sigaction failed"); 1145 } 1146 ret = sigprocmask(SIG_UNBLOCK, &mask, NULL); 1147 ASSERT_EQ(0, ret) { 1148 TH_LOG("sigprocmask failed"); 1149 } 1150 1151 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1152 ASSERT_EQ(0, ret); 1153 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); 1154 ASSERT_EQ(0, ret); 1155 TRAP_nr = 0; 1156 memset(&TRAP_info, 0, sizeof(TRAP_info)); 1157 /* Expect the registers to be rolled back. (nr = error) may vary 1158 * based on arch. */ 1159 ret = syscall(__NR_getpid); 1160 /* Silence gcc warning about volatile. */ 1161 test = TRAP_nr; 1162 EXPECT_EQ(SIGSYS, test); 1163 struct local_sigsys { 1164 void *_call_addr; /* calling user insn */ 1165 int _syscall; /* triggering system call number */ 1166 unsigned int _arch; /* AUDIT_ARCH_* of syscall */ 1167 } *sigsys = (struct local_sigsys *) 1168 #ifdef si_syscall 1169 &(TRAP_info.si_call_addr); 1170 #else 1171 &TRAP_info.si_pid; 1172 #endif 1173 EXPECT_EQ(__NR_getpid, sigsys->_syscall); 1174 /* Make sure arch is non-zero. 
*/ 1175 EXPECT_NE(0, sigsys->_arch); 1176 EXPECT_NE(0, (unsigned long)sigsys->_call_addr); 1177 } 1178 1179 FIXTURE(precedence) { 1180 struct sock_fprog allow; 1181 struct sock_fprog log; 1182 struct sock_fprog trace; 1183 struct sock_fprog error; 1184 struct sock_fprog trap; 1185 struct sock_fprog kill; 1186 }; 1187 1188 FIXTURE_SETUP(precedence) 1189 { 1190 struct sock_filter allow_insns[] = { 1191 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1192 }; 1193 struct sock_filter log_insns[] = { 1194 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1195 offsetof(struct seccomp_data, nr)), 1196 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1197 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1198 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG), 1199 }; 1200 struct sock_filter trace_insns[] = { 1201 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1202 offsetof(struct seccomp_data, nr)), 1203 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1204 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1205 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE), 1206 }; 1207 struct sock_filter error_insns[] = { 1208 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1209 offsetof(struct seccomp_data, nr)), 1210 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1211 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1212 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO), 1213 }; 1214 struct sock_filter trap_insns[] = { 1215 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1216 offsetof(struct seccomp_data, nr)), 1217 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1218 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1219 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP), 1220 }; 1221 struct sock_filter kill_insns[] = { 1222 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1223 offsetof(struct seccomp_data, nr)), 1224 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1225 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1226 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 1227 }; 1228 1229 memset(self, 0, sizeof(*self)); 1230 #define FILTER_ALLOC(_x) \ 1231 self->_x.filter = malloc(sizeof(_x##_insns)); \ 1232 
ASSERT_NE(NULL, self->_x.filter); \ 1233 memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \ 1234 self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns) 1235 FILTER_ALLOC(allow); 1236 FILTER_ALLOC(log); 1237 FILTER_ALLOC(trace); 1238 FILTER_ALLOC(error); 1239 FILTER_ALLOC(trap); 1240 FILTER_ALLOC(kill); 1241 } 1242 1243 FIXTURE_TEARDOWN(precedence) 1244 { 1245 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter) 1246 FILTER_FREE(allow); 1247 FILTER_FREE(log); 1248 FILTER_FREE(trace); 1249 FILTER_FREE(error); 1250 FILTER_FREE(trap); 1251 FILTER_FREE(kill); 1252 } 1253 1254 TEST_F(precedence, allow_ok) 1255 { 1256 pid_t parent, res = 0; 1257 long ret; 1258 1259 parent = getppid(); 1260 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1261 ASSERT_EQ(0, ret); 1262 1263 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1264 ASSERT_EQ(0, ret); 1265 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1266 ASSERT_EQ(0, ret); 1267 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1268 ASSERT_EQ(0, ret); 1269 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1270 ASSERT_EQ(0, ret); 1271 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1272 ASSERT_EQ(0, ret); 1273 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); 1274 ASSERT_EQ(0, ret); 1275 /* Should work just fine. 
*/ 1276 res = syscall(__NR_getppid); 1277 EXPECT_EQ(parent, res); 1278 } 1279 1280 TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS) 1281 { 1282 pid_t parent, res = 0; 1283 long ret; 1284 1285 parent = getppid(); 1286 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1287 ASSERT_EQ(0, ret); 1288 1289 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1290 ASSERT_EQ(0, ret); 1291 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1292 ASSERT_EQ(0, ret); 1293 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1294 ASSERT_EQ(0, ret); 1295 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1296 ASSERT_EQ(0, ret); 1297 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1298 ASSERT_EQ(0, ret); 1299 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); 1300 ASSERT_EQ(0, ret); 1301 /* Should work just fine. */ 1302 res = syscall(__NR_getppid); 1303 EXPECT_EQ(parent, res); 1304 /* getpid() should never return. */ 1305 res = syscall(__NR_getpid); 1306 EXPECT_EQ(0, res); 1307 } 1308 1309 TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS) 1310 { 1311 pid_t parent; 1312 long ret; 1313 1314 parent = getppid(); 1315 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1316 ASSERT_EQ(0, ret); 1317 1318 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1319 ASSERT_EQ(0, ret); 1320 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); 1321 ASSERT_EQ(0, ret); 1322 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1323 ASSERT_EQ(0, ret); 1324 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1325 ASSERT_EQ(0, ret); 1326 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1327 ASSERT_EQ(0, ret); 1328 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1329 ASSERT_EQ(0, ret); 1330 /* Should work just fine. */ 1331 EXPECT_EQ(parent, syscall(__NR_getppid)); 1332 /* getpid() should never return. 
*/ 1333 EXPECT_EQ(0, syscall(__NR_getpid)); 1334 } 1335 1336 TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS) 1337 { 1338 pid_t parent; 1339 long ret; 1340 1341 parent = getppid(); 1342 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1343 ASSERT_EQ(0, ret); 1344 1345 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1346 ASSERT_EQ(0, ret); 1347 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1348 ASSERT_EQ(0, ret); 1349 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1350 ASSERT_EQ(0, ret); 1351 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1352 ASSERT_EQ(0, ret); 1353 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1354 ASSERT_EQ(0, ret); 1355 /* Should work just fine. */ 1356 EXPECT_EQ(parent, syscall(__NR_getppid)); 1357 /* getpid() should never return. */ 1358 EXPECT_EQ(0, syscall(__NR_getpid)); 1359 } 1360 1361 TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS) 1362 { 1363 pid_t parent; 1364 long ret; 1365 1366 parent = getppid(); 1367 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1368 ASSERT_EQ(0, ret); 1369 1370 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1371 ASSERT_EQ(0, ret); 1372 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1373 ASSERT_EQ(0, ret); 1374 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1375 ASSERT_EQ(0, ret); 1376 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1377 ASSERT_EQ(0, ret); 1378 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1379 ASSERT_EQ(0, ret); 1380 /* Should work just fine. */ 1381 EXPECT_EQ(parent, syscall(__NR_getppid)); 1382 /* getpid() should never return. 
*/ 1383 EXPECT_EQ(0, syscall(__NR_getpid)); 1384 } 1385 1386 TEST_F(precedence, errno_is_third) 1387 { 1388 pid_t parent; 1389 long ret; 1390 1391 parent = getppid(); 1392 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1393 ASSERT_EQ(0, ret); 1394 1395 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1396 ASSERT_EQ(0, ret); 1397 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1398 ASSERT_EQ(0, ret); 1399 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1400 ASSERT_EQ(0, ret); 1401 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1402 ASSERT_EQ(0, ret); 1403 /* Should work just fine. */ 1404 EXPECT_EQ(parent, syscall(__NR_getppid)); 1405 EXPECT_EQ(0, syscall(__NR_getpid)); 1406 } 1407 1408 TEST_F(precedence, errno_is_third_in_any_order) 1409 { 1410 pid_t parent; 1411 long ret; 1412 1413 parent = getppid(); 1414 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1415 ASSERT_EQ(0, ret); 1416 1417 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1418 ASSERT_EQ(0, ret); 1419 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1420 ASSERT_EQ(0, ret); 1421 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1422 ASSERT_EQ(0, ret); 1423 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1424 ASSERT_EQ(0, ret); 1425 /* Should work just fine. */ 1426 EXPECT_EQ(parent, syscall(__NR_getppid)); 1427 EXPECT_EQ(0, syscall(__NR_getpid)); 1428 } 1429 1430 TEST_F(precedence, trace_is_fourth) 1431 { 1432 pid_t parent; 1433 long ret; 1434 1435 parent = getppid(); 1436 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1437 ASSERT_EQ(0, ret); 1438 1439 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1440 ASSERT_EQ(0, ret); 1441 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1442 ASSERT_EQ(0, ret); 1443 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1444 ASSERT_EQ(0, ret); 1445 /* Should work just fine. 
*/ 1446 EXPECT_EQ(parent, syscall(__NR_getppid)); 1447 /* No ptracer */ 1448 EXPECT_EQ(-1, syscall(__NR_getpid)); 1449 } 1450 1451 TEST_F(precedence, trace_is_fourth_in_any_order) 1452 { 1453 pid_t parent; 1454 long ret; 1455 1456 parent = getppid(); 1457 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1458 ASSERT_EQ(0, ret); 1459 1460 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1461 ASSERT_EQ(0, ret); 1462 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1463 ASSERT_EQ(0, ret); 1464 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1465 ASSERT_EQ(0, ret); 1466 /* Should work just fine. */ 1467 EXPECT_EQ(parent, syscall(__NR_getppid)); 1468 /* No ptracer */ 1469 EXPECT_EQ(-1, syscall(__NR_getpid)); 1470 } 1471 1472 TEST_F(precedence, log_is_fifth) 1473 { 1474 pid_t mypid, parent; 1475 long ret; 1476 1477 mypid = getpid(); 1478 parent = getppid(); 1479 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1480 ASSERT_EQ(0, ret); 1481 1482 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1483 ASSERT_EQ(0, ret); 1484 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1485 ASSERT_EQ(0, ret); 1486 /* Should work just fine. */ 1487 EXPECT_EQ(parent, syscall(__NR_getppid)); 1488 /* Should also work just fine */ 1489 EXPECT_EQ(mypid, syscall(__NR_getpid)); 1490 } 1491 1492 TEST_F(precedence, log_is_fifth_in_any_order) 1493 { 1494 pid_t mypid, parent; 1495 long ret; 1496 1497 mypid = getpid(); 1498 parent = getppid(); 1499 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1500 ASSERT_EQ(0, ret); 1501 1502 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1503 ASSERT_EQ(0, ret); 1504 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1505 ASSERT_EQ(0, ret); 1506 /* Should work just fine. 
*/ 1507 EXPECT_EQ(parent, syscall(__NR_getppid)); 1508 /* Should also work just fine */ 1509 EXPECT_EQ(mypid, syscall(__NR_getpid)); 1510 } 1511 1512 #ifndef PTRACE_O_TRACESECCOMP 1513 #define PTRACE_O_TRACESECCOMP 0x00000080 1514 #endif 1515 1516 /* Catch the Ubuntu 12.04 value error. */ 1517 #if PTRACE_EVENT_SECCOMP != 7 1518 #undef PTRACE_EVENT_SECCOMP 1519 #endif 1520 1521 #ifndef PTRACE_EVENT_SECCOMP 1522 #define PTRACE_EVENT_SECCOMP 7 1523 #endif 1524 1525 #define PTRACE_EVENT_MASK(status) ((status) >> 16) 1526 bool tracer_running; 1527 void tracer_stop(int sig) 1528 { 1529 tracer_running = false; 1530 } 1531 1532 typedef void tracer_func_t(struct __test_metadata *_metadata, 1533 pid_t tracee, int status, void *args); 1534 1535 void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee, 1536 tracer_func_t tracer_func, void *args, bool ptrace_syscall) 1537 { 1538 int ret = -1; 1539 struct sigaction action = { 1540 .sa_handler = tracer_stop, 1541 }; 1542 1543 /* Allow external shutdown. */ 1544 tracer_running = true; 1545 ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL)); 1546 1547 errno = 0; 1548 while (ret == -1 && errno != EINVAL) 1549 ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0); 1550 ASSERT_EQ(0, ret) { 1551 kill(tracee, SIGKILL); 1552 } 1553 /* Wait for attach stop */ 1554 wait(NULL); 1555 1556 ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ? 1557 PTRACE_O_TRACESYSGOOD : 1558 PTRACE_O_TRACESECCOMP); 1559 ASSERT_EQ(0, ret) { 1560 TH_LOG("Failed to set PTRACE_O_TRACESECCOMP"); 1561 kill(tracee, SIGKILL); 1562 } 1563 ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT, 1564 tracee, NULL, 0); 1565 ASSERT_EQ(0, ret); 1566 1567 /* Unblock the tracee */ 1568 ASSERT_EQ(1, write(fd, "A", 1)); 1569 ASSERT_EQ(0, close(fd)); 1570 1571 /* Run until we're shut down. Must assert to stop execution. 
*/ 1572 while (tracer_running) { 1573 int status; 1574 1575 if (wait(&status) != tracee) 1576 continue; 1577 1578 if (WIFSIGNALED(status)) { 1579 /* Child caught a fatal signal. */ 1580 return; 1581 } 1582 if (WIFEXITED(status)) { 1583 /* Child exited with code. */ 1584 return; 1585 } 1586 1587 /* Check if we got an expected event. */ 1588 ASSERT_EQ(WIFCONTINUED(status), false); 1589 ASSERT_EQ(WIFSTOPPED(status), true); 1590 ASSERT_EQ(WSTOPSIG(status) & SIGTRAP, SIGTRAP) { 1591 TH_LOG("Unexpected WSTOPSIG: %d", WSTOPSIG(status)); 1592 } 1593 1594 tracer_func(_metadata, tracee, status, args); 1595 1596 ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT, 1597 tracee, NULL, 0); 1598 ASSERT_EQ(0, ret); 1599 } 1600 /* Directly report the status of our test harness results. */ 1601 syscall(__NR_exit, _metadata->exit_code); 1602 } 1603 1604 /* Common tracer setup/teardown functions. */ 1605 void cont_handler(int num) 1606 { } 1607 pid_t setup_trace_fixture(struct __test_metadata *_metadata, 1608 tracer_func_t func, void *args, bool ptrace_syscall) 1609 { 1610 char sync; 1611 int pipefd[2]; 1612 pid_t tracer_pid; 1613 pid_t tracee = getpid(); 1614 1615 /* Setup a pipe for clean synchronization. 
*/ 1616 ASSERT_EQ(0, pipe(pipefd)); 1617 1618 /* Fork a child which we'll promote to tracer */ 1619 tracer_pid = fork(); 1620 ASSERT_LE(0, tracer_pid); 1621 signal(SIGALRM, cont_handler); 1622 if (tracer_pid == 0) { 1623 close(pipefd[0]); 1624 start_tracer(_metadata, pipefd[1], tracee, func, args, 1625 ptrace_syscall); 1626 syscall(__NR_exit, 0); 1627 } 1628 close(pipefd[1]); 1629 prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0); 1630 read(pipefd[0], &sync, 1); 1631 close(pipefd[0]); 1632 1633 return tracer_pid; 1634 } 1635 1636 void teardown_trace_fixture(struct __test_metadata *_metadata, 1637 pid_t tracer) 1638 { 1639 if (tracer) { 1640 int status; 1641 ASSERT_EQ(0, kill(tracer, SIGUSR1)); 1642 ASSERT_EQ(tracer, waitpid(tracer, &status, 0)); 1643 } 1644 } 1645 1646 /* "poke" tracer arguments and function. */ 1647 struct tracer_args_poke_t { 1648 unsigned long poke_addr; 1649 }; 1650 1651 void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status, 1652 void *args) 1653 { 1654 int ret; 1655 unsigned long msg; 1656 struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args; 1657 1658 ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); 1659 EXPECT_EQ(0, ret); 1660 /* If this fails, don't try to recover. */ 1661 ASSERT_EQ(0x1001, msg) { 1662 kill(tracee, SIGKILL); 1663 } 1664 /* 1665 * Poke in the message. 1666 * Registers are not touched to try to keep this relatively arch 1667 * agnostic. 
1668 */ 1669 ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001); 1670 EXPECT_EQ(0, ret); 1671 } 1672 1673 FIXTURE(TRACE_poke) { 1674 struct sock_fprog prog; 1675 pid_t tracer; 1676 long poked; 1677 struct tracer_args_poke_t tracer_args; 1678 }; 1679 1680 FIXTURE_SETUP(TRACE_poke) 1681 { 1682 struct sock_filter filter[] = { 1683 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1684 offsetof(struct seccomp_data, nr)), 1685 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), 1686 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001), 1687 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1688 }; 1689 1690 self->poked = 0; 1691 memset(&self->prog, 0, sizeof(self->prog)); 1692 self->prog.filter = malloc(sizeof(filter)); 1693 ASSERT_NE(NULL, self->prog.filter); 1694 memcpy(self->prog.filter, filter, sizeof(filter)); 1695 self->prog.len = (unsigned short)ARRAY_SIZE(filter); 1696 1697 /* Set up tracer args. */ 1698 self->tracer_args.poke_addr = (unsigned long)&self->poked; 1699 1700 /* Launch tracer. */ 1701 self->tracer = setup_trace_fixture(_metadata, tracer_poke, 1702 &self->tracer_args, false); 1703 } 1704 1705 FIXTURE_TEARDOWN(TRACE_poke) 1706 { 1707 teardown_trace_fixture(_metadata, self->tracer); 1708 if (self->prog.filter) 1709 free(self->prog.filter); 1710 } 1711 1712 TEST_F(TRACE_poke, read_has_side_effects) 1713 { 1714 ssize_t ret; 1715 1716 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1717 ASSERT_EQ(0, ret); 1718 1719 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); 1720 ASSERT_EQ(0, ret); 1721 1722 EXPECT_EQ(0, self->poked); 1723 ret = read(-1, NULL, 0); 1724 EXPECT_EQ(-1, ret); 1725 EXPECT_EQ(0x1001, self->poked); 1726 } 1727 1728 TEST_F(TRACE_poke, getpid_runs_normally) 1729 { 1730 long ret; 1731 1732 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1733 ASSERT_EQ(0, ret); 1734 1735 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); 1736 ASSERT_EQ(0, ret); 1737 1738 EXPECT_EQ(0, self->poked); 1739 EXPECT_NE(0, syscall(__NR_getpid)); 1740 
EXPECT_EQ(0, self->poked); 1741 } 1742 1743 #if defined(__x86_64__) 1744 # define ARCH_REGS struct user_regs_struct 1745 # define SYSCALL_NUM(_regs) (_regs).orig_rax 1746 # define SYSCALL_RET(_regs) (_regs).rax 1747 #elif defined(__i386__) 1748 # define ARCH_REGS struct user_regs_struct 1749 # define SYSCALL_NUM(_regs) (_regs).orig_eax 1750 # define SYSCALL_RET(_regs) (_regs).eax 1751 #elif defined(__arm__) 1752 # define ARCH_REGS struct pt_regs 1753 # define SYSCALL_NUM(_regs) (_regs).ARM_r7 1754 # ifndef PTRACE_SET_SYSCALL 1755 # define PTRACE_SET_SYSCALL 23 1756 # endif 1757 # define SYSCALL_NUM_SET(_regs, _nr) \ 1758 EXPECT_EQ(0, ptrace(PTRACE_SET_SYSCALL, tracee, NULL, _nr)) 1759 # define SYSCALL_RET(_regs) (_regs).ARM_r0 1760 #elif defined(__aarch64__) 1761 # define ARCH_REGS struct user_pt_regs 1762 # define SYSCALL_NUM(_regs) (_regs).regs[8] 1763 # ifndef NT_ARM_SYSTEM_CALL 1764 # define NT_ARM_SYSTEM_CALL 0x404 1765 # endif 1766 # define SYSCALL_NUM_SET(_regs, _nr) \ 1767 do { \ 1768 struct iovec __v; \ 1769 typeof(_nr) __nr = (_nr); \ 1770 __v.iov_base = &__nr; \ 1771 __v.iov_len = sizeof(__nr); \ 1772 EXPECT_EQ(0, ptrace(PTRACE_SETREGSET, tracee, \ 1773 NT_ARM_SYSTEM_CALL, &__v)); \ 1774 } while (0) 1775 # define SYSCALL_RET(_regs) (_regs).regs[0] 1776 #elif defined(__loongarch__) 1777 # define ARCH_REGS struct user_pt_regs 1778 # define SYSCALL_NUM(_regs) (_regs).regs[11] 1779 # define SYSCALL_RET(_regs) (_regs).regs[4] 1780 #elif defined(__riscv) && __riscv_xlen == 64 1781 # define ARCH_REGS struct user_regs_struct 1782 # define SYSCALL_NUM(_regs) (_regs).a7 1783 # define SYSCALL_RET(_regs) (_regs).a0 1784 #elif defined(__csky__) 1785 # define ARCH_REGS struct pt_regs 1786 # if defined(__CSKYABIV2__) 1787 # define SYSCALL_NUM(_regs) (_regs).regs[3] 1788 # else 1789 # define SYSCALL_NUM(_regs) (_regs).regs[9] 1790 # endif 1791 # define SYSCALL_RET(_regs) (_regs).a0 1792 #elif defined(__hppa__) 1793 # define ARCH_REGS struct user_regs_struct 1794 # 
define SYSCALL_NUM(_regs) (_regs).gr[20] 1795 # define SYSCALL_RET(_regs) (_regs).gr[28] 1796 #elif defined(__powerpc__) 1797 # define ARCH_REGS struct pt_regs 1798 # define SYSCALL_NUM(_regs) (_regs).gpr[0] 1799 # define SYSCALL_RET(_regs) (_regs).gpr[3] 1800 # define SYSCALL_RET_SET(_regs, _val) \ 1801 do { \ 1802 typeof(_val) _result = (_val); \ 1803 if ((_regs.trap & 0xfff0) == 0x3000) { \ 1804 /* \ 1805 * scv 0 system call uses -ve result \ 1806 * for error, so no need to adjust. \ 1807 */ \ 1808 SYSCALL_RET(_regs) = _result; \ 1809 } else { \ 1810 /* \ 1811 * A syscall error is signaled by the \ 1812 * CR0 SO bit and the code is stored as \ 1813 * a positive value. \ 1814 */ \ 1815 if (_result < 0) { \ 1816 SYSCALL_RET(_regs) = -_result; \ 1817 (_regs).ccr |= 0x10000000; \ 1818 } else { \ 1819 SYSCALL_RET(_regs) = _result; \ 1820 (_regs).ccr &= ~0x10000000; \ 1821 } \ 1822 } \ 1823 } while (0) 1824 # define SYSCALL_RET_SET_ON_PTRACE_EXIT 1825 #elif defined(__s390__) 1826 # define ARCH_REGS s390_regs 1827 # define SYSCALL_NUM(_regs) (_regs).gprs[2] 1828 # define SYSCALL_RET_SET(_regs, _val) \ 1829 TH_LOG("Can't modify syscall return on this architecture") 1830 #elif defined(__mips__) 1831 # include <asm/unistd_nr_n32.h> 1832 # include <asm/unistd_nr_n64.h> 1833 # include <asm/unistd_nr_o32.h> 1834 # define ARCH_REGS struct pt_regs 1835 # define SYSCALL_NUM(_regs) \ 1836 ({ \ 1837 typeof((_regs).regs[2]) _nr; \ 1838 if ((_regs).regs[2] == __NR_O32_Linux) \ 1839 _nr = (_regs).regs[4]; \ 1840 else \ 1841 _nr = (_regs).regs[2]; \ 1842 _nr; \ 1843 }) 1844 # define SYSCALL_NUM_SET(_regs, _nr) \ 1845 do { \ 1846 if ((_regs).regs[2] == __NR_O32_Linux) \ 1847 (_regs).regs[4] = _nr; \ 1848 else \ 1849 (_regs).regs[2] = _nr; \ 1850 } while (0) 1851 # define SYSCALL_RET_SET(_regs, _val) \ 1852 TH_LOG("Can't modify syscall return on this architecture") 1853 #elif defined(__xtensa__) 1854 # define ARCH_REGS struct user_pt_regs 1855 # define SYSCALL_NUM(_regs) 
(_regs).syscall 1856 /* 1857 * On xtensa syscall return value is in the register 1858 * a2 of the current window which is not fixed. 1859 */ 1860 #define SYSCALL_RET(_regs) (_regs).a[(_regs).windowbase * 4 + 2] 1861 #elif defined(__sh__) 1862 # define ARCH_REGS struct pt_regs 1863 # define SYSCALL_NUM(_regs) (_regs).regs[3] 1864 # define SYSCALL_RET(_regs) (_regs).regs[0] 1865 #elif defined(__mc68000__) 1866 # define ARCH_REGS struct user_regs_struct 1867 # define SYSCALL_NUM(_regs) (_regs).orig_d0 1868 # define SYSCALL_RET(_regs) (_regs).d0 1869 #else 1870 # error "Do not know how to find your architecture's registers and syscalls" 1871 #endif 1872 1873 /* 1874 * Most architectures can change the syscall by just updating the 1875 * associated register. This is the default if not defined above. 1876 */ 1877 #ifndef SYSCALL_NUM_SET 1878 # define SYSCALL_NUM_SET(_regs, _nr) \ 1879 do { \ 1880 SYSCALL_NUM(_regs) = (_nr); \ 1881 } while (0) 1882 #endif 1883 /* 1884 * Most architectures can change the syscall return value by just 1885 * writing to the SYSCALL_RET register. This is the default if not 1886 * defined above. If an architecture cannot set the return value 1887 * (for example when the syscall and return value register is 1888 * shared), report it with TH_LOG() in an arch-specific definition 1889 * of SYSCALL_RET_SET() above, and leave SYSCALL_RET undefined. 1890 */ 1891 #if !defined(SYSCALL_RET) && !defined(SYSCALL_RET_SET) 1892 # error "One of SYSCALL_RET or SYSCALL_RET_SET is needed for this arch" 1893 #endif 1894 #ifndef SYSCALL_RET_SET 1895 # define SYSCALL_RET_SET(_regs, _val) \ 1896 do { \ 1897 SYSCALL_RET(_regs) = (_val); \ 1898 } while (0) 1899 #endif 1900 1901 /* When the syscall return can't be changed, stub out the tests for it. 
*/ 1902 #ifndef SYSCALL_RET 1903 # define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action) 1904 #else 1905 # define EXPECT_SYSCALL_RETURN(val, action) \ 1906 do { \ 1907 errno = 0; \ 1908 if (val < 0) { \ 1909 EXPECT_EQ(-1, action); \ 1910 EXPECT_EQ(-(val), errno); \ 1911 } else { \ 1912 EXPECT_EQ(val, action); \ 1913 } \ 1914 } while (0) 1915 #endif 1916 1917 /* 1918 * Some architectures (e.g. powerpc) can only set syscall 1919 * return values on syscall exit during ptrace. 1920 */ 1921 const bool ptrace_entry_set_syscall_nr = true; 1922 const bool ptrace_entry_set_syscall_ret = 1923 #ifndef SYSCALL_RET_SET_ON_PTRACE_EXIT 1924 true; 1925 #else 1926 false; 1927 #endif 1928 1929 /* 1930 * Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for 1931 * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux). 1932 */ 1933 #if defined(__x86_64__) || defined(__i386__) || defined(__mips__) || defined(__mc68000__) 1934 # define ARCH_GETREGS(_regs) ptrace(PTRACE_GETREGS, tracee, 0, &(_regs)) 1935 # define ARCH_SETREGS(_regs) ptrace(PTRACE_SETREGS, tracee, 0, &(_regs)) 1936 #else 1937 # define ARCH_GETREGS(_regs) ({ \ 1938 struct iovec __v; \ 1939 __v.iov_base = &(_regs); \ 1940 __v.iov_len = sizeof(_regs); \ 1941 ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &__v); \ 1942 }) 1943 # define ARCH_SETREGS(_regs) ({ \ 1944 struct iovec __v; \ 1945 __v.iov_base = &(_regs); \ 1946 __v.iov_len = sizeof(_regs); \ 1947 ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &__v); \ 1948 }) 1949 #endif 1950 1951 /* Architecture-specific syscall fetching routine. */ 1952 int get_syscall(struct __test_metadata *_metadata, pid_t tracee) 1953 { 1954 ARCH_REGS regs; 1955 1956 EXPECT_EQ(0, ARCH_GETREGS(regs)) { 1957 return -1; 1958 } 1959 1960 return SYSCALL_NUM(regs); 1961 } 1962 1963 /* Architecture-specific syscall changing routine. 
*/ 1964 void __change_syscall(struct __test_metadata *_metadata, 1965 pid_t tracee, long *syscall, long *ret) 1966 { 1967 ARCH_REGS orig, regs; 1968 1969 /* Do not get/set registers if we have nothing to do. */ 1970 if (!syscall && !ret) 1971 return; 1972 1973 EXPECT_EQ(0, ARCH_GETREGS(regs)) { 1974 return; 1975 } 1976 orig = regs; 1977 1978 if (syscall) 1979 SYSCALL_NUM_SET(regs, *syscall); 1980 1981 if (ret) 1982 SYSCALL_RET_SET(regs, *ret); 1983 1984 /* Flush any register changes made. */ 1985 if (memcmp(&orig, ®s, sizeof(orig)) != 0) 1986 EXPECT_EQ(0, ARCH_SETREGS(regs)); 1987 } 1988 1989 /* Change only syscall number. */ 1990 void change_syscall_nr(struct __test_metadata *_metadata, 1991 pid_t tracee, long syscall) 1992 { 1993 __change_syscall(_metadata, tracee, &syscall, NULL); 1994 } 1995 1996 /* Change syscall return value (and set syscall number to -1). */ 1997 void change_syscall_ret(struct __test_metadata *_metadata, 1998 pid_t tracee, long ret) 1999 { 2000 long syscall = -1; 2001 2002 __change_syscall(_metadata, tracee, &syscall, &ret); 2003 } 2004 2005 void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee, 2006 int status, void *args) 2007 { 2008 int ret; 2009 unsigned long msg; 2010 2011 EXPECT_EQ(PTRACE_EVENT_MASK(status), PTRACE_EVENT_SECCOMP) { 2012 TH_LOG("Unexpected ptrace event: %d", PTRACE_EVENT_MASK(status)); 2013 return; 2014 } 2015 2016 /* Make sure we got the right message. */ 2017 ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); 2018 EXPECT_EQ(0, ret); 2019 2020 /* Validate and take action on expected syscalls. */ 2021 switch (msg) { 2022 case 0x1002: 2023 /* change getpid to getppid. */ 2024 EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee)); 2025 change_syscall_nr(_metadata, tracee, __NR_getppid); 2026 break; 2027 case 0x1003: 2028 /* skip gettid with valid return code. 
*/ 2029 EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee)); 2030 change_syscall_ret(_metadata, tracee, 45000); 2031 break; 2032 case 0x1004: 2033 /* skip openat with error. */ 2034 EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee)); 2035 change_syscall_ret(_metadata, tracee, -ESRCH); 2036 break; 2037 case 0x1005: 2038 /* do nothing (allow getppid) */ 2039 EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee)); 2040 break; 2041 default: 2042 EXPECT_EQ(0, msg) { 2043 TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg); 2044 kill(tracee, SIGKILL); 2045 } 2046 } 2047 2048 } 2049 2050 FIXTURE(TRACE_syscall) { 2051 struct sock_fprog prog; 2052 pid_t tracer, mytid, mypid, parent; 2053 long syscall_nr; 2054 }; 2055 2056 void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, 2057 int status, void *args) 2058 { 2059 int ret; 2060 unsigned long msg; 2061 static bool entry; 2062 long syscall_nr_val, syscall_ret_val; 2063 long *syscall_nr = NULL, *syscall_ret = NULL; 2064 FIXTURE_DATA(TRACE_syscall) *self = args; 2065 2066 EXPECT_EQ(WSTOPSIG(status) & 0x80, 0x80) { 2067 TH_LOG("Unexpected WSTOPSIG: %d", WSTOPSIG(status)); 2068 return; 2069 } 2070 2071 /* 2072 * The traditional way to tell PTRACE_SYSCALL entry/exit 2073 * is by counting. 2074 */ 2075 entry = !entry; 2076 2077 /* Make sure we got an appropriate message. */ 2078 ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); 2079 EXPECT_EQ(0, ret); 2080 EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY 2081 : PTRACE_EVENTMSG_SYSCALL_EXIT, msg); 2082 2083 /* 2084 * Some architectures only support setting return values during 2085 * syscall exit under ptrace, and on exit the syscall number may 2086 * no longer be available. Therefore, save the initial sycall 2087 * number here, so it can be examined during both entry and exit 2088 * phases. 
2089 */ 2090 if (entry) 2091 self->syscall_nr = get_syscall(_metadata, tracee); 2092 2093 /* 2094 * Depending on the architecture's syscall setting abilities, we 2095 * pick which things to set during this phase (entry or exit). 2096 */ 2097 if (entry == ptrace_entry_set_syscall_nr) 2098 syscall_nr = &syscall_nr_val; 2099 if (entry == ptrace_entry_set_syscall_ret) 2100 syscall_ret = &syscall_ret_val; 2101 2102 /* Now handle the actual rewriting cases. */ 2103 switch (self->syscall_nr) { 2104 case __NR_getpid: 2105 syscall_nr_val = __NR_getppid; 2106 /* Never change syscall return for this case. */ 2107 syscall_ret = NULL; 2108 break; 2109 case __NR_gettid: 2110 syscall_nr_val = -1; 2111 syscall_ret_val = 45000; 2112 break; 2113 case __NR_openat: 2114 syscall_nr_val = -1; 2115 syscall_ret_val = -ESRCH; 2116 break; 2117 default: 2118 /* Unhandled, do nothing. */ 2119 return; 2120 } 2121 2122 __change_syscall(_metadata, tracee, syscall_nr, syscall_ret); 2123 } 2124 2125 FIXTURE_VARIANT(TRACE_syscall) { 2126 /* 2127 * All of the SECCOMP_RET_TRACE behaviors can be tested with either 2128 * SECCOMP_RET_TRACE+PTRACE_CONT or plain ptrace()+PTRACE_SYSCALL. 2129 * This indicates if we should use SECCOMP_RET_TRACE (false), or 2130 * ptrace (true). 
2131 */ 2132 bool use_ptrace; 2133 }; 2134 2135 FIXTURE_VARIANT_ADD(TRACE_syscall, ptrace) { 2136 .use_ptrace = true, 2137 }; 2138 2139 FIXTURE_VARIANT_ADD(TRACE_syscall, seccomp) { 2140 .use_ptrace = false, 2141 }; 2142 2143 FIXTURE_SETUP(TRACE_syscall) 2144 { 2145 struct sock_filter filter[] = { 2146 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2147 offsetof(struct seccomp_data, nr)), 2148 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 2149 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002), 2150 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1), 2151 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003), 2152 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1), 2153 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004), 2154 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 2155 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005), 2156 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2157 }; 2158 struct sock_fprog prog = { 2159 .len = (unsigned short)ARRAY_SIZE(filter), 2160 .filter = filter, 2161 }; 2162 long ret; 2163 2164 /* Prepare some testable syscall results. */ 2165 self->mytid = syscall(__NR_gettid); 2166 ASSERT_GT(self->mytid, 0); 2167 ASSERT_NE(self->mytid, 1) { 2168 TH_LOG("Running this test as init is not supported. :)"); 2169 } 2170 2171 self->mypid = getpid(); 2172 ASSERT_GT(self->mypid, 0); 2173 ASSERT_EQ(self->mytid, self->mypid); 2174 2175 self->parent = getppid(); 2176 ASSERT_GT(self->parent, 0); 2177 ASSERT_NE(self->parent, self->mypid); 2178 2179 /* Launch tracer. */ 2180 self->tracer = setup_trace_fixture(_metadata, 2181 variant->use_ptrace ? tracer_ptrace 2182 : tracer_seccomp, 2183 self, variant->use_ptrace); 2184 2185 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 2186 ASSERT_EQ(0, ret); 2187 2188 /* Do not install seccomp rewrite filters, as we'll use ptrace instead. 
*/ 2189 if (variant->use_ptrace) 2190 return; 2191 2192 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2193 ASSERT_EQ(0, ret); 2194 } 2195 2196 FIXTURE_TEARDOWN(TRACE_syscall) 2197 { 2198 teardown_trace_fixture(_metadata, self->tracer); 2199 } 2200 2201 TEST(negative_ENOSYS) 2202 { 2203 #if defined(__arm__) 2204 SKIP(return, "arm32 does not support calling syscall -1"); 2205 #endif 2206 /* 2207 * There should be no difference between an "internal" skip 2208 * and userspace asking for syscall "-1". 2209 */ 2210 errno = 0; 2211 EXPECT_EQ(-1, syscall(-1)); 2212 EXPECT_EQ(errno, ENOSYS); 2213 /* And no difference for "still not valid but not -1". */ 2214 errno = 0; 2215 EXPECT_EQ(-1, syscall(-101)); 2216 EXPECT_EQ(errno, ENOSYS); 2217 } 2218 2219 TEST_F(TRACE_syscall, negative_ENOSYS) 2220 { 2221 negative_ENOSYS(_metadata); 2222 } 2223 2224 TEST_F(TRACE_syscall, syscall_allowed) 2225 { 2226 /* getppid works as expected (no changes). */ 2227 EXPECT_EQ(self->parent, syscall(__NR_getppid)); 2228 EXPECT_NE(self->mypid, syscall(__NR_getppid)); 2229 } 2230 2231 TEST_F(TRACE_syscall, syscall_redirected) 2232 { 2233 /* getpid has been redirected to getppid as expected. */ 2234 EXPECT_EQ(self->parent, syscall(__NR_getpid)); 2235 EXPECT_NE(self->mypid, syscall(__NR_getpid)); 2236 } 2237 2238 TEST_F(TRACE_syscall, syscall_errno) 2239 { 2240 /* Tracer should skip the open syscall, resulting in ESRCH. */ 2241 EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat)); 2242 } 2243 2244 TEST_F(TRACE_syscall, syscall_faked) 2245 { 2246 /* Tracer skips the gettid syscall and store altered return value. 
*/ 2247 EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid)); 2248 } 2249 2250 TEST_F_SIGNAL(TRACE_syscall, kill_immediate, SIGSYS) 2251 { 2252 struct sock_filter filter[] = { 2253 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2254 offsetof(struct seccomp_data, nr)), 2255 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_mknodat, 0, 1), 2256 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD), 2257 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2258 }; 2259 struct sock_fprog prog = { 2260 .len = (unsigned short)ARRAY_SIZE(filter), 2261 .filter = filter, 2262 }; 2263 long ret; 2264 2265 /* Install "kill on mknodat" filter. */ 2266 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2267 ASSERT_EQ(0, ret); 2268 2269 /* This should immediately die with SIGSYS, regardless of tracer. */ 2270 EXPECT_EQ(-1, syscall(__NR_mknodat, -1, NULL, 0, 0)); 2271 } 2272 2273 TEST_F(TRACE_syscall, skip_after) 2274 { 2275 struct sock_filter filter[] = { 2276 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2277 offsetof(struct seccomp_data, nr)), 2278 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 2279 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM), 2280 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2281 }; 2282 struct sock_fprog prog = { 2283 .len = (unsigned short)ARRAY_SIZE(filter), 2284 .filter = filter, 2285 }; 2286 long ret; 2287 2288 /* Install additional "errno on getppid" filter. */ 2289 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2290 ASSERT_EQ(0, ret); 2291 2292 /* Tracer will redirect getpid to getppid, and we should see EPERM. 
 */
	errno = 0;
	EXPECT_EQ(-1, syscall(__NR_getpid));
	EXPECT_EQ(EPERM, errno);
}

/* A later RET_KILL filter must win over an earlier RET_TRACE redirect. */
TEST_F_SIGNAL(TRACE_syscall, kill_after, SIGSYS)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	/* Install additional "death on getppid" filter. */
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
	ASSERT_EQ(0, ret);

	/* Tracer will redirect getpid to getppid, and we should die. */
	EXPECT_NE(self->mypid, syscall(__NR_getpid));
}

/* Argument validation of the seccomp(2) syscall entry point. */
TEST(seccomp_syscall)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	/* Reject insane operation. */
	ret = seccomp(-1, 0, &prog);
	ASSERT_NE(ENOSYS, errno) {
		TH_LOG("Kernel does not support seccomp syscall!");
	}
	EXPECT_EQ(EINVAL, errno) {
		TH_LOG("Did not reject crazy op value!");
	}

	/* Reject strict with flags or pointer. */
	ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
	EXPECT_EQ(EINVAL, errno) {
		TH_LOG("Did not reject mode strict with flags!");
	}
	ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
	EXPECT_EQ(EINVAL, errno) {
		TH_LOG("Did not reject mode strict with uargs!");
	}

	/* Reject insane args for filter.
*/ 2357 ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog); 2358 EXPECT_EQ(EINVAL, errno) { 2359 TH_LOG("Did not reject crazy filter flags!"); 2360 } 2361 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL); 2362 EXPECT_EQ(EFAULT, errno) { 2363 TH_LOG("Did not reject NULL filter!"); 2364 } 2365 2366 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); 2367 EXPECT_EQ(0, errno) { 2368 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s", 2369 strerror(errno)); 2370 } 2371 } 2372 2373 TEST(seccomp_syscall_mode_lock) 2374 { 2375 struct sock_filter filter[] = { 2376 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2377 }; 2378 struct sock_fprog prog = { 2379 .len = (unsigned short)ARRAY_SIZE(filter), 2380 .filter = filter, 2381 }; 2382 long ret; 2383 2384 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); 2385 ASSERT_EQ(0, ret) { 2386 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2387 } 2388 2389 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); 2390 ASSERT_NE(ENOSYS, errno) { 2391 TH_LOG("Kernel does not support seccomp syscall!"); 2392 } 2393 EXPECT_EQ(0, ret) { 2394 TH_LOG("Could not install filter!"); 2395 } 2396 2397 /* Make sure neither entry point will switch to strict. */ 2398 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0); 2399 EXPECT_EQ(EINVAL, errno) { 2400 TH_LOG("Switched to mode strict!"); 2401 } 2402 2403 ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL); 2404 EXPECT_EQ(EINVAL, errno) { 2405 TH_LOG("Switched to mode strict!"); 2406 } 2407 } 2408 2409 /* 2410 * Test detection of known and unknown filter flags. Userspace needs to be able 2411 * to check if a filter flag is supported by the current kernel and a good way 2412 * of doing that is by attempting to enter filter mode, with the flag bit in 2413 * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates 2414 * that the flag is valid and EINVAL indicates that the flag is invalid. 
2415 */ 2416 TEST(detect_seccomp_filter_flags) 2417 { 2418 unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC, 2419 SECCOMP_FILTER_FLAG_LOG, 2420 SECCOMP_FILTER_FLAG_SPEC_ALLOW, 2421 SECCOMP_FILTER_FLAG_NEW_LISTENER, 2422 SECCOMP_FILTER_FLAG_TSYNC_ESRCH }; 2423 unsigned int exclusive[] = { 2424 SECCOMP_FILTER_FLAG_TSYNC, 2425 SECCOMP_FILTER_FLAG_NEW_LISTENER }; 2426 unsigned int flag, all_flags, exclusive_mask; 2427 int i; 2428 long ret; 2429 2430 /* Test detection of individual known-good filter flags */ 2431 for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) { 2432 int bits = 0; 2433 2434 flag = flags[i]; 2435 /* Make sure the flag is a single bit! */ 2436 while (flag) { 2437 if (flag & 0x1) 2438 bits ++; 2439 flag >>= 1; 2440 } 2441 ASSERT_EQ(1, bits); 2442 flag = flags[i]; 2443 2444 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2445 ASSERT_NE(ENOSYS, errno) { 2446 TH_LOG("Kernel does not support seccomp syscall!"); 2447 } 2448 EXPECT_EQ(-1, ret); 2449 EXPECT_EQ(EFAULT, errno) { 2450 TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!", 2451 flag); 2452 } 2453 2454 all_flags |= flag; 2455 } 2456 2457 /* 2458 * Test detection of all known-good filter flags combined. But 2459 * for the exclusive flags we need to mask them out and try them 2460 * individually for the "all flags" testing. 2461 */ 2462 exclusive_mask = 0; 2463 for (i = 0; i < ARRAY_SIZE(exclusive); i++) 2464 exclusive_mask |= exclusive[i]; 2465 for (i = 0; i < ARRAY_SIZE(exclusive); i++) { 2466 flag = all_flags & ~exclusive_mask; 2467 flag |= exclusive[i]; 2468 2469 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2470 EXPECT_EQ(-1, ret); 2471 EXPECT_EQ(EFAULT, errno) { 2472 TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!", 2473 flag); 2474 } 2475 } 2476 2477 /* Test detection of an unknown filter flags, without exclusives. 
 */
	/* All-ones (minus exclusives) contains unknown bits: must be EINVAL. */
	flag = -1;
	flag &= ~exclusive_mask;
	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno) {
		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
		       flag);
	}

	/*
	 * Test detection of an unknown filter flag that may simply need to be
	 * added to this test
	 */
	flag = flags[ARRAY_SIZE(flags) - 1] << 1;
	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno) {
		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
		       flag);
	}
}

/* TSYNC must be accepted even when it is the very first filter installed. */
TEST(TSYNC_first)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
		      &prog);
	ASSERT_NE(ENOSYS, errno) {
		TH_LOG("Kernel does not support seccomp syscall!");
	}
	EXPECT_EQ(0, ret) {
		TH_LOG("Could not install initial filter with TSYNC!");
	}
}

#define TSYNC_SIBLINGS 2
/* Per-thread bookkeeping shared between the TSYNC fixture and siblings. */
struct tsync_sibling {
	pthread_t tid;			/* pthread handle; 0 once joined */
	pid_t system_tid;		/* kernel tid, from gettid(2) */
	sem_t *started;			/* posted once the sibling is up */
	pthread_cond_t *cond;		/* wakeup signal from the test body */
	pthread_mutex_t *mutex;		/* guards cond and num_waits */
	int diverge;			/* re-apply root prog to fork the filter tree */
	int num_waits;			/* cond waits before proceeding */
	struct sock_fprog *prog;	/* filter applied when diverging */
	struct __test_metadata *metadata;
};

/*
 * To avoid joining joined threads (which is not allowed by Bionic),
 * make sure we both successfully join and clear the tid to skip a
 * later join attempt during fixture teardown. Any remaining threads
 * will be directly killed during teardown.
 */
#define PTHREAD_JOIN(tid, status) \
	do { \
		int _rc = pthread_join(tid, status); \
		if (_rc) { \
			TH_LOG("pthread_join of tid %u failed: %d\n", \
				(unsigned int)tid, _rc); \
		} else { \
			tid = 0; \
		} \
	} while (0)

FIXTURE(TSYNC) {
	struct sock_fprog root_prog, apply_prog;
	struct tsync_sibling sibling[TSYNC_SIBLINGS];
	sem_t started;
	pthread_cond_t cond;
	pthread_mutex_t mutex;
	int sibling_count;
};

FIXTURE_SETUP(TSYNC)
{
	/* root filter: allow everything (baseline for all threads). */
	struct sock_filter root_filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	/* apply filter: kill on read(2), used to verify TSYNC took effect. */
	struct sock_filter apply_filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};

	memset(&self->root_prog, 0, sizeof(self->root_prog));
	memset(&self->apply_prog, 0, sizeof(self->apply_prog));
	memset(&self->sibling, 0, sizeof(self->sibling));
	/* Filters are heap copies: the stack arrays die with this function. */
	self->root_prog.filter = malloc(sizeof(root_filter));
	ASSERT_NE(NULL, self->root_prog.filter);
	memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
	self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);

	self->apply_prog.filter = malloc(sizeof(apply_filter));
	ASSERT_NE(NULL, self->apply_prog.filter);
	memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
	self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);

	self->sibling_count = 0;
	pthread_mutex_init(&self->mutex, NULL);
	pthread_cond_init(&self->cond, NULL);
	sem_init(&self->started, 0, 0);
	/* Both siblings share the fixture's sync primitives and root prog. */
	self->sibling[0].tid = 0;
	self->sibling[0].cond = &self->cond;
	self->sibling[0].started = &self->started;
	self->sibling[0].mutex = &self->mutex;
	self->sibling[0].diverge = 0;
	self->sibling[0].num_waits = 1;
	self->sibling[0].prog = &self->root_prog;
	self->sibling[0].metadata = _metadata;
	self->sibling[1].tid = 0;
	self->sibling[1].cond = &self->cond;
	self->sibling[1].started = &self->started;
	self->sibling[1].mutex = &self->mutex;
	self->sibling[1].diverge = 0;
	self->sibling[1].prog = &self->root_prog;
	self->sibling[1].num_waits = 1;
	self->sibling[1].metadata = _metadata;
}

FIXTURE_TEARDOWN(TSYNC)
{
	int sib = 0;

	if (self->root_prog.filter)
		free(self->root_prog.filter);
	if (self->apply_prog.filter)
		free(self->apply_prog.filter);

	/* Only threads that were actually started and never joined. */
	for ( ; sib < self->sibling_count; ++sib) {
		struct tsync_sibling *s = &self->sibling[sib];

		if (!s->tid)
			continue;
		/*
		 * If a thread is still running, it may be stuck, so hit
		 * it over the head really hard.
		 */
		pthread_kill(s->tid, 9);
	}
	pthread_mutex_destroy(&self->mutex);
	pthread_cond_destroy(&self->cond);
	sem_destroy(&self->started);
}

/* Thread body for TSYNC siblings; returns a SIBLING_EXIT_* code. */
void *tsync_sibling(void *data)
{
	long ret = 0;
	struct tsync_sibling *me = data;

	me->system_tid = syscall(__NR_gettid);

	pthread_mutex_lock(me->mutex);
	if (me->diverge) {
		/* Just re-apply the root prog to fork the tree */
		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
				me->prog, 0, 0);
	}
	sem_post(me->started);
	/* Return outside of started so parent notices failures.
 */
	if (ret) {
		pthread_mutex_unlock(me->mutex);
		return (void *)SIBLING_EXIT_FAILURE;
	}
	/* Block until the test body broadcasts num_waits times. */
	do {
		pthread_cond_wait(me->cond, me->mutex);
		me->num_waits = me->num_waits - 1;
	} while (me->num_waits);
	pthread_mutex_unlock(me->mutex);

	/* TSYNC should have propagated no-new-privs to this thread too. */
	ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
	if (!ret)
		return (void *)SIBLING_EXIT_NEWPRIVS;
	/* Trips the apply_prog "kill on read" filter if it was synced. */
	read(-1, NULL, 0);
	return (void *)SIBLING_EXIT_UNKILLED;
}

/* Spawn one sibling thread running tsync_sibling(). */
void tsync_start_sibling(struct tsync_sibling *sibling)
{
	pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
}

/* A sibling whose prctl() is filtered must report failure, not wedge. */
TEST_F(TSYNC, siblings_fail_prctl)
{
	long ret;
	void *status;
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};

	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	/* Check prctl failure detection by requesting sib 0 diverge.
*/ 2696 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); 2697 ASSERT_NE(ENOSYS, errno) { 2698 TH_LOG("Kernel does not support seccomp syscall!"); 2699 } 2700 ASSERT_EQ(0, ret) { 2701 TH_LOG("setting filter failed"); 2702 } 2703 2704 self->sibling[0].diverge = 1; 2705 tsync_start_sibling(&self->sibling[0]); 2706 tsync_start_sibling(&self->sibling[1]); 2707 2708 while (self->sibling_count < TSYNC_SIBLINGS) { 2709 sem_wait(&self->started); 2710 self->sibling_count++; 2711 } 2712 2713 /* Signal the threads to clean up*/ 2714 pthread_mutex_lock(&self->mutex); 2715 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2716 TH_LOG("cond broadcast non-zero"); 2717 } 2718 pthread_mutex_unlock(&self->mutex); 2719 2720 /* Ensure diverging sibling failed to call prctl. */ 2721 PTHREAD_JOIN(self->sibling[0].tid, &status); 2722 EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status); 2723 PTHREAD_JOIN(self->sibling[1].tid, &status); 2724 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2725 } 2726 2727 TEST_F(TSYNC, two_siblings_with_ancestor) 2728 { 2729 long ret; 2730 void *status; 2731 2732 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2733 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2734 } 2735 2736 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2737 ASSERT_NE(ENOSYS, errno) { 2738 TH_LOG("Kernel does not support seccomp syscall!"); 2739 } 2740 ASSERT_EQ(0, ret) { 2741 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2742 } 2743 tsync_start_sibling(&self->sibling[0]); 2744 tsync_start_sibling(&self->sibling[1]); 2745 2746 while (self->sibling_count < TSYNC_SIBLINGS) { 2747 sem_wait(&self->started); 2748 self->sibling_count++; 2749 } 2750 2751 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2752 &self->apply_prog); 2753 ASSERT_EQ(0, ret) { 2754 TH_LOG("Could install filter on all threads!"); 2755 } 2756 /* Tell the siblings to test the policy */ 2757 pthread_mutex_lock(&self->mutex); 2758 ASSERT_EQ(0, 
pthread_cond_broadcast(&self->cond)) { 2759 TH_LOG("cond broadcast non-zero"); 2760 } 2761 pthread_mutex_unlock(&self->mutex); 2762 /* Ensure they are both killed and don't exit cleanly. */ 2763 PTHREAD_JOIN(self->sibling[0].tid, &status); 2764 EXPECT_EQ(0x0, (long)status); 2765 PTHREAD_JOIN(self->sibling[1].tid, &status); 2766 EXPECT_EQ(0x0, (long)status); 2767 } 2768 2769 TEST_F(TSYNC, two_sibling_want_nnp) 2770 { 2771 void *status; 2772 2773 /* start siblings before any prctl() operations */ 2774 tsync_start_sibling(&self->sibling[0]); 2775 tsync_start_sibling(&self->sibling[1]); 2776 while (self->sibling_count < TSYNC_SIBLINGS) { 2777 sem_wait(&self->started); 2778 self->sibling_count++; 2779 } 2780 2781 /* Tell the siblings to test no policy */ 2782 pthread_mutex_lock(&self->mutex); 2783 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2784 TH_LOG("cond broadcast non-zero"); 2785 } 2786 pthread_mutex_unlock(&self->mutex); 2787 2788 /* Ensure they are both upset about lacking nnp. 
*/ 2789 PTHREAD_JOIN(self->sibling[0].tid, &status); 2790 EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); 2791 PTHREAD_JOIN(self->sibling[1].tid, &status); 2792 EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); 2793 } 2794 2795 TEST_F(TSYNC, two_siblings_with_no_filter) 2796 { 2797 long ret; 2798 void *status; 2799 2800 /* start siblings before any prctl() operations */ 2801 tsync_start_sibling(&self->sibling[0]); 2802 tsync_start_sibling(&self->sibling[1]); 2803 while (self->sibling_count < TSYNC_SIBLINGS) { 2804 sem_wait(&self->started); 2805 self->sibling_count++; 2806 } 2807 2808 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2809 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2810 } 2811 2812 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2813 &self->apply_prog); 2814 ASSERT_NE(ENOSYS, errno) { 2815 TH_LOG("Kernel does not support seccomp syscall!"); 2816 } 2817 ASSERT_EQ(0, ret) { 2818 TH_LOG("Could install filter on all threads!"); 2819 } 2820 2821 /* Tell the siblings to test the policy */ 2822 pthread_mutex_lock(&self->mutex); 2823 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2824 TH_LOG("cond broadcast non-zero"); 2825 } 2826 pthread_mutex_unlock(&self->mutex); 2827 2828 /* Ensure they are both killed and don't exit cleanly. 
 */
	PTHREAD_JOIN(self->sibling[0].tid, &status);
	EXPECT_EQ(0x0, (long)status);
	PTHREAD_JOIN(self->sibling[1].tid, &status);
	EXPECT_EQ(0x0, (long)status);
}

/* TSYNC must fail and report the TID of a sibling with a diverged tree. */
TEST_F(TSYNC, two_siblings_with_one_divergence)
{
	long ret;
	void *status;

	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
	ASSERT_NE(ENOSYS, errno) {
		TH_LOG("Kernel does not support seccomp syscall!");
	}
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
	}
	/* Sibling 0 re-applies root_prog, forking its filter tree. */
	self->sibling[0].diverge = 1;
	tsync_start_sibling(&self->sibling[0]);
	tsync_start_sibling(&self->sibling[1]);

	while (self->sibling_count < TSYNC_SIBLINGS) {
		sem_wait(&self->started);
		self->sibling_count++;
	}

	/* TSYNC's failure return is the TID of the unsynchronizable thread. */
	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
		      &self->apply_prog);
	ASSERT_EQ(self->sibling[0].system_tid, ret) {
		TH_LOG("Did not fail on diverged sibling.");
	}

	/* Wake the threads */
	pthread_mutex_lock(&self->mutex);
	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
		TH_LOG("cond broadcast non-zero");
	}
	pthread_mutex_unlock(&self->mutex);

	/* Ensure they are both unkilled.
 */
	PTHREAD_JOIN(self->sibling[0].tid, &status);
	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
	PTHREAD_JOIN(self->sibling[1].tid, &status);
	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
}

/* With TSYNC_ESRCH, divergence must yield -1/ESRCH instead of a TID. */
TEST_F(TSYNC, two_siblings_with_one_divergence_no_tid_in_err)
{
	long ret, flags;
	void *status;

	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
	ASSERT_NE(ENOSYS, errno) {
		TH_LOG("Kernel does not support seccomp syscall!");
	}
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
	}
	/* Sibling 0 re-applies root_prog, forking its filter tree. */
	self->sibling[0].diverge = 1;
	tsync_start_sibling(&self->sibling[0]);
	tsync_start_sibling(&self->sibling[1]);

	while (self->sibling_count < TSYNC_SIBLINGS) {
		sem_wait(&self->started);
		self->sibling_count++;
	}

	flags = SECCOMP_FILTER_FLAG_TSYNC | \
		SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
	ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, &self->apply_prog);
	ASSERT_EQ(ESRCH, errno) {
		TH_LOG("Did not return ESRCH for diverged sibling.");
	}
	ASSERT_EQ(-1, ret) {
		TH_LOG("Did not fail on diverged sibling.");
	}

	/* Wake the threads */
	pthread_mutex_lock(&self->mutex);
	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
		TH_LOG("cond broadcast non-zero");
	}
	pthread_mutex_unlock(&self->mutex);

	/* Ensure they are both unkilled.
 */
	PTHREAD_JOIN(self->sibling[0].tid, &status);
	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
	PTHREAD_JOIN(self->sibling[1].tid, &status);
	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
}

/*
 * TSYNC retried after offending threads exit must eventually succeed,
 * including when one sibling was never under seccomp at all.
 */
TEST_F(TSYNC, two_siblings_not_under_filter)
{
	long ret, sib;
	void *status;
	struct timespec delay = { .tv_nsec = 100000000 };

	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	/*
	 * Sibling 0 will have its own seccomp policy
	 * and Sibling 1 will not be under seccomp at
	 * all. Sibling 1 will enter seccomp and 0
	 * will cause failure.
	 */
	self->sibling[0].diverge = 1;
	tsync_start_sibling(&self->sibling[0]);
	tsync_start_sibling(&self->sibling[1]);

	while (self->sibling_count < TSYNC_SIBLINGS) {
		sem_wait(&self->started);
		self->sibling_count++;
	}

	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
	ASSERT_NE(ENOSYS, errno) {
		TH_LOG("Kernel does not support seccomp syscall!");
	}
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
	}

	/* TSYNC reports the diverged sibling's TID as the failure value. */
	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
		      &self->apply_prog);
	ASSERT_EQ(ret, self->sibling[0].system_tid) {
		TH_LOG("Did not fail on diverged sibling.");
	}
	/* sib indexes the thread that blocked the sync (normally 0). */
	sib = 1;
	if (ret == self->sibling[0].system_tid)
		sib = 0;

	pthread_mutex_lock(&self->mutex);

	/* Increment the other siblings num_waits so we can clean up
	 * the one we just saw.
	 */
	self->sibling[!sib].num_waits += 1;

	/* Signal the thread to clean up. */
	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
		TH_LOG("cond broadcast non-zero");
	}
	pthread_mutex_unlock(&self->mutex);
	PTHREAD_JOIN(self->sibling[sib].tid, &status);
	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
	/* Poll for actual task death. pthread_join doesn't guarantee it. */
	while (!kill(self->sibling[sib].system_tid, 0))
		nanosleep(&delay, NULL);
	/* Switch to the remaining sibling */
	sib = !sib;

	/* With the diverged thread gone, TSYNC should now succeed. */
	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
		      &self->apply_prog);
	ASSERT_EQ(0, ret) {
		TH_LOG("Expected the remaining sibling to sync");
	};

	pthread_mutex_lock(&self->mutex);

	/* If remaining sibling didn't have a chance to wake up during
	 * the first broadcast, manually reduce the num_waits now.
	 */
	if (self->sibling[sib].num_waits > 1)
		self->sibling[sib].num_waits = 1;
	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
		TH_LOG("cond broadcast non-zero");
	}
	pthread_mutex_unlock(&self->mutex);
	PTHREAD_JOIN(self->sibling[sib].tid, &status);
	/* This sibling was killed by the synced "kill on read" filter. */
	EXPECT_EQ(0, (long)status);
	/* Poll for actual task death. pthread_join doesn't guarantee it. */
	while (!kill(self->sibling[sib].system_tid, 0))
		nanosleep(&delay, NULL);

	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
		      &self->apply_prog);
	ASSERT_EQ(0, ret);	/* just us chickens */
}

/* Make sure restarted syscalls are seen directly as "restart_syscall".
 */
TEST(syscall_restart)
{
	long ret;
	unsigned long msg;
	pid_t child_pid;
	int pipefd[2];
	int status;
	siginfo_t info = { };
	/*
	 * Jump offsets below land nanosleep/clock_nanosleep on the
	 * TRACE|0x100 return and restart_syscall on TRACE|0x200; all other
	 * listed syscalls (plus write) are allowed, everything else killed.
	 */
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			 offsetof(struct seccomp_data, nr)),

#ifdef __NR_sigreturn
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 7, 0),
#endif
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 6, 0),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 5, 0),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 4, 0),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 5, 0),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_clock_nanosleep, 4, 0),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),

		/* Allow __NR_write for easy logging. */
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		/* The nanosleep jump target. */
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100),
		/* The restart_syscall jump target. */
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
#if defined(__arm__)
	struct utsname utsbuf;
#endif

	ASSERT_EQ(0, pipe(pipefd));

	child_pid = fork();
	ASSERT_LE(0, child_pid);
	if (child_pid == 0) {
		/* Child uses EXPECT not ASSERT to deliver status correctly. */
		char buf = ' ';
		struct timespec timeout = { };

		/* Attach parent as tracer and stop. */
		EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
		EXPECT_EQ(0, raise(SIGSTOP));

		EXPECT_EQ(0, close(pipefd[1]));

		EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
			TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
		}

		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
		EXPECT_EQ(0, ret) {
			TH_LOG("Failed to install filter!");
		}

		/* Wait for parent's go-ahead before sleeping. */
		EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
			TH_LOG("Failed to read() sync from parent");
		}
		EXPECT_EQ('.', buf) {
			TH_LOG("Failed to get sync data from read()");
		}

		/* Start nanosleep to be interrupted. */
		timeout.tv_sec = 1;
		errno = 0;
		EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
			TH_LOG("Call to nanosleep() failed (errno %d: %s)",
				errno, strerror(errno));
		}

		/* Read final sync from parent. */
		EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
			TH_LOG("Failed final read() from parent");
		}
		EXPECT_EQ('!', buf) {
			TH_LOG("Failed to get final data from read()");
		}

		/* Directly report the status of our test harness results. */
		syscall(__NR_exit, _metadata->exit_code);
	}
	EXPECT_EQ(0, close(pipefd[0]));

	/* Attach to child, setup options, and release. */
	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
	ASSERT_EQ(true, WIFSTOPPED(status));
	ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
			    PTRACE_O_TRACESECCOMP));
	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
	ASSERT_EQ(1, write(pipefd[1], ".", 1));

	/* Wait for nanosleep() to start. */
	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
	ASSERT_EQ(true, WIFSTOPPED(status));
	ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
	ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
	ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
	/* 0x100 is the nanosleep TRACE tag from the filter above. */
	ASSERT_EQ(0x100, msg);
	ret = get_syscall(_metadata, child_pid);
	EXPECT_TRUE(ret == __NR_nanosleep || ret == __NR_clock_nanosleep);

	/* Might as well check siginfo for sanity while we're here. */
	ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
	ASSERT_EQ(SIGTRAP, info.si_signo);
	ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
	EXPECT_EQ(0, info.si_errno);
	EXPECT_EQ(getuid(), info.si_uid);
	/* Verify signal delivery came from child (seccomp-triggered). */
	EXPECT_EQ(child_pid, info.si_pid);

	/* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */
	ASSERT_EQ(0, kill(child_pid, SIGSTOP));
	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
	ASSERT_EQ(true, WIFSTOPPED(status));
	ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
	ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
	/*
	 * There is no siginfo on SIGSTOP any more, so we can't verify
	 * signal delivery came from parent now (getpid() == info.si_pid).
	 * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com
	 * At least verify the SIGSTOP via PTRACE_GETSIGINFO.
	 */
	EXPECT_EQ(SIGSTOP, info.si_signo);

	/* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
	ASSERT_EQ(0, kill(child_pid, SIGCONT));
	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
	ASSERT_EQ(true, WIFSTOPPED(status));
	ASSERT_EQ(SIGCONT, WSTOPSIG(status));
	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));

	/* Wait for restart_syscall() to start. */
	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
	ASSERT_EQ(true, WIFSTOPPED(status));
	ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
	ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
	ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));

	/* 0x200 is the restart_syscall TRACE tag from the filter above. */
	ASSERT_EQ(0x200, msg);
	ret = get_syscall(_metadata, child_pid);
#if defined(__arm__)
	/*
	 * - native ARM registers do NOT expose true syscall.
	 * - compat ARM registers on ARM64 DO expose true syscall.
	 * - values of utsbuf.machine include 'armv8l' or 'armb8b'
	 *   for ARM64 running in compat mode.
	 */
	ASSERT_EQ(0, uname(&utsbuf));
	if ((strncmp(utsbuf.machine, "arm", 3) == 0) &&
	    (strncmp(utsbuf.machine, "armv8l", 6) != 0) &&
	    (strncmp(utsbuf.machine, "armv8b", 6) != 0)) {
		EXPECT_EQ(__NR_nanosleep, ret);
	} else
#endif
	{
		EXPECT_EQ(__NR_restart_syscall, ret);
	}

	/* Write again to end test.
 */
	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
	ASSERT_EQ(1, write(pipefd[1], "!", 1));
	EXPECT_EQ(0, close(pipefd[1]));

	/* Propagate the child's harness result as our own. */
	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
	if (WIFSIGNALED(status) || WEXITSTATUS(status))
		_metadata->exit_code = KSFT_FAIL;
}

/* FILTER_FLAG_LOG: rejected in strict mode, honored in filter mode. */
TEST_SIGNAL(filter_flag_log, SIGSYS)
{
	struct sock_filter allow_filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_filter kill_filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog allow_prog = {
		.len = (unsigned short)ARRAY_SIZE(allow_filter),
		.filter = allow_filter,
	};
	struct sock_fprog kill_prog = {
		.len = (unsigned short)ARRAY_SIZE(kill_filter),
		.filter = kill_filter,
	};
	long ret;
	pid_t parent = getppid();

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	/* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */
	ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG,
		      &allow_prog);
	ASSERT_NE(ENOSYS, errno) {
		TH_LOG("Kernel does not support seccomp syscall!");
	}
	EXPECT_NE(0, ret) {
		TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!");
	}
	EXPECT_EQ(EINVAL, errno) {
		TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!");
	}

	/* Verify that a simple, permissive filter can be added with no flags */
	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
	EXPECT_EQ(0, ret);

	/* See if the same filter can be added with the FILTER_FLAG_LOG flag */
	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
		      &allow_prog);
	ASSERT_NE(EINVAL, errno) {
		TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!");
	}
	EXPECT_EQ(0, ret);

	/* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */
	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
		      &kill_prog);
	EXPECT_EQ(0, ret);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* getpid() should never return. */
	EXPECT_EQ(0, syscall(__NR_getpid));
}

/* SECCOMP_GET_ACTION_AVAIL must accept known actions, reject unknown. */
TEST(get_action_avail)
{
	__u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP,
			    SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
			    SECCOMP_RET_LOG, SECCOMP_RET_ALLOW };
	__u32 unknown_action = 0x10000000U;
	int i;
	long ret;

	ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]);
	ASSERT_NE(ENOSYS, errno) {
		TH_LOG("Kernel does not support seccomp syscall!");
	}
	ASSERT_NE(EINVAL, errno) {
		TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!");
	}
	EXPECT_EQ(ret, 0);

	for (i = 0; i < ARRAY_SIZE(actions); i++) {
		ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]);
		EXPECT_EQ(ret, 0) {
			TH_LOG("Expected action (0x%X) not available!",
			       actions[i]);
		}
	}

	/* Check that an unknown action is handled properly (EOPNOTSUPP) */
	ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action);
	EXPECT_EQ(ret, -1);
	EXPECT_EQ(errno, EOPNOTSUPP);
}

/* PTRACE_SECCOMP_GET_METADATA must report per-filter flags and offsets. */
TEST(get_metadata)
{
	pid_t pid;
	int pipefd[2];
	char buf;
	struct seccomp_metadata md;
	long ret;

	/* Only real root can get metadata.
 */
	if (geteuid()) {
		SKIP(return, "get_metadata requires real root");
		return;
	}

	ASSERT_EQ(0, pipe(pipefd));

	pid = fork();
	ASSERT_GE(pid, 0);
	if (pid == 0) {
		struct sock_filter filter[] = {
			BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		};
		struct sock_fprog prog = {
			.len = (unsigned short)ARRAY_SIZE(filter),
			.filter = filter,
		};

		/* one with log, one without */
		EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER,
				     SECCOMP_FILTER_FLAG_LOG, &prog));
		EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog));

		/* Signal readiness to the parent, then park forever. */
		EXPECT_EQ(0, close(pipefd[0]));
		ASSERT_EQ(1, write(pipefd[1], "1", 1));
		ASSERT_EQ(0, close(pipefd[1]));

		while (1)
			sleep(100);
	}

	ASSERT_EQ(0, close(pipefd[1]));
	ASSERT_EQ(1, read(pipefd[0], &buf, 1));

	ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid));
	ASSERT_EQ(pid, waitpid(pid, NULL, 0));

	/* Past here must not use ASSERT or child process is never killed. */

	/* filter_off 0 is the most recently installed filter (no LOG flag). */
	md.filter_off = 0;
	errno = 0;
	ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
	EXPECT_EQ(sizeof(md), ret) {
		if (errno == EINVAL)
			SKIP(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)");
	}

	EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG);
	EXPECT_EQ(md.filter_off, 0);

	md.filter_off = 1;
	ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
	EXPECT_EQ(sizeof(md), ret);
	EXPECT_EQ(md.flags, 0);
	EXPECT_EQ(md.filter_off, 1);

skip:
	ASSERT_EQ(0, kill(pid, SIGKILL));
}

/*
 * Install a filter that returns SECCOMP_RET_USER_NOTIF for syscall "nr"
 * and allows everything else, loading it with the given seccomp flags.
 * Returns seccomp()'s result: with SECCOMP_FILTER_FLAG_NEW_LISTENER in
 * "flags" that is the listener fd, otherwise 0 on success or -1 on error.
 */
static int user_notif_syscall(int nr, unsigned int flags)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};

	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};

	return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog);
}

/* Sentinel value the supervisor injects as the intercepted syscall's result. */
#define USER_NOTIF_MAGIC INT_MAX
TEST(user_notification_basic)
{
	pid_t pid;
	long ret;
	int status, listener;
	struct seccomp_notif req = {};
	struct seccomp_notif_resp resp = {};
	struct pollfd pollfd;

	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	pid = fork();
	ASSERT_GE(pid, 0);

	/* Check that we get -ENOSYS with no listener attached */
	if (pid == 0) {
		if (user_notif_syscall(__NR_getppid, 0) < 0)
			exit(1);
		ret = syscall(__NR_getppid);
		exit(ret >= 0 || errno != ENOSYS);
	}

	EXPECT_EQ(waitpid(pid, &status, 0), pid);
	EXPECT_EQ(true, WIFEXITED(status));
	EXPECT_EQ(0, WEXITSTATUS(status));

	/* Add some no-op filters for grins. */
	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);

	/* Check that the basic notification machinery works */
	listener = user_notif_syscall(__NR_getppid,
				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
	ASSERT_GE(listener, 0);

	/* Installing a second listener in the chain should EBUSY */
	EXPECT_EQ(user_notif_syscall(__NR_getppid,
				     SECCOMP_FILTER_FLAG_NEW_LISTENER),
		  -1);
	EXPECT_EQ(errno, EBUSY);

	pid = fork();
	ASSERT_GE(pid, 0);

	if (pid == 0) {
		ret = syscall(__NR_getppid);
		exit(ret != USER_NOTIF_MAGIC);
	}

	pollfd.fd = listener;
	pollfd.events = POLLIN | POLLOUT;

	EXPECT_GT(poll(&pollfd, 1, -1), 0);
	EXPECT_EQ(pollfd.revents, POLLIN);

	/* Test that we can't pass garbage to the kernel.
 */
	memset(&req, 0, sizeof(req));
	req.pid = -1;
	errno = 0;
	ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno);

	/* If the garbage recv failed as expected, do a real one. */
	if (ret) {
		req.pid = 0;
		EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
	}

	pollfd.fd = listener;
	pollfd.events = POLLIN | POLLOUT;

	EXPECT_GT(poll(&pollfd, 1, -1), 0);
	EXPECT_EQ(pollfd.revents, POLLOUT);

	EXPECT_EQ(req.data.nr, __NR_getppid);

	resp.id = req.id;
	resp.error = 0;
	resp.val = USER_NOTIF_MAGIC;

	/* check that we make sure flags == 0 */
	resp.flags = 1;
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
	EXPECT_EQ(errno, EINVAL);

	resp.flags = 0;
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);

	EXPECT_EQ(waitpid(pid, &status, 0), pid);
	EXPECT_EQ(true, WIFEXITED(status));
	EXPECT_EQ(0, WEXITSTATUS(status));
}

/*
 * NEW_LISTENER used to be mutually exclusive with TSYNC; with
 * TSYNC_ESRCH the combination is permitted.
 */
TEST(user_notification_with_tsync)
{
	int ret;
	unsigned int flags;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	/* these were exclusive */
	flags = SECCOMP_FILTER_FLAG_NEW_LISTENER |
		SECCOMP_FILTER_FLAG_TSYNC;
	ASSERT_EQ(-1, user_notif_syscall(__NR_getppid, flags));
	ASSERT_EQ(EINVAL, errno);

	/* but now they're not */
	flags |= SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
	ret = user_notif_syscall(__NR_getppid, flags);
	/* close() of -1 on failure is harmless; the ASSERT reports it. */
	close(ret);
	ASSERT_LE(0, ret);
}

TEST(user_notification_kill_in_middle)
{
	pid_t pid;
	long ret;
	int listener;
	struct seccomp_notif req = {};
	struct seccomp_notif_resp resp = {};

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	listener = user_notif_syscall(__NR_getppid,
				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
	ASSERT_GE(listener, 0);

	/*
	 * Check that nothing bad happens when we kill the task in the middle
	 * of a syscall.
	 */
	pid = fork();
	ASSERT_GE(pid, 0);

	if (pid == 0) {
		ret = syscall(__NR_getppid);
		exit(ret != USER_NOTIF_MAGIC);
	}

	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), 0);

	EXPECT_EQ(kill(pid, SIGKILL), 0);
	EXPECT_EQ(waitpid(pid, NULL, 0), pid);

	/* The notification must be invalidated once the task is gone. */
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), -1);

	resp.id = req.id;
	ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp);
	EXPECT_EQ(ret, -1);
	EXPECT_EQ(errno, ENOENT);
}

/* fd the signal handler writes to; set by the child before arming SIGUSR1. */
static int handled = -1;

static void signal_handler(int signal)
{
	if (write(handled, "c", 1) != 1)
		perror("write from signal");
}

/* Deliberately empty handler (used by other tests in this file). */
static void signal_handler_nop(int signal)
{
}

/*
 * Check signal delivery while a task is blocked in USER_NOTIF: the first
 * notification dies, a second one is generated, and -ERESTARTSYS can be
 * propagated back to the tracee.
 */
TEST(user_notification_signal)
{
	pid_t pid;
	long ret;
	int status, listener, sk_pair[2];
	struct seccomp_notif req = {};
	struct seccomp_notif_resp resp = {};
	char c;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);

	listener = user_notif_syscall(__NR_gettid,
				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
	ASSERT_GE(listener, 0);

	pid = fork();
	ASSERT_GE(pid, 0);

	if (pid == 0) {
		close(sk_pair[0]);
		handled = sk_pair[1];
		if (signal(SIGUSR1, signal_handler) == SIG_ERR) {
			perror("signal");
			exit(1);
		}
		/*
		 * ERESTARTSYS behavior is a bit hard to test, because we need
		 * to rely on a signal that has not yet been handled.
Let's at 3596 * least check that the error code gets propagated through, and 3597 * hope that it doesn't break when there is actually a signal :) 3598 */ 3599 ret = syscall(__NR_gettid); 3600 exit(!(ret == -1 && errno == 512)); 3601 } 3602 3603 close(sk_pair[1]); 3604 3605 memset(&req, 0, sizeof(req)); 3606 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3607 3608 EXPECT_EQ(kill(pid, SIGUSR1), 0); 3609 3610 /* 3611 * Make sure the signal really is delivered, which means we're not 3612 * stuck in the user notification code any more and the notification 3613 * should be dead. 3614 */ 3615 EXPECT_EQ(read(sk_pair[0], &c, 1), 1); 3616 3617 resp.id = req.id; 3618 resp.error = -EPERM; 3619 resp.val = 0; 3620 3621 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3622 EXPECT_EQ(errno, ENOENT); 3623 3624 memset(&req, 0, sizeof(req)); 3625 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3626 3627 resp.id = req.id; 3628 resp.error = -512; /* -ERESTARTSYS */ 3629 resp.val = 0; 3630 3631 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3632 3633 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3634 EXPECT_EQ(true, WIFEXITED(status)); 3635 EXPECT_EQ(0, WEXITSTATUS(status)); 3636 } 3637 3638 TEST(user_notification_closed_listener) 3639 { 3640 pid_t pid; 3641 long ret; 3642 int status, listener; 3643 3644 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3645 ASSERT_EQ(0, ret) { 3646 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3647 } 3648 3649 listener = user_notif_syscall(__NR_getppid, 3650 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3651 ASSERT_GE(listener, 0); 3652 3653 /* 3654 * Check that we get an ENOSYS when the listener is closed. 
3655 */ 3656 pid = fork(); 3657 ASSERT_GE(pid, 0); 3658 if (pid == 0) { 3659 close(listener); 3660 ret = syscall(__NR_getppid); 3661 exit(ret != -1 && errno != ENOSYS); 3662 } 3663 3664 close(listener); 3665 3666 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3667 EXPECT_EQ(true, WIFEXITED(status)); 3668 EXPECT_EQ(0, WEXITSTATUS(status)); 3669 } 3670 3671 /* 3672 * Check that a pid in a child namespace still shows up as valid in ours. 3673 */ 3674 TEST(user_notification_child_pid_ns) 3675 { 3676 pid_t pid; 3677 int status, listener; 3678 struct seccomp_notif req = {}; 3679 struct seccomp_notif_resp resp = {}; 3680 3681 ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0) { 3682 if (errno == EINVAL) 3683 SKIP(return, "kernel missing CLONE_NEWUSER support"); 3684 }; 3685 3686 listener = user_notif_syscall(__NR_getppid, 3687 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3688 ASSERT_GE(listener, 0); 3689 3690 pid = fork(); 3691 ASSERT_GE(pid, 0); 3692 3693 if (pid == 0) 3694 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3695 3696 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3697 EXPECT_EQ(req.pid, pid); 3698 3699 resp.id = req.id; 3700 resp.error = 0; 3701 resp.val = USER_NOTIF_MAGIC; 3702 3703 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3704 3705 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3706 EXPECT_EQ(true, WIFEXITED(status)); 3707 EXPECT_EQ(0, WEXITSTATUS(status)); 3708 close(listener); 3709 } 3710 3711 /* 3712 * Check that a pid in a sibling (i.e. unrelated) namespace shows up as 0, i.e. 3713 * invalid. 
3714 */ 3715 TEST(user_notification_sibling_pid_ns) 3716 { 3717 pid_t pid, pid2; 3718 int status, listener; 3719 struct seccomp_notif req = {}; 3720 struct seccomp_notif_resp resp = {}; 3721 3722 ASSERT_EQ(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), 0) { 3723 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3724 } 3725 3726 listener = user_notif_syscall(__NR_getppid, 3727 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3728 ASSERT_GE(listener, 0); 3729 3730 pid = fork(); 3731 ASSERT_GE(pid, 0); 3732 3733 if (pid == 0) { 3734 ASSERT_EQ(unshare(CLONE_NEWPID), 0) { 3735 if (errno == EPERM) 3736 SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN"); 3737 else if (errno == EINVAL) 3738 SKIP(return, "CLONE_NEWPID is invalid (missing CONFIG_PID_NS?)"); 3739 } 3740 3741 pid2 = fork(); 3742 ASSERT_GE(pid2, 0); 3743 3744 if (pid2 == 0) 3745 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3746 3747 EXPECT_EQ(waitpid(pid2, &status, 0), pid2); 3748 EXPECT_EQ(true, WIFEXITED(status)); 3749 EXPECT_EQ(0, WEXITSTATUS(status)); 3750 exit(WEXITSTATUS(status)); 3751 } 3752 3753 /* Create the sibling ns, and sibling in it. */ 3754 ASSERT_EQ(unshare(CLONE_NEWPID), 0) { 3755 if (errno == EPERM) 3756 SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN"); 3757 else if (errno == EINVAL) 3758 SKIP(return, "CLONE_NEWPID is invalid (missing CONFIG_PID_NS?)"); 3759 } 3760 ASSERT_EQ(errno, 0); 3761 3762 pid2 = fork(); 3763 ASSERT_GE(pid2, 0); 3764 3765 if (pid2 == 0) { 3766 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3767 /* 3768 * The pid should be 0, i.e. the task is in some namespace that 3769 * we can't "see". 
3770 */ 3771 EXPECT_EQ(req.pid, 0); 3772 3773 resp.id = req.id; 3774 resp.error = 0; 3775 resp.val = USER_NOTIF_MAGIC; 3776 3777 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3778 exit(0); 3779 } 3780 3781 close(listener); 3782 3783 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3784 EXPECT_EQ(true, WIFEXITED(status)); 3785 EXPECT_EQ(0, WEXITSTATUS(status)); 3786 3787 EXPECT_EQ(waitpid(pid2, &status, 0), pid2); 3788 EXPECT_EQ(true, WIFEXITED(status)); 3789 EXPECT_EQ(0, WEXITSTATUS(status)); 3790 } 3791 3792 TEST(user_notification_fault_recv) 3793 { 3794 pid_t pid; 3795 int status, listener; 3796 struct seccomp_notif req = {}; 3797 struct seccomp_notif_resp resp = {}; 3798 3799 ASSERT_EQ(unshare(CLONE_NEWUSER), 0) { 3800 if (errno == EINVAL) 3801 SKIP(return, "kernel missing CLONE_NEWUSER support"); 3802 } 3803 3804 listener = user_notif_syscall(__NR_getppid, 3805 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3806 ASSERT_GE(listener, 0); 3807 3808 pid = fork(); 3809 ASSERT_GE(pid, 0); 3810 3811 if (pid == 0) 3812 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3813 3814 /* Do a bad recv() */ 3815 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, NULL), -1); 3816 EXPECT_EQ(errno, EFAULT); 3817 3818 /* We should still be able to receive this notification, though. 
*/ 3819 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3820 EXPECT_EQ(req.pid, pid); 3821 3822 resp.id = req.id; 3823 resp.error = 0; 3824 resp.val = USER_NOTIF_MAGIC; 3825 3826 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3827 3828 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3829 EXPECT_EQ(true, WIFEXITED(status)); 3830 EXPECT_EQ(0, WEXITSTATUS(status)); 3831 } 3832 3833 TEST(seccomp_get_notif_sizes) 3834 { 3835 struct seccomp_notif_sizes sizes; 3836 3837 ASSERT_EQ(seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes), 0); 3838 EXPECT_EQ(sizes.seccomp_notif, sizeof(struct seccomp_notif)); 3839 EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp)); 3840 } 3841 3842 TEST(user_notification_continue) 3843 { 3844 pid_t pid; 3845 long ret; 3846 int status, listener; 3847 struct seccomp_notif req = {}; 3848 struct seccomp_notif_resp resp = {}; 3849 struct pollfd pollfd; 3850 3851 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3852 ASSERT_EQ(0, ret) { 3853 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3854 } 3855 3856 listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER); 3857 ASSERT_GE(listener, 0); 3858 3859 pid = fork(); 3860 ASSERT_GE(pid, 0); 3861 3862 if (pid == 0) { 3863 int dup_fd, pipe_fds[2]; 3864 pid_t self; 3865 3866 ASSERT_GE(pipe(pipe_fds), 0); 3867 3868 dup_fd = dup(pipe_fds[0]); 3869 ASSERT_GE(dup_fd, 0); 3870 EXPECT_NE(pipe_fds[0], dup_fd); 3871 3872 self = getpid(); 3873 ASSERT_EQ(filecmp(self, self, pipe_fds[0], dup_fd), 0); 3874 exit(0); 3875 } 3876 3877 pollfd.fd = listener; 3878 pollfd.events = POLLIN | POLLOUT; 3879 3880 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3881 EXPECT_EQ(pollfd.revents, POLLIN); 3882 3883 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3884 3885 pollfd.fd = listener; 3886 pollfd.events = POLLIN | POLLOUT; 3887 3888 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3889 EXPECT_EQ(pollfd.revents, POLLOUT); 3890 3891 EXPECT_EQ(req.data.nr, __NR_dup); 3892 3893 
	resp.id = req.id;
	resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE;

	/*
	 * Verify that setting SECCOMP_USER_NOTIF_FLAG_CONTINUE enforces other
	 * args be set to 0.
	 */
	resp.error = 0;
	resp.val = USER_NOTIF_MAGIC;
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
	EXPECT_EQ(errno, EINVAL);

	resp.error = USER_NOTIF_MAGIC;
	resp.val = 0;
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
	EXPECT_EQ(errno, EINVAL);

	resp.error = 0;
	resp.val = 0;
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0) {
		if (errno == EINVAL)
			SKIP(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE");
	}

skip:
	EXPECT_EQ(waitpid(pid, &status, 0), pid);
	EXPECT_EQ(true, WIFEXITED(status));
	EXPECT_EQ(0, WEXITSTATUS(status)) {
		/* Exit code 2 is the child's kcmp()-unsupported marker. */
		if (WEXITSTATUS(status) == 2) {
			SKIP(return, "Kernel does not support kcmp() syscall");
			return;
		}
	}
}

/*
 * When the last task using a USER_NOTIF filter exits, the listener fd
 * (kept alive here via CLONE_FILES + dup2 to 200) must report POLLHUP.
 */
TEST(user_notification_filter_empty)
{
	pid_t pid;
	long ret;
	int status;
	struct pollfd pollfd;
	struct __clone_args args = {
		.flags = CLONE_FILES,
		.exit_signal = SIGCHLD,
	};

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	if (__NR_clone3 < 0)
		SKIP(return, "Test not built with clone3 support");

	pid = sys_clone3(&args, sizeof(args));
	ASSERT_GE(pid, 0);

	if (pid == 0) {
		int listener;

		listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER);
		if (listener < 0)
			_exit(EXIT_FAILURE);

		/* Park the listener at fd 200 in the shared file table. */
		if (dup2(listener, 200) != 200)
			_exit(EXIT_FAILURE);

		close(listener);

		_exit(EXIT_SUCCESS);
	}

	EXPECT_EQ(waitpid(pid, &status, 0), pid);
	EXPECT_EQ(true, WIFEXITED(status));
	EXPECT_EQ(0, WEXITSTATUS(status));

	/*
	 * The seccomp filter has become unused so we should be notified once
	 * the kernel gets around to cleaning up task struct.
	 */
	pollfd.fd = 200;
	pollfd.events = POLLHUP;

	EXPECT_GT(poll(&pollfd, 1, 2000), 0);
	EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0);
}

/*
 * Same as above, but check that SECCOMP_IOCTL_NOTIF_RECV on the orphaned
 * listener fails with ENOENT rather than blocking.
 */
TEST(user_ioctl_notification_filter_empty)
{
	pid_t pid;
	long ret;
	int status, p[2];
	struct __clone_args args = {
		.flags = CLONE_FILES,
		.exit_signal = SIGCHLD,
	};
	struct seccomp_notif req = {};

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	if (__NR_clone3 < 0)
		SKIP(return, "Test not built with clone3 support");

	ASSERT_EQ(0, pipe(p));

	pid = sys_clone3(&args, sizeof(args));
	ASSERT_GE(pid, 0);

	if (pid == 0) {
		int listener;

		listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER);
		if (listener < 0)
			_exit(EXIT_FAILURE);

		if (dup2(listener, 200) != 200)
			_exit(EXIT_FAILURE);
		close(p[1]);
		close(listener);
		sleep(1);

		_exit(EXIT_SUCCESS);
	}
	/*
	 * NOTE(review): this read() is expected to return 0 (EOF) once the
	 * child closes p[1]; a non-zero return bails out of the whole test
	 * process via _exit -- presumably a deliberate "can't happen" guard.
	 */
	if (read(p[0], &status, 1) != 0)
		_exit(EXIT_SUCCESS);
	close(p[0]);
	/*
	 * The seccomp filter has become unused so we should be notified once
	 * the kernel gets around to cleaning up task struct.
4025 */ 4026 EXPECT_EQ(ioctl(200, SECCOMP_IOCTL_NOTIF_RECV, &req), -1); 4027 EXPECT_EQ(errno, ENOENT); 4028 4029 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4030 EXPECT_EQ(true, WIFEXITED(status)); 4031 EXPECT_EQ(0, WEXITSTATUS(status)); 4032 } 4033 4034 static void *do_thread(void *data) 4035 { 4036 return NULL; 4037 } 4038 4039 TEST(user_notification_filter_empty_threaded) 4040 { 4041 pid_t pid; 4042 long ret; 4043 int status; 4044 struct pollfd pollfd; 4045 struct __clone_args args = { 4046 .flags = CLONE_FILES, 4047 .exit_signal = SIGCHLD, 4048 }; 4049 4050 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4051 ASSERT_EQ(0, ret) { 4052 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4053 } 4054 4055 if (__NR_clone3 < 0) 4056 SKIP(return, "Test not built with clone3 support"); 4057 4058 pid = sys_clone3(&args, sizeof(args)); 4059 ASSERT_GE(pid, 0); 4060 4061 if (pid == 0) { 4062 pid_t pid1, pid2; 4063 int listener, status; 4064 pthread_t thread; 4065 4066 listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER); 4067 if (listener < 0) 4068 _exit(EXIT_FAILURE); 4069 4070 if (dup2(listener, 200) != 200) 4071 _exit(EXIT_FAILURE); 4072 4073 close(listener); 4074 4075 pid1 = fork(); 4076 if (pid1 < 0) 4077 _exit(EXIT_FAILURE); 4078 4079 if (pid1 == 0) 4080 _exit(EXIT_SUCCESS); 4081 4082 pid2 = fork(); 4083 if (pid2 < 0) 4084 _exit(EXIT_FAILURE); 4085 4086 if (pid2 == 0) 4087 _exit(EXIT_SUCCESS); 4088 4089 if (pthread_create(&thread, NULL, do_thread, NULL) || 4090 pthread_join(thread, NULL)) 4091 _exit(EXIT_FAILURE); 4092 4093 if (pthread_create(&thread, NULL, do_thread, NULL) || 4094 pthread_join(thread, NULL)) 4095 _exit(EXIT_FAILURE); 4096 4097 if (waitpid(pid1, &status, 0) != pid1 || !WIFEXITED(status) || 4098 WEXITSTATUS(status)) 4099 _exit(EXIT_FAILURE); 4100 4101 if (waitpid(pid2, &status, 0) != pid2 || !WIFEXITED(status) || 4102 WEXITSTATUS(status)) 4103 _exit(EXIT_FAILURE); 4104 4105 exit(EXIT_SUCCESS); 4106 } 4107 4108 
EXPECT_EQ(waitpid(pid, &status, 0), pid); 4109 EXPECT_EQ(true, WIFEXITED(status)); 4110 EXPECT_EQ(0, WEXITSTATUS(status)); 4111 4112 /* 4113 * The seccomp filter has become unused so we should be notified once 4114 * the kernel gets around to cleaning up task struct. 4115 */ 4116 pollfd.fd = 200; 4117 pollfd.events = POLLHUP; 4118 4119 EXPECT_GT(poll(&pollfd, 1, 2000), 0); 4120 EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0); 4121 } 4122 4123 4124 int get_next_fd(int prev_fd) 4125 { 4126 for (int i = prev_fd + 1; i < FD_SETSIZE; ++i) { 4127 if (fcntl(i, F_GETFD) == -1) 4128 return i; 4129 } 4130 _exit(EXIT_FAILURE); 4131 } 4132 4133 TEST(user_notification_addfd) 4134 { 4135 pid_t pid; 4136 long ret; 4137 int status, listener, memfd, fd, nextfd; 4138 struct seccomp_notif_addfd addfd = {}; 4139 struct seccomp_notif_addfd_small small = {}; 4140 struct seccomp_notif_addfd_big big = {}; 4141 struct seccomp_notif req = {}; 4142 struct seccomp_notif_resp resp = {}; 4143 /* 100 ms */ 4144 struct timespec delay = { .tv_nsec = 100000000 }; 4145 4146 /* There may be arbitrary already-open fds at test start. */ 4147 memfd = memfd_create("test", 0); 4148 ASSERT_GE(memfd, 0); 4149 nextfd = get_next_fd(memfd); 4150 4151 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4152 ASSERT_EQ(0, ret) { 4153 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4154 } 4155 4156 /* fd: 4 */ 4157 /* Check that the basic notification machinery works */ 4158 listener = user_notif_syscall(__NR_getppid, 4159 SECCOMP_FILTER_FLAG_NEW_LISTENER); 4160 ASSERT_EQ(listener, nextfd); 4161 nextfd = get_next_fd(nextfd); 4162 4163 pid = fork(); 4164 ASSERT_GE(pid, 0); 4165 4166 if (pid == 0) { 4167 /* fds will be added and this value is expected */ 4168 if (syscall(__NR_getppid) != USER_NOTIF_MAGIC) 4169 exit(1); 4170 4171 /* Atomic addfd+send is received here. 
 Check it is a valid fd */
		if (fcntl(syscall(__NR_getppid), F_GETFD) == -1)
			exit(1);

		exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
	}

	ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);

	addfd.srcfd = memfd;
	addfd.newfd = 0;
	addfd.id = req.id;
	addfd.flags = 0x0;

	/* Verify bad newfd_flags cannot be set */
	addfd.newfd_flags = ~O_CLOEXEC;
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
	EXPECT_EQ(errno, EINVAL);
	addfd.newfd_flags = O_CLOEXEC;

	/* Verify bad flags cannot be set */
	addfd.flags = 0xff;
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
	EXPECT_EQ(errno, EINVAL);
	addfd.flags = 0;

	/* Verify that remote_fd cannot be set without setting flags */
	addfd.newfd = 1;
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
	EXPECT_EQ(errno, EINVAL);
	addfd.newfd = 0;

	/* Verify small size cannot be set */
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_SMALL, &small), -1);
	EXPECT_EQ(errno, EINVAL);

	/* Verify we can't send bits filled in unknown buffer area */
	memset(&big, 0xAA, sizeof(big));
	big.addfd = addfd;
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big), -1);
	EXPECT_EQ(errno, E2BIG);


	/* Verify we can set an arbitrary remote fd */
	fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
	EXPECT_EQ(fd, nextfd);
	nextfd = get_next_fd(nextfd);
	EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);

	/* Verify we can set an arbitrary remote fd with large size */
	memset(&big, 0x0, sizeof(big));
	big.addfd = addfd;
	fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big);
	EXPECT_EQ(fd, nextfd);
	nextfd = get_next_fd(nextfd);

	/* Verify we can set a specific remote fd */
	addfd.newfd = 42;
	addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
	fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
	EXPECT_EQ(fd, 42);
	EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);

	/* Resume syscall */
	resp.id = req.id;
	resp.error = 0;
	resp.val = USER_NOTIF_MAGIC;
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);

	/*
	 * This sets the ID of the ADD FD to the last request plus 1. The
	 * notification ID increments 1 per notification.
	 */
	addfd.id = req.id + 1;

	/*
	 * This spins until the underlying notification is generated.
	 * NOTE(review): errno is always positive, so "errno != -EINPROGRESS"
	 * can never be false -- the loop exits on the first -1 regardless of
	 * errno.  Presumably EINPROGRESS was intended; confirm against the
	 * kernel's addfd semantics before changing.
	 */
	while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 &&
	       errno != -EINPROGRESS)
		nanosleep(&delay, NULL);

	memset(&req, 0, sizeof(req));
	ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
	ASSERT_EQ(addfd.id, req.id);

	/* Verify we can do an atomic addfd and send */
	addfd.newfd = 0;
	addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
	fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
	/*
	 * Child has earlier "low" fds and now 42, so we expect the next
	 * lowest available fd to be assigned here.
	 */
	EXPECT_EQ(fd, nextfd);
	nextfd = get_next_fd(nextfd);
	ASSERT_EQ(filecmp(getpid(), pid, memfd, fd), 0);

	/*
	 * This sets the ID of the ADD FD to the last request plus 1. The
	 * notification ID increments 1 per notification.
	 */
	addfd.id = req.id + 1;

	/* This spins until the underlying notification is generated */
	while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 &&
	       errno != -EINPROGRESS)
		nanosleep(&delay, NULL);

	memset(&req, 0, sizeof(req));
	ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
	ASSERT_EQ(addfd.id, req.id);

	resp.id = req.id;
	resp.error = 0;
	resp.val = USER_NOTIF_MAGIC;
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);

	/* Wait for child to finish. */
	EXPECT_EQ(waitpid(pid, &status, 0), pid);
	EXPECT_EQ(true, WIFEXITED(status));
	EXPECT_EQ(0, WEXITSTATUS(status));

	close(memfd);
}

/*
 * With the target's RLIMIT_NOFILE forced to 0, addfd must fail with
 * EMFILE (or EBADF for an out-of-range SETFD target).
 */
TEST(user_notification_addfd_rlimit)
{
	pid_t pid;
	long ret;
	int status, listener, memfd;
	struct seccomp_notif_addfd addfd = {};
	struct seccomp_notif req = {};
	struct seccomp_notif_resp resp = {};
	const struct rlimit lim = {
		.rlim_cur = 0,
		.rlim_max = 0,
	};

	memfd = memfd_create("test", 0);
	ASSERT_GE(memfd, 0);

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	/* Check that the basic notification machinery works */
	listener = user_notif_syscall(__NR_getppid,
				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
	ASSERT_GE(listener, 0);

	pid = fork();
	ASSERT_GE(pid, 0);

	if (pid == 0)
		exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);


	ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);

	ASSERT_EQ(prlimit(pid, RLIMIT_NOFILE, &lim, NULL), 0);

	addfd.srcfd = memfd;
	addfd.newfd_flags = O_CLOEXEC;
	addfd.newfd = 0;
	addfd.id = req.id;
	addfd.flags = 0;

	/* Should probably spot check /proc/sys/fs/file-nr */
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
4340 EXPECT_EQ(errno, EMFILE); 4341 4342 addfd.flags = SECCOMP_ADDFD_FLAG_SEND; 4343 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 4344 EXPECT_EQ(errno, EMFILE); 4345 4346 addfd.newfd = 100; 4347 addfd.flags = SECCOMP_ADDFD_FLAG_SETFD; 4348 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 4349 EXPECT_EQ(errno, EBADF); 4350 4351 resp.id = req.id; 4352 resp.error = 0; 4353 resp.val = USER_NOTIF_MAGIC; 4354 4355 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4356 4357 /* Wait for child to finish. */ 4358 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4359 EXPECT_EQ(true, WIFEXITED(status)); 4360 EXPECT_EQ(0, WEXITSTATUS(status)); 4361 4362 close(memfd); 4363 } 4364 4365 #ifndef SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP 4366 #define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0) 4367 #define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64) 4368 #endif 4369 4370 TEST(user_notification_sync) 4371 { 4372 struct seccomp_notif req = {}; 4373 struct seccomp_notif_resp resp = {}; 4374 int status, listener; 4375 pid_t pid; 4376 long ret; 4377 4378 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4379 ASSERT_EQ(0, ret) { 4380 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4381 } 4382 4383 listener = user_notif_syscall(__NR_getppid, 4384 SECCOMP_FILTER_FLAG_NEW_LISTENER); 4385 ASSERT_GE(listener, 0); 4386 4387 /* Try to set invalid flags. 
 */
	EXPECT_SYSCALL_RETURN(-EINVAL,
		ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS, 0xffffffff, 0));

	ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS,
			SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP, 0), 0);

	pid = fork();
	ASSERT_GE(pid, 0);
	if (pid == 0) {
		ret = syscall(__NR_getppid);
		ASSERT_EQ(ret, USER_NOTIF_MAGIC) {
			_exit(1);
		}
		_exit(0);
	}

	req.pid = 0;
	ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);

	ASSERT_EQ(req.data.nr,  __NR_getppid);

	resp.id = req.id;
	resp.error = 0;
	resp.val = USER_NOTIF_MAGIC;
	resp.flags = 0;
	ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);

	ASSERT_EQ(waitpid(pid, &status, 0), pid);
	ASSERT_EQ(status, 0);
}


/* Make sure PTRACE_O_SUSPEND_SECCOMP requires CAP_SYS_ADMIN. */
FIXTURE(O_SUSPEND_SECCOMP) {
	pid_t pid;	/* filtered child we try to ptrace; 0 until forked */
};

FIXTURE_SETUP(O_SUSPEND_SECCOMP)
{
	/* ERRNO_FILTER is a filter-building macro defined earlier in this file. */
	ERRNO_FILTER(block_read, E2BIG);
	cap_value_t cap_list[] = { CAP_SYS_ADMIN };
	cap_t caps;

	self->pid = 0;

	/* make sure we don't have CAP_SYS_ADMIN */
	caps = cap_get_proc();
	ASSERT_NE(NULL, caps);
	ASSERT_EQ(0, cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_list, CAP_CLEAR));
	ASSERT_EQ(0, cap_set_proc(caps));
	cap_free(caps);

	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
	ASSERT_EQ(0, prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_block_read));

	self->pid = fork();
	ASSERT_GE(self->pid, 0);

	if (self->pid == 0) {
		while (1)
			pause();
		_exit(127);
	}
}

FIXTURE_TEARDOWN(O_SUSPEND_SECCOMP)
{
	if (self->pid)
		kill(self->pid, SIGKILL);
}

TEST_F(O_SUSPEND_SECCOMP, setoptions)
{
	int wstatus;

	ASSERT_EQ(0, ptrace(PTRACE_ATTACH, self->pid, NULL, 0));
	ASSERT_EQ(self->pid, wait(&wstatus));
	/* Without CAP_SYS_ADMIN the option must be refused with EPERM. */
	ASSERT_EQ(-1, ptrace(PTRACE_SETOPTIONS, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP));
	if (errno == EINVAL)
		SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)");
	ASSERT_EQ(EPERM, errno);
}

TEST_F(O_SUSPEND_SECCOMP, seize)
{
	int ret;

	ret = ptrace(PTRACE_SEIZE, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP);
	ASSERT_EQ(-1, ret);
	if (errno == EINVAL)
		SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)");
	ASSERT_EQ(EPERM, errno);
}

/*
 * get_nth - Get the nth, space separated entry in a file.
 *
 * Returns the length of the read field.
 * Throws error if field is zero-lengthed.
 */
static ssize_t get_nth(struct __test_metadata *_metadata, const char *path,
		       const unsigned int position, char **entry)
{
	char *line = NULL;
	unsigned int i;
	/*
	 * NOTE(review): nread is read uninitialized below if position == 0;
	 * all current callers pass position >= 1 -- TODO confirm.
	 */
	ssize_t nread;
	size_t len = 0;
	FILE *f;

	f = fopen(path, "r");
	ASSERT_NE(f, NULL) {
		TH_LOG("Could not open %s: %s", path, strerror(errno));
	}

	for (i = 0; i < position; i++) {
		nread = getdelim(&line, &len, ' ', f);
		ASSERT_GE(nread, 0) {
			TH_LOG("Failed to read %d entry in file %s", i, path);
		}
	}
	fclose(f);

	ASSERT_GT(nread, 0) {
		TH_LOG("Entry in file %s had zero length", path);
	}

	/* Caller owns *entry and must free() it; trailing delimiter excluded. */
	*entry = line;
	return nread - 1;
}

/* For a given PID, get the task state (D, R, etc...)
*/ 4519 static char get_proc_stat(struct __test_metadata *_metadata, pid_t pid) 4520 { 4521 char proc_path[100] = {0}; 4522 char status; 4523 char *line; 4524 4525 snprintf(proc_path, sizeof(proc_path), "/proc/%d/stat", pid); 4526 ASSERT_EQ(get_nth(_metadata, proc_path, 3, &line), 1); 4527 4528 status = *line; 4529 free(line); 4530 4531 return status; 4532 } 4533 4534 TEST(user_notification_fifo) 4535 { 4536 struct seccomp_notif_resp resp = {}; 4537 struct seccomp_notif req = {}; 4538 int i, status, listener; 4539 pid_t pid, pids[3]; 4540 __u64 baseid; 4541 long ret; 4542 /* 100 ms */ 4543 struct timespec delay = { .tv_nsec = 100000000 }; 4544 4545 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4546 ASSERT_EQ(0, ret) { 4547 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4548 } 4549 4550 /* Setup a listener */ 4551 listener = user_notif_syscall(__NR_getppid, 4552 SECCOMP_FILTER_FLAG_NEW_LISTENER); 4553 ASSERT_GE(listener, 0); 4554 4555 pid = fork(); 4556 ASSERT_GE(pid, 0); 4557 4558 if (pid == 0) { 4559 ret = syscall(__NR_getppid); 4560 exit(ret != USER_NOTIF_MAGIC); 4561 } 4562 4563 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4564 baseid = req.id + 1; 4565 4566 resp.id = req.id; 4567 resp.error = 0; 4568 resp.val = USER_NOTIF_MAGIC; 4569 4570 /* check that we make sure flags == 0 */ 4571 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4572 4573 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4574 EXPECT_EQ(true, WIFEXITED(status)); 4575 EXPECT_EQ(0, WEXITSTATUS(status)); 4576 4577 /* Start children, and generate notifications */ 4578 for (i = 0; i < ARRAY_SIZE(pids); i++) { 4579 pid = fork(); 4580 if (pid == 0) { 4581 ret = syscall(__NR_getppid); 4582 exit(ret != USER_NOTIF_MAGIC); 4583 } 4584 pids[i] = pid; 4585 } 4586 4587 /* This spins until all of the children are sleeping */ 4588 restart_wait: 4589 for (i = 0; i < ARRAY_SIZE(pids); i++) { 4590 if (get_proc_stat(_metadata, pids[i]) != 'S') { 4591 nanosleep(&delay, NULL); 
4592 goto restart_wait; 4593 } 4594 } 4595 4596 /* Read the notifications in order (and respond) */ 4597 for (i = 0; i < ARRAY_SIZE(pids); i++) { 4598 memset(&req, 0, sizeof(req)); 4599 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4600 EXPECT_EQ(req.id, baseid + i); 4601 resp.id = req.id; 4602 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4603 } 4604 4605 /* Make sure notifications were received */ 4606 for (i = 0; i < ARRAY_SIZE(pids); i++) { 4607 EXPECT_EQ(waitpid(pids[i], &status, 0), pids[i]); 4608 EXPECT_EQ(true, WIFEXITED(status)); 4609 EXPECT_EQ(0, WEXITSTATUS(status)); 4610 } 4611 } 4612 4613 /* get_proc_syscall - Get the syscall in progress for a given pid 4614 * 4615 * Returns the current syscall number for a given process 4616 * Returns -1 if not in syscall (running or blocked) 4617 */ 4618 static long get_proc_syscall(struct __test_metadata *_metadata, int pid) 4619 { 4620 char proc_path[100] = {0}; 4621 long ret = -1; 4622 ssize_t nread; 4623 char *line; 4624 4625 snprintf(proc_path, sizeof(proc_path), "/proc/%d/syscall", pid); 4626 nread = get_nth(_metadata, proc_path, 1, &line); 4627 ASSERT_GT(nread, 0); 4628 4629 if (!strncmp("running", line, MIN(7, nread))) 4630 ret = strtol(line, NULL, 16); 4631 4632 free(line); 4633 return ret; 4634 } 4635 4636 /* Ensure non-fatal signals prior to receive are unmodified */ 4637 TEST(user_notification_wait_killable_pre_notification) 4638 { 4639 struct sigaction new_action = { 4640 .sa_handler = signal_handler, 4641 }; 4642 int listener, status, sk_pair[2]; 4643 pid_t pid; 4644 long ret; 4645 char c; 4646 /* 100 ms */ 4647 struct timespec delay = { .tv_nsec = 100000000 }; 4648 4649 ASSERT_EQ(sigemptyset(&new_action.sa_mask), 0); 4650 4651 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4652 ASSERT_EQ(0, ret) 4653 { 4654 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4655 } 4656 4657 ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0); 4658 4659 listener = 
user_notif_syscall( 4660 __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER | 4661 SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV); 4662 ASSERT_GE(listener, 0); 4663 4664 /* 4665 * Check that we can kill the process with SIGUSR1 prior to receiving 4666 * the notification. SIGUSR1 is wired up to a custom signal handler, 4667 * and make sure it gets called. 4668 */ 4669 pid = fork(); 4670 ASSERT_GE(pid, 0); 4671 4672 if (pid == 0) { 4673 close(sk_pair[0]); 4674 handled = sk_pair[1]; 4675 4676 /* Setup the non-fatal sigaction without SA_RESTART */ 4677 if (sigaction(SIGUSR1, &new_action, NULL)) { 4678 perror("sigaction"); 4679 exit(1); 4680 } 4681 4682 ret = syscall(__NR_getppid); 4683 /* Make sure we got a return from a signal interruption */ 4684 exit(ret != -1 || errno != EINTR); 4685 } 4686 4687 /* 4688 * Make sure we've gotten to the seccomp user notification wait 4689 * from getppid prior to sending any signals 4690 */ 4691 while (get_proc_syscall(_metadata, pid) != __NR_getppid && 4692 get_proc_stat(_metadata, pid) != 'S') 4693 nanosleep(&delay, NULL); 4694 4695 /* Send non-fatal kill signal */ 4696 EXPECT_EQ(kill(pid, SIGUSR1), 0); 4697 4698 /* wait for process to exit (exit checks for EINTR) */ 4699 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4700 EXPECT_EQ(true, WIFEXITED(status)); 4701 EXPECT_EQ(0, WEXITSTATUS(status)); 4702 4703 EXPECT_EQ(read(sk_pair[0], &c, 1), 1); 4704 } 4705 4706 /* Ensure non-fatal signals after receive are blocked */ 4707 TEST(user_notification_wait_killable) 4708 { 4709 struct sigaction new_action = { 4710 .sa_handler = signal_handler, 4711 }; 4712 struct seccomp_notif_resp resp = {}; 4713 struct seccomp_notif req = {}; 4714 int listener, status, sk_pair[2]; 4715 pid_t pid; 4716 long ret; 4717 char c; 4718 /* 100 ms */ 4719 struct timespec delay = { .tv_nsec = 100000000 }; 4720 4721 ASSERT_EQ(sigemptyset(&new_action.sa_mask), 0); 4722 4723 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4724 ASSERT_EQ(0, ret) 4725 { 4726 TH_LOG("Kernel does not 
support PR_SET_NO_NEW_PRIVS!"); 4727 } 4728 4729 ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0); 4730 4731 listener = user_notif_syscall( 4732 __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER | 4733 SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV); 4734 ASSERT_GE(listener, 0); 4735 4736 pid = fork(); 4737 ASSERT_GE(pid, 0); 4738 4739 if (pid == 0) { 4740 close(sk_pair[0]); 4741 handled = sk_pair[1]; 4742 4743 /* Setup the sigaction without SA_RESTART */ 4744 if (sigaction(SIGUSR1, &new_action, NULL)) { 4745 perror("sigaction"); 4746 exit(1); 4747 } 4748 4749 /* Make sure that the syscall is completed (no EINTR) */ 4750 ret = syscall(__NR_getppid); 4751 exit(ret != USER_NOTIF_MAGIC); 4752 } 4753 4754 /* 4755 * Get the notification, to make move the notifying process into a 4756 * non-preemptible (TASK_KILLABLE) state. 4757 */ 4758 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4759 /* Send non-fatal kill signal */ 4760 EXPECT_EQ(kill(pid, SIGUSR1), 0); 4761 4762 /* 4763 * Make sure the task enters moves to TASK_KILLABLE by waiting for 4764 * D (Disk Sleep) state after receiving non-fatal signal. 4765 */ 4766 while (get_proc_stat(_metadata, pid) != 'D') 4767 nanosleep(&delay, NULL); 4768 4769 resp.id = req.id; 4770 resp.val = USER_NOTIF_MAGIC; 4771 /* Make sure the notification is found and able to be replied to */ 4772 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4773 4774 /* 4775 * Make sure that the signal handler does get called once we're back in 4776 * userspace. 
4777 */ 4778 EXPECT_EQ(read(sk_pair[0], &c, 1), 1); 4779 /* wait for process to exit (exit checks for USER_NOTIF_MAGIC) */ 4780 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4781 EXPECT_EQ(true, WIFEXITED(status)); 4782 EXPECT_EQ(0, WEXITSTATUS(status)); 4783 } 4784 4785 /* Ensure fatal signals after receive are not blocked */ 4786 TEST(user_notification_wait_killable_fatal) 4787 { 4788 struct seccomp_notif req = {}; 4789 int listener, status; 4790 pid_t pid; 4791 long ret; 4792 /* 100 ms */ 4793 struct timespec delay = { .tv_nsec = 100000000 }; 4794 4795 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4796 ASSERT_EQ(0, ret) 4797 { 4798 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4799 } 4800 4801 listener = user_notif_syscall( 4802 __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER | 4803 SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV); 4804 ASSERT_GE(listener, 0); 4805 4806 pid = fork(); 4807 ASSERT_GE(pid, 0); 4808 4809 if (pid == 0) { 4810 /* This should never complete as it should get a SIGTERM */ 4811 syscall(__NR_getppid); 4812 exit(1); 4813 } 4814 4815 while (get_proc_stat(_metadata, pid) != 'S') 4816 nanosleep(&delay, NULL); 4817 4818 /* 4819 * Get the notification, to make move the notifying process into a 4820 * non-preemptible (TASK_KILLABLE) state. 4821 */ 4822 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4823 /* Kill the process with a fatal signal */ 4824 EXPECT_EQ(kill(pid, SIGTERM), 0); 4825 4826 /* 4827 * Wait for the process to exit, and make sure the process terminated 4828 * due to the SIGTERM signal. 
4829 */ 4830 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4831 EXPECT_EQ(true, WIFSIGNALED(status)); 4832 EXPECT_EQ(SIGTERM, WTERMSIG(status)); 4833 } 4834 4835 /* Ensure signals after the reply do not interrupt */ 4836 TEST(user_notification_wait_killable_after_reply) 4837 { 4838 int i, max_iter = 100000; 4839 int listener, status; 4840 int pipe_fds[2]; 4841 pid_t pid; 4842 long ret; 4843 4844 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4845 ASSERT_EQ(0, ret) 4846 { 4847 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4848 } 4849 4850 listener = user_notif_syscall( 4851 __NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER | 4852 SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV); 4853 ASSERT_GE(listener, 0); 4854 4855 /* 4856 * Used to count invocations. One token is transferred from the child 4857 * to the parent per syscall invocation, the parent tries to take 4858 * one token per successful RECV. If the syscall is restarted after 4859 * RECV the parent will try to get two tokens while the child only 4860 * provided one. 4861 */ 4862 ASSERT_EQ(pipe(pipe_fds), 0); 4863 4864 pid = fork(); 4865 ASSERT_GE(pid, 0); 4866 4867 if (pid == 0) { 4868 struct sigaction new_action = { 4869 .sa_handler = signal_handler_nop, 4870 .sa_flags = SA_RESTART, 4871 }; 4872 struct itimerval timer = { 4873 .it_value = { .tv_usec = 1000 }, 4874 .it_interval = { .tv_usec = 1000 }, 4875 }; 4876 char c = 'a'; 4877 4878 close(pipe_fds[0]); 4879 4880 /* Setup the sigaction with SA_RESTART */ 4881 if (sigaction(SIGALRM, &new_action, NULL)) { 4882 perror("sigaction"); 4883 exit(1); 4884 } 4885 4886 /* 4887 * Kill with SIGALRM repeatedly, to try to hit the race when 4888 * handling the syscall. 4889 */ 4890 if (setitimer(ITIMER_REAL, &timer, NULL) < 0) 4891 perror("setitimer"); 4892 4893 for (i = 0; i < max_iter; ++i) { 4894 int fd; 4895 4896 /* Send one token per iteration to catch repeats. 
*/ 4897 if (write(pipe_fds[1], &c, sizeof(c)) != 1) { 4898 perror("write"); 4899 exit(1); 4900 } 4901 4902 fd = syscall(__NR_dup, 0); 4903 if (fd < 0) { 4904 perror("dup"); 4905 exit(1); 4906 } 4907 close(fd); 4908 } 4909 4910 exit(0); 4911 } 4912 4913 close(pipe_fds[1]); 4914 4915 for (i = 0; i < max_iter; ++i) { 4916 struct seccomp_notif req = {}; 4917 struct seccomp_notif_addfd addfd = {}; 4918 struct pollfd pfd = { 4919 .fd = pipe_fds[0], 4920 .events = POLLIN, 4921 }; 4922 char c; 4923 4924 /* 4925 * Try to receive one token. If it failed, one child syscall 4926 * was restarted after RECV and needed to be handled twice. 4927 */ 4928 ASSERT_EQ(poll(&pfd, 1, 1000), 1) 4929 kill(pid, SIGKILL); 4930 4931 ASSERT_EQ(read(pipe_fds[0], &c, sizeof(c)), 1) 4932 kill(pid, SIGKILL); 4933 4934 /* 4935 * Get the notification, reply to it as fast as possible to test 4936 * whether the child wrongly skips going into the non-preemptible 4937 * (TASK_KILLABLE) state. 4938 */ 4939 do 4940 ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req); 4941 while (ret < 0 && errno == ENOENT); /* Accept interruptions before RECV */ 4942 ASSERT_EQ(ret, 0) 4943 kill(pid, SIGKILL); 4944 4945 addfd.id = req.id; 4946 addfd.flags = SECCOMP_ADDFD_FLAG_SEND; 4947 addfd.srcfd = 0; 4948 ASSERT_GE(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), 0) 4949 kill(pid, SIGKILL); 4950 } 4951 4952 /* 4953 * Wait for the process to exit, and make sure the process terminated 4954 * with a zero exit code.. 
4955 */ 4956 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4957 EXPECT_EQ(true, WIFEXITED(status)); 4958 EXPECT_EQ(0, WEXITSTATUS(status)); 4959 } 4960 4961 struct tsync_vs_thread_leader_args { 4962 pthread_t leader; 4963 }; 4964 4965 static void *tsync_vs_dead_thread_leader_sibling(void *_args) 4966 { 4967 struct sock_filter allow_filter[] = { 4968 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 4969 }; 4970 struct sock_fprog allow_prog = { 4971 .len = (unsigned short)ARRAY_SIZE(allow_filter), 4972 .filter = allow_filter, 4973 }; 4974 struct tsync_vs_thread_leader_args *args = _args; 4975 void *retval; 4976 long ret; 4977 4978 ret = pthread_join(args->leader, &retval); 4979 if (ret) 4980 exit(1); 4981 if (retval != _args) 4982 exit(2); 4983 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &allow_prog); 4984 if (ret) 4985 exit(3); 4986 4987 exit(0); 4988 } 4989 4990 /* 4991 * Ensure that a dead thread leader doesn't prevent installing new filters with 4992 * SECCOMP_FILTER_FLAG_TSYNC from other threads. 4993 */ 4994 TEST(tsync_vs_dead_thread_leader) 4995 { 4996 int status; 4997 pid_t pid; 4998 long ret; 4999 5000 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 5001 ASSERT_EQ(0, ret) { 5002 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 5003 } 5004 5005 pid = fork(); 5006 ASSERT_GE(pid, 0); 5007 5008 if (pid == 0) { 5009 struct sock_filter allow_filter[] = { 5010 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 5011 }; 5012 struct sock_fprog allow_prog = { 5013 .len = (unsigned short)ARRAY_SIZE(allow_filter), 5014 .filter = allow_filter, 5015 }; 5016 struct tsync_vs_thread_leader_args *args; 5017 pthread_t sibling; 5018 5019 args = malloc(sizeof(*args)); 5020 ASSERT_NE(NULL, args); 5021 args->leader = pthread_self(); 5022 5023 ret = pthread_create(&sibling, NULL, 5024 tsync_vs_dead_thread_leader_sibling, args); 5025 ASSERT_EQ(0, ret); 5026 5027 /* Install a new filter just to the leader thread. 
*/ 5028 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog); 5029 ASSERT_EQ(0, ret); 5030 pthread_exit(args); 5031 exit(1); 5032 } 5033 5034 EXPECT_EQ(pid, waitpid(pid, &status, 0)); 5035 EXPECT_EQ(0, status); 5036 } 5037 5038 #ifdef __x86_64__ 5039 5040 /* 5041 * We need naked probed_uprobe function. Using __nocf_check 5042 * check to skip possible endbr64 instruction and ignoring 5043 * -Wattributes, otherwise the compilation might fail. 5044 */ 5045 #pragma GCC diagnostic push 5046 #pragma GCC diagnostic ignored "-Wattributes" 5047 5048 __naked __nocf_check noinline int probed_uprobe(void) 5049 { 5050 /* 5051 * Optimized uprobe is possible only on top of nop5 instruction. 5052 */ 5053 asm volatile (" \n" 5054 ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00 \n" 5055 "ret \n" 5056 ); 5057 } 5058 #pragma GCC diagnostic pop 5059 5060 #else 5061 noinline int probed_uprobe(void) 5062 { 5063 return 1; 5064 } 5065 #endif 5066 5067 noinline int probed_uretprobe(void) 5068 { 5069 return 1; 5070 } 5071 5072 static int parse_uint_from_file(const char *file, const char *fmt) 5073 { 5074 int err = -1, ret; 5075 FILE *f; 5076 5077 f = fopen(file, "re"); 5078 if (f) { 5079 err = fscanf(f, fmt, &ret); 5080 fclose(f); 5081 } 5082 return err == 1 ? 
ret : err; 5083 } 5084 5085 static int determine_uprobe_perf_type(void) 5086 { 5087 const char *file = "/sys/bus/event_source/devices/uprobe/type"; 5088 5089 return parse_uint_from_file(file, "%d\n"); 5090 } 5091 5092 static int determine_uprobe_retprobe_bit(void) 5093 { 5094 const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe"; 5095 5096 return parse_uint_from_file(file, "config:%d\n"); 5097 } 5098 5099 static ssize_t get_uprobe_offset(const void *addr) 5100 { 5101 size_t start, base, end; 5102 bool found = false; 5103 char buf[256]; 5104 FILE *f; 5105 5106 f = fopen("/proc/self/maps", "r"); 5107 if (!f) 5108 return -1; 5109 5110 while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &base) == 4) { 5111 if (buf[2] == 'x' && (uintptr_t)addr >= start && (uintptr_t)addr < end) { 5112 found = true; 5113 break; 5114 } 5115 } 5116 fclose(f); 5117 return found ? (uintptr_t)addr - start + base : -1; 5118 } 5119 5120 FIXTURE(UPROBE) { 5121 int fd; 5122 }; 5123 5124 FIXTURE_VARIANT(UPROBE) { 5125 /* 5126 * All of the U(RET)PROBE behaviors can be tested with either 5127 * u(ret)probe attached or not 5128 */ 5129 bool attach; 5130 /* 5131 * Test both uprobe and uretprobe. 
5132 */ 5133 bool uretprobe; 5134 }; 5135 5136 FIXTURE_VARIANT_ADD(UPROBE, not_attached) { 5137 .attach = false, 5138 .uretprobe = false, 5139 }; 5140 5141 FIXTURE_VARIANT_ADD(UPROBE, uprobe_attached) { 5142 .attach = true, 5143 .uretprobe = false, 5144 }; 5145 5146 FIXTURE_VARIANT_ADD(UPROBE, uretprobe_attached) { 5147 .attach = true, 5148 .uretprobe = true, 5149 }; 5150 5151 FIXTURE_SETUP(UPROBE) 5152 { 5153 const size_t attr_sz = sizeof(struct perf_event_attr); 5154 struct perf_event_attr attr; 5155 ssize_t offset; 5156 int type, bit; 5157 5158 #if !defined(__NR_uprobe) || !defined(__NR_uretprobe) 5159 SKIP(return, "__NR_uprobe ot __NR_uretprobe syscalls not defined"); 5160 #endif 5161 5162 if (!variant->attach) 5163 return; 5164 5165 memset(&attr, 0, attr_sz); 5166 5167 type = determine_uprobe_perf_type(); 5168 ASSERT_GE(type, 0); 5169 5170 if (variant->uretprobe) { 5171 bit = determine_uprobe_retprobe_bit(); 5172 ASSERT_GE(bit, 0); 5173 } 5174 5175 offset = get_uprobe_offset(variant->uretprobe ? probed_uretprobe : probed_uprobe); 5176 ASSERT_GE(offset, 0); 5177 5178 if (variant->uretprobe) 5179 attr.config |= 1 << bit; 5180 attr.size = attr_sz; 5181 attr.type = type; 5182 attr.config1 = ptr_to_u64("/proc/self/exe"); 5183 attr.config2 = offset; 5184 5185 self->fd = syscall(__NR_perf_event_open, &attr, 5186 getpid() /* pid */, -1 /* cpu */, -1 /* group_fd */, 5187 PERF_FLAG_FD_CLOEXEC); 5188 } 5189 5190 FIXTURE_TEARDOWN(UPROBE) 5191 { 5192 /* we could call close(self->fd), but we'd need extra filter for 5193 * that and since we are calling _exit right away.. 5194 */ 5195 } 5196 5197 static int run_probed_with_filter(struct sock_fprog *prog) 5198 { 5199 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) || 5200 seccomp(SECCOMP_SET_MODE_FILTER, 0, prog)) { 5201 return -1; 5202 } 5203 5204 /* 5205 * Uprobe is optimized after first hit, so let's hit twice. 
5206 */ 5207 probed_uprobe(); 5208 probed_uprobe(); 5209 5210 probed_uretprobe(); 5211 return 0; 5212 } 5213 5214 TEST_F(UPROBE, uprobe_default_allow) 5215 { 5216 struct sock_filter filter[] = { 5217 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 5218 }; 5219 struct sock_fprog prog = { 5220 .len = (unsigned short)ARRAY_SIZE(filter), 5221 .filter = filter, 5222 }; 5223 5224 ASSERT_EQ(0, run_probed_with_filter(&prog)); 5225 } 5226 5227 TEST_F(UPROBE, uprobe_default_block) 5228 { 5229 struct sock_filter filter[] = { 5230 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 5231 offsetof(struct seccomp_data, nr)), 5232 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit_group, 1, 0), 5233 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 5234 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 5235 }; 5236 struct sock_fprog prog = { 5237 .len = (unsigned short)ARRAY_SIZE(filter), 5238 .filter = filter, 5239 }; 5240 5241 ASSERT_EQ(0, run_probed_with_filter(&prog)); 5242 } 5243 5244 TEST_F(UPROBE, uprobe_block_syscall) 5245 { 5246 struct sock_filter filter[] = { 5247 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 5248 offsetof(struct seccomp_data, nr)), 5249 #ifdef __NR_uprobe 5250 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uprobe, 1, 2), 5251 #endif 5252 #ifdef __NR_uretprobe 5253 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uretprobe, 0, 1), 5254 #endif 5255 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 5256 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 5257 }; 5258 struct sock_fprog prog = { 5259 .len = (unsigned short)ARRAY_SIZE(filter), 5260 .filter = filter, 5261 }; 5262 5263 ASSERT_EQ(0, run_probed_with_filter(&prog)); 5264 } 5265 5266 TEST_F(UPROBE, uprobe_default_block_with_syscall) 5267 { 5268 struct sock_filter filter[] = { 5269 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 5270 offsetof(struct seccomp_data, nr)), 5271 #ifdef __NR_uprobe 5272 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uprobe, 3, 0), 5273 #endif 5274 #ifdef __NR_uretprobe 5275 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uretprobe, 2, 0), 5276 #endif 5277 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit_group, 1, 
0), 5278 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 5279 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 5280 }; 5281 struct sock_fprog prog = { 5282 .len = (unsigned short)ARRAY_SIZE(filter), 5283 .filter = filter, 5284 }; 5285 5286 ASSERT_EQ(0, run_probed_with_filter(&prog)); 5287 } 5288 5289 /* 5290 * TODO: 5291 * - expand NNP testing 5292 * - better arch-specific TRACE and TRAP handlers. 5293 * - endianness checking when appropriate 5294 * - 64-bit arg prodding 5295 * - arch value testing (x86 modes especially) 5296 * - verify that FILTER_FLAG_LOG filters generate log messages 5297 * - verify that RET_LOG generates log messages 5298 */ 5299 5300 TEST_HARNESS_MAIN 5301