1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (c) 2012 The Chromium OS Authors. All rights reserved. 4 * 5 * Test code for seccomp bpf. 6 */ 7 8 #define _GNU_SOURCE 9 #include <sys/types.h> 10 11 /* 12 * glibc 2.26 and later have SIGSYS in siginfo_t. Before that, 13 * we need to use the kernel's siginfo.h file and trick glibc 14 * into accepting it. 15 */ 16 #if !__GLIBC_PREREQ(2, 26) 17 # include <asm/siginfo.h> 18 # define __have_siginfo_t 1 19 # define __have_sigval_t 1 20 # define __have_sigevent_t 1 21 #endif 22 23 #include <errno.h> 24 #include <linux/filter.h> 25 #include <sys/prctl.h> 26 #include <sys/ptrace.h> 27 #include <sys/time.h> 28 #include <sys/user.h> 29 #include <linux/prctl.h> 30 #include <linux/ptrace.h> 31 #include <linux/seccomp.h> 32 #include <pthread.h> 33 #include <semaphore.h> 34 #include <signal.h> 35 #include <stddef.h> 36 #include <stdbool.h> 37 #include <string.h> 38 #include <time.h> 39 #include <limits.h> 40 #include <linux/elf.h> 41 #include <sys/uio.h> 42 #include <sys/utsname.h> 43 #include <sys/fcntl.h> 44 #include <sys/mman.h> 45 #include <sys/times.h> 46 #include <sys/socket.h> 47 #include <sys/ioctl.h> 48 #include <linux/kcmp.h> 49 #include <sys/resource.h> 50 #include <sys/capability.h> 51 #include <linux/perf_event.h> 52 53 #include <unistd.h> 54 #include <sys/syscall.h> 55 #include <poll.h> 56 57 #include "kselftest_harness.h" 58 #include "../clone3/clone3_selftests.h" 59 60 /* Attempt to de-conflict with the selftests tree. */ 61 #ifndef SKIP 62 #define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__) 63 #endif 64 65 #ifndef MIN 66 #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) 67 #endif 68 69 #ifndef PR_SET_PTRACER 70 # define PR_SET_PTRACER 0x59616d61 71 #endif 72 73 #ifndef noinline 74 #define noinline __attribute__((noinline)) 75 #endif 76 77 #ifndef __nocf_check 78 #define __nocf_check __attribute__((nocf_check)) 79 #endif 80 81 #ifndef __naked 82 #define __naked __attribute__((__naked__)) 83 #endif 84 85 #ifndef PR_SET_NO_NEW_PRIVS 86 #define PR_SET_NO_NEW_PRIVS 38 87 #define PR_GET_NO_NEW_PRIVS 39 88 #endif 89 90 #ifndef PR_SECCOMP_EXT 91 #define PR_SECCOMP_EXT 43 92 #endif 93 94 #ifndef SECCOMP_EXT_ACT 95 #define SECCOMP_EXT_ACT 1 96 #endif 97 98 #ifndef SECCOMP_EXT_ACT_TSYNC 99 #define SECCOMP_EXT_ACT_TSYNC 1 100 #endif 101 102 #ifndef SECCOMP_MODE_STRICT 103 #define SECCOMP_MODE_STRICT 1 104 #endif 105 106 #ifndef SECCOMP_MODE_FILTER 107 #define SECCOMP_MODE_FILTER 2 108 #endif 109 110 #ifndef SECCOMP_RET_ALLOW 111 struct seccomp_data { 112 int nr; 113 __u32 arch; 114 __u64 instruction_pointer; 115 __u64 args[6]; 116 }; 117 #endif 118 119 #ifndef SECCOMP_RET_KILL_PROCESS 120 #define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */ 121 #define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */ 122 #endif 123 #ifndef SECCOMP_RET_KILL 124 #define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD 125 #define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ 126 #define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ 127 #define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ 128 #define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ 129 #endif 130 #ifndef SECCOMP_RET_LOG 131 #define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ 132 #endif 133 134 #ifndef __NR_seccomp 135 # if defined(__i386__) 136 # define __NR_seccomp 354 137 # elif defined(__x86_64__) 138 # define __NR_seccomp 317 139 # elif defined(__arm__) 140 # define __NR_seccomp 383 141 # elif defined(__aarch64__) 142 # define __NR_seccomp 277 143 # elif defined(__riscv) 144 # define __NR_seccomp 277 145 # elif defined(__csky__) 146 # define __NR_seccomp 277 147 # elif defined(__loongarch__) 148 # define __NR_seccomp 277 149 # elif defined(__hppa__) 150 # define __NR_seccomp 338 151 # elif defined(__powerpc__) 152 # define __NR_seccomp 358 153 # elif defined(__s390__) 154 # define __NR_seccomp 348 155 # elif defined(__xtensa__) 156 # define __NR_seccomp 337 157 # elif defined(__sh__) 158 # define __NR_seccomp 372 159 # elif defined(__mc68000__) 160 # define __NR_seccomp 380 161 # else 162 # warning "seccomp syscall number unknown for this architecture" 163 # define __NR_seccomp 0xffff 164 # endif 165 #endif 166 167 #ifndef __NR_uretprobe 168 # if defined(__x86_64__) 169 # define __NR_uretprobe 335 170 # endif 171 #endif 172 173 #ifndef __NR_uprobe 174 # if defined(__x86_64__) 175 # define __NR_uprobe 336 176 # endif 177 #endif 178 179 #ifndef SECCOMP_SET_MODE_STRICT 180 #define SECCOMP_SET_MODE_STRICT 0 181 #endif 182 183 #ifndef SECCOMP_SET_MODE_FILTER 184 #define SECCOMP_SET_MODE_FILTER 1 185 #endif 186 187 #ifndef SECCOMP_GET_ACTION_AVAIL 188 #define SECCOMP_GET_ACTION_AVAIL 2 189 #endif 190 191 #ifndef SECCOMP_GET_NOTIF_SIZES 192 #define SECCOMP_GET_NOTIF_SIZES 3 193 #endif 194 195 #ifndef SECCOMP_FILTER_FLAG_TSYNC 196 #define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) 197 #endif 198 199 #ifndef SECCOMP_FILTER_FLAG_LOG 200 #define SECCOMP_FILTER_FLAG_LOG (1UL << 1) 201 #endif 202 203 #ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW 204 #define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) 205 #endif 206 207 #ifndef PTRACE_SECCOMP_GET_METADATA 208 #define PTRACE_SECCOMP_GET_METADATA 0x420d 209 210 struct seccomp_metadata { 211 __u64 filter_off; /* Input: which filter */ 212 __u64 flags; /* Output: filter's flags */ 213 }; 214 #endif 215 216 #ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER 217 #define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3) 218 #endif 219 220 #ifndef SECCOMP_RET_USER_NOTIF 221 #define SECCOMP_RET_USER_NOTIF 0x7fc00000U 222 223 #define SECCOMP_IOC_MAGIC '!' 224 #define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr) 225 #define SECCOMP_IOR(nr, type) _IOR(SECCOMP_IOC_MAGIC, nr, type) 226 #define SECCOMP_IOW(nr, type) _IOW(SECCOMP_IOC_MAGIC, nr, type) 227 #define SECCOMP_IOWR(nr, type) _IOWR(SECCOMP_IOC_MAGIC, nr, type) 228 229 /* Flags for seccomp notification fd ioctl. */ 230 #define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif) 231 #define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \ 232 struct seccomp_notif_resp) 233 #define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOW(2, __u64) 234 235 struct seccomp_notif { 236 __u64 id; 237 __u32 pid; 238 __u32 flags; 239 struct seccomp_data data; 240 }; 241 242 struct seccomp_notif_resp { 243 __u64 id; 244 __s64 val; 245 __s32 error; 246 __u32 flags; 247 }; 248 249 struct seccomp_notif_sizes { 250 __u16 seccomp_notif; 251 __u16 seccomp_notif_resp; 252 __u16 seccomp_data; 253 }; 254 #endif 255 256 #ifndef SECCOMP_IOCTL_NOTIF_ADDFD 257 /* On success, the return value is the remote process's added fd number */ 258 #define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \ 259 struct seccomp_notif_addfd) 260 261 /* valid flags for seccomp_notif_addfd */ 262 #define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */ 263 264 struct seccomp_notif_addfd { 265 __u64 id; 266 __u32 flags; 267 __u32 srcfd; 268 __u32 newfd; 269 __u32 newfd_flags; 270 }; 271 #endif 272 273 #ifndef SECCOMP_ADDFD_FLAG_SEND 274 #define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */ 275 #endif 276 277 struct seccomp_notif_addfd_small { 278 __u64 id; 279 char weird[4]; 280 }; 281 #define SECCOMP_IOCTL_NOTIF_ADDFD_SMALL \ 282 SECCOMP_IOW(3, struct seccomp_notif_addfd_small) 283 284 struct seccomp_notif_addfd_big { 285 union { 286 struct seccomp_notif_addfd addfd; 287 char buf[sizeof(struct seccomp_notif_addfd) + 8]; 288 }; 289 }; 290 #define SECCOMP_IOCTL_NOTIF_ADDFD_BIG \ 291 SECCOMP_IOWR(3, struct seccomp_notif_addfd_big) 292 293 #ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY 294 #define PTRACE_EVENTMSG_SYSCALL_ENTRY 1 295 #define PTRACE_EVENTMSG_SYSCALL_EXIT 2 296 #endif 297 298 #ifndef SECCOMP_USER_NOTIF_FLAG_CONTINUE 299 #define SECCOMP_USER_NOTIF_FLAG_CONTINUE 0x00000001 300 #endif 301 302 #ifndef SECCOMP_FILTER_FLAG_TSYNC_ESRCH 303 #define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4) 304 #endif 305 306 #ifndef SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV 307 #define SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV (1UL << 5) 308 #endif 309 310 #ifndef seccomp 311 int seccomp(unsigned int op, unsigned int flags, void *args) 312 { 313 errno = 0; 314 return syscall(__NR_seccomp, op, flags, args); 315 } 316 #endif 317 318 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 319 #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n])) 320 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 321 #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32)) 322 #else 323 #error "wut? Unknown __BYTE_ORDER__?!" 324 #endif 325 326 #define SIBLING_EXIT_UNKILLED 0xbadbeef 327 #define SIBLING_EXIT_FAILURE 0xbadface 328 #define SIBLING_EXIT_NEWPRIVS 0xbadfeed 329 330 static int __filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2) 331 { 332 #ifdef __NR_kcmp 333 errno = 0; 334 return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2); 335 #else 336 errno = ENOSYS; 337 return -1; 338 #endif 339 } 340 341 /* Have TH_LOG report actual location filecmp() is used. */ 342 #define filecmp(pid1, pid2, fd1, fd2) ({ \ 343 int _ret; \ 344 \ 345 _ret = __filecmp(pid1, pid2, fd1, fd2); \ 346 if (_ret != 0) { \ 347 if (_ret < 0 && errno == ENOSYS) { \ 348 TH_LOG("kcmp() syscall missing (test is less accurate)");\ 349 _ret = 0; \ 350 } \ 351 } \ 352 _ret; }) 353 354 TEST(kcmp) 355 { 356 int ret; 357 358 ret = __filecmp(getpid(), getpid(), 1, 1); 359 EXPECT_EQ(ret, 0); 360 if (ret != 0 && errno == ENOSYS) 361 SKIP(return, "Kernel does not support kcmp() (missing CONFIG_KCMP?)"); 362 } 363 364 TEST(mode_strict_support) 365 { 366 long ret; 367 368 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL); 369 ASSERT_EQ(0, ret) { 370 TH_LOG("Kernel does not support CONFIG_SECCOMP"); 371 } 372 syscall(__NR_exit, 0); 373 } 374 375 TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL) 376 { 377 long ret; 378 379 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL); 380 ASSERT_EQ(0, ret) { 381 TH_LOG("Kernel does not support CONFIG_SECCOMP"); 382 } 383 syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER, 384 NULL, NULL, NULL); 385 EXPECT_FALSE(true) { 386 TH_LOG("Unreachable!"); 387 } 388 } 389 390 /* Note! This doesn't test no new privs behavior */ 391 TEST(no_new_privs_support) 392 { 393 long ret; 394 395 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 396 EXPECT_EQ(0, ret) { 397 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 398 } 399 } 400 401 /* Tests kernel support by checking for a copy_from_user() fault on NULL. */ 402 TEST(mode_filter_support) 403 { 404 long ret; 405 406 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); 407 ASSERT_EQ(0, ret) { 408 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 409 } 410 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL); 411 EXPECT_EQ(-1, ret); 412 EXPECT_EQ(EFAULT, errno) { 413 TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!"); 414 } 415 } 416 417 TEST(mode_filter_without_nnp) 418 { 419 struct sock_filter filter[] = { 420 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 421 }; 422 struct sock_fprog prog = { 423 .len = (unsigned short)ARRAY_SIZE(filter), 424 .filter = filter, 425 }; 426 long ret; 427 cap_t cap = cap_get_proc(); 428 cap_flag_value_t is_cap_sys_admin = 0; 429 430 ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0); 431 ASSERT_LE(0, ret) { 432 TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS"); 433 } 434 errno = 0; 435 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 436 /* Succeeds with CAP_SYS_ADMIN, fails without */ 437 cap_get_flag(cap, CAP_SYS_ADMIN, CAP_EFFECTIVE, &is_cap_sys_admin); 438 if (!is_cap_sys_admin) { 439 EXPECT_EQ(-1, ret); 440 EXPECT_EQ(EACCES, errno); 441 } else { 442 EXPECT_EQ(0, ret); 443 } 444 } 445 446 #define MAX_INSNS_PER_PATH 32768 447 448 TEST(filter_size_limits) 449 { 450 int i; 451 int count = BPF_MAXINSNS + 1; 452 struct sock_filter allow[] = { 453 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 454 }; 455 struct sock_filter *filter; 456 struct sock_fprog prog = { }; 457 long ret; 458 459 filter = calloc(count, sizeof(*filter)); 460 ASSERT_NE(NULL, filter); 461 462 for (i = 0; i < count; i++) 463 filter[i] = allow[0]; 464 465 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 466 ASSERT_EQ(0, ret); 467 468 prog.filter = filter; 469 prog.len = count; 470 471 /* Too many filter instructions in a single filter. */ 472 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 473 ASSERT_NE(0, ret) { 474 TH_LOG("Installing %d insn filter was allowed", prog.len); 475 } 476 477 /* One less is okay, though. */ 478 prog.len -= 1; 479 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 480 ASSERT_EQ(0, ret) { 481 TH_LOG("Installing %d insn filter wasn't allowed", prog.len); 482 } 483 } 484 485 TEST(filter_chain_limits) 486 { 487 int i; 488 int count = BPF_MAXINSNS; 489 struct sock_filter allow[] = { 490 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 491 }; 492 struct sock_filter *filter; 493 struct sock_fprog prog = { }; 494 long ret; 495 496 filter = calloc(count, sizeof(*filter)); 497 ASSERT_NE(NULL, filter); 498 499 for (i = 0; i < count; i++) 500 filter[i] = allow[0]; 501 502 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 503 ASSERT_EQ(0, ret); 504 505 prog.filter = filter; 506 prog.len = 1; 507 508 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 509 ASSERT_EQ(0, ret); 510 511 prog.len = count; 512 513 /* Too many total filter instructions. */ 514 for (i = 0; i < MAX_INSNS_PER_PATH; i++) { 515 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 516 if (ret != 0) 517 break; 518 } 519 ASSERT_NE(0, ret) { 520 TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)", 521 i, count, i * (count + 4)); 522 } 523 } 524 525 TEST(mode_filter_cannot_move_to_strict) 526 { 527 struct sock_filter filter[] = { 528 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 529 }; 530 struct sock_fprog prog = { 531 .len = (unsigned short)ARRAY_SIZE(filter), 532 .filter = filter, 533 }; 534 long ret; 535 536 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 537 ASSERT_EQ(0, ret); 538 539 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 540 ASSERT_EQ(0, ret); 541 542 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0); 543 EXPECT_EQ(-1, ret); 544 EXPECT_EQ(EINVAL, errno); 545 } 546 547 548 TEST(mode_filter_get_seccomp) 549 { 550 struct sock_filter filter[] = { 551 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 552 }; 553 struct sock_fprog prog = { 554 .len = (unsigned short)ARRAY_SIZE(filter), 555 .filter = filter, 556 }; 557 long ret; 558 559 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 560 ASSERT_EQ(0, ret); 561 562 ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0); 563 EXPECT_EQ(0, ret); 564 565 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 566 ASSERT_EQ(0, ret); 567 568 ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0); 569 EXPECT_EQ(2, ret); 570 } 571 572 573 TEST(ALLOW_all) 574 { 575 struct sock_filter filter[] = { 576 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 577 }; 578 struct sock_fprog prog = { 579 .len = (unsigned short)ARRAY_SIZE(filter), 580 .filter = filter, 581 }; 582 long ret; 583 584 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 585 ASSERT_EQ(0, ret); 586 587 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 588 ASSERT_EQ(0, ret); 589 } 590 591 TEST(empty_prog) 592 { 593 struct sock_filter filter[] = { 594 }; 595 struct sock_fprog prog = { 596 .len = (unsigned short)ARRAY_SIZE(filter), 597 .filter = filter, 598 }; 599 long ret; 600 601 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 602 ASSERT_EQ(0, ret); 603 604 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 605 EXPECT_EQ(-1, ret); 606 EXPECT_EQ(EINVAL, errno); 607 } 608 609 TEST(log_all) 610 { 611 struct sock_filter filter[] = { 612 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG), 613 }; 614 struct sock_fprog prog = { 615 .len = (unsigned short)ARRAY_SIZE(filter), 616 .filter = filter, 617 }; 618 long ret; 619 pid_t parent = getppid(); 620 621 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 622 ASSERT_EQ(0, ret); 623 624 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 625 ASSERT_EQ(0, ret); 626 627 /* getppid() should succeed and be logged (no check for logging) */ 628 EXPECT_EQ(parent, syscall(__NR_getppid)); 629 } 630 631 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS) 632 { 633 struct sock_filter filter[] = { 634 BPF_STMT(BPF_RET|BPF_K, 0x10000000U), 635 }; 636 struct sock_fprog prog = { 637 .len = (unsigned short)ARRAY_SIZE(filter), 638 .filter = filter, 639 }; 640 long ret; 641 642 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 643 ASSERT_EQ(0, ret); 644 645 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 646 ASSERT_EQ(0, ret); 647 EXPECT_EQ(0, syscall(__NR_getpid)) { 648 TH_LOG("getpid() shouldn't ever return"); 649 } 650 } 651 652 /* return code >= 0x80000000 is unused. */ 653 TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS) 654 { 655 struct sock_filter filter[] = { 656 BPF_STMT(BPF_RET|BPF_K, 0x90000000U), 657 }; 658 struct sock_fprog prog = { 659 .len = (unsigned short)ARRAY_SIZE(filter), 660 .filter = filter, 661 }; 662 long ret; 663 664 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 665 ASSERT_EQ(0, ret); 666 667 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 668 ASSERT_EQ(0, ret); 669 EXPECT_EQ(0, syscall(__NR_getpid)) { 670 TH_LOG("getpid() shouldn't ever return"); 671 } 672 } 673 674 TEST_SIGNAL(KILL_all, SIGSYS) 675 { 676 struct sock_filter filter[] = { 677 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 678 }; 679 struct sock_fprog prog = { 680 .len = (unsigned short)ARRAY_SIZE(filter), 681 .filter = filter, 682 }; 683 long ret; 684 685 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 686 ASSERT_EQ(0, ret); 687 688 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 689 ASSERT_EQ(0, ret); 690 } 691 692 TEST_SIGNAL(KILL_one, SIGSYS) 693 { 694 struct sock_filter filter[] = { 695 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 696 offsetof(struct seccomp_data, nr)), 697 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 698 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 699 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 700 }; 701 struct sock_fprog prog = { 702 .len = (unsigned short)ARRAY_SIZE(filter), 703 .filter = filter, 704 }; 705 long ret; 706 pid_t parent = getppid(); 707 708 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 709 ASSERT_EQ(0, ret); 710 711 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 712 ASSERT_EQ(0, ret); 713 714 EXPECT_EQ(parent, syscall(__NR_getppid)); 715 /* getpid() should never return. */ 716 EXPECT_EQ(0, syscall(__NR_getpid)); 717 } 718 719 TEST_SIGNAL(KILL_one_arg_one, SIGSYS) 720 { 721 void *fatal_address; 722 struct sock_filter filter[] = { 723 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 724 offsetof(struct seccomp_data, nr)), 725 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0), 726 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 727 /* Only both with lower 32-bit for now. */ 728 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)), 729 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 730 (unsigned long)&fatal_address, 0, 1), 731 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 732 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 733 }; 734 struct sock_fprog prog = { 735 .len = (unsigned short)ARRAY_SIZE(filter), 736 .filter = filter, 737 }; 738 long ret; 739 pid_t parent = getppid(); 740 struct tms timebuf; 741 clock_t clock = times(&timebuf); 742 743 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 744 ASSERT_EQ(0, ret); 745 746 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 747 ASSERT_EQ(0, ret); 748 749 EXPECT_EQ(parent, syscall(__NR_getppid)); 750 EXPECT_LE(clock, syscall(__NR_times, &timebuf)); 751 /* times() should never return. */ 752 EXPECT_EQ(0, syscall(__NR_times, &fatal_address)); 753 } 754 755 TEST_SIGNAL(KILL_one_arg_six, SIGSYS) 756 { 757 #ifndef __NR_mmap2 758 int sysno = __NR_mmap; 759 #else 760 int sysno = __NR_mmap2; 761 #endif 762 struct sock_filter filter[] = { 763 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 764 offsetof(struct seccomp_data, nr)), 765 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0), 766 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 767 /* Only both with lower 32-bit for now. */ 768 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)), 769 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1), 770 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 771 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 772 }; 773 struct sock_fprog prog = { 774 .len = (unsigned short)ARRAY_SIZE(filter), 775 .filter = filter, 776 }; 777 long ret; 778 pid_t parent = getppid(); 779 int fd; 780 void *map1, *map2; 781 int page_size = sysconf(_SC_PAGESIZE); 782 783 ASSERT_LT(0, page_size); 784 785 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 786 ASSERT_EQ(0, ret); 787 788 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 789 ASSERT_EQ(0, ret); 790 791 fd = open("/dev/zero", O_RDONLY); 792 ASSERT_NE(-1, fd); 793 794 EXPECT_EQ(parent, syscall(__NR_getppid)); 795 map1 = (void *)syscall(sysno, 796 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size); 797 EXPECT_NE(MAP_FAILED, map1); 798 /* mmap2() should never return. */ 799 map2 = (void *)syscall(sysno, 800 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE); 801 EXPECT_EQ(MAP_FAILED, map2); 802 803 /* The test failed, so clean up the resources. */ 804 munmap(map1, page_size); 805 munmap(map2, page_size); 806 close(fd); 807 } 808 809 /* This is a thread task to die via seccomp filter violation. */ 810 void *kill_thread(void *data) 811 { 812 bool die = (bool)data; 813 814 if (die) { 815 syscall(__NR_getpid); 816 return (void *)SIBLING_EXIT_FAILURE; 817 } 818 819 return (void *)SIBLING_EXIT_UNKILLED; 820 } 821 822 enum kill_t { 823 KILL_THREAD, 824 KILL_PROCESS, 825 RET_UNKNOWN 826 }; 827 828 /* Prepare a thread that will kill itself or both of us. */ 829 void kill_thread_or_group(struct __test_metadata *_metadata, 830 enum kill_t kill_how) 831 { 832 pthread_t thread; 833 void *status; 834 /* Kill only when calling __NR_getpid. */ 835 struct sock_filter filter_thread[] = { 836 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 837 offsetof(struct seccomp_data, nr)), 838 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 839 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD), 840 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 841 }; 842 struct sock_fprog prog_thread = { 843 .len = (unsigned short)ARRAY_SIZE(filter_thread), 844 .filter = filter_thread, 845 }; 846 int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAA; 847 struct sock_filter filter_process[] = { 848 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 849 offsetof(struct seccomp_data, nr)), 850 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 851 BPF_STMT(BPF_RET|BPF_K, kill), 852 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 853 }; 854 struct sock_fprog prog_process = { 855 .len = (unsigned short)ARRAY_SIZE(filter_process), 856 .filter = filter_process, 857 }; 858 859 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 860 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 861 } 862 863 ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, 864 kill_how == KILL_THREAD ? &prog_thread 865 : &prog_process)); 866 867 /* 868 * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS 869 * flag cannot be downgraded by a new filter. 870 */ 871 if (kill_how == KILL_PROCESS) 872 ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread)); 873 874 /* Start a thread that will exit immediately. */ 875 ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false)); 876 ASSERT_EQ(0, pthread_join(thread, &status)); 877 ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status); 878 879 /* Start a thread that will die immediately. */ 880 ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true)); 881 ASSERT_EQ(0, pthread_join(thread, &status)); 882 ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status); 883 884 /* 885 * If we get here, only the spawned thread died. Let the parent know 886 * the whole process didn't die (i.e. this thread, the spawner, 887 * stayed running). 888 */ 889 exit(42); 890 } 891 892 TEST(KILL_thread) 893 { 894 int status; 895 pid_t child_pid; 896 897 child_pid = fork(); 898 ASSERT_LE(0, child_pid); 899 if (child_pid == 0) { 900 kill_thread_or_group(_metadata, KILL_THREAD); 901 _exit(38); 902 } 903 904 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 905 906 /* If only the thread was killed, we'll see exit 42. */ 907 ASSERT_TRUE(WIFEXITED(status)); 908 ASSERT_EQ(42, WEXITSTATUS(status)); 909 } 910 911 TEST(KILL_process) 912 { 913 int status; 914 pid_t child_pid; 915 916 child_pid = fork(); 917 ASSERT_LE(0, child_pid); 918 if (child_pid == 0) { 919 kill_thread_or_group(_metadata, KILL_PROCESS); 920 _exit(38); 921 } 922 923 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 924 925 /* If the entire process was killed, we'll see SIGSYS. */ 926 ASSERT_TRUE(WIFSIGNALED(status)); 927 ASSERT_EQ(SIGSYS, WTERMSIG(status)); 928 } 929 930 TEST(KILL_unknown) 931 { 932 int status; 933 pid_t child_pid; 934 935 child_pid = fork(); 936 ASSERT_LE(0, child_pid); 937 if (child_pid == 0) { 938 kill_thread_or_group(_metadata, RET_UNKNOWN); 939 _exit(38); 940 } 941 942 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 943 944 /* If the entire process was killed, we'll see SIGSYS. */ 945 EXPECT_TRUE(WIFSIGNALED(status)) { 946 TH_LOG("Unknown SECCOMP_RET is only killing the thread?"); 947 } 948 ASSERT_EQ(SIGSYS, WTERMSIG(status)); 949 } 950 951 /* TODO(wad) add 64-bit versus 32-bit arg tests. */ 952 TEST(arg_out_of_range) 953 { 954 struct sock_filter filter[] = { 955 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)), 956 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 957 }; 958 struct sock_fprog prog = { 959 .len = (unsigned short)ARRAY_SIZE(filter), 960 .filter = filter, 961 }; 962 long ret; 963 964 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 965 ASSERT_EQ(0, ret); 966 967 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 968 EXPECT_EQ(-1, ret); 969 EXPECT_EQ(EINVAL, errno); 970 } 971 972 #define ERRNO_FILTER(name, errno) \ 973 struct sock_filter _read_filter_##name[] = { \ 974 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, \ 975 offsetof(struct seccomp_data, nr)), \ 976 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), \ 977 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | errno), \ 978 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), \ 979 }; \ 980 struct sock_fprog prog_##name = { \ 981 .len = (unsigned short)ARRAY_SIZE(_read_filter_##name), \ 982 .filter = _read_filter_##name, \ 983 } 984 985 /* Make sure basic errno values are correctly passed through a filter. */ 986 TEST(ERRNO_valid) 987 { 988 ERRNO_FILTER(valid, E2BIG); 989 long ret; 990 pid_t parent = getppid(); 991 992 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 993 ASSERT_EQ(0, ret); 994 995 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid); 996 ASSERT_EQ(0, ret); 997 998 EXPECT_EQ(parent, syscall(__NR_getppid)); 999 EXPECT_EQ(-1, read(-1, NULL, 0)); 1000 EXPECT_EQ(E2BIG, errno); 1001 } 1002 1003 /* Make sure an errno of zero is correctly handled by the arch code. */ 1004 TEST(ERRNO_zero) 1005 { 1006 ERRNO_FILTER(zero, 0); 1007 long ret; 1008 pid_t parent = getppid(); 1009 1010 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1011 ASSERT_EQ(0, ret); 1012 1013 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero); 1014 ASSERT_EQ(0, ret); 1015 1016 EXPECT_EQ(parent, syscall(__NR_getppid)); 1017 /* "errno" of 0 is ok. */ 1018 EXPECT_EQ(0, read(-1, NULL, 0)); 1019 } 1020 1021 /* 1022 * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller. 1023 * This tests that the errno value gets capped correctly, fixed by 1024 * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO"). 1025 */ 1026 TEST(ERRNO_capped) 1027 { 1028 ERRNO_FILTER(capped, 4096); 1029 long ret; 1030 pid_t parent = getppid(); 1031 1032 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1033 ASSERT_EQ(0, ret); 1034 1035 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped); 1036 ASSERT_EQ(0, ret); 1037 1038 EXPECT_EQ(parent, syscall(__NR_getppid)); 1039 EXPECT_EQ(-1, read(-1, NULL, 0)); 1040 EXPECT_EQ(4095, errno); 1041 } 1042 1043 /* 1044 * Filters are processed in reverse order: last applied is executed first. 1045 * Since only the SECCOMP_RET_ACTION mask is tested for return values, the 1046 * SECCOMP_RET_DATA mask results will follow the most recently applied 1047 * matching filter return (and not the lowest or highest value). 1048 */ 1049 TEST(ERRNO_order) 1050 { 1051 ERRNO_FILTER(first, 11); 1052 ERRNO_FILTER(second, 13); 1053 ERRNO_FILTER(third, 12); 1054 long ret; 1055 pid_t parent = getppid(); 1056 1057 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1058 ASSERT_EQ(0, ret); 1059 1060 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first); 1061 ASSERT_EQ(0, ret); 1062 1063 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second); 1064 ASSERT_EQ(0, ret); 1065 1066 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third); 1067 ASSERT_EQ(0, ret); 1068 1069 EXPECT_EQ(parent, syscall(__NR_getppid)); 1070 EXPECT_EQ(-1, read(-1, NULL, 0)); 1071 EXPECT_EQ(12, errno); 1072 } 1073 1074 FIXTURE(TRAP) { 1075 struct sock_fprog prog; 1076 }; 1077 1078 FIXTURE_SETUP(TRAP) 1079 { 1080 struct sock_filter filter[] = { 1081 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1082 offsetof(struct seccomp_data, nr)), 1083 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 1084 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP), 1085 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1086 }; 1087 1088 memset(&self->prog, 0, sizeof(self->prog)); 1089 self->prog.filter = malloc(sizeof(filter)); 1090 ASSERT_NE(NULL, self->prog.filter); 1091 memcpy(self->prog.filter, filter, sizeof(filter)); 1092 self->prog.len = (unsigned short)ARRAY_SIZE(filter); 1093 } 1094 1095 FIXTURE_TEARDOWN(TRAP) 1096 { 1097 if (self->prog.filter) 1098 free(self->prog.filter); 1099 } 1100 1101 TEST_F_SIGNAL(TRAP, dfl, SIGSYS) 1102 { 1103 long ret; 1104 1105 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1106 ASSERT_EQ(0, ret); 1107 1108 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); 1109 ASSERT_EQ(0, ret); 1110 syscall(__NR_getpid); 1111 } 1112 1113 /* Ensure that SIGSYS overrides SIG_IGN */ 1114 TEST_F_SIGNAL(TRAP, ign, SIGSYS) 1115 { 1116 long ret; 1117 1118 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1119 ASSERT_EQ(0, ret); 1120 1121 signal(SIGSYS, SIG_IGN); 1122 1123 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); 1124 ASSERT_EQ(0, ret); 1125 syscall(__NR_getpid); 1126 } 1127 1128 static siginfo_t TRAP_info; 1129 static volatile int TRAP_nr; 1130 static void TRAP_action(int nr, siginfo_t *info, void *void_context) 1131 { 1132 memcpy(&TRAP_info, info, sizeof(TRAP_info)); 1133 TRAP_nr = nr; 1134 } 1135 1136 TEST_F(TRAP, handler) 1137 { 1138 int ret, test; 1139 struct sigaction act; 1140 sigset_t mask; 1141 1142 memset(&act, 0, sizeof(act)); 1143 sigemptyset(&mask); 1144 sigaddset(&mask, SIGSYS); 1145 1146 act.sa_sigaction = &TRAP_action; 1147 act.sa_flags = SA_SIGINFO; 1148 ret = sigaction(SIGSYS, &act, NULL); 1149 ASSERT_EQ(0, ret) { 1150 TH_LOG("sigaction failed"); 1151 } 1152 ret = sigprocmask(SIG_UNBLOCK, &mask, NULL); 1153 ASSERT_EQ(0, ret) { 1154 TH_LOG("sigprocmask failed"); 1155 } 1156 1157 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1158 ASSERT_EQ(0, ret); 1159 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); 1160 ASSERT_EQ(0, ret); 1161 TRAP_nr = 0; 1162 memset(&TRAP_info, 0, sizeof(TRAP_info)); 1163 /* Expect the registers to be rolled back. (nr = error) may vary 1164 * based on arch. */ 1165 ret = syscall(__NR_getpid); 1166 /* Silence gcc warning about volatile. */ 1167 test = TRAP_nr; 1168 EXPECT_EQ(SIGSYS, test); 1169 struct local_sigsys { 1170 void *_call_addr; /* calling user insn */ 1171 int _syscall; /* triggering system call number */ 1172 unsigned int _arch; /* AUDIT_ARCH_* of syscall */ 1173 } *sigsys = (struct local_sigsys *) 1174 #ifdef si_syscall 1175 &(TRAP_info.si_call_addr); 1176 #else 1177 &TRAP_info.si_pid; 1178 #endif 1179 EXPECT_EQ(__NR_getpid, sigsys->_syscall); 1180 /* Make sure arch is non-zero. */ 1181 EXPECT_NE(0, sigsys->_arch); 1182 EXPECT_NE(0, (unsigned long)sigsys->_call_addr); 1183 } 1184 1185 FIXTURE(precedence) { 1186 struct sock_fprog allow; 1187 struct sock_fprog log; 1188 struct sock_fprog trace; 1189 struct sock_fprog error; 1190 struct sock_fprog trap; 1191 struct sock_fprog kill; 1192 }; 1193 1194 FIXTURE_SETUP(precedence) 1195 { 1196 struct sock_filter allow_insns[] = { 1197 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1198 }; 1199 struct sock_filter log_insns[] = { 1200 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1201 offsetof(struct seccomp_data, nr)), 1202 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1203 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1204 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG), 1205 }; 1206 struct sock_filter trace_insns[] = { 1207 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1208 offsetof(struct seccomp_data, nr)), 1209 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1210 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1211 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE), 1212 }; 1213 struct sock_filter error_insns[] = { 1214 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1215 offsetof(struct seccomp_data, nr)), 1216 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1217 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1218 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO), 1219 }; 1220 struct sock_filter trap_insns[] = { 1221 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1222 offsetof(struct seccomp_data, nr)), 1223 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1224 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1225 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP), 1226 }; 1227 struct sock_filter kill_insns[] = { 1228 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1229 offsetof(struct seccomp_data, nr)), 1230 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1231 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1232 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 1233 }; 1234 1235 memset(self, 0, sizeof(*self)); 1236 #define FILTER_ALLOC(_x) \ 1237 self->_x.filter = malloc(sizeof(_x##_insns)); \ 1238 ASSERT_NE(NULL, self->_x.filter); \ 1239 memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \ 1240 self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns) 1241 FILTER_ALLOC(allow); 1242 FILTER_ALLOC(log); 1243 FILTER_ALLOC(trace); 1244 FILTER_ALLOC(error); 1245 FILTER_ALLOC(trap); 1246 FILTER_ALLOC(kill); 1247 } 1248 1249 FIXTURE_TEARDOWN(precedence) 1250 { 1251 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter) 1252 FILTER_FREE(allow); 1253 FILTER_FREE(log); 1254 FILTER_FREE(trace); 1255 FILTER_FREE(error); 1256 FILTER_FREE(trap); 1257 FILTER_FREE(kill); 1258 } 1259 1260 TEST_F(precedence, allow_ok) 1261 { 1262 pid_t parent, res = 0; 1263 long ret; 1264 1265 parent = getppid(); 1266 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1267 ASSERT_EQ(0, ret); 1268 1269 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1270 ASSERT_EQ(0, ret); 1271 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1272 ASSERT_EQ(0, ret); 1273 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1274 ASSERT_EQ(0, ret); 1275 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1276 ASSERT_EQ(0, ret); 1277 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1278 ASSERT_EQ(0, ret); 1279 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); 1280 ASSERT_EQ(0, ret); 1281 /* Should work just fine. */ 1282 res = syscall(__NR_getppid); 1283 EXPECT_EQ(parent, res); 1284 } 1285 1286 TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS) 1287 { 1288 pid_t parent, res = 0; 1289 long ret; 1290 1291 parent = getppid(); 1292 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1293 ASSERT_EQ(0, ret); 1294 1295 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1296 ASSERT_EQ(0, ret); 1297 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1298 ASSERT_EQ(0, ret); 1299 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1300 ASSERT_EQ(0, ret); 1301 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1302 ASSERT_EQ(0, ret); 1303 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1304 ASSERT_EQ(0, ret); 1305 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); 1306 ASSERT_EQ(0, ret); 1307 /* Should work just fine. */ 1308 res = syscall(__NR_getppid); 1309 EXPECT_EQ(parent, res); 1310 /* getpid() should never return. */ 1311 res = syscall(__NR_getpid); 1312 EXPECT_EQ(0, res); 1313 } 1314 1315 TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS) 1316 { 1317 pid_t parent; 1318 long ret; 1319 1320 parent = getppid(); 1321 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1322 ASSERT_EQ(0, ret); 1323 1324 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1325 ASSERT_EQ(0, ret); 1326 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); 1327 ASSERT_EQ(0, ret); 1328 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1329 ASSERT_EQ(0, ret); 1330 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1331 ASSERT_EQ(0, ret); 1332 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1333 ASSERT_EQ(0, ret); 1334 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1335 ASSERT_EQ(0, ret); 1336 /* Should work just fine. */ 1337 EXPECT_EQ(parent, syscall(__NR_getppid)); 1338 /* getpid() should never return. */ 1339 EXPECT_EQ(0, syscall(__NR_getpid)); 1340 } 1341 1342 TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS) 1343 { 1344 pid_t parent; 1345 long ret; 1346 1347 parent = getppid(); 1348 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1349 ASSERT_EQ(0, ret); 1350 1351 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1352 ASSERT_EQ(0, ret); 1353 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1354 ASSERT_EQ(0, ret); 1355 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1356 ASSERT_EQ(0, ret); 1357 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1358 ASSERT_EQ(0, ret); 1359 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1360 ASSERT_EQ(0, ret); 1361 /* Should work just fine. */ 1362 EXPECT_EQ(parent, syscall(__NR_getppid)); 1363 /* getpid() should never return. */ 1364 EXPECT_EQ(0, syscall(__NR_getpid)); 1365 } 1366 1367 TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS) 1368 { 1369 pid_t parent; 1370 long ret; 1371 1372 parent = getppid(); 1373 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1374 ASSERT_EQ(0, ret); 1375 1376 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1377 ASSERT_EQ(0, ret); 1378 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1379 ASSERT_EQ(0, ret); 1380 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1381 ASSERT_EQ(0, ret); 1382 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1383 ASSERT_EQ(0, ret); 1384 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1385 ASSERT_EQ(0, ret); 1386 /* Should work just fine. */ 1387 EXPECT_EQ(parent, syscall(__NR_getppid)); 1388 /* getpid() should never return. */ 1389 EXPECT_EQ(0, syscall(__NR_getpid)); 1390 } 1391 1392 TEST_F(precedence, errno_is_third) 1393 { 1394 pid_t parent; 1395 long ret; 1396 1397 parent = getppid(); 1398 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1399 ASSERT_EQ(0, ret); 1400 1401 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1402 ASSERT_EQ(0, ret); 1403 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1404 ASSERT_EQ(0, ret); 1405 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1406 ASSERT_EQ(0, ret); 1407 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1408 ASSERT_EQ(0, ret); 1409 /* Should work just fine. */ 1410 EXPECT_EQ(parent, syscall(__NR_getppid)); 1411 EXPECT_EQ(0, syscall(__NR_getpid)); 1412 } 1413 1414 TEST_F(precedence, errno_is_third_in_any_order) 1415 { 1416 pid_t parent; 1417 long ret; 1418 1419 parent = getppid(); 1420 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1421 ASSERT_EQ(0, ret); 1422 1423 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1424 ASSERT_EQ(0, ret); 1425 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1426 ASSERT_EQ(0, ret); 1427 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1428 ASSERT_EQ(0, ret); 1429 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1430 ASSERT_EQ(0, ret); 1431 /* Should work just fine. */ 1432 EXPECT_EQ(parent, syscall(__NR_getppid)); 1433 EXPECT_EQ(0, syscall(__NR_getpid)); 1434 } 1435 1436 TEST_F(precedence, trace_is_fourth) 1437 { 1438 pid_t parent; 1439 long ret; 1440 1441 parent = getppid(); 1442 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1443 ASSERT_EQ(0, ret); 1444 1445 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1446 ASSERT_EQ(0, ret); 1447 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1448 ASSERT_EQ(0, ret); 1449 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1450 ASSERT_EQ(0, ret); 1451 /* Should work just fine. */ 1452 EXPECT_EQ(parent, syscall(__NR_getppid)); 1453 /* No ptracer */ 1454 EXPECT_EQ(-1, syscall(__NR_getpid)); 1455 } 1456 1457 TEST_F(precedence, trace_is_fourth_in_any_order) 1458 { 1459 pid_t parent; 1460 long ret; 1461 1462 parent = getppid(); 1463 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1464 ASSERT_EQ(0, ret); 1465 1466 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1467 ASSERT_EQ(0, ret); 1468 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1469 ASSERT_EQ(0, ret); 1470 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1471 ASSERT_EQ(0, ret); 1472 /* Should work just fine. */ 1473 EXPECT_EQ(parent, syscall(__NR_getppid)); 1474 /* No ptracer */ 1475 EXPECT_EQ(-1, syscall(__NR_getpid)); 1476 } 1477 1478 TEST_F(precedence, log_is_fifth) 1479 { 1480 pid_t mypid, parent; 1481 long ret; 1482 1483 mypid = getpid(); 1484 parent = getppid(); 1485 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1486 ASSERT_EQ(0, ret); 1487 1488 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1489 ASSERT_EQ(0, ret); 1490 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1491 ASSERT_EQ(0, ret); 1492 /* Should work just fine. */ 1493 EXPECT_EQ(parent, syscall(__NR_getppid)); 1494 /* Should also work just fine */ 1495 EXPECT_EQ(mypid, syscall(__NR_getpid)); 1496 } 1497 1498 TEST_F(precedence, log_is_fifth_in_any_order) 1499 { 1500 pid_t mypid, parent; 1501 long ret; 1502 1503 mypid = getpid(); 1504 parent = getppid(); 1505 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1506 ASSERT_EQ(0, ret); 1507 1508 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1509 ASSERT_EQ(0, ret); 1510 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1511 ASSERT_EQ(0, ret); 1512 /* Should work just fine. */ 1513 EXPECT_EQ(parent, syscall(__NR_getppid)); 1514 /* Should also work just fine */ 1515 EXPECT_EQ(mypid, syscall(__NR_getpid)); 1516 } 1517 1518 #ifndef PTRACE_O_TRACESECCOMP 1519 #define PTRACE_O_TRACESECCOMP 0x00000080 1520 #endif 1521 1522 /* Catch the Ubuntu 12.04 value error. */ 1523 #if PTRACE_EVENT_SECCOMP != 7 1524 #undef PTRACE_EVENT_SECCOMP 1525 #endif 1526 1527 #ifndef PTRACE_EVENT_SECCOMP 1528 #define PTRACE_EVENT_SECCOMP 7 1529 #endif 1530 1531 #define PTRACE_EVENT_MASK(status) ((status) >> 16) 1532 bool tracer_running; 1533 void tracer_stop(int sig) 1534 { 1535 tracer_running = false; 1536 } 1537 1538 typedef void tracer_func_t(struct __test_metadata *_metadata, 1539 pid_t tracee, int status, void *args); 1540 1541 void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee, 1542 tracer_func_t tracer_func, void *args, bool ptrace_syscall) 1543 { 1544 int ret = -1; 1545 struct sigaction action = { 1546 .sa_handler = tracer_stop, 1547 }; 1548 1549 /* Allow external shutdown. */ 1550 tracer_running = true; 1551 ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL)); 1552 1553 errno = 0; 1554 while (ret == -1 && errno != EINVAL) 1555 ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0); 1556 ASSERT_EQ(0, ret) { 1557 kill(tracee, SIGKILL); 1558 } 1559 /* Wait for attach stop */ 1560 wait(NULL); 1561 1562 ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ? 1563 PTRACE_O_TRACESYSGOOD : 1564 PTRACE_O_TRACESECCOMP); 1565 ASSERT_EQ(0, ret) { 1566 TH_LOG("Failed to set PTRACE_O_TRACESECCOMP"); 1567 kill(tracee, SIGKILL); 1568 } 1569 ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT, 1570 tracee, NULL, 0); 1571 ASSERT_EQ(0, ret); 1572 1573 /* Unblock the tracee */ 1574 ASSERT_EQ(1, write(fd, "A", 1)); 1575 ASSERT_EQ(0, close(fd)); 1576 1577 /* Run until we're shut down. Must assert to stop execution. */ 1578 while (tracer_running) { 1579 int status; 1580 1581 if (wait(&status) != tracee) 1582 continue; 1583 1584 if (WIFSIGNALED(status)) { 1585 /* Child caught a fatal signal. */ 1586 return; 1587 } 1588 if (WIFEXITED(status)) { 1589 /* Child exited with code. */ 1590 return; 1591 } 1592 1593 /* Check if we got an expected event. */ 1594 ASSERT_EQ(WIFCONTINUED(status), false); 1595 ASSERT_EQ(WIFSTOPPED(status), true); 1596 ASSERT_EQ(WSTOPSIG(status) & SIGTRAP, SIGTRAP) { 1597 TH_LOG("Unexpected WSTOPSIG: %d", WSTOPSIG(status)); 1598 } 1599 1600 tracer_func(_metadata, tracee, status, args); 1601 1602 ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT, 1603 tracee, NULL, 0); 1604 ASSERT_EQ(0, ret); 1605 } 1606 /* Directly report the status of our test harness results. */ 1607 syscall(__NR_exit, _metadata->exit_code); 1608 } 1609 1610 /* Common tracer setup/teardown functions. */ 1611 void cont_handler(int num) 1612 { } 1613 pid_t setup_trace_fixture(struct __test_metadata *_metadata, 1614 tracer_func_t func, void *args, bool ptrace_syscall) 1615 { 1616 char sync; 1617 int pipefd[2]; 1618 pid_t tracer_pid; 1619 pid_t tracee = getpid(); 1620 1621 /* Setup a pipe for clean synchronization. */ 1622 ASSERT_EQ(0, pipe(pipefd)); 1623 1624 /* Fork a child which we'll promote to tracer */ 1625 tracer_pid = fork(); 1626 ASSERT_LE(0, tracer_pid); 1627 signal(SIGALRM, cont_handler); 1628 if (tracer_pid == 0) { 1629 close(pipefd[0]); 1630 start_tracer(_metadata, pipefd[1], tracee, func, args, 1631 ptrace_syscall); 1632 syscall(__NR_exit, 0); 1633 } 1634 close(pipefd[1]); 1635 prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0); 1636 read(pipefd[0], &sync, 1); 1637 close(pipefd[0]); 1638 1639 return tracer_pid; 1640 } 1641 1642 void teardown_trace_fixture(struct __test_metadata *_metadata, 1643 pid_t tracer) 1644 { 1645 if (tracer) { 1646 int status; 1647 ASSERT_EQ(0, kill(tracer, SIGUSR1)); 1648 ASSERT_EQ(tracer, waitpid(tracer, &status, 0)); 1649 } 1650 } 1651 1652 /* "poke" tracer arguments and function. */ 1653 struct tracer_args_poke_t { 1654 unsigned long poke_addr; 1655 }; 1656 1657 void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status, 1658 void *args) 1659 { 1660 int ret; 1661 unsigned long msg; 1662 struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args; 1663 1664 ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); 1665 EXPECT_EQ(0, ret); 1666 /* If this fails, don't try to recover. */ 1667 ASSERT_EQ(0x1001, msg) { 1668 kill(tracee, SIGKILL); 1669 } 1670 /* 1671 * Poke in the message. 1672 * Registers are not touched to try to keep this relatively arch 1673 * agnostic. 1674 */ 1675 ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001); 1676 EXPECT_EQ(0, ret); 1677 } 1678 1679 FIXTURE(TRACE_poke) { 1680 struct sock_fprog prog; 1681 pid_t tracer; 1682 long poked; 1683 struct tracer_args_poke_t tracer_args; 1684 }; 1685 1686 FIXTURE_SETUP(TRACE_poke) 1687 { 1688 struct sock_filter filter[] = { 1689 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1690 offsetof(struct seccomp_data, nr)), 1691 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), 1692 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001), 1693 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1694 }; 1695 1696 self->poked = 0; 1697 memset(&self->prog, 0, sizeof(self->prog)); 1698 self->prog.filter = malloc(sizeof(filter)); 1699 ASSERT_NE(NULL, self->prog.filter); 1700 memcpy(self->prog.filter, filter, sizeof(filter)); 1701 self->prog.len = (unsigned short)ARRAY_SIZE(filter); 1702 1703 /* Set up tracer args. */ 1704 self->tracer_args.poke_addr = (unsigned long)&self->poked; 1705 1706 /* Launch tracer. */ 1707 self->tracer = setup_trace_fixture(_metadata, tracer_poke, 1708 &self->tracer_args, false); 1709 } 1710 1711 FIXTURE_TEARDOWN(TRACE_poke) 1712 { 1713 teardown_trace_fixture(_metadata, self->tracer); 1714 if (self->prog.filter) 1715 free(self->prog.filter); 1716 } 1717 1718 TEST_F(TRACE_poke, read_has_side_effects) 1719 { 1720 ssize_t ret; 1721 1722 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1723 ASSERT_EQ(0, ret); 1724 1725 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); 1726 ASSERT_EQ(0, ret); 1727 1728 EXPECT_EQ(0, self->poked); 1729 ret = read(-1, NULL, 0); 1730 EXPECT_EQ(-1, ret); 1731 EXPECT_EQ(0x1001, self->poked); 1732 } 1733 1734 TEST_F(TRACE_poke, getpid_runs_normally) 1735 { 1736 long ret; 1737 1738 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1739 ASSERT_EQ(0, ret); 1740 1741 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); 1742 ASSERT_EQ(0, ret); 1743 1744 EXPECT_EQ(0, self->poked); 1745 EXPECT_NE(0, syscall(__NR_getpid)); 1746 EXPECT_EQ(0, self->poked); 1747 } 1748 1749 #if defined(__x86_64__) 1750 # define ARCH_REGS struct user_regs_struct 1751 # define SYSCALL_NUM(_regs) (_regs).orig_rax 1752 # define SYSCALL_RET(_regs) (_regs).rax 1753 #elif defined(__i386__) 1754 # define ARCH_REGS struct user_regs_struct 1755 # define SYSCALL_NUM(_regs) (_regs).orig_eax 1756 # define SYSCALL_RET(_regs) (_regs).eax 1757 #elif defined(__arm__) 1758 # define ARCH_REGS struct pt_regs 1759 # define SYSCALL_NUM(_regs) (_regs).ARM_r7 1760 # ifndef PTRACE_SET_SYSCALL 1761 # define PTRACE_SET_SYSCALL 23 1762 # endif 1763 # define SYSCALL_NUM_SET(_regs, _nr) \ 1764 EXPECT_EQ(0, ptrace(PTRACE_SET_SYSCALL, tracee, NULL, _nr)) 1765 # define SYSCALL_RET(_regs) (_regs).ARM_r0 1766 #elif defined(__aarch64__) 1767 # define ARCH_REGS struct user_pt_regs 1768 # define SYSCALL_NUM(_regs) (_regs).regs[8] 1769 # ifndef NT_ARM_SYSTEM_CALL 1770 # define NT_ARM_SYSTEM_CALL 0x404 1771 # endif 1772 # define SYSCALL_NUM_SET(_regs, _nr) \ 1773 do { \ 1774 struct iovec __v; \ 1775 typeof(_nr) __nr = (_nr); \ 1776 __v.iov_base = &__nr; \ 1777 __v.iov_len = sizeof(__nr); \ 1778 EXPECT_EQ(0, ptrace(PTRACE_SETREGSET, tracee, \ 1779 NT_ARM_SYSTEM_CALL, &__v)); \ 1780 } while (0) 1781 # define SYSCALL_RET(_regs) (_regs).regs[0] 1782 #elif defined(__loongarch__) 1783 # define ARCH_REGS struct user_pt_regs 1784 # define SYSCALL_NUM(_regs) (_regs).regs[11] 1785 # define SYSCALL_RET(_regs) (_regs).regs[4] 1786 #elif defined(__riscv) && __riscv_xlen == 64 1787 # define ARCH_REGS struct user_regs_struct 1788 # define SYSCALL_NUM(_regs) (_regs).a7 1789 # define SYSCALL_RET(_regs) (_regs).a0 1790 #elif defined(__csky__) 1791 # define ARCH_REGS struct pt_regs 1792 # if defined(__CSKYABIV2__) 1793 # define SYSCALL_NUM(_regs) (_regs).regs[3] 1794 # else 1795 # define SYSCALL_NUM(_regs) (_regs).regs[9] 1796 # endif 1797 # define SYSCALL_RET(_regs) (_regs).a0 1798 #elif defined(__hppa__) 1799 # define ARCH_REGS struct user_regs_struct 1800 # define SYSCALL_NUM(_regs) (_regs).gr[20] 1801 # define SYSCALL_RET(_regs) (_regs).gr[28] 1802 #elif defined(__powerpc__) 1803 # define ARCH_REGS struct pt_regs 1804 # define SYSCALL_NUM(_regs) (_regs).gpr[0] 1805 # define SYSCALL_RET(_regs) (_regs).gpr[3] 1806 # define SYSCALL_RET_SET(_regs, _val) \ 1807 do { \ 1808 typeof(_val) _result = (_val); \ 1809 if ((_regs.trap & 0xfff0) == 0x3000) { \ 1810 /* \ 1811 * scv 0 system call uses -ve result \ 1812 * for error, so no need to adjust. \ 1813 */ \ 1814 SYSCALL_RET(_regs) = _result; \ 1815 } else { \ 1816 /* \ 1817 * A syscall error is signaled by the \ 1818 * CR0 SO bit and the code is stored as \ 1819 * a positive value. \ 1820 */ \ 1821 if (_result < 0) { \ 1822 SYSCALL_RET(_regs) = -_result; \ 1823 (_regs).ccr |= 0x10000000; \ 1824 } else { \ 1825 SYSCALL_RET(_regs) = _result; \ 1826 (_regs).ccr &= ~0x10000000; \ 1827 } \ 1828 } \ 1829 } while (0) 1830 # define SYSCALL_RET_SET_ON_PTRACE_EXIT 1831 #elif defined(__s390__) 1832 # define ARCH_REGS s390_regs 1833 # define SYSCALL_NUM(_regs) (_regs).gprs[2] 1834 # define SYSCALL_RET_SET(_regs, _val) \ 1835 TH_LOG("Can't modify syscall return on this architecture") 1836 #elif defined(__mips__) 1837 # include <asm/unistd_nr_n32.h> 1838 # include <asm/unistd_nr_n64.h> 1839 # include <asm/unistd_nr_o32.h> 1840 # define ARCH_REGS struct pt_regs 1841 # define SYSCALL_NUM(_regs) \ 1842 ({ \ 1843 typeof((_regs).regs[2]) _nr; \ 1844 if ((_regs).regs[2] == __NR_O32_Linux) \ 1845 _nr = (_regs).regs[4]; \ 1846 else \ 1847 _nr = (_regs).regs[2]; \ 1848 _nr; \ 1849 }) 1850 # define SYSCALL_NUM_SET(_regs, _nr) \ 1851 do { \ 1852 if ((_regs).regs[2] == __NR_O32_Linux) \ 1853 (_regs).regs[4] = _nr; \ 1854 else \ 1855 (_regs).regs[2] = _nr; \ 1856 } while (0) 1857 # define SYSCALL_RET_SET(_regs, _val) \ 1858 TH_LOG("Can't modify syscall return on this architecture") 1859 #elif defined(__xtensa__) 1860 # define ARCH_REGS struct user_pt_regs 1861 # define SYSCALL_NUM(_regs) (_regs).syscall 1862 /* 1863 * On xtensa syscall return value is in the register 1864 * a2 of the current window which is not fixed. 1865 */ 1866 #define SYSCALL_RET(_regs) (_regs).a[(_regs).windowbase * 4 + 2] 1867 #elif defined(__sh__) 1868 # define ARCH_REGS struct pt_regs 1869 # define SYSCALL_NUM(_regs) (_regs).regs[3] 1870 # define SYSCALL_RET(_regs) (_regs).regs[0] 1871 #elif defined(__mc68000__) 1872 # define ARCH_REGS struct user_regs_struct 1873 # define SYSCALL_NUM(_regs) (_regs).orig_d0 1874 # define SYSCALL_RET(_regs) (_regs).d0 1875 #else 1876 # error "Do not know how to find your architecture's registers and syscalls" 1877 #endif 1878 1879 /* 1880 * Most architectures can change the syscall by just updating the 1881 * associated register. This is the default if not defined above. 1882 */ 1883 #ifndef SYSCALL_NUM_SET 1884 # define SYSCALL_NUM_SET(_regs, _nr) \ 1885 do { \ 1886 SYSCALL_NUM(_regs) = (_nr); \ 1887 } while (0) 1888 #endif 1889 /* 1890 * Most architectures can change the syscall return value by just 1891 * writing to the SYSCALL_RET register. This is the default if not 1892 * defined above. If an architecture cannot set the return value 1893 * (for example when the syscall and return value register is 1894 * shared), report it with TH_LOG() in an arch-specific definition 1895 * of SYSCALL_RET_SET() above, and leave SYSCALL_RET undefined. 1896 */ 1897 #if !defined(SYSCALL_RET) && !defined(SYSCALL_RET_SET) 1898 # error "One of SYSCALL_RET or SYSCALL_RET_SET is needed for this arch" 1899 #endif 1900 #ifndef SYSCALL_RET_SET 1901 # define SYSCALL_RET_SET(_regs, _val) \ 1902 do { \ 1903 SYSCALL_RET(_regs) = (_val); \ 1904 } while (0) 1905 #endif 1906 1907 /* When the syscall return can't be changed, stub out the tests for it. */ 1908 #ifndef SYSCALL_RET 1909 # define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action) 1910 #else 1911 # define EXPECT_SYSCALL_RETURN(val, action) \ 1912 do { \ 1913 errno = 0; \ 1914 if (val < 0) { \ 1915 EXPECT_EQ(-1, action); \ 1916 EXPECT_EQ(-(val), errno); \ 1917 } else { \ 1918 EXPECT_EQ(val, action); \ 1919 } \ 1920 } while (0) 1921 #endif 1922 1923 /* 1924 * Some architectures (e.g. powerpc) can only set syscall 1925 * return values on syscall exit during ptrace. 1926 */ 1927 const bool ptrace_entry_set_syscall_nr = true; 1928 const bool ptrace_entry_set_syscall_ret = 1929 #ifndef SYSCALL_RET_SET_ON_PTRACE_EXIT 1930 true; 1931 #else 1932 false; 1933 #endif 1934 1935 /* 1936 * Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for 1937 * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux). 1938 */ 1939 #if defined(__x86_64__) || defined(__i386__) || defined(__mips__) || defined(__mc68000__) 1940 # define ARCH_GETREGS(_regs) ptrace(PTRACE_GETREGS, tracee, 0, &(_regs)) 1941 # define ARCH_SETREGS(_regs) ptrace(PTRACE_SETREGS, tracee, 0, &(_regs)) 1942 #else 1943 # define ARCH_GETREGS(_regs) ({ \ 1944 struct iovec __v; \ 1945 __v.iov_base = &(_regs); \ 1946 __v.iov_len = sizeof(_regs); \ 1947 ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &__v); \ 1948 }) 1949 # define ARCH_SETREGS(_regs) ({ \ 1950 struct iovec __v; \ 1951 __v.iov_base = &(_regs); \ 1952 __v.iov_len = sizeof(_regs); \ 1953 ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &__v); \ 1954 }) 1955 #endif 1956 1957 /* Architecture-specific syscall fetching routine. */ 1958 int get_syscall(struct __test_metadata *_metadata, pid_t tracee) 1959 { 1960 ARCH_REGS regs; 1961 1962 EXPECT_EQ(0, ARCH_GETREGS(regs)) { 1963 return -1; 1964 } 1965 1966 return SYSCALL_NUM(regs); 1967 } 1968 1969 /* Architecture-specific syscall changing routine. */ 1970 void __change_syscall(struct __test_metadata *_metadata, 1971 pid_t tracee, long *syscall, long *ret) 1972 { 1973 ARCH_REGS orig, regs; 1974 1975 /* Do not get/set registers if we have nothing to do. */ 1976 if (!syscall && !ret) 1977 return; 1978 1979 EXPECT_EQ(0, ARCH_GETREGS(regs)) { 1980 return; 1981 } 1982 orig = regs; 1983 1984 if (syscall) 1985 SYSCALL_NUM_SET(regs, *syscall); 1986 1987 if (ret) 1988 SYSCALL_RET_SET(regs, *ret); 1989 1990 /* Flush any register changes made. */ 1991 if (memcmp(&orig, ®s, sizeof(orig)) != 0) 1992 EXPECT_EQ(0, ARCH_SETREGS(regs)); 1993 } 1994 1995 /* Change only syscall number. */ 1996 void change_syscall_nr(struct __test_metadata *_metadata, 1997 pid_t tracee, long syscall) 1998 { 1999 __change_syscall(_metadata, tracee, &syscall, NULL); 2000 } 2001 2002 /* Change syscall return value (and set syscall number to -1). */ 2003 void change_syscall_ret(struct __test_metadata *_metadata, 2004 pid_t tracee, long ret) 2005 { 2006 long syscall = -1; 2007 2008 __change_syscall(_metadata, tracee, &syscall, &ret); 2009 } 2010 2011 void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee, 2012 int status, void *args) 2013 { 2014 int ret; 2015 unsigned long msg; 2016 2017 EXPECT_EQ(PTRACE_EVENT_MASK(status), PTRACE_EVENT_SECCOMP) { 2018 TH_LOG("Unexpected ptrace event: %d", PTRACE_EVENT_MASK(status)); 2019 return; 2020 } 2021 2022 /* Make sure we got the right message. */ 2023 ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); 2024 EXPECT_EQ(0, ret); 2025 2026 /* Validate and take action on expected syscalls. */ 2027 switch (msg) { 2028 case 0x1002: 2029 /* change getpid to getppid. */ 2030 EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee)); 2031 change_syscall_nr(_metadata, tracee, __NR_getppid); 2032 break; 2033 case 0x1003: 2034 /* skip gettid with valid return code. */ 2035 EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee)); 2036 change_syscall_ret(_metadata, tracee, 45000); 2037 break; 2038 case 0x1004: 2039 /* skip openat with error. */ 2040 EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee)); 2041 change_syscall_ret(_metadata, tracee, -ESRCH); 2042 break; 2043 case 0x1005: 2044 /* do nothing (allow getppid) */ 2045 EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee)); 2046 break; 2047 default: 2048 EXPECT_EQ(0, msg) { 2049 TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg); 2050 kill(tracee, SIGKILL); 2051 } 2052 } 2053 2054 } 2055 2056 FIXTURE(TRACE_syscall) { 2057 struct sock_fprog prog; 2058 pid_t tracer, mytid, mypid, parent; 2059 long syscall_nr; 2060 }; 2061 2062 void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, 2063 int status, void *args) 2064 { 2065 int ret; 2066 unsigned long msg; 2067 static bool entry; 2068 long syscall_nr_val, syscall_ret_val; 2069 long *syscall_nr = NULL, *syscall_ret = NULL; 2070 FIXTURE_DATA(TRACE_syscall) *self = args; 2071 2072 EXPECT_EQ(WSTOPSIG(status) & 0x80, 0x80) { 2073 TH_LOG("Unexpected WSTOPSIG: %d", WSTOPSIG(status)); 2074 return; 2075 } 2076 2077 /* 2078 * The traditional way to tell PTRACE_SYSCALL entry/exit 2079 * is by counting. 2080 */ 2081 entry = !entry; 2082 2083 /* Make sure we got an appropriate message. */ 2084 ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); 2085 EXPECT_EQ(0, ret); 2086 EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY 2087 : PTRACE_EVENTMSG_SYSCALL_EXIT, msg); 2088 2089 /* 2090 * Some architectures only support setting return values during 2091 * syscall exit under ptrace, and on exit the syscall number may 2092 * no longer be available. Therefore, save the initial sycall 2093 * number here, so it can be examined during both entry and exit 2094 * phases. 2095 */ 2096 if (entry) 2097 self->syscall_nr = get_syscall(_metadata, tracee); 2098 2099 /* 2100 * Depending on the architecture's syscall setting abilities, we 2101 * pick which things to set during this phase (entry or exit). 2102 */ 2103 if (entry == ptrace_entry_set_syscall_nr) 2104 syscall_nr = &syscall_nr_val; 2105 if (entry == ptrace_entry_set_syscall_ret) 2106 syscall_ret = &syscall_ret_val; 2107 2108 /* Now handle the actual rewriting cases. */ 2109 switch (self->syscall_nr) { 2110 case __NR_getpid: 2111 syscall_nr_val = __NR_getppid; 2112 /* Never change syscall return for this case. */ 2113 syscall_ret = NULL; 2114 break; 2115 case __NR_gettid: 2116 syscall_nr_val = -1; 2117 syscall_ret_val = 45000; 2118 break; 2119 case __NR_openat: 2120 syscall_nr_val = -1; 2121 syscall_ret_val = -ESRCH; 2122 break; 2123 default: 2124 /* Unhandled, do nothing. */ 2125 return; 2126 } 2127 2128 __change_syscall(_metadata, tracee, syscall_nr, syscall_ret); 2129 } 2130 2131 FIXTURE_VARIANT(TRACE_syscall) { 2132 /* 2133 * All of the SECCOMP_RET_TRACE behaviors can be tested with either 2134 * SECCOMP_RET_TRACE+PTRACE_CONT or plain ptrace()+PTRACE_SYSCALL. 2135 * This indicates if we should use SECCOMP_RET_TRACE (false), or 2136 * ptrace (true). 2137 */ 2138 bool use_ptrace; 2139 }; 2140 2141 FIXTURE_VARIANT_ADD(TRACE_syscall, ptrace) { 2142 .use_ptrace = true, 2143 }; 2144 2145 FIXTURE_VARIANT_ADD(TRACE_syscall, seccomp) { 2146 .use_ptrace = false, 2147 }; 2148 2149 FIXTURE_SETUP(TRACE_syscall) 2150 { 2151 struct sock_filter filter[] = { 2152 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2153 offsetof(struct seccomp_data, nr)), 2154 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 2155 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002), 2156 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1), 2157 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003), 2158 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1), 2159 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004), 2160 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 2161 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005), 2162 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2163 }; 2164 struct sock_fprog prog = { 2165 .len = (unsigned short)ARRAY_SIZE(filter), 2166 .filter = filter, 2167 }; 2168 long ret; 2169 2170 /* Prepare some testable syscall results. */ 2171 self->mytid = syscall(__NR_gettid); 2172 ASSERT_GT(self->mytid, 0); 2173 ASSERT_NE(self->mytid, 1) { 2174 TH_LOG("Running this test as init is not supported. :)"); 2175 } 2176 2177 self->mypid = getpid(); 2178 ASSERT_GT(self->mypid, 0); 2179 ASSERT_EQ(self->mytid, self->mypid); 2180 2181 self->parent = getppid(); 2182 ASSERT_GT(self->parent, 0); 2183 ASSERT_NE(self->parent, self->mypid); 2184 2185 /* Launch tracer. */ 2186 self->tracer = setup_trace_fixture(_metadata, 2187 variant->use_ptrace ? tracer_ptrace 2188 : tracer_seccomp, 2189 self, variant->use_ptrace); 2190 2191 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 2192 ASSERT_EQ(0, ret); 2193 2194 /* Do not install seccomp rewrite filters, as we'll use ptrace instead. */ 2195 if (variant->use_ptrace) 2196 return; 2197 2198 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2199 ASSERT_EQ(0, ret); 2200 } 2201 2202 FIXTURE_TEARDOWN(TRACE_syscall) 2203 { 2204 teardown_trace_fixture(_metadata, self->tracer); 2205 } 2206 2207 TEST(negative_ENOSYS) 2208 { 2209 #if defined(__arm__) 2210 SKIP(return, "arm32 does not support calling syscall -1"); 2211 #endif 2212 /* 2213 * There should be no difference between an "internal" skip 2214 * and userspace asking for syscall "-1". 2215 */ 2216 errno = 0; 2217 EXPECT_EQ(-1, syscall(-1)); 2218 EXPECT_EQ(errno, ENOSYS); 2219 /* And no difference for "still not valid but not -1". */ 2220 errno = 0; 2221 EXPECT_EQ(-1, syscall(-101)); 2222 EXPECT_EQ(errno, ENOSYS); 2223 } 2224 2225 TEST_F(TRACE_syscall, negative_ENOSYS) 2226 { 2227 negative_ENOSYS(_metadata); 2228 } 2229 2230 TEST_F(TRACE_syscall, syscall_allowed) 2231 { 2232 /* getppid works as expected (no changes). */ 2233 EXPECT_EQ(self->parent, syscall(__NR_getppid)); 2234 EXPECT_NE(self->mypid, syscall(__NR_getppid)); 2235 } 2236 2237 TEST_F(TRACE_syscall, syscall_redirected) 2238 { 2239 /* getpid has been redirected to getppid as expected. */ 2240 EXPECT_EQ(self->parent, syscall(__NR_getpid)); 2241 EXPECT_NE(self->mypid, syscall(__NR_getpid)); 2242 } 2243 2244 TEST_F(TRACE_syscall, syscall_errno) 2245 { 2246 /* Tracer should skip the open syscall, resulting in ESRCH. */ 2247 EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat)); 2248 } 2249 2250 TEST_F(TRACE_syscall, syscall_faked) 2251 { 2252 /* Tracer skips the gettid syscall and store altered return value. */ 2253 EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid)); 2254 } 2255 2256 TEST_F_SIGNAL(TRACE_syscall, kill_immediate, SIGSYS) 2257 { 2258 struct sock_filter filter[] = { 2259 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2260 offsetof(struct seccomp_data, nr)), 2261 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_mknodat, 0, 1), 2262 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD), 2263 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2264 }; 2265 struct sock_fprog prog = { 2266 .len = (unsigned short)ARRAY_SIZE(filter), 2267 .filter = filter, 2268 }; 2269 long ret; 2270 2271 /* Install "kill on mknodat" filter. */ 2272 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2273 ASSERT_EQ(0, ret); 2274 2275 /* This should immediately die with SIGSYS, regardless of tracer. */ 2276 EXPECT_EQ(-1, syscall(__NR_mknodat, -1, NULL, 0, 0)); 2277 } 2278 2279 TEST_F(TRACE_syscall, skip_after) 2280 { 2281 struct sock_filter filter[] = { 2282 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2283 offsetof(struct seccomp_data, nr)), 2284 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 2285 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM), 2286 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2287 }; 2288 struct sock_fprog prog = { 2289 .len = (unsigned short)ARRAY_SIZE(filter), 2290 .filter = filter, 2291 }; 2292 long ret; 2293 2294 /* Install additional "errno on getppid" filter. */ 2295 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2296 ASSERT_EQ(0, ret); 2297 2298 /* Tracer will redirect getpid to getppid, and we should see EPERM. */ 2299 errno = 0; 2300 EXPECT_EQ(-1, syscall(__NR_getpid)); 2301 EXPECT_EQ(EPERM, errno); 2302 } 2303 2304 TEST_F_SIGNAL(TRACE_syscall, kill_after, SIGSYS) 2305 { 2306 struct sock_filter filter[] = { 2307 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2308 offsetof(struct seccomp_data, nr)), 2309 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 2310 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 2311 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2312 }; 2313 struct sock_fprog prog = { 2314 .len = (unsigned short)ARRAY_SIZE(filter), 2315 .filter = filter, 2316 }; 2317 long ret; 2318 2319 /* Install additional "death on getppid" filter. */ 2320 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2321 ASSERT_EQ(0, ret); 2322 2323 /* Tracer will redirect getpid to getppid, and we should die. */ 2324 EXPECT_NE(self->mypid, syscall(__NR_getpid)); 2325 } 2326 2327 TEST(seccomp_syscall) 2328 { 2329 struct sock_filter filter[] = { 2330 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2331 }; 2332 struct sock_fprog prog = { 2333 .len = (unsigned short)ARRAY_SIZE(filter), 2334 .filter = filter, 2335 }; 2336 long ret; 2337 2338 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 2339 ASSERT_EQ(0, ret) { 2340 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2341 } 2342 2343 /* Reject insane operation. */ 2344 ret = seccomp(-1, 0, &prog); 2345 ASSERT_NE(ENOSYS, errno) { 2346 TH_LOG("Kernel does not support seccomp syscall!"); 2347 } 2348 EXPECT_EQ(EINVAL, errno) { 2349 TH_LOG("Did not reject crazy op value!"); 2350 } 2351 2352 /* Reject strict with flags or pointer. */ 2353 ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL); 2354 EXPECT_EQ(EINVAL, errno) { 2355 TH_LOG("Did not reject mode strict with flags!"); 2356 } 2357 ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog); 2358 EXPECT_EQ(EINVAL, errno) { 2359 TH_LOG("Did not reject mode strict with uargs!"); 2360 } 2361 2362 /* Reject insane args for filter. */ 2363 ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog); 2364 EXPECT_EQ(EINVAL, errno) { 2365 TH_LOG("Did not reject crazy filter flags!"); 2366 } 2367 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL); 2368 EXPECT_EQ(EFAULT, errno) { 2369 TH_LOG("Did not reject NULL filter!"); 2370 } 2371 2372 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); 2373 EXPECT_EQ(0, errno) { 2374 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s", 2375 strerror(errno)); 2376 } 2377 } 2378 2379 TEST(seccomp_syscall_mode_lock) 2380 { 2381 struct sock_filter filter[] = { 2382 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2383 }; 2384 struct sock_fprog prog = { 2385 .len = (unsigned short)ARRAY_SIZE(filter), 2386 .filter = filter, 2387 }; 2388 long ret; 2389 2390 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); 2391 ASSERT_EQ(0, ret) { 2392 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2393 } 2394 2395 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); 2396 ASSERT_NE(ENOSYS, errno) { 2397 TH_LOG("Kernel does not support seccomp syscall!"); 2398 } 2399 EXPECT_EQ(0, ret) { 2400 TH_LOG("Could not install filter!"); 2401 } 2402 2403 /* Make sure neither entry point will switch to strict. */ 2404 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0); 2405 EXPECT_EQ(EINVAL, errno) { 2406 TH_LOG("Switched to mode strict!"); 2407 } 2408 2409 ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL); 2410 EXPECT_EQ(EINVAL, errno) { 2411 TH_LOG("Switched to mode strict!"); 2412 } 2413 } 2414 2415 /* 2416 * Test detection of known and unknown filter flags. Userspace needs to be able 2417 * to check if a filter flag is supported by the current kernel and a good way 2418 * of doing that is by attempting to enter filter mode, with the flag bit in 2419 * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates 2420 * that the flag is valid and EINVAL indicates that the flag is invalid. 2421 */ 2422 TEST(detect_seccomp_filter_flags) 2423 { 2424 unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC, 2425 SECCOMP_FILTER_FLAG_LOG, 2426 SECCOMP_FILTER_FLAG_SPEC_ALLOW, 2427 SECCOMP_FILTER_FLAG_NEW_LISTENER, 2428 SECCOMP_FILTER_FLAG_TSYNC_ESRCH }; 2429 unsigned int exclusive[] = { 2430 SECCOMP_FILTER_FLAG_TSYNC, 2431 SECCOMP_FILTER_FLAG_NEW_LISTENER }; 2432 unsigned int flag, all_flags, exclusive_mask; 2433 int i; 2434 long ret; 2435 2436 /* Test detection of individual known-good filter flags */ 2437 for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) { 2438 int bits = 0; 2439 2440 flag = flags[i]; 2441 /* Make sure the flag is a single bit! */ 2442 while (flag) { 2443 if (flag & 0x1) 2444 bits ++; 2445 flag >>= 1; 2446 } 2447 ASSERT_EQ(1, bits); 2448 flag = flags[i]; 2449 2450 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2451 ASSERT_NE(ENOSYS, errno) { 2452 TH_LOG("Kernel does not support seccomp syscall!"); 2453 } 2454 EXPECT_EQ(-1, ret); 2455 EXPECT_EQ(EFAULT, errno) { 2456 TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!", 2457 flag); 2458 } 2459 2460 all_flags |= flag; 2461 } 2462 2463 /* 2464 * Test detection of all known-good filter flags combined. But 2465 * for the exclusive flags we need to mask them out and try them 2466 * individually for the "all flags" testing. 2467 */ 2468 exclusive_mask = 0; 2469 for (i = 0; i < ARRAY_SIZE(exclusive); i++) 2470 exclusive_mask |= exclusive[i]; 2471 for (i = 0; i < ARRAY_SIZE(exclusive); i++) { 2472 flag = all_flags & ~exclusive_mask; 2473 flag |= exclusive[i]; 2474 2475 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2476 EXPECT_EQ(-1, ret); 2477 EXPECT_EQ(EFAULT, errno) { 2478 TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!", 2479 flag); 2480 } 2481 } 2482 2483 /* Test detection of an unknown filter flags, without exclusives. */ 2484 flag = -1; 2485 flag &= ~exclusive_mask; 2486 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2487 EXPECT_EQ(-1, ret); 2488 EXPECT_EQ(EINVAL, errno) { 2489 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!", 2490 flag); 2491 } 2492 2493 /* 2494 * Test detection of an unknown filter flag that may simply need to be 2495 * added to this test 2496 */ 2497 flag = flags[ARRAY_SIZE(flags) - 1] << 1; 2498 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2499 EXPECT_EQ(-1, ret); 2500 EXPECT_EQ(EINVAL, errno) { 2501 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?", 2502 flag); 2503 } 2504 } 2505 2506 TEST(TSYNC_first) 2507 { 2508 struct sock_filter filter[] = { 2509 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2510 }; 2511 struct sock_fprog prog = { 2512 .len = (unsigned short)ARRAY_SIZE(filter), 2513 .filter = filter, 2514 }; 2515 long ret; 2516 2517 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); 2518 ASSERT_EQ(0, ret) { 2519 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2520 } 2521 2522 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2523 &prog); 2524 ASSERT_NE(ENOSYS, errno) { 2525 TH_LOG("Kernel does not support seccomp syscall!"); 2526 } 2527 EXPECT_EQ(0, ret) { 2528 TH_LOG("Could not install initial filter with TSYNC!"); 2529 } 2530 } 2531 2532 #define TSYNC_SIBLINGS 2 2533 struct tsync_sibling { 2534 pthread_t tid; 2535 pid_t system_tid; 2536 sem_t *started; 2537 pthread_cond_t *cond; 2538 pthread_mutex_t *mutex; 2539 int diverge; 2540 int num_waits; 2541 struct sock_fprog *prog; 2542 struct __test_metadata *metadata; 2543 }; 2544 2545 /* 2546 * To avoid joining joined threads (which is not allowed by Bionic), 2547 * make sure we both successfully join and clear the tid to skip a 2548 * later join attempt during fixture teardown. Any remaining threads 2549 * will be directly killed during teardown. 2550 */ 2551 #define PTHREAD_JOIN(tid, status) \ 2552 do { \ 2553 int _rc = pthread_join(tid, status); \ 2554 if (_rc) { \ 2555 TH_LOG("pthread_join of tid %u failed: %d\n", \ 2556 (unsigned int)tid, _rc); \ 2557 } else { \ 2558 tid = 0; \ 2559 } \ 2560 } while (0) 2561 2562 FIXTURE(TSYNC) { 2563 struct sock_fprog root_prog, apply_prog; 2564 struct tsync_sibling sibling[TSYNC_SIBLINGS]; 2565 sem_t started; 2566 pthread_cond_t cond; 2567 pthread_mutex_t mutex; 2568 int sibling_count; 2569 }; 2570 2571 FIXTURE_SETUP(TSYNC) 2572 { 2573 struct sock_filter root_filter[] = { 2574 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2575 }; 2576 struct sock_filter apply_filter[] = { 2577 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2578 offsetof(struct seccomp_data, nr)), 2579 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), 2580 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 2581 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2582 }; 2583 2584 memset(&self->root_prog, 0, sizeof(self->root_prog)); 2585 memset(&self->apply_prog, 0, sizeof(self->apply_prog)); 2586 memset(&self->sibling, 0, sizeof(self->sibling)); 2587 self->root_prog.filter = malloc(sizeof(root_filter)); 2588 ASSERT_NE(NULL, self->root_prog.filter); 2589 memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter)); 2590 self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter); 2591 2592 self->apply_prog.filter = malloc(sizeof(apply_filter)); 2593 ASSERT_NE(NULL, self->apply_prog.filter); 2594 memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter)); 2595 self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter); 2596 2597 self->sibling_count = 0; 2598 pthread_mutex_init(&self->mutex, NULL); 2599 pthread_cond_init(&self->cond, NULL); 2600 sem_init(&self->started, 0, 0); 2601 self->sibling[0].tid = 0; 2602 self->sibling[0].cond = &self->cond; 2603 self->sibling[0].started = &self->started; 2604 self->sibling[0].mutex = &self->mutex; 2605 self->sibling[0].diverge = 0; 2606 self->sibling[0].num_waits = 1; 2607 self->sibling[0].prog = &self->root_prog; 2608 self->sibling[0].metadata = _metadata; 2609 self->sibling[1].tid = 0; 2610 self->sibling[1].cond = &self->cond; 2611 self->sibling[1].started = &self->started; 2612 self->sibling[1].mutex = &self->mutex; 2613 self->sibling[1].diverge = 0; 2614 self->sibling[1].prog = &self->root_prog; 2615 self->sibling[1].num_waits = 1; 2616 self->sibling[1].metadata = _metadata; 2617 } 2618 2619 FIXTURE_TEARDOWN(TSYNC) 2620 { 2621 int sib = 0; 2622 2623 if (self->root_prog.filter) 2624 free(self->root_prog.filter); 2625 if (self->apply_prog.filter) 2626 free(self->apply_prog.filter); 2627 2628 for ( ; sib < self->sibling_count; ++sib) { 2629 struct tsync_sibling *s = &self->sibling[sib]; 2630 2631 if (!s->tid) 2632 continue; 2633 /* 2634 * If a thread is still running, it may be stuck, so hit 2635 * it over the head really hard. 2636 */ 2637 pthread_kill(s->tid, 9); 2638 } 2639 pthread_mutex_destroy(&self->mutex); 2640 pthread_cond_destroy(&self->cond); 2641 sem_destroy(&self->started); 2642 } 2643 2644 void *tsync_sibling(void *data) 2645 { 2646 long ret = 0; 2647 struct tsync_sibling *me = data; 2648 2649 me->system_tid = syscall(__NR_gettid); 2650 2651 pthread_mutex_lock(me->mutex); 2652 if (me->diverge) { 2653 /* Just re-apply the root prog to fork the tree */ 2654 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, 2655 me->prog, 0, 0); 2656 } 2657 sem_post(me->started); 2658 /* Return outside of started so parent notices failures. */ 2659 if (ret) { 2660 pthread_mutex_unlock(me->mutex); 2661 return (void *)SIBLING_EXIT_FAILURE; 2662 } 2663 do { 2664 pthread_cond_wait(me->cond, me->mutex); 2665 me->num_waits = me->num_waits - 1; 2666 } while (me->num_waits); 2667 pthread_mutex_unlock(me->mutex); 2668 2669 ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); 2670 if (!ret) 2671 return (void *)SIBLING_EXIT_NEWPRIVS; 2672 read(-1, NULL, 0); 2673 return (void *)SIBLING_EXIT_UNKILLED; 2674 } 2675 2676 void tsync_start_sibling(struct tsync_sibling *sibling) 2677 { 2678 pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling); 2679 } 2680 2681 TEST_F(TSYNC, siblings_fail_prctl) 2682 { 2683 long ret; 2684 void *status; 2685 struct sock_filter filter[] = { 2686 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2687 offsetof(struct seccomp_data, nr)), 2688 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), 2689 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL), 2690 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2691 }; 2692 struct sock_fprog prog = { 2693 .len = (unsigned short)ARRAY_SIZE(filter), 2694 .filter = filter, 2695 }; 2696 2697 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2698 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2699 } 2700 2701 /* Check prctl failure detection by requesting sib 0 diverge. */ 2702 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); 2703 ASSERT_NE(ENOSYS, errno) { 2704 TH_LOG("Kernel does not support seccomp syscall!"); 2705 } 2706 ASSERT_EQ(0, ret) { 2707 TH_LOG("setting filter failed"); 2708 } 2709 2710 self->sibling[0].diverge = 1; 2711 tsync_start_sibling(&self->sibling[0]); 2712 tsync_start_sibling(&self->sibling[1]); 2713 2714 while (self->sibling_count < TSYNC_SIBLINGS) { 2715 sem_wait(&self->started); 2716 self->sibling_count++; 2717 } 2718 2719 /* Signal the threads to clean up*/ 2720 pthread_mutex_lock(&self->mutex); 2721 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2722 TH_LOG("cond broadcast non-zero"); 2723 } 2724 pthread_mutex_unlock(&self->mutex); 2725 2726 /* Ensure diverging sibling failed to call prctl. */ 2727 PTHREAD_JOIN(self->sibling[0].tid, &status); 2728 EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status); 2729 PTHREAD_JOIN(self->sibling[1].tid, &status); 2730 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2731 } 2732 2733 TEST_F(TSYNC, two_siblings_with_ancestor) 2734 { 2735 long ret; 2736 void *status; 2737 2738 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2739 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2740 } 2741 2742 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2743 ASSERT_NE(ENOSYS, errno) { 2744 TH_LOG("Kernel does not support seccomp syscall!"); 2745 } 2746 ASSERT_EQ(0, ret) { 2747 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2748 } 2749 tsync_start_sibling(&self->sibling[0]); 2750 tsync_start_sibling(&self->sibling[1]); 2751 2752 while (self->sibling_count < TSYNC_SIBLINGS) { 2753 sem_wait(&self->started); 2754 self->sibling_count++; 2755 } 2756 2757 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2758 &self->apply_prog); 2759 ASSERT_EQ(0, ret) { 2760 TH_LOG("Could install filter on all threads!"); 2761 } 2762 /* Tell the siblings to test the policy */ 2763 pthread_mutex_lock(&self->mutex); 2764 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2765 TH_LOG("cond broadcast non-zero"); 2766 } 2767 pthread_mutex_unlock(&self->mutex); 2768 /* Ensure they are both killed and don't exit cleanly. */ 2769 PTHREAD_JOIN(self->sibling[0].tid, &status); 2770 EXPECT_EQ(0x0, (long)status); 2771 PTHREAD_JOIN(self->sibling[1].tid, &status); 2772 EXPECT_EQ(0x0, (long)status); 2773 } 2774 2775 TEST_F(TSYNC, two_sibling_want_nnp) 2776 { 2777 void *status; 2778 2779 /* start siblings before any prctl() operations */ 2780 tsync_start_sibling(&self->sibling[0]); 2781 tsync_start_sibling(&self->sibling[1]); 2782 while (self->sibling_count < TSYNC_SIBLINGS) { 2783 sem_wait(&self->started); 2784 self->sibling_count++; 2785 } 2786 2787 /* Tell the siblings to test no policy */ 2788 pthread_mutex_lock(&self->mutex); 2789 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2790 TH_LOG("cond broadcast non-zero"); 2791 } 2792 pthread_mutex_unlock(&self->mutex); 2793 2794 /* Ensure they are both upset about lacking nnp. */ 2795 PTHREAD_JOIN(self->sibling[0].tid, &status); 2796 EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); 2797 PTHREAD_JOIN(self->sibling[1].tid, &status); 2798 EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); 2799 } 2800 2801 TEST_F(TSYNC, two_siblings_with_no_filter) 2802 { 2803 long ret; 2804 void *status; 2805 2806 /* start siblings before any prctl() operations */ 2807 tsync_start_sibling(&self->sibling[0]); 2808 tsync_start_sibling(&self->sibling[1]); 2809 while (self->sibling_count < TSYNC_SIBLINGS) { 2810 sem_wait(&self->started); 2811 self->sibling_count++; 2812 } 2813 2814 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2815 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2816 } 2817 2818 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2819 &self->apply_prog); 2820 ASSERT_NE(ENOSYS, errno) { 2821 TH_LOG("Kernel does not support seccomp syscall!"); 2822 } 2823 ASSERT_EQ(0, ret) { 2824 TH_LOG("Could install filter on all threads!"); 2825 } 2826 2827 /* Tell the siblings to test the policy */ 2828 pthread_mutex_lock(&self->mutex); 2829 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2830 TH_LOG("cond broadcast non-zero"); 2831 } 2832 pthread_mutex_unlock(&self->mutex); 2833 2834 /* Ensure they are both killed and don't exit cleanly. */ 2835 PTHREAD_JOIN(self->sibling[0].tid, &status); 2836 EXPECT_EQ(0x0, (long)status); 2837 PTHREAD_JOIN(self->sibling[1].tid, &status); 2838 EXPECT_EQ(0x0, (long)status); 2839 } 2840 2841 TEST_F(TSYNC, two_siblings_with_one_divergence) 2842 { 2843 long ret; 2844 void *status; 2845 2846 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2847 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2848 } 2849 2850 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2851 ASSERT_NE(ENOSYS, errno) { 2852 TH_LOG("Kernel does not support seccomp syscall!"); 2853 } 2854 ASSERT_EQ(0, ret) { 2855 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2856 } 2857 self->sibling[0].diverge = 1; 2858 tsync_start_sibling(&self->sibling[0]); 2859 tsync_start_sibling(&self->sibling[1]); 2860 2861 while (self->sibling_count < TSYNC_SIBLINGS) { 2862 sem_wait(&self->started); 2863 self->sibling_count++; 2864 } 2865 2866 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2867 &self->apply_prog); 2868 ASSERT_EQ(self->sibling[0].system_tid, ret) { 2869 TH_LOG("Did not fail on diverged sibling."); 2870 } 2871 2872 /* Wake the threads */ 2873 pthread_mutex_lock(&self->mutex); 2874 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2875 TH_LOG("cond broadcast non-zero"); 2876 } 2877 pthread_mutex_unlock(&self->mutex); 2878 2879 /* Ensure they are both unkilled. */ 2880 PTHREAD_JOIN(self->sibling[0].tid, &status); 2881 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2882 PTHREAD_JOIN(self->sibling[1].tid, &status); 2883 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2884 } 2885 2886 TEST_F(TSYNC, two_siblings_with_one_divergence_no_tid_in_err) 2887 { 2888 long ret, flags; 2889 void *status; 2890 2891 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2892 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2893 } 2894 2895 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2896 ASSERT_NE(ENOSYS, errno) { 2897 TH_LOG("Kernel does not support seccomp syscall!"); 2898 } 2899 ASSERT_EQ(0, ret) { 2900 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2901 } 2902 self->sibling[0].diverge = 1; 2903 tsync_start_sibling(&self->sibling[0]); 2904 tsync_start_sibling(&self->sibling[1]); 2905 2906 while (self->sibling_count < TSYNC_SIBLINGS) { 2907 sem_wait(&self->started); 2908 self->sibling_count++; 2909 } 2910 2911 flags = SECCOMP_FILTER_FLAG_TSYNC | \ 2912 SECCOMP_FILTER_FLAG_TSYNC_ESRCH; 2913 ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, &self->apply_prog); 2914 ASSERT_EQ(ESRCH, errno) { 2915 TH_LOG("Did not return ESRCH for diverged sibling."); 2916 } 2917 ASSERT_EQ(-1, ret) { 2918 TH_LOG("Did not fail on diverged sibling."); 2919 } 2920 2921 /* Wake the threads */ 2922 pthread_mutex_lock(&self->mutex); 2923 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2924 TH_LOG("cond broadcast non-zero"); 2925 } 2926 pthread_mutex_unlock(&self->mutex); 2927 2928 /* Ensure they are both unkilled. */ 2929 PTHREAD_JOIN(self->sibling[0].tid, &status); 2930 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2931 PTHREAD_JOIN(self->sibling[1].tid, &status); 2932 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2933 } 2934 2935 TEST_F(TSYNC, two_siblings_not_under_filter) 2936 { 2937 long ret, sib; 2938 void *status; 2939 struct timespec delay = { .tv_nsec = 100000000 }; 2940 2941 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2942 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2943 } 2944 2945 /* 2946 * Sibling 0 will have its own seccomp policy 2947 * and Sibling 1 will not be under seccomp at 2948 * all. Sibling 1 will enter seccomp and 0 2949 * will cause failure. 2950 */ 2951 self->sibling[0].diverge = 1; 2952 tsync_start_sibling(&self->sibling[0]); 2953 tsync_start_sibling(&self->sibling[1]); 2954 2955 while (self->sibling_count < TSYNC_SIBLINGS) { 2956 sem_wait(&self->started); 2957 self->sibling_count++; 2958 } 2959 2960 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2961 ASSERT_NE(ENOSYS, errno) { 2962 TH_LOG("Kernel does not support seccomp syscall!"); 2963 } 2964 ASSERT_EQ(0, ret) { 2965 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2966 } 2967 2968 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2969 &self->apply_prog); 2970 ASSERT_EQ(ret, self->sibling[0].system_tid) { 2971 TH_LOG("Did not fail on diverged sibling."); 2972 } 2973 sib = 1; 2974 if (ret == self->sibling[0].system_tid) 2975 sib = 0; 2976 2977 pthread_mutex_lock(&self->mutex); 2978 2979 /* Increment the other siblings num_waits so we can clean up 2980 * the one we just saw. 2981 */ 2982 self->sibling[!sib].num_waits += 1; 2983 2984 /* Signal the thread to clean up*/ 2985 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2986 TH_LOG("cond broadcast non-zero"); 2987 } 2988 pthread_mutex_unlock(&self->mutex); 2989 PTHREAD_JOIN(self->sibling[sib].tid, &status); 2990 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2991 /* Poll for actual task death. pthread_join doesn't guarantee it. */ 2992 while (!kill(self->sibling[sib].system_tid, 0)) 2993 nanosleep(&delay, NULL); 2994 /* Switch to the remaining sibling */ 2995 sib = !sib; 2996 2997 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2998 &self->apply_prog); 2999 ASSERT_EQ(0, ret) { 3000 TH_LOG("Expected the remaining sibling to sync"); 3001 }; 3002 3003 pthread_mutex_lock(&self->mutex); 3004 3005 /* If remaining sibling didn't have a chance to wake up during 3006 * the first broadcast, manually reduce the num_waits now. 3007 */ 3008 if (self->sibling[sib].num_waits > 1) 3009 self->sibling[sib].num_waits = 1; 3010 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 3011 TH_LOG("cond broadcast non-zero"); 3012 } 3013 pthread_mutex_unlock(&self->mutex); 3014 PTHREAD_JOIN(self->sibling[sib].tid, &status); 3015 EXPECT_EQ(0, (long)status); 3016 /* Poll for actual task death. pthread_join doesn't guarantee it. */ 3017 while (!kill(self->sibling[sib].system_tid, 0)) 3018 nanosleep(&delay, NULL); 3019 3020 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 3021 &self->apply_prog); 3022 ASSERT_EQ(0, ret); /* just us chickens */ 3023 } 3024 3025 /* Make sure restarted syscalls are seen directly as "restart_syscall". */ 3026 TEST(syscall_restart) 3027 { 3028 long ret; 3029 unsigned long msg; 3030 pid_t child_pid; 3031 int pipefd[2]; 3032 int status; 3033 siginfo_t info = { }; 3034 struct sock_filter filter[] = { 3035 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 3036 offsetof(struct seccomp_data, nr)), 3037 3038 #ifdef __NR_sigreturn 3039 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 7, 0), 3040 #endif 3041 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 6, 0), 3042 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 5, 0), 3043 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 4, 0), 3044 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 5, 0), 3045 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_clock_nanosleep, 4, 0), 3046 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0), 3047 3048 /* Allow __NR_write for easy logging. */ 3049 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1), 3050 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3051 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 3052 /* The nanosleep jump target. */ 3053 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100), 3054 /* The restart_syscall jump target. */ 3055 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200), 3056 }; 3057 struct sock_fprog prog = { 3058 .len = (unsigned short)ARRAY_SIZE(filter), 3059 .filter = filter, 3060 }; 3061 #if defined(__arm__) 3062 struct utsname utsbuf; 3063 #endif 3064 3065 ASSERT_EQ(0, pipe(pipefd)); 3066 3067 child_pid = fork(); 3068 ASSERT_LE(0, child_pid); 3069 if (child_pid == 0) { 3070 /* Child uses EXPECT not ASSERT to deliver status correctly. */ 3071 char buf = ' '; 3072 struct timespec timeout = { }; 3073 3074 /* Attach parent as tracer and stop. */ 3075 EXPECT_EQ(0, ptrace(PTRACE_TRACEME)); 3076 EXPECT_EQ(0, raise(SIGSTOP)); 3077 3078 EXPECT_EQ(0, close(pipefd[1])); 3079 3080 EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 3081 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3082 } 3083 3084 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 3085 EXPECT_EQ(0, ret) { 3086 TH_LOG("Failed to install filter!"); 3087 } 3088 3089 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) { 3090 TH_LOG("Failed to read() sync from parent"); 3091 } 3092 EXPECT_EQ('.', buf) { 3093 TH_LOG("Failed to get sync data from read()"); 3094 } 3095 3096 /* Start nanosleep to be interrupted. */ 3097 timeout.tv_sec = 1; 3098 errno = 0; 3099 EXPECT_EQ(0, nanosleep(&timeout, NULL)) { 3100 TH_LOG("Call to nanosleep() failed (errno %d: %s)", 3101 errno, strerror(errno)); 3102 } 3103 3104 /* Read final sync from parent. */ 3105 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) { 3106 TH_LOG("Failed final read() from parent"); 3107 } 3108 EXPECT_EQ('!', buf) { 3109 TH_LOG("Failed to get final data from read()"); 3110 } 3111 3112 /* Directly report the status of our test harness results. */ 3113 syscall(__NR_exit, _metadata->exit_code); 3114 } 3115 EXPECT_EQ(0, close(pipefd[0])); 3116 3117 /* Attach to child, setup options, and release. */ 3118 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 3119 ASSERT_EQ(true, WIFSTOPPED(status)); 3120 ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL, 3121 PTRACE_O_TRACESECCOMP)); 3122 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 3123 ASSERT_EQ(1, write(pipefd[1], ".", 1)); 3124 3125 /* Wait for nanosleep() to start. */ 3126 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 3127 ASSERT_EQ(true, WIFSTOPPED(status)); 3128 ASSERT_EQ(SIGTRAP, WSTOPSIG(status)); 3129 ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16)); 3130 ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg)); 3131 ASSERT_EQ(0x100, msg); 3132 ret = get_syscall(_metadata, child_pid); 3133 EXPECT_TRUE(ret == __NR_nanosleep || ret == __NR_clock_nanosleep); 3134 3135 /* Might as well check siginfo for sanity while we're here. */ 3136 ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); 3137 ASSERT_EQ(SIGTRAP, info.si_signo); 3138 ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code); 3139 EXPECT_EQ(0, info.si_errno); 3140 EXPECT_EQ(getuid(), info.si_uid); 3141 /* Verify signal delivery came from child (seccomp-triggered). */ 3142 EXPECT_EQ(child_pid, info.si_pid); 3143 3144 /* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */ 3145 ASSERT_EQ(0, kill(child_pid, SIGSTOP)); 3146 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 3147 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 3148 ASSERT_EQ(true, WIFSTOPPED(status)); 3149 ASSERT_EQ(SIGSTOP, WSTOPSIG(status)); 3150 ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); 3151 /* 3152 * There is no siginfo on SIGSTOP any more, so we can't verify 3153 * signal delivery came from parent now (getpid() == info.si_pid). 3154 * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com 3155 * At least verify the SIGSTOP via PTRACE_GETSIGINFO. 3156 */ 3157 EXPECT_EQ(SIGSTOP, info.si_signo); 3158 3159 /* Restart nanosleep with SIGCONT, which triggers restart_syscall. */ 3160 ASSERT_EQ(0, kill(child_pid, SIGCONT)); 3161 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 3162 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 3163 ASSERT_EQ(true, WIFSTOPPED(status)); 3164 ASSERT_EQ(SIGCONT, WSTOPSIG(status)); 3165 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 3166 3167 /* Wait for restart_syscall() to start. */ 3168 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 3169 ASSERT_EQ(true, WIFSTOPPED(status)); 3170 ASSERT_EQ(SIGTRAP, WSTOPSIG(status)); 3171 ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16)); 3172 ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg)); 3173 3174 ASSERT_EQ(0x200, msg); 3175 ret = get_syscall(_metadata, child_pid); 3176 #if defined(__arm__) 3177 /* 3178 * - native ARM registers do NOT expose true syscall. 3179 * - compat ARM registers on ARM64 DO expose true syscall. 3180 * - values of utsbuf.machine include 'armv8l' or 'armb8b' 3181 * for ARM64 running in compat mode. 3182 */ 3183 ASSERT_EQ(0, uname(&utsbuf)); 3184 if ((strncmp(utsbuf.machine, "arm", 3) == 0) && 3185 (strncmp(utsbuf.machine, "armv8l", 6) != 0) && 3186 (strncmp(utsbuf.machine, "armv8b", 6) != 0)) { 3187 EXPECT_EQ(__NR_nanosleep, ret); 3188 } else 3189 #endif 3190 { 3191 EXPECT_EQ(__NR_restart_syscall, ret); 3192 } 3193 3194 /* Write again to end test. */ 3195 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 3196 ASSERT_EQ(1, write(pipefd[1], "!", 1)); 3197 EXPECT_EQ(0, close(pipefd[1])); 3198 3199 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 3200 if (WIFSIGNALED(status) || WEXITSTATUS(status)) 3201 _metadata->exit_code = KSFT_FAIL; 3202 } 3203 3204 TEST_SIGNAL(filter_flag_log, SIGSYS) 3205 { 3206 struct sock_filter allow_filter[] = { 3207 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3208 }; 3209 struct sock_filter kill_filter[] = { 3210 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 3211 offsetof(struct seccomp_data, nr)), 3212 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 3213 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 3214 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3215 }; 3216 struct sock_fprog allow_prog = { 3217 .len = (unsigned short)ARRAY_SIZE(allow_filter), 3218 .filter = allow_filter, 3219 }; 3220 struct sock_fprog kill_prog = { 3221 .len = (unsigned short)ARRAY_SIZE(kill_filter), 3222 .filter = kill_filter, 3223 }; 3224 long ret; 3225 pid_t parent = getppid(); 3226 3227 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3228 ASSERT_EQ(0, ret); 3229 3230 /* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */ 3231 ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG, 3232 &allow_prog); 3233 ASSERT_NE(ENOSYS, errno) { 3234 TH_LOG("Kernel does not support seccomp syscall!"); 3235 } 3236 EXPECT_NE(0, ret) { 3237 TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!"); 3238 } 3239 EXPECT_EQ(EINVAL, errno) { 3240 TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!"); 3241 } 3242 3243 /* Verify that a simple, permissive filter can be added with no flags */ 3244 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog); 3245 EXPECT_EQ(0, ret); 3246 3247 /* See if the same filter can be added with the FILTER_FLAG_LOG flag */ 3248 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, 3249 &allow_prog); 3250 ASSERT_NE(EINVAL, errno) { 3251 TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!"); 3252 } 3253 EXPECT_EQ(0, ret); 3254 3255 /* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */ 3256 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, 3257 &kill_prog); 3258 EXPECT_EQ(0, ret); 3259 3260 EXPECT_EQ(parent, syscall(__NR_getppid)); 3261 /* getpid() should never return. */ 3262 EXPECT_EQ(0, syscall(__NR_getpid)); 3263 } 3264 3265 TEST(get_action_avail) 3266 { 3267 __u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP, 3268 SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE, 3269 SECCOMP_RET_LOG, SECCOMP_RET_ALLOW }; 3270 __u32 unknown_action = 0x10000000U; 3271 int i; 3272 long ret; 3273 3274 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]); 3275 ASSERT_NE(ENOSYS, errno) { 3276 TH_LOG("Kernel does not support seccomp syscall!"); 3277 } 3278 ASSERT_NE(EINVAL, errno) { 3279 TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!"); 3280 } 3281 EXPECT_EQ(ret, 0); 3282 3283 for (i = 0; i < ARRAY_SIZE(actions); i++) { 3284 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]); 3285 EXPECT_EQ(ret, 0) { 3286 TH_LOG("Expected action (0x%X) not available!", 3287 actions[i]); 3288 } 3289 } 3290 3291 /* Check that an unknown action is handled properly (EOPNOTSUPP) */ 3292 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action); 3293 EXPECT_EQ(ret, -1); 3294 EXPECT_EQ(errno, EOPNOTSUPP); 3295 } 3296 3297 TEST(get_metadata) 3298 { 3299 pid_t pid; 3300 int pipefd[2]; 3301 char buf; 3302 struct seccomp_metadata md; 3303 long ret; 3304 3305 /* Only real root can get metadata. */ 3306 if (geteuid()) { 3307 SKIP(return, "get_metadata requires real root"); 3308 return; 3309 } 3310 3311 ASSERT_EQ(0, pipe(pipefd)); 3312 3313 pid = fork(); 3314 ASSERT_GE(pid, 0); 3315 if (pid == 0) { 3316 struct sock_filter filter[] = { 3317 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3318 }; 3319 struct sock_fprog prog = { 3320 .len = (unsigned short)ARRAY_SIZE(filter), 3321 .filter = filter, 3322 }; 3323 3324 /* one with log, one without */ 3325 EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 3326 SECCOMP_FILTER_FLAG_LOG, &prog)); 3327 EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog)); 3328 3329 EXPECT_EQ(0, close(pipefd[0])); 3330 ASSERT_EQ(1, write(pipefd[1], "1", 1)); 3331 ASSERT_EQ(0, close(pipefd[1])); 3332 3333 while (1) 3334 sleep(100); 3335 } 3336 3337 ASSERT_EQ(0, close(pipefd[1])); 3338 ASSERT_EQ(1, read(pipefd[0], &buf, 1)); 3339 3340 ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid)); 3341 ASSERT_EQ(pid, waitpid(pid, NULL, 0)); 3342 3343 /* Past here must not use ASSERT or child process is never killed. */ 3344 3345 md.filter_off = 0; 3346 errno = 0; 3347 ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md); 3348 EXPECT_EQ(sizeof(md), ret) { 3349 if (errno == EINVAL) 3350 SKIP(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)"); 3351 } 3352 3353 EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG); 3354 EXPECT_EQ(md.filter_off, 0); 3355 3356 md.filter_off = 1; 3357 ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md); 3358 EXPECT_EQ(sizeof(md), ret); 3359 EXPECT_EQ(md.flags, 0); 3360 EXPECT_EQ(md.filter_off, 1); 3361 3362 skip: 3363 ASSERT_EQ(0, kill(pid, SIGKILL)); 3364 } 3365 3366 static int user_notif_syscall(int nr, unsigned int flags) 3367 { 3368 struct sock_filter filter[] = { 3369 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 3370 offsetof(struct seccomp_data, nr)), 3371 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1), 3372 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF), 3373 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3374 }; 3375 3376 struct sock_fprog prog = { 3377 .len = (unsigned short)ARRAY_SIZE(filter), 3378 .filter = filter, 3379 }; 3380 3381 return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog); 3382 } 3383 3384 #define USER_NOTIF_MAGIC INT_MAX 3385 TEST(user_notification_basic) 3386 { 3387 pid_t pid; 3388 long ret; 3389 int status, listener; 3390 struct seccomp_notif req = {}; 3391 struct seccomp_notif_resp resp = {}; 3392 struct pollfd pollfd; 3393 3394 struct sock_filter filter[] = { 3395 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3396 }; 3397 struct sock_fprog prog = { 3398 .len = (unsigned short)ARRAY_SIZE(filter), 3399 .filter = filter, 3400 }; 3401 3402 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3403 ASSERT_EQ(0, ret) { 3404 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3405 } 3406 3407 pid = fork(); 3408 ASSERT_GE(pid, 0); 3409 3410 /* Check that we get -ENOSYS with no listener attached */ 3411 if (pid == 0) { 3412 if (user_notif_syscall(__NR_getppid, 0) < 0) 3413 exit(1); 3414 ret = syscall(__NR_getppid); 3415 exit(ret >= 0 || errno != ENOSYS); 3416 } 3417 3418 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3419 EXPECT_EQ(true, WIFEXITED(status)); 3420 EXPECT_EQ(0, WEXITSTATUS(status)); 3421 3422 /* Add some no-op filters for grins. */ 3423 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3424 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3425 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3426 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3427 3428 /* Check that the basic notification machinery works */ 3429 listener = user_notif_syscall(__NR_getppid, 3430 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3431 ASSERT_GE(listener, 0); 3432 3433 /* Installing a second listener in the chain should EBUSY */ 3434 EXPECT_EQ(user_notif_syscall(__NR_getppid, 3435 SECCOMP_FILTER_FLAG_NEW_LISTENER), 3436 -1); 3437 EXPECT_EQ(errno, EBUSY); 3438 3439 pid = fork(); 3440 ASSERT_GE(pid, 0); 3441 3442 if (pid == 0) { 3443 ret = syscall(__NR_getppid); 3444 exit(ret != USER_NOTIF_MAGIC); 3445 } 3446 3447 pollfd.fd = listener; 3448 pollfd.events = POLLIN | POLLOUT; 3449 3450 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3451 EXPECT_EQ(pollfd.revents, POLLIN); 3452 3453 /* Test that we can't pass garbage to the kernel. */ 3454 memset(&req, 0, sizeof(req)); 3455 req.pid = -1; 3456 errno = 0; 3457 ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req); 3458 EXPECT_EQ(-1, ret); 3459 EXPECT_EQ(EINVAL, errno); 3460 3461 if (ret) { 3462 req.pid = 0; 3463 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3464 } 3465 3466 pollfd.fd = listener; 3467 pollfd.events = POLLIN | POLLOUT; 3468 3469 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3470 EXPECT_EQ(pollfd.revents, POLLOUT); 3471 3472 EXPECT_EQ(req.data.nr, __NR_getppid); 3473 3474 resp.id = req.id; 3475 resp.error = 0; 3476 resp.val = USER_NOTIF_MAGIC; 3477 3478 /* check that we make sure flags == 0 */ 3479 resp.flags = 1; 3480 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3481 EXPECT_EQ(errno, EINVAL); 3482 3483 resp.flags = 0; 3484 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3485 3486 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3487 EXPECT_EQ(true, WIFEXITED(status)); 3488 EXPECT_EQ(0, WEXITSTATUS(status)); 3489 } 3490 3491 TEST(user_notification_with_tsync) 3492 { 3493 int ret; 3494 unsigned int flags; 3495 3496 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3497 ASSERT_EQ(0, ret) { 3498 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3499 } 3500 3501 /* these were exclusive */ 3502 flags = SECCOMP_FILTER_FLAG_NEW_LISTENER | 3503 SECCOMP_FILTER_FLAG_TSYNC; 3504 ASSERT_EQ(-1, user_notif_syscall(__NR_getppid, flags)); 3505 ASSERT_EQ(EINVAL, errno); 3506 3507 /* but now they're not */ 3508 flags |= SECCOMP_FILTER_FLAG_TSYNC_ESRCH; 3509 ret = user_notif_syscall(__NR_getppid, flags); 3510 close(ret); 3511 ASSERT_LE(0, ret); 3512 } 3513 3514 TEST(user_notification_kill_in_middle) 3515 { 3516 pid_t pid; 3517 long ret; 3518 int listener; 3519 struct seccomp_notif req = {}; 3520 struct seccomp_notif_resp resp = {}; 3521 3522 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3523 ASSERT_EQ(0, ret) { 3524 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3525 } 3526 3527 listener = user_notif_syscall(__NR_getppid, 3528 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3529 ASSERT_GE(listener, 0); 3530 3531 /* 3532 * Check that nothing bad happens when we kill the task in the middle 3533 * of a syscall. 3534 */ 3535 pid = fork(); 3536 ASSERT_GE(pid, 0); 3537 3538 if (pid == 0) { 3539 ret = syscall(__NR_getppid); 3540 exit(ret != USER_NOTIF_MAGIC); 3541 } 3542 3543 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3544 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), 0); 3545 3546 EXPECT_EQ(kill(pid, SIGKILL), 0); 3547 EXPECT_EQ(waitpid(pid, NULL, 0), pid); 3548 3549 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), -1); 3550 3551 resp.id = req.id; 3552 ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp); 3553 EXPECT_EQ(ret, -1); 3554 EXPECT_EQ(errno, ENOENT); 3555 } 3556 3557 static int handled = -1; 3558 3559 static void signal_handler(int signal) 3560 { 3561 if (write(handled, "c", 1) != 1) 3562 perror("write from signal"); 3563 } 3564 3565 static void signal_handler_nop(int signal) 3566 { 3567 } 3568 3569 TEST(user_notification_signal) 3570 { 3571 pid_t pid; 3572 long ret; 3573 int status, listener, sk_pair[2]; 3574 struct seccomp_notif req = {}; 3575 struct seccomp_notif_resp resp = {}; 3576 char c; 3577 3578 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3579 ASSERT_EQ(0, ret) { 3580 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3581 } 3582 3583 ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0); 3584 3585 listener = user_notif_syscall(__NR_gettid, 3586 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3587 ASSERT_GE(listener, 0); 3588 3589 pid = fork(); 3590 ASSERT_GE(pid, 0); 3591 3592 if (pid == 0) { 3593 close(sk_pair[0]); 3594 handled = sk_pair[1]; 3595 if (signal(SIGUSR1, signal_handler) == SIG_ERR) { 3596 perror("signal"); 3597 exit(1); 3598 } 3599 /* 3600 * ERESTARTSYS behavior is a bit hard to test, because we need 3601 * to rely on a signal that has not yet been handled. Let's at 3602 * least check that the error code gets propagated through, and 3603 * hope that it doesn't break when there is actually a signal :) 3604 */ 3605 ret = syscall(__NR_gettid); 3606 exit(!(ret == -1 && errno == 512)); 3607 } 3608 3609 close(sk_pair[1]); 3610 3611 memset(&req, 0, sizeof(req)); 3612 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3613 3614 EXPECT_EQ(kill(pid, SIGUSR1), 0); 3615 3616 /* 3617 * Make sure the signal really is delivered, which means we're not 3618 * stuck in the user notification code any more and the notification 3619 * should be dead. 3620 */ 3621 EXPECT_EQ(read(sk_pair[0], &c, 1), 1); 3622 3623 resp.id = req.id; 3624 resp.error = -EPERM; 3625 resp.val = 0; 3626 3627 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3628 EXPECT_EQ(errno, ENOENT); 3629 3630 memset(&req, 0, sizeof(req)); 3631 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3632 3633 resp.id = req.id; 3634 resp.error = -512; /* -ERESTARTSYS */ 3635 resp.val = 0; 3636 3637 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3638 3639 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3640 EXPECT_EQ(true, WIFEXITED(status)); 3641 EXPECT_EQ(0, WEXITSTATUS(status)); 3642 } 3643 3644 TEST(user_notification_closed_listener) 3645 { 3646 pid_t pid; 3647 long ret; 3648 int status, listener; 3649 3650 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3651 ASSERT_EQ(0, ret) { 3652 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3653 } 3654 3655 listener = user_notif_syscall(__NR_getppid, 3656 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3657 ASSERT_GE(listener, 0); 3658 3659 /* 3660 * Check that we get an ENOSYS when the listener is closed. 3661 */ 3662 pid = fork(); 3663 ASSERT_GE(pid, 0); 3664 if (pid == 0) { 3665 close(listener); 3666 ret = syscall(__NR_getppid); 3667 exit(ret != -1 && errno != ENOSYS); 3668 } 3669 3670 close(listener); 3671 3672 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3673 EXPECT_EQ(true, WIFEXITED(status)); 3674 EXPECT_EQ(0, WEXITSTATUS(status)); 3675 } 3676 3677 /* 3678 * Check that a pid in a child namespace still shows up as valid in ours. 3679 */ 3680 TEST(user_notification_child_pid_ns) 3681 { 3682 pid_t pid; 3683 int status, listener; 3684 struct seccomp_notif req = {}; 3685 struct seccomp_notif_resp resp = {}; 3686 3687 ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0) { 3688 if (errno == EINVAL) 3689 SKIP(return, "kernel missing CLONE_NEWUSER support"); 3690 }; 3691 3692 listener = user_notif_syscall(__NR_getppid, 3693 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3694 ASSERT_GE(listener, 0); 3695 3696 pid = fork(); 3697 ASSERT_GE(pid, 0); 3698 3699 if (pid == 0) 3700 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3701 3702 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3703 EXPECT_EQ(req.pid, pid); 3704 3705 resp.id = req.id; 3706 resp.error = 0; 3707 resp.val = USER_NOTIF_MAGIC; 3708 3709 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3710 3711 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3712 EXPECT_EQ(true, WIFEXITED(status)); 3713 EXPECT_EQ(0, WEXITSTATUS(status)); 3714 close(listener); 3715 } 3716 3717 /* 3718 * Check that a pid in a sibling (i.e. unrelated) namespace shows up as 0, i.e. 3719 * invalid. 3720 */ 3721 TEST(user_notification_sibling_pid_ns) 3722 { 3723 pid_t pid, pid2; 3724 int status, listener; 3725 struct seccomp_notif req = {}; 3726 struct seccomp_notif_resp resp = {}; 3727 3728 ASSERT_EQ(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), 0) { 3729 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3730 } 3731 3732 listener = user_notif_syscall(__NR_getppid, 3733 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3734 ASSERT_GE(listener, 0); 3735 3736 pid = fork(); 3737 ASSERT_GE(pid, 0); 3738 3739 if (pid == 0) { 3740 ASSERT_EQ(unshare(CLONE_NEWPID), 0) { 3741 if (errno == EPERM) 3742 SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN"); 3743 else if (errno == EINVAL) 3744 SKIP(return, "CLONE_NEWPID is invalid (missing CONFIG_PID_NS?)"); 3745 } 3746 3747 pid2 = fork(); 3748 ASSERT_GE(pid2, 0); 3749 3750 if (pid2 == 0) 3751 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3752 3753 EXPECT_EQ(waitpid(pid2, &status, 0), pid2); 3754 EXPECT_EQ(true, WIFEXITED(status)); 3755 EXPECT_EQ(0, WEXITSTATUS(status)); 3756 exit(WEXITSTATUS(status)); 3757 } 3758 3759 /* Create the sibling ns, and sibling in it. */ 3760 ASSERT_EQ(unshare(CLONE_NEWPID), 0) { 3761 if (errno == EPERM) 3762 SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN"); 3763 else if (errno == EINVAL) 3764 SKIP(return, "CLONE_NEWPID is invalid (missing CONFIG_PID_NS?)"); 3765 } 3766 ASSERT_EQ(errno, 0); 3767 3768 pid2 = fork(); 3769 ASSERT_GE(pid2, 0); 3770 3771 if (pid2 == 0) { 3772 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3773 /* 3774 * The pid should be 0, i.e. the task is in some namespace that 3775 * we can't "see". 3776 */ 3777 EXPECT_EQ(req.pid, 0); 3778 3779 resp.id = req.id; 3780 resp.error = 0; 3781 resp.val = USER_NOTIF_MAGIC; 3782 3783 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3784 exit(0); 3785 } 3786 3787 close(listener); 3788 3789 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3790 EXPECT_EQ(true, WIFEXITED(status)); 3791 EXPECT_EQ(0, WEXITSTATUS(status)); 3792 3793 EXPECT_EQ(waitpid(pid2, &status, 0), pid2); 3794 EXPECT_EQ(true, WIFEXITED(status)); 3795 EXPECT_EQ(0, WEXITSTATUS(status)); 3796 } 3797 3798 TEST(user_notification_fault_recv) 3799 { 3800 pid_t pid; 3801 int status, listener; 3802 struct seccomp_notif req = {}; 3803 struct seccomp_notif_resp resp = {}; 3804 3805 ASSERT_EQ(unshare(CLONE_NEWUSER), 0) { 3806 if (errno == EINVAL) 3807 SKIP(return, "kernel missing CLONE_NEWUSER support"); 3808 } 3809 3810 listener = user_notif_syscall(__NR_getppid, 3811 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3812 ASSERT_GE(listener, 0); 3813 3814 pid = fork(); 3815 ASSERT_GE(pid, 0); 3816 3817 if (pid == 0) 3818 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3819 3820 /* Do a bad recv() */ 3821 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, NULL), -1); 3822 EXPECT_EQ(errno, EFAULT); 3823 3824 /* We should still be able to receive this notification, though. */ 3825 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3826 EXPECT_EQ(req.pid, pid); 3827 3828 resp.id = req.id; 3829 resp.error = 0; 3830 resp.val = USER_NOTIF_MAGIC; 3831 3832 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3833 3834 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3835 EXPECT_EQ(true, WIFEXITED(status)); 3836 EXPECT_EQ(0, WEXITSTATUS(status)); 3837 } 3838 3839 TEST(seccomp_get_notif_sizes) 3840 { 3841 struct seccomp_notif_sizes sizes; 3842 3843 ASSERT_EQ(seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes), 0); 3844 EXPECT_EQ(sizes.seccomp_notif, sizeof(struct seccomp_notif)); 3845 EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp)); 3846 } 3847 3848 TEST(user_notification_continue) 3849 { 3850 pid_t pid; 3851 long ret; 3852 int status, listener; 3853 struct seccomp_notif req = {}; 3854 struct seccomp_notif_resp resp = {}; 3855 struct pollfd pollfd; 3856 3857 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3858 ASSERT_EQ(0, ret) { 3859 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3860 } 3861 3862 listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER); 3863 ASSERT_GE(listener, 0); 3864 3865 pid = fork(); 3866 ASSERT_GE(pid, 0); 3867 3868 if (pid == 0) { 3869 int dup_fd, pipe_fds[2]; 3870 pid_t self; 3871 3872 ASSERT_GE(pipe(pipe_fds), 0); 3873 3874 dup_fd = dup(pipe_fds[0]); 3875 ASSERT_GE(dup_fd, 0); 3876 EXPECT_NE(pipe_fds[0], dup_fd); 3877 3878 self = getpid(); 3879 ASSERT_EQ(filecmp(self, self, pipe_fds[0], dup_fd), 0); 3880 exit(0); 3881 } 3882 3883 pollfd.fd = listener; 3884 pollfd.events = POLLIN | POLLOUT; 3885 3886 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3887 EXPECT_EQ(pollfd.revents, POLLIN); 3888 3889 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3890 3891 pollfd.fd = listener; 3892 pollfd.events = POLLIN | POLLOUT; 3893 3894 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3895 EXPECT_EQ(pollfd.revents, POLLOUT); 3896 3897 EXPECT_EQ(req.data.nr, __NR_dup); 3898 3899 resp.id = req.id; 3900 resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE; 3901 3902 /* 3903 * Verify that setting SECCOMP_USER_NOTIF_FLAG_CONTINUE enforces other 3904 * args be set to 0. 3905 */ 3906 resp.error = 0; 3907 resp.val = USER_NOTIF_MAGIC; 3908 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3909 EXPECT_EQ(errno, EINVAL); 3910 3911 resp.error = USER_NOTIF_MAGIC; 3912 resp.val = 0; 3913 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3914 EXPECT_EQ(errno, EINVAL); 3915 3916 resp.error = 0; 3917 resp.val = 0; 3918 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0) { 3919 if (errno == EINVAL) 3920 SKIP(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE"); 3921 } 3922 3923 skip: 3924 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3925 EXPECT_EQ(true, WIFEXITED(status)); 3926 EXPECT_EQ(0, WEXITSTATUS(status)) { 3927 if (WEXITSTATUS(status) == 2) { 3928 SKIP(return, "Kernel does not support kcmp() syscall"); 3929 return; 3930 } 3931 } 3932 } 3933 3934 TEST(user_notification_filter_empty) 3935 { 3936 pid_t pid; 3937 long ret; 3938 int status; 3939 struct pollfd pollfd; 3940 struct __clone_args args = { 3941 .flags = CLONE_FILES, 3942 .exit_signal = SIGCHLD, 3943 }; 3944 3945 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3946 ASSERT_EQ(0, ret) { 3947 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3948 } 3949 3950 if (__NR_clone3 < 0) 3951 SKIP(return, "Test not built with clone3 support"); 3952 3953 pid = sys_clone3(&args, sizeof(args)); 3954 ASSERT_GE(pid, 0); 3955 3956 if (pid == 0) { 3957 int listener; 3958 3959 listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER); 3960 if (listener < 0) 3961 _exit(EXIT_FAILURE); 3962 3963 if (dup2(listener, 200) != 200) 3964 _exit(EXIT_FAILURE); 3965 3966 close(listener); 3967 3968 _exit(EXIT_SUCCESS); 3969 } 3970 3971 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3972 EXPECT_EQ(true, WIFEXITED(status)); 3973 EXPECT_EQ(0, WEXITSTATUS(status)); 3974 3975 /* 3976 * The seccomp filter has become unused so we should be notified once 3977 * the kernel gets around to cleaning up task struct. 3978 */ 3979 pollfd.fd = 200; 3980 pollfd.events = POLLHUP; 3981 3982 EXPECT_GT(poll(&pollfd, 1, 2000), 0); 3983 EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0); 3984 } 3985 3986 TEST(user_ioctl_notification_filter_empty) 3987 { 3988 pid_t pid; 3989 long ret; 3990 int status, p[2]; 3991 struct __clone_args args = { 3992 .flags = CLONE_FILES, 3993 .exit_signal = SIGCHLD, 3994 }; 3995 struct seccomp_notif req = {}; 3996 3997 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3998 ASSERT_EQ(0, ret) { 3999 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4000 } 4001 4002 if (__NR_clone3 < 0) 4003 SKIP(return, "Test not built with clone3 support"); 4004 4005 ASSERT_EQ(0, pipe(p)); 4006 4007 pid = sys_clone3(&args, sizeof(args)); 4008 ASSERT_GE(pid, 0); 4009 4010 if (pid == 0) { 4011 int listener; 4012 4013 listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER); 4014 if (listener < 0) 4015 _exit(EXIT_FAILURE); 4016 4017 if (dup2(listener, 200) != 200) 4018 _exit(EXIT_FAILURE); 4019 close(p[1]); 4020 close(listener); 4021 sleep(1); 4022 4023 _exit(EXIT_SUCCESS); 4024 } 4025 if (read(p[0], &status, 1) != 0) 4026 _exit(EXIT_SUCCESS); 4027 close(p[0]); 4028 /* 4029 * The seccomp filter has become unused so we should be notified once 4030 * the kernel gets around to cleaning up task struct. 4031 */ 4032 EXPECT_EQ(ioctl(200, SECCOMP_IOCTL_NOTIF_RECV, &req), -1); 4033 EXPECT_EQ(errno, ENOENT); 4034 4035 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4036 EXPECT_EQ(true, WIFEXITED(status)); 4037 EXPECT_EQ(0, WEXITSTATUS(status)); 4038 } 4039 4040 static void *do_thread(void *data) 4041 { 4042 return NULL; 4043 } 4044 4045 TEST(user_notification_filter_empty_threaded) 4046 { 4047 pid_t pid; 4048 long ret; 4049 int status; 4050 struct pollfd pollfd; 4051 struct __clone_args args = { 4052 .flags = CLONE_FILES, 4053 .exit_signal = SIGCHLD, 4054 }; 4055 4056 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4057 ASSERT_EQ(0, ret) { 4058 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4059 } 4060 4061 if (__NR_clone3 < 0) 4062 SKIP(return, "Test not built with clone3 support"); 4063 4064 pid = sys_clone3(&args, sizeof(args)); 4065 ASSERT_GE(pid, 0); 4066 4067 if (pid == 0) { 4068 pid_t pid1, pid2; 4069 int listener, status; 4070 pthread_t thread; 4071 4072 listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER); 4073 if (listener < 0) 4074 _exit(EXIT_FAILURE); 4075 4076 if (dup2(listener, 200) != 200) 4077 _exit(EXIT_FAILURE); 4078 4079 close(listener); 4080 4081 pid1 = fork(); 4082 if (pid1 < 0) 4083 _exit(EXIT_FAILURE); 4084 4085 if (pid1 == 0) 4086 _exit(EXIT_SUCCESS); 4087 4088 pid2 = fork(); 4089 if (pid2 < 0) 4090 _exit(EXIT_FAILURE); 4091 4092 if (pid2 == 0) 4093 _exit(EXIT_SUCCESS); 4094 4095 if (pthread_create(&thread, NULL, do_thread, NULL) || 4096 pthread_join(thread, NULL)) 4097 _exit(EXIT_FAILURE); 4098 4099 if (pthread_create(&thread, NULL, do_thread, NULL) || 4100 pthread_join(thread, NULL)) 4101 _exit(EXIT_FAILURE); 4102 4103 if (waitpid(pid1, &status, 0) != pid1 || !WIFEXITED(status) || 4104 WEXITSTATUS(status)) 4105 _exit(EXIT_FAILURE); 4106 4107 if (waitpid(pid2, &status, 0) != pid2 || !WIFEXITED(status) || 4108 WEXITSTATUS(status)) 4109 _exit(EXIT_FAILURE); 4110 4111 exit(EXIT_SUCCESS); 4112 } 4113 4114 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4115 EXPECT_EQ(true, WIFEXITED(status)); 4116 EXPECT_EQ(0, WEXITSTATUS(status)); 4117 4118 /* 4119 * The seccomp filter has become unused so we should be notified once 4120 * the kernel gets around to cleaning up task struct. 4121 */ 4122 pollfd.fd = 200; 4123 pollfd.events = POLLHUP; 4124 4125 EXPECT_GT(poll(&pollfd, 1, 2000), 0); 4126 EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0); 4127 } 4128 4129 4130 int get_next_fd(int prev_fd) 4131 { 4132 for (int i = prev_fd + 1; i < FD_SETSIZE; ++i) { 4133 if (fcntl(i, F_GETFD) == -1) 4134 return i; 4135 } 4136 _exit(EXIT_FAILURE); 4137 } 4138 4139 TEST(user_notification_addfd) 4140 { 4141 pid_t pid; 4142 long ret; 4143 int status, listener, memfd, fd, nextfd; 4144 struct seccomp_notif_addfd addfd = {}; 4145 struct seccomp_notif_addfd_small small = {}; 4146 struct seccomp_notif_addfd_big big = {}; 4147 struct seccomp_notif req = {}; 4148 struct seccomp_notif_resp resp = {}; 4149 /* 100 ms */ 4150 struct timespec delay = { .tv_nsec = 100000000 }; 4151 4152 /* There may be arbitrary already-open fds at test start. */ 4153 memfd = memfd_create("test", 0); 4154 ASSERT_GE(memfd, 0); 4155 nextfd = get_next_fd(memfd); 4156 4157 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4158 ASSERT_EQ(0, ret) { 4159 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4160 } 4161 4162 /* fd: 4 */ 4163 /* Check that the basic notification machinery works */ 4164 listener = user_notif_syscall(__NR_getppid, 4165 SECCOMP_FILTER_FLAG_NEW_LISTENER); 4166 ASSERT_EQ(listener, nextfd); 4167 nextfd = get_next_fd(nextfd); 4168 4169 pid = fork(); 4170 ASSERT_GE(pid, 0); 4171 4172 if (pid == 0) { 4173 /* fds will be added and this value is expected */ 4174 if (syscall(__NR_getppid) != USER_NOTIF_MAGIC) 4175 exit(1); 4176 4177 /* Atomic addfd+send is received here. Check it is a valid fd */ 4178 if (fcntl(syscall(__NR_getppid), F_GETFD) == -1) 4179 exit(1); 4180 4181 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 4182 } 4183 4184 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4185 4186 addfd.srcfd = memfd; 4187 addfd.newfd = 0; 4188 addfd.id = req.id; 4189 addfd.flags = 0x0; 4190 4191 /* Verify bad newfd_flags cannot be set */ 4192 addfd.newfd_flags = ~O_CLOEXEC; 4193 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 4194 EXPECT_EQ(errno, EINVAL); 4195 addfd.newfd_flags = O_CLOEXEC; 4196 4197 /* Verify bad flags cannot be set */ 4198 addfd.flags = 0xff; 4199 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 4200 EXPECT_EQ(errno, EINVAL); 4201 addfd.flags = 0; 4202 4203 /* Verify that remote_fd cannot be set without setting flags */ 4204 addfd.newfd = 1; 4205 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 4206 EXPECT_EQ(errno, EINVAL); 4207 addfd.newfd = 0; 4208 4209 /* Verify small size cannot be set */ 4210 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_SMALL, &small), -1); 4211 EXPECT_EQ(errno, EINVAL); 4212 4213 /* Verify we can't send bits filled in unknown buffer area */ 4214 memset(&big, 0xAA, sizeof(big)); 4215 big.addfd = addfd; 4216 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big), -1); 4217 EXPECT_EQ(errno, E2BIG); 4218 4219 4220 /* Verify we can set an arbitrary remote fd */ 4221 fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); 4222 EXPECT_EQ(fd, nextfd); 4223 nextfd = get_next_fd(nextfd); 4224 EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0); 4225 4226 /* Verify we can set an arbitrary remote fd with large size */ 4227 memset(&big, 0x0, sizeof(big)); 4228 big.addfd = addfd; 4229 fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big); 4230 EXPECT_EQ(fd, nextfd); 4231 nextfd = get_next_fd(nextfd); 4232 4233 /* Verify we can set a specific remote fd */ 4234 addfd.newfd = 42; 4235 addfd.flags = SECCOMP_ADDFD_FLAG_SETFD; 4236 fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); 4237 EXPECT_EQ(fd, 42); 4238 EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0); 4239 4240 /* Resume syscall */ 4241 resp.id = req.id; 4242 resp.error = 0; 4243 resp.val = USER_NOTIF_MAGIC; 4244 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4245 4246 /* 4247 * This sets the ID of the ADD FD to the last request plus 1. The 4248 * notification ID increments 1 per notification. 4249 */ 4250 addfd.id = req.id + 1; 4251 4252 /* This spins until the underlying notification is generated */ 4253 while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 && 4254 errno != -EINPROGRESS) 4255 nanosleep(&delay, NULL); 4256 4257 memset(&req, 0, sizeof(req)); 4258 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4259 ASSERT_EQ(addfd.id, req.id); 4260 4261 /* Verify we can do an atomic addfd and send */ 4262 addfd.newfd = 0; 4263 addfd.flags = SECCOMP_ADDFD_FLAG_SEND; 4264 fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); 4265 /* 4266 * Child has earlier "low" fds and now 42, so we expect the next 4267 * lowest available fd to be assigned here. 4268 */ 4269 EXPECT_EQ(fd, nextfd); 4270 nextfd = get_next_fd(nextfd); 4271 ASSERT_EQ(filecmp(getpid(), pid, memfd, fd), 0); 4272 4273 /* 4274 * This sets the ID of the ADD FD to the last request plus 1. The 4275 * notification ID increments 1 per notification. 4276 */ 4277 addfd.id = req.id + 1; 4278 4279 /* This spins until the underlying notification is generated */ 4280 while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 && 4281 errno != -EINPROGRESS) 4282 nanosleep(&delay, NULL); 4283 4284 memset(&req, 0, sizeof(req)); 4285 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4286 ASSERT_EQ(addfd.id, req.id); 4287 4288 resp.id = req.id; 4289 resp.error = 0; 4290 resp.val = USER_NOTIF_MAGIC; 4291 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4292 4293 /* Wait for child to finish. */ 4294 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4295 EXPECT_EQ(true, WIFEXITED(status)); 4296 EXPECT_EQ(0, WEXITSTATUS(status)); 4297 4298 close(memfd); 4299 } 4300 4301 TEST(user_notification_addfd_rlimit) 4302 { 4303 pid_t pid; 4304 long ret; 4305 int status, listener, memfd; 4306 struct seccomp_notif_addfd addfd = {}; 4307 struct seccomp_notif req = {}; 4308 struct seccomp_notif_resp resp = {}; 4309 const struct rlimit lim = { 4310 .rlim_cur = 0, 4311 .rlim_max = 0, 4312 }; 4313 4314 memfd = memfd_create("test", 0); 4315 ASSERT_GE(memfd, 0); 4316 4317 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4318 ASSERT_EQ(0, ret) { 4319 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4320 } 4321 4322 /* Check that the basic notification machinery works */ 4323 listener = user_notif_syscall(__NR_getppid, 4324 SECCOMP_FILTER_FLAG_NEW_LISTENER); 4325 ASSERT_GE(listener, 0); 4326 4327 pid = fork(); 4328 ASSERT_GE(pid, 0); 4329 4330 if (pid == 0) 4331 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 4332 4333 4334 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4335 4336 ASSERT_EQ(prlimit(pid, RLIMIT_NOFILE, &lim, NULL), 0); 4337 4338 addfd.srcfd = memfd; 4339 addfd.newfd_flags = O_CLOEXEC; 4340 addfd.newfd = 0; 4341 addfd.id = req.id; 4342 addfd.flags = 0; 4343 4344 /* Should probably spot check /proc/sys/fs/file-nr */ 4345 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 4346 EXPECT_EQ(errno, EMFILE); 4347 4348 addfd.flags = SECCOMP_ADDFD_FLAG_SEND; 4349 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 4350 EXPECT_EQ(errno, EMFILE); 4351 4352 addfd.newfd = 100; 4353 addfd.flags = SECCOMP_ADDFD_FLAG_SETFD; 4354 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 4355 EXPECT_EQ(errno, EBADF); 4356 4357 resp.id = req.id; 4358 resp.error = 0; 4359 resp.val = USER_NOTIF_MAGIC; 4360 4361 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4362 4363 /* Wait for child to finish. */ 4364 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4365 EXPECT_EQ(true, WIFEXITED(status)); 4366 EXPECT_EQ(0, WEXITSTATUS(status)); 4367 4368 close(memfd); 4369 } 4370 4371 #ifndef SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP 4372 #define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0) 4373 #define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64) 4374 #endif 4375 4376 TEST(user_notification_sync) 4377 { 4378 struct seccomp_notif req = {}; 4379 struct seccomp_notif_resp resp = {}; 4380 int status, listener; 4381 pid_t pid; 4382 long ret; 4383 4384 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4385 ASSERT_EQ(0, ret) { 4386 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4387 } 4388 4389 listener = user_notif_syscall(__NR_getppid, 4390 SECCOMP_FILTER_FLAG_NEW_LISTENER); 4391 ASSERT_GE(listener, 0); 4392 4393 /* Try to set invalid flags. */ 4394 EXPECT_SYSCALL_RETURN(-EINVAL, 4395 ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS, 0xffffffff, 0)); 4396 4397 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS, 4398 SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP, 0), 0); 4399 4400 pid = fork(); 4401 ASSERT_GE(pid, 0); 4402 if (pid == 0) { 4403 ret = syscall(__NR_getppid); 4404 ASSERT_EQ(ret, USER_NOTIF_MAGIC) { 4405 _exit(1); 4406 } 4407 _exit(0); 4408 } 4409 4410 req.pid = 0; 4411 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4412 4413 ASSERT_EQ(req.data.nr, __NR_getppid); 4414 4415 resp.id = req.id; 4416 resp.error = 0; 4417 resp.val = USER_NOTIF_MAGIC; 4418 resp.flags = 0; 4419 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4420 4421 ASSERT_EQ(waitpid(pid, &status, 0), pid); 4422 ASSERT_EQ(status, 0); 4423 } 4424 4425 4426 /* Make sure PTRACE_O_SUSPEND_SECCOMP requires CAP_SYS_ADMIN. */ 4427 FIXTURE(O_SUSPEND_SECCOMP) { 4428 pid_t pid; 4429 }; 4430 4431 FIXTURE_SETUP(O_SUSPEND_SECCOMP) 4432 { 4433 ERRNO_FILTER(block_read, E2BIG); 4434 cap_value_t cap_list[] = { CAP_SYS_ADMIN }; 4435 cap_t caps; 4436 4437 self->pid = 0; 4438 4439 /* make sure we don't have CAP_SYS_ADMIN */ 4440 caps = cap_get_proc(); 4441 ASSERT_NE(NULL, caps); 4442 ASSERT_EQ(0, cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_list, CAP_CLEAR)); 4443 ASSERT_EQ(0, cap_set_proc(caps)); 4444 cap_free(caps); 4445 4446 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); 4447 ASSERT_EQ(0, prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_block_read)); 4448 4449 self->pid = fork(); 4450 ASSERT_GE(self->pid, 0); 4451 4452 if (self->pid == 0) { 4453 while (1) 4454 pause(); 4455 _exit(127); 4456 } 4457 } 4458 4459 FIXTURE_TEARDOWN(O_SUSPEND_SECCOMP) 4460 { 4461 if (self->pid) 4462 kill(self->pid, SIGKILL); 4463 } 4464 4465 TEST_F(O_SUSPEND_SECCOMP, setoptions) 4466 { 4467 int wstatus; 4468 4469 ASSERT_EQ(0, ptrace(PTRACE_ATTACH, self->pid, NULL, 0)); 4470 ASSERT_EQ(self->pid, wait(&wstatus)); 4471 ASSERT_EQ(-1, ptrace(PTRACE_SETOPTIONS, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP)); 4472 if (errno == EINVAL) 4473 SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)"); 4474 ASSERT_EQ(EPERM, errno); 4475 } 4476 4477 TEST_F(O_SUSPEND_SECCOMP, seize) 4478 { 4479 int ret; 4480 4481 ret = ptrace(PTRACE_SEIZE, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP); 4482 ASSERT_EQ(-1, ret); 4483 if (errno == EINVAL) 4484 SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)"); 4485 ASSERT_EQ(EPERM, errno); 4486 } 4487 4488 /* 4489 * get_nth - Get the nth, space separated entry in a file. 4490 * 4491 * Returns the length of the read field. 4492 * Throws error if field is zero-lengthed. 4493 */ 4494 static ssize_t get_nth(struct __test_metadata *_metadata, const char *path, 4495 const unsigned int position, char **entry) 4496 { 4497 char *line = NULL; 4498 unsigned int i; 4499 ssize_t nread; 4500 size_t len = 0; 4501 FILE *f; 4502 4503 f = fopen(path, "r"); 4504 ASSERT_NE(f, NULL) { 4505 TH_LOG("Could not open %s: %s", path, strerror(errno)); 4506 } 4507 4508 for (i = 0; i < position; i++) { 4509 nread = getdelim(&line, &len, ' ', f); 4510 ASSERT_GE(nread, 0) { 4511 TH_LOG("Failed to read %d entry in file %s", i, path); 4512 } 4513 } 4514 fclose(f); 4515 4516 ASSERT_GT(nread, 0) { 4517 TH_LOG("Entry in file %s had zero length", path); 4518 } 4519 4520 *entry = line; 4521 return nread - 1; 4522 } 4523 4524 /* For a given PID, get the task state (D, R, etc...) */ 4525 static char get_proc_stat(struct __test_metadata *_metadata, pid_t pid) 4526 { 4527 char proc_path[100] = {0}; 4528 char status; 4529 char *line; 4530 4531 snprintf(proc_path, sizeof(proc_path), "/proc/%d/stat", pid); 4532 ASSERT_EQ(get_nth(_metadata, proc_path, 3, &line), 1); 4533 4534 status = *line; 4535 free(line); 4536 4537 return status; 4538 } 4539 4540 TEST(user_notification_fifo) 4541 { 4542 struct seccomp_notif_resp resp = {}; 4543 struct seccomp_notif req = {}; 4544 int i, status, listener; 4545 pid_t pid, pids[3]; 4546 __u64 baseid; 4547 long ret; 4548 /* 100 ms */ 4549 struct timespec delay = { .tv_nsec = 100000000 }; 4550 4551 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4552 ASSERT_EQ(0, ret) { 4553 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4554 } 4555 4556 /* Setup a listener */ 4557 listener = user_notif_syscall(__NR_getppid, 4558 SECCOMP_FILTER_FLAG_NEW_LISTENER); 4559 ASSERT_GE(listener, 0); 4560 4561 pid = fork(); 4562 ASSERT_GE(pid, 0); 4563 4564 if (pid == 0) { 4565 ret = syscall(__NR_getppid); 4566 exit(ret != USER_NOTIF_MAGIC); 4567 } 4568 4569 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4570 baseid = req.id + 1; 4571 4572 resp.id = req.id; 4573 resp.error = 0; 4574 resp.val = USER_NOTIF_MAGIC; 4575 4576 /* check that we make sure flags == 0 */ 4577 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4578 4579 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4580 EXPECT_EQ(true, WIFEXITED(status)); 4581 EXPECT_EQ(0, WEXITSTATUS(status)); 4582 4583 /* Start children, and generate notifications */ 4584 for (i = 0; i < ARRAY_SIZE(pids); i++) { 4585 pid = fork(); 4586 if (pid == 0) { 4587 ret = syscall(__NR_getppid); 4588 exit(ret != USER_NOTIF_MAGIC); 4589 } 4590 pids[i] = pid; 4591 } 4592 4593 /* This spins until all of the children are sleeping */ 4594 restart_wait: 4595 for (i = 0; i < ARRAY_SIZE(pids); i++) { 4596 if (get_proc_stat(_metadata, pids[i]) != 'S') { 4597 nanosleep(&delay, NULL); 4598 goto restart_wait; 4599 } 4600 } 4601 4602 /* Read the notifications in order (and respond) */ 4603 for (i = 0; i < ARRAY_SIZE(pids); i++) { 4604 memset(&req, 0, sizeof(req)); 4605 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4606 EXPECT_EQ(req.id, baseid + i); 4607 resp.id = req.id; 4608 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4609 } 4610 4611 /* Make sure notifications were received */ 4612 for (i = 0; i < ARRAY_SIZE(pids); i++) { 4613 EXPECT_EQ(waitpid(pids[i], &status, 0), pids[i]); 4614 EXPECT_EQ(true, WIFEXITED(status)); 4615 EXPECT_EQ(0, WEXITSTATUS(status)); 4616 } 4617 } 4618 4619 /* get_proc_syscall - Get the syscall in progress for a given pid 4620 * 4621 * Returns the current syscall number for a given process 4622 * Returns -1 if not in syscall (running or blocked) 4623 */ 4624 static long get_proc_syscall(struct __test_metadata *_metadata, int pid) 4625 { 4626 char proc_path[100] = {0}; 4627 long ret = -1; 4628 ssize_t nread; 4629 char *line; 4630 4631 snprintf(proc_path, sizeof(proc_path), "/proc/%d/syscall", pid); 4632 nread = get_nth(_metadata, proc_path, 1, &line); 4633 ASSERT_GT(nread, 0); 4634 4635 if (!strncmp("running", line, MIN(7, nread))) 4636 ret = strtol(line, NULL, 16); 4637 4638 free(line); 4639 return ret; 4640 } 4641 4642 /* Ensure non-fatal signals prior to receive are unmodified */ 4643 TEST(user_notification_wait_killable_pre_notification) 4644 { 4645 struct sigaction new_action = { 4646 .sa_handler = signal_handler, 4647 }; 4648 int listener, status, sk_pair[2]; 4649 pid_t pid; 4650 long ret; 4651 char c; 4652 /* 100 ms */ 4653 struct timespec delay = { .tv_nsec = 100000000 }; 4654 4655 ASSERT_EQ(sigemptyset(&new_action.sa_mask), 0); 4656 4657 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4658 ASSERT_EQ(0, ret) 4659 { 4660 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4661 } 4662 4663 ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0); 4664 4665 listener = user_notif_syscall( 4666 __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER | 4667 SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV); 4668 ASSERT_GE(listener, 0); 4669 4670 /* 4671 * Check that we can kill the process with SIGUSR1 prior to receiving 4672 * the notification. SIGUSR1 is wired up to a custom signal handler, 4673 * and make sure it gets called. 4674 */ 4675 pid = fork(); 4676 ASSERT_GE(pid, 0); 4677 4678 if (pid == 0) { 4679 close(sk_pair[0]); 4680 handled = sk_pair[1]; 4681 4682 /* Setup the non-fatal sigaction without SA_RESTART */ 4683 if (sigaction(SIGUSR1, &new_action, NULL)) { 4684 perror("sigaction"); 4685 exit(1); 4686 } 4687 4688 ret = syscall(__NR_getppid); 4689 /* Make sure we got a return from a signal interruption */ 4690 exit(ret != -1 || errno != EINTR); 4691 } 4692 4693 /* 4694 * Make sure we've gotten to the seccomp user notification wait 4695 * from getppid prior to sending any signals 4696 */ 4697 while (get_proc_syscall(_metadata, pid) != __NR_getppid && 4698 get_proc_stat(_metadata, pid) != 'S') 4699 nanosleep(&delay, NULL); 4700 4701 /* Send non-fatal kill signal */ 4702 EXPECT_EQ(kill(pid, SIGUSR1), 0); 4703 4704 /* wait for process to exit (exit checks for EINTR) */ 4705 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4706 EXPECT_EQ(true, WIFEXITED(status)); 4707 EXPECT_EQ(0, WEXITSTATUS(status)); 4708 4709 EXPECT_EQ(read(sk_pair[0], &c, 1), 1); 4710 } 4711 4712 /* Ensure non-fatal signals after receive are blocked */ 4713 TEST(user_notification_wait_killable) 4714 { 4715 struct sigaction new_action = { 4716 .sa_handler = signal_handler, 4717 }; 4718 struct seccomp_notif_resp resp = {}; 4719 struct seccomp_notif req = {}; 4720 int listener, status, sk_pair[2]; 4721 pid_t pid; 4722 long ret; 4723 char c; 4724 /* 100 ms */ 4725 struct timespec delay = { .tv_nsec = 100000000 }; 4726 4727 ASSERT_EQ(sigemptyset(&new_action.sa_mask), 0); 4728 4729 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4730 ASSERT_EQ(0, ret) 4731 { 4732 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4733 } 4734 4735 ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0); 4736 4737 listener = user_notif_syscall( 4738 __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER | 4739 SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV); 4740 ASSERT_GE(listener, 0); 4741 4742 pid = fork(); 4743 ASSERT_GE(pid, 0); 4744 4745 if (pid == 0) { 4746 close(sk_pair[0]); 4747 handled = sk_pair[1]; 4748 4749 /* Setup the sigaction without SA_RESTART */ 4750 if (sigaction(SIGUSR1, &new_action, NULL)) { 4751 perror("sigaction"); 4752 exit(1); 4753 } 4754 4755 /* Make sure that the syscall is completed (no EINTR) */ 4756 ret = syscall(__NR_getppid); 4757 exit(ret != USER_NOTIF_MAGIC); 4758 } 4759 4760 /* 4761 * Get the notification, to make move the notifying process into a 4762 * non-preemptible (TASK_KILLABLE) state. 4763 */ 4764 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4765 /* Send non-fatal kill signal */ 4766 EXPECT_EQ(kill(pid, SIGUSR1), 0); 4767 4768 /* 4769 * Make sure the task enters moves to TASK_KILLABLE by waiting for 4770 * D (Disk Sleep) state after receiving non-fatal signal. 4771 */ 4772 while (get_proc_stat(_metadata, pid) != 'D') 4773 nanosleep(&delay, NULL); 4774 4775 resp.id = req.id; 4776 resp.val = USER_NOTIF_MAGIC; 4777 /* Make sure the notification is found and able to be replied to */ 4778 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4779 4780 /* 4781 * Make sure that the signal handler does get called once we're back in 4782 * userspace. 4783 */ 4784 EXPECT_EQ(read(sk_pair[0], &c, 1), 1); 4785 /* wait for process to exit (exit checks for USER_NOTIF_MAGIC) */ 4786 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4787 EXPECT_EQ(true, WIFEXITED(status)); 4788 EXPECT_EQ(0, WEXITSTATUS(status)); 4789 } 4790 4791 /* Ensure fatal signals after receive are not blocked */ 4792 TEST(user_notification_wait_killable_fatal) 4793 { 4794 struct seccomp_notif req = {}; 4795 int listener, status; 4796 pid_t pid; 4797 long ret; 4798 /* 100 ms */ 4799 struct timespec delay = { .tv_nsec = 100000000 }; 4800 4801 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4802 ASSERT_EQ(0, ret) 4803 { 4804 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4805 } 4806 4807 listener = user_notif_syscall( 4808 __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER | 4809 SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV); 4810 ASSERT_GE(listener, 0); 4811 4812 pid = fork(); 4813 ASSERT_GE(pid, 0); 4814 4815 if (pid == 0) { 4816 /* This should never complete as it should get a SIGTERM */ 4817 syscall(__NR_getppid); 4818 exit(1); 4819 } 4820 4821 while (get_proc_stat(_metadata, pid) != 'S') 4822 nanosleep(&delay, NULL); 4823 4824 /* 4825 * Get the notification, to make move the notifying process into a 4826 * non-preemptible (TASK_KILLABLE) state. 4827 */ 4828 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4829 /* Kill the process with a fatal signal */ 4830 EXPECT_EQ(kill(pid, SIGTERM), 0); 4831 4832 /* 4833 * Wait for the process to exit, and make sure the process terminated 4834 * due to the SIGTERM signal. 4835 */ 4836 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4837 EXPECT_EQ(true, WIFSIGNALED(status)); 4838 EXPECT_EQ(SIGTERM, WTERMSIG(status)); 4839 } 4840 4841 /* Ensure signals after the reply do not interrupt */ 4842 TEST(user_notification_wait_killable_after_reply) 4843 { 4844 int i, max_iter = 100000; 4845 int listener, status; 4846 int pipe_fds[2]; 4847 pid_t pid; 4848 long ret; 4849 4850 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4851 ASSERT_EQ(0, ret) 4852 { 4853 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4854 } 4855 4856 listener = user_notif_syscall( 4857 __NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER | 4858 SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV); 4859 ASSERT_GE(listener, 0); 4860 4861 /* 4862 * Used to count invocations. One token is transferred from the child 4863 * to the parent per syscall invocation, the parent tries to take 4864 * one token per successful RECV. If the syscall is restarted after 4865 * RECV the parent will try to get two tokens while the child only 4866 * provided one. 4867 */ 4868 ASSERT_EQ(pipe(pipe_fds), 0); 4869 4870 pid = fork(); 4871 ASSERT_GE(pid, 0); 4872 4873 if (pid == 0) { 4874 struct sigaction new_action = { 4875 .sa_handler = signal_handler_nop, 4876 .sa_flags = SA_RESTART, 4877 }; 4878 struct itimerval timer = { 4879 .it_value = { .tv_usec = 1000 }, 4880 .it_interval = { .tv_usec = 1000 }, 4881 }; 4882 char c = 'a'; 4883 4884 close(pipe_fds[0]); 4885 4886 /* Setup the sigaction with SA_RESTART */ 4887 if (sigaction(SIGALRM, &new_action, NULL)) { 4888 perror("sigaction"); 4889 exit(1); 4890 } 4891 4892 /* 4893 * Kill with SIGALRM repeatedly, to try to hit the race when 4894 * handling the syscall. 4895 */ 4896 if (setitimer(ITIMER_REAL, &timer, NULL) < 0) 4897 perror("setitimer"); 4898 4899 for (i = 0; i < max_iter; ++i) { 4900 int fd; 4901 4902 /* Send one token per iteration to catch repeats. */ 4903 if (write(pipe_fds[1], &c, sizeof(c)) != 1) { 4904 perror("write"); 4905 exit(1); 4906 } 4907 4908 fd = syscall(__NR_dup, 0); 4909 if (fd < 0) { 4910 perror("dup"); 4911 exit(1); 4912 } 4913 close(fd); 4914 } 4915 4916 exit(0); 4917 } 4918 4919 close(pipe_fds[1]); 4920 4921 for (i = 0; i < max_iter; ++i) { 4922 struct seccomp_notif req = {}; 4923 struct seccomp_notif_addfd addfd = {}; 4924 struct pollfd pfd = { 4925 .fd = pipe_fds[0], 4926 .events = POLLIN, 4927 }; 4928 char c; 4929 4930 /* 4931 * Try to receive one token. If it failed, one child syscall 4932 * was restarted after RECV and needed to be handled twice. 4933 */ 4934 ASSERT_EQ(poll(&pfd, 1, 1000), 1) 4935 kill(pid, SIGKILL); 4936 4937 ASSERT_EQ(read(pipe_fds[0], &c, sizeof(c)), 1) 4938 kill(pid, SIGKILL); 4939 4940 /* 4941 * Get the notification, reply to it as fast as possible to test 4942 * whether the child wrongly skips going into the non-preemptible 4943 * (TASK_KILLABLE) state. 4944 */ 4945 do 4946 ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req); 4947 while (ret < 0 && errno == ENOENT); /* Accept interruptions before RECV */ 4948 ASSERT_EQ(ret, 0) 4949 kill(pid, SIGKILL); 4950 4951 addfd.id = req.id; 4952 addfd.flags = SECCOMP_ADDFD_FLAG_SEND; 4953 addfd.srcfd = 0; 4954 ASSERT_GE(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), 0) 4955 kill(pid, SIGKILL); 4956 } 4957 4958 /* 4959 * Wait for the process to exit, and make sure the process terminated 4960 * with a zero exit code.. 4961 */ 4962 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4963 EXPECT_EQ(true, WIFEXITED(status)); 4964 EXPECT_EQ(0, WEXITSTATUS(status)); 4965 } 4966 4967 struct tsync_vs_thread_leader_args { 4968 pthread_t leader; 4969 }; 4970 4971 static void *tsync_vs_dead_thread_leader_sibling(void *_args) 4972 { 4973 struct sock_filter allow_filter[] = { 4974 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 4975 }; 4976 struct sock_fprog allow_prog = { 4977 .len = (unsigned short)ARRAY_SIZE(allow_filter), 4978 .filter = allow_filter, 4979 }; 4980 struct tsync_vs_thread_leader_args *args = _args; 4981 void *retval; 4982 long ret; 4983 4984 ret = pthread_join(args->leader, &retval); 4985 if (ret) 4986 exit(1); 4987 if (retval != _args) 4988 exit(2); 4989 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &allow_prog); 4990 if (ret) 4991 exit(3); 4992 4993 exit(0); 4994 } 4995 4996 /* 4997 * Ensure that a dead thread leader doesn't prevent installing new filters with 4998 * SECCOMP_FILTER_FLAG_TSYNC from other threads. 4999 */ 5000 TEST(tsync_vs_dead_thread_leader) 5001 { 5002 int status; 5003 pid_t pid; 5004 long ret; 5005 5006 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 5007 ASSERT_EQ(0, ret) { 5008 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 5009 } 5010 5011 pid = fork(); 5012 ASSERT_GE(pid, 0); 5013 5014 if (pid == 0) { 5015 struct sock_filter allow_filter[] = { 5016 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 5017 }; 5018 struct sock_fprog allow_prog = { 5019 .len = (unsigned short)ARRAY_SIZE(allow_filter), 5020 .filter = allow_filter, 5021 }; 5022 struct tsync_vs_thread_leader_args *args; 5023 pthread_t sibling; 5024 5025 args = malloc(sizeof(*args)); 5026 ASSERT_NE(NULL, args); 5027 args->leader = pthread_self(); 5028 5029 ret = pthread_create(&sibling, NULL, 5030 tsync_vs_dead_thread_leader_sibling, args); 5031 ASSERT_EQ(0, ret); 5032 5033 /* Install a new filter just to the leader thread. */ 5034 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog); 5035 ASSERT_EQ(0, ret); 5036 pthread_exit(args); 5037 exit(1); 5038 } 5039 5040 EXPECT_EQ(pid, waitpid(pid, &status, 0)); 5041 EXPECT_EQ(0, status); 5042 } 5043 5044 #ifdef __x86_64__ 5045 5046 /* 5047 * We need naked probed_uprobe function. Using __nocf_check 5048 * check to skip possible endbr64 instruction and ignoring 5049 * -Wattributes, otherwise the compilation might fail. 5050 */ 5051 #pragma GCC diagnostic push 5052 #pragma GCC diagnostic ignored "-Wattributes" 5053 5054 __naked __nocf_check noinline int probed_uprobe(void) 5055 { 5056 /* 5057 * Optimized uprobe is possible only on top of nop5 instruction. 5058 */ 5059 asm volatile (" \n" 5060 ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00 \n" 5061 "ret \n" 5062 ); 5063 } 5064 #pragma GCC diagnostic pop 5065 5066 #else 5067 noinline int probed_uprobe(void) 5068 { 5069 return 1; 5070 } 5071 #endif 5072 5073 noinline int probed_uretprobe(void) 5074 { 5075 return 1; 5076 } 5077 5078 static int parse_uint_from_file(const char *file, const char *fmt) 5079 { 5080 int err = -1, ret; 5081 FILE *f; 5082 5083 f = fopen(file, "re"); 5084 if (f) { 5085 err = fscanf(f, fmt, &ret); 5086 fclose(f); 5087 } 5088 return err == 1 ? ret : err; 5089 } 5090 5091 static int determine_uprobe_perf_type(void) 5092 { 5093 const char *file = "/sys/bus/event_source/devices/uprobe/type"; 5094 5095 return parse_uint_from_file(file, "%d\n"); 5096 } 5097 5098 static int determine_uprobe_retprobe_bit(void) 5099 { 5100 const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe"; 5101 5102 return parse_uint_from_file(file, "config:%d\n"); 5103 } 5104 5105 static ssize_t get_uprobe_offset(const void *addr) 5106 { 5107 size_t start, base, end; 5108 bool found = false; 5109 char buf[256]; 5110 FILE *f; 5111 5112 f = fopen("/proc/self/maps", "r"); 5113 if (!f) 5114 return -1; 5115 5116 while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &base) == 4) { 5117 if (buf[2] == 'x' && (uintptr_t)addr >= start && (uintptr_t)addr < end) { 5118 found = true; 5119 break; 5120 } 5121 } 5122 fclose(f); 5123 return found ? (uintptr_t)addr - start + base : -1; 5124 } 5125 5126 FIXTURE(UPROBE) { 5127 int fd; 5128 }; 5129 5130 FIXTURE_VARIANT(UPROBE) { 5131 /* 5132 * All of the U(RET)PROBE behaviors can be tested with either 5133 * u(ret)probe attached or not 5134 */ 5135 bool attach; 5136 /* 5137 * Test both uprobe and uretprobe. 5138 */ 5139 bool uretprobe; 5140 }; 5141 5142 FIXTURE_VARIANT_ADD(UPROBE, not_attached) { 5143 .attach = false, 5144 .uretprobe = false, 5145 }; 5146 5147 FIXTURE_VARIANT_ADD(UPROBE, uprobe_attached) { 5148 .attach = true, 5149 .uretprobe = false, 5150 }; 5151 5152 FIXTURE_VARIANT_ADD(UPROBE, uretprobe_attached) { 5153 .attach = true, 5154 .uretprobe = true, 5155 }; 5156 5157 FIXTURE_SETUP(UPROBE) 5158 { 5159 const size_t attr_sz = sizeof(struct perf_event_attr); 5160 struct perf_event_attr attr; 5161 ssize_t offset; 5162 int type, bit; 5163 5164 #if !defined(__NR_uprobe) || !defined(__NR_uretprobe) 5165 SKIP(return, "__NR_uprobe ot __NR_uretprobe syscalls not defined"); 5166 #endif 5167 5168 if (!variant->attach) 5169 return; 5170 5171 memset(&attr, 0, attr_sz); 5172 5173 type = determine_uprobe_perf_type(); 5174 ASSERT_GE(type, 0); 5175 5176 if (variant->uretprobe) { 5177 bit = determine_uprobe_retprobe_bit(); 5178 ASSERT_GE(bit, 0); 5179 } 5180 5181 offset = get_uprobe_offset(variant->uretprobe ? probed_uretprobe : probed_uprobe); 5182 ASSERT_GE(offset, 0); 5183 5184 if (variant->uretprobe) 5185 attr.config |= 1 << bit; 5186 attr.size = attr_sz; 5187 attr.type = type; 5188 attr.config1 = ptr_to_u64("/proc/self/exe"); 5189 attr.config2 = offset; 5190 5191 self->fd = syscall(__NR_perf_event_open, &attr, 5192 getpid() /* pid */, -1 /* cpu */, -1 /* group_fd */, 5193 PERF_FLAG_FD_CLOEXEC); 5194 } 5195 5196 FIXTURE_TEARDOWN(UPROBE) 5197 { 5198 /* we could call close(self->fd), but we'd need extra filter for 5199 * that and since we are calling _exit right away.. 5200 */ 5201 } 5202 5203 static int run_probed_with_filter(struct sock_fprog *prog) 5204 { 5205 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) || 5206 seccomp(SECCOMP_SET_MODE_FILTER, 0, prog)) { 5207 return -1; 5208 } 5209 5210 /* 5211 * Uprobe is optimized after first hit, so let's hit twice. 5212 */ 5213 probed_uprobe(); 5214 probed_uprobe(); 5215 5216 probed_uretprobe(); 5217 return 0; 5218 } 5219 5220 TEST_F(UPROBE, uprobe_default_allow) 5221 { 5222 struct sock_filter filter[] = { 5223 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 5224 }; 5225 struct sock_fprog prog = { 5226 .len = (unsigned short)ARRAY_SIZE(filter), 5227 .filter = filter, 5228 }; 5229 5230 ASSERT_EQ(0, run_probed_with_filter(&prog)); 5231 } 5232 5233 TEST_F(UPROBE, uprobe_default_block) 5234 { 5235 struct sock_filter filter[] = { 5236 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 5237 offsetof(struct seccomp_data, nr)), 5238 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit_group, 1, 0), 5239 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 5240 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 5241 }; 5242 struct sock_fprog prog = { 5243 .len = (unsigned short)ARRAY_SIZE(filter), 5244 .filter = filter, 5245 }; 5246 5247 ASSERT_EQ(0, run_probed_with_filter(&prog)); 5248 } 5249 5250 TEST_F(UPROBE, uprobe_block_syscall) 5251 { 5252 struct sock_filter filter[] = { 5253 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 5254 offsetof(struct seccomp_data, nr)), 5255 #ifdef __NR_uprobe 5256 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uprobe, 1, 2), 5257 #endif 5258 #ifdef __NR_uretprobe 5259 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uretprobe, 0, 1), 5260 #endif 5261 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 5262 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 5263 }; 5264 struct sock_fprog prog = { 5265 .len = (unsigned short)ARRAY_SIZE(filter), 5266 .filter = filter, 5267 }; 5268 5269 ASSERT_EQ(0, run_probed_with_filter(&prog)); 5270 } 5271 5272 TEST_F(UPROBE, uprobe_default_block_with_syscall) 5273 { 5274 struct sock_filter filter[] = { 5275 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 5276 offsetof(struct seccomp_data, nr)), 5277 #ifdef __NR_uprobe 5278 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uprobe, 3, 0), 5279 #endif 5280 #ifdef __NR_uretprobe 5281 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uretprobe, 2, 0), 5282 #endif 5283 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit_group, 1, 0), 5284 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 5285 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 5286 }; 5287 struct sock_fprog prog = { 5288 .len = (unsigned short)ARRAY_SIZE(filter), 5289 .filter = filter, 5290 }; 5291 5292 ASSERT_EQ(0, run_probed_with_filter(&prog)); 5293 } 5294 5295 /* 5296 * TODO: 5297 * - expand NNP testing 5298 * - better arch-specific TRACE and TRAP handlers. 5299 * - endianness checking when appropriate 5300 * - 64-bit arg prodding 5301 * - arch value testing (x86 modes especially) 5302 * - verify that FILTER_FLAG_LOG filters generate log messages 5303 * - verify that RET_LOG generates log messages 5304 */ 5305 5306 TEST_HARNESS_MAIN 5307